Added ROC Curve Visualizations, Confidence, and annotation chunker updates #128

Open
wants to merge 36 commits into base: main
Changes from all commits
Commits
36 commits
d230a23
Added label chunker
shreyasar2202 May 20, 2022
7f8ba14
Merge pull request #111 from shreyasar2202/label_chunker
JacobGlennAyers May 21, 2022
5457166
Merge branch 'main' into label_chunker
Sean1572 Jun 28, 2022
37d34bc
Updated documentation/minor fixes
Sean1572 Jun 28, 2022
62bb172
Merge branch 'main' into label_chunker
Sean1572 Jun 29, 2022
166c64d
Update tutorial to include label_chunker method
Sean1572 Jun 29, 2022
c8b0c12
Merge branch 'label_chunker' of https://github.com/UCSD-E4E/PyHa into…
Sean1572 Jun 29, 2022
03bf0bc
updated label chunker documentation
sprestrelski Jun 29, 2022
f923f2c
script to produce ROC curves
Vanessa-Salgado Jul 8, 2022
69043dc
script to produce ROC curves
Vanessa-Salgado Jul 8, 2022
7cc68d8
script to produce ROC curves
Vanessa-Salgado Jul 8, 2022
07c2135
script to produce ROC curves
Vanessa-Salgado Jul 8, 2022
a03e692
script to produce ROC curves
Vanessa-Salgado Jul 8, 2022
66d3943
Fixed Bugs, Got ROC Curves to Generate
Sean1572 Jul 8, 2022
d79a8ba
Added local_score_array to birdnet
Sean1572 Jul 8, 2022
871ed62
added local_score_dir to tweetyNET
Vanessa-Salgado Jul 8, 2022
1d289c7
Added roc_curves for all indexs in local score array
Sean1572 Jul 8, 2022
e0775a4
Clean up prints
Sean1572 Jul 8, 2022
3020a7b
renamed ROC curves
Sean1572 Jul 8, 2022
1a4cb48
Model_Comparision_test
Sean1572 Jul 9, 2022
286c5d0
Testing
Sean1572 Jul 11, 2022
a7690b0
Moved code for visulizations.py
Sean1572 Jul 11, 2022
be22d82
Fixed ISOaudio so local_score output is no default
Sean1572 Jul 11, 2022
600dbae
Added better roc curve function
Sean1572 Jul 11, 2022
2413eed
Added comments
Sean1572 Jul 11, 2022
82d6a8c
Merge branch 'main' into label_chunker
Sean1572 Jul 11, 2022
bf278d0
Added Documentation
Sean1572 Jul 11, 2022
5ab566e
Fixed issue where mutliple species would cause a larger dataframe tha…
Sean1572 Jul 11, 2022
d9b10dd
Fixed birdnet and mircofaune bugs with the annotation chunker
Sean1572 Jul 11, 2022
cc4efb5
Added additional fixes to handle other edge cases
Sean1572 Jul 12, 2022
65c9fe8
Clean Print Output
Sean1572 Jul 12, 2022
5402de5
Added progress indicator with tqdm
RyanHUNGry Nov 28, 2022
78bc4ff
Merge branch 'label_chunker' of https://github.com/UCSD-E4E/PyHa into…
RyanHUNGry Nov 28, 2022
ec72812
Merge remote-tracking branch 'origin/main' into label_chunker
RyanHUNGry Nov 28, 2022
9404391
Fixed math import
RyanHUNGry Nov 28, 2022
21be67b
Fixed notebook errors
RyanHUNGry Nov 28, 2022
195 changes: 195 additions & 0 deletions Piha_BirdNET_Chunk.csv

Large diffs are not rendered by default.

2,967 changes: 2,967 additions & 0 deletions Piha_Microfaune_Chunk_median_025_400.csv

Large diffs are not rendered by default.

232 changes: 232 additions & 0 deletions Piha_TweetyNET_Chunk.csv

Large diffs are not rendered by default.

124 changes: 111 additions & 13 deletions PyHa/IsoAutio.py
@@ -9,8 +9,9 @@
import pandas as pd
import scipy.signal as scipy_signal
import numpy as np
from math import ceil
from math import ceil, floor
from copy import deepcopy
from tqdm import tqdm


def checkVerbose(
@@ -773,8 +774,10 @@ def generate_automated_labels_birdnet(audio_dir, isolation_parameters):
Dataframe of automated labels for the audio clip(s) in audio_dir.
"""
annotations = analyze(audio_path=audio_dir, **isolation_parameters)
return annotations

local_scores_dir = {}
for file in np.unique(annotations["IN FILE"]):
local_scores_dir[file] = annotations["CONFIDENCE"].to_list()
return annotations, local_scores_dir
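
As written, every key in local_scores_dir receives the full CONFIDENCE column rather than only that file's scores. A minimal per-file sketch (not part of this diff, assuming annotations is a dataframe with the "IN FILE" and "CONFIDENCE" columns used above) would filter before storing:

local_scores_dir = {}
for file in np.unique(annotations["IN FILE"]):
    # keep only the confidence values belonging to this file
    file_mask = annotations["IN FILE"] == file
    local_scores_dir[file] = annotations.loc[file_mask, "CONFIDENCE"].to_list()
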
def generate_automated_labels_microfaune(
audio_dir,
isolation_parameters,
@@ -825,8 +828,10 @@ def generate_automated_labels_microfaune(

# init labels dataframe
annotations = pd.DataFrame()
local_score_dir = {}

# generate local scores for every bird file in chosen directory
for audio_file in os.listdir(audio_dir):
for audio_file in tqdm(os.listdir(audio_dir), desc=f"Processing labels for {audio_dir}", colour='green'):
# skip directories
if os.path.isdir(audio_dir + audio_file):
continue
@@ -871,6 +876,8 @@ def generate_automated_labels_microfaune(
# get duration of clip
duration = len(SIGNAL) / SAMPLE_RATE

local_score_dir[audio_file] = local_scores[0]
#print(local_scores[0])
try:
# Running moment to moment algorithm and appending to a master
# dataframe.
@@ -883,7 +890,13 @@ def generate_automated_labels_microfaune(
isolation_parameters,
manual_id=manual_id,
normalize_local_scores=normalize_local_scores)
# print(new_entry)
#print(new_entry)

#determine confidence of annotations
new_entry = add_confidence_to_annotations(new_entry, local_scores[0])

#print(new_entry)

if annotations.empty:
annotations = new_entry
else:
@@ -893,7 +906,7 @@ def generate_automated_labels_microfaune(
continue
# Quick fix to indexing
annotations.reset_index(inplace=True, drop=True)
return annotations
return annotations, local_score_dir

def generate_automated_labels_tweetynet(
audio_dir,
@@ -941,8 +954,9 @@ def generate_automated_labels_tweetynet(

# init labels dataframe
annotations = pd.DataFrame()
local_score_dir = {}
# generate local scores for every bird file in chosen directory
for audio_file in os.listdir(audio_dir):
for audio_file in tqdm(os.listdir(audio_dir), desc=f"Processing labels for {audio_dir}", colour='green'):
# skip directories
if os.path.isdir(audio_dir + audio_file):
continue
@@ -1003,16 +1017,21 @@ def generate_automated_labels_tweetynet(
manual_id=manual_id,
normalize_local_scores=normalize_local_scores)
# print(new_entry)

new_entry = add_confidence_to_annotations(new_entry, local_scores[0])


if annotations.empty:
annotations = new_entry
else:
annotations = annotations.append(new_entry)
except BaseException as e:
checkVerbose("Error in isolating bird calls from" + audio_file, isolation_parameters)
continue
local_score_dir[audio_file] = local_scores[0]
# Quick fix to indexing
annotations.reset_index(inplace=True, drop=True)
return annotations
return annotations,local_score_dir


def generate_automated_labels(
@@ -1021,7 +1040,8 @@ def generate_automated_labels(
manual_id="bird",
weight_path=None,
normalized_sample_rate=44100,
normalize_local_scores=False):
normalize_local_scores=False,
include_local_scores=False):
"""
Function that generates the bird labels across a folder of audio clips
given the isolation_parameters
@@ -1048,13 +1068,18 @@ def generate_automated_labels(
normalize_local_scores (bool)
- Set whether or not to normalize the local scores.

include_local_scores (bool)
- Set whether or not to also output local_scores for ROC curve generation

Returns:
Dataframe of automated labels for the audio clips in audio_dir.
If include_local_scores is True, it outputs a tuple containing the dataframe of automated labels
and a dictionary containing the local scores of each file, as used by the ROC curve generation functions.
"""

#try:
if(isolation_parameters["model"] == 'microfaune'):
annotations = generate_automated_labels_microfaune(
annotations, local_scores = generate_automated_labels_microfaune(
audio_dir=audio_dir,
isolation_parameters=isolation_parameters,
manual_id=manual_id,
@@ -1069,10 +1094,10 @@ def generate_automated_labels(
'threshold_const', 'chunk_size']
for key in keys_to_delete:
birdnet_parameters.pop(key, None)
annotations = generate_automated_labels_birdnet(
annotations, local_scores = generate_automated_labels_birdnet(
audio_dir, birdnet_parameters)
elif(isolation_parameters['model'] == 'tweetynet'):
annotations = generate_automated_labels_tweetynet(
annotations, local_scores = generate_automated_labels_tweetynet(
audio_dir=audio_dir,
isolation_parameters=isolation_parameters,
manual_id=manual_id,
@@ -1088,7 +1113,10 @@ def generate_automated_labels(
# except:
# print("Error. Check your isolation_parameters")
# return None
return annotations
if (include_local_scores) :
return annotations, local_scores
else:
return annotations
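
A usage sketch for the new flag (parameter values and the "./TEST/" folder are illustrative, not part of this diff):

# build illustrative isolation parameters and request local scores as well
isolation_parameters = {
    "model": "microfaune",
    "technique": "steinberg",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "chunk_size": 5.0,
}
automated_df, local_scores = generate_automated_labels(
    "./TEST/", isolation_parameters, include_local_scores=True)
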

def kaleidoscope_conversion(df):
"""
@@ -1111,6 +1139,76 @@ def kaleidoscope_conversion(df):
kaleidoscope_df = pd.concat(kaleidoscope_df, axis=1, keys=headers)
return kaleidoscope_df


def add_confidence_to_annotations(clip_df, local_score_array):
"""
Adds a confidence value to each annotation from a local_score array,
taking the maximum normalized local score value within each
annotation as that annotation's confidence.

Args:
clip_df: (Dataframe)
- Dataframe containing the automated annotations of a single
clip

local_score_array: (Numpy Array)
- array of local_scores from predict functions

Returns:
clip_df with an extra column containing the confidence of each annotation
"""
#data prep for processing
local_score_array = normalize(local_score_array, 0, 1)
clip_df["CONFIDENCE"] = 0
confidence_array = []

for i in range(clip_df.shape[0]):
annotation_data = clip_df.iloc[i]
# now iterate through the local_score array for each chunk
clip_length = annotation_data["CLIP LENGTH"]
#seconds_per_index = clip_length/len(local_score_clip)
index_per_seconds = len(local_score_array)/clip_length

#Get the starting and ending index of that chunk as respective to
#the local score array
start_time = annotation_data["OFFSET"]
end_time = annotation_data["OFFSET"] + annotation_data["DURATION"]
start_index = floor(start_time * index_per_seconds)
end_index = floor((end_time * index_per_seconds))
max_index = floor((clip_length * index_per_seconds))

#Compute the local maximum in this chunk in the local scores
max_score = max(local_score_array[start_index: min(end_index, max_index)])
confidence_array.append(max_score)
clip_df["CONFIDENCE"] = confidence_array
return clip_df
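
A worked sketch (hypothetical values, not part of this diff): with a 10-second clip and 100 local scores there are 10 indices per second, so an annotation at OFFSET 2.0 with DURATION 3.0 maps to indices 20 through 50, and its CONFIDENCE is the normalized maximum over that slice.

# one annotation spanning seconds 2.0 to 5.0 of a 10 s clip
clip_df = pd.DataFrame(
    {"OFFSET": [2.0], "DURATION": [3.0], "CLIP LENGTH": [10.0]})
local_scores = np.random.rand(100)
clip_df = add_confidence_to_annotations(clip_df, local_scores)
print(clip_df["CONFIDENCE"])
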

def normalize(arr, t_min, t_max):
"""
Normalizes a local score array to the range [t_min, t_max] for use as confidence values.

Args:
arr: (Numpy Array)
- Local Score array.

t_min: (int)
- minimum value to set.

t_max: (int)
- maximum value to set.

Returns:
Numpy array of the normalized local score array.
"""
norm_arr = []
diff = t_max - t_min
arr_min = min(arr)
diff_arr = max(arr) - arr_min
for i in arr:
temp = (((i - arr_min)*diff)/diff_arr) + t_min
norm_arr.append(temp)
return norm_arr
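
The same min-max scaling can be written as a single vectorized NumPy expression (a sketch, not part of this diff, assuming max(arr) > min(arr) so the denominator is nonzero):

# scale arr so its minimum maps to t_min and its maximum maps to t_max
arr = np.asarray(arr, dtype=float)
norm_arr = (arr - arr.min()) * (t_max - t_min) / (arr.max() - arr.min()) + t_min
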

# def annotation_combiner(df):
# # Initializing the output Pandas dataframe
# combined_annotation_df = pd.DataFrame()