#!/usr/bin/env python
# coding: utf-8
These lines are shebang and encoding declarations. The shebang line (#!/usr/bin/env python
) specifies the interpreter that should be used to run the script (in this case, Python). The encoding declaration (# coding: utf-8
) specifies the character encoding used in the file.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
from sklearn.model_selection import train_test_split
from glob import glob
These lines import the necessary libraries for data manipulation, visualization, audio processing, and machine learning.
def create_dataframe():
file_paths = glob("../DATASET/archive/audio_data/train/*/*.wav")
paths = []
labels = []
for file in file_paths:
df_labels = file.split("/")[-2]
labels.append(df_labels)
paths.append(file)
return paths, labels
This function create_dataframe()
generates a dataframe containing file paths and labels. It utilizes the glob
module to retrieve file paths matching a specific pattern. It extracts labels from the file paths and returns two lists: paths
(containing file paths) and labels
(containing corresponding labels).
paths, labels = create_dataframe()
This line calls the create_dataframe()
function and assigns the returned paths and labels to variables.
data = pd.DataFrame({"file_paths":paths, "labels":labels})
This line creates a Pandas DataFrame named data
using the paths and labels obtained from the create_dataframe()
function.
data = data.sort_values(by="labels")
data.reset_index(inplace=True)
data.drop("index", axis=1, inplace=True)
These lines sort the DataFrame by the 'labels' column and reset the index.
def mfccs_extraction(file):
audio, sr = librosa.load(file)
mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
scaled = np.mean(mfccs.T, axis=0)
return scaled
This function mfccs_extraction()
computes the Mel-frequency cepstral coefficients (MFCCs) for an audio file. It loads the audio file using librosa.load()
, calculates the MFCCs using librosa.feature.mfcc()
, and then computes the mean of the MFCCs along the time axis.
data["mfccs"] = data.file_paths.apply(mfccs_extraction)
This line applies the mfccs_extraction()
function to each file path in the 'file_paths' column of the DataFrame, resulting in the computation of MFCCs for each audio file.
data_extracted = data.copy()
X = np.array(data_extracted["mfccs"].tolist())
y = np.array(data_extracted["labels"].tolist())
These lines prepare the features (X
) and labels (y
) for training the machine learning model. X
contains the MFCCs extracted from audio files, and y
contains corresponding labels.
y = np.array(pd.get_dummies(y, dtype=int))
This line converts the categorical labels (y
) into one-hot encoded format using pd.get_dummies()
.
X_train, X_test, y_train , y_test = train_test_split(X, y, train_size=0.8, random_state=42)
This line splits the data into training and testing sets using train_test_split()
.
from sklearn.metrics import classification_report, accuracy_score, f1_score
This line imports metrics from scikit-learn for evaluating the model's performance.
def evalution(model, model_name):
prediction = model.predict_classes(X_test)
accuracy = accuracy_score(y_test, prediction)
f1_score_ = f1_score(y_test, prediction)
print(classification_report(y_test, prediction))
print("\naccuracy :", accuracy)
print("\f1_score: ", f1_score_)
This function evaluation()
evaluates the performance of a given model by making predictions on the test set (X_test
) and calculating accuracy and F1 score. It prints a classification report, accuracy, and F1 score.
def preprocess_for_custom_input(file_path):
audio, sr = librosa.load(file_path)
mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
scaled = np.mean(mfccs.T, axis=0)
return scaled
This function preprocess_for_custom_input()
preprocesses a single audio file for custom input. It loads the audio file, computes MFCCs, and returns the scaled MFCCs.
model = Sequential()
model.add(InputLayer((None, 40)))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(10, activation="softmax"))
These lines define a Sequential model using Keras for a neural network architecture. It consists of multiple layers including Dense layers with ReLU activation and Dropout layers for regularization.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
This line compiles the model, specifying the loss function (categorical_crossentropy
), optimizer (adam
), and metrics (accuracy
) to be used during training.
ea = EarlyStopping(monitor="val_loss", mode="min", start_from_epoch=10, patience=30)
md = ModelCheckpoint("../models/music",save_best_only=True)
These lines define early stopping (ea
) and model checkpoint (md
) callbacks to monitor the validation loss during training and save the best model respectively.
model.fit(x=X_train, y=y_train, batch_size=16, validation_data=(X_test, y_test), epochs=500, callbacks=[ea, md])
This line trains the model using the training data (X_train
, y_train
), with specified batch size, validation data (X_test
, y_test
), number of epochs, and callbacks.
history_df = pd.DataFrame(model.history.history)
This line creates a DataFrame (history_df
) containing the training history of the model, including loss and accuracy values for each epoch.
history_df[["loss", "val_loss"]].plot(title="Loss graph", xlabel="epochs", ylabel="val")
This line plots the training and validation loss over epochs.
history_df[["accuracy", "val_accuracy"]].plot(title="Accuracy graph", xlabel="epochs", ylabel="val")
This line plots the training and validation accuracy over epochs.