Bregman lib error #42

Open
Pipe-Runner opened this issue Sep 17, 2018 · 3 comments

Pipe-Runner commented Sep 17, 2018

The Chromagram function used in chapter 5 for K-Means classification returns an error when fed an audio file.
TypeError: 'float' object cannot be interpreted as an index


zoldaten commented Mar 30, 2021

This error comes from the Bregman lib, which doesn't work with the latest NumPy.
Try installing the Python 3 port from here: https://github.com/pkmital/BregmanToolkit
Then change this line in features_base.py:
num_frames = 1000 #len(self.x)
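For context (my reading of the bug, not something documented in Bregman): NumPy 1.12+ rejects Python floats as array sizes and indices, and Bregman ends up passing one, e.g. a frame count produced by a division. A minimal repro of the same error:

import numpy as np

num_samples, nhop = 44100, 2205
num_frames = num_samples / nhop  # 20.0 -- a float under Python 3 division
np.zeros(num_frames)             # TypeError: 'float' object cannot be
                                 # interpreted as an index (or 'integer',
                                 # depending on the NumPy version)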

Try this code first to check that the install works:

from bregman.suite import *
import os

path = r'./audio_dataset/'
os.chdir(path)

p = default_feature_params()

audio_file = 'amen.wav'
print(audio_file)
F = Features(audio_file, p)
imagesc(F.X, dbscale=True)
# title('Default constant-Q spectrogram')

F.inverse(F.X, pvoc=True)  # invert features to audio
play(balance_signal(F.x_hat), F.sample_rate)

p['feature'] = 'stft'
p['nfft'] = 1024
p['wfft'] = 512
p['nhop'] = 256
F = Features(audio_file, p)
imagesc(F.X, dbscale=True)
title('Wide-band spectrogram')

F.inverse(F.X)  # invert features to audio
play(balance_signal(F.x_hat), F.sample_rate)

tuts = get_tutorials()
exec(open(tuts[0]).read())  # execfile() is Python 2 only
exec(open(tuts[1]).read())


zoldaten commented Apr 2, 2021

To fix the issue with Python 2: BinRoot/BregmanToolkit@f7b924b
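I haven't reproduced the diff here, but fixes of this kind boil down to keeping those counts integral before they reach NumPy, roughly like this (a sketch, not necessarily the exact contents of that commit):

import numpy as np

num_samples, nhop = 44100, 2205
num_frames = num_samples // nhop   # floor division keeps it an int
# or equivalently: num_frames = int(num_samples / nhop)
np.zeros(num_frames)               # works on any NumPy version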


zoldaten commented Apr 2, 2021

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from bregman.suite import *
import os

path = r'./audio_dataset/'
os.chdir(path)
print(os.getcwd())

k = 2
max_iterations = 100

# TF 1.x queue-based input pipeline: collect all .wav files in the directory
filenames = tf.train.match_filenames_once('*.wav')
count_num_files = tf.size(filenames)
#print(count_num_files)
init = (tf.global_variables_initializer(), tf.local_variables_initializer())

filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)

def get_next_chromogram(sess):
    audio_file = sess.run(filename)
    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return F.X, audio_file

def extract_feature_vector(sess, chromo_data):
    # Histogram of the dominant note per frame, normalized to a probability
    num_features, num_samples = np.shape(chromo_data)
    freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))
    normalized_hist = hist.astype(float) / num_samples
    return normalized_hist

def get_dataset(sess):
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    xs = list()
    names = list()
    plt.figure()
    for _ in range(num_files):
        chromo_data, filename = get_next_chromogram(sess)

        plt.subplot(1, 2, 1)
        plt.imshow(chromo_data, cmap='Greys', interpolation='nearest')
        plt.title('Visualization of Sound Spectrum')

        plt.subplot(1, 2, 2)
        freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
        plt.hist(freq_vals)
        plt.title('Histogram of Notes')
        plt.xlabel('Musical Note')
        plt.ylabel('Count')
        plt.savefig('{}.png'.format(filename))
        plt.clf()

        names.append(filename)
        x = extract_feature_vector(sess, chromo_data)
        xs.append(x)
    xs = np.asmatrix(xs)
    return xs, names

def initial_cluster_centroids(X, k):
    return X[0:k, :]

def assign_cluster(X, centroids):
    expanded_vectors = tf.expand_dims(X, 0)
    expanded_centroids = tf.expand_dims(centroids, 1)
    distances = tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroids)), 2)
    mins = tf.argmin(distances, 0)
    return mins

def recompute_centroids(X, Y):
    sums = tf.unsorted_segment_sum(X, Y, k)
    counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    return sums / counts

with tf.Session() as sess:
    sess.run(init)
    X, names = get_dataset(sess)
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        centroids = sess.run(recompute_centroids(X, Y))
    print(list(zip(sess.run(Y), names)))  # list() so Python 3 prints the pairs
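One more note (my addition, not part of the book's code): converged is never set to True in this script, so the loop always runs all max_iterations passes. A minimal sketch of a convergence test, as a drop-in replacement for the while loop above, stopping once the centroids stop moving:

prev_centroids = None
i = 0
while i < max_iterations:
    i += 1
    Y = assign_cluster(X, centroids)
    centroids = sess.run(recompute_centroids(X, Y))
    if prev_centroids is not None and np.allclose(centroids, prev_centroids):
        break  # centroids stopped moving, so the clustering has converged
    prev_centroids = centroids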
