-
Notifications
You must be signed in to change notification settings - Fork 0
/
sound_generator_class.py
59 lines (47 loc) · 1.93 KB
/
sound_generator_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# coding: utf-8
# # Building Sound Generation class
#
# The VAE was trained on spectrograms, not raw audio, so the task here is to reconstruct audio from spectrograms using the inverse short-time Fourier transform (ISTFT). NOTE: the original notes mention the Griffin-Lim method, but the code below calls librosa.istft directly.
# In[1]:
from vae_class import VAE
from preprocessing_model import MinMaxNormalizer
import librosa
# ## sound generator class
# Responsible for generating audio from spectrograms
#
# ### Reconstructing the audio
#
#
# 1. Reshaping the signal
# 2. DeNormalize the signals; by using the saved values of min, max for every spectrogram
# 3. convert the signal from log scale to linear scale
# 4. apply ISTFT to convert signal from spectrogram to audio in time domain
# 5. append the reconstructed signal to the list
#
#
# In[1]:
class SoundGenerator:
    """Convert spectrograms reconstructed by a VAE into time-domain audio.

    Each spectrogram goes through four steps: drop the trailing channel
    axis, undo the min-max normalisation, convert from dB to linear
    amplitude, and apply the inverse short-time Fourier transform.
    """

    def __init__(self, vae, hop_length):
        """Store the model and the hop length used for the inverse STFT.

        Args:
            vae: Object exposing ``reconstruct(spectrograms)`` that returns a
                ``(generated_spectrograms, latent_representations)`` pair.
            hop_length: Hop length forwarded to ``librosa.istft``.
        """
        self.vae = vae
        self.hop_length = hop_length
        # Presumably (0, 1) is the normalised value range used when the
        # spectrograms were preprocessed -- confirm against MinMaxNormalizer.
        self._min_max_normalizer = MinMaxNormalizer(0, 1)

    def generate(self, spectrograms, min_max_values):
        """Reconstruct spectrograms with the VAE and convert them to audio.

        Args:
            spectrograms: Input spectrograms passed to ``vae.reconstruct``.
            min_max_values: One mapping per spectrogram holding the ``"min"``
                and ``"max"`` values needed to undo its normalisation.

        Returns:
            A ``(signals, latent_representations)`` tuple, where ``signals``
            is a list with one audio signal per reconstructed spectrogram.

        Raises:
            ValueError: If the number of reconstructed spectrograms differs
                from the number of min/max entries.
        """
        generated_spectrograms, latent_representations = self.vae.reconstruct(
            spectrograms
        )
        signals = self.convert_spectograms_to_audio(
            generated_spectrograms, min_max_values
        )
        return signals, latent_representations

    def convert_spectograms_to_audio(self, spectrograms, min_max_values):
        """Convert normalised log-spectrograms into audio signals.

        Args:
            spectrograms: Iterable of spectrograms, each indexed as
                ``spectrogram[:, :, 0]`` (a trailing channel axis is dropped).
            min_max_values: Iterable of mappings with ``"min"`` and ``"max"``
                keys, aligned one-to-one with ``spectrograms``.

        Returns:
            A list of audio signals, one per input spectrogram, in order.

        Raises:
            ValueError: If ``spectrograms`` and ``min_max_values`` do not have
                the same number of items. A plain ``zip`` would silently drop
                the unpaired items and return fewer signals than requested.
        """
        # Materialise both inputs so their lengths can be compared even when
        # a caller passes a one-shot iterator.
        spectrograms = list(spectrograms)
        min_max_values = list(min_max_values)
        if len(spectrograms) != len(min_max_values):
            raise ValueError(
                "spectrograms and min_max_values must have the same length "
                f"(got {len(spectrograms)} and {len(min_max_values)})"
            )

        signals = []
        for spectrogram, min_max_val in zip(spectrograms, min_max_values):
            # 1. Reshape: remove the dummy third (channel) dimension.
            log_spectrogram = spectrogram[:, :, 0]
            # 2. Denormalise using the min/max saved for this spectrogram.
            denorm_log_spec = self._min_max_normalizer.denormlize(
                log_spectrogram, min_max_val["min"], min_max_val["max"]
            )
            # 3. Convert from the log (dB) scale to linear amplitude.
            spec = librosa.db_to_amplitude(denorm_log_spec)
            # 4. Apply the inverse STFT to get a time-domain signal.
            # NOTE(review): no phase is restored before this call, so the
            # ISTFT runs on amplitude values only -- confirm this is intended.
            signal = librosa.istft(spec, hop_length=self.hop_length)
            # 5. Collect the reconstructed signal.
            signals.append(signal)
        return signals