from IPython.lib.display import Audio
import noise
import os
import random
from scipy.io import wavfile
from scipy.signal import stft, spectrogram
import numpy as np
import matplotlib.pyplot as plt
import librosa
from librosa import display
# Noisy data: load one randomly chosen recording from the training set.
sample_path = "../database/noisy/train/"
dirs = os.listdir(sample_path)
# dirs.pop(-1)

# Pick a random sub-directory first, then a random file inside it.
subDir = random.choice(dirs)
samples = os.listdir(f"{sample_path}/{subDir}")
sample = random.choice(samples)
path = f"{sample_path}/{subDir}/{sample}"

# Read the WAV file and work on a float32 copy of the samples.
samplerate, data = wavfile.read(path)
data = data.astype(np.float32)

# Quick sanity check: array shape, number of samples, sampling rate.
print(data.shape, len(data), samplerate, sep="\n")
(16000,) 16000 16000
# from librosa.display import waveshow
# Draw the time-domain waveform of the loaded recording.
display.waveshow(data, sr=samplerate, marker='.', label='Full signal')
<librosa.display.AdaptiveWaveplot at 0x131217ebf40>
# Build an inline audio player for the recording (autoplay requested).
Audio(data, rate=samplerate, autoplay=True)
# Spectrum of the whole signal via the project-local `noise.dft` helper;
# presumably returns (frequency axis, spectrum) -- confirm against the noise module.
xFFT, fft = noise.dft(data,samplerate)
# Both returned sequences are expected to have the same length.
print(len(xFFT),len(fft))
16000 16000
# plt.plot(xFFT,np.fft.fftshift(fft))
# Plot the magnitude of the centred spectrum. `fft` holds complex values, and
# handing those straight to matplotlib silently discards the imaginary part
# (NumPy reports a ComplexWarning), so take the modulus explicitly.
plt.plot(np.abs(np.fft.fftshift(fft)))
C:\Users\master\AppData\Roaming\Python\Python39\site-packages\numpy\core\_asarray.py:83: ComplexWarning: Casting complex values to real discards the imaginary part return array(a, dtype, copy=False, order=order)
[<matplotlib.lines.Line2D at 0x131ad3c4700>]
# Short-time Fourier transform of the recording using a Hann window
# (segment length and overlap are left at SciPy's defaults).
f, t, Zxx = stft(data, fs=samplerate, window='hann')
# Bare expression: only displayed when run as a notebook cell.
Zxx.shape
(129, 126)
# Show the STFT magnitude as an image.
plt.imshow(np.abs(Zxx))
<matplotlib.image.AxesImage at 0x131ad43faf0>
# Power spectrogram of the recording (128-sample Hann segments, 50 % overlap,
# zero-padded to 256-point FFTs).
# `fs` has to be the true sampling rate: SciPy derives both the frequency axis
# (0 .. fs/2) and the time axis from it, so passing samplerate/2 labelled every
# frequency a factor of two too low and every time a factor of two too long.
f, t, Sxx = spectrogram(data, fs=samplerate, window='hann',
                        nperseg=128, noverlap=64, nfft=256)
fig, ax = plt.subplots()
img = ax.pcolormesh(t, f, Sxx, shading='nearest')
# 'symlog' gives a log-like frequency axis that still accepts the 0 Hz bin.
ax.set_yscale('symlog')
img.set_cmap('nipy_spectral')
fig.colorbar(img)
print(Sxx.shape)
(129, 249)
# Power spectrogram (squared STFT magnitude) with a 4096-point FFT.
S = np.abs(librosa.stft(np.float32(data), n_fft=4096))**2
# chroma = librosa.feature.chroma_stft(S=S, sr=samplerate)
# librosa.display.specshow(S, y_axis='chroma', x_axis='time')
# S already holds power (|X|**2), so it must be converted with power_to_db;
# amplitude_to_db squares its input internally and would double every dB value.
SS = librosa.power_to_db(S, ref=np.max)
# Pass the real sampling rate and the hop length librosa.stft used by default
# (n_fft // 4); otherwise specshow labels the axes with its own defaults
# (sr=22050, hop_length=512), which do not match this data.
librosa.display.specshow(SS, y_axis='log', x_axis='time',
                         sr=samplerate, hop_length=4096 // 4)
# Bare expression: only displayed when run as a notebook cell.
SS.shape
(2049, 16)
# Inspect the sample dtype (bare expression: only displayed in a notebook cell).
data.dtype
dtype('float32')
# Mel spectrogram computed straight from the waveform: 36 mel bands, with the
# highest band edge at 4000 Hz.
MS = librosa.feature.melspectrogram(y=np.float32(data), sr=samplerate, n_mels=36, fmax=4000)
# Bare expression: only displayed when run as a notebook cell.
MS.shape
(36, 32)
# Display the mel spectrogram on a fresh figure with a colour bar.
fig, ax = plt.subplots()
# NOTE(review): MS is squared before display and drawn on a 'linear' frequency
# axis although it is a mel spectrogram (which presumably already holds power
# values) -- confirm both choices are intended.
# specshow is not given `ax=`, so it draws on the current axes.
img = display.specshow(MS**2, y_axis='linear', x_axis='time')
fig.colorbar(img, ax=ax)
<matplotlib.colorbar.Colorbar at 0x131b0008f10>
# Show the power spectrogram flipped vertically so that the first row of S ends
# up at the bottom of the image. A full reverse slice is used: the previous
# `-1:0:-1` stopped before index 0 and silently dropped that first row.
imgplot = plt.imshow(S[::-1, :])
imgplot.set_cmap('nipy_spectral')
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x131b00c34f0>
# Import only `dct`. Importing scipy's `fft` function as well would rebind the
# global name `fft`, which an earlier cell uses for the spectrum returned by
# noise.dft, and the function itself is not used here.
from scipy.fftpack import dct
# Discrete cosine transform of the power spectrogram with SciPy's defaults
# (type II, along the last axis), left unnormalised.
DCT = dct(S, norm=None)
# Bare expression: only displayed when run as a notebook cell.
DCT.shape
(2049, 16)
# Visualise the magnitude of the DCT coefficients with a colour bar.
imgplot = plt.imshow(np.abs(DCT))
imgplot.set_cmap('nipy_spectral')
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x131b017cac0>
# MFCCs computed directly from the waveform (36 mel bands up to 4000 Hz,
# librosa's default number of coefficients).
# The signal is passed as `y=`: the arguments of librosa.feature.mfcc are
# keyword-only from librosa 0.10 on, so the positional form raises a TypeError.
mfcc = librosa.feature.mfcc(y=data, sr=samplerate, n_mels=36, fmax=4000)
# Bare expression: only displayed when run as a notebook cell.
mfcc.shape
(20, 32)
# Plot the MFCC matrix against time on its own axes, with a colour bar.
fig, ax = plt.subplots()
img = display.specshow(mfcc, x_axis='time', ax =ax)
fig.colorbar(img, ax=ax)
<matplotlib.colorbar.Colorbar at 0x131b0236cd0>
# Power spectrogram from librosa's STFT with its default parameters.
D = np.abs(librosa.stft(data)) ** 2
print(D.shape)

# Collapse the linear-frequency bins into 32 mel bands covering 0 .. Nyquist,
# then express the result in dB relative to its peak.
S = librosa.feature.melspectrogram(
    S=D,
    sr=samplerate,
    n_mels=32,
    fmax=samplerate/2,
)
S_dB = librosa.power_to_db(S, ref=np.max)

# Draw the dB-scaled mel spectrogram with a labelled colour bar.
fig, ax = plt.subplots()
img = librosa.display.specshow(
    S_dB,
    x_axis='time',
    y_axis='mel',
    sr=samplerate,
    fmax=samplerate/2,
    ax=ax,
)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set(title='Mel-frequency spectrogram')
(1025, 32)
[Text(0.5, 1.0, 'Mel-frequency spectrogram')]
### This line is for automated code script
# SSS = librosa.feature.melspectrogram(y=data, sr=samplerate)
# SSS_dB = librosa.power_to_db(SSS, ref=np.max)
# fig, ax = plt.subplots()
# img = librosa.display.specshow(SSS_dB, x_axis='time',y_axis='mel', sr=samplerate,
# fmax=samplerate/2, ax=ax)
# fig.colorbar(img, ax=ax, format='%+2.0f dB')
# ax.set(title='Mel-frequency spectrogram')
### MFCC
# Derive 32 cepstral coefficients from the dB-scaled mel spectrogram.
mfcc = librosa.feature.mfcc(
    S=S_dB,
    sr=samplerate,
    n_mfcc=32,
)

# Plot the coefficients over time on a titled figure with a colour bar.
fig, ax = plt.subplots()
img = librosa.display.specshow(mfcc, x_axis='time', ax=ax)
fig.colorbar(img, ax=ax)
ax.set_title('MFCC')

print(mfcc.shape)
(32, 32)
# Raw image view of the MFCC matrix, with a colour bar.
imgplot = plt.imshow(mfcc)
imgplot.set_cmap('nipy_spectral')
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x131ad4733a0>
# Same MFCC matrix drawn with librosa's specshow so the x axis is labelled in time.
fig, ax = plt.subplots()
img = display.specshow(mfcc, x_axis='time', ax=ax)
fig.colorbar(img, ax=ax)
<matplotlib.colorbar.Colorbar at 0x131afcbeeb0>
# Inspect the MFCC dtype (bare expression: only displayed in a notebook cell).
mfcc.dtype
dtype('float32')
import pandas as pd

# Table layout for the dataset: file name, spoken word (directory name) and
# the MFCC matrix of the recording.
df = pd.DataFrame(columns = ['name','word','mfcc'])
# One-row preview for the currently loaded sample. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, so the row is built directly.
# As before, the result is only displayed and not stored: `df` itself stays
# empty, so this sample is not duplicated when the whole dataset is processed.
pd.DataFrame([{'name': sample,'word': subDir,'mfcc': mfcc}], columns=df.columns)
| name | word | mfcc | |
|---|---|---|---|
| 0 | f88f97a7_nohash_0.wav | on | [[-191.95135, -192.05437, -192.35475, -192.324... |
Adding the cepstrum (MFCC) to all data
# Compute the MFCCs of every recording of every word and collect them in `df`.
# Rows are gathered in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and calling it per file
# copied the whole table on every iteration.
rows = []
for subDir in dirs:
    print(subDir)
    samples = os.listdir(os.path.join(sample_path, subDir))
    for sample in samples:
        #sample = samples[0]
        path = os.path.join(sample_path, subDir, sample)
        samplerate, data = wavfile.read(path)
        # `y` is passed by keyword because librosa >= 0.10 no longer accepts
        # the signal positionally.
        # NOTE(review): n_mels=16 is below librosa's default number of
        # coefficients, which presumably caps the output at 16 rows -- confirm
        # that is the intended feature size.
        mfcc = librosa.feature.mfcc(y=np.float32(data), sr=samplerate, n_mels=16, fmax=4000)
        rows.append({'name': sample, 'word': subDir, 'mfcc': mfcc})

# Rebuilt from scratch, so re-running this cell does not duplicate rows.
df = pd.DataFrame(rows, columns=['name', 'word', 'mfcc'])
down eight five four go left nine no off on one right seven six stop three two up yes zero
# Serialize the feature table to a pickle file in the working directory.
df.to_pickle("noisy.pickle")
# Number of entries in the training directory (bare expression: only displayed
# when run as a notebook cell).
len(os.listdir(sample_path))
20