본문 바로가기

Domain Knowledge/Speech

Normalized log mel-spectrogram의 python 구현

import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt 


# Floor (in dB) that maps to 0 after normalization.
min_level_db = -100


def normalize_mel(S):
    """Rescale a dB-valued array to the unit interval.

    A value equal to ``min_level_db`` maps to 0 and a value of 0 dB maps
    to 1; anything outside that range is clipped.

    Args:
        S: Array (or scalar) of levels in dB.

    Returns:
        The rescaled values, clipped to ``[0, 1]``.
    """
    dynamic_range = -min_level_db
    shifted = S - min_level_db
    return np.clip(shifted / dynamic_range, 0, 1)


def feature_extraction(path):
    """Compute a normalized log mel-spectrogram for an audio file.

    The audio is loaded at 16 kHz, converted to an 80-band mel power
    spectrogram, expressed in dB relative to its own maximum, and then
    rescaled with ``normalize_mel``.

    Args:
        path: Path of the audio file to load.

    Returns:
        The output of ``normalize_mel`` applied to the dB-scaled mel
        spectrogram (values clipped to ``[0, 1]``).
    """
    sample_rate = 16000

    # `sr` must be passed by keyword: recent librosa releases reject it as a
    # positional argument.
    y, _ = librosa.load(path, sr=sample_rate)

    # Pass the real sample rate so the mel filterbank is built for 16 kHz
    # audio rather than librosa's 22050 Hz default.
    # win_length=400 / hop_length=160 samples are 25 ms / 10 ms at 16 kHz.
    # NOTE(review): the original carried the remark "320/80", presumably an
    # alternative window/hop setting -- confirm.
    S = librosa.feature.melspectrogram(
        y=y,
        sr=sample_rate,
        n_mels=80,
        n_fft=512,
        win_length=400,
        hop_length=160,
    )

    # ref=np.max puts the loudest bin at 0 dB, so every value is <= 0 dB
    # before normalization.
    norm_log_S = normalize_mel(librosa.power_to_db(S, ref=np.max))
    return norm_log_S
                        

# Extract the normalized log mel-spectrogram of the sample file and plot it.
a = feature_extraction('sample1.wav')

# Give specshow the analysis sample rate and hop length; otherwise it scales
# the time and mel axes with its own defaults instead of the 16 kHz / 160-sample
# settings used during feature extraction.
librosa.display.specshow(a, sr=16000, hop_length=160, y_axis='mel', x_axis='time')

# The plotted values are normalized to [0, 1], so they are not labelled as dB.
plt.colorbar(format='%.2f')
plt.title('Mel-Spectrogram')
plt.tight_layout()
plt.savefig('Mel-Spectrogram example.png')
plt.show()