我是能够生成MFCC从系统捕获的音频和绘图,但经过一些重构和配置Tensorflow与CUDA。我使用Librosa生成mfcc,matplotlib.pyplot和librosa.display绘制MFCC和声音设备从立体声混合捕捉声音从窗口。当前的配置可以根据示例.wav文件创建和绘制MFCC,但是当使用系统捕获的声音时,它无法绘制它,因为它生成一个3D数组,而不是在运行MFCC时生成一个2D。下面是生成和绘制的代码
N_MFCC = 40
N_MELS = 40
N_FFT = 512
HOP_LENGTH = 160
MIN_FREQ = 0
MAX_FREQ = None
def create_mfcc(record, sample_rate):
features = librosa.feature.mfcc(record, sample_rate, n_fft=N_FFT,n_mfcc=N_MFCC,
n_mels=N_MELS,hop_length=HOP_LENGTH,fmin=MIN_FREQ, fmax=MAX_FREQ, htk=False)
return features
def plot_and_save_mfcc(mfcc_data, file_name, sample_rate):
plt.figure(figsize=(10, 8))
plt.title('Current audio MFCC', fontsize=18)
plt.xlabel('Time [s]', fontsize=18)
librosa_display.specshow(mfcc_data, sr=sample_rate)
plt.savefig(file_name)
plt.cla()这将生成此堆栈跟踪。
Traceback (most recent call last):
File "main.py", line 68, in <module>
main()
File "main.py", line 63, in main
start_listening_and_creating_mfcc()
File "main.py", line 48, in start_listening_and_creating_mfcc
plot_and_save_mfcc(mfcc_data, conf.DEFAULT_MFCC_IMAGE_NAME.format(image_count), conf.SAMPLE_RATE)
File "main.py", line 38, in plot_and_save_mfcc
librosa_display.specshow(mfcc_data, sr=sample_rate)
File anaconda3\lib\site-packages\librosa\util\decorators.py", line 88, in inner_f
return f(*args, **kwargs)
File anaconda3\lib\site-packages\librosa\display.py", line 879, in specshow
out = axes.pcolormesh(x_coords, y_coords, data, **kwargs)
File anaconda3\lib\site-packages\matplotlib\__init__.py", line 1361, in inner
return func(ax, *map(sanitize_sequence, args), **kwargs)
File anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 6183, in pcolormesh
X, Y, C, shading = self._pcolorargs('pcolormesh', *args,
File anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 5671, in _pcolorargs
nrows, ncols = C.shape
ValueError: too many values to unpack (expected 2)我试过调试它并更改mfcc配置,但没有成功。也尝试重新配置我的环境,但这也没有帮助。
编辑:这是系统音频(48000,40,1)和.wav示例文件(40,122个)的.wav
如前所述,我忽略了一个函数,但这里是它和用于为.wav文件加载和创建mfcc的函数
def create_mfcc_from_file(file_path):
(signal, sample_rate) = librosa.load(file_path)
librosa_features = create_mfcc(signal, sample_rate)
plot_and_save_mfcc(librosa_features, 'mfcc-librosa', sample_rate)
def start_listening_and_creating_mfcc():
image_count = 0
while True:
my_recording = record_window()
mfcc_data = create_mfcc(my_recording, conf.SAMPLE_RATE)
plot_and_save_mfcc(mfcc_data, conf.DEFAULT_MFCC_IMAGE_NAME.format(image_count), conf.SAMPLE_RATE)
wav.write(conf.DEFAULT_MFCC_IMAGE_NAME.format(image_count) + '.wav', conf.SAMPLE_RATE, my_recording)
image_count += 1发布于 2022-09-09 14:25:15
def delta(feat, N):
"""Compute delta features from a feature vector sequence.
:param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector.
:param N: For each frame, calculate delta features based on preceding and following N frames
:returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. Each row holds 1 delta feature vector.
"""
if N < 0:
raise ValueError('N must be an integer >0')
NUMFRAMES = len(feat)
denominator = 2 * sum([i**2 for i in range(1, N+1)])
delta_feat = numpy.empty_like(feat)
padded = numpy.pad(feat, ((N, N), (0, 0)), mode='edge') # padded version of feat
for t in range(NUMFRAMES):
delta_feat[t] = numpy.dot(numpy.arange(-N, N+1), padded[t : t+2*N+1]) / denominator
plt.plot(signal, c='c')# [t : t+2*N+1] == [(N+t)-N : (N+t)+N+1]
return delta_feathttps://stackoverflow.com/questions/71723779
复制相似问题