import glob
import scipy.io.wavfile as wav
import pandas as pd
import numpy as np
import scipy
import librosa
import webrtcvad
def get_vector(sig, rate):
    """Build a per-frame feature matrix for a mono audio signal.

    The signal is cut into 20 ms frames with a 10 ms hop (320 / 160 samples
    at 16 kHz). Each frame yields one row made of, in order: 12 MFCCs, the
    first derivative of those MFCCs along time (12 values), 5 mel filterbank
    energies, the WebRTC VAD speech flag (0.0 or 1.0), the zero-crossing
    rate in crossings per second, and the smallest FFT magnitude of the frame.

    Args:
        sig: 1-D array of samples, either floating point in [-1, 1] (as
            returned by ``librosa.load``) or signed integer PCM.
        rate: Sample rate in Hz. Must be one the WebRTC VAD accepts:
            8000, 16000, 32000 or 48000.

    Returns:
        ``numpy.ndarray`` of shape ``(n_frames, 32)``.

    Raises:
        ValueError: If ``rate`` is not supported by the VAD, if ``sig`` is
            not one-dimensional, or if ``sig`` is too short to yield the two
            frames needed to compute the MFCC derivative.
    """
    supported_rates = (8000, 16000, 32000, 48000)
    if rate not in supported_rates:
        # webrtcvad otherwise fails with the opaque
        # "Error while processing frame".
        raise ValueError(
            "WebRTC VAD only accepts sample rates %s, got %r; resample first "
            "(e.g. librosa.load(path, sr=16000))" % (supported_rates, rate)
        )

    samples = np.asarray(sig)
    if samples.ndim != 1:
        raise ValueError("expected mono (1-D) audio, got shape %r" % (samples.shape,))

    frame_len = rate // 50   # 20 ms, one of the frame sizes the VAD allows
    hop_len = rate // 100    # 10 ms
    frame_seconds = frame_len / rate

    # Same frame count as the original loop, which only took a frame when a
    # further hop of samples was available after it.
    n_frames = max(0, (samples.shape[0] - frame_len) // hop_len)
    if n_frames < 2:
        raise ValueError(
            "signal too short: need at least %d samples at %d Hz, got %d"
            % (frame_len + 2 * hop_len, rate, samples.shape[0])
        )

    # librosa needs floating-point audio; the VAD needs 16-bit PCM bytes.
    if np.issubdtype(samples.dtype, np.integer):
        float_sig = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        float_sig = samples.astype(np.float32)
    pcm = np.round(np.clip(float_sig, -1.0, 1.0) * 32767).astype("<i2")

    # The detector is loop-invariant, so build it once.
    vad = webrtcvad.Vad()
    vad.set_mode(2)

    # Every row is assigned below, so no uninitialised row leaks into the output.
    frame_rows = np.empty((n_frames, 3), dtype=np.float64)
    for idx in range(n_frames):
        start = idx * hop_len
        end = start + frame_len
        frame = samples[start:end]
        is_speech = vad.is_speech(pcm[start:end].tobytes(), rate)
        zero_cross_rate = np.count_nonzero(np.diff(frame > 0)) / frame_seconds
        # NOTE(review): the original labelled this "f0 frequency", but it is
        # the minimum FFT magnitude, not a fundamental-frequency estimate.
        # The computation is kept unchanged.
        min_magnitude = np.min(np.abs(np.fft.fft(frame)))
        frame_rows[idx] = (is_speech, zero_cross_rate, min_magnitude)

    # center=False makes librosa's frame k start at sample k * hop_len, so its
    # frames line up with the ones used in the loop above.
    spectral_kwargs = dict(
        y=float_sig, sr=rate, n_fft=frame_len, hop_length=hop_len, center=False
    )
    # librosa returns (n_features, n_frames); transpose to one row per frame.
    # n_mels=26 avoids empty mel bands, which the default of 128 would produce
    # with such a short FFT window.
    mfcc_feat = librosa.feature.mfcc(
        n_mfcc=12, n_mels=26, **spectral_kwargs
    ).T[:n_frames]
    fbank = librosa.feature.melspectrogram(n_mels=5, **spectral_kwargs).T[:n_frames]
    mfcc_grad = np.gradient(mfcc_feat, axis=0)  # MFCC first derivative over time

    return np.hstack((mfcc_feat, mfcc_grad, fbank, frame_rows))
# Collect one row per file and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
rows = []
# The original wrapped this loop in ``for i in range(1, 6)``, but the glob
# pattern has no ``{}`` placeholder, so the same files were processed five
# times and duplicate rows were appended. The redundant loop is dropped.
for wav_path in glob.glob("Actor_0102/*.wav"):
    print(wav_path)
    # Resample on load to 16 kHz: WebRTC VAD only accepts 8000, 16000, 32000
    # or 48000 Hz, and librosa's default of 22050 Hz triggers
    # "webrtcvad.Error: Error while processing frame".
    sig, rate = librosa.load(wav_path, sr=16000)
    final_vector = get_vector(sig, rate)
    rows.append({
        "Features": final_vector.astype(np.float64),
        # NOTE(review): split(',') looks like a typo for split('.') (to drop
        # the extension); kept as-is so the "name" column is unchanged.
        "name": wav_path.split('/')[-1].split(',')[0],
    })
df = pd.DataFrame(rows, columns=["Features", "name"])
df.to_csv("Mfccfeatures.csv")
我正在使用 RAVDESS 数据集,运行上述代码时遇到以下错误:
webrtcvad.Error: Error while processing frame
该错误出现在我尝试为多个音频文件提取特征并将其存入 CSV 文件时。
发布于 2021-05-02 14:45:57
我也面临着同样的错误信息,并最终解决了。
(我不知道你和我的问题是否相似.)
你检查过音频的采样率了吗?webrtcvad 的文档中写道:
The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at 8000, 16000, 32000 or 48000 Hz。
我把采样率从最初的 44100 Hz 改为 48000 Hz 之后,问题就解决了。
https://stackoverflow.com/questions/54182120
复制相似问题