我有一个二维的文本特征矩阵,形状为 (1096, 100)。我需要把文本特征和音频特征结合起来,而这要求文本特征是三维的:形状应为 (1096, 50, 100),其中 50 是 max_seq_len。
我该怎么做?
def get_average_word2vec(tokens_list, vector, generate_missing=False, k=100):
    """Return the element-wise mean of the word vectors for ``tokens_list``.

    Args:
        tokens_list: Sized iterable of tokens to look up.
        vector: Mapping-like object supporting ``word in vector`` and
            ``vector[word]``; each value is expected to be a length-``k``
            vector so it can be averaged with the fallback vectors.
        generate_missing: If True, a token absent from ``vector`` is
            replaced by a fresh ``np.random.rand(k)`` vector; otherwise by
            ``np.zeros(k)``.
        k: Length of the fallback vectors and of the result for empty input.

    Returns:
        The mean over all token vectors (missing tokens included via their
        fallback), or ``np.zeros(k)`` when ``tokens_list`` is empty.
    """
    # len() rather than truthiness so array-like token containers also work.
    if len(tokens_list) == 0:
        return np.zeros(k)

    # The fallback is built per token: random vectors must differ per token.
    make_missing = np.random.rand if generate_missing else np.zeros
    vectorized = [
        vector[word] if word in vector else make_missing(k)
        for word in tokens_list
    ]
    return np.mean(vectorized, axis=0)
def get_word2vec_embeddings(vectors, clean_text, generate_missing=False, k=100):
    """Compute one averaged word2vec embedding per row of ``clean_text``.

    Args:
        vectors: Word-vector lookup passed through to
            ``get_average_word2vec``.
        clean_text: Object indexable by ``'tokens'`` (e.g. a DataFrame);
            ``clean_text['tokens']`` must iterate over token sequences.
        generate_missing: Forwarded to ``get_average_word2vec``; controls
            whether unknown tokens get random or zero vectors.
        k: Embedding dimension, forwarded to ``get_average_word2vec``.
            Defaults to 100, which matches the previous implicit behavior.

    Returns:
        A list with one averaged vector per entry of
        ``clean_text['tokens']``.
    """
    return [
        get_average_word2vec(
            tokens, vectors, generate_missing=generate_missing, k=k
        )
        for tokens in clean_text['tokens']
    ]
# Build one averaged embedding per row of df and pack them into an array;
# tokens missing from word2vec get random vectors (generate_missing=True).
text_embeddings = np.asarray(
    get_word2vec_embeddings(word2vec, df, generate_missing=True)
)
# Notebook-style bare expression: displays the resulting array's shape.
text_embeddings.shape
(1096, 100)
https://stackoverflow.com/questions/72589801
复制相似问题