今天,我正在做一个关于来电被转录并被保存到文本文件中的项目,但我对python和python循环也有点陌生。我想在SQL server列上循环,让每行循环通过我使用的蔚蓝的语音到文本服务(所有的电话机OID)。我在这个问题上已经被困了几天了,所以我想我可能会在这里找到一些帮助。
import azure.cognitiveservices.speech as speechsdk
import time
from os import path
from pydub import AudioSegment
import requests
import hashlib
import sys
import os.path
import pyodbc
databaseName = '*'
username = '*'
password = '*'
server = '*'
driver = '*'
try:
CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
storedproc = "* = *'"
cursor.execute(storedproc)
row = cursor.fetchone()
while row:
array = [(int(row[1]))]
row = cursor.fetchone()
i = 0
while i<len(array):
OID = (array[i])
i = i + 1
print(OID)
string = f"{OID}*"
encoded = string.encode()
result = hashlib.sha256(encoded)
resultHash = (result.hexdigest())
Telefoongesprek = requests.get(f"*{OID}", headers={f"api-key":f"{resultHash}"})
with open("Telefoongesprek.mp3", "wb") as f:
f.write(Telefoongesprek.content)
src = "Telefoongesprek.mp3"
dst = "Telefoongesprek.wav"
sound = AudioSegment.from_file(src)
sound.export(dst, format="wav")
def speech_recognize_continuous_from_file():
speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
speech_config.speech_recognition_language = "nl-NL"
audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek.wav")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
done = False
def stop_cb(evt):
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_started.connect(handle_final_result)
speech_recognizer.session_stopped.connect(handle_final_result)
speech_recognizer.canceled.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
print(all_results)
telefoongesprek = str(all_results)
filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
file = open(filename, "w")
file.write(telefoongesprek)
file.close()
speech_recognize_continuous_from_file()
cursor.close()
del cursor
conn.close()
except Exception as e:
print("Error: %s" % e)一切都彼此分开,但我只是不知道该如何放置循环和我应该使用的循环(For/While循环)。在这里,我试图循环一个数组,但我不这样做,这是正确的。
错误消息:解码失败。ffmpeg返回错误代码:1 mp3 @ 000001cb8c57e0o0无法读取帧大小:无法查找到1073。
这意味着我的azure函数找不到mp3文件,这意味着"Mp3 to Wav“转换不起作用。
提前感谢!
发布于 2021-09-15 09:21:11
如果我理解你的问题,你有一个有很多电话细节的数据库。每行中的字段值之一用于创建关联的mp3文件。您希望在数据库中的每个mp3文件上使用azure对文本进行演讲。
所以你可以用两种方式来做:
另一种技术是在一个循环中完成所有事情,就像您所展示的方式一样,这需要进行一些修正。
好的,现在这个部分已经清楚了,我们可以进入演讲到文本部分。因此,azure允许您发送压缩格式进行转录,这意味着您实际上不需要将它转换为wav文件。
请看下面修改后的代码:
# code snippet borrowed from azure samples
def speech_recognize_continuous_from_file(filename):
class BinaryFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
def __init__(self, filename: str):
super().__init__()
self._file_h = open(filename, "rb")
def read(self, buffer: memoryview) -> int:
try:
size = buffer.nbytes
frames = self._file_h.read(size)
buffer[:len(frames)] = frames
return len(frames)
except Exception as ex:
print('Exception in `read`: {}'.format(ex))
raise
def close(self) -> None:
print('closing file')
try:
self._file_h.close()
except Exception as ex:
print('Exception in `close`: {}'.format(ex))
raise
# Creates an audio stream format. For an example we are using MP3 compressed file here
compressed_format = speechsdk.audio.AudioStreamFormat(compressed_stream_format=speechsdk.AudioStreamContainerFormat.MP3)
callback = BinaryFileReaderCallback(filename=filename)
stream = speechsdk.audio.PullAudioInputStream(stream_format=compressed_format, pull_stream_callback=callback)
speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
speech_config.speech_recognition_language = "nl-NL"
audio_config = speechsdk.audio.AudioConfig(stream=stream)
# Creates a speech recognizer using a file as audio input, also specify the speech language
speech_recognizer = speechsdk.SpeechRecognizer(speech_config, audio_config)
done = False
def stop_cb(evt):
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_started.connect(handle_final_result)
speech_recognizer.session_stopped.connect(handle_final_result)
speech_recognizer.canceled.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
print(all_results)
telefoongesprek = str(all_results)
filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
file = open(filename, "w")
file.write(telefoongesprek)
file.close()
try:
CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
storedproc = "* = *'"
cursor.execute(storedproc)
row = cursor.fetchone()
# loop through the rows
while row:
array = [(int(row[1]))]
i = 0
while i<len(array):
OID = (array[i])
i = i + 1
print(OID)
string = f"{OID}*"
encoded = string.encode()
result = hashlib.sha256(encoded)
resultHash = (result.hexdigest())
telefoongesprek_response = requests.get(f"*{OID}", headers={f"api-key":f"{resultHash}"})
# save the file to local disk as mp3
with open("Telefoongesprek.mp3", "wb") as f:
f.write(telefoongesprek_response.content)
# do the speech to text on the mp3 file
speech_recognize_continuous_from_file(f.name)
# fetch the next row
row = cursor.fetchone()
cursor.close()
del cursor
conn.close()
except Exception as e:
print("Error: %s" % e)我还没有测试这个完整的代码,因为我没有数据库连接。请随时修改您的用例,如果您有任何问题,请告诉我。
https://stackoverflow.com/questions/69189551
复制相似问题