refactor(项目): 尝试 Python 语音识别和内容发送

This commit is contained in:
himeditator
2025-06-17 21:26:16 +08:00
parent 1e83ad2199
commit d1bee65ae1
11 changed files with 158 additions and 357 deletions

View File

@@ -4,6 +4,7 @@ from dashscope.audio.asr import (
TranslationResult,
TranslationRecognizerRealtime
)
from datetime import datetime
class Callback(TranslationRecognizerCallback):
"""
@@ -12,17 +13,15 @@ class Callback(TranslationRecognizerCallback):
def __init__(self):
super().__init__()
self.usage = 0
self.sentences = []
self.translations = []
self.cur_id = -1
self.time_str = ''
def on_open(self) -> None:
print("\nGummy 流式翻译开始...\n")
print("INFO gummy translation start...")
def on_close(self) -> None:
print(f"\nTokens消耗{self.usage}")
print(f"流式翻译结束...\n")
for i in range(len(self.sentences)):
print(f"\n{self.sentences[i]}\n{self.translations[i]}\n")
print(f"INFO tokens useage: {self.usage}")
print(f"INFO translation end...")
def on_event(
self,
@@ -31,38 +30,37 @@ class Callback(TranslationRecognizerCallback):
translation_result: TranslationResult,
usage
) -> None:
caption = {}
if transcription_result is not None:
id = transcription_result.sentence_id
text = transcription_result.text
if transcription_result.stash is not None:
stash = transcription_result.stash.text
caption['id'] = transcription_result.sentence_id
caption['text'] = transcription_result.text
if caption['id'] != self.cur_id:
self.cur_id = caption['id']
cur_time = datetime.now().strftime('%H:%M:%S')
caption['time_s'] = cur_time
self.time_str = cur_time
else:
stash = ""
print(f"#{id}: {text}{stash}")
if usage: self.sentences.append(text)
caption['time_s'] = self.time_str
caption['time_t'] = datetime.now().strftime('%H:%M:%S')
caption['translation'] = ""
if translation_result is not None:
lang = translation_result.get_language_list()[0]
text = translation_result.get_translation(lang).text
if translation_result.get_translation(lang).stash is not None:
stash = translation_result.get_translation(lang).stash.text
else:
stash = ""
print(f"#{lang}: {text}{stash}")
if usage: self.translations.append(text)
caption['translation'] = translation_result.get_translation(lang).text
if usage: self.usage += usage['duration']
if usage:
self.usage += usage['duration']
print(caption)
class GummyTranslator:
def __init__(self, rate, source, target):
self.translator = TranslationRecognizerRealtime(
model = "gummy-realtime-v1",
format = "pcm",
sample_rate = rate,
transcription_enabled = True,
translation_enabled = True,
source_language = source,
translation_target_languages = [target],
callback = Callback()
)
model = "gummy-realtime-v1",
format = "pcm",
sample_rate = rate,
transcription_enabled = True,
translation_enabled = (target is not None),
source_language = source,
translation_target_languages = [target],
callback = Callback()
)