mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-14 03:24:44 +08:00
refactor(项目): 尝试 Python 语音识别和内容发送
This commit is contained in:
@@ -4,6 +4,7 @@ from dashscope.audio.asr import (
|
||||
TranslationResult,
|
||||
TranslationRecognizerRealtime
|
||||
)
|
||||
from datetime import datetime
|
||||
|
||||
class Callback(TranslationRecognizerCallback):
|
||||
"""
|
||||
@@ -12,17 +13,15 @@ class Callback(TranslationRecognizerCallback):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.usage = 0
|
||||
self.sentences = []
|
||||
self.translations = []
|
||||
self.cur_id = -1
|
||||
self.time_str = ''
|
||||
|
||||
def on_open(self) -> None:
|
||||
print("\nGummy 流式翻译开始...\n")
|
||||
print("INFO gummy translation start...")
|
||||
|
||||
def on_close(self) -> None:
|
||||
print(f"\nTokens消耗:{self.usage}")
|
||||
print(f"流式翻译结束...\n")
|
||||
for i in range(len(self.sentences)):
|
||||
print(f"\n{self.sentences[i]}\n{self.translations[i]}\n")
|
||||
print(f"INFO tokens useage: {self.usage}")
|
||||
print(f"INFO translation end...")
|
||||
|
||||
def on_event(
|
||||
self,
|
||||
@@ -31,38 +30,37 @@ class Callback(TranslationRecognizerCallback):
|
||||
translation_result: TranslationResult,
|
||||
usage
|
||||
) -> None:
|
||||
caption = {}
|
||||
if transcription_result is not None:
|
||||
id = transcription_result.sentence_id
|
||||
text = transcription_result.text
|
||||
if transcription_result.stash is not None:
|
||||
stash = transcription_result.stash.text
|
||||
caption['id'] = transcription_result.sentence_id
|
||||
caption['text'] = transcription_result.text
|
||||
if caption['id'] != self.cur_id:
|
||||
self.cur_id = caption['id']
|
||||
cur_time = datetime.now().strftime('%H:%M:%S')
|
||||
caption['time_s'] = cur_time
|
||||
self.time_str = cur_time
|
||||
else:
|
||||
stash = ""
|
||||
print(f"#{id}: {text}{stash}")
|
||||
if usage: self.sentences.append(text)
|
||||
caption['time_s'] = self.time_str
|
||||
caption['time_t'] = datetime.now().strftime('%H:%M:%S')
|
||||
caption['translation'] = ""
|
||||
|
||||
if translation_result is not None:
|
||||
lang = translation_result.get_language_list()[0]
|
||||
text = translation_result.get_translation(lang).text
|
||||
if translation_result.get_translation(lang).stash is not None:
|
||||
stash = translation_result.get_translation(lang).stash.text
|
||||
else:
|
||||
stash = ""
|
||||
print(f"#{lang}: {text}{stash}")
|
||||
if usage: self.translations.append(text)
|
||||
caption['translation'] = translation_result.get_translation(lang).text
|
||||
|
||||
if usage: self.usage += usage['duration']
|
||||
|
||||
if usage:
|
||||
self.usage += usage['duration']
|
||||
print(caption)
|
||||
|
||||
class GummyTranslator:
|
||||
def __init__(self, rate, source, target):
|
||||
self.translator = TranslationRecognizerRealtime(
|
||||
model = "gummy-realtime-v1",
|
||||
format = "pcm",
|
||||
sample_rate = rate,
|
||||
transcription_enabled = True,
|
||||
translation_enabled = True,
|
||||
source_language = source,
|
||||
translation_target_languages = [target],
|
||||
callback = Callback()
|
||||
)
|
||||
model = "gummy-realtime-v1",
|
||||
format = "pcm",
|
||||
sample_rate = rate,
|
||||
transcription_enabled = True,
|
||||
translation_enabled = (target is not None),
|
||||
source_language = source,
|
||||
translation_target_languages = [target],
|
||||
callback = Callback()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user