mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-15 20:34:47 +08:00
refactor(engine): 字幕引擎文件夹重命名,字幕记录添加降序选择
- 字幕记录表格可以按时间降序排列 - 将 caption-engine 重命名为 engine - 更新了相关文件和文件夹的路径 - 修改了 README 和 TODO 文档中的相关内容 - 更新了 Electron 构建配置
This commit is contained in:
2
engine/audio2text/__init__.py
Normal file
2
engine/audio2text/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from dashscope.common.error import InvalidParameter
|
||||
from .gummy import GummyTranslator
|
||||
105
engine/audio2text/gummy.py
Normal file
105
engine/audio2text/gummy.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from dashscope.audio.asr import (
|
||||
TranslationRecognizerCallback,
|
||||
TranscriptionResult,
|
||||
TranslationResult,
|
||||
TranslationRecognizerRealtime
|
||||
)
|
||||
import dashscope
|
||||
from datetime import datetime
|
||||
import json
|
||||
import sys
|
||||
|
||||
class Callback(TranslationRecognizerCallback):
    """Streaming callback for the speech model.

    Each recognition event is turned into a caption dict and written to
    standard output as a single JSON line for the Node.js process.
    """

    def __init__(self):
        super().__init__()
        # Running sum of the usage['duration'] values reported by events.
        self.usage = 0
        # sentence_id of the sentence currently being captioned.
        self.cur_id = -1
        # Timestamp recorded when the current sentence was first seen.
        self.time_str = ''

    def on_open(self) -> None:
        """Hook called when the stream opens; intentionally does nothing."""

    def on_close(self) -> None:
        """Hook called when the stream closes; intentionally does nothing."""

    def on_event(
        self,
        request_id,
        transcription_result: TranscriptionResult,
        translation_result: TranslationResult,
        usage
    ) -> None:
        """Build a caption from one recognition event and emit it.

        The emitted dict has the keys ``index``, ``text``, ``time_s``
        (time the sentence was first seen), ``time_t`` (time of this
        event) and ``translation``. Events that carry no transcription
        are not emitted, because the resulting object would lack the
        ``index``/``text`` fields.

        Args:
            request_id: Identifier passed in by the SDK; unused here.
            transcription_result: Transcription for this event, or None.
            translation_result: Translation for this event, or None.
            usage: Falsy, or a mapping with a ``duration`` entry that is
                added to ``self.usage``.
        """
        caption = {}

        if transcription_result is not None:
            # Sample the clock once so time_s and time_t agree on the first
            # event of a sentence.
            now = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            sentence_id = transcription_result.sentence_id
            caption['index'] = sentence_id
            caption['text'] = transcription_result.text
            if sentence_id != self.cur_id:
                # New sentence: remember when it started.
                self.cur_id = sentence_id
                self.time_str = now
            caption['time_s'] = self.time_str
            caption['time_t'] = now
            caption['translation'] = ""

        if translation_result is not None:
            languages = translation_result.get_language_list()
            # Guard against an empty language list instead of raising
            # IndexError inside the SDK's callback.
            if languages:
                caption['translation'] = (
                    translation_result.get_translation(languages[0]).text
                )

        if usage:
            self.usage += usage['duration']

        # Only forward complete captions; a translation-only or empty event
        # has no index/text for the consumer to attach it to.
        if 'text' in caption:
            self.send_to_node(caption)

    def send_to_node(self, data):
        """Write ``data`` to stdout as one JSON line for the Node.js process.

        Best effort: any failure is reported on stderr and swallowed so the
        callback that invoked this method is not interrupted.
        """
        try:
            json_data = json.dumps(data) + '\n'
            sys.stdout.write(json_data)
            sys.stdout.flush()
        except Exception as e:
            print(f"Error sending data to Node.js: {e}", file=sys.stderr)
|
||||
|
||||
class GummyTranslator:
    """
    Streams audio through the Gummy engine; captions are written to standard
    output as JSON strings that the Auto Caption application can read.

    Constructor parameters:
        rate: audio sample rate
        source: source language code string (zh, en, ja, ...)
        target: target language code string (zh, en, ja, ...); translation
            is enabled only when this is not None
        api_key: when truthy, assigned to ``dashscope.api_key``
    """

    def __init__(self, rate, source, target, api_key):
        if api_key:
            dashscope.api_key = api_key

        wants_translation = target is not None
        # Collected first, then expanded into the recognizer constructor.
        engine_options = {
            "model": "gummy-realtime-v1",
            "format": "pcm",
            "sample_rate": rate,
            "transcription_enabled": True,
            "translation_enabled": wants_translation,
            "source_language": source,
            "translation_target_languages": [target],
            "callback": Callback(),
        }
        self.translator = TranslationRecognizerRealtime(**engine_options)

    def start(self):
        """Start the Gummy engine."""
        self.translator.start()

    def send_audio_frame(self, data):
        """Forward one audio frame to the engine."""
        self.translator.send_audio_frame(data)

    def stop(self):
        """Stop the Gummy engine."""
        self.translator.stop()
|
||||
Reference in New Issue
Block a user