feat(engine): 重构字幕引擎并实现 WebSocket 通信

- 重构了 Gummy 和 Vosk 字幕引擎的代码,提高了可扩展性和可读性
- 合并 Gummy 和 Vosk 引擎为单个可执行文件
- 实现了字幕引擎和主程序之间的 WebSocket 通信,避免了孤儿进程问题
This commit is contained in:
himeditator
2025-07-28 15:49:52 +08:00
parent b658ef5440
commit cd9f3a847d
19 changed files with 242 additions and 293 deletions

View File

@@ -1,2 +1,3 @@
from dashscope.common.error import InvalidParameter
from .gummy import GummyTranslator
from .gummy import GummyRecognizer
from .vosk import VoskRecognizer

View File

@@ -62,7 +62,7 @@ class Callback(TranslationRecognizerCallback):
stdout_obj(caption)
class GummyTranslator:
class GummyRecognizer:
"""
使用 Gummy 引擎流式处理音频数据,并在标准输出中输出 Auto Caption 软件可读取的 JSON 字符串数据
@@ -70,6 +70,7 @@ class GummyTranslator:
rate: 音频采样率
source: 源语言代码字符串(zh, en, ja 等)
target: 目标语言代码字符串(zh, en, ja 等)
api_key: 阿里云百炼平台 API KEY
"""
def __init__(self, rate: int, source: str, target: str | None, api_key: str | None):
if api_key:

View File

@@ -2,7 +2,8 @@ import json
from datetime import datetime
from vosk import Model, KaldiRecognizer, SetLogLevel
from utils import stdout_obj
from utils import stdout_cmd, stdout_obj
class VoskRecognizer:
"""
@@ -11,7 +12,7 @@ class VoskRecognizer:
初始化参数:
model_path: Vosk 识别模型路径
"""
def __int__(self, model_path: str):
def __init__(self, model_path: str):
SetLogLevel(-1)
if model_path.startswith('"'):
model_path = model_path[1:]
@@ -24,7 +25,11 @@ class VoskRecognizer:
self.model = Model(self.model_path)
self.recognizer = KaldiRecognizer(self.model, 16000)
def start(self):
"""Announce that the Vosk engine has started.

Sends an 'info' command carrying the message 'Vosk recognizer started.'
through stdout_cmd; no other work is performed in this method.
"""
stdout_cmd('info', 'Vosk recognizer started.')
def send_audio_frame(self, data: bytes):
"""
发送音频帧给 Vosk 引擎,引擎将自动识别并将识别结果输出到标准输出中
@@ -57,3 +62,7 @@ class VoskRecognizer:
self.prev_content = content
stdout_obj(caption)
def stop(self):
"""Announce that the Vosk engine has stopped.

Sends an 'info' command carrying the message 'Vosk recognizer closed.'
through stdout_cmd; no other work is performed in this method.
"""
stdout_cmd('info', 'Vosk recognizer closed.')