feat(engine): 重构字幕引擎，新增 Sherpa-ONNX SenseVoice 语音识别模型

- 重构字幕引擎，将音频采集改为在新线程上进行
- 重构 audio2text 中的类，调整运行逻辑
- 更新 main 函数，添加对 Sosv 模型的支持
- 修改 AudioStream 类，默认使用 16000Hz 采样率
2026-02-18 14:44:41 +08:00 · 2025-09-06 20:49:46 +08:00
parent 2b7ce06f04
commit eba2c5ca45
14 changed files with 377 additions and 112 deletions
--- a/engine/audio2text/vosk.py
+++ b/engine/audio2text/vosk.py
@@ -4,6 +4,7 @@ import time
 from datetime import datetime

 from vosk import Model, KaldiRecognizer, SetLogLevel
+from utils import shared_data
 from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate


@@ -82,6 +83,13 @@ class VoskRecognizer:
        
        stdout_obj(caption)

+    def translate(self):
+        """Continuously read audio frames from the shared data, run speech recognition on them, and write the recognition results to standard output."""
+        global shared_data  # NOTE(review): shared_data is only read in this method, never rebound
+        while shared_data.status == 'running':  # keep looping until the shared status is no longer 'running'
+            chunk = shared_data.chunk_queue.get()  # NOTE(review): presumably a blocking queue get — confirm; if so, the status is only re-checked after a chunk arrives
+            self.send_audio_frame(chunk)
+
    def stop(self):
        """停止 Vosk 引擎"""
        stdout_cmd('info', 'Vosk recognizer closed.')