feat(engine): refactor the subtitle engine and add the Sherpa-ONNX SenseVoice speech recognition model

- Refactor the subtitle engine so that audio capture runs on a separate thread (see the sketch below, before the file diffs)
- Refactor the classes in audio2text and adjust their runtime logic
- Update the main function to add support for the Sosv model
- Change the AudioStream class to default to a 16000 Hz sample rate
himeditator
2025-09-06 20:49:46 +08:00
parent 2b7ce06f04
commit eba2c5ca45
14 changed files with 377 additions and 112 deletions
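
The first bullet above is the heart of the change: audio capture becomes a producer on its own thread and hands raw chunks to the recognition loop through the new shared_data object (engine/utils/shared.py, added below). A minimal sketch of that pattern, assuming the repo's "from utils import shared_data" import; capture_chunk, the chunk size, and the loop limit are hypothetical placeholders rather than code from this commit:

import threading
import queue

from utils import shared_data  # new SharedData instance: status flag + chunk_queue

def capture_chunk(frames: int = 1600) -> bytes:
    # Hypothetical stand-in for one read from the audio device;
    # returns 16-bit silence so the sketch runs on its own.
    return b"\x00\x00" * frames

def capture_loop():
    # Producer: runs on the new capture thread.
    while shared_data.status == "running":
        shared_data.chunk_queue.put(capture_chunk())

def recognize_loop(max_chunks: int = 10):
    # Consumer: a real loop would resample each chunk and feed it to the
    # recognizer instead of printing its size.
    handled = 0
    while shared_data.status == "running" and handled < max_chunks:
        try:
            chunk = shared_data.chunk_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        print(len(chunk))
        handled += 1
    shared_data.status = "stop"  # tell the capture thread to exit

threading.Thread(target=capture_loop, daemon=True).start()
recognize_loop()

queue.Queue does its own locking, so the capture thread never blocks on recognition and no explicit synchronization is needed.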

View File

@@ -5,6 +5,6 @@ from .audioprcs import (
    resample_mono_chunk
)
from .sysout import stdout, stdout_err, stdout_cmd, stdout_obj, stderr
from .thdata import thread_data
from .shared import shared_data
from .server import start_server
from .translation import ollama_translate, google_translate

View File

@@ -49,9 +49,18 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
    # (length,)
    chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
    if orig_sr == target_sr:
        return chunk_mono.astype(np.int16).tobytes()
    ratio = target_sr / orig_sr
    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
    chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
    real_len = round(chunk_mono.shape[0] * ratio)
    if(chunk_mono_r.shape[0] > real_len):
        chunk_mono_r = chunk_mono_r[:real_len]
    else:
        while chunk_mono_r.shape[0] < real_len:
            chunk_mono_r = np.append(chunk_mono_r, chunk_mono_r[-1])
    return chunk_mono_r.tobytes()
@@ -81,9 +90,18 @@ def resample_chunk_mono_np(chunk: bytes, channels: int, orig_sr: int, target_sr:
    # (length,)
    chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
    if orig_sr == target_sr:
        return chunk_mono.astype(dtype)
    ratio = target_sr / orig_sr
    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
    chunk_mono_r = chunk_mono_r.astype(dtype)
    real_len = round(chunk_mono.shape[0] * ratio)
    if(chunk_mono_r.shape[0] > real_len):
        chunk_mono_r = chunk_mono_r[:real_len]
    else:
        while chunk_mono_r.shape[0] < real_len:
            chunk_mono_r = np.append(chunk_mono_r, chunk_mono_r[-1])
    return chunk_mono_r
@@ -100,9 +118,16 @@ def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_b
    Return:
        Mono (single-channel) audio chunk
    """
    if orig_sr == target_sr: return chunk
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
    chunk_np = chunk_np.astype(np.float32)
    ratio = target_sr / orig_sr
    chunk_r = samplerate.resample(chunk_np, ratio, converter_type=mode)
    chunk_r = np.round(chunk_r).astype(np.int16)
    real_len = round(chunk_np.shape[0] * ratio)
    if(chunk_r.shape[0] > real_len):
        chunk_r = chunk_r[:real_len]
    else:
        while chunk_r.shape[0] < real_len:
            chunk_r = np.append(chunk_r, chunk_r[-1])
    return chunk_r.tobytes()
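
All three resample helpers gain the same fix-up: samplerate.resample can hand back a buffer that is a sample or two off from the nominal round(length * ratio), so the chunk is clipped, or right-padded by repeating its last sample, to the exact expected length. A numpy-only sketch of just that step; fix_length and the fake 161-sample buffer are illustrative assumptions, not code from this commit:

import numpy as np

def fix_length(chunk_r: np.ndarray, orig_len: int, ratio: float) -> np.ndarray:
    # Mirror of the new length correction: clip, or repeat the last
    # sample, until the chunk matches the expected resampled length.
    real_len = round(orig_len * ratio)
    if chunk_r.shape[0] > real_len:
        return chunk_r[:real_len]
    while chunk_r.shape[0] < real_len:
        chunk_r = np.append(chunk_r, chunk_r[-1])
    return chunk_r

# 480 samples at 48 kHz should always become exactly 160 samples at 16 kHz,
# even if the resampler returns 159 or 161 samples for a given block.
resampled = np.zeros(161, dtype=np.int16)  # pretend output of samplerate.resample
assert fix_length(resampled, 480, 16000 / 48000).shape[0] == 160

Keeping every chunk at its exact nominal length presumably avoids slow length drift when many resampled blocks are concatenated downstream.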

View File

@@ -1,13 +1,12 @@
import socket
import threading
import json
# import time
from utils import thread_data, stdout_cmd, stderr
from utils import shared_data, stdout_cmd, stderr
def handle_client(client_socket):
    global thread_data
    while thread_data.status == 'running':
    global shared_data
    while shared_data.status == 'running':
        try:
            data = client_socket.recv(4096).decode('utf-8')
            if not data:
@@ -15,13 +14,13 @@ def handle_client(client_socket):
            data = json.loads(data)
            if data['command'] == 'stop':
                thread_data.status = 'stop'
                shared_data.status = 'stop'
                break
        except Exception as e:
            stderr(f'Communication error: {e}')
            break
    thread_data.status = 'stop'
    shared_data.status = 'stop'
    client_socket.close()
@@ -34,7 +33,6 @@ def start_server(port: int):
        stderr(str(e))
        stdout_cmd('kill')
        return
    # time.sleep(20)
    stdout_cmd('connect')
    client, addr = server.accept()
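
For context, the control channel that handle_client reads is a single JSON object sent over the TCP socket, and 'stop' is the only command it acts on, flipping shared_data.status so every loop winds down. A minimal client-side sketch; the host and port here are placeholders, and the real port is whatever was passed to start_server(port):

import json
import socket

def send_stop(host: str = "127.0.0.1", port: int = 8080):
    # Sends the one command handle_client currently understands; the engine
    # sets shared_data.status to 'stop' when it receives this.
    with socket.create_connection((host, port)) as sock:
        sock.sendall(json.dumps({"command": "stop"}).encode("utf-8"))

# Assumes the engine's server is already accepting on that port.
send_stop()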

engine/utils/shared.py Normal file
View File

@@ -0,0 +1,8 @@
import queue

class SharedData:
    def __init__(self):
        self.status = "running"
        self.chunk_queue = queue.Queue()

shared_data = SharedData()
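
Because shared_data is created once at import time, every thread that does "from utils import shared_data" sees the same object, so the socket server, the capture thread, and the recognition loop all share one status flag and one chunk queue. A small usage sketch; the payload bytes are just a placeholder:

from utils import shared_data

# Any thread can enqueue audio...
shared_data.chunk_queue.put(b"\x00\x00" * 160)  # placeholder 16-bit chunk

# ...and any other thread can pick it up; queue.Queue handles the locking.
chunk = shared_data.chunk_queue.get()
assert len(chunk) == 320

# Flipping the flag from one thread (e.g. the socket server on 'stop')
# is how the other loops are told to exit.
shared_data.status = "stop"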

View File

@@ -1,5 +0,0 @@
class ThreadData:
    def __init__(self):
        self.status = "running"

thread_data = ThreadData()