feat(engine): 重构字幕引擎并实现 WebSocket 通信

- 重构了 Gummy 和 Vosk 字幕引擎的代码，提高了可扩展性和可读性
- 合并 Gummy 和 Vosk 引擎为单个可执行文件
- 实现了字幕引擎和主程序之间的 WebSocket 通信，避免了孤儿进程问题
2026-02-14 20:02:03 +08:00 · 2025-07-28 15:49:52 +08:00
parent b658ef5440
commit cd9f3a847d
19 changed files with 242 additions and 293 deletions
--- a/engine/utils/init.py
+++ b/engine/utils/init.py
@@ -1,2 +1,4 @@
-from .process import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
-from .sysout import stdout, stdout_cmd, stdout_obj, stderr
+from .audioprcs import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
+from .sysout import stdout, stdout_cmd, stdout_obj, stderr
+from .thdata import thread_data
+from .server import start_server
--- a/engine/utils/audioprcs.py
+++ b/engine/utils/audioprcs.py
@@ -1,6 +1,6 @@
 import samplerate
 import numpy as np
-
+import numpy.core.multiarray

 def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
    """
@@ -13,6 +13,7 @@ def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
    Returns:
        单通道音频数据块
    """
+    if channels == 1: return chunk
    # (length * channels,)
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
    # (length, channels)
@@ -37,13 +38,17 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
    Return:
        单通道音频数据块
    """
-    # (length * channels,)
-    chunk_np = np.frombuffer(chunk, dtype=np.int16)
-    # (length, channels)
-    chunk_np = chunk_np.reshape(-1, channels)
-    # (length,)
-    chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
-    chunk_mono = chunk_mono_f.astype(np.int16)
+    if channels == 1:
+        chunk_mono = chunk
+    else:
+        # (length * channels,)
+        chunk_np = np.frombuffer(chunk, dtype=np.int16)
+        # (length, channels)
+        chunk_np = chunk_np.reshape(-1, channels)
+        # (length,)
+        chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
+        chunk_mono = chunk_mono_f.astype(np.int16)
+
    ratio = target_sr / orig_sr
    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
    chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
--- a/engine/utils/server.py
+++ b/engine/utils/server.py
@@ -0,0 +1,37 @@
+import socket
+import threading
+import json
+from utils import thread_data, stdout_cmd, stderr
+
+
+def handle_client(client_socket):
+    """
+    Serve one control connection until the engine is told to stop.
+
+    Reads JSON messages from the socket. The loop ends when the peer
+    closes the connection, when a ``{"command": "stop"}`` message arrives
+    while the engine is still running, or when receiving/decoding/parsing
+    fails. In every case ``thread_data.status`` is set to ``'stop'`` and
+    the socket is closed.
+
+    Args:
+        client_socket: connected socket returned by ``accept()``
+    """
+    try:
+        while True:
+            # NOTE(review): each recv() is assumed to carry exactly one
+            # complete JSON message; TCP itself does not guarantee that.
+            payload = client_socket.recv(4096)
+            if not payload:
+                # Empty read: the peer closed the connection.
+                break
+            message = json.loads(payload.decode('utf-8'))
+
+            if message['command'] == 'stop' and thread_data.status == 'running':
+                break
+    except Exception as e:
+        # Any communication or parsing failure ends the session.
+        stderr(f'Communication error: {e}')
+    finally:
+        # Guaranteed cleanup: flag the stop and release the socket even
+        # if the error handler itself fails.
+        thread_data.status = 'stop'
+        client_socket.close()
+
+
+def start_server(port: int):
+    """
+    Accept a single control connection and handle it in a daemon thread.
+
+    Binds to ``localhost:port``, emits the ``'ready'`` command through
+    ``stdout_cmd`` once listening, then blocks until one client connects.
+    The accepted connection is passed to ``handle_client`` on a daemon
+    thread and this function returns.
+
+    Args:
+        port: TCP port to listen on
+    """
+    # Only one client is ever accepted, so the listening socket is closed
+    # as soon as accept() returns (or if bind/listen/accept raises).
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
+        server.bind(('localhost', port))
+        server.listen(1)
+        stdout_cmd('ready')
+        client, _addr = server.accept()
+
+    client_handler = threading.Thread(
+        target=handle_client, args=(client,), daemon=True
+    )
+    client_handler.start()
--- a/engine/utils/thdata.py
+++ b/engine/utils/thdata.py
@@ -0,0 +1,5 @@
+class ThreadData:
+    """Mutable holder for the engine status flag."""
+
+    def __init__(self):
+        # Starts as "running"; handle_client in utils/server.py sets it
+        # to 'stop'. Presumably the engine loop polls this value to know
+        # when to exit - confirm against the callers.
+        self.status = "running"
+
+# Module-level instance, re-exported by the utils package.
+thread_data = ThreadData()