mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-14 20:02:03 +08:00
feat(engine): 重构字幕引擎并实现 WebSocket 通信
- 重构了 Gummy 和 Vosk 字幕引擎的代码,提高了可扩展性和可读性
- 合并 Gummy 和 Vosk 引擎为单个可执行文件
- 实现了字幕引擎和主程序之间的 WebSocket 通信,避免了孤儿进程问题
This commit is contained in:
@@ -1,2 +1,4 @@
|
||||
from .process import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
|
||||
from .sysout import stdout, stdout_cmd, stdout_obj, stderr
|
||||
from .audioprcs import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
|
||||
from .sysout import stdout, stdout_cmd, stdout_obj, stderr
|
||||
from .thdata import thread_data
|
||||
from .server import start_server
|
||||
@@ -1,6 +1,6 @@
|
||||
import samplerate
|
||||
import numpy as np
|
||||
|
||||
import numpy.core.multiarray
|
||||
|
||||
def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
|
||||
"""
|
||||
@@ -13,6 +13,7 @@ def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
|
||||
Returns:
|
||||
单通道音频数据块
|
||||
"""
|
||||
if channels == 1: return chunk
|
||||
# (length * channels,)
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
# (length, channels)
|
||||
@@ -37,13 +38,17 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
|
||||
Return:
|
||||
单通道音频数据块
|
||||
"""
|
||||
# (length * channels,)
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
# (length, channels)
|
||||
chunk_np = chunk_np.reshape(-1, channels)
|
||||
# (length,)
|
||||
chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
chunk_mono = chunk_mono_f.astype(np.int16)
|
||||
if channels == 1:
|
||||
chunk_mono = chunk
|
||||
else:
|
||||
# (length * channels,)
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
# (length, channels)
|
||||
chunk_np = chunk_np.reshape(-1, channels)
|
||||
# (length,)
|
||||
chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
chunk_mono = chunk_mono_f.astype(np.int16)
|
||||
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
||||
37
engine/utils/server.py
Normal file
37
engine/utils/server.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import socket
|
||||
import threading
|
||||
import json
|
||||
from utils import thread_data, stdout_cmd, stderr
|
||||
|
||||
|
||||
def handle_client(client_socket: socket.socket) -> None:
    """Serve one control connection until it ends, then stop the engine.

    Reads JSON messages from ``client_socket`` (one message per ``recv``
    call of up to 4096 bytes) and reacts to ``{"command": "stop"}``.
    Whatever ends the loop -- a stop command, the peer closing the
    connection, or a communication/parsing error -- the shared
    ``thread_data.status`` is set to ``'stop'`` and the socket is closed.

    Args:
        client_socket: Connected socket returned by ``accept()``.
    """
    try:
        while True:
            try:
                raw = client_socket.recv(4096)
                if not raw:
                    # Empty read: the peer closed the connection.
                    break
                message = json.loads(raw.decode('utf-8'))

                if message['command'] == 'stop':
                    break
            except Exception as e:
                # Thread boundary: report the problem and fall through to
                # the shutdown path instead of dying silently.
                stderr(f'Communication error: {e}')
                break
    finally:
        # Losing the controlling connection must always stop the engine,
        # and the socket must be released on every exit path.
        thread_data.status = 'stop'
        client_socket.close()
|
||||
|
||||
|
||||
def start_server(port: int):
    """Accept a single local control connection and serve it in the background.

    Binds a plain TCP socket to ``localhost:port``, emits the ``'ready'``
    command through ``stdout_cmd`` once listening, blocks until one client
    connects, and then hands that connection to ``handle_client`` on a
    daemon thread.

    Args:
        port: TCP port on localhost to listen on.

    Raises:
        OSError: If the socket cannot be bound, listened on, or accepted.
    """
    # The listening socket is only needed for the single accept; the context
    # manager closes it afterwards and also on bind/listen/accept failure.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        server.bind(('localhost', port))
        server.listen(1)
        stdout_cmd('ready')

        client, _addr = server.accept()

    # Daemon thread: it must not keep the process alive on its own.
    client_handler = threading.Thread(
        target=handle_client,
        args=(client,),
        daemon=True,
    )
    client_handler.start()
|
||||
5
engine/utils/thdata.py
Normal file
5
engine/utils/thdata.py
Normal file
@@ -0,0 +1,5 @@
|
||||
class ThreadData:
    """Small mutable holder for the engine's run status."""

    def __init__(self) -> None:
        # A new instance starts in the "running" state.
        self.status: str = "running"


# Module-level instance created at import time.
thread_data = ThreadData()
|
||||
Reference in New Issue
Block a user