feat(engine): 优化字幕引擎通信和控制逻辑,优化窗口信息展示

- 优化错误处理和引擎重启逻辑
- 添加字幕引擎强制终止功能
- 调整通知和错误提示的显示位置
- 优化日志记录精度到毫秒级
This commit is contained in:
himeditator
2025-07-28 21:44:49 +08:00
parent cd9f3a847d
commit e4f937e6b6
12 changed files with 171 additions and 72 deletions

View File

@@ -6,7 +6,7 @@ from dashscope.audio.asr import (
)
import dashscope
from datetime import datetime
from utils import stdout_cmd, stdout_obj
from utils import stdout_cmd, stdout_obj, stderr
class Callback(TranslationRecognizerCallback):
@@ -96,4 +96,7 @@ class GummyRecognizer:
def stop(self):
"""停止 Gummy 引擎"""
self.translator.stop()
try:
self.translator.stop()
except Exception:
return

View File

@@ -1,5 +1,5 @@
import argparse
from utils import stdout_cmd
from utils import stdout_cmd, stderr
from utils import thread_data, start_server
from utils import merge_chunk_channels, resample_chunk_mono
from audio2text import InvalidParameter, GummyRecognizer
@@ -8,6 +8,7 @@ from sysaudio import AudioStream
def main_gummy(s: str, t: str, a: int, c: int, k: str):
global thread_data
stream = AudioStream(a, c)
if t == 'none':
engine = GummyRecognizer(stream.RATE, s, None, k)
@@ -17,6 +18,7 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str):
stream.open_stream()
engine.start()
restart_count = 0
while thread_data.status == "running":
try:
chunk = stream.read_chunk()
@@ -24,18 +26,22 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str):
chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS)
try:
engine.send_audio_frame(chunk_mono)
except InvalidParameter:
stdout_cmd('info', 'Gummy engine stopped, restart engine')
engine.start()
engine.send_audio_frame(chunk_mono)
except InvalidParameter as e:
restart_count += 1
if restart_count > 8:
stderr(str(e))
thread_data.status = "kill"
break
else:
stdout_cmd('info', f'Gummy engine stopped, trying to restart #{restart_count}')
except KeyboardInterrupt:
break
stream.close_stream()
engine.stop()
def main_vosk(a: int, c: int, m: str):
global thread_data
stream = AudioStream(a, c)
engine = VoskRecognizer(m)
@@ -68,9 +74,8 @@ if __name__ == "__main__":
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
# vosk
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
# for test
args = parser.parse_args()
args = parser.parse_args()
if int(args.port) == 0:
thread_data.status = "running"
else:
@@ -91,4 +96,7 @@ if __name__ == "__main__":
args.model_path
)
else:
raise ValueError('Invalid caption engine specified.')
raise ValueError('Invalid caption engine specified.')
if thread_data.status == "kill":
stdout_cmd('kill')

View File

@@ -1,6 +1,6 @@
import samplerate
import numpy as np
import numpy.core.multiarray
import numpy.core.multiarray # do not remove
def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
"""

View File

@@ -6,7 +6,7 @@ from utils import thread_data, stdout_cmd, stderr
def handle_client(client_socket):
global thread_data
while True:
while thread_data.status == 'running':
try:
data = client_socket.recv(4096).decode('utf-8')
if not data:
@@ -14,9 +14,8 @@ def handle_client(client_socket):
data = json.loads(data)
if data['command'] == 'stop':
if thread_data.status == 'running':
thread_data.status = 'stop'
break
thread_data.status = 'stop'
break
except Exception as e:
stderr(f'Communication error: {e}')
break
@@ -29,7 +28,7 @@ def start_server(port: int):
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('localhost', port))
server.listen(1)
stdout_cmd('ready')
stdout_cmd('connect')
client, addr = server.accept()
client_handler = threading.Thread(target=handle_client, args=(client,))