From e4f937e6b60ed1aba0bd1abdf47c8c2ac90a2e12 Mon Sep 17 00:00:00 2001 From: himeditator Date: Mon, 28 Jul 2025 21:44:49 +0800 Subject: [PATCH] =?UTF-8?q?feat(engine):=20=E4=BC=98=E5=8C=96=E5=AD=97?= =?UTF-8?q?=E5=B9=95=E5=BC=95=E6=93=8E=E9=80=9A=E4=BF=A1=E5=92=8C=E6=8E=A7?= =?UTF-8?q?=E5=88=B6=E9=80=BB=E8=BE=91=EF=BC=8C=E4=BC=98=E5=8C=96=E7=AA=97?= =?UTF-8?q?=E5=8F=A3=E4=BF=A1=E6=81=AF=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 优化错误处理和引擎重启逻辑 - 添加字幕引擎强制终止功能 - 调整通知和错误提示的显示位置 - 优化日志记录精度到毫秒级 --- docs/CHANGELOG.md | 6 +- docs/api-docs/caption-engine.md | 72 ++++++++++---- engine/audio2text/gummy.py | 7 +- engine/main.py | 28 ++++-- engine/utils/audioprcs.py | 2 +- engine/utils/server.py | 9 +- src/main/utils/CaptionEngine.ts | 94 +++++++++++++------ src/main/utils/Log.ts | 3 +- src/renderer/src/components/CaptionStyle.vue | 3 +- src/renderer/src/components/EngineControl.vue | 1 + src/renderer/src/components/EngineStatus.vue | 14 ++- src/renderer/src/stores/engineControl.ts | 4 +- 12 files changed, 171 insertions(+), 72 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index a1afb67..0f51d25 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -123,9 +123,13 @@ - 新增字幕记录排序功能,可选择字幕记录正序或倒叙显示 +### 优化体验 + +- 交换窗口界面信息和错误提示弹窗的位置,防止提示信息挡住操作 + ### 项目优化 - 重构字幕引擎,提示字幕引擎代码的可扩展性和可读性 -- 合并 Gummy 和 Vosk 引擎为单个可执行文件 +- 合并 Gummy 和 Vosk 引擎为单个可执行文件,减小软件体积 - 字幕引擎和主程序添加 WebScoket 通信,完全避免字幕引擎成为孤儿进程 diff --git a/docs/api-docs/caption-engine.md b/docs/api-docs/caption-engine.md index d284b5a..52799b5 100644 --- a/docs/api-docs/caption-engine.md +++ b/docs/api-docs/caption-engine.md @@ -1,17 +1,63 @@ # caption engine api-doc -本文档主要 Electron 主进程和字幕引擎进程的通信约定。 +本文档主要介绍字幕引擎和 Electron 主进程的通信约定。 ## 原理说明 -本项目的 Python 进程通过标准输出向 Electron 主进程发送数据。 +本项目的 Python 进程通过标准输出向 Electron 主进程发送数据。Python 进程标准输出 (`sys.stdout`) 的内容一定为一行一行的字符串。且每行字符串均可以解释为一个 JSON 对象。每个 JSON 对象一定有 `command` 参数。 -Python 进程标准输出 (`sys.stdout`) 
的内容一定为一行一行的字符串。且每行字符串均可以解释为一个 JSON 对象。每个 JSON 对象一定有 `command` 参数。 +Electron 主进程通过 WebSocket 向 Python 进程发送数据。发送的数据均是转化为字符串的对象,对象格式一定为: -## 输出约定 +```js +{ + command: string, + content: string +} +``` + +## 标准输出约定 + +> 数据传递方向:字幕引擎进程 => Electron 主进程 当 JSON 对象的 `command` 参数为下列值时,表示的对应的含义: +### `connect` + +```js +{ + command: "connect", + content: "" +} +``` + +字幕引擎 WebSocket 服务已经准备好,命令 Electron 主进程连接字幕引擎 WebSocket 服务 + +### `kill` + +```js +{ + command: "kill", + content: "" +} +``` + +命令 Electron 主进程强制结束字幕引擎进程。 + +### `caption` + +```js +{ + command: "caption", + index: number, + time_s: string, + time_t: string, + text: string, + translation: string +} +``` + +Python 端监听到的音频流转换为的字幕数据。 + ### `print` ```js @@ -45,18 +91,12 @@ Python 端打印的提示信息,比起 `print`,该信息更希望 Electron Gummy 字幕引擎结束时打印计费消耗信息。 +## WebSocket -### `caption` +> 数据传递方向:Electron 主进程 => 字幕引擎进程 -```js -{ - command: "caption", - index: number, - time_s: string, - time_t: string, - text: string, - translation: string -} -``` +当 JSON 对象的 `command` 参数为下列值时,表示的对应的含义: -Python 端监听到的音频流转换为的字幕数据。 \ No newline at end of file +### `stop` + +命令当前字幕引擎停止监听并结束任务。 \ No newline at end of file diff --git a/engine/audio2text/gummy.py b/engine/audio2text/gummy.py index 1f503b8..7d95b9b 100644 --- a/engine/audio2text/gummy.py +++ b/engine/audio2text/gummy.py @@ -6,7 +6,7 @@ from dashscope.audio.asr import ( ) import dashscope from datetime import datetime -from utils import stdout_cmd, stdout_obj +from utils import stdout_cmd, stdout_obj, stderr class Callback(TranslationRecognizerCallback): @@ -96,4 +96,7 @@ class GummyRecognizer: def stop(self): """停止 Gummy 引擎""" - self.translator.stop() + try: + self.translator.stop() + except Exception: + return diff --git a/engine/main.py b/engine/main.py index 09583e5..92ef24d 100644 --- a/engine/main.py +++ b/engine/main.py @@ -1,5 +1,5 @@ import argparse -from utils import stdout_cmd +from utils import stdout_cmd, stderr from utils import thread_data, start_server from utils import 
merge_chunk_channels, resample_chunk_mono from audio2text import InvalidParameter, GummyRecognizer @@ -8,6 +8,7 @@ from sysaudio import AudioStream def main_gummy(s: str, t: str, a: int, c: int, k: str): + global thread_data stream = AudioStream(a, c) if t == 'none': engine = GummyRecognizer(stream.RATE, s, None, k) @@ -17,6 +18,7 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str): stream.open_stream() engine.start() + restart_count = 0 while thread_data.status == "running": try: chunk = stream.read_chunk() @@ -24,18 +26,22 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str): chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS) try: engine.send_audio_frame(chunk_mono) - except InvalidParameter: - stdout_cmd('info', 'Gummy engine stopped, restart engine') - engine.start() - engine.send_audio_frame(chunk_mono) + except InvalidParameter as e: + restart_count += 1 + if restart_count > 8: + stderr(str(e)) + thread_data.status = "kill" + break + else: + stdout_cmd('info', f'Gummy engine stopped, trying to restart #{restart_count}') except KeyboardInterrupt: break stream.close_stream() engine.stop() - def main_vosk(a: int, c: int, m: str): + global thread_data stream = AudioStream(a, c) engine = VoskRecognizer(m) @@ -68,9 +74,8 @@ if __name__ == "__main__": parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model') # vosk parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.') - # for test - args = parser.parse_args() - + + args = parser.parse_args() if int(args.port) == 0: thread_data.status = "running" else: @@ -91,4 +96,7 @@ if __name__ == "__main__": args.model_path ) else: - raise ValueError('Invalid caption engine specified.') \ No newline at end of file + raise ValueError('Invalid caption engine specified.') + + if thread_data.status == "kill": + stdout_cmd('kill') \ No newline at end of file diff --git a/engine/utils/audioprcs.py b/engine/utils/audioprcs.py index 7f24563..a4362a2 100644 
--- a/engine/utils/audioprcs.py +++ b/engine/utils/audioprcs.py @@ -1,6 +1,6 @@ import samplerate import numpy as np -import numpy.core.multiarray +import numpy.core.multiarray # do not remove def merge_chunk_channels(chunk: bytes, channels: int) -> bytes: """ diff --git a/engine/utils/server.py b/engine/utils/server.py index 48fe3ce..9026e2e 100644 --- a/engine/utils/server.py +++ b/engine/utils/server.py @@ -6,7 +6,7 @@ from utils import thread_data, stdout_cmd, stderr def handle_client(client_socket): global thread_data - while True: + while thread_data.status == 'running': try: data = client_socket.recv(4096).decode('utf-8') if not data: @@ -14,9 +14,8 @@ def handle_client(client_socket): data = json.loads(data) if data['command'] == 'stop': - if thread_data.status == 'running': - thread_data.status = 'stop' - break + thread_data.status = 'stop' + break except Exception as e: stderr(f'Communication error: {e}') break @@ -29,7 +28,7 @@ def start_server(port: int): server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server.bind(('localhost', port)) server.listen(1) - stdout_cmd('ready') + stdout_cmd('connect') client, addr = server.accept() client_handler = threading.Thread(target=handle_client, args=(client,)) diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 6bd3408..80e5ad7 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -1,4 +1,4 @@ -import { spawn } from 'child_process' +import { exec, spawn } from 'child_process' import { app } from 'electron' import { is } from '@electron-toolkit/utils' import path from 'path' @@ -13,11 +13,11 @@ export class CaptionEngine { command: string[] = [] process: any | undefined client: net.Socket | undefined - status: 'running' | 'stopping' | 'stopped' = 'stopped' + status: 'running' | 'starting' | 'stopping' | 'stopped' = 'stopped' private getApp(): boolean { if (allConfig.controls.customized) { - Log.info('Using customized engine') + 
Log.info('Using customized caption engine') this.appPath = allConfig.controls.customizedApp this.command = allConfig.controls.customizedCommand.split(' ') } @@ -30,14 +30,14 @@ export class CaptionEngine { } this.command = [] if (is.dev) { - // this.appPath = path.join( - // app.getAppPath(), 'engine', - // 'subenv', 'Scripts', 'python.exe' - // ) - // this.command.push(path.join( - // app.getAppPath(), 'engine', 'main.py' - // )) - this.appPath = path.join(app.getAppPath(), 'engine', 'dist', 'main.exe') + this.appPath = path.join( + app.getAppPath(), 'engine', + 'subenv', 'Scripts', 'python.exe' + ) + this.command.push(path.join( + app.getAppPath(), 'engine', 'main.py' + )) + // this.appPath = path.join(app.getAppPath(), 'engine', 'dist', 'main.exe') } else { this.appPath = path.join(process.resourcesPath, 'engine', 'main.exe') @@ -73,6 +73,14 @@ export class CaptionEngine { Log.info('Connected to caption engine server'); }); this.status = 'running' + allConfig.controls.engineEnabled = true + if(controlWindow.window){ + allConfig.sendControls(controlWindow.window) + controlWindow.window.webContents.send( + 'control.engine.started', + this.process.pid + ) + } } public sendCommand(command: string, content: string = "") { @@ -93,19 +101,11 @@ export class CaptionEngine { if(!this.getApp()){ return } this.process = spawn(this.appPath, this.command) - Log.info('Caption Engine Started, PID:', this.process.pid) - - allConfig.controls.engineEnabled = true - if(controlWindow.window){ - allConfig.sendControls(controlWindow.window) - controlWindow.window.webContents.send( - 'control.engine.started', - this.process.pid - ) - } - + this.status = 'starting' + Log.info('Caption Engine Starting, PID:', this.process.pid) + this.process.stdout.on('data', (data: any) => { - const lines = data.toString().split('\n'); + const lines = data.toString().split('\n') lines.forEach((line: string) => { if (line.trim()) { try { @@ -120,13 +120,18 @@ export class CaptionEngine { }); 
this.process.stderr.on('data', (data: any) => { - if(this.status === 'stopping') return - controlWindow.sendErrorMessage(i18n('engine.error') + data) - Log.error(`Engine Error: ${data}`); + const lines = data.toString().split('\n') + lines.forEach((line: string) => { + if(line.trim()){ + controlWindow.sendErrorMessage(/*i18n('engine.error') +*/ line) + console.error(line) + } + }) }); this.process.on('close', (code: any) => { this.process = undefined; + this.client = undefined allConfig.controls.engineEnabled = false if(controlWindow.window){ allConfig.sendControls(controlWindow.window) @@ -150,25 +155,52 @@ export class CaptionEngine { this.status = 'stopping' Log.info('Caption engine process stopping...') } + + public kill(){ + if(this.status !== 'running'){ + Log.warn('Engine is not running, current status:', this.status) + return + } + if (this.process.pid) { + Log.warn('Trying to kill engine process, PID:', this.process.pid) + if(this.client){ + this.client.destroy() + this.client = undefined + } + let cmd = `kill ${this.process.pid}`; + if (process.platform === "win32") { + cmd = `taskkill /pid ${this.process.pid} /t /f` + } + exec(cmd) + } + this.status = 'stopping' + } } function handleEngineData(data: any) { - if(data.command === 'ready'){ + if(data.command === 'connect'){ captionEngine.connect() } + else if(data.command === 'kill') { + if(captionEngine.status !== 'stopped') { + Log.warn('Error occurred, trying to kill Gummy engine...') + captionEngine.kill() + } + } else if(data.command === 'caption') { allConfig.updateCaptionLog(data); } else if(data.command === 'print') { - console.log(data.content) - // Log.info('Engine Print:', data.content) + Log.info('Engine Print:', data.content) } else if(data.command === 'info') { Log.info('Engine Info:', data.content) } else if(data.command === 'usage') { - console.error(data.content) - // Log.info('Gummy Engine Usage: ', data.content) + Log.info('Gummy Engine Usage: ', data.content) + } + else { + 
Log.warn('Unknown command:', data) } } diff --git a/src/main/utils/Log.ts b/src/main/utils/Log.ts index 93f1022..c226c08 100644 --- a/src/main/utils/Log.ts +++ b/src/main/utils/Log.ts @@ -3,7 +3,8 @@ function getTimeString() { const HH = String(now.getHours()).padStart(2, '0') const MM = String(now.getMinutes()).padStart(2, '0') const SS = String(now.getSeconds()).padStart(2, '0') - return `${HH}:${MM}:${SS}` + const MS = String(now.getMilliseconds()).padStart(3, '0') + return `${HH}:${MM}:${SS}.${MS}` } export class Log { diff --git a/src/renderer/src/components/CaptionStyle.vue b/src/renderer/src/components/CaptionStyle.vue index 4331d44..5b90392 100644 --- a/src/renderer/src/components/CaptionStyle.vue +++ b/src/renderer/src/components/CaptionStyle.vue @@ -282,7 +282,8 @@ function applyStyle(){ captionStyle.sendStylesChange(); - notification.open({ + notification.open({ + placement: 'topLeft', message: t('noti.styleChange'), description: t('noti.styleInfo') }); diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue index 08ffa5a..ab610fc 100644 --- a/src/renderer/src/components/EngineControl.vue +++ b/src/renderer/src/components/EngineControl.vue @@ -164,6 +164,7 @@ function applyChange(){ engineControl.sendControlsChange() notification.open({ + placement: 'topLeft', message: t('noti.engineChange'), description: t('noti.changeInfo') }); diff --git a/src/renderer/src/components/EngineStatus.vue b/src/renderer/src/components/EngineStatus.vue index f838d72..b5f383d 100644 --- a/src/renderer/src/components/EngineStatus.vue +++ b/src/renderer/src/components/EngineStatus.vue @@ -61,12 +61,14 @@ >{{ $t('status.openCaption') }} {{ $t('status.startEngine') }} {{ $t('status.stopEngine') }} @@ -119,13 +121,14 @@