diff --git a/docs/api-docs/caption-engine.md b/docs/api-docs/caption-engine.md index 461db42..d284b5a 100644 --- a/docs/api-docs/caption-engine.md +++ b/docs/api-docs/caption-engine.md @@ -32,7 +32,19 @@ Python 进程标准输出 (`sys.stdout`) 的内容一定为一行一行的字符 } ``` -Python 端打印的提示信息。 +Python 端打印的提示信息,比起 `print`,该信息更希望 Electron 端的关注。 + +### `usage` + +```js +{ + command: "usage", + content: string +} +``` + +Gummy 字幕引擎结束时打印计费消耗信息。 + ### `caption` @@ -42,7 +54,6 @@ Python 端打印的提示信息。 index: number, time_s: string, time_t: string, - end: boolean, text: string, translation: string } diff --git a/engine/audio2text/gummy.py b/engine/audio2text/gummy.py index 072f5a2..e49f47b 100644 --- a/engine/audio2text/gummy.py +++ b/engine/audio2text/gummy.py @@ -15,18 +15,20 @@ class Callback(TranslationRecognizerCallback): """ def __init__(self): super().__init__() + self.index = 0 self.usage = 0 self.cur_id = -1 - self.index = 0 self.time_str = '' def on_open(self) -> None: + self.usage = 0 self.cur_id = -1 self.time_str = '' stdout_cmd('info', 'Gummy translator started.') def on_close(self) -> None: stdout_cmd('info', 'Gummy translator closed.') + stdout_cmd('usage', str(self.usage)) def on_event( self, @@ -46,7 +48,6 @@ class Callback(TranslationRecognizerCallback): caption['index'] = self.index caption['time_s'] = self.time_str caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3] - caption['end'] = transcription_result.is_sentence_end caption['text'] = transcription_result.text caption['translation'] = "" @@ -57,7 +58,8 @@ class Callback(TranslationRecognizerCallback): if usage: self.usage += usage['duration'] - stdout_obj(caption) + if 'text' in caption: + stdout_obj(caption) class GummyTranslator: @@ -88,7 +90,7 @@ class GummyTranslator: self.translator.start() def send_audio_frame(self, data): - """发送音频帧""" + """发送音频帧,擎将自动识别并将识别结果输出到标准输出中""" self.translator.send_audio_frame(data) def stop(self): diff --git a/engine/audio2text/vosk.py b/engine/audio2text/vosk.py new file mode 
100644 index 0000000..7c34459 --- /dev/null +++ b/engine/audio2text/vosk.py @@ -0,0 +1,60 @@ +import json +from datetime import datetime + +from vosk import Model, KaldiRecognizer, SetLogLevel +from utils import stdout_obj + +class VoskRecognizer: + """ + Stream audio data through the Vosk engine and write JSON strings readable by the Auto Caption software to standard output + + Init args: + model_path: path to the Vosk recognition model + """ + def __init__(self, model_path: str): + SetLogLevel(-1) + # The path may arrive wrapped in literal double quotes; strip them before loading the model + if model_path.startswith('"'): + model_path = model_path[1:] + if model_path.endswith('"'): + model_path = model_path[:-1] + self.model_path = model_path + self.time_str = '' + self.cur_id = 0 + self.prev_content = '' + + self.model = Model(self.model_path) + self.recognizer = KaldiRecognizer(self.model, 16000) + + def send_audio_frame(self, data: bytes): + """ + Send an audio frame to the Vosk engine; the recognition result is written to standard output + + Args: + data: audio frame data; the sample rate must be 16000 Hz + """ + caption = {} + caption['command'] = 'caption' + caption['translation'] = '' + + if self.recognizer.AcceptWaveform(data): + content = json.loads(self.recognizer.Result()).get('text', '') + caption['index'] = self.cur_id + caption['text'] = content + caption['time_s'] = self.time_str + caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3] + self.prev_content = '' + self.cur_id += 1 + else: + content = json.loads(self.recognizer.PartialResult()).get('partial', '') + if content == '' or content == self.prev_content: + return + if self.prev_content == '': + self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3] + caption['index'] = self.cur_id + caption['text'] = content + caption['time_s'] = self.time_str + caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3] + self.prev_content = content + + stdout_obj(caption) diff --git a/engine/main-vosk.py b/engine/main-vosk.py index a095909..5a3fb32 100644 --- a/engine/main-vosk.py +++ b/engine/main-vosk.py @@ -49,6 +49,7 @@ def convert_audio_to_text(audio_type, chunk_rate, model_path): continue if prev_content == '': time_str = 
datetime.now().strftime('%H:%M:%S.%f')[:-3] + caption['command'] = 'caption' caption['index'] = cur_id caption['text'] = content caption['time_s'] = time_str diff --git a/engine/main.py b/engine/main.py new file mode 100644 index 0000000..6ea3d53 --- /dev/null +++ b/engine/main.py @@ -0,0 +1,37 @@ +import argparse + +def gummy_engine(s, t, a, c, k): + pass + +def vosk_engine(a, c, m): + pass + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Convert system audio stream to text') + # both + parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk') + parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') + parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second') + # gummy + parser.add_argument('-s', '--source_language', default='en', help='Source language code') + parser.add_argument('-t', '--target_language', default='zh', help='Target language code') + parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model') + # vosk + parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.') + args = parser.parse_args() + if args.caption_engine == 'gummy': + gummy_engine( + args.source_language, + args.target_language, + int(args.audio_type), + int(args.chunk_rate), + args.api_key + ) + elif args.caption_engine == 'vosk': + vosk_engine( + int(args.audio_type), + int(args.chunk_rate), + args.model_path + ) + else: + raise ValueError('Invalid caption engine specified.') \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 7bb8f63..8a9850e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,7 @@ "@electron-toolkit/eslint-config-ts": "^3.0.0", "@electron-toolkit/tsconfig": "^1.0.1", "@types/node": "^22.14.1", + "@types/pidusage": "^2.0.5", "@vitejs/plugin-vue": "^5.2.3", "electron": "^35.1.5", 
"electron-builder": "^25.1.8", @@ -2296,6 +2297,13 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/pidusage": { + "version": "2.0.5", + "resolved": "https://registry.npmmirror.com/@types/pidusage/-/pidusage-2.0.5.tgz", + "integrity": "sha512-MIiyZI4/MK9UGUXWt0jJcCZhVw7YdhBuTOuqP/BjuLDLZ2PmmViMIQgZiWxtaMicQfAz/kMrZ5T7PKxFSkTeUA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/plist": { "version": "3.0.5", "resolved": "https://registry.npmmirror.com/@types/plist/-/plist-3.0.5.tgz", diff --git a/package.json b/package.json index f2ed497..7a8cef4 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "typecheck:web": "vue-tsc --noEmit -p tsconfig.web.json --composite false", "typecheck": "npm run typecheck:node && npm run typecheck:web", "start": "electron-vite preview", - "dev": "electron-vite dev", + "dev": "chcp 65001 && electron-vite dev", "build": "npm run typecheck && electron-vite build", "postinstall": "electron-builder install-app-deps", "build:unpack": "npm run build && electron-builder --dir", @@ -35,6 +35,7 @@ "@electron-toolkit/eslint-config-ts": "^3.0.0", "@electron-toolkit/tsconfig": "^1.0.1", "@types/node": "^22.14.1", + "@types/pidusage": "^2.0.5", "@vitejs/plugin-vue": "^5.2.3", "electron": "^35.1.5", "electron-builder": "^25.1.8", diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index 8bfd015..d041f1d 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -2,6 +2,7 @@ import { UILanguage, UITheme, Styles, Controls, CaptionItem, FullConfig } from '../types' +import { Log } from './Log' import { app, BrowserWindow } from 'electron' import * as path from 'path' import * as fs from 'fs' @@ -48,6 +49,7 @@ class AllConfig { uiTheme: UITheme = 'system'; styles: Styles = {...defaultStyles}; controls: Controls = {...defaultControls}; + lastLogIndex: number = -1; captionLog: CaptionItem[] = []; constructor() {} @@ -61,7 +63,7 @@ class AllConfig { if(config.leftBarWidth) 
this.leftBarWidth = config.leftBarWidth if(config.styles) this.setStyles(config.styles) if(config.controls) this.setControls(config.controls) - console.log('[INFO] Read Config from:', configPath) + Log.info('Read Config from:', configPath) } } @@ -75,7 +77,7 @@ class AllConfig { } const configPath = path.join(app.getPath('userData'), 'config.json') fs.writeFileSync(configPath, JSON.stringify(config, null, 2)) - console.log('[INFO] Write Config to:', configPath) + Log.info('Write Config to:', configPath) } public getFullConfig(): FullConfig { @@ -96,7 +98,7 @@ class AllConfig { this.styles[key] = args[key] } } - console.log('[INFO] Set Styles:', this.styles) + Log.info('Set Styles:', this.styles) } public resetStyles() { @@ -105,7 +107,7 @@ class AllConfig { public sendStyles(window: BrowserWindow) { window.webContents.send('both.styles.set', this.styles) - console.log(`[INFO] Send Styles to #${window.id}:`, this.styles) + Log.info(`Send Styles to #${window.id}:`, this.styles) } public setControls(args: Object) { @@ -116,27 +118,28 @@ class AllConfig { } } this.controls.engineEnabled = engineEnabled - console.log('[INFO] Set Controls:', this.controls) + Log.info('Set Controls:', this.controls) } public sendControls(window: BrowserWindow) { window.webContents.send('control.controls.set', this.controls) - console.log(`[INFO] Send Controls to #${window.id}:`, this.controls) + Log.info(`Send Controls to #${window.id}:`, this.controls) } public updateCaptionLog(log: CaptionItem) { let command: 'add' | 'upd' = 'add' if( this.captionLog.length && - this.captionLog[this.captionLog.length - 1].index === log.index && - this.captionLog[this.captionLog.length - 1].time_s === log.time_s + this.lastLogIndex === log.index ) { this.captionLog.splice(this.captionLog.length - 1, 1, log) command = 'upd' } else { this.captionLog.push(log) + this.lastLogIndex = log.index } + this.captionLog[this.captionLog.length - 1].index = this.captionLog.length for(const window of 
BrowserWindow.getAllWindows()){ this.sendCaptionLog(window, command) } diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 0c03160..8f102b5 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -5,6 +5,7 @@ import path from 'path' import { controlWindow } from '../ControlWindow' import { allConfig } from './AllConfig' import { i18n } from '../i18n' +import { Log } from './Log' export class CaptionEngine { appPath: string = '' @@ -14,7 +15,7 @@ export class CaptionEngine { private getApp(): boolean { if (allConfig.controls.customized && allConfig.controls.customizedApp) { - console.log('[INFO] Using customized engine') + Log.info('Using customized engine') this.appPath = allConfig.controls.customizedApp this.command = allConfig.controls.customizedCommand.split(' ') } @@ -25,9 +26,7 @@ export class CaptionEngine { return false } let gummyName = 'main-gummy' - if (process.platform === 'win32') { - gummyName += '.exe' - } + if (process.platform === 'win32') { gummyName += '.exe' } this.command = [] if (is.dev) { this.appPath = path.join( @@ -56,31 +55,33 @@ export class CaptionEngine { else if(allConfig.controls.engine === 'vosk'){ allConfig.controls.customized = false let voskName = 'main-vosk' - if (process.platform === 'win32') { - voskName += '.exe' - } + if (process.platform === 'win32') { voskName += '.exe' } + this.command = [] if (is.dev) { this.appPath = path.join( - app.getAppPath(), - 'engine', 'dist', voskName + app.getAppPath(), 'engine', + 'subenv', 'Scripts', 'python.exe' ) + this.command.push(path.join( + app.getAppPath(), 'engine', 'main-vosk.py' + )) } else { this.appPath = path.join( process.resourcesPath, 'engine', voskName ) } - this.command = [] this.command.push('-a', allConfig.controls.audio ? 
'1' : '0') this.command.push('-m', `"${allConfig.controls.modelPath}"`) } - console.log('[INFO] Engine Path:', this.appPath) - console.log('[INFO] Engine Command:', this.command) + Log.info('Engine Path:', this.appPath) + Log.info('Engine Command:', this.command) return true } public start() { if (this.processStatus !== 'stopped') { + Log.warn('Caption engine status is not stopped, cannot start') return } if(!this.getApp()){ return } @@ -90,12 +91,12 @@ export class CaptionEngine { } catch (e) { controlWindow.sendErrorMessage(i18n('engine.start.error') + e) - console.error('[ERROR] Error starting subprocess:', e) + Log.error('Error starting engine:', e) return } this.processStatus = 'running' - console.log('[INFO] Caption Engine Started, PID:', this.process.pid) + Log.info('Caption Engine Started, PID:', this.process.pid) allConfig.controls.engineEnabled = true if(controlWindow.window){ @@ -111,27 +112,23 @@ export class CaptionEngine { lines.forEach((line: string) => { if (line.trim()) { try { - const caption = JSON.parse(line); - if(caption.index === undefined) { - console.log('[INFO] Engine Bad Output:', caption); - } - else allConfig.updateCaptionLog(caption); + const data_obj = JSON.parse(line) + handleEngineData(data_obj) } catch (e) { controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e) - console.error('[ERROR] Error parsing JSON:', e); + Log.error('Error parsing JSON:', e) } } }); }); - this.process.stderr.on('data', (data) => { + this.process.stderr.on('data', (data: any) => { if(this.processStatus === 'stopping') return controlWindow.sendErrorMessage(i18n('engine.error') + data) - console.error(`[ERROR] Subprocess Error: ${data}`); + Log.error(`Engine Error: ${data}`); }); this.process.on('close', (code: any) => { - console.log(`[INFO] Subprocess exited with code ${code}`); this.process = undefined; allConfig.controls.engineEnabled = false if(controlWindow.window){ @@ -139,14 +136,14 @@ export class CaptionEngine { 
controlWindow.window.webContents.send('control.engine.stopped') } this.processStatus = 'stopped' - console.log('[INFO] Caption engine process stopped') + Log.info(`Engine exited with code ${code}`) }); } public stop() { if(this.processStatus !== 'running') return if (this.process.pid) { - console.log('[INFO] Trying to stop process, PID:', this.process.pid) + Log.info('Trying to stop process, PID:', this.process.pid) let cmd = `kill ${this.process.pid}`; if (process.platform === "win32") { cmd = `taskkill /pid ${this.process.pid} /t /f` @@ -154,7 +151,7 @@ export class CaptionEngine { exec(cmd, (error) => { if (error) { controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error) - console.error(`[ERROR] Failed to kill process: ${error}`) + Log.error(`Failed to kill process: ${error}`) } }) } @@ -166,11 +163,31 @@ export class CaptionEngine { controlWindow.window.webContents.send('control.engine.stopped') } this.processStatus = 'stopped' - console.log('[INFO] Process PID undefined, caption engine process stopped') + Log.info('Process PID undefined, caption engine process stopped') return } this.processStatus = 'stopping' - console.log('[INFO] Caption engine process stopping') + Log.info('Caption engine process stopping') + } +} + +/** Dispatch one parsed JSON message from the caption engine according to its `command` field. */ +function handleEngineData(data: any) { + if(data.command === 'caption') { + allConfig.updateCaptionLog(data); + } + else if(data.command === 'print') { + Log.info('Engine print:', data.content) + } + else if(data.command === 'info') { + Log.info('Engine info:', data.content) + } + else if(data.command === 'usage') { + Log.info('Caption engine usage: ', data.content) } + else { + // Surface unrecognized messages instead of dropping them silently + Log.warn('Unknown engine output:', data) + } } diff --git a/src/main/utils/Log.ts b/src/main/utils/Log.ts new file mode 100644 index 0000000..f2568ce --- /dev/null +++ b/src/main/utils/Log.ts @@ -0,0 +1,23 @@ +/** Return the current local time formatted as HH:MM:SS. */ +function getTimeString() { + const now = new Date() + const HH = String(now.getHours()).padStart(2, '0') + const MM = String(now.getMinutes()).padStart(2, '0') + const SS = 
String(now.getSeconds()).padStart(2, '0') + return `${HH}:${MM}:${SS}` +} + +/** Timestamped console logger; warnings and errors are routed through console.warn / console.error. */ +export class Log { + static info(...msg: any[]){ + console.log(`[INFO ${getTimeString()}]`, ...msg) + } + + static warn(...msg: any[]){ + console.warn(`[WARN ${getTimeString()}]`, ...msg) + } + + static error(...msg: any[]){ + console.error(`[ERROR ${getTimeString()}]`, ...msg) + } +} diff --git a/src/renderer/src/components/CaptionLog.vue b/src/renderer/src/components/CaptionLog.vue index ada58b7..ac416d6 100644 --- a/src/renderer/src/components/CaptionLog.vue +++ b/src/renderer/src/components/CaptionLog.vue @@ -174,6 +174,12 @@ const columns = [ dataIndex: 'index', key: 'index', width: 80, + sorter: (a: CaptionItem, b: CaptionItem) => { + if(a.index <= b.index) return -1 + return 1 + }, + sortDirections: ['descend'], + defaultSortOrder: 'descend', }, { title: 'time', @@ -184,8 +190,7 @@ const columns = [ if(a.time_s <= b.time_s) return -1 return 1 }, - sortDirections: ['descend'], - defaultSortOrder: 'descend', + sortDirections: ['descend', 'ascend'], }, { title: 'content',