feat(engine): 优化字幕引擎输出格式、准备合并两个字幕引擎

- 重构字幕引擎相关代码
- 准备合并两个字幕引擎
This commit is contained in:
himeditator
2025-07-27 17:15:12 +08:00
parent 3792eb88b6
commit b658ef5440
11 changed files with 205 additions and 45 deletions

View File

@@ -32,7 +32,19 @@ Python 进程标准输出 (`sys.stdout`) 的内容一定为一行一行的字符
} }
``` ```
Python 端打印的提示信息。 Python 端打印的提示信息。与 `print` 相比,该信息更需要 Electron 端关注。
### `usage`
```js
{
command: "usage",
content: string
}
```
Gummy 字幕引擎结束时打印计费消耗信息。
### `caption` ### `caption`
@@ -42,7 +54,6 @@ Python 端打印的提示信息。
index: number, index: number,
time_s: string, time_s: string,
time_t: string, time_t: string,
end: boolean,
text: string, text: string,
translation: string translation: string
} }

View File

@@ -15,18 +15,20 @@ class Callback(TranslationRecognizerCallback):
""" """
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.index = 0
self.usage = 0 self.usage = 0
self.cur_id = -1 self.cur_id = -1
self.index = 0
self.time_str = '' self.time_str = ''
def on_open(self) -> None: def on_open(self) -> None:
self.usage = 0
self.cur_id = -1 self.cur_id = -1
self.time_str = '' self.time_str = ''
stdout_cmd('info', 'Gummy translator started.') stdout_cmd('info', 'Gummy translator started.')
def on_close(self) -> None: def on_close(self) -> None:
stdout_cmd('info', 'Gummy translator closed.') stdout_cmd('info', 'Gummy translator closed.')
stdout_cmd('usage', str(self.usage))
def on_event( def on_event(
self, self,
@@ -46,7 +48,6 @@ class Callback(TranslationRecognizerCallback):
caption['index'] = self.index caption['index'] = self.index
caption['time_s'] = self.time_str caption['time_s'] = self.time_str
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3] caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
caption['end'] = transcription_result.is_sentence_end
caption['text'] = transcription_result.text caption['text'] = transcription_result.text
caption['translation'] = "" caption['translation'] = ""
@@ -57,7 +58,8 @@ class Callback(TranslationRecognizerCallback):
if usage: if usage:
self.usage += usage['duration'] self.usage += usage['duration']
stdout_obj(caption) if 'text' in caption:
stdout_obj(caption)
class GummyTranslator: class GummyTranslator:
@@ -88,7 +90,7 @@ class GummyTranslator:
self.translator.start() self.translator.start()
def send_audio_frame(self, data): def send_audio_frame(self, data):
"""发送音频帧""" """发送音频帧,擎将自动识别并将识别结果输出到标准输出中"""
self.translator.send_audio_frame(data) self.translator.send_audio_frame(data)
def stop(self): def stop(self):

59
engine/audio2text/vosk.py Normal file
View File

@@ -0,0 +1,59 @@
import json
from datetime import datetime
from vosk import Model, KaldiRecognizer, SetLogLevel
from utils import stdout_obj
class VoskRecognizer:
    """
    Feed streamed audio to a Vosk recognizer and print caption objects.

    Every caption is handed to ``stdout_obj`` as a dict with the keys
    ``command`` (always ``'caption'``), ``index``, ``text``, ``time_s``,
    ``time_t`` and ``translation`` (always an empty string here).

    Args:
        model_path: Path to the Vosk recognition model.
    """

    def __init__(self, model_path: str):
        # NOTE: this method was previously spelled ``__int__``, which meant the
        # constructor never ran and ``VoskRecognizer(path)`` raised TypeError.
        SetLogLevel(-1)

        # The path may be passed wrapped in literal double quotes; drop one
        # quote from each end if present.
        if model_path.startswith('"'):
            model_path = model_path[1:]
        if model_path.endswith('"'):
            model_path = model_path[:-1]

        self.model_path = model_path
        self.time_str = ''       # time_s of the caption currently being built
        self.cur_id = 0          # index assigned to the current caption
        self.prev_content = ''   # last partial text emitted, used to skip repeats

        self.model = Model(self.model_path)
        self.recognizer = KaldiRecognizer(self.model, 16000)

    def send_audio_frame(self, data: bytes):
        """
        Send one audio frame to the Vosk recognizer and emit any new caption.

        Nothing is emitted when the partial text is empty or identical to the
        previously emitted partial text.

        Args:
            data: Audio frame data; the sample rate must be 16000 Hz.
        """
        caption = {}
        caption['command'] = 'caption'
        caption['translation'] = ''

        if self.recognizer.AcceptWaveform(data):
            # The recognizer accepted the waveform as complete: emit Result()
            # and move on to the next caption index.
            content = json.loads(self.recognizer.Result()).get('text', '')
            caption['index'] = self.cur_id
            caption['text'] = content
            caption['time_s'] = self.time_str
            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            self.prev_content = ''
            self.cur_id += 1
        else:
            content = json.loads(self.recognizer.PartialResult()).get('partial', '')
            if content == '' or content == self.prev_content:
                return
            if self.prev_content == '':
                # First partial text of a new caption: remember its start time.
                self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            caption['index'] = self.cur_id
            caption['text'] = content
            caption['time_s'] = self.time_str
            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            self.prev_content = content

        stdout_obj(caption)

View File

@@ -49,6 +49,7 @@ def convert_audio_to_text(audio_type, chunk_rate, model_path):
continue continue
if prev_content == '': if prev_content == '':
time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3] time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
caption['command'] = 'caption'
caption['index'] = cur_id caption['index'] = cur_id
caption['text'] = content caption['text'] = content
caption['time_s'] = time_str caption['time_s'] = time_str

37
engine/main.py Normal file
View File

@@ -0,0 +1,37 @@
import argparse
def gummy_engine(s, t, a, c, k):
    """Placeholder for the Gummy caption engine; currently does nothing.

    The entry script in this file passes, in order: source language,
    target language, audio type, chunk rate and API key.
    """
    return None
def vosk_engine(a, c, m):
    """Placeholder for the Vosk caption engine; currently does nothing.

    The entry script in this file passes, in order: audio type, chunk rate
    and model path.
    """
    return None
if __name__ == "__main__":
    # Command-line entry point: parse the options and dispatch to one engine.
    cli = argparse.ArgumentParser(description='Convert system audio stream to text')

    # (short flag, long flag, default, help), registered in this order.
    option_table = (
        # both
        ('-e', '--caption_engine', 'gummy', 'Caption engine: gummy or vosk'),
        ('-a', '--audio_type', 0, 'Audio stream source: 0 for output, 1 for input'),
        ('-c', '--chunk_rate', 20, 'Number of audio stream chunks collected per second'),
        # gummy
        ('-s', '--source_language', 'en', 'Source language code'),
        ('-t', '--target_language', 'zh', 'Target language code'),
        ('-k', '--api_key', '', 'API KEY for Gummy model'),
        # vosk
        ('-m', '--model_path', '', 'The path to the vosk model.'),
    )
    for short_flag, long_flag, default_value, help_text in option_table:
        cli.add_argument(short_flag, long_flag, default=default_value, help=help_text)

    opts = cli.parse_args()
    engine_name = opts.caption_engine

    # Reject unknown engines before touching any other option.
    if engine_name not in ('gummy', 'vosk'):
        raise ValueError('Invalid caption engine specified.')

    # Values given on the command line arrive as strings; convert them here.
    audio_type = int(opts.audio_type)
    chunk_rate = int(opts.chunk_rate)

    if engine_name == 'gummy':
        gummy_engine(
            opts.source_language,
            opts.target_language,
            audio_type,
            chunk_rate,
            opts.api_key
        )
    else:
        vosk_engine(audio_type, chunk_rate, opts.model_path)

8
package-lock.json generated
View File

@@ -22,6 +22,7 @@
"@electron-toolkit/eslint-config-ts": "^3.0.0", "@electron-toolkit/eslint-config-ts": "^3.0.0",
"@electron-toolkit/tsconfig": "^1.0.1", "@electron-toolkit/tsconfig": "^1.0.1",
"@types/node": "^22.14.1", "@types/node": "^22.14.1",
"@types/pidusage": "^2.0.5",
"@vitejs/plugin-vue": "^5.2.3", "@vitejs/plugin-vue": "^5.2.3",
"electron": "^35.1.5", "electron": "^35.1.5",
"electron-builder": "^25.1.8", "electron-builder": "^25.1.8",
@@ -2296,6 +2297,13 @@
"undici-types": "~6.21.0" "undici-types": "~6.21.0"
} }
}, },
"node_modules/@types/pidusage": {
"version": "2.0.5",
"resolved": "https://registry.npmmirror.com/@types/pidusage/-/pidusage-2.0.5.tgz",
"integrity": "sha512-MIiyZI4/MK9UGUXWt0jJcCZhVw7YdhBuTOuqP/BjuLDLZ2PmmViMIQgZiWxtaMicQfAz/kMrZ5T7PKxFSkTeUA==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/plist": { "node_modules/@types/plist": {
"version": "3.0.5", "version": "3.0.5",
"resolved": "https://registry.npmmirror.com/@types/plist/-/plist-3.0.5.tgz", "resolved": "https://registry.npmmirror.com/@types/plist/-/plist-3.0.5.tgz",

View File

@@ -13,7 +13,7 @@
"typecheck:web": "vue-tsc --noEmit -p tsconfig.web.json --composite false", "typecheck:web": "vue-tsc --noEmit -p tsconfig.web.json --composite false",
"typecheck": "npm run typecheck:node && npm run typecheck:web", "typecheck": "npm run typecheck:node && npm run typecheck:web",
"start": "electron-vite preview", "start": "electron-vite preview",
"dev": "electron-vite dev", "dev": "chcp 65001 && electron-vite dev",
"build": "npm run typecheck && electron-vite build", "build": "npm run typecheck && electron-vite build",
"postinstall": "electron-builder install-app-deps", "postinstall": "electron-builder install-app-deps",
"build:unpack": "npm run build && electron-builder --dir", "build:unpack": "npm run build && electron-builder --dir",
@@ -35,6 +35,7 @@
"@electron-toolkit/eslint-config-ts": "^3.0.0", "@electron-toolkit/eslint-config-ts": "^3.0.0",
"@electron-toolkit/tsconfig": "^1.0.1", "@electron-toolkit/tsconfig": "^1.0.1",
"@types/node": "^22.14.1", "@types/node": "^22.14.1",
"@types/pidusage": "^2.0.5",
"@vitejs/plugin-vue": "^5.2.3", "@vitejs/plugin-vue": "^5.2.3",
"electron": "^35.1.5", "electron": "^35.1.5",
"electron-builder": "^25.1.8", "electron-builder": "^25.1.8",

View File

@@ -2,6 +2,7 @@ import {
UILanguage, UITheme, Styles, Controls, UILanguage, UITheme, Styles, Controls,
CaptionItem, FullConfig CaptionItem, FullConfig
} from '../types' } from '../types'
import { Log } from './Log'
import { app, BrowserWindow } from 'electron' import { app, BrowserWindow } from 'electron'
import * as path from 'path' import * as path from 'path'
import * as fs from 'fs' import * as fs from 'fs'
@@ -48,6 +49,7 @@ class AllConfig {
uiTheme: UITheme = 'system'; uiTheme: UITheme = 'system';
styles: Styles = {...defaultStyles}; styles: Styles = {...defaultStyles};
controls: Controls = {...defaultControls}; controls: Controls = {...defaultControls};
lastLogIndex: number = -1;
captionLog: CaptionItem[] = []; captionLog: CaptionItem[] = [];
constructor() {} constructor() {}
@@ -61,7 +63,7 @@ class AllConfig {
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
if(config.styles) this.setStyles(config.styles) if(config.styles) this.setStyles(config.styles)
if(config.controls) this.setControls(config.controls) if(config.controls) this.setControls(config.controls)
console.log('[INFO] Read Config from:', configPath) Log.info('Read Config from:', configPath)
} }
} }
@@ -75,7 +77,7 @@ class AllConfig {
} }
const configPath = path.join(app.getPath('userData'), 'config.json') const configPath = path.join(app.getPath('userData'), 'config.json')
fs.writeFileSync(configPath, JSON.stringify(config, null, 2)) fs.writeFileSync(configPath, JSON.stringify(config, null, 2))
console.log('[INFO] Write Config to:', configPath) Log.info('Write Config to:', configPath)
} }
public getFullConfig(): FullConfig { public getFullConfig(): FullConfig {
@@ -96,7 +98,7 @@ class AllConfig {
this.styles[key] = args[key] this.styles[key] = args[key]
} }
} }
console.log('[INFO] Set Styles:', this.styles) Log.info('Set Styles:', this.styles)
} }
public resetStyles() { public resetStyles() {
@@ -105,7 +107,7 @@ class AllConfig {
public sendStyles(window: BrowserWindow) { public sendStyles(window: BrowserWindow) {
window.webContents.send('both.styles.set', this.styles) window.webContents.send('both.styles.set', this.styles)
console.log(`[INFO] Send Styles to #${window.id}:`, this.styles) Log.info(`Send Styles to #${window.id}:`, this.styles)
} }
public setControls(args: Object) { public setControls(args: Object) {
@@ -116,27 +118,28 @@ class AllConfig {
} }
} }
this.controls.engineEnabled = engineEnabled this.controls.engineEnabled = engineEnabled
console.log('[INFO] Set Controls:', this.controls) Log.info('Set Controls:', this.controls)
} }
public sendControls(window: BrowserWindow) { public sendControls(window: BrowserWindow) {
window.webContents.send('control.controls.set', this.controls) window.webContents.send('control.controls.set', this.controls)
console.log(`[INFO] Send Controls to #${window.id}:`, this.controls) Log.info(`Send Controls to #${window.id}:`, this.controls)
} }
public updateCaptionLog(log: CaptionItem) { public updateCaptionLog(log: CaptionItem) {
let command: 'add' | 'upd' = 'add' let command: 'add' | 'upd' = 'add'
if( if(
this.captionLog.length && this.captionLog.length &&
this.captionLog[this.captionLog.length - 1].index === log.index && this.lastLogIndex === log.index
this.captionLog[this.captionLog.length - 1].time_s === log.time_s
) { ) {
this.captionLog.splice(this.captionLog.length - 1, 1, log) this.captionLog.splice(this.captionLog.length - 1, 1, log)
command = 'upd' command = 'upd'
} }
else { else {
this.captionLog.push(log) this.captionLog.push(log)
this.lastLogIndex = log.index
} }
this.captionLog[this.captionLog.length - 1].index = this.captionLog.length
for(const window of BrowserWindow.getAllWindows()){ for(const window of BrowserWindow.getAllWindows()){
this.sendCaptionLog(window, command) this.sendCaptionLog(window, command)
} }

View File

@@ -5,6 +5,7 @@ import path from 'path'
import { controlWindow } from '../ControlWindow' import { controlWindow } from '../ControlWindow'
import { allConfig } from './AllConfig' import { allConfig } from './AllConfig'
import { i18n } from '../i18n' import { i18n } from '../i18n'
import { Log } from './Log'
export class CaptionEngine { export class CaptionEngine {
appPath: string = '' appPath: string = ''
@@ -14,7 +15,7 @@ export class CaptionEngine {
private getApp(): boolean { private getApp(): boolean {
if (allConfig.controls.customized && allConfig.controls.customizedApp) { if (allConfig.controls.customized && allConfig.controls.customizedApp) {
console.log('[INFO] Using customized engine') Log.info('Using customized engine')
this.appPath = allConfig.controls.customizedApp this.appPath = allConfig.controls.customizedApp
this.command = allConfig.controls.customizedCommand.split(' ') this.command = allConfig.controls.customizedCommand.split(' ')
} }
@@ -25,9 +26,7 @@ export class CaptionEngine {
return false return false
} }
let gummyName = 'main-gummy' let gummyName = 'main-gummy'
if (process.platform === 'win32') { if (process.platform === 'win32') { gummyName += '.exe' }
gummyName += '.exe'
}
this.command = [] this.command = []
if (is.dev) { if (is.dev) {
this.appPath = path.join( this.appPath = path.join(
@@ -56,31 +55,33 @@ export class CaptionEngine {
else if(allConfig.controls.engine === 'vosk'){ else if(allConfig.controls.engine === 'vosk'){
allConfig.controls.customized = false allConfig.controls.customized = false
let voskName = 'main-vosk' let voskName = 'main-vosk'
if (process.platform === 'win32') { if (process.platform === 'win32') { voskName += '.exe' }
voskName += '.exe' this.command = []
}
if (is.dev) { if (is.dev) {
this.appPath = path.join( this.appPath = path.join(
app.getAppPath(), app.getAppPath(), 'engine',
'engine', 'dist', voskName 'subenv', 'Scripts', 'python.exe'
) )
this.command.push(path.join(
app.getAppPath(), 'engine', 'main-vosk.py'
))
} }
else { else {
this.appPath = path.join( this.appPath = path.join(
process.resourcesPath, 'engine', voskName process.resourcesPath, 'engine', voskName
) )
} }
this.command = []
this.command.push('-a', allConfig.controls.audio ? '1' : '0') this.command.push('-a', allConfig.controls.audio ? '1' : '0')
this.command.push('-m', `"${allConfig.controls.modelPath}"`) this.command.push('-m', `"${allConfig.controls.modelPath}"`)
} }
console.log('[INFO] Engine Path:', this.appPath) Log.info('Engine Path:', this.appPath)
console.log('[INFO] Engine Command:', this.command) Log.info('Engine Command:', this.command)
return true return true
} }
public start() { public start() {
if (this.processStatus !== 'stopped') { if (this.processStatus !== 'stopped') {
Log.warn('Caption engine status is not stopped, cannot start')
return return
} }
if(!this.getApp()){ return } if(!this.getApp()){ return }
@@ -90,12 +91,12 @@ export class CaptionEngine {
} }
catch (e) { catch (e) {
controlWindow.sendErrorMessage(i18n('engine.start.error') + e) controlWindow.sendErrorMessage(i18n('engine.start.error') + e)
console.error('[ERROR] Error starting subprocess:', e) Log.error('Error starting engine:', e)
return return
} }
this.processStatus = 'running' this.processStatus = 'running'
console.log('[INFO] Caption Engine Started, PID:', this.process.pid) Log.info('Caption Engine Started, PID:', this.process.pid)
allConfig.controls.engineEnabled = true allConfig.controls.engineEnabled = true
if(controlWindow.window){ if(controlWindow.window){
@@ -111,27 +112,23 @@ export class CaptionEngine {
lines.forEach((line: string) => { lines.forEach((line: string) => {
if (line.trim()) { if (line.trim()) {
try { try {
const caption = JSON.parse(line); const data_obj = JSON.parse(line)
if(caption.index === undefined) { handleEngineData(data_obj)
console.log('[INFO] Engine Bad Output:', caption);
}
else allConfig.updateCaptionLog(caption);
} catch (e) { } catch (e) {
controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e) controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
console.error('[ERROR] Error parsing JSON:', e); Log.error('Error parsing JSON:', e)
} }
} }
}); });
}); });
this.process.stderr.on('data', (data) => { this.process.stderr.on('data', (data: any) => {
if(this.processStatus === 'stopping') return if(this.processStatus === 'stopping') return
controlWindow.sendErrorMessage(i18n('engine.error') + data) controlWindow.sendErrorMessage(i18n('engine.error') + data)
console.error(`[ERROR] Subprocess Error: ${data}`); Log.error(`Engine Error: ${data}`);
}); });
this.process.on('close', (code: any) => { this.process.on('close', (code: any) => {
console.log(`[INFO] Subprocess exited with code ${code}`);
this.process = undefined; this.process = undefined;
allConfig.controls.engineEnabled = false allConfig.controls.engineEnabled = false
if(controlWindow.window){ if(controlWindow.window){
@@ -139,14 +136,14 @@ export class CaptionEngine {
controlWindow.window.webContents.send('control.engine.stopped') controlWindow.window.webContents.send('control.engine.stopped')
} }
this.processStatus = 'stopped' this.processStatus = 'stopped'
console.log('[INFO] Caption engine process stopped') Log.info(`Engine exited with code ${code}`)
}); });
} }
public stop() { public stop() {
if(this.processStatus !== 'running') return if(this.processStatus !== 'running') return
if (this.process.pid) { if (this.process.pid) {
console.log('[INFO] Trying to stop process, PID:', this.process.pid) Log.info('Trying to stop process, PID:', this.process.pid)
let cmd = `kill ${this.process.pid}`; let cmd = `kill ${this.process.pid}`;
if (process.platform === "win32") { if (process.platform === "win32") {
cmd = `taskkill /pid ${this.process.pid} /t /f` cmd = `taskkill /pid ${this.process.pid} /t /f`
@@ -154,7 +151,7 @@ export class CaptionEngine {
exec(cmd, (error) => { exec(cmd, (error) => {
if (error) { if (error) {
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error) controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
console.error(`[ERROR] Failed to kill process: ${error}`) Log.error(`Failed to kill process: ${error}`)
} }
}) })
} }
@@ -166,11 +163,26 @@ export class CaptionEngine {
controlWindow.window.webContents.send('control.engine.stopped') controlWindow.window.webContents.send('control.engine.stopped')
} }
this.processStatus = 'stopped' this.processStatus = 'stopped'
console.log('[INFO] Process PID undefined, caption engine process stopped') Log.info('Process PID undefined, caption engine process stopped')
return return
} }
this.processStatus = 'stopping' this.processStatus = 'stopping'
console.log('[INFO] Caption engine process stopping') Log.info('Caption engine process stopping')
}
}
function handleEngineData(data: any) {
if(data.command === 'caption') {
allConfig.updateCaptionLog(data);
}
else if(data.command === 'print') {
Log.info('Engine print:', data.content)
}
else if(data.command === 'info') {
Log.info('Engine info:', data.content)
}
else if(data.command === 'usage') {
Log.info('Caption engine usage: ', data.content)
} }
} }

21
src/main/utils/Log.ts Normal file
View File

@@ -0,0 +1,21 @@
/**
 * Format the current local time as a zero-padded `HH:MM:SS` string.
 */
function getTimeString() {
  const current = new Date()
  // Left-pad each component to two digits.
  const pad2 = (value: number) => String(value).padStart(2, '0')
  const parts = [current.getHours(), current.getMinutes(), current.getSeconds()]
  return parts.map((part) => pad2(part)).join(':')
}
/**
 * Minimal console logger that prefixes each message with its level and the
 * current local time, e.g. `[INFO 17:15:12]`.
 *
 * Each level is routed to the matching console method, so warnings and
 * errors are not mixed into the info stream.
 */
export class Log {
  /** Write an informational message via `console.log`. */
  static info(...msg: any[]){
    console.log(`[INFO ${getTimeString()}]`, ...msg)
  }

  /** Write a warning via `console.warn`. */
  static warn(...msg: any[]){
    console.warn(`[WARN ${getTimeString()}]`, ...msg)
  }

  /** Write an error via `console.error`. */
  static error(...msg: any[]){
    console.error(`[ERROR ${getTimeString()}]`, ...msg)
  }
}

View File

@@ -174,6 +174,12 @@ const columns = [
dataIndex: 'index', dataIndex: 'index',
key: 'index', key: 'index',
width: 80, width: 80,
sorter: (a: CaptionItem, b: CaptionItem) => {
if(a.index <= b.index) return -1
return 1
},
sortDirections: ['descend'],
defaultSortOrder: 'descend',
}, },
{ {
title: 'time', title: 'time',
@@ -184,8 +190,7 @@ const columns = [
if(a.time_s <= b.time_s) return -1 if(a.time_s <= b.time_s) return -1
return 1 return 1
}, },
sortDirections: ['descend'], sortDirections: ['descend', 'ascend'],
defaultSortOrder: 'descend',
}, },
{ {
title: 'content', title: 'content',