feat(engine): 优化字幕引擎输出格式、准备合并两个字幕引擎

- 重构字幕引擎相关代码
- 准备合并两个字幕引擎
This commit is contained in:
himeditator
2025-07-27 17:15:12 +08:00
parent 3792eb88b6
commit b658ef5440
11 changed files with 205 additions and 45 deletions

View File

@@ -32,7 +32,19 @@ Python 进程标准输出 (`sys.stdout`) 的内容一定为一行一行的字符
}
```
Python 端打印的提示信息。
Python 端打印的提示信息。与 `print` 相比,该信息更需要 Electron 端的关注。
### `usage`
```js
{
command: "usage",
content: string
}
```
Gummy 字幕引擎结束时打印计费消耗信息。
### `caption`
@@ -42,7 +54,6 @@ Python 端打印的提示信息。
index: number,
time_s: string,
time_t: string,
end: boolean,
text: string,
translation: string
}

View File

@@ -15,18 +15,20 @@ class Callback(TranslationRecognizerCallback):
"""
def __init__(self):
super().__init__()
self.index = 0
self.usage = 0
self.cur_id = -1
self.index = 0
self.time_str = ''
def on_open(self) -> None:
self.usage = 0
self.cur_id = -1
self.time_str = ''
stdout_cmd('info', 'Gummy translator started.')
def on_close(self) -> None:
stdout_cmd('info', 'Gummy translator closed.')
stdout_cmd('usage', str(self.usage))
def on_event(
self,
@@ -46,7 +48,6 @@ class Callback(TranslationRecognizerCallback):
caption['index'] = self.index
caption['time_s'] = self.time_str
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
caption['end'] = transcription_result.is_sentence_end
caption['text'] = transcription_result.text
caption['translation'] = ""
@@ -57,7 +58,8 @@ class Callback(TranslationRecognizerCallback):
if usage:
self.usage += usage['duration']
stdout_obj(caption)
if 'text' in caption:
stdout_obj(caption)
class GummyTranslator:
@@ -88,7 +90,7 @@ class GummyTranslator:
self.translator.start()
def send_audio_frame(self, data):
"""发送音频帧"""
"""发送音频帧,擎将自动识别并将识别结果输出到标准输出中"""
self.translator.send_audio_frame(data)
def stop(self):

59
engine/audio2text/vosk.py Normal file
View File

@@ -0,0 +1,59 @@
import json
from datetime import datetime
from vosk import Model, KaldiRecognizer, SetLogLevel
from utils import stdout_obj
class VoskRecognizer:
    """
    Stream audio through the Vosk engine and print caption JSON to stdout.

    Every recognition result is emitted through ``stdout_obj`` as a dict
    whose ``command`` is ``'caption'``, intended to be read by the
    Auto Caption application.

    Args:
        model_path: Path to the Vosk recognition model. One leading and one
            trailing double quote, if present, are removed before use.
    """

    def __init__(self, model_path: str):
        # NOTE(review): -1 presumably silences Vosk's own logging; confirm
        # against the Vosk documentation.
        SetLogLevel(-1)
        self.model_path = self._unquote(model_path)
        # Start time of the sentence currently being recognized.
        self.time_str = ''
        # Index of the sentence currently being recognized.
        self.cur_id = 0
        # Last partial text that was emitted; '' means no partial is pending.
        self.prev_content = ''
        self.model = Model(self.model_path)
        self.recognizer = KaldiRecognizer(self.model, 16000)

    @staticmethod
    def _unquote(path: str) -> str:
        """Remove at most one leading and one trailing double quote."""
        if path.startswith('"'):
            path = path[1:]
        if path.endswith('"'):
            path = path[:-1]
        return path

    def send_audio_frame(self, data: bytes):
        """
        Feed one audio frame to Vosk and print any resulting caption.

        Args:
            data: Raw audio frame; the recognizer is created for a
                16000 Hz sample rate.
        """
        caption = {}
        caption['command'] = 'caption'
        caption['translation'] = ''

        if self.recognizer.AcceptWaveform(data):
            # Final result: emit it under the current index, then move on
            # to the next sentence.
            content = json.loads(self.recognizer.Result()).get('text', '')
            caption['index'] = self.cur_id
            caption['text'] = content
            caption['time_s'] = self.time_str
            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            self.prev_content = ''
            self.cur_id += 1
        else:
            content = json.loads(self.recognizer.PartialResult()).get('partial', '')
            # Nothing recognized yet, or nothing new since the last partial.
            if content == '' or content == self.prev_content:
                return
            # First partial of a new sentence: record its start time.
            if self.prev_content == '':
                self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            caption['index'] = self.cur_id
            caption['text'] = content
            caption['time_s'] = self.time_str
            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            self.prev_content = content

        stdout_obj(caption)

View File

@@ -49,6 +49,7 @@ def convert_audio_to_text(audio_type, chunk_rate, model_path):
continue
if prev_content == '':
time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
caption['command'] = 'caption'
caption['index'] = cur_id
caption['text'] = content
caption['time_s'] = time_str

37
engine/main.py Normal file
View File

@@ -0,0 +1,37 @@
import argparse
def gummy_engine(s, t, a, c, k):
    """
    Placeholder for the Gummy caption engine entry point; does nothing yet.

    The command-line dispatch in this file calls it with, in order: source
    language, target language, audio type, chunk rate and API key.
    """
    return None
def vosk_engine(a, c, m):
    """
    Placeholder for the Vosk caption engine entry point; does nothing yet.

    The command-line dispatch in this file calls it with, in order: audio
    type, chunk rate and model path.
    """
    return None
if __name__ == "__main__":
    # Command-line entry point: parse the options, then hand off to the
    # selected caption engine.
    parser = argparse.ArgumentParser(description='Convert system audio stream to text')

    # Options shared by both engines
    parser.add_argument(
        '-e', '--caption_engine', default='gummy',
        help='Caption engine: gummy or vosk',
    )
    parser.add_argument(
        '-a', '--audio_type', default=0,
        help='Audio stream source: 0 for output, 1 for input',
    )
    parser.add_argument(
        '-c', '--chunk_rate', default=20,
        help='Number of audio stream chunks collected per second',
    )

    # Gummy-only options
    parser.add_argument(
        '-s', '--source_language', default='en',
        help='Source language code',
    )
    parser.add_argument(
        '-t', '--target_language', default='zh',
        help='Target language code',
    )
    parser.add_argument(
        '-k', '--api_key', default='',
        help='API KEY for Gummy model',
    )

    # Vosk-only options
    parser.add_argument(
        '-m', '--model_path', default='',
        help='The path to the vosk model.',
    )

    args = parser.parse_args()
    engine = args.caption_engine

    # Reject an unknown engine before converting the numeric options.
    if engine not in ('gummy', 'vosk'):
        raise ValueError('Invalid caption engine specified.')

    # Values arrive as strings from the command line (or as int defaults).
    audio_type = int(args.audio_type)
    chunk_rate = int(args.chunk_rate)

    if engine == 'gummy':
        gummy_engine(
            args.source_language,
            args.target_language,
            audio_type,
            chunk_rate,
            args.api_key,
        )
    else:
        vosk_engine(audio_type, chunk_rate, args.model_path)

8
package-lock.json generated
View File

@@ -22,6 +22,7 @@
"@electron-toolkit/eslint-config-ts": "^3.0.0",
"@electron-toolkit/tsconfig": "^1.0.1",
"@types/node": "^22.14.1",
"@types/pidusage": "^2.0.5",
"@vitejs/plugin-vue": "^5.2.3",
"electron": "^35.1.5",
"electron-builder": "^25.1.8",
@@ -2296,6 +2297,13 @@
"undici-types": "~6.21.0"
}
},
"node_modules/@types/pidusage": {
"version": "2.0.5",
"resolved": "https://registry.npmmirror.com/@types/pidusage/-/pidusage-2.0.5.tgz",
"integrity": "sha512-MIiyZI4/MK9UGUXWt0jJcCZhVw7YdhBuTOuqP/BjuLDLZ2PmmViMIQgZiWxtaMicQfAz/kMrZ5T7PKxFSkTeUA==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/plist": {
"version": "3.0.5",
"resolved": "https://registry.npmmirror.com/@types/plist/-/plist-3.0.5.tgz",

View File

@@ -13,7 +13,7 @@
"typecheck:web": "vue-tsc --noEmit -p tsconfig.web.json --composite false",
"typecheck": "npm run typecheck:node && npm run typecheck:web",
"start": "electron-vite preview",
"dev": "electron-vite dev",
"dev": "chcp 65001 && electron-vite dev",
"build": "npm run typecheck && electron-vite build",
"postinstall": "electron-builder install-app-deps",
"build:unpack": "npm run build && electron-builder --dir",
@@ -35,6 +35,7 @@
"@electron-toolkit/eslint-config-ts": "^3.0.0",
"@electron-toolkit/tsconfig": "^1.0.1",
"@types/node": "^22.14.1",
"@types/pidusage": "^2.0.5",
"@vitejs/plugin-vue": "^5.2.3",
"electron": "^35.1.5",
"electron-builder": "^25.1.8",

View File

@@ -2,6 +2,7 @@ import {
UILanguage, UITheme, Styles, Controls,
CaptionItem, FullConfig
} from '../types'
import { Log } from './Log'
import { app, BrowserWindow } from 'electron'
import * as path from 'path'
import * as fs from 'fs'
@@ -48,6 +49,7 @@ class AllConfig {
uiTheme: UITheme = 'system';
styles: Styles = {...defaultStyles};
controls: Controls = {...defaultControls};
lastLogIndex: number = -1;
captionLog: CaptionItem[] = [];
constructor() {}
@@ -61,7 +63,7 @@ class AllConfig {
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
if(config.styles) this.setStyles(config.styles)
if(config.controls) this.setControls(config.controls)
console.log('[INFO] Read Config from:', configPath)
Log.info('Read Config from:', configPath)
}
}
@@ -75,7 +77,7 @@ class AllConfig {
}
const configPath = path.join(app.getPath('userData'), 'config.json')
fs.writeFileSync(configPath, JSON.stringify(config, null, 2))
console.log('[INFO] Write Config to:', configPath)
Log.info('Write Config to:', configPath)
}
public getFullConfig(): FullConfig {
@@ -96,7 +98,7 @@ class AllConfig {
this.styles[key] = args[key]
}
}
console.log('[INFO] Set Styles:', this.styles)
Log.info('Set Styles:', this.styles)
}
public resetStyles() {
@@ -105,7 +107,7 @@ class AllConfig {
public sendStyles(window: BrowserWindow) {
window.webContents.send('both.styles.set', this.styles)
console.log(`[INFO] Send Styles to #${window.id}:`, this.styles)
Log.info(`Send Styles to #${window.id}:`, this.styles)
}
public setControls(args: Object) {
@@ -116,27 +118,28 @@ class AllConfig {
}
}
this.controls.engineEnabled = engineEnabled
console.log('[INFO] Set Controls:', this.controls)
Log.info('Set Controls:', this.controls)
}
public sendControls(window: BrowserWindow) {
window.webContents.send('control.controls.set', this.controls)
console.log(`[INFO] Send Controls to #${window.id}:`, this.controls)
Log.info(`Send Controls to #${window.id}:`, this.controls)
}
public updateCaptionLog(log: CaptionItem) {
let command: 'add' | 'upd' = 'add'
if(
this.captionLog.length &&
this.captionLog[this.captionLog.length - 1].index === log.index &&
this.captionLog[this.captionLog.length - 1].time_s === log.time_s
this.lastLogIndex === log.index
) {
this.captionLog.splice(this.captionLog.length - 1, 1, log)
command = 'upd'
}
else {
this.captionLog.push(log)
this.lastLogIndex = log.index
}
this.captionLog[this.captionLog.length - 1].index = this.captionLog.length
for(const window of BrowserWindow.getAllWindows()){
this.sendCaptionLog(window, command)
}

View File

@@ -5,6 +5,7 @@ import path from 'path'
import { controlWindow } from '../ControlWindow'
import { allConfig } from './AllConfig'
import { i18n } from '../i18n'
import { Log } from './Log'
export class CaptionEngine {
appPath: string = ''
@@ -14,7 +15,7 @@ export class CaptionEngine {
private getApp(): boolean {
if (allConfig.controls.customized && allConfig.controls.customizedApp) {
console.log('[INFO] Using customized engine')
Log.info('Using customized engine')
this.appPath = allConfig.controls.customizedApp
this.command = allConfig.controls.customizedCommand.split(' ')
}
@@ -25,9 +26,7 @@ export class CaptionEngine {
return false
}
let gummyName = 'main-gummy'
if (process.platform === 'win32') {
gummyName += '.exe'
}
if (process.platform === 'win32') { gummyName += '.exe' }
this.command = []
if (is.dev) {
this.appPath = path.join(
@@ -56,31 +55,33 @@ export class CaptionEngine {
else if(allConfig.controls.engine === 'vosk'){
allConfig.controls.customized = false
let voskName = 'main-vosk'
if (process.platform === 'win32') {
voskName += '.exe'
}
if (process.platform === 'win32') { voskName += '.exe' }
this.command = []
if (is.dev) {
this.appPath = path.join(
app.getAppPath(),
'engine', 'dist', voskName
app.getAppPath(), 'engine',
'subenv', 'Scripts', 'python.exe'
)
this.command.push(path.join(
app.getAppPath(), 'engine', 'main-vosk.py'
))
}
else {
this.appPath = path.join(
process.resourcesPath, 'engine', voskName
)
}
this.command = []
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
}
console.log('[INFO] Engine Path:', this.appPath)
console.log('[INFO] Engine Command:', this.command)
Log.info('Engine Path:', this.appPath)
Log.info('Engine Command:', this.command)
return true
}
public start() {
if (this.processStatus !== 'stopped') {
Log.warn('Caption engine status is not stopped, cannot start')
return
}
if(!this.getApp()){ return }
@@ -90,12 +91,12 @@ export class CaptionEngine {
}
catch (e) {
controlWindow.sendErrorMessage(i18n('engine.start.error') + e)
console.error('[ERROR] Error starting subprocess:', e)
Log.error('Error starting engine:', e)
return
}
this.processStatus = 'running'
console.log('[INFO] Caption Engine Started, PID:', this.process.pid)
Log.info('Caption Engine Started, PID:', this.process.pid)
allConfig.controls.engineEnabled = true
if(controlWindow.window){
@@ -111,27 +112,23 @@ export class CaptionEngine {
lines.forEach((line: string) => {
if (line.trim()) {
try {
const caption = JSON.parse(line);
if(caption.index === undefined) {
console.log('[INFO] Engine Bad Output:', caption);
}
else allConfig.updateCaptionLog(caption);
const data_obj = JSON.parse(line)
handleEngineData(data_obj)
} catch (e) {
controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
console.error('[ERROR] Error parsing JSON:', e);
Log.error('Error parsing JSON:', e)
}
}
});
});
this.process.stderr.on('data', (data) => {
this.process.stderr.on('data', (data: any) => {
if(this.processStatus === 'stopping') return
controlWindow.sendErrorMessage(i18n('engine.error') + data)
console.error(`[ERROR] Subprocess Error: ${data}`);
Log.error(`Engine Error: ${data}`);
});
this.process.on('close', (code: any) => {
console.log(`[INFO] Subprocess exited with code ${code}`);
this.process = undefined;
allConfig.controls.engineEnabled = false
if(controlWindow.window){
@@ -139,14 +136,14 @@ export class CaptionEngine {
controlWindow.window.webContents.send('control.engine.stopped')
}
this.processStatus = 'stopped'
console.log('[INFO] Caption engine process stopped')
Log.info(`Engine exited with code ${code}`)
});
}
public stop() {
if(this.processStatus !== 'running') return
if (this.process.pid) {
console.log('[INFO] Trying to stop process, PID:', this.process.pid)
Log.info('Trying to stop process, PID:', this.process.pid)
let cmd = `kill ${this.process.pid}`;
if (process.platform === "win32") {
cmd = `taskkill /pid ${this.process.pid} /t /f`
@@ -154,7 +151,7 @@ export class CaptionEngine {
exec(cmd, (error) => {
if (error) {
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
console.error(`[ERROR] Failed to kill process: ${error}`)
Log.error(`Failed to kill process: ${error}`)
}
})
}
@@ -166,11 +163,26 @@ export class CaptionEngine {
controlWindow.window.webContents.send('control.engine.stopped')
}
this.processStatus = 'stopped'
console.log('[INFO] Process PID undefined, caption engine process stopped')
Log.info('Process PID undefined, caption engine process stopped')
return
}
this.processStatus = 'stopping'
console.log('[INFO] Caption engine process stopping')
Log.info('Caption engine process stopping')
}
}
/**
 * Dispatches one parsed JSON object received from the caption engine.
 *
 * - `caption`: forwarded to `allConfig.updateCaptionLog`.
 * - `print`, `info`, `usage`: written to the log.
 * - anything else: logged as a warning so unexpected engine output stays
 *   visible instead of being dropped silently.
 *
 * @param data Object parsed from a single line of engine output.
 */
function handleEngineData(data: any) {
  switch (data.command) {
    case 'caption':
      allConfig.updateCaptionLog(data);
      break
    case 'print':
      Log.info('Engine print:', data.content)
      break
    case 'info':
      Log.info('Engine info:', data.content)
      break
    case 'usage':
      Log.info('Caption engine usage: ', data.content)
      break
    default:
      // Unknown or missing command: report it rather than ignore it.
      Log.warn('Engine unknown output:', data)
  }
}

21
src/main/utils/Log.ts Normal file
View File

@@ -0,0 +1,21 @@
/**
 * Formats the current local time as a zero-padded `HH:MM:SS` string.
 */
function getTimeString() {
  const now = new Date()
  // Left-pad a clock field to two digits.
  const pad = (value: number) => String(value).padStart(2, '0')
  const fields = [now.getHours(), now.getMinutes(), now.getSeconds()]
  return fields.map((field) => pad(field)).join(':')
}
/**
 * Small console logger that prefixes every message with its level and the
 * current time, e.g. `[INFO 17:15:12]`.
 *
 * Each level uses the matching console method, so warnings and errors are
 * written through `console.warn` / `console.error` rather than being mixed
 * into regular `console.log` output.
 */
export class Log {
  /** Logs an informational message. */
  static info(...msg: any[]){
    console.log(`[INFO ${getTimeString()}]`, ...msg)
  }

  /** Logs a warning. */
  static warn(...msg: any[]){
    console.warn(`[WARN ${getTimeString()}]`, ...msg)
  }

  /** Logs an error. */
  static error(...msg: any[]){
    console.error(`[ERROR ${getTimeString()}]`, ...msg)
  }
}

View File

@@ -174,6 +174,12 @@ const columns = [
dataIndex: 'index',
key: 'index',
width: 80,
sorter: (a: CaptionItem, b: CaptionItem) => {
if(a.index <= b.index) return -1
return 1
},
sortDirections: ['descend'],
defaultSortOrder: 'descend',
},
{
title: 'time',
@@ -184,8 +190,7 @@ const columns = [
if(a.time_s <= b.time_s) return -1
return 1
},
sortDirections: ['descend'],
defaultSortOrder: 'descend',
sortDirections: ['descend', 'ascend'],
},
{
title: 'content',