mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:14:42 +08:00
feat(engine): 优化字幕引擎输出格式、准备合并两个字幕引擎
- 重构字幕引擎相关代码 - 准备合并两个字幕引擎
This commit is contained in:
@@ -32,7 +32,19 @@ Python 进程标准输出 (`sys.stdout`) 的内容一定为一行一行的字符
|
||||
}
|
||||
```
|
||||
|
||||
Python 端打印的提示信息。
|
||||
Python 端打印的提示信息,比起 `print`,该信息更希望 Electron 端的关注。
|
||||
|
||||
### `usage`
|
||||
|
||||
```js
|
||||
{
|
||||
command: "usage",
|
||||
content: string
|
||||
}
|
||||
```
|
||||
|
||||
Gummy 字幕引擎结束时打印计费消耗信息。
|
||||
|
||||
|
||||
### `caption`
|
||||
|
||||
@@ -42,7 +54,6 @@ Python 端打印的提示信息。
|
||||
index: number,
|
||||
time_s: string,
|
||||
time_t: string,
|
||||
end: boolean,
|
||||
text: string,
|
||||
translation: string
|
||||
}
|
||||
|
||||
@@ -15,18 +15,20 @@ class Callback(TranslationRecognizerCallback):
|
||||
"""
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.index = 0
|
||||
self.usage = 0
|
||||
self.cur_id = -1
|
||||
self.index = 0
|
||||
self.time_str = ''
|
||||
|
||||
def on_open(self) -> None:
|
||||
self.usage = 0
|
||||
self.cur_id = -1
|
||||
self.time_str = ''
|
||||
stdout_cmd('info', 'Gummy translator started.')
|
||||
|
||||
def on_close(self) -> None:
|
||||
stdout_cmd('info', 'Gummy translator closed.')
|
||||
stdout_cmd('usage', str(self.usage))
|
||||
|
||||
def on_event(
|
||||
self,
|
||||
@@ -46,7 +48,6 @@ class Callback(TranslationRecognizerCallback):
|
||||
caption['index'] = self.index
|
||||
caption['time_s'] = self.time_str
|
||||
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
||||
caption['end'] = transcription_result.is_sentence_end
|
||||
caption['text'] = transcription_result.text
|
||||
caption['translation'] = ""
|
||||
|
||||
@@ -57,7 +58,8 @@ class Callback(TranslationRecognizerCallback):
|
||||
if usage:
|
||||
self.usage += usage['duration']
|
||||
|
||||
stdout_obj(caption)
|
||||
if 'text' in caption:
|
||||
stdout_obj(caption)
|
||||
|
||||
|
||||
class GummyTranslator:
|
||||
@@ -88,7 +90,7 @@ class GummyTranslator:
|
||||
self.translator.start()
|
||||
|
||||
def send_audio_frame(self, data):
|
||||
"""发送音频帧"""
|
||||
"""发送音频帧,擎将自动识别并将识别结果输出到标准输出中"""
|
||||
self.translator.send_audio_frame(data)
|
||||
|
||||
def stop(self):
|
||||
|
||||
59
engine/audio2text/vosk.py
Normal file
59
engine/audio2text/vosk.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||
from utils import stdout_obj
|
||||
|
||||
class VoskRecognizer:
|
||||
"""
|
||||
使用 Vosk 引擎流式处理的音频数据,并在标准输出中输出与 Auto Caption 软件可读取的 JSON 字符串数据
|
||||
|
||||
初始化参数:
|
||||
model_path: Vosk 识别模型路径
|
||||
"""
|
||||
def __int__(self, model_path: str):
|
||||
SetLogLevel(-1)
|
||||
if model_path.startswith('"'):
|
||||
model_path = model_path[1:]
|
||||
if model_path.endswith('"'):
|
||||
model_path = model_path[:-1]
|
||||
self.model_path = model_path
|
||||
self.time_str = ''
|
||||
self.cur_id = 0
|
||||
self.prev_content = ''
|
||||
|
||||
self.model = Model(self.model_path)
|
||||
self.recognizer = KaldiRecognizer(self.model, 16000)
|
||||
|
||||
def send_audio_frame(self, data: bytes):
|
||||
"""
|
||||
发送音频帧给 Vosk 引擎,引擎将自动识别并将识别结果输出到标准输出中
|
||||
|
||||
Args:
|
||||
data: 音频帧数据,采样率必须为 16000Hz
|
||||
"""
|
||||
caption = {}
|
||||
caption['command'] = 'caption'
|
||||
caption['translation'] = ''
|
||||
|
||||
if self.recognizer.AcceptWaveform(data):
|
||||
content = json.loads(self.recognizer.Result()).get('text', '')
|
||||
caption['index'] = self.cur_id
|
||||
caption['text'] = content
|
||||
caption['time_s'] = self.time_str
|
||||
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
||||
self.prev_content = ''
|
||||
self.cur_id += 1
|
||||
else:
|
||||
content = json.loads(self.recognizer.PartialResult()).get('partial', '')
|
||||
if content == '' or content == self.prev_content:
|
||||
return
|
||||
if self.prev_content == '':
|
||||
self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
||||
caption['index'] = self.cur_id
|
||||
caption['text'] = content
|
||||
caption['time_s'] = self.time_str
|
||||
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
||||
self.prev_content = content
|
||||
|
||||
stdout_obj(caption)
|
||||
@@ -49,6 +49,7 @@ def convert_audio_to_text(audio_type, chunk_rate, model_path):
|
||||
continue
|
||||
if prev_content == '':
|
||||
time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
||||
caption['command'] = 'caption'
|
||||
caption['index'] = cur_id
|
||||
caption['text'] = content
|
||||
caption['time_s'] = time_str
|
||||
|
||||
37
engine/main.py
Normal file
37
engine/main.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import argparse
|
||||
|
||||
def gummy_engine(s, t, a, c, k):
|
||||
pass
|
||||
|
||||
def vosk_engine(a, c, m):
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
||||
# both
|
||||
parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')
|
||||
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
|
||||
parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')
|
||||
# gummy
|
||||
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
||||
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
||||
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
|
||||
# vosk
|
||||
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
|
||||
args = parser.parse_args()
|
||||
if args.caption_engine == 'gummy':
|
||||
gummy_engine(
|
||||
args.source_language,
|
||||
args.target_language,
|
||||
int(args.audio_type),
|
||||
int(args.chunk_rate),
|
||||
args.api_key
|
||||
)
|
||||
elif args.caption_engine == 'vosk':
|
||||
vosk_engine(
|
||||
int(args.audio_type),
|
||||
int(args.chunk_rate),
|
||||
args.model_path
|
||||
)
|
||||
else:
|
||||
raise ValueError('Invalid caption engine specified.')
|
||||
8
package-lock.json
generated
8
package-lock.json
generated
@@ -22,6 +22,7 @@
|
||||
"@electron-toolkit/eslint-config-ts": "^3.0.0",
|
||||
"@electron-toolkit/tsconfig": "^1.0.1",
|
||||
"@types/node": "^22.14.1",
|
||||
"@types/pidusage": "^2.0.5",
|
||||
"@vitejs/plugin-vue": "^5.2.3",
|
||||
"electron": "^35.1.5",
|
||||
"electron-builder": "^25.1.8",
|
||||
@@ -2296,6 +2297,13 @@
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pidusage": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmmirror.com/@types/pidusage/-/pidusage-2.0.5.tgz",
|
||||
"integrity": "sha512-MIiyZI4/MK9UGUXWt0jJcCZhVw7YdhBuTOuqP/BjuLDLZ2PmmViMIQgZiWxtaMicQfAz/kMrZ5T7PKxFSkTeUA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/plist": {
|
||||
"version": "3.0.5",
|
||||
"resolved": "https://registry.npmmirror.com/@types/plist/-/plist-3.0.5.tgz",
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"typecheck:web": "vue-tsc --noEmit -p tsconfig.web.json --composite false",
|
||||
"typecheck": "npm run typecheck:node && npm run typecheck:web",
|
||||
"start": "electron-vite preview",
|
||||
"dev": "electron-vite dev",
|
||||
"dev": "chcp 65001 && electron-vite dev",
|
||||
"build": "npm run typecheck && electron-vite build",
|
||||
"postinstall": "electron-builder install-app-deps",
|
||||
"build:unpack": "npm run build && electron-builder --dir",
|
||||
@@ -35,6 +35,7 @@
|
||||
"@electron-toolkit/eslint-config-ts": "^3.0.0",
|
||||
"@electron-toolkit/tsconfig": "^1.0.1",
|
||||
"@types/node": "^22.14.1",
|
||||
"@types/pidusage": "^2.0.5",
|
||||
"@vitejs/plugin-vue": "^5.2.3",
|
||||
"electron": "^35.1.5",
|
||||
"electron-builder": "^25.1.8",
|
||||
|
||||
@@ -2,6 +2,7 @@ import {
|
||||
UILanguage, UITheme, Styles, Controls,
|
||||
CaptionItem, FullConfig
|
||||
} from '../types'
|
||||
import { Log } from './Log'
|
||||
import { app, BrowserWindow } from 'electron'
|
||||
import * as path from 'path'
|
||||
import * as fs from 'fs'
|
||||
@@ -48,6 +49,7 @@ class AllConfig {
|
||||
uiTheme: UITheme = 'system';
|
||||
styles: Styles = {...defaultStyles};
|
||||
controls: Controls = {...defaultControls};
|
||||
lastLogIndex: number = -1;
|
||||
captionLog: CaptionItem[] = [];
|
||||
|
||||
constructor() {}
|
||||
@@ -61,7 +63,7 @@ class AllConfig {
|
||||
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
|
||||
if(config.styles) this.setStyles(config.styles)
|
||||
if(config.controls) this.setControls(config.controls)
|
||||
console.log('[INFO] Read Config from:', configPath)
|
||||
Log.info('Read Config from:', configPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,7 +77,7 @@ class AllConfig {
|
||||
}
|
||||
const configPath = path.join(app.getPath('userData'), 'config.json')
|
||||
fs.writeFileSync(configPath, JSON.stringify(config, null, 2))
|
||||
console.log('[INFO] Write Config to:', configPath)
|
||||
Log.info('Write Config to:', configPath)
|
||||
}
|
||||
|
||||
public getFullConfig(): FullConfig {
|
||||
@@ -96,7 +98,7 @@ class AllConfig {
|
||||
this.styles[key] = args[key]
|
||||
}
|
||||
}
|
||||
console.log('[INFO] Set Styles:', this.styles)
|
||||
Log.info('Set Styles:', this.styles)
|
||||
}
|
||||
|
||||
public resetStyles() {
|
||||
@@ -105,7 +107,7 @@ class AllConfig {
|
||||
|
||||
public sendStyles(window: BrowserWindow) {
|
||||
window.webContents.send('both.styles.set', this.styles)
|
||||
console.log(`[INFO] Send Styles to #${window.id}:`, this.styles)
|
||||
Log.info(`Send Styles to #${window.id}:`, this.styles)
|
||||
}
|
||||
|
||||
public setControls(args: Object) {
|
||||
@@ -116,27 +118,28 @@ class AllConfig {
|
||||
}
|
||||
}
|
||||
this.controls.engineEnabled = engineEnabled
|
||||
console.log('[INFO] Set Controls:', this.controls)
|
||||
Log.info('Set Controls:', this.controls)
|
||||
}
|
||||
|
||||
public sendControls(window: BrowserWindow) {
|
||||
window.webContents.send('control.controls.set', this.controls)
|
||||
console.log(`[INFO] Send Controls to #${window.id}:`, this.controls)
|
||||
Log.info(`Send Controls to #${window.id}:`, this.controls)
|
||||
}
|
||||
|
||||
public updateCaptionLog(log: CaptionItem) {
|
||||
let command: 'add' | 'upd' = 'add'
|
||||
if(
|
||||
this.captionLog.length &&
|
||||
this.captionLog[this.captionLog.length - 1].index === log.index &&
|
||||
this.captionLog[this.captionLog.length - 1].time_s === log.time_s
|
||||
this.lastLogIndex === log.index
|
||||
) {
|
||||
this.captionLog.splice(this.captionLog.length - 1, 1, log)
|
||||
command = 'upd'
|
||||
}
|
||||
else {
|
||||
this.captionLog.push(log)
|
||||
this.lastLogIndex = log.index
|
||||
}
|
||||
this.captionLog[this.captionLog.length - 1].index = this.captionLog.length
|
||||
for(const window of BrowserWindow.getAllWindows()){
|
||||
this.sendCaptionLog(window, command)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import path from 'path'
|
||||
import { controlWindow } from '../ControlWindow'
|
||||
import { allConfig } from './AllConfig'
|
||||
import { i18n } from '../i18n'
|
||||
import { Log } from './Log'
|
||||
|
||||
export class CaptionEngine {
|
||||
appPath: string = ''
|
||||
@@ -14,7 +15,7 @@ export class CaptionEngine {
|
||||
|
||||
private getApp(): boolean {
|
||||
if (allConfig.controls.customized && allConfig.controls.customizedApp) {
|
||||
console.log('[INFO] Using customized engine')
|
||||
Log.info('Using customized engine')
|
||||
this.appPath = allConfig.controls.customizedApp
|
||||
this.command = allConfig.controls.customizedCommand.split(' ')
|
||||
}
|
||||
@@ -25,9 +26,7 @@ export class CaptionEngine {
|
||||
return false
|
||||
}
|
||||
let gummyName = 'main-gummy'
|
||||
if (process.platform === 'win32') {
|
||||
gummyName += '.exe'
|
||||
}
|
||||
if (process.platform === 'win32') { gummyName += '.exe' }
|
||||
this.command = []
|
||||
if (is.dev) {
|
||||
this.appPath = path.join(
|
||||
@@ -56,31 +55,33 @@ export class CaptionEngine {
|
||||
else if(allConfig.controls.engine === 'vosk'){
|
||||
allConfig.controls.customized = false
|
||||
let voskName = 'main-vosk'
|
||||
if (process.platform === 'win32') {
|
||||
voskName += '.exe'
|
||||
}
|
||||
if (process.platform === 'win32') { voskName += '.exe' }
|
||||
this.command = []
|
||||
if (is.dev) {
|
||||
this.appPath = path.join(
|
||||
app.getAppPath(),
|
||||
'engine', 'dist', voskName
|
||||
app.getAppPath(), 'engine',
|
||||
'subenv', 'Scripts', 'python.exe'
|
||||
)
|
||||
this.command.push(path.join(
|
||||
app.getAppPath(), 'engine', 'main-vosk.py'
|
||||
))
|
||||
}
|
||||
else {
|
||||
this.appPath = path.join(
|
||||
process.resourcesPath, 'engine', voskName
|
||||
)
|
||||
}
|
||||
this.command = []
|
||||
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
||||
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
|
||||
}
|
||||
console.log('[INFO] Engine Path:', this.appPath)
|
||||
console.log('[INFO] Engine Command:', this.command)
|
||||
Log.info('Engine Path:', this.appPath)
|
||||
Log.info('Engine Command:', this.command)
|
||||
return true
|
||||
}
|
||||
|
||||
public start() {
|
||||
if (this.processStatus !== 'stopped') {
|
||||
Log.warn('Caption engine status is not stopped, cannot start')
|
||||
return
|
||||
}
|
||||
if(!this.getApp()){ return }
|
||||
@@ -90,12 +91,12 @@ export class CaptionEngine {
|
||||
}
|
||||
catch (e) {
|
||||
controlWindow.sendErrorMessage(i18n('engine.start.error') + e)
|
||||
console.error('[ERROR] Error starting subprocess:', e)
|
||||
Log.error('Error starting engine:', e)
|
||||
return
|
||||
}
|
||||
|
||||
this.processStatus = 'running'
|
||||
console.log('[INFO] Caption Engine Started, PID:', this.process.pid)
|
||||
Log.info('Caption Engine Started, PID:', this.process.pid)
|
||||
|
||||
allConfig.controls.engineEnabled = true
|
||||
if(controlWindow.window){
|
||||
@@ -111,27 +112,23 @@ export class CaptionEngine {
|
||||
lines.forEach((line: string) => {
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const caption = JSON.parse(line);
|
||||
if(caption.index === undefined) {
|
||||
console.log('[INFO] Engine Bad Output:', caption);
|
||||
}
|
||||
else allConfig.updateCaptionLog(caption);
|
||||
const data_obj = JSON.parse(line)
|
||||
handleEngineData(data_obj)
|
||||
} catch (e) {
|
||||
controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
|
||||
console.error('[ERROR] Error parsing JSON:', e);
|
||||
Log.error('Error parsing JSON:', e)
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
this.process.stderr.on('data', (data) => {
|
||||
this.process.stderr.on('data', (data: any) => {
|
||||
if(this.processStatus === 'stopping') return
|
||||
controlWindow.sendErrorMessage(i18n('engine.error') + data)
|
||||
console.error(`[ERROR] Subprocess Error: ${data}`);
|
||||
Log.error(`Engine Error: ${data}`);
|
||||
});
|
||||
|
||||
this.process.on('close', (code: any) => {
|
||||
console.log(`[INFO] Subprocess exited with code ${code}`);
|
||||
this.process = undefined;
|
||||
allConfig.controls.engineEnabled = false
|
||||
if(controlWindow.window){
|
||||
@@ -139,14 +136,14 @@ export class CaptionEngine {
|
||||
controlWindow.window.webContents.send('control.engine.stopped')
|
||||
}
|
||||
this.processStatus = 'stopped'
|
||||
console.log('[INFO] Caption engine process stopped')
|
||||
Log.info(`Engine exited with code ${code}`)
|
||||
});
|
||||
}
|
||||
|
||||
public stop() {
|
||||
if(this.processStatus !== 'running') return
|
||||
if (this.process.pid) {
|
||||
console.log('[INFO] Trying to stop process, PID:', this.process.pid)
|
||||
Log.info('Trying to stop process, PID:', this.process.pid)
|
||||
let cmd = `kill ${this.process.pid}`;
|
||||
if (process.platform === "win32") {
|
||||
cmd = `taskkill /pid ${this.process.pid} /t /f`
|
||||
@@ -154,7 +151,7 @@ export class CaptionEngine {
|
||||
exec(cmd, (error) => {
|
||||
if (error) {
|
||||
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
|
||||
console.error(`[ERROR] Failed to kill process: ${error}`)
|
||||
Log.error(`Failed to kill process: ${error}`)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -166,11 +163,26 @@ export class CaptionEngine {
|
||||
controlWindow.window.webContents.send('control.engine.stopped')
|
||||
}
|
||||
this.processStatus = 'stopped'
|
||||
console.log('[INFO] Process PID undefined, caption engine process stopped')
|
||||
Log.info('Process PID undefined, caption engine process stopped')
|
||||
return
|
||||
}
|
||||
this.processStatus = 'stopping'
|
||||
console.log('[INFO] Caption engine process stopping')
|
||||
Log.info('Caption engine process stopping')
|
||||
}
|
||||
}
|
||||
|
||||
function handleEngineData(data: any) {
|
||||
if(data.command === 'caption') {
|
||||
allConfig.updateCaptionLog(data);
|
||||
}
|
||||
else if(data.command === 'print') {
|
||||
Log.info('Engine print:', data.content)
|
||||
}
|
||||
else if(data.command === 'info') {
|
||||
Log.info('Engine info:', data.content)
|
||||
}
|
||||
else if(data.command === 'usage') {
|
||||
Log.info('Caption engine usage: ', data.content)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
21
src/main/utils/Log.ts
Normal file
21
src/main/utils/Log.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
function getTimeString() {
|
||||
const now = new Date()
|
||||
const HH = String(now.getHours()).padStart(2, '0')
|
||||
const MM = String(now.getMinutes()).padStart(2, '0')
|
||||
const SS = String(now.getSeconds()).padStart(2, '0')
|
||||
return `${HH}:${MM}:${SS}`
|
||||
}
|
||||
|
||||
export class Log {
|
||||
static info(...msg: any[]){
|
||||
console.log(`[INFO ${getTimeString()}]`, ...msg)
|
||||
}
|
||||
|
||||
static warn(...msg: any[]){
|
||||
console.log(`[WARN ${getTimeString()}]`, ...msg)
|
||||
}
|
||||
|
||||
static error(...msg: any[]){
|
||||
console.log(`[ERROR ${getTimeString()}]`, ...msg)
|
||||
}
|
||||
}
|
||||
@@ -174,6 +174,12 @@ const columns = [
|
||||
dataIndex: 'index',
|
||||
key: 'index',
|
||||
width: 80,
|
||||
sorter: (a: CaptionItem, b: CaptionItem) => {
|
||||
if(a.index <= b.index) return -1
|
||||
return 1
|
||||
},
|
||||
sortDirections: ['descend'],
|
||||
defaultSortOrder: 'descend',
|
||||
},
|
||||
{
|
||||
title: 'time',
|
||||
@@ -184,8 +190,7 @@ const columns = [
|
||||
if(a.time_s <= b.time_s) return -1
|
||||
return 1
|
||||
},
|
||||
sortDirections: ['descend'],
|
||||
defaultSortOrder: 'descend',
|
||||
sortDirections: ['descend', 'ascend'],
|
||||
},
|
||||
{
|
||||
title: 'content',
|
||||
|
||||
Reference in New Issue
Block a user