diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index cbdcae4..09f264c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -153,4 +153,18 @@ ### 优化体验 - 优化软件用户界面的部分组件 -- 更清晰的日志输出 \ No newline at end of file +- 更清晰的日志输出 + + +## v0.8.0 + +2025-09-?? + +### 新增功能 + +- 字幕引擎添加超时关闭功能:如果在规定时间字幕引擎没有启动成功会自动关闭、在字幕引擎启动过程中也可选择关闭字幕引擎 +- 添加非实时翻译功能:支持调用 Ollama 本地模型进行翻译、支持调用 Google 翻译 API 进行翻译 + +### 优化体验 + +- 带有额外信息的标签颜色改为与主题色一致 \ No newline at end of file diff --git a/docs/api-docs/caption-engine.md b/docs/api-docs/caption-engine.md index 3c03335..be2f0ad 100644 --- a/docs/api-docs/caption-engine.md +++ b/docs/api-docs/caption-engine.md @@ -92,6 +92,17 @@ Python 端监听到的音频流转换为的字幕数据。 Python 端打印的提示信息,会计入日志。 +### `warn` + +```js +{ + command: "warn", + content: string +} +``` + +Python 端打印的警告信息,会计入日志。 + ### `error` ```js @@ -101,7 +112,7 @@ Python 端打印的提示信息,会计入日志。 } ``` -Python 端打印的错误信息,该错误信息需要在前端弹窗显示。 +Python 端打印的错误信息,该错误信息会在前端弹窗显示。 ### `usage` diff --git a/engine/audio2text/vosk.py b/engine/audio2text/vosk.py index 0355adb..850a85d 100644 --- a/engine/audio2text/vosk.py +++ b/engine/audio2text/vosk.py @@ -4,7 +4,7 @@ import time from datetime import datetime from vosk import Model, KaldiRecognizer, SetLogLevel -from utils import stdout_cmd, stdout_obj, google_translate +from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate class VoskRecognizer: @@ -14,8 +14,10 @@ class VoskRecognizer: 初始化参数: model_path: Vosk 识别模型路径 target: 翻译目标语言 + trans_model: 翻译模型名称 + ollama_name: Ollama 模型名称 """ - def __init__(self, model_path: str, target: str | None): + def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str): SetLogLevel(-1) if model_path.startswith('"'): model_path = model_path[1:] @@ -23,8 +25,12 @@ class VoskRecognizer: model_path = model_path[:-1] self.model_path = model_path self.target = target + if trans_model == 'google': + self.trans_func = google_translate + else: + self.trans_func = ollama_translate + self.ollama_name = ollama_name self.time_str = '' - self.trans_time = time.time() self.cur_id = 0 self.prev_content = '' @@ -58,8 +64,8 @@ class VoskRecognizer: if self.target: self.trans_time = time.time() th = threading.Thread( - target=google_translate, - args=(caption['text'], self.target, self.time_str) + target=self.trans_func, + args=(self.ollama_name, self.target, caption['text'], self.time_str) ) th.start() else: @@ -75,13 +81,6 @@ class VoskRecognizer: self.prev_content = content stdout_obj(caption) - if self.target and time.time() - self.trans_time > 2.0: - self.trans_time = time.time() - th = threading.Thread( - target=google_translate, - args=(caption['text'], self.target, self.time_str) - ) - th.start() def stop(self): """停止 Vosk 引擎""" diff --git a/engine/main.py b/engine/main.py index 836f5b6..dd01e7c 100644 --- a/engine/main.py +++ b/engine/main.py @@ -44,10 +44,13 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str): engine.stop() -def main_vosk(a: int, c: int, m: str, t: str): +def main_vosk(a: int, c: int, m: str, t: str, tm: str, on: str): global thread_data stream = AudioStream(a, c) - engine = VoskRecognizer(m, None if t == 'none' else t) + engine = VoskRecognizer( + m, None if t == 'none' else t, + tm, on + ) stream.open_stream() engine.start() @@ -78,6 +81,8 @@ if __name__ == "__main__": parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model') # vosk only parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.') + parser.add_argument('-tm', '--translation_model', default='', help='Google translate API KEY') + parser.add_argument('-on', '--ollama_name', default='', help='Ollama model name for translation') args = parser.parse_args() if int(args.port) == 0: @@ -98,7 +103,9 @@ if __name__ == "__main__": int(args.audio_type), int(args.chunk_rate), args.model_path, - args.target_language + args.target_language, + args.translation_model, + args.ollama_name ) else: raise ValueError('Invalid caption engine specified.') diff --git a/engine/utils/translation.py b/engine/utils/translation.py index d45e2bf..33d0c6f 100644 --- a/engine/utils/translation.py +++ b/engine/utils/translation.py @@ -2,7 +2,7 @@ from ollama import chat from ollama import ChatResponse import asyncio from googletrans import Translator -from .sysout import stdout, stdout_obj +from .sysout import stdout_cmd, stdout_obj lang_map = { 'en': 'English', @@ -13,38 +13,29 @@ lang_map = { 'ru': 'Russian', 'ja': 'Japanese', 'ko': 'Korean', - 'zh': 'Chinese' + 'zh-cn': 'Chinese' } -def ollama_translate(model: str, target: str, text: str, chunk_size = 3): - stream = chat( +def ollama_translate(model: str, target: str, text: str, time_s: str): + response: ChatResponse = chat( model=model, messages=[ {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."}, {"role": "user", "content": text} - ], - stream=True + ] ) - chunk_content = "" - in_thinking = False - count = 0 - for chunk in stream: - if count == 0 and chunk['message']['content'].startswith(""): - in_thinking = True - if in_thinking: - if "" in chunk['message']['content']: - in_thinking = False - continue - chunk_content += ' '.join(chunk['message']['content'].split('\n')) - count += 1 - if count % chunk_size == 0: - print(chunk_content, end='') - chunk_content = "" - count = 0 - if chunk_content: - print(chunk_content) + content = response.message.content or "" + if content.startswith(''): + index = content.find('') + if index != -1: + content = content[index+8:] + stdout_obj({ + "command": "translation", + "time_s": time_s, + "translation": content.strip() + }) -def google_translate(text: str, target: str, time_s: str): +def google_translate(model: str, target: str, text: str, time_s: str): translator = Translator() try: res = asyncio.run(translator.translate(text, dest=target)) @@ -54,4 +45,4 @@ def google_translate(text: str, target: str, time_s: str): "translation": res.text }) except Exception as e: - stdout(f"Google Translation Request failed: {str(e)}") + stdout_cmd("warn", f"Google translation request failed, please check your network connection...") diff --git a/src/main/types/index.ts b/src/main/types/index.ts index d1d2382..b44fa2e 100644 --- a/src/main/types/index.ts +++ b/src/main/types/index.ts @@ -6,6 +6,8 @@ export interface Controls { engineEnabled: boolean, sourceLang: string, targetLang: string, + transModel: string, + ollamaName: string, engine: string, audio: 0 | 1, translation: boolean, @@ -46,11 +48,6 @@ export interface CaptionItem { translation: string } -export interface CaptionTranslation { - time_s: string, - translation: string -} - export interface SoftwareLogItem { type: "INFO" | "WARN" | "ERROR", index: number, diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index 4049c7f..567d6b0 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -1,13 +1,17 @@ import { UILanguage, UITheme, Styles, Controls, - CaptionItem, CaptionTranslation, - FullConfig, SoftwareLogItem + CaptionItem, FullConfig, SoftwareLogItem } from '../types' import { Log } from './Log' import { app, BrowserWindow } from 'electron' import * as path from 'path' import * as fs from 'fs' +interface CaptionTranslation { + time_s: string, + translation: string +} + const defaultStyles: Styles = { lineBreak: 1, fontFamily: 'sans-serif', @@ -32,6 +36,8 @@ const defaultStyles: Styles = { const defaultControls: Controls = { sourceLang: 'en', targetLang: 'zh', + transModel: 'ollama', + ollamaName: '', engine: 'gummy', audio: 0, engineEnabled: false, diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 9431a01..516f0ae 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -81,7 +81,9 @@ export class CaptionEngine { } else if(allConfig.controls.engine === 'vosk'){ this.command.push('-e', 'vosk') - this.command.push('-m', `"${allConfig.controls.modelPath}"`) + this.command.push('-m', `"${allConfig.controls.modelPath}"`) + this.command.push('-tm', allConfig.controls.transModel) + this.command.push('-on', allConfig.controls.ollamaName) } } Log.info('Engine Path:', this.appPath) @@ -257,6 +259,9 @@ function handleEngineData(data: any) { else if(data.command === 'info') { Log.info('Engine Info:', data.content) } + else if(data.command === 'warn') { + Log.warn('Engine Warn:', data.content) + } else if(data.command === 'error') { Log.error('Engine Error:', data.content) controlWindow.sendErrorMessage(/*i18n('engine.error') +*/ data.content) diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue index 7f7e2cf..fa19041 100644 --- a/src/renderer/src/components/EngineControl.vue +++ b/src/renderer/src/components/EngineControl.vue @@ -5,6 +5,14 @@ {{ $t('engine.applyChange') }} | {{ $t('engine.cancelChange') }} +
+ {{ $t('engine.captionEngine') }} + +
{{ $t('engine.sourceLang') }}
-
- {{ $t('engine.captionEngine') }} +
+ {{ $t('engine.transModel') }}
+
+ + + {{ $t('engine.ollama') }} + + +
{{ $t('engine.audioType') }}
- + - {{ $t('engine.apikey') }} + {{ $t('engine.apikey') }}
- + - {{ $t('engine.modelPath') }} + {{ $t('engine.modelPath') }}
- + {{ $t('engine.startTimeout') }}