diff --git a/README.md b/README.md index a9fd4ae..c8802a3 100644 --- a/README.md +++ b/README.md @@ -188,15 +188,3 @@ npm run build:mac # For Linux npm run build:linux ``` - -注意,根据不同的平台需要修改项目根目录下 `electron-builder.yml` 文件中的配置内容: - -```yml -extraResources: - # For Windows - - from: ./engine/dist/main.exe - to: ./engine/main.exe - # For macOS and Linux - # - from: ./engine/dist/main - # to: ./engine/main -``` diff --git a/README_en.md b/README_en.md index 0353bc8..38386e3 100644 --- a/README_en.md +++ b/README_en.md @@ -188,15 +188,3 @@ npm run build:mac # For Linux npm run build:linux ``` - -Note: You need to modify the configuration content in the `electron-builder.yml` file in the project root directory according to different platforms: - -```yml -extraResources: - # For Windows - - from: ./engine/dist/main.exe - to: ./engine/main.exe - # For macOS and Linux - # - from: ./engine/dist/main - # to: ./engine/main -``` \ No newline at end of file diff --git a/README_ja.md b/README_ja.md index 2c0de12..ef68759 100644 --- a/README_ja.md +++ b/README_ja.md @@ -188,15 +188,3 @@ npm run build:mac # Linux 用 npm run build:linux ``` - -注意: プラットフォームに応じて、プロジェクトルートディレクトリにある `electron-builder.yml` ファイルの設定内容を変更する必要があります: - -```yml -extraResources: - # Windows 用 - - from: ./engine/dist/main.exe - to: ./engine/main.exe - # macOS と Linux 用 - # - from: ./engine/dist/main - # to: ./engine/main -``` diff --git a/docs/api-docs/caption-engine.md b/docs/api-docs/caption-engine.md index 159ab25..3c03335 100644 --- a/docs/api-docs/caption-engine.md +++ b/docs/api-docs/caption-engine.md @@ -58,6 +58,18 @@ Electron 主进程通过 TCP Socket 向 Python 进程发送数据。发送的数 Python 端监听到的音频流转换为的字幕数据。 +### `translation` + +```js +{ + command: "translation", + time_s: string, + translation: string +} +``` + +语音识别的内容的翻译,可以根据起始时间确定对应的字幕。 + ### `print` ```js @@ -67,7 +79,7 @@ Python 端监听到的音频流转换为的字幕数据。 } ``` -输出 Python 端打印的内容。 +输出 Python 端打印的内容,不计入日志。 ### `info` @@ -78,7 +90,7 @@ Python 端监听到的音频流转换为的字幕数据。 } ``` -Python 端打印的提示信息,比起 `print`,该信息更希望 Electron 端的关注。 +Python 端打印的提示信息,会计入日志。 ### `error` diff --git a/electron-builder.yml b/electron-builder.yml index 12bd784..55b23d9 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -15,14 +15,13 @@ files: - '!assets/*' - '!.repomap/*' - '!.virtualme/*' - extraResources: # For Windows - from: ./engine/dist/main.exe to: ./engine/main.exe # For macOS and Linux - # - from: ./engine/dist/main - # to: ./engine/main + - from: ./engine/dist/main + to: ./engine/main win: executableName: auto-caption icon: build/icon.png diff --git a/engine/audio2text/vosk.py b/engine/audio2text/vosk.py index 402b7fd..0355adb 100644 --- a/engine/audio2text/vosk.py +++ b/engine/audio2text/vosk.py @@ -1,8 +1,10 @@ import json +import threading +import time from datetime import datetime from vosk import Model, KaldiRecognizer, SetLogLevel -from utils import stdout_cmd, stdout_obj +from utils import stdout_cmd, stdout_obj, google_translate class VoskRecognizer: @@ -11,15 +13,18 @@ class VoskRecognizer: 初始化参数: model_path: Vosk 识别模型路径 + target: 翻译目标语言 """ - def __init__(self, model_path: str): + def __init__(self, model_path: str, target: str | None): SetLogLevel(-1) if model_path.startswith('"'): model_path = model_path[1:] if model_path.endswith('"'): model_path = model_path[:-1] self.model_path = model_path + self.target = target self.time_str = '' + self.trans_time = time.time() self.cur_id = 0 self.prev_content = '' @@ -48,7 +53,15 @@ class VoskRecognizer: caption['time_s'] = self.time_str caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3] self.prev_content = '' + if content == '': return self.cur_id += 1 + if self.target: + self.trans_time = time.time() + th = threading.Thread( + target=google_translate, + args=(caption['text'], self.target, self.time_str) + ) + th.start() else: content = json.loads(self.recognizer.PartialResult()).get('partial', '') if content == '' or content == self.prev_content: @@ -62,6 +75,13 @@ class VoskRecognizer: self.prev_content = content stdout_obj(caption) + if self.target and time.time() - self.trans_time > 2.0: + self.trans_time = time.time() + th = threading.Thread( + target=google_translate, + args=(caption['text'], self.target, self.time_str) + ) + th.start() def stop(self): """停止 Vosk 引擎""" diff --git a/engine/main.py b/engine/main.py index cf6d512..836f5b6 100644 --- a/engine/main.py +++ b/engine/main.py @@ -44,10 +44,10 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str): engine.stop() -def main_vosk(a: int, c: int, m: str): +def main_vosk(a: int, c: int, m: str, t: str): global thread_data stream = AudioStream(a, c) - engine = VoskRecognizer(m) + engine = VoskRecognizer(m, None if t == 'none' else t) stream.open_stream() engine.start() @@ -72,9 +72,9 @@ if __name__ == "__main__": parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second') parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server') + parser.add_argument('-t', '--target_language', default='zh', help='Target language code, "none" for no translation') # gummy only parser.add_argument('-s', '--source_language', default='en', help='Source language code') - parser.add_argument('-t', '--target_language', default='zh', help='Target language code') parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model') # vosk only parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.') @@ -97,7 +97,8 @@ if __name__ == "__main__": main_vosk( int(args.audio_type), int(args.chunk_rate), - args.model_path + args.model_path, + args.target_language ) else: raise ValueError('Invalid caption engine specified.') diff --git a/engine/requirements.txt b/engine/requirements.txt index 894ed59..bbd3925 100644 --- a/engine/requirements.txt +++ b/engine/requirements.txt @@ -5,3 +5,5 @@ vosk pyinstaller pyaudio; sys_platform == 'darwin' pyaudiowpatch; sys_platform == 'win32' +googletrans +ollama diff --git a/engine/utils/__init__.py b/engine/utils/__init__.py index bb202b0..0e07ecf 100644 --- a/engine/utils/__init__.py +++ b/engine/utils/__init__.py @@ -6,4 +6,5 @@ from .audioprcs import ( ) from .sysout import stdout, stdout_err, stdout_cmd, stdout_obj, stderr from .thdata import thread_data -from .server import start_server \ No newline at end of file +from .server import start_server +from .translation import ollama_translate, google_translate \ No newline at end of file diff --git a/engine/utils/translation.py b/engine/utils/translation.py new file mode 100644 index 0000000..d45e2bf --- /dev/null +++ b/engine/utils/translation.py @@ -0,0 +1,57 @@ +from ollama import chat +from ollama import ChatResponse +import asyncio +from googletrans import Translator +from .sysout import stdout, stdout_obj + +lang_map = { + 'en': 'English', + 'es': 'Spanish', + 'fr': 'French', + 'de': 'German', + 'it': 'Italian', + 'ru': 'Russian', + 'ja': 'Japanese', + 'ko': 'Korean', + 'zh': 'Chinese' +} + +def ollama_translate(model: str, target: str, text: str, chunk_size = 3): + stream = chat( + model=model, + messages=[ + {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."}, + {"role": "user", "content": text} + ], + stream=True + ) + chunk_content = "" + in_thinking = False + count = 0 + for chunk in stream: + if count == 0 and chunk['message']['content'].startswith(""): + in_thinking = True + if in_thinking: + if "" in chunk['message']['content']: + in_thinking = False + continue + chunk_content += ' '.join(chunk['message']['content'].split('\n')) + count += 1 + if count % chunk_size == 0: + print(chunk_content, end='') + chunk_content = "" + count = 0 + if chunk_content: + print(chunk_content) + +def google_translate(text: str, target: str, time_s: str): + translator = Translator() + try: + res = asyncio.run(translator.translate(text, dest=target)) + stdout_obj({ + "command": "translation", + "time_s": time_s, + "translation": res.text + }) + except Exception as e: + stdout(f"Google Translation Request failed: {str(e)}") diff --git a/src/main/types/index.ts b/src/main/types/index.ts index 9c31e33..d1d2382 100644 --- a/src/main/types/index.ts +++ b/src/main/types/index.ts @@ -46,6 +46,11 @@ export interface CaptionItem { translation: string } +export interface CaptionTranslation { + time_s: string, + translation: string +} + export interface SoftwareLogItem { type: "INFO" | "WARN" | "ERROR", index: number, diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index c730873..4049c7f 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -1,6 +1,7 @@ import { UILanguage, UITheme, Styles, Controls, - CaptionItem, FullConfig, SoftwareLogItem + CaptionItem, CaptionTranslation, + FullConfig, SoftwareLogItem } from '../types' import { Log } from './Log' import { app, BrowserWindow } from 'electron' @@ -158,12 +159,28 @@ class AllConfig { } } - public sendCaptionLog(window: BrowserWindow, command: 'add' | 'upd' | 'set') { + public updateCaptionTranslation(trans: CaptionTranslation){ + for(let i = this.captionLog.length - 1; i >= 0; i--){ + if(this.captionLog[i].time_s === trans.time_s){ + this.captionLog[i].translation = trans.translation + for(const window of BrowserWindow.getAllWindows()){ + this.sendCaptionLog(window, 'upd', i) + } + break + } + } + } + public sendCaptionLog( + window: BrowserWindow, + command: 'add' | 'upd' | 'set', + index: number | undefined = undefined + ) { if(command === 'add'){ - window.webContents.send(`both.captionLog.add`, this.captionLog[this.captionLog.length - 1]) + window.webContents.send(`both.captionLog.add`, this.captionLog.at(-1)) } else if(command === 'upd'){ - window.webContents.send(`both.captionLog.upd`, this.captionLog[this.captionLog.length - 1]) + if(index !== undefined) window.webContents.send(`both.captionLog.upd`, this.captionLog[index]) + else window.webContents.send(`both.captionLog.upd`, this.captionLog.at(-1)) } else if(command === 'set'){ window.webContents.send(`both.captionLog.set`, this.captionLog) diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 9733789..9431a01 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -67,21 +67,20 @@ export class CaptionEngine { this.command.push('-a', allConfig.controls.audio ? '1' : '0') this.port = Math.floor(Math.random() * (65535 - 1024 + 1)) + 1024 this.command.push('-p', this.port.toString()) + this.command.push( + '-t', allConfig.controls.translation ? + allConfig.controls.targetLang : 'none' + ) if(allConfig.controls.engine === 'gummy') { this.command.push('-e', 'gummy') this.command.push('-s', allConfig.controls.sourceLang) - this.command.push( - '-t', allConfig.controls.translation ? - allConfig.controls.targetLang : 'none' - ) if(allConfig.controls.API_KEY) { this.command.push('-k', allConfig.controls.API_KEY) } } else if(allConfig.controls.engine === 'vosk'){ this.command.push('-e', 'vosk') - this.command.push('-m', `"${allConfig.controls.modelPath}"`) } } @@ -249,8 +248,11 @@ function handleEngineData(data: any) { else if(data.command === 'caption') { allConfig.updateCaptionLog(data); } + else if(data.command === 'translation') { + allConfig.updateCaptionTranslation(data); + } else if(data.command === 'print') { - Log.info('Engine Print:', data.content) + console.log(data.content) } else if(data.command === 'info') { Log.info('Engine Info:', data.content) diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue index a784f08..7f7e2cf 100644 --- a/src/renderer/src/components/EngineControl.vue +++ b/src/renderer/src/components/EngineControl.vue @@ -8,6 +8,7 @@
{{ $t('engine.sourceLang') }} {{ $t('engine.transLang') }} { watch(currentEngine, (val) => { if(val == 'vosk'){ currentSourceLang.value = 'auto' - currentTargetLang.value = '' + currentTargetLang.value = useGeneralSettingStore().uiLanguage + if(currentTargetLang.value === 'zh') { + currentTargetLang.value = 'zh-cn' + } } else if(val == 'gummy'){ currentSourceLang.value = 'auto' diff --git a/src/renderer/src/i18n/config/engine.ts b/src/renderer/src/i18n/config/engine.ts index f086455..7d7d50d 100644 --- a/src/renderer/src/i18n/config/engine.ts +++ b/src/renderer/src/i18n/config/engine.ts @@ -21,6 +21,15 @@ export const engines = { label: '本地 - Vosk', languages: [ { value: 'auto', label: '需要自行配置模型' }, + { value: 'en', label: '英语' }, + { value: 'zh-cn', label: '中文' }, + { value: 'ja', label: '日语' }, + { value: 'ko', label: '韩语' }, + { value: 'de', label: '德语' }, + { value: 'fr', label: '法语' }, + { value: 'ru', label: '俄语' }, + { value: 'es', label: '西班牙语' }, + { value: 'it', label: '意大利语' }, ] } ], @@ -46,6 +55,15 @@ export const engines = { label: 'Local - Vosk', languages: [ { value: 'auto', label: 'Model needs to be configured manually' }, + { value: 'en', label: 'English' }, + { value: 'zh-cn', label: 'Chinese' }, + { value: 'ja', label: 'Japanese' }, + { value: 'ko', label: 'Korean' }, + { value: 'de', label: 'German' }, + { value: 'fr', label: 'French' }, + { value: 'ru', label: 'Russian' }, + { value: 'es', label: 'Spanish' }, + { value: 'it', label: 'Italian' }, ] } ], @@ -71,6 +89,15 @@ export const engines = { label: 'ローカル - Vosk', languages: [ { value: 'auto', label: 'モデルを手動で設定する必要があります' }, + { value: 'en', label: '英語' }, + { value: 'zh-cn', label: '中国語' }, + { value: 'ja', label: '日本語' }, + { value: 'ko', label: '韓国語' }, + { value: 'de', label: 'ドイツ語' }, + { value: 'fr', label: 'フランス語' }, + { value: 'ru', label: 'ロシア語' }, + { value: 'es', label: 'スペイン語' }, + { value: 'it', label: 'イタリア語' }, ] } ] diff --git a/src/renderer/src/stores/captionLog.ts b/src/renderer/src/stores/captionLog.ts index 8f27cfb..dc66308 100644 --- a/src/renderer/src/stores/captionLog.ts +++ b/src/renderer/src/stores/captionLog.ts @@ -15,7 +15,12 @@ export const useCaptionLogStore = defineStore('captionLog', () => { }) window.electron.ipcRenderer.on('both.captionLog.upd', (_, log) => { - captionData.value.splice(captionData.value.length - 1, 1, log) + for(let i = captionData.value.length - 1; i >= 0; i--) { + if(captionData.value[i].time_s === log.time_s){ + captionData.value.splice(i, 1, log) + break + } + } }) window.electron.ipcRenderer.on('both.captionLog.set', (_, logs) => {