diff --git a/README.md b/README.md
index a9fd4ae..c8802a3 100644
--- a/README.md
+++ b/README.md
@@ -188,15 +188,3 @@ npm run build:mac
# For Linux
npm run build:linux
```
-
-注意,根据不同的平台需要修改项目根目录下 `electron-builder.yml` 文件中的配置内容:
-
-```yml
-extraResources:
- # For Windows
- - from: ./engine/dist/main.exe
- to: ./engine/main.exe
- # For macOS and Linux
- # - from: ./engine/dist/main
- # to: ./engine/main
-```
diff --git a/README_en.md b/README_en.md
index 0353bc8..38386e3 100644
--- a/README_en.md
+++ b/README_en.md
@@ -188,15 +188,3 @@ npm run build:mac
# For Linux
npm run build:linux
```
-
-Note: You need to modify the configuration content in the `electron-builder.yml` file in the project root directory according to different platforms:
-
-```yml
-extraResources:
- # For Windows
- - from: ./engine/dist/main.exe
- to: ./engine/main.exe
- # For macOS and Linux
- # - from: ./engine/dist/main
- # to: ./engine/main
-```
\ No newline at end of file
diff --git a/README_ja.md b/README_ja.md
index 2c0de12..ef68759 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -188,15 +188,3 @@ npm run build:mac
# Linux 用
npm run build:linux
```
-
-注意: プラットフォームに応じて、プロジェクトルートディレクトリにある `electron-builder.yml` ファイルの設定内容を変更する必要があります:
-
-```yml
-extraResources:
- # Windows 用
- - from: ./engine/dist/main.exe
- to: ./engine/main.exe
- # macOS と Linux 用
- # - from: ./engine/dist/main
- # to: ./engine/main
-```
diff --git a/docs/api-docs/caption-engine.md b/docs/api-docs/caption-engine.md
index 159ab25..3c03335 100644
--- a/docs/api-docs/caption-engine.md
+++ b/docs/api-docs/caption-engine.md
@@ -58,6 +58,18 @@ Electron 主进程通过 TCP Socket 向 Python 进程发送数据。发送的数
Python 端监听到的音频流转换为的字幕数据。
+### `translation`
+
+```js
+{
+ command: "translation",
+ time_s: string,
+ translation: string
+}
+```
+
+语音识别内容的翻译结果,可以根据起始时间 `time_s` 确定对应的字幕。
+
### `print`
```js
@@ -67,7 +79,7 @@ Python 端监听到的音频流转换为的字幕数据。
}
```
-输出 Python 端打印的内容。
+输出 Python 端打印的内容,不计入日志。
### `info`
@@ -78,7 +90,7 @@ Python 端监听到的音频流转换为的字幕数据。
}
```
-Python 端打印的提示信息,比起 `print`,该信息更希望 Electron 端的关注。
+Python 端打印的提示信息,会计入日志。
### `error`
diff --git a/electron-builder.yml b/electron-builder.yml
index 12bd784..55b23d9 100644
--- a/electron-builder.yml
+++ b/electron-builder.yml
@@ -15,14 +15,13 @@ files:
- '!assets/*'
- '!.repomap/*'
- '!.virtualme/*'
-
extraResources:
# For Windows
- from: ./engine/dist/main.exe
to: ./engine/main.exe
# For macOS and Linux
- # - from: ./engine/dist/main
- # to: ./engine/main
+ - from: ./engine/dist/main
+ to: ./engine/main
win:
executableName: auto-caption
icon: build/icon.png
diff --git a/engine/audio2text/vosk.py b/engine/audio2text/vosk.py
index 402b7fd..0355adb 100644
--- a/engine/audio2text/vosk.py
+++ b/engine/audio2text/vosk.py
@@ -1,8 +1,10 @@
import json
+import threading
+import time
from datetime import datetime
from vosk import Model, KaldiRecognizer, SetLogLevel
-from utils import stdout_cmd, stdout_obj
+from utils import stdout_cmd, stdout_obj, google_translate
class VoskRecognizer:
@@ -11,15 +13,18 @@ class VoskRecognizer:
初始化参数:
model_path: Vosk 识别模型路径
+ target: 翻译目标语言
"""
- def __init__(self, model_path: str):
+ def __init__(self, model_path: str, target: str | None):
SetLogLevel(-1)
if model_path.startswith('"'):
model_path = model_path[1:]
if model_path.endswith('"'):
model_path = model_path[:-1]
self.model_path = model_path
+ self.target = target
self.time_str = ''
+ self.trans_time = time.time()
self.cur_id = 0
self.prev_content = ''
@@ -48,7 +53,15 @@ class VoskRecognizer:
caption['time_s'] = self.time_str
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
self.prev_content = ''
+ if content == '': return
self.cur_id += 1
+ if self.target:
+ self.trans_time = time.time()
+ th = threading.Thread(
+ target=google_translate,
+ args=(caption['text'], self.target, self.time_str)
+ )
+ th.start()
else:
content = json.loads(self.recognizer.PartialResult()).get('partial', '')
if content == '' or content == self.prev_content:
@@ -62,6 +75,13 @@ class VoskRecognizer:
self.prev_content = content
stdout_obj(caption)
+ if self.target and time.time() - self.trans_time > 2.0:
+ self.trans_time = time.time()
+ th = threading.Thread(
+ target=google_translate,
+ args=(caption['text'], self.target, self.time_str)
+ )
+ th.start()
def stop(self):
"""停止 Vosk 引擎"""
diff --git a/engine/main.py b/engine/main.py
index cf6d512..836f5b6 100644
--- a/engine/main.py
+++ b/engine/main.py
@@ -44,10 +44,10 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str):
engine.stop()
-def main_vosk(a: int, c: int, m: str):
+def main_vosk(a: int, c: int, m: str, t: str):
global thread_data
stream = AudioStream(a, c)
- engine = VoskRecognizer(m)
+ engine = VoskRecognizer(m, None if t == 'none' else t)
stream.open_stream()
engine.start()
@@ -72,9 +72,9 @@ if __name__ == "__main__":
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')
parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server')
+ parser.add_argument('-t', '--target_language', default='zh', help='Target language code, "none" for no translation')
# gummy only
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
- parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
# vosk only
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
@@ -97,7 +97,8 @@ if __name__ == "__main__":
main_vosk(
int(args.audio_type),
int(args.chunk_rate),
- args.model_path
+ args.model_path,
+ args.target_language
)
else:
raise ValueError('Invalid caption engine specified.')
diff --git a/engine/requirements.txt b/engine/requirements.txt
index 894ed59..bbd3925 100644
--- a/engine/requirements.txt
+++ b/engine/requirements.txt
@@ -5,3 +5,5 @@ vosk
pyinstaller
pyaudio; sys_platform == 'darwin'
pyaudiowpatch; sys_platform == 'win32'
+googletrans
+ollama
diff --git a/engine/utils/__init__.py b/engine/utils/__init__.py
index bb202b0..0e07ecf 100644
--- a/engine/utils/__init__.py
+++ b/engine/utils/__init__.py
@@ -6,4 +6,5 @@ from .audioprcs import (
)
from .sysout import stdout, stdout_err, stdout_cmd, stdout_obj, stderr
from .thdata import thread_data
-from .server import start_server
\ No newline at end of file
+from .server import start_server
+from .translation import ollama_translate, google_translate
\ No newline at end of file
diff --git a/engine/utils/translation.py b/engine/utils/translation.py
new file mode 100644
index 0000000..d45e2bf
--- /dev/null
+++ b/engine/utils/translation.py
@@ -0,0 +1,57 @@
+from ollama import chat
+from ollama import ChatResponse
+import asyncio
+from googletrans import Translator
+from .sysout import stdout, stdout_obj
+
+lang_map = {  # language code -> English language name inserted into the ollama translation prompt
+ 'en': 'English',
+ 'es': 'Spanish',
+ 'fr': 'French',
+ 'de': 'German',
+ 'it': 'Italian',
+ 'ru': 'Russian',
+ 'ja': 'Japanese',
+ 'ko': 'Korean',
+ 'zh': 'Chinese'
+}
+
+def ollama_translate(model: str, target: str, text: str, chunk_size = 3):
+ stream = chat(
+ model=model,
+ messages=[
+ {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
+ {"role": "user", "content": text}
+ ],
+ stream=True
+ )
+ chunk_content = ""
+ in_thinking = False
+ count = 0
+ for chunk in stream:
+ if count == 0 and chunk['message']['content'].startswith("