Merge pull request #25 from nocmt/dev_glmasr

feat(engine): 添加GLM-ASR语音识别引擎支持
This commit is contained in:
HS RedWoods
2026-01-10 20:17:35 +08:00
committed by GitHub
27 changed files with 607 additions and 126 deletions

5
.gitignore vendored
View File

@@ -7,8 +7,13 @@ out
__pycache__
.venv
test.py
engine/build
engine/portaudio
engine/pyinstaller_cache
engine/models
engine/notebook
# engine/main.spec
.repomap
.virtualme

View File

@@ -41,7 +41,7 @@ SOSV 模型下载:[ Shepra-ONNX SenseVoice Model](https://github.com/HiMeditat
- 支持调用本地 Ollama 模型或云端 Google 翻译 API 进行翻译
- 跨平台(Windows、macOS、Linux)、多界面语言(中文、英语、日语)支持
- 丰富的字幕样式设置(字体、字体大小、字体粗细、字体颜色、背景颜色等)
- 灵活的字幕引擎选择(阿里云 Gummy 云端模型、本地 Vosk 模型、本地 SOSV 模型、还可以自己开发模型)
- 灵活的字幕引擎选择(阿里云 Gummy 云端模型、GLM-ASR 云端模型、本地 Vosk 模型、本地 SOSV 模型、还可以自己开发模型)
- 多语言识别与翻译(见下文“⚙️ 自带字幕引擎说明”)
- 字幕记录展示与导出(支持导出 `.srt` 和 `.json` 格式)
@@ -62,6 +62,7 @@ macOS 平台和 Linux 平台获取系统音频输出需要进行额外设置,
| | 识别效果 | 部署类型 | 支持语言 | 翻译 | 备注 |
| ------------------------------------------------------------ | -------- | ------------- | ---------- | ---------- | ---------------------------------------------------------- |
| [Gummy](https://help.aliyun.com/zh/model-studio/gummy-speech-recognition-translation) | 很好😊 | 云端 / 阿里云 | 10 种 | 自带翻译 | 收费0.54CNY / 小时 |
| [glm-asr-2512](https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-asr-2512) | 很好😊 | 云端 / 智谱 AI | 4 种 | 需额外配置 | 收费,约 0.72CNY / 小时 |
| [Vosk](https://alphacephei.com/vosk) | 较差😞 | 本地 / CPU | 超过 30 种 | 需额外配置 | 支持的语言非常多 |
| [SOSV](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html) | 一般😐 | 本地 / CPU | 5 种 | 需额外配置 | 仅有一个模型 |
| 自己开发 | 🤔 | 自定义 | 自定义 | 自定义 | 根据[文档](./docs/engine-manual/zh.md)使用 Python 自己开发 |

View File

@@ -8,5 +8,9 @@
<true/>
<key>com.apple.security.cs.allow-dyld-environment-variables</key>
<true/>
<key>com.apple.security.cs.disable-library-validation</key>
<true/>
<key>com.apple.security.device.audio-input</key>
<true/>
</dict>
</plist>
</plist>

View File

@@ -1,3 +1,4 @@
from .gummy import GummyRecognizer
from .vosk import VoskRecognizer
from .sosv import SosvRecognizer
from .sosv import SosvRecognizer
from .glm import GlmRecognizer

163
engine/audio2text/glm.py Normal file
View File

@@ -0,0 +1,163 @@
import threading
import io
import wave
import struct
import math
import audioop
import requests
from datetime import datetime
from utils import shared_data
from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate
class GlmRecognizer:
"""
使用 GLM-ASR 引擎处理音频数据,并在标准输出中输出 Auto Caption 软件可读取的 JSON 字符串数据
初始化参数:
url: GLM-ASR API URL
model: GLM-ASR 模型名称
api_key: GLM-ASR API Key
source: 源语言
target: 目标语言
trans_model: 翻译模型名称
ollama_name: Ollama 模型名称
"""
def __init__(self, url: str, model: str, api_key: str, source: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''):
self.url = url
self.model = model
self.api_key = api_key
self.source = source
self.target = target
if trans_model == 'google':
self.trans_func = google_translate
else:
self.trans_func = ollama_translate
self.ollama_name = ollama_name
self.ollama_url = ollama_url
self.ollama_api_key = ollama_api_key
self.audio_buffer = []
self.is_speech = False
self.silence_frames = 0
self.speech_start_time = None
self.time_str = ''
self.cur_id = 0
# VAD settings (假设 16k 16bit, chunk size 1024 or similar)
# 16bit = 2 bytes per sample.
# RMS threshold needs tuning. 500 is a conservative guess for silence.
self.threshold = 500
self.silence_limit = 15 # frames (approx 0.5-1s depending on chunk size)
self.min_speech_frames = 10 # frames
def start(self):
"""启动 GLM 引擎"""
stdout_cmd('info', 'GLM-ASR recognizer started.')
def stop(self):
"""停止 GLM 引擎"""
stdout_cmd('info', 'GLM-ASR recognizer stopped.')
def process_audio(self, chunk):
# chunk is bytes (int16)
rms = audioop.rms(chunk, 2)
if rms > self.threshold:
if not self.is_speech:
self.is_speech = True
self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
self.audio_buffer = []
self.audio_buffer.append(chunk)
self.silence_frames = 0
else:
if self.is_speech:
self.audio_buffer.append(chunk)
self.silence_frames += 1
if self.silence_frames > self.silence_limit:
# Speech ended
if len(self.audio_buffer) > self.min_speech_frames:
self.recognize(self.audio_buffer, self.time_str)
self.is_speech = False
self.audio_buffer = []
self.silence_frames = 0
def recognize(self, audio_frames, time_s):
audio_bytes = b''.join(audio_frames)
wav_io = io.BytesIO()
with wave.open(wav_io, 'wb') as wav_file:
wav_file.setnchannels(1)
wav_file.setsampwidth(2)
wav_file.setframerate(16000)
wav_file.writeframes(audio_bytes)
wav_io.seek(0)
threading.Thread(
target=self._do_request,
args=(wav_io.read(), time_s, self.cur_id)
).start()
self.cur_id += 1
def _do_request(self, audio_content, time_s, index):
try:
files = {
'file': ('audio.wav', audio_content, 'audio/wav')
}
data = {
'model': self.model,
'stream': 'false'
}
headers = {
'Authorization': f'Bearer {self.api_key}'
}
response = requests.post(self.url, headers=headers, data=data, files=files, timeout=15)
if response.status_code == 200:
res_json = response.json()
text = res_json.get('text', '')
if text:
self.output_caption(text, time_s, index)
else:
try:
err_msg = response.json()
stdout_cmd('error', f"GLM API Error: {err_msg}")
except:
stdout_cmd('error', f"GLM API Error: {response.text}")
except Exception as e:
stdout_cmd('error', f"GLM Request Failed: {str(e)}")
def output_caption(self, text, time_s, index):
caption = {
'command': 'caption',
'index': index,
'time_s': time_s,
'time_t': datetime.now().strftime('%H:%M:%S.%f')[:-3],
'text': text,
'translation': ''
}
if self.target:
if self.trans_func == ollama_translate:
th = threading.Thread(
target=self.trans_func,
args=(self.ollama_name, self.target, caption['text'], time_s, self.ollama_url, self.ollama_api_key),
daemon=True
)
else:
th = threading.Thread(
target=self.trans_func,
args=(self.ollama_name, self.target, caption['text'], time_s),
daemon=True
)
th.start()
stdout_obj(caption)
def translate(self):
global shared_data
while shared_data.status == 'running':
chunk = shared_data.chunk_queue.get()
self.process_audio(chunk)

View File

@@ -29,7 +29,7 @@ class SosvRecognizer:
trans_model: 翻译模型名称
ollama_name: Ollama 模型名称
"""
def __init__(self, model_path: str, source: str, target: str | None, trans_model: str, ollama_name: str):
def __init__(self, model_path: str, source: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''):
if model_path.startswith('"'):
model_path = model_path[1:]
if model_path.endswith('"'):
@@ -45,6 +45,8 @@ class SosvRecognizer:
else:
self.trans_func = ollama_translate
self.ollama_name = ollama_name
self.ollama_url = ollama_url
self.ollama_api_key = ollama_api_key
self.time_str = ''
self.cur_id = 0
self.prev_content = ''
@@ -152,7 +154,7 @@ class SosvRecognizer:
if self.target:
th = threading.Thread(
target=self.trans_func,
args=(self.ollama_name, self.target, caption['text'], self.time_str),
args=(self.ollama_name, self.target, caption['text'], self.time_str, self.ollama_url, self.ollama_api_key),
daemon=True
)
th.start()

View File

@@ -18,7 +18,7 @@ class VoskRecognizer:
trans_model: 翻译模型名称
ollama_name: Ollama 模型名称
"""
def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str):
def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''):
SetLogLevel(-1)
if model_path.startswith('"'):
model_path = model_path[1:]
@@ -31,6 +31,8 @@ class VoskRecognizer:
else:
self.trans_func = ollama_translate
self.ollama_name = ollama_name
self.ollama_url = ollama_url
self.ollama_api_key = ollama_api_key
self.time_str = ''
self.cur_id = 0
self.prev_content = ''
@@ -66,7 +68,7 @@ class VoskRecognizer:
if self.target:
th = threading.Thread(
target=self.trans_func,
args=(self.ollama_name, self.target, caption['text'], self.time_str),
args=(self.ollama_name, self.target, caption['text'], self.time_str, self.ollama_url, self.ollama_api_key),
daemon=True
)
th.start()

View File

@@ -8,6 +8,7 @@ from utils import merge_chunk_channels, resample_chunk_mono
from audio2text import GummyRecognizer
from audio2text import VoskRecognizer
from audio2text import SosvRecognizer
from audio2text import GlmRecognizer
from sysaudio import AudioStream
@@ -74,7 +75,7 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str, r: bool, rp: str):
engine.stop()
def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp: str):
def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str):
"""
Parameters:
a: Audio source: 0 for output, 1 for input
@@ -83,14 +84,16 @@ def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp:
t: Target language
tm: Translation model type, ollama or google
omn: Ollama model name
ourl: Ollama Base URL
okey: Ollama API Key
r: Whether to record the audio
rp: Path to save the recorded audio
"""
stream = AudioStream(a, c)
if t == 'none':
engine = VoskRecognizer(vosk, None, tm, omn)
engine = VoskRecognizer(vosk, None, tm, omn, ourl, okey)
else:
engine = VoskRecognizer(vosk, t, tm, omn)
engine = VoskRecognizer(vosk, t, tm, omn, ourl, okey)
engine.start()
stream_thread = threading.Thread(
@@ -106,7 +109,7 @@ def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp:
engine.stop()
def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: bool, rp: str):
def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str):
"""
Parameters:
a: Audio source: 0 for output, 1 for input
@@ -116,14 +119,16 @@ def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: b
t: Target language
tm: Translation model type, ollama or google
omn: Ollama model name
ourl: Ollama API URL
okey: Ollama API Key
r: Whether to record the audio
rp: Path to save the recorded audio
"""
stream = AudioStream(a, c)
if t == 'none':
engine = SosvRecognizer(sosv, s, None, tm, omn)
engine = SosvRecognizer(sosv, s, None, tm, omn, ourl, okey)
else:
engine = SosvRecognizer(sosv, s, t, tm, omn)
engine = SosvRecognizer(sosv, s, t, tm, omn, ourl, okey)
engine.start()
stream_thread = threading.Thread(
@@ -139,16 +144,54 @@ def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: b
engine.stop()
def main_glm(a: int, c: int, url: str, model: str, key: str, s: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str):
    """
    Entry point for the GLM-ASR caption engine: build the audio stream and
    recognizer, start the audio recording thread, then run the recognizer
    loop until it returns or a keyboard interrupt arrives.

    Parameters:
        a: Audio source
        c: Chunk rate
        url: GLM API URL
        model: GLM Model Name
        key: GLM API Key
        s: Source language
        t: Target language ('none' disables translation)
        tm: Translation model
        omn: Ollama model name
        ourl: Ollama API URL
        okey: Ollama API Key
        r: Record
        rp: Record path
    """
    stream = AudioStream(a, c)

    # The CLI uses the literal string 'none' for "no translation".
    target_lang = None if t == 'none' else t
    engine = GlmRecognizer(url, model, key, s, target_lang, tm, omn, ourl, okey)
    engine.start()

    recorder = threading.Thread(
        target=audio_recording,
        args=(stream, True, r, rp),
        daemon=True,
    )
    recorder.start()

    try:
        engine.translate()
    except KeyboardInterrupt:
        stdout("Keyboard interrupt detected. Exiting...")
    engine.stop()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
# all
parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk or sosv')
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')
parser.add_argument('-p', '--port', default=0, help='The port to run the server on, 0 for no server')
parser.add_argument('-d', '--display_caption', default=0, help='Display caption on terminal, 0 for no display, 1 for display')
parser.add_argument('-a', '--audio_type', type=int, default=0, help='Audio stream source: 0 for output, 1 for input')
parser.add_argument('-c', '--chunk_rate', type=int, default=10, help='Number of audio stream chunks collected per second')
parser.add_argument('-p', '--port', type=int, default=0, help='The port to run the server on, 0 for no server')
parser.add_argument('-d', '--display_caption', type=int, default=0, help='Display caption on terminal, 0 for no display, 1 for display')
parser.add_argument('-t', '--target_language', default='none', help='Target language code, "none" for no translation')
parser.add_argument('-r', '--record', default=0, help='Whether to record the audio, 0 for no recording, 1 for recording')
parser.add_argument('-r', '--record', type=int, default=0, help='Whether to record the audio, 0 for no recording, 1 for recording')
parser.add_argument('-rp', '--record_path', default='', help='Path to save the recorded audio')
# gummy and sosv
parser.add_argument('-s', '--source_language', default='auto', help='Source language code')
@@ -157,20 +200,24 @@ if __name__ == "__main__":
# vosk and sosv
parser.add_argument('-tm', '--translation_model', default='ollama', help='Model for translation: ollama or google')
parser.add_argument('-omn', '--ollama_name', default='', help='Ollama model name for translation')
parser.add_argument('-ourl', '--ollama_url', default='', help='Ollama API URL')
parser.add_argument('-okey', '--ollama_api_key', default='', help='Ollama API Key')
# vosk only
parser.add_argument('-vosk', '--vosk_model', default='', help='The path to the vosk model.')
# sosv only
parser.add_argument('-sosv', '--sosv_model', default=None, help='The SenseVoice model path')
# glm only
parser.add_argument('-gurl', '--glm_url', default='https://open.bigmodel.cn/api/paas/v4/audio/transcriptions', help='GLM API URL')
parser.add_argument('-gmodel', '--glm_model', default='glm-asr-2512', help='GLM Model Name')
parser.add_argument('-gkey', '--glm_api_key', default='', help='GLM API Key')
args = parser.parse_args()
if int(args.port) == 0:
shared_data.status = "running"
else:
start_server(int(args.port))
if int(args.display_caption) != 0:
if args.port != 0:
threading.Thread(target=start_server, args=(args.port,), daemon=True).start()
if int(args.display_caption) != 0:
    # `-d/--display_caption` is parsed as an int, so comparing it with the
    # string '1' never matches; any non-zero value enables terminal display.
    change_caption_display(True)
    print("Caption will be displayed on terminal")
if args.caption_engine == 'gummy':
main_gummy(
@@ -179,7 +226,7 @@ if __name__ == "__main__":
int(args.audio_type),
int(args.chunk_rate),
args.api_key,
True if int(args.record) == 1 else False,
bool(int(args.record)),
args.record_path
)
elif args.caption_engine == 'vosk':
@@ -190,7 +237,9 @@ if __name__ == "__main__":
args.target_language,
args.translation_model,
args.ollama_name,
True if int(args.record) == 1 else False,
args.ollama_url,
args.ollama_api_key,
bool(int(args.record)),
args.record_path
)
elif args.caption_engine == 'sosv':
@@ -202,7 +251,25 @@ if __name__ == "__main__":
args.target_language,
args.translation_model,
args.ollama_name,
True if int(args.record) == 1 else False,
args.ollama_url,
args.ollama_api_key,
bool(int(args.record)),
args.record_path
)
elif args.caption_engine == 'glm':
main_glm(
int(args.audio_type),
int(args.chunk_rate),
args.glm_url,
args.glm_model,
args.glm_api_key,
args.source_language,
args.target_language,
args.translation_model,
args.ollama_name,
args.ollama_url,
args.ollama_api_key,
bool(int(args.record)),
args.record_path
)
else:

View File

@@ -6,11 +6,17 @@ import sys
if sys.platform == 'win32':
vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
else:
vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve())
venv_lib = Path('./.venv/lib')
python_dirs = list(venv_lib.glob('python*'))
if python_dirs:
vosk_path = str((python_dirs[0] / 'site-packages' / 'vosk').resolve())
else:
vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve())
a = Analysis(
['main.py'],
pathex=[],
# binaries=[('portaudio/lib/.libs/libportaudio.2.dylib', '.')],
binaries=[],
datas=[(vosk_path, 'vosk')],
hiddenimports=[],
@@ -27,21 +33,27 @@ pyz = PYZ(a.pure)
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.datas,
[],
exclude_binaries=True,
name='main',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
upx_exclude=[],
runtime_tmpdir=None,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
onefile=True,
)
coll = COLLECT(
exe,
a.binaries,
a.datas,
strip=False,
upx=True,
upx_exclude=[],
name='main',
)

View File

@@ -7,4 +7,6 @@ pyaudio; sys_platform == 'darwin'
pyaudiowpatch; sys_platform == 'win32'
googletrans
ollama
sherpa_onnx
sherpa_onnx
requests
openai

View File

@@ -47,7 +47,6 @@ def translation_display(obj):
def stdout_obj(obj):
global display_caption
print(obj['command'], display_caption)
if obj['command'] == 'caption' and display_caption:
caption_display(obj)
return

View File

@@ -1,5 +1,9 @@
from ollama import chat
from ollama import chat, Client
from ollama import ChatResponse
try:
from openai import OpenAI
except ImportError:
OpenAI = None
import asyncio
from googletrans import Translator
from .sysout import stdout_cmd, stdout_obj
@@ -17,15 +21,43 @@ lang_map = {
'zh-cn': 'Chinese'
}
def ollama_translate(model: str, target: str, text: str, time_s: str):
response: ChatResponse = chat(
model=model,
messages=[
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
{"role": "user", "content": text}
]
)
content = response.message.content or ""
def ollama_translate(model: str, target: str, text: str, time_s: str, url: str = '', key: str = ''):
content = ""
try:
if url:
if OpenAI:
client = OpenAI(base_url=url, api_key=key if key else "ollama")
openai_response = client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
{"role": "user", "content": text}
]
)
content = openai_response.choices[0].message.content or ""
else:
client = Client(host=url)
response: ChatResponse = client.chat(
model=model,
messages=[
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
{"role": "user", "content": text}
]
)
content = response.message.content or ""
else:
response: ChatResponse = chat(
model=model,
messages=[
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
{"role": "user", "content": text}
]
)
content = response.message.content or ""
except Exception as e:
stdout_cmd("warn", f"Translation failed: {str(e)}")
return
if content.startswith('<think>'):
index = content.find('</think>')
if index != -1:

67
package-lock.json generated
View File

@@ -110,6 +110,7 @@
"integrity": "sha512-IaaGWsQqfsQWVLqMn9OB92MNN7zukfVA4s7KKAI0KfrrDsZ0yhi5uV4baBuLuN7n3vsZpwP8asPPcVwApxvjBQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@ampproject/remapping": "^2.2.0",
"@babel/code-frame": "^7.27.1",
@@ -2274,6 +2275,7 @@
"resolved": "https://registry.npmmirror.com/@types/node/-/node-22.15.17.tgz",
"integrity": "sha512-wIX2aSZL5FE+MR0JlvF87BNVrtFWf6AE6rxSE9X7OwnVvoyCQjpzSRJ+M87se/4QCkCiebQAqrJ0y6fwIyi7nw==",
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~6.21.0"
}
@@ -2360,6 +2362,7 @@
"integrity": "sha512-B2MdzyWxCE2+SqiZHAjPphft+/2x2FlO9YBx7eKE1BCb+rqBlQdhtAEhzIEdozHd55DXPmxBdpMygFJjfjjA9A==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.32.0",
"@typescript-eslint/types": "8.32.0",
@@ -2791,6 +2794,7 @@
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -2851,6 +2855,7 @@
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.1",
"fast-json-stable-stringify": "^2.0.0",
@@ -3064,7 +3069,6 @@
"integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"archiver-utils": "^2.1.0",
"async": "^3.2.4",
@@ -3084,7 +3088,6 @@
"integrity": "sha512-bEL/yUb/fNNiNTuUz979Z0Yg5L+LzLxGJz8x79lYmR54fmTIb6ob/hNQgkQnIUDWIFjZVQwl9Xs356I6BAMHfw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"glob": "^7.1.4",
"graceful-fs": "^4.2.0",
@@ -3107,7 +3110,6 @@
"integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"core-util-is": "~1.0.0",
"inherits": "~2.0.3",
@@ -3123,8 +3125,7 @@
"resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/archiver-utils/node_modules/string_decoder": {
"version": "1.1.1",
@@ -3132,7 +3133,6 @@
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"safe-buffer": "~5.1.0"
}
@@ -3351,6 +3351,7 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"caniuse-lite": "^1.0.30001716",
"electron-to-chromium": "^1.5.149",
@@ -3848,7 +3849,6 @@
"integrity": "sha512-D3uMHtGc/fcO1Gt1/L7i1e33VOvD4A9hfQLP+6ewd+BvG/gQ84Yh4oftEhAdjSMgBgwGL+jsppT7JYNpo6MHHg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"buffer-crc32": "^0.2.13",
"crc32-stream": "^4.0.2",
@@ -3994,7 +3994,6 @@
"integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"crc32": "bin/crc32.njs"
},
@@ -4008,7 +4007,6 @@
"integrity": "sha512-NT7w2JVU7DFroFdYkeq8cywxrgjPHWkdX1wjpRQXPX5Asews3tA+Ght6lddQO5Mkumffp3X7GEqku3epj2toIw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"crc-32": "^1.2.0",
"readable-stream": "^3.4.0"
@@ -4248,6 +4246,7 @@
"integrity": "sha512-NoXo6Liy2heSklTI5OIZbCgXC1RzrDQsZkeEwXhdOro3FT1VBOvbubvscdPnjVuQ4AMwwv61oaH96AbiYg9EnQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"app-builder-lib": "25.1.8",
"builder-util": "25.1.7",
@@ -4410,6 +4409,7 @@
"integrity": "sha512-6dLslJrQYB1qvqVPYRv1PhAA/uytC66nUeiTcq2JXiBzrmTWCHppqtGUjZhvnSRVatBCT5/SFdizdzcBiEiYUg==",
"hasInstallScript": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@electron/get": "^2.0.0",
"@types/node": "^22.7.7",
@@ -4454,7 +4454,6 @@
"integrity": "sha512-2ntkJ+9+0GFP6nAISiMabKt6eqBB0kX1QqHNWFWAXgi0VULKGisM46luRFpIBiU3u/TDmhZMM8tzvo2Abn3ayg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"app-builder-lib": "25.1.8",
"archiver": "^5.3.1",
@@ -4468,7 +4467,6 @@
"integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"graceful-fs": "^4.2.0",
"jsonfile": "^6.0.1",
@@ -4484,7 +4482,6 @@
"integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"universalify": "^2.0.0"
},
@@ -4498,7 +4495,6 @@
"integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">= 10.0.0"
}
@@ -4813,6 +4809,7 @@
"integrity": "sha512-LSehfdpgMeWcTZkWZVIJl+tkZ2nuSkyyB9C27MZqFWXuph7DvaowgcTvKqxvpLW1JZIk8PN7hFY3Rj9LQ7m7lg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -4874,6 +4871,7 @@
"integrity": "sha512-zc1UmCpNltmVY34vuLRV61r1K27sWuX39E+uyUnY8xS2Bex88VV9cugG+UZbRSRGtGyFboj+D8JODyme1plMpw==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"eslint-config-prettier": "bin/cli.js"
},
@@ -5351,8 +5349,7 @@
"resolved": "https://registry.npmmirror.com/fs-constants/-/fs-constants-1.0.0.tgz",
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/fs-extra": {
"version": "8.1.0",
@@ -6108,8 +6105,7 @@
"resolved": "https://registry.npmmirror.com/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/isbinaryfile": {
"version": "5.0.4",
@@ -6300,7 +6296,6 @@
"integrity": "sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"readable-stream": "^2.0.5"
},
@@ -6314,7 +6309,6 @@
"integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"core-util-is": "~1.0.0",
"inherits": "~2.0.3",
@@ -6330,8 +6324,7 @@
"resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/lazystream/node_modules/string_decoder": {
"version": "1.1.1",
@@ -6339,7 +6332,6 @@
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"safe-buffer": "~5.1.0"
}
@@ -6391,32 +6383,28 @@
"resolved": "https://registry.npmmirror.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/lodash.difference": {
"version": "4.5.0",
"resolved": "https://registry.npmmirror.com/lodash.difference/-/lodash.difference-4.5.0.tgz",
"integrity": "sha512-dS2j+W26TQ7taQBGN8Lbbq04ssV3emRw4NY58WErlTO29pIqS0HmoT5aJ9+TUQ1N3G+JOZSji4eugsWwGp9yPA==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/lodash.flatten": {
"version": "4.4.0",
"resolved": "https://registry.npmmirror.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz",
"integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/lodash.isplainobject": {
"version": "4.0.6",
"resolved": "https://registry.npmmirror.com/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz",
"integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/lodash.merge": {
"version": "4.6.2",
@@ -6430,8 +6418,7 @@
"resolved": "https://registry.npmmirror.com/lodash.union/-/lodash.union-4.6.0.tgz",
"integrity": "sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/log-symbols": {
"version": "4.1.0",
@@ -6984,7 +6971,6 @@
"integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -7408,6 +7394,7 @@
"integrity": "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"prettier": "bin/prettier.cjs"
},
@@ -7436,8 +7423,7 @@
"resolved": "https://registry.npmmirror.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/progress": {
"version": "2.0.3",
@@ -7556,7 +7542,6 @@
"integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"minimatch": "^5.1.0"
}
@@ -7567,7 +7552,6 @@
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
"dev": true,
"license": "ISC",
"peer": true,
"dependencies": {
"brace-expansion": "^2.0.1"
},
@@ -8235,7 +8219,6 @@
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"bl": "^4.0.3",
"end-of-stream": "^1.4.1",
@@ -8360,6 +8343,7 @@
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -8462,6 +8446,7 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"devOptional": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -8611,6 +8596,7 @@
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.4",
@@ -8701,6 +8687,7 @@
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -8720,6 +8707,7 @@
"resolved": "https://registry.npmmirror.com/vue/-/vue-3.5.13.tgz",
"integrity": "sha512-wmeiSMxkZCSc+PM2w2VRsOYAZC8GdipNFRTsLSfodVqI9mbejKeXEGr8SckuLnrQPGe3oJN5c3K0vpoU9q/wCQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@vue/compiler-dom": "3.5.13",
"@vue/compiler-sfc": "3.5.13",
@@ -8742,6 +8730,7 @@
"integrity": "sha512-dbCBnd2e02dYWsXoqX5yKUZlOt+ExIpq7hmHKPb5ZqKcjf++Eo0hMseFTZMLKThrUk61m+Uv6A2YSBve6ZvuDQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"debug": "^4.4.0",
"eslint-scope": "^8.2.0",
@@ -9046,7 +9035,6 @@
"integrity": "sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"archiver-utils": "^3.0.4",
"compress-commons": "^4.1.2",
@@ -9062,7 +9050,6 @@
"integrity": "sha512-KVgf4XQVrTjhyWmx6cte4RxonPLR9onExufI1jhvw/MQ4BB6IsZD5gT8Lq+u/+pRkWna/6JoHpiQioaqFP5Rzw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"glob": "^7.2.3",
"graceful-fs": "^4.2.0",

View File

@@ -1,7 +1,7 @@
{
"name": "auto-caption",
"productName": "Auto Caption",
"version": "1.0.0",
"version": "1.1.0",
"description": "A cross-platform subtitle display software.",
"main": "./out/main/index.js",
"author": "himeditator",

View File

@@ -8,6 +8,8 @@ export interface Controls {
targetLang: string,
transModel: string,
ollamaName: string,
ollamaUrl: string,
ollamaApiKey: string,
engine: string,
audio: 0 | 1,
translation: boolean,
@@ -15,6 +17,9 @@ export interface Controls {
API_KEY: string,
voskModelPath: string,
sosvModelPath: string,
glmUrl: string,
glmModel: string,
glmApiKey: string,
recordingPath: string,
customized: boolean,
customizedApp: string,

View File

@@ -4,9 +4,10 @@ import {
} from '../types'
import { Log } from './Log'
import { app, BrowserWindow } from 'electron'
import { passwordMaskingForObject } from './UtilsFunc'
import * as path from 'path'
import * as fs from 'fs'
import os from 'os'
import * as os from 'os'
interface CaptionTranslation {
time_s: string,
@@ -44,13 +45,18 @@ const defaultControls: Controls = {
sourceLang: 'en',
targetLang: 'zh',
transModel: 'ollama',
ollamaName: '',
ollamaName: 'qwen2.5:0.5b',
ollamaUrl: 'http://localhost:11434',
ollamaApiKey: '',
engine: 'gummy',
audio: 0,
engineEnabled: false,
API_KEY: '',
voskModelPath: '',
sosvModelPath: '',
glmUrl: 'https://open.bigmodel.cn/api/paas/v4/audio/transcriptions',
glmModel: 'glm-asr-2512',
glmApiKey: '',
recordingPath: getDesktopPath(),
translation: true,
recording: false,
@@ -146,9 +152,7 @@ class AllConfig {
}
}
this.controls.engineEnabled = engineEnabled
let _controls = {...this.controls}
_controls.API_KEY = _controls.API_KEY.replace(/./g, '*')
Log.info('Set Controls:', _controls)
Log.info('Set Controls:', passwordMaskingForObject(this.controls))
}
public sendControls(window: BrowserWindow, info = true) {

View File

@@ -1,12 +1,13 @@
import { exec, spawn } from 'child_process'
import { app } from 'electron'
import { is } from '@electron-toolkit/utils'
import path from 'path'
import net from 'net'
import * as path from 'path'
import * as net from 'net'
import { controlWindow } from '../ControlWindow'
import { allConfig } from './AllConfig'
import { i18n } from '../i18n'
import { Log } from './Log'
import { passwordMaskingForList } from './UtilsFunc'
export class CaptionEngine {
appPath: string = ''
@@ -60,7 +61,7 @@ export class CaptionEngine {
this.appPath = path.join(process.resourcesPath, 'engine', 'main.exe')
}
else {
this.appPath = path.join(process.resourcesPath, 'engine', 'main')
this.appPath = path.join(process.resourcesPath, 'engine', 'main', 'main')
}
}
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
@@ -87,6 +88,8 @@ export class CaptionEngine {
this.command.push('-vosk', `"${allConfig.controls.voskModelPath}"`)
this.command.push('-tm', allConfig.controls.transModel)
this.command.push('-omn', allConfig.controls.ollamaName)
if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl)
if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey)
}
else if(allConfig.controls.engine === 'sosv'){
this.command.push('-e', 'sosv')
@@ -94,15 +97,25 @@ export class CaptionEngine {
this.command.push('-sosv', `"${allConfig.controls.sosvModelPath}"`)
this.command.push('-tm', allConfig.controls.transModel)
this.command.push('-omn', allConfig.controls.ollamaName)
if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl)
if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey)
}
else if(allConfig.controls.engine === 'glm'){
this.command.push('-e', 'glm')
this.command.push('-s', allConfig.controls.sourceLang)
this.command.push('-gurl', allConfig.controls.glmUrl)
this.command.push('-gmodel', allConfig.controls.glmModel)
if(allConfig.controls.glmApiKey) {
this.command.push('-gkey', allConfig.controls.glmApiKey)
}
this.command.push('-tm', allConfig.controls.transModel)
this.command.push('-omn', allConfig.controls.ollamaName)
if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl)
if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey)
}
}
Log.info('Engine Path:', this.appPath)
if(this.command.length > 2 && this.command.at(-2) === '-k') {
const _command = [...this.command]
_command[_command.length -1] = _command[_command.length -1].replace(/./g, '*')
Log.info('Engine Command:', _command)
}
else Log.info('Engine Command:', this.command)
Log.info('Engine Command:', passwordMaskingForList(this.command))
return true
}
@@ -165,7 +178,7 @@ export class CaptionEngine {
const data_obj = JSON.parse(line)
handleEngineData(data_obj)
} catch (e) {
controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
// controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
Log.error('Error parsing JSON:', e)
}
}

View File

@@ -0,0 +1,24 @@
/** Command-line flags whose *following* argument is a secret (API key). */
const SECRET_FLAGS = new Set(['-k', '-okey', '-gkey'])

/**
 * Replace every character of a secret with `*`, preserving only its length.
 *
 * Uses `repeat` rather than a `/./g` replacement so that line terminators
 * (e.g. a trailing newline from a pasted key) are masked as well.
 *
 * @param pwd - The secret to hide.
 * @returns A string of `*` with the same length as `pwd`.
 */
function passwordMasking(pwd: string): string {
  return '*'.repeat(pwd.length)
}

/**
 * Return a copy of a command-line argument list that is safe to log.
 *
 * Any argument that directly follows one of the secret flags
 * (`-k`, `-okey`, `-gkey`) is replaced by asterisks. The input array is
 * not modified.
 *
 * @param args - Argument list, flags and values interleaved.
 * @returns A new array with secret values masked.
 */
export function passwordMaskingForList(args: string[]): string[] {
  return args.map((arg, i) =>
    i > 0 && SECRET_FLAGS.has(args[i - 1]) ? passwordMasking(arg) : arg
  )
}

/**
 * Return a shallow copy of an object that is safe to log.
 *
 * A property is treated as a secret when its name, compared
 * case-insensitively, contains both `api` and `key`
 * (e.g. `API_KEY`, `glmApiKey`, `ollamaApiKey`). The input object is not
 * modified.
 *
 * @param args - Object whose secret-looking properties should be hidden.
 * @returns A new object with secret values replaced by asterisks.
 */
export function passwordMaskingForObject(args: Record<string, any>) {
  const maskedArgs: Record<string, any> = { ...args }
  for (const key of Object.keys(maskedArgs)) {
    const lKey = key.toLowerCase()
    if (!(lKey.includes('api') && lKey.includes('key'))) continue
    const value = maskedArgs[key]
    // A missing key (undefined/null) has nothing to hide; leaving it as-is
    // avoids the TypeError that calling a string method on it would raise.
    if (value === undefined || value === null) continue
    // Non-string secrets are stringified first so they are still masked.
    maskedArgs[key] = passwordMasking(String(value))
  }
  return maskedArgs
}

View File

@@ -2,7 +2,7 @@
<html>
<head>
<meta charset="UTF-8" />
<title>Auto Caption v1.0.0</title>
<title>Auto Caption v1.1.0</title>
<!-- https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP -->
<meta
http-equiv="Content-Security-Policy"

View File

@@ -41,17 +41,63 @@
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.ollamaNote') }}</p>
<p class="label-hover-info">{{ $t('engine.modelNameNote') }}</p>
</template>
<span class="input-label info-label"
:style="{color: uiColor}"
>{{ $t('engine.ollama') }}</span>
>{{ $t('engine.modelName') }}</span>
</a-popover>
<a-input
class="input-area"
v-model:value="currentOllamaName"
></a-input>
</div>
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.baseURL') }}</p>
</template>
<span class="input-label info-label"
:style="{color: uiColor}"
>Base URL</span>
</a-popover>
<a-input
class="input-area"
v-model:value="currentOllamaUrl"
placeholder="http://localhost:11434"
></a-input>
</div>
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.apiKey') }}</p>
</template>
<span class="input-label info-label"
:style="{color: uiColor}"
>API Key</span>
</a-popover>
<a-input
class="input-area"
type="password"
v-model:value="currentOllamaApiKey"
/>
</div>
<div class="input-item" v-if="currentEngine === 'glm'">
<span class="input-label">GLM API URL</span>
<a-input
class="input-area"
v-model:value="currentGlmUrl"
placeholder="https://open.bigmodel.cn/api/paas/v4/audio/transcriptions"
></a-input>
</div>
<div class="input-item" v-if="currentEngine === 'glm'">
<span class="input-label">GLM Model Name</span>
<a-input
class="input-area"
v-model:value="currentGlmModel"
placeholder="glm-asr-2512"
></a-input>
</div>
<div class="input-item">
<span class="input-label">{{ $t('engine.audioType') }}</span>
<a-select
@@ -115,7 +161,7 @@
</template>
<span class="input-label info-label"
:style="{color: uiColor}"
>{{ $t('engine.apikey') }}</span>
>ALI {{ $t('engine.apikey') }}</span>
</a-popover>
<a-input
class="input-area"
@@ -123,6 +169,24 @@
v-model:value="currentAPI_KEY"
/>
</div>
<div class="input-item">
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.glmApikeyInfo') }}</p>
<p><a href="https://open.bigmodel.cn/" target="_blank">
https://open.bigmodel.cn
</a></p>
</template>
<span class="input-label info-label"
:style="{color: uiColor}"
>GLM {{ $t('engine.apikey') }}</span>
</a-popover>
<a-input
class="input-area"
type="password"
v-model:value="currentGlmApiKey"
/>
</div>
<div class="input-item">
<a-popover placement="right">
<template #content>
@@ -239,9 +303,14 @@ const currentTranslation = ref<boolean>(true)
const currentRecording = ref<boolean>(false)
const currentTransModel = ref('ollama')
const currentOllamaName = ref('')
const currentOllamaUrl = ref('')
const currentOllamaApiKey = ref('')
const currentAPI_KEY = ref<string>('')
const currentVoskModelPath = ref<string>('')
const currentSosvModelPath = ref<string>('')
const currentGlmUrl = ref<string>('')
const currentGlmModel = ref<string>('')
const currentGlmApiKey = ref<string>('')
const currentRecordingPath = ref<string>('')
const currentCustomized = ref<boolean>(false)
const currentCustomizedApp = ref('')
@@ -294,12 +363,17 @@ function applyChange(){
engineControl.transModel = currentTransModel.value
engineControl.ollamaName = currentOllamaName.value
engineControl.engine = currentEngine.value
engineControl.ollamaUrl = currentOllamaUrl.value ?? "http://localhost:11434"
engineControl.ollamaApiKey = currentOllamaApiKey.value
engineControl.audio = currentAudio.value
engineControl.translation = currentTranslation.value
engineControl.recording = currentRecording.value
engineControl.API_KEY = currentAPI_KEY.value
engineControl.voskModelPath = currentVoskModelPath.value
engineControl.sosvModelPath = currentSosvModelPath.value
engineControl.glmUrl = currentGlmUrl.value ?? "https://open.bigmodel.cn/api/paas/v4/audio/transcriptions"
engineControl.glmModel = currentGlmModel.value ?? "glm-asr-2512"
engineControl.glmApiKey = currentGlmApiKey.value
engineControl.recordingPath = currentRecordingPath.value
engineControl.customized = currentCustomized.value
engineControl.customizedApp = currentCustomizedApp.value
@@ -320,6 +394,8 @@ function cancelChange(){
currentTargetLang.value = engineControl.targetLang
currentTransModel.value = engineControl.transModel
currentOllamaName.value = engineControl.ollamaName
currentOllamaUrl.value = engineControl.ollamaUrl
currentOllamaApiKey.value = engineControl.ollamaApiKey
currentEngine.value = engineControl.engine
currentAudio.value = engineControl.audio
currentTranslation.value = engineControl.translation
@@ -327,6 +403,9 @@ function cancelChange(){
currentAPI_KEY.value = engineControl.API_KEY
currentVoskModelPath.value = engineControl.voskModelPath
currentSosvModelPath.value = engineControl.sosvModelPath
currentGlmUrl.value = engineControl.glmUrl
currentGlmModel.value = engineControl.glmModel
currentGlmApiKey.value = engineControl.glmApiKey
currentRecordingPath.value = engineControl.recordingPath
currentCustomized.value = engineControl.customized
currentCustomizedApp.value = engineControl.customizedApp

View File

@@ -101,7 +101,7 @@
<p class="about-desc">{{ $t('status.about.desc') }}</p>
<a-divider />
<div class="about-info">
<p><b>{{ $t('status.about.version') }}</b><a-tag color="green">v1.0.0</a-tag></p>
<p><b>{{ $t('status.about.version') }}</b><a-tag color="green">v1.1.0</a-tag></p>
<p>
<b>{{ $t('status.about.author') }}</b>
<a

View File

@@ -34,7 +34,7 @@ export const engines = {
{ value: 'it', type: 1, label: '意大利语' },
],
transModel: [
{ value: 'ollama', label: 'Ollama 本地模型' },
{ value: 'ollama', label: 'Ollama 模型或 OpenAI 兼容模型' },
{ value: 'google', label: 'Google API 调用' },
]
},
@@ -55,7 +55,22 @@ export const engines = {
{ value: 'it', type: 1, label: '意大利语' },
],
transModel: [
{ value: 'ollama', label: 'Ollama 本地模型' },
{ value: 'ollama', label: 'Ollama 模型或 OpenAI 兼容模型' },
{ value: 'google', label: 'Google API 调用' },
]
},
{
value: 'glm',
label: '云端 / 智谱AI / GLM-ASR',
languages: [
{ value: 'auto', type: -1, label: '自动检测' },
{ value: 'en', type: 0, label: '英语' },
{ value: 'zh', type: 0, label: '中文' },
{ value: 'ja', type: 0, label: '日语' },
{ value: 'ko', type: 0, label: '韩语' },
],
transModel: [
{ value: 'ollama', label: 'Ollama 模型或 OpenAI 兼容模型' },
{ value: 'google', label: 'Google API 调用' },
]
}
@@ -94,7 +109,7 @@ export const engines = {
{ value: 'it', type: 1, label: 'Italian' },
],
transModel: [
{ value: 'ollama', label: 'Ollama Local Model' },
{ value: 'ollama', label: 'Ollama Model or OpenAI-compatible Model' },
{ value: 'google', label: 'Google API Call' },
]
},
@@ -115,7 +130,22 @@ export const engines = {
{ value: 'it', type: 1, label: 'Italian' },
],
transModel: [
{ value: 'ollama', label: 'Ollama Local Model' },
{ value: 'ollama', label: 'Ollama Model or OpenAI-compatible Model' },
{ value: 'google', label: 'Google API Call' },
]
},
{
value: 'glm',
label: 'Cloud / Zhipu AI / GLM-ASR',
languages: [
{ value: 'auto', type: -1, label: 'Auto Detect' },
{ value: 'en', type: 0, label: 'English' },
{ value: 'zh', type: 0, label: 'Chinese' },
{ value: 'ja', type: 0, label: 'Japanese' },
{ value: 'ko', type: 0, label: 'Korean' },
],
transModel: [
{ value: 'ollama', label: 'Ollama Model or OpenAI-compatible Model' },
{ value: 'google', label: 'Google API Call' },
]
}
@@ -154,7 +184,7 @@ export const engines = {
{ value: 'it', type: 1, label: 'イタリア語' },
],
transModel: [
{ value: 'ollama', label: 'Ollama ローカルモデル' },
{ value: 'ollama', label: 'Ollama モデルまたは OpenAI 互換モデル' },
{ value: 'google', label: 'Google API 呼び出し' },
]
},
@@ -175,7 +205,22 @@ export const engines = {
{ value: 'it', type: 1, label: 'イタリア語' },
],
transModel: [
{ value: 'ollama', label: 'Ollama ローカルモデル' },
{ value: 'ollama', label: 'Ollama モデルまたは OpenAI 互換モデル' },
{ value: 'google', label: 'Google API 呼び出し' },
]
},
{
value: 'glm',
label: 'クラウド / 智譜AI / GLM-ASR',
languages: [
{ value: 'auto', type: -1, label: '自動検出' },
{ value: 'en', type: 0, label: '英語' },
{ value: 'zh', type: 0, label: '中国語' },
{ value: 'ja', type: 0, label: '日本語' },
{ value: 'ko', type: 0, label: '韓国語' },
],
transModel: [
{ value: 'ollama', label: 'Ollama モデルまたは OpenAI 互換モデル' },
{ value: 'google', label: 'Google API 呼び出し' },
]
}

View File

@@ -22,7 +22,7 @@ export default {
"stopped": "Caption Engine Stopped",
"stoppedInfo": "The caption engine has stopped. You can click the 'Start Caption Engine' button to restart it.",
"error": "An error occurred",
"engineError": "The subtitle engine encountered an error and requested a forced exit.",
"engineError": "The caption engine encountered an error and requested a forced exit.",
"socketError": "The Socket connection between the main program and the caption engine failed",
"engineChange": "Caption Engine Configuration Changed",
"changeInfo": "If the caption engine is already running, you need to restart it for the changes to take effect.",
@@ -50,8 +50,10 @@ export default {
"sourceLang": "Source",
"transLang": "Translation",
"transModel": "Model",
"ollama": "Ollama",
"ollamaNote": "To use for translation, the name of the local Ollama model that will call the service on the default port. It is recommended to use a non-inference model with less than 1B parameters.",
"modelName": "Model Name",
"modelNameNote": "Please enter the translation model name you wish to use, which can be either a local Ollama model or an OpenAI API compatible cloud model. If the Base URL field is left blank, the local Ollama service will be called by default; otherwise, the API service at the specified address will be called via the Python OpenAI library.",
"baseURL": "The base request URL for calling OpenAI API. If left empty, the local default port Ollama model will be used.",
"apiKey": "The API KEY required for the model corresponding to OpenAI API.",
"captionEngine": "Engine",
"audioType": "Audio Type",
"systemOutput": "System Audio Output (Speaker)",
@@ -65,9 +67,10 @@ export default {
"recordingPath": "Save Path",
"startTimeout": "Timeout",
"seconds": "seconds",
"apikeyInfo": "API KEY required for the Gummy subtitle engine, which needs to be obtained from the Alibaba Cloud Bailing platform. For more details, see the project user manual.",
"voskModelPathInfo": "The folder path of the model required by the Vosk subtitle engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
"sosvModelPathInfo": "The folder path of the model required by the SOSV subtitle engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
"apikeyInfo": "API KEY required for the Gummy caption engine, which needs to be obtained from the Alibaba Cloud Bailian platform. For more details, see the project user manual.",
"glmApikeyInfo": "API KEY required for GLM caption engine, which needs to be obtained from the Zhipu AI platform.",
"voskModelPathInfo": "The folder path of the model required by the Vosk caption engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
"sosvModelPathInfo": "The folder path of the model required by the SOSV caption engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
"recordingPathInfo": "The path to save recording files, requiring a folder path. The software will automatically name the recording file and save it as .wav file.",
"modelDownload": "Model Download Link",
"startTimeoutInfo": "Caption engine startup timeout duration. Engine will be forcefully stopped if startup exceeds this time. Recommended range: 10-120 seconds.",
@@ -143,7 +146,7 @@ export default {
"projLink": "Project Link",
"manual": "User Manual",
"engineDoc": "Caption Engine Manual",
"date": "September 8th, 2025"
"date": "January 10th, 2026"
}
},
log: {

View File

@@ -50,8 +50,10 @@ export default {
"sourceLang": "ソース言語",
"transLang": "翻訳言語",
"transModel": "翻訳モデル",
"ollama": "Ollama",
"ollamaNote": "翻訳に使用する、デフォルトポートでサービスを呼び出すローカルOllamaモデルの名前。1B 未満のパラメータを持つ非推論モデルの使用を推奨します。",
"modelName": "モデル名",
"modelNameNote": "使用する翻訳モデル名を入力してください。Ollama のローカルモデルでも OpenAI API 互換のクラウドモデルでも可能です。Base URL フィールドが未入力の場合、デフォルトでローカルOllama サービスが呼び出され、それ以外の場合は Python OpenAI ライブラリ経由で指定されたアドレスの API サービスが呼び出されます。",
"baseURL": "OpenAI API を呼び出すための基本リクエスト URL です。未記入の場合、ローカルのデフォルトポートの Ollama モデルが呼び出されます。",
"apiKey": "OpenAI API に対応するモデルを使用するために必要な API キーです。",
"captionEngine": "エンジン",
"audioType": "オーディオ",
"systemOutput": "システムオーディオ出力(スピーカー)",
@@ -66,6 +68,7 @@ export default {
"startTimeout": "時間制限",
"seconds": "秒",
"apikeyInfo": "Gummy 字幕エンジンに必要な API KEY は、アリババクラウド百煉プラットフォームから取得する必要があります。詳細情報はプロジェクトのユーザーマニュアルをご覧ください。",
"glmApikeyInfo": "GLM 字幕エンジンに必要な API KEY で、智譜 AI プラットフォームから取得する必要があります。",
"voskModelPathInfo": "Vosk 字幕エンジンに必要なモデルのフォルダパスです。必要なモデルを事前にローカルマシンにダウンロードする必要があります。詳細情報はプロジェクトのユーザーマニュアルをご覧ください。",
"sosvModelPathInfo": "SOSV 字幕エンジンに必要なモデルのフォルダパスです。必要なモデルを事前にローカルマシンにダウンロードする必要があります。詳細情報はプロジェクトのユーザーマニュアルをご覧ください。",
"recordingPathInfo": "録音ファイルの保存パスで、フォルダパスを指定する必要があります。ソフトウェアが自動的に録音ファイルに名前を付けて .wav ファイルとして保存します。",
@@ -142,7 +145,7 @@ export default {
"projLink": "プロジェクトリンク",
"manual": "ユーザーマニュアル",
"engineDoc": "字幕エンジンマニュアル",
"date": "2025 年 9 月 8 日"
"date": "2026 年 1 月 10 日"
}
},
log: {

View File

@@ -50,8 +50,10 @@ export default {
"sourceLang": "源语言",
"transLang": "翻译语言",
"transModel": "翻译模型",
"ollama": "Ollama",
"ollamaNote": "要使用的进行翻译的本地 Ollama 模型的名称,将调用默认端口的服务,建议使用参数量小于 1B 的非推理模型。",
"modelName": "模型名称",
"modelNameNote": "请输入要使用的翻译模型名称,可以是 Ollama 本地模型,也可以是 OpenAI API 兼容的云端模型。若未填写 Base URL 字段,则默认调用本地 Ollama 服务,否则会通过 Python OpenAI 库调用该地址指向的 API 服务。",
"baseURL": "调用 OpenAI API 的基础请求地址,如果不填写则调用本地默认端口的 Ollama 模型。",
"apiKey": "调用 OpenAI API 对应的模型需要使用的 API KEY。",
"captionEngine": "字幕引擎",
"audioType": "音频类型",
"systemOutput": "系统音频输出(扬声器)",
@@ -66,6 +68,7 @@ export default {
"startTimeout": "启动超时",
"seconds": "秒",
"apikeyInfo": "Gummy 字幕引擎需要的 API KEY，需要在阿里云百炼平台获取。详细信息见项目用户手册。",
"glmApikeyInfo": "GLM 字幕引擎需要的 API KEY，需要在智谱 AI 平台获取。",
"voskModelPathInfo": "Vosk 字幕引擎需要的模型的文件夹路径,需要提前下载需要的模型到本地。信息详情见项目用户手册。",
"sosvModelPathInfo": "SOSV 字幕引擎需要的模型的文件夹路径,需要提前下载需要的模型到本地。信息详情见项目用户手册。",
"recordingPathInfo": "录音文件保存路径,需要提供文件夹路径。软件会自动命名录音文件并保存为 .wav 文件。",
@@ -142,7 +145,7 @@ export default {
"projLink": "项目链接",
"manual": "用户手册",
"engineDoc": "字幕引擎手册",
"date": "2025 年 9 月 8 日"
"date": "2026 年 1 月 10 日"
}
},
log: {

View File

@@ -21,6 +21,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
const targetLang = ref<string>('zh')
const transModel = ref<string>('ollama')
const ollamaName = ref<string>('')
const ollamaUrl = ref<string>('')
const ollamaApiKey = ref<string>('')
const engine = ref<string>('gummy')
const audio = ref<0 | 1>(0)
const translation = ref<boolean>(true)
@@ -28,6 +30,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
const API_KEY = ref<string>('')
const voskModelPath = ref<string>('')
const sosvModelPath = ref<string>('')
const glmUrl = ref<string>('https://open.bigmodel.cn/api/paas/v4/audio/transcriptions')
const glmModel = ref<string>('glm-asr-2512')
const glmApiKey = ref<string>('')
const recordingPath = ref<string>('')
const customized = ref<boolean>(false)
const customizedApp = ref<string>('')
@@ -44,6 +49,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
targetLang: targetLang.value,
transModel: transModel.value,
ollamaName: ollamaName.value,
ollamaUrl: ollamaUrl.value,
ollamaApiKey: ollamaApiKey.value,
engine: engine.value,
audio: audio.value,
translation: translation.value,
@@ -51,6 +58,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
API_KEY: API_KEY.value,
voskModelPath: voskModelPath.value,
sosvModelPath: sosvModelPath.value,
glmUrl: glmUrl.value,
glmModel: glmModel.value,
glmApiKey: glmApiKey.value,
recordingPath: recordingPath.value,
customized: customized.value,
customizedApp: customizedApp.value,
@@ -80,6 +90,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
targetLang.value = controls.targetLang
transModel.value = controls.transModel
ollamaName.value = controls.ollamaName
ollamaUrl.value = controls.ollamaUrl
ollamaApiKey.value = controls.ollamaApiKey
engine.value = controls.engine
audio.value = controls.audio
engineEnabled.value = controls.engineEnabled
@@ -88,6 +100,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
API_KEY.value = controls.API_KEY
voskModelPath.value = controls.voskModelPath
sosvModelPath.value = controls.sosvModelPath
glmUrl.value = controls.glmUrl || 'https://open.bigmodel.cn/api/paas/v4/audio/transcriptions'
glmModel.value = controls.glmModel || 'glm-asr-2512'
glmApiKey.value = controls.glmApiKey
recordingPath.value = controls.recordingPath
customized.value = controls.customized
customizedApp.value = controls.customizedApp
@@ -150,6 +165,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
targetLang, // 目标语言
transModel, // 翻译模型
ollamaName, // Ollama 模型
ollamaUrl,
ollamaApiKey,
engine, // 字幕引擎
audio, // 选择音频
translation, // 是否启用翻译
@@ -157,6 +174,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
API_KEY, // API KEY
voskModelPath, // vosk 模型路径
sosvModelPath, // sosv 模型路径
glmUrl, // GLM API URL
glmModel, // GLM 模型名称
glmApiKey, // GLM API Key
recordingPath, // 录音保存路径
customized, // 是否使用自定义字幕引擎
customizedApp, // 自定义字幕引擎的应用程序

View File

@@ -8,6 +8,8 @@ export interface Controls {
targetLang: string,
transModel: string,
ollamaName: string,
ollamaUrl: string,
ollamaApiKey: string,
engine: string,
audio: 0 | 1,
translation: boolean,
@@ -15,6 +17,9 @@ export interface Controls {
API_KEY: string,
voskModelPath: string,
sosvModelPath: string,
glmUrl: string,
glmModel: string,
glmApiKey: string,
recordingPath: string,
customized: boolean,
customizedApp: string,