From 0825e4890283fe245d2a665566cf9a4e73f31281 Mon Sep 17 00:00:00 2001 From: nocmt Date: Sat, 10 Jan 2026 16:02:24 +0800 Subject: [PATCH] =?UTF-8?q?feat(engine):=20=E6=B7=BB=E5=8A=A0GLM-ASR?= =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E8=AF=86=E5=88=AB=E5=BC=95=E6=93=8E=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增GLM-ASR云端语音识别引擎实现 - 扩展配置界面添加GLM相关参数设置 - Ollama支持自定义域名和Apikey以支持云端和其他LLM - 修改音频处理逻辑以支持新引擎 - 更新依赖项和构建配置 - 修复Ollama翻译功能相关问题 --- .gitignore | 7 + build/entitlements.mac.plist | 6 +- engine/audio2text/__init__.py | 3 +- engine/audio2text/glm.py | 163 ++++++++++++++++++ engine/audio2text/sosv.py | 6 +- engine/audio2text/vosk.py | 6 +- engine/main.py | 109 +++++++++--- engine/main.spec | 24 ++- engine/requirements.txt | 4 +- engine/utils/sysout.py | 1 - engine/utils/translation.py | 52 ++++-- package-lock.json | 67 +++---- src/main/types/index.ts | 5 + src/main/utils/AllConfig.ts | 9 +- src/main/utils/CaptionEngine.ts | 25 ++- src/renderer/src/components/EngineControl.vue | 53 ++++++ src/renderer/src/i18n/config/engine.ts | 45 +++++ src/renderer/src/stores/engineControl.ts | 20 +++ src/renderer/src/types/index.ts | 5 + 19 files changed, 519 insertions(+), 91 deletions(-) create mode 100644 engine/audio2text/glm.py diff --git a/.gitignore b/.gitignore index 8ad69a7..c5e680d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,14 @@ __pycache__ .venv test.py engine/build +engine/dist +engine/.venv +engine/__pycache__ +engine/portaudio +package-lock.json +engine/pyinstaller_cache engine/models engine/notebook .repomap .virtualme +engine/main.spec diff --git a/build/entitlements.mac.plist b/build/entitlements.mac.plist index 273c351..956f5b4 100644 --- a/build/entitlements.mac.plist +++ b/build/entitlements.mac.plist @@ -8,5 +8,9 @@ com.apple.security.cs.allow-dyld-environment-variables + com.apple.security.cs.disable-library-validation + + com.apple.security.device.audio-input + - \ No newline at end of file + diff --git a/engine/audio2text/__init__.py b/engine/audio2text/__init__.py index a4e7f15..c2f19d4 100644 --- a/engine/audio2text/__init__.py +++ b/engine/audio2text/__init__.py @@ -1,3 +1,4 @@ from .gummy import GummyRecognizer from .vosk import VoskRecognizer -from .sosv import SosvRecognizer \ No newline at end of file +from .sosv import SosvRecognizer +from .glm import GlmRecognizer diff --git a/engine/audio2text/glm.py b/engine/audio2text/glm.py new file mode 100644 index 0000000..e39021c --- /dev/null +++ b/engine/audio2text/glm.py @@ -0,0 +1,163 @@ +import threading +import io +import wave +import struct +import math +import audioop +import requests +from datetime import datetime + +from utils import shared_data +from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate + +class GlmRecognizer: + """ + 使用 GLM-ASR 引擎处理音频数据,并在标准输出中输出 Auto Caption 软件可读取的 JSON 字符串数据 + + 初始化参数: + url: GLM-ASR API URL + model: GLM-ASR 模型名称 + api_key: GLM-ASR API Key + source: 源语言 + target: 目标语言 + trans_model: 翻译模型名称 + ollama_name: Ollama 模型名称 + """ + def __init__(self, url: str, model: str, api_key: str, source: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''): + self.url = url + self.model = model + self.api_key = api_key + self.source = source + self.target = target + if trans_model == 'google': + self.trans_func = google_translate + else: + self.trans_func = ollama_translate + self.ollama_name = ollama_name + self.ollama_url = ollama_url + self.ollama_api_key = ollama_api_key + + self.audio_buffer = [] + self.is_speech = False + self.silence_frames = 0 + self.speech_start_time = None + self.time_str = '' + self.cur_id = 0 + + # VAD settings (假设 16k 16bit, chunk size 1024 or similar) + # 16bit = 2 bytes per sample. + # RMS threshold needs tuning. 500 is a conservative guess for silence. + self.threshold = 500 + self.silence_limit = 15 # frames (approx 0.5-1s depending on chunk size) + self.min_speech_frames = 10 # frames + + def start(self): + """启动 GLM 引擎""" + stdout_cmd('info', 'GLM-ASR recognizer started.') + + def stop(self): + """停止 GLM 引擎""" + stdout_cmd('info', 'GLM-ASR recognizer stopped.') + + def process_audio(self, chunk): + # chunk is bytes (int16) + rms = audioop.rms(chunk, 2) + + if rms > self.threshold: + if not self.is_speech: + self.is_speech = True + self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3] + self.audio_buffer = [] + self.audio_buffer.append(chunk) + self.silence_frames = 0 + else: + if self.is_speech: + self.audio_buffer.append(chunk) + self.silence_frames += 1 + if self.silence_frames > self.silence_limit: + # Speech ended + if len(self.audio_buffer) > self.min_speech_frames: + self.recognize(self.audio_buffer, self.time_str) + self.is_speech = False + self.audio_buffer = [] + self.silence_frames = 0 + + def recognize(self, audio_frames, time_s): + audio_bytes = b''.join(audio_frames) + + wav_io = io.BytesIO() + with wave.open(wav_io, 'wb') as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(16000) + wav_file.writeframes(audio_bytes) + wav_io.seek(0) + + threading.Thread( + target=self._do_request, + args=(wav_io.read(), time_s, self.cur_id) + ).start() + self.cur_id += 1 + + def _do_request(self, audio_content, time_s, index): + try: + files = { + 'file': ('audio.wav', audio_content, 'audio/wav') + } + data = { + 'model': self.model, + 'stream': 'false' + } + headers = { + 'Authorization': f'Bearer {self.api_key}' + } + + response = requests.post(self.url, headers=headers, data=data, files=files, timeout=15) + + if response.status_code == 200: + res_json = response.json() + text = res_json.get('text', '') + if text: + self.output_caption(text, time_s, index) + else: + try: + err_msg = response.json() + stdout_cmd('error', f"GLM API Error: {err_msg}") + except: + stdout_cmd('error', f"GLM API Error: {response.text}") + + except Exception as e: + stdout_cmd('error', f"GLM Request Failed: {str(e)}") + + def output_caption(self, text, time_s, index): + caption = { + 'command': 'caption', + 'index': index, + 'time_s': time_s, + 'time_t': datetime.now().strftime('%H:%M:%S.%f')[:-3], + 'text': text, + 'translation': '' + } + + if self.target: + if self.trans_func == ollama_translate: + th = threading.Thread( + target=self.trans_func, + args=(self.ollama_name, self.target, caption['text'], time_s, self.ollama_url, self.ollama_api_key), + daemon=True + ) + else: + th = threading.Thread( + target=self.trans_func, + args=(self.ollama_name, self.target, caption['text'], time_s), + daemon=True + ) + th.start() + + stdout_obj(caption) + + def translate(self): + global shared_data + while shared_data.status == 'running': + chunk = shared_data.chunk_queue.get() + self.process_audio(chunk) diff --git a/engine/audio2text/sosv.py b/engine/audio2text/sosv.py index 666b0b1..4466fe3 100644 --- a/engine/audio2text/sosv.py +++ b/engine/audio2text/sosv.py @@ -29,7 +29,7 @@ class SosvRecognizer: trans_model: 翻译模型名称 ollama_name: Ollama 模型名称 """ - def __init__(self, model_path: str, source: str, target: str | None, trans_model: str, ollama_name: str): + def __init__(self, model_path: str, source: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''): if model_path.startswith('"'): model_path = model_path[1:] if model_path.endswith('"'): @@ -45,6 +45,8 @@ class SosvRecognizer: else: self.trans_func = ollama_translate self.ollama_name = ollama_name + self.ollama_url = ollama_url + self.ollama_api_key = ollama_api_key self.time_str = '' self.cur_id = 0 self.prev_content = '' @@ -152,7 +154,7 @@ class SosvRecognizer: if self.target: th = threading.Thread( target=self.trans_func, - args=(self.ollama_name, self.target, caption['text'], self.time_str), + args=(self.ollama_name, self.target, caption['text'], self.time_str, self.ollama_url, self.ollama_api_key), daemon=True ) th.start() diff --git a/engine/audio2text/vosk.py b/engine/audio2text/vosk.py index 86b0991..480d298 100644 --- a/engine/audio2text/vosk.py +++ b/engine/audio2text/vosk.py @@ -18,7 +18,7 @@ class VoskRecognizer: trans_model: 翻译模型名称 ollama_name: Ollama 模型名称 """ - def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str): + def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''): SetLogLevel(-1) if model_path.startswith('"'): model_path = model_path[1:] @@ -31,6 +31,8 @@ class VoskRecognizer: else: self.trans_func = ollama_translate self.ollama_name = ollama_name + self.ollama_url = ollama_url + self.ollama_api_key = ollama_api_key self.time_str = '' self.cur_id = 0 self.prev_content = '' @@ -66,7 +68,7 @@ class VoskRecognizer: if self.target: th = threading.Thread( target=self.trans_func, - args=(self.ollama_name, self.target, caption['text'], self.time_str), + args=(self.ollama_name, self.target, caption['text'], self.time_str, self.ollama_url, self.ollama_api_key), daemon=True ) th.start() diff --git a/engine/main.py b/engine/main.py index 76db7e8..2d05bd5 100644 --- a/engine/main.py +++ b/engine/main.py @@ -8,6 +8,7 @@ from utils import merge_chunk_channels, resample_chunk_mono from audio2text import GummyRecognizer from audio2text import VoskRecognizer from audio2text import SosvRecognizer +from audio2text import GlmRecognizer from sysaudio import AudioStream @@ -74,7 +75,7 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str, r: bool, rp: str): engine.stop() -def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp: str): +def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str): """ Parameters: a: Audio source: 0 for output, 1 for input @@ -83,14 +84,16 @@ def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp: t: Target language tm: Translation model type, ollama or google omn: Ollama model name + ourl: Ollama Base URL + okey: Ollama API Key r: Whether to record the audio rp: Path to save the recorded audio """ stream = AudioStream(a, c) if t == 'none': - engine = VoskRecognizer(vosk, None, tm, omn) + engine = VoskRecognizer(vosk, None, tm, omn, ourl, okey) else: - engine = VoskRecognizer(vosk, t, tm, omn) + engine = VoskRecognizer(vosk, t, tm, omn, ourl, okey) engine.start() stream_thread = threading.Thread( @@ -106,7 +109,7 @@ def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp: engine.stop() -def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: bool, rp: str): +def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str): """ Parameters: a: Audio source: 0 for output, 1 for input @@ -116,14 +119,16 @@ def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: b t: Target language tm: Translation model type, ollama or google omn: Ollama model name + ourl: Ollama API URL + okey: Ollama API Key r: Whether to record the audio rp: Path to save the recorded audio """ stream = AudioStream(a, c) if t == 'none': - engine = SosvRecognizer(sosv, s, None, tm, omn) + engine = SosvRecognizer(sosv, s, None, tm, omn, ourl, okey) else: - engine = SosvRecognizer(sosv, s, t, tm, omn) + engine = SosvRecognizer(sosv, s, t, tm, omn, ourl, okey) engine.start() stream_thread = threading.Thread( @@ -139,16 +144,54 @@ def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: b engine.stop() +def main_glm(a: int, c: int, url: str, model: str, key: str, s: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str): + """ + Parameters: + a: Audio source + c: Chunk rate + url: GLM API URL + model: GLM Model Name + key: GLM API Key + s: Source language + t: Target language + tm: Translation model + omn: Ollama model name + ourl: Ollama API URL + okey: Ollama API Key + r: Record + rp: Record path + """ + stream = AudioStream(a, c) + if t == 'none': + engine = GlmRecognizer(url, model, key, s, None, tm, omn, ourl, okey) + else: + engine = GlmRecognizer(url, model, key, s, t, tm, omn, ourl, okey) + + engine.start() + stream_thread = threading.Thread( + target=audio_recording, + args=(stream, True, r, rp), + daemon=True + ) + stream_thread.start() + try: + engine.translate() + except KeyboardInterrupt: + stdout("Keyboard interrupt detected. Exiting...") + engine.stop() + + + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Convert system audio stream to text') # all parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk or sosv') - parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') - parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second') - parser.add_argument('-p', '--port', default=0, help='The port to run the server on, 0 for no server') - parser.add_argument('-d', '--display_caption', default=0, help='Display caption on terminal, 0 for no display, 1 for display') + parser.add_argument('-a', '--audio_type', type=int, default=0, help='Audio stream source: 0 for output, 1 for input') + parser.add_argument('-c', '--chunk_rate', type=int, default=10, help='Number of audio stream chunks collected per second') + parser.add_argument('-p', '--port', type=int, default=0, help='The port to run the server on, 0 for no server') + parser.add_argument('-d', '--display_caption', type=int, default=0, help='Display caption on terminal, 0 for no display, 1 for display') parser.add_argument('-t', '--target_language', default='none', help='Target language code, "none" for no translation') - parser.add_argument('-r', '--record', default=0, help='Whether to record the audio, 0 for no recording, 1 for recording') + parser.add_argument('-r', '--record', type=int, default=0, help='Whether to record the audio, 0 for no recording, 1 for recording') parser.add_argument('-rp', '--record_path', default='', help='Path to save the recorded audio') # gummy and sosv parser.add_argument('-s', '--source_language', default='auto', help='Source language code') @@ -157,20 +200,24 @@ if __name__ == "__main__": # vosk and sosv parser.add_argument('-tm', '--translation_model', default='ollama', help='Model for translation: ollama or google') parser.add_argument('-omn', '--ollama_name', default='', help='Ollama model name for translation') + parser.add_argument('-ourl', '--ollama_url', default='', help='Ollama API URL') + parser.add_argument('-okey', '--ollama_api_key', default='', help='Ollama API Key') # vosk only parser.add_argument('-vosk', '--vosk_model', default='', help='The path to the vosk model.') # sosv only parser.add_argument('-sosv', '--sosv_model', default=None, help='The SenseVoice model path') + # glm only + parser.add_argument('-gurl', '--glm_url', default='https://open.bigmodel.cn/api/paas/v4/audio/transcriptions', help='GLM API URL') + parser.add_argument('-gmodel', '--glm_model', default='glm-asr-2512', help='GLM Model Name') + parser.add_argument('-gkey', '--glm_api_key', default='', help='GLM API Key') args = parser.parse_args() - if int(args.port) == 0: - shared_data.status = "running" - else: - start_server(int(args.port)) - - if int(args.display_caption) != 0: + + if args.port != 0: + threading.Thread(target=start_server, args=(args.port,), daemon=True).start() + + if args.display_caption == '1': change_caption_display(True) - print("Caption will be displayed on terminal") if args.caption_engine == 'gummy': main_gummy( @@ -179,7 +226,7 @@ if __name__ == "__main__": int(args.audio_type), int(args.chunk_rate), args.api_key, - True if int(args.record) == 1 else False, + bool(int(args.record)), args.record_path ) elif args.caption_engine == 'vosk': @@ -190,7 +237,9 @@ if __name__ == "__main__": args.target_language, args.translation_model, args.ollama_name, - True if int(args.record) == 1 else False, + args.ollama_url, + args.ollama_api_key, + bool(int(args.record)), args.record_path ) elif args.caption_engine == 'sosv': @@ -202,7 +251,25 @@ if __name__ == "__main__": args.target_language, args.translation_model, args.ollama_name, - True if int(args.record) == 1 else False, + args.ollama_url, + args.ollama_api_key, + bool(int(args.record)), + args.record_path + ) + elif args.caption_engine == 'glm': + main_glm( + int(args.audio_type), + int(args.chunk_rate), + args.glm_url, + args.glm_model, + args.glm_api_key, + args.source_language, + args.target_language, + args.translation_model, + args.ollama_name, + args.ollama_url, + args.ollama_api_key, + bool(int(args.record)), args.record_path ) else: diff --git a/engine/main.spec b/engine/main.spec index 5475c62..f36f03a 100644 --- a/engine/main.spec +++ b/engine/main.spec @@ -6,11 +6,17 @@ import sys if sys.platform == 'win32': vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve()) else: - vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve()) + venv_lib = Path('./.venv/lib') + python_dirs = list(venv_lib.glob('python*')) + if python_dirs: + vosk_path = str((python_dirs[0] / 'site-packages' / 'vosk').resolve()) + else: + vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve()) a = Analysis( ['main.py'], pathex=[], + # binaries=[('portaudio/lib/.libs/libportaudio.2.dylib', '.')], binaries=[], datas=[(vosk_path, 'vosk')], hiddenimports=[], @@ -27,21 +33,27 @@ pyz = PYZ(a.pure) exe = EXE( pyz, a.scripts, - a.binaries, - a.datas, [], + exclude_binaries=True, name='main', debug=False, bootloader_ignore_signals=False, strip=False, upx=True, - upx_exclude=[], - runtime_tmpdir=None, console=True, disable_windowed_traceback=False, argv_emulation=False, target_arch=None, codesign_identity=None, entitlements_file=None, - onefile=True, +) + +coll = COLLECT( + exe, + a.binaries, + a.datas, + strip=False, + upx=True, + upx_exclude=[], + name='main', ) diff --git a/engine/requirements.txt b/engine/requirements.txt index 3d33f8f..1e4140e 100644 --- a/engine/requirements.txt +++ b/engine/requirements.txt @@ -7,4 +7,6 @@ pyaudio; sys_platform == 'darwin' pyaudiowpatch; sys_platform == 'win32' googletrans ollama -sherpa_onnx \ No newline at end of file +sherpa_onnx +requests +openai diff --git a/engine/utils/sysout.py b/engine/utils/sysout.py index e2c6811..7a07827 100644 --- a/engine/utils/sysout.py +++ b/engine/utils/sysout.py @@ -47,7 +47,6 @@ def translation_display(obj): def stdout_obj(obj): global display_caption - print(obj['command'], display_caption) if obj['command'] == 'caption' and display_caption: caption_display(obj) return diff --git a/engine/utils/translation.py b/engine/utils/translation.py index b65b6c2..26aa3f2 100644 --- a/engine/utils/translation.py +++ b/engine/utils/translation.py @@ -1,5 +1,9 @@ -from ollama import chat +from ollama import chat, Client from ollama import ChatResponse +try: + from openai import OpenAI +except ImportError: + OpenAI = None import asyncio from googletrans import Translator from .sysout import stdout_cmd, stdout_obj @@ -17,15 +21,43 @@ lang_map = { 'zh-cn': 'Chinese' } -def ollama_translate(model: str, target: str, text: str, time_s: str): - response: ChatResponse = chat( - model=model, - messages=[ - {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."}, - {"role": "user", "content": text} - ] - ) - content = response.message.content or "" +def ollama_translate(model: str, target: str, text: str, time_s: str, url: str = '', key: str = ''): + content = "" + try: + if url: + if OpenAI: + client = OpenAI(base_url=url, api_key=key if key else "ollama") + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."}, + {"role": "user", "content": text} + ] + ) + content = response.choices[0].message.content or "" + else: + client = Client(host=url) + response: ChatResponse = client.chat( + model=model, + messages=[ + {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."}, + {"role": "user", "content": text} + ] + ) + content = response.message.content or "" + else: + response: ChatResponse = chat( + model=model, + messages=[ + {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."}, + {"role": "user", "content": text} + ] + ) + content = response.message.content or "" + except Exception as e: + stdout_cmd("warn", f"Translation failed: {str(e)}") + return + if content.startswith(''): index = content.find('') if index != -1: diff --git a/package-lock.json b/package-lock.json index d8cab74..d70c789 100644 --- a/package-lock.json +++ b/package-lock.json @@ -110,6 +110,7 @@ "integrity": "sha512-IaaGWsQqfsQWVLqMn9OB92MNN7zukfVA4s7KKAI0KfrrDsZ0yhi5uV4baBuLuN7n3vsZpwP8asPPcVwApxvjBQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ -2274,6 +2275,7 @@ "resolved": "https://registry.npmmirror.com/@types/node/-/node-22.15.17.tgz", "integrity": "sha512-wIX2aSZL5FE+MR0JlvF87BNVrtFWf6AE6rxSE9X7OwnVvoyCQjpzSRJ+M87se/4QCkCiebQAqrJ0y6fwIyi7nw==", "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~6.21.0" } @@ -2360,6 +2362,7 @@ "integrity": "sha512-B2MdzyWxCE2+SqiZHAjPphft+/2x2FlO9YBx7eKE1BCb+rqBlQdhtAEhzIEdozHd55DXPmxBdpMygFJjfjjA9A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.32.0", "@typescript-eslint/types": "8.32.0", @@ -2791,6 +2794,7 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2851,6 +2855,7 @@ "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -3064,7 +3069,6 @@ "integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "archiver-utils": "^2.1.0", "async": "^3.2.4", @@ -3084,7 +3088,6 @@ "integrity": "sha512-bEL/yUb/fNNiNTuUz979Z0Yg5L+LzLxGJz8x79lYmR54fmTIb6ob/hNQgkQnIUDWIFjZVQwl9Xs356I6BAMHfw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "glob": "^7.1.4", "graceful-fs": "^4.2.0", @@ -3107,7 +3110,6 @@ "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -3123,8 +3125,7 @@ "resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz", "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/archiver-utils/node_modules/string_decoder": { "version": "1.1.1", @@ -3132,7 +3133,6 @@ "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "safe-buffer": "~5.1.0" } @@ -3351,6 +3351,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001716", "electron-to-chromium": "^1.5.149", @@ -3848,7 +3849,6 @@ "integrity": "sha512-D3uMHtGc/fcO1Gt1/L7i1e33VOvD4A9hfQLP+6ewd+BvG/gQ84Yh4oftEhAdjSMgBgwGL+jsppT7JYNpo6MHHg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "buffer-crc32": "^0.2.13", "crc32-stream": "^4.0.2", @@ -3994,7 +3994,6 @@ "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "crc32": "bin/crc32.njs" }, @@ -4008,7 +4007,6 @@ "integrity": "sha512-NT7w2JVU7DFroFdYkeq8cywxrgjPHWkdX1wjpRQXPX5Asews3tA+Ght6lddQO5Mkumffp3X7GEqku3epj2toIw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "crc-32": "^1.2.0", "readable-stream": "^3.4.0" @@ -4248,6 +4246,7 @@ "integrity": "sha512-NoXo6Liy2heSklTI5OIZbCgXC1RzrDQsZkeEwXhdOro3FT1VBOvbubvscdPnjVuQ4AMwwv61oaH96AbiYg9EnQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "app-builder-lib": "25.1.8", "builder-util": "25.1.7", @@ -4410,6 +4409,7 @@ "integrity": "sha512-6dLslJrQYB1qvqVPYRv1PhAA/uytC66nUeiTcq2JXiBzrmTWCHppqtGUjZhvnSRVatBCT5/SFdizdzcBiEiYUg==", "hasInstallScript": true, "license": "MIT", + "peer": true, "dependencies": { "@electron/get": "^2.0.0", "@types/node": "^22.7.7", @@ -4454,7 +4454,6 @@ "integrity": "sha512-2ntkJ+9+0GFP6nAISiMabKt6eqBB0kX1QqHNWFWAXgi0VULKGisM46luRFpIBiU3u/TDmhZMM8tzvo2Abn3ayg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "app-builder-lib": "25.1.8", "archiver": "^5.3.1", @@ -4468,7 +4467,6 @@ "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", @@ -4484,7 +4482,6 @@ "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "universalify": "^2.0.0" }, @@ -4498,7 +4495,6 @@ "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">= 10.0.0" } @@ -4813,6 +4809,7 @@ "integrity": "sha512-LSehfdpgMeWcTZkWZVIJl+tkZ2nuSkyyB9C27MZqFWXuph7DvaowgcTvKqxvpLW1JZIk8PN7hFY3Rj9LQ7m7lg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -4874,6 +4871,7 @@ "integrity": "sha512-zc1UmCpNltmVY34vuLRV61r1K27sWuX39E+uyUnY8xS2Bex88VV9cugG+UZbRSRGtGyFboj+D8JODyme1plMpw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -5351,8 +5349,7 @@ "resolved": "https://registry.npmmirror.com/fs-constants/-/fs-constants-1.0.0.tgz", "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/fs-extra": { "version": "8.1.0", @@ -6108,8 +6105,7 @@ "resolved": "https://registry.npmmirror.com/isarray/-/isarray-1.0.0.tgz", "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/isbinaryfile": { "version": "5.0.4", @@ -6300,7 +6296,6 @@ "integrity": "sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "readable-stream": "^2.0.5" }, @@ -6314,7 +6309,6 @@ "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -6330,8 +6324,7 @@ "resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz", "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lazystream/node_modules/string_decoder": { "version": "1.1.1", @@ -6339,7 +6332,6 @@ "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "safe-buffer": "~5.1.0" } @@ -6391,32 +6383,28 @@ "resolved": "https://registry.npmmirror.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz", "integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.difference": { "version": "4.5.0", "resolved": "https://registry.npmmirror.com/lodash.difference/-/lodash.difference-4.5.0.tgz", "integrity": "sha512-dS2j+W26TQ7taQBGN8Lbbq04ssV3emRw4NY58WErlTO29pIqS0HmoT5aJ9+TUQ1N3G+JOZSji4eugsWwGp9yPA==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.flatten": { "version": "4.4.0", "resolved": "https://registry.npmmirror.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz", "integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.isplainobject": { "version": "4.0.6", "resolved": "https://registry.npmmirror.com/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.merge": { "version": "4.6.2", @@ -6430,8 +6418,7 @@ "resolved": "https://registry.npmmirror.com/lodash.union/-/lodash.union-4.6.0.tgz", "integrity": "sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/log-symbols": { "version": "4.1.0", @@ -6984,7 +6971,6 @@ "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -7408,6 +7394,7 @@ "integrity": "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -7436,8 +7423,7 @@ "resolved": "https://registry.npmmirror.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz", "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/progress": { "version": "2.0.3", @@ -7556,7 +7542,6 @@ "integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==", "dev": true, "license": "Apache-2.0", - "peer": true, "dependencies": { "minimatch": "^5.1.0" } @@ -7567,7 +7552,6 @@ "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", "dev": true, "license": "ISC", - "peer": true, "dependencies": { "brace-expansion": "^2.0.1" }, @@ -8235,7 +8219,6 @@ "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "bl": "^4.0.3", "end-of-stream": "^1.4.1", @@ -8360,6 +8343,7 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -8462,6 +8446,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -8611,6 +8596,7 @@ "integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", @@ -8701,6 +8687,7 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -8720,6 +8707,7 @@ "resolved": "https://registry.npmmirror.com/vue/-/vue-3.5.13.tgz", "integrity": "sha512-wmeiSMxkZCSc+PM2w2VRsOYAZC8GdipNFRTsLSfodVqI9mbejKeXEGr8SckuLnrQPGe3oJN5c3K0vpoU9q/wCQ==", "license": "MIT", + "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.13", "@vue/compiler-sfc": "3.5.13", @@ -8742,6 +8730,7 @@ "integrity": "sha512-dbCBnd2e02dYWsXoqX5yKUZlOt+ExIpq7hmHKPb5ZqKcjf++Eo0hMseFTZMLKThrUk61m+Uv6A2YSBve6ZvuDQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "debug": "^4.4.0", "eslint-scope": "^8.2.0", @@ -9046,7 +9035,6 @@ "integrity": "sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "archiver-utils": "^3.0.4", "compress-commons": "^4.1.2", @@ -9062,7 +9050,6 @@ "integrity": "sha512-KVgf4XQVrTjhyWmx6cte4RxonPLR9onExufI1jhvw/MQ4BB6IsZD5gT8Lq+u/+pRkWna/6JoHpiQioaqFP5Rzw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "glob": "^7.2.3", "graceful-fs": "^4.2.0", diff --git a/src/main/types/index.ts b/src/main/types/index.ts index 4fe5db2..ce94e4b 100644 --- a/src/main/types/index.ts +++ b/src/main/types/index.ts @@ -8,6 +8,8 @@ export interface Controls { targetLang: string, transModel: string, ollamaName: string, + ollamaUrl: string, + ollamaApiKey: string, engine: string, audio: 0 | 1, translation: boolean, @@ -15,6 +17,9 @@ export interface Controls { API_KEY: string, voskModelPath: string, sosvModelPath: string, + glmUrl: string, + glmModel: string, + glmApiKey: string, recordingPath: string, customized: boolean, customizedApp: string, diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index 5a7ea8c..790b83a 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -6,7 +6,7 @@ import { Log } from './Log' import { app, BrowserWindow } from 'electron' import * as path from 'path' import * as fs from 'fs' -import os from 'os' +import * as os from 'os' interface CaptionTranslation { time_s: string, @@ -44,13 +44,18 @@ const defaultControls: Controls = { sourceLang: 'en', targetLang: 'zh', transModel: 'ollama', - ollamaName: '', + ollamaName: 'qwen2.5:0.5b', + ollamaUrl: 'http://localhost:11434', + ollamaApiKey: '', engine: 'gummy', audio: 0, engineEnabled: false, API_KEY: '', voskModelPath: '', sosvModelPath: '', + glmUrl: 'https://open.bigmodel.cn/api/paas/v4/audio/transcriptions', + glmModel: 'glm-asr-2512', + glmApiKey: '', recordingPath: getDesktopPath(), translation: true, recording: false, diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index c87e383..1dadbb4 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -1,8 +1,8 @@ import { exec, spawn } from 'child_process' import { app } from 'electron' import { is } from '@electron-toolkit/utils' -import path from 'path' -import net from 'net' +import * as path from 'path' +import * as net from 'net' import { controlWindow } from '../ControlWindow' import { allConfig } from './AllConfig' import { i18n } from '../i18n' @@ -60,7 +60,7 @@ export class CaptionEngine { this.appPath = path.join(process.resourcesPath, 'engine', 'main.exe') } else { - this.appPath = path.join(process.resourcesPath, 'engine', 'main') + this.appPath = path.join(process.resourcesPath, 'engine', 'main', 'main') } } this.command.push('-a', allConfig.controls.audio ? '1' : '0') @@ -87,6 +87,8 @@ export class CaptionEngine { this.command.push('-vosk', `"${allConfig.controls.voskModelPath}"`) this.command.push('-tm', allConfig.controls.transModel) this.command.push('-omn', allConfig.controls.ollamaName) + if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl) + if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey) } else if(allConfig.controls.engine === 'sosv'){ this.command.push('-e', 'sosv') @@ -94,10 +96,25 @@ export class CaptionEngine { this.command.push('-sosv', `"${allConfig.controls.sosvModelPath}"`) this.command.push('-tm', allConfig.controls.transModel) this.command.push('-omn', allConfig.controls.ollamaName) + if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl) + if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey) + } + else if(allConfig.controls.engine === 'glm'){ + this.command.push('-e', 'glm') + this.command.push('-s', allConfig.controls.sourceLang) + this.command.push('-gurl', allConfig.controls.glmUrl) + this.command.push('-gmodel', allConfig.controls.glmModel) + if(allConfig.controls.glmApiKey) { + this.command.push('-gkey', allConfig.controls.glmApiKey) + } + this.command.push('-tm', allConfig.controls.transModel) + this.command.push('-omn', allConfig.controls.ollamaName) + if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl) + if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey) } } Log.info('Engine Path:', this.appPath) - if(this.command.length > 2 && this.command.at(-2) === '-k') { + if(this.command.length > 2 && this.command[this.command.length - 2] === '-k') { const _command = [...this.command] _command[_command.length -1] = _command[_command.length -1].replace(/./g, '*') Log.info('Engine Command:', _command) diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue index b6e04e8..d152882 100644 --- a/src/renderer/src/components/EngineControl.vue +++ b/src/renderer/src/components/EngineControl.vue @@ -52,6 +52,44 @@ v-model:value="currentOllamaName" > +
+ Ollama Domain + +
+
+ Ollama API Key + +
+
+ GLM API URL + +
+
+ GLM Model Name + +
+
+ GLM API Key + +
{{ $t('engine.audioType') }} (true) const currentRecording = ref(false) const currentTransModel = ref('ollama') const currentOllamaName = ref('') +const currentOllamaUrl = ref('') +const currentOllamaApiKey = ref('') const currentAPI_KEY = ref('') const currentVoskModelPath = ref('') const currentSosvModelPath = ref('') +const currentGlmUrl = ref('') +const currentGlmModel = ref('') +const currentGlmApiKey = ref('') const currentRecordingPath = ref('') const currentCustomized = ref(false) const currentCustomizedApp = ref('') @@ -294,12 +337,17 @@ function applyChange(){ engineControl.transModel = currentTransModel.value engineControl.ollamaName = currentOllamaName.value engineControl.engine = currentEngine.value + engineControl.ollamaUrl = currentOllamaUrl.value ?? "http://localhost:11434" + engineControl.ollamaApiKey = currentOllamaApiKey.value engineControl.audio = currentAudio.value engineControl.translation = currentTranslation.value engineControl.recording = currentRecording.value engineControl.API_KEY = currentAPI_KEY.value engineControl.voskModelPath = currentVoskModelPath.value engineControl.sosvModelPath = currentSosvModelPath.value + engineControl.glmUrl = currentGlmUrl.value ?? "https://open.bigmodel.cn/api/paas/v4/audio/transcriptions" + engineControl.glmModel = currentGlmModel.value ?? "glm-asr-2512" + engineControl.glmApiKey = currentGlmApiKey.value engineControl.recordingPath = currentRecordingPath.value engineControl.customized = currentCustomized.value engineControl.customizedApp = currentCustomizedApp.value @@ -320,6 +368,8 @@ function cancelChange(){ currentTargetLang.value = engineControl.targetLang currentTransModel.value = engineControl.transModel currentOllamaName.value = engineControl.ollamaName + currentOllamaUrl.value = engineControl.ollamaUrl + currentOllamaApiKey.value = engineControl.ollamaApiKey currentEngine.value = engineControl.engine currentAudio.value = engineControl.audio currentTranslation.value = engineControl.translation @@ -327,6 +377,9 @@ function cancelChange(){ currentAPI_KEY.value = engineControl.API_KEY currentVoskModelPath.value = engineControl.voskModelPath currentSosvModelPath.value = engineControl.sosvModelPath + currentGlmUrl.value = engineControl.glmUrl + currentGlmModel.value = engineControl.glmModel + currentGlmApiKey.value = engineControl.glmApiKey currentRecordingPath.value = engineControl.recordingPath currentCustomized.value = engineControl.customized currentCustomizedApp.value = engineControl.customizedApp diff --git a/src/renderer/src/i18n/config/engine.ts b/src/renderer/src/i18n/config/engine.ts index e8ff7e4..37f36a0 100644 --- a/src/renderer/src/i18n/config/engine.ts +++ b/src/renderer/src/i18n/config/engine.ts @@ -58,6 +58,21 @@ export const engines = { { value: 'ollama', label: 'Ollama 本地模型' }, { value: 'google', label: 'Google API 调用' }, ] + }, + { + value: 'glm', + label: '云端 / 智谱AI / GLM-ASR', + languages: [ + { value: 'auto', type: -1, label: '自动检测' }, + { value: 'en', type: 0, label: '英语' }, + { value: 'zh', type: 0, label: '中文' }, + { value: 'ja', type: 0, label: '日语' }, + { value: 'ko', type: 0, label: '韩语' }, + ], + transModel: [ + { value: 'ollama', label: 'Ollama 本地模型' }, + { value: 'google', label: 'Google API 调用' }, + ] } ], en: [ @@ -118,6 +133,21 @@ export const engines = { { value: 'ollama', label: 'Ollama Local Model' }, { value: 'google', label: 'Google API Call' }, ] + }, + { + value: 'glm', + label: 'Cloud / Zhipu AI / GLM-ASR', + languages: [ + { value: 'auto', type: -1, label: 'Auto Detect' }, + { value: 'en', type: 0, label: 'English' }, + { value: 'zh', type: 0, label: 'Chinese' }, + { value: 'ja', type: 0, label: 'Japanese' }, + { value: 'ko', type: 0, label: 'Korean' }, + ], + transModel: [ + { value: 'ollama', label: 'Ollama Local Model' }, + { value: 'google', label: 'Google API Call' }, + ] } ], ja: [ @@ -178,6 +208,21 @@ export const engines = { { value: 'ollama', label: 'Ollama ローカルモデル' }, { value: 'google', label: 'Google API 呼び出し' }, ] + }, + { + value: 'glm', + label: 'クラウド / 智譜AI / GLM-ASR', + languages: [ + { value: 'auto', type: -1, label: '自動検出' }, + { value: 'en', type: 0, label: '英語' }, + { value: 'zh', type: 0, label: '中国語' }, + { value: 'ja', type: 0, label: '日本語' }, + { value: 'ko', type: 0, label: '韓国語' }, + ], + transModel: [ + { value: 'ollama', label: 'Ollama ローカルモデル' }, + { value: 'google', label: 'Google API 呼び出し' }, + ] } ] } diff --git a/src/renderer/src/stores/engineControl.ts b/src/renderer/src/stores/engineControl.ts index 88e64a8..8736401 100644 --- a/src/renderer/src/stores/engineControl.ts +++ b/src/renderer/src/stores/engineControl.ts @@ -21,6 +21,8 @@ export const useEngineControlStore = defineStore('engineControl', () => { const targetLang = ref('zh') const transModel = ref('ollama') const ollamaName = ref('') + const ollamaUrl = ref('') + const ollamaApiKey = ref('') const engine = ref('gummy') const audio = ref<0 | 1>(0) const translation = ref(true) @@ -28,6 +30,9 @@ export const useEngineControlStore = defineStore('engineControl', () => { const API_KEY = ref('') const voskModelPath = ref('') const sosvModelPath = ref('') + const glmUrl = ref('https://open.bigmodel.cn/api/paas/v4/audio/transcriptions') + const glmModel = ref('glm-asr-2512') + const glmApiKey = ref('') const recordingPath = ref('') const customized = ref(false) const customizedApp = ref('') @@ -44,6 +49,8 @@ export const useEngineControlStore = defineStore('engineControl', () => { targetLang: targetLang.value, transModel: transModel.value, ollamaName: ollamaName.value, + ollamaUrl: ollamaUrl.value, + ollamaApiKey: ollamaApiKey.value, engine: engine.value, audio: audio.value, translation: translation.value, @@ -51,6 +58,9 @@ export const useEngineControlStore = defineStore('engineControl', () => { API_KEY: API_KEY.value, voskModelPath: voskModelPath.value, sosvModelPath: sosvModelPath.value, + glmUrl: glmUrl.value, + glmModel: glmModel.value, + glmApiKey: glmApiKey.value, recordingPath: recordingPath.value, customized: customized.value, customizedApp: customizedApp.value, @@ -80,6 +90,8 @@ export const useEngineControlStore = defineStore('engineControl', () => { targetLang.value = controls.targetLang transModel.value = controls.transModel ollamaName.value = controls.ollamaName + ollamaUrl.value = controls.ollamaUrl + ollamaApiKey.value = controls.ollamaApiKey engine.value = controls.engine audio.value = controls.audio engineEnabled.value = controls.engineEnabled @@ -88,6 +100,9 @@ export const useEngineControlStore = defineStore('engineControl', () => { API_KEY.value = controls.API_KEY voskModelPath.value = controls.voskModelPath sosvModelPath.value = controls.sosvModelPath + glmUrl.value = controls.glmUrl || 'https://open.bigmodel.cn/api/paas/v4/audio/transcriptions' + glmModel.value = controls.glmModel || 'glm-asr-2512' + glmApiKey.value = controls.glmApiKey recordingPath.value = controls.recordingPath customized.value = controls.customized customizedApp.value = controls.customizedApp @@ -150,6 +165,8 @@ export const useEngineControlStore = defineStore('engineControl', () => { targetLang, // 目标语言 transModel, // 翻译模型 ollamaName, // Ollama 模型 + ollamaUrl, + ollamaApiKey, engine, // 字幕引擎 audio, // 选择音频 translation, // 是否启用翻译 @@ -157,6 +174,9 @@ export const useEngineControlStore = defineStore('engineControl', () => { API_KEY, // API KEY voskModelPath, // vosk 模型路径 sosvModelPath, // sosv 模型路径 + glmUrl, // GLM API URL + glmModel, // GLM 模型名称 + glmApiKey, // GLM API Key recordingPath, // 录音保存路径 customized, // 是否使用自定义字幕引擎 customizedApp, // 自定义字幕引擎的应用程序 diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index 4fe5db2..ce94e4b 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -8,6 +8,8 @@ export interface Controls { targetLang: string, transModel: string, ollamaName: string, + ollamaUrl: string, + ollamaApiKey: string, engine: string, audio: 0 | 1, translation: boolean, @@ -15,6 +17,9 @@ export interface Controls { API_KEY: string, voskModelPath: string, sosvModelPath: string, + glmUrl: string, + glmModel: string, + glmApiKey: string, recordingPath: string, customized: boolean, customizedApp: string,