mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:14:42 +08:00
Merge pull request #25 from nocmt/dev_glmasr
feat(engine): 添加GLM-ASR语音识别引擎支持
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -7,8 +7,13 @@ out
|
||||
__pycache__
|
||||
.venv
|
||||
test.py
|
||||
|
||||
engine/build
|
||||
engine/portaudio
|
||||
engine/pyinstaller_cache
|
||||
engine/models
|
||||
engine/notebook
|
||||
# engine/main.spec
|
||||
|
||||
.repomap
|
||||
.virtualme
|
||||
|
||||
@@ -41,7 +41,7 @@ SOSV 模型下载:[ Shepra-ONNX SenseVoice Model](https://github.com/HiMeditat
|
||||
- 支持调用本地 Ollama 模型或云端 Google 翻译 API 进行翻译
|
||||
- 跨平台(Windows、macOS、Linux)、多界面语言(中文、英语、日语)支持
|
||||
- 丰富的字幕样式设置(字体、字体大小、字体粗细、字体颜色、背景颜色等)
|
||||
- 灵活的字幕引擎选择(阿里云 Gummy 云端模型、本地 Vosk 模型、本地 SOSV 模型、还可以自己开发模型)
|
||||
- 灵活的字幕引擎选择(阿里云 Gummy 云端模型、GLM-ASR 云端模型、本地 Vosk 模型、本地 SOSV 模型、还可以自己开发模型)
|
||||
- 多语言识别与翻译(见下文“⚙️ 自带字幕引擎说明”)
|
||||
- 字幕记录展示与导出(支持导出 `.srt` 和 `.json` 格式)
|
||||
|
||||
@@ -62,6 +62,7 @@ macOS 平台和 Linux 平台获取系统音频输出需要进行额外设置,
|
||||
| | 识别效果 | 部署类型 | 支持语言 | 翻译 | 备注 |
|
||||
| ------------------------------------------------------------ | -------- | ------------- | ---------- | ---------- | ---------------------------------------------------------- |
|
||||
| [Gummy](https://help.aliyun.com/zh/model-studio/gummy-speech-recognition-translation) | 很好😊 | 云端 / 阿里云 | 10 种 | 自带翻译 | 收费,0.54CNY / 小时 |
|
||||
| [glm-asr-2512](https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-asr-2512) | 很好😊 | 云端 / 智谱 AI | 4 种 | 需额外配置 | 收费,约 0.72CNY / 小时 |
|
||||
| [Vosk](https://alphacephei.com/vosk) | 较差😞 | 本地 / CPU | 超过 30 种 | 需额外配置 | 支持的语言非常多 |
|
||||
| [SOSV](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html) | 一般😐 | 本地 / CPU | 5 种 | 需额外配置 | 仅有一个模型 |
|
||||
| 自己开发 | 🤔 | 自定义 | 自定义 | 自定义 | 根据[文档](./docs/engine-manual/zh.md)使用 Python 自己开发 |
|
||||
|
||||
@@ -8,5 +8,9 @@
|
||||
<true/>
|
||||
<key>com.apple.security.cs.allow-dyld-environment-variables</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.disable-library-validation</key>
|
||||
<true/>
|
||||
<key>com.apple.security.device.audio-input</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
</plist>
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .gummy import GummyRecognizer
|
||||
from .vosk import VoskRecognizer
|
||||
from .sosv import SosvRecognizer
|
||||
from .sosv import SosvRecognizer
|
||||
from .glm import GlmRecognizer
|
||||
|
||||
163
engine/audio2text/glm.py
Normal file
163
engine/audio2text/glm.py
Normal file
@@ -0,0 +1,163 @@
|
||||
import threading
|
||||
import io
|
||||
import wave
|
||||
import struct
|
||||
import math
|
||||
import audioop
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
from utils import shared_data
|
||||
from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate
|
||||
|
||||
class GlmRecognizer:
|
||||
"""
|
||||
使用 GLM-ASR 引擎处理音频数据,并在标准输出中输出 Auto Caption 软件可读取的 JSON 字符串数据
|
||||
|
||||
初始化参数:
|
||||
url: GLM-ASR API URL
|
||||
model: GLM-ASR 模型名称
|
||||
api_key: GLM-ASR API Key
|
||||
source: 源语言
|
||||
target: 目标语言
|
||||
trans_model: 翻译模型名称
|
||||
ollama_name: Ollama 模型名称
|
||||
"""
|
||||
def __init__(self, url: str, model: str, api_key: str, source: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''):
|
||||
self.url = url
|
||||
self.model = model
|
||||
self.api_key = api_key
|
||||
self.source = source
|
||||
self.target = target
|
||||
if trans_model == 'google':
|
||||
self.trans_func = google_translate
|
||||
else:
|
||||
self.trans_func = ollama_translate
|
||||
self.ollama_name = ollama_name
|
||||
self.ollama_url = ollama_url
|
||||
self.ollama_api_key = ollama_api_key
|
||||
|
||||
self.audio_buffer = []
|
||||
self.is_speech = False
|
||||
self.silence_frames = 0
|
||||
self.speech_start_time = None
|
||||
self.time_str = ''
|
||||
self.cur_id = 0
|
||||
|
||||
# VAD settings (假设 16k 16bit, chunk size 1024 or similar)
|
||||
# 16bit = 2 bytes per sample.
|
||||
# RMS threshold needs tuning. 500 is a conservative guess for silence.
|
||||
self.threshold = 500
|
||||
self.silence_limit = 15 # frames (approx 0.5-1s depending on chunk size)
|
||||
self.min_speech_frames = 10 # frames
|
||||
|
||||
def start(self):
|
||||
"""启动 GLM 引擎"""
|
||||
stdout_cmd('info', 'GLM-ASR recognizer started.')
|
||||
|
||||
def stop(self):
|
||||
"""停止 GLM 引擎"""
|
||||
stdout_cmd('info', 'GLM-ASR recognizer stopped.')
|
||||
|
||||
def process_audio(self, chunk):
|
||||
# chunk is bytes (int16)
|
||||
rms = audioop.rms(chunk, 2)
|
||||
|
||||
if rms > self.threshold:
|
||||
if not self.is_speech:
|
||||
self.is_speech = True
|
||||
self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
||||
self.audio_buffer = []
|
||||
self.audio_buffer.append(chunk)
|
||||
self.silence_frames = 0
|
||||
else:
|
||||
if self.is_speech:
|
||||
self.audio_buffer.append(chunk)
|
||||
self.silence_frames += 1
|
||||
if self.silence_frames > self.silence_limit:
|
||||
# Speech ended
|
||||
if len(self.audio_buffer) > self.min_speech_frames:
|
||||
self.recognize(self.audio_buffer, self.time_str)
|
||||
self.is_speech = False
|
||||
self.audio_buffer = []
|
||||
self.silence_frames = 0
|
||||
|
||||
def recognize(self, audio_frames, time_s):
|
||||
audio_bytes = b''.join(audio_frames)
|
||||
|
||||
wav_io = io.BytesIO()
|
||||
with wave.open(wav_io, 'wb') as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(16000)
|
||||
wav_file.writeframes(audio_bytes)
|
||||
wav_io.seek(0)
|
||||
|
||||
threading.Thread(
|
||||
target=self._do_request,
|
||||
args=(wav_io.read(), time_s, self.cur_id)
|
||||
).start()
|
||||
self.cur_id += 1
|
||||
|
||||
def _do_request(self, audio_content, time_s, index):
|
||||
try:
|
||||
files = {
|
||||
'file': ('audio.wav', audio_content, 'audio/wav')
|
||||
}
|
||||
data = {
|
||||
'model': self.model,
|
||||
'stream': 'false'
|
||||
}
|
||||
headers = {
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
|
||||
response = requests.post(self.url, headers=headers, data=data, files=files, timeout=15)
|
||||
|
||||
if response.status_code == 200:
|
||||
res_json = response.json()
|
||||
text = res_json.get('text', '')
|
||||
if text:
|
||||
self.output_caption(text, time_s, index)
|
||||
else:
|
||||
try:
|
||||
err_msg = response.json()
|
||||
stdout_cmd('error', f"GLM API Error: {err_msg}")
|
||||
except:
|
||||
stdout_cmd('error', f"GLM API Error: {response.text}")
|
||||
|
||||
except Exception as e:
|
||||
stdout_cmd('error', f"GLM Request Failed: {str(e)}")
|
||||
|
||||
def output_caption(self, text, time_s, index):
|
||||
caption = {
|
||||
'command': 'caption',
|
||||
'index': index,
|
||||
'time_s': time_s,
|
||||
'time_t': datetime.now().strftime('%H:%M:%S.%f')[:-3],
|
||||
'text': text,
|
||||
'translation': ''
|
||||
}
|
||||
|
||||
if self.target:
|
||||
if self.trans_func == ollama_translate:
|
||||
th = threading.Thread(
|
||||
target=self.trans_func,
|
||||
args=(self.ollama_name, self.target, caption['text'], time_s, self.ollama_url, self.ollama_api_key),
|
||||
daemon=True
|
||||
)
|
||||
else:
|
||||
th = threading.Thread(
|
||||
target=self.trans_func,
|
||||
args=(self.ollama_name, self.target, caption['text'], time_s),
|
||||
daemon=True
|
||||
)
|
||||
th.start()
|
||||
|
||||
stdout_obj(caption)
|
||||
|
||||
def translate(self):
|
||||
global shared_data
|
||||
while shared_data.status == 'running':
|
||||
chunk = shared_data.chunk_queue.get()
|
||||
self.process_audio(chunk)
|
||||
@@ -29,7 +29,7 @@ class SosvRecognizer:
|
||||
trans_model: 翻译模型名称
|
||||
ollama_name: Ollama 模型名称
|
||||
"""
|
||||
def __init__(self, model_path: str, source: str, target: str | None, trans_model: str, ollama_name: str):
|
||||
def __init__(self, model_path: str, source: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''):
|
||||
if model_path.startswith('"'):
|
||||
model_path = model_path[1:]
|
||||
if model_path.endswith('"'):
|
||||
@@ -45,6 +45,8 @@ class SosvRecognizer:
|
||||
else:
|
||||
self.trans_func = ollama_translate
|
||||
self.ollama_name = ollama_name
|
||||
self.ollama_url = ollama_url
|
||||
self.ollama_api_key = ollama_api_key
|
||||
self.time_str = ''
|
||||
self.cur_id = 0
|
||||
self.prev_content = ''
|
||||
@@ -152,7 +154,7 @@ class SosvRecognizer:
|
||||
if self.target:
|
||||
th = threading.Thread(
|
||||
target=self.trans_func,
|
||||
args=(self.ollama_name, self.target, caption['text'], self.time_str),
|
||||
args=(self.ollama_name, self.target, caption['text'], self.time_str, self.ollama_url, self.ollama_api_key),
|
||||
daemon=True
|
||||
)
|
||||
th.start()
|
||||
|
||||
@@ -18,7 +18,7 @@ class VoskRecognizer:
|
||||
trans_model: 翻译模型名称
|
||||
ollama_name: Ollama 模型名称
|
||||
"""
|
||||
def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str):
|
||||
def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str, ollama_url: str = '', ollama_api_key: str = ''):
|
||||
SetLogLevel(-1)
|
||||
if model_path.startswith('"'):
|
||||
model_path = model_path[1:]
|
||||
@@ -31,6 +31,8 @@ class VoskRecognizer:
|
||||
else:
|
||||
self.trans_func = ollama_translate
|
||||
self.ollama_name = ollama_name
|
||||
self.ollama_url = ollama_url
|
||||
self.ollama_api_key = ollama_api_key
|
||||
self.time_str = ''
|
||||
self.cur_id = 0
|
||||
self.prev_content = ''
|
||||
@@ -66,7 +68,7 @@ class VoskRecognizer:
|
||||
if self.target:
|
||||
th = threading.Thread(
|
||||
target=self.trans_func,
|
||||
args=(self.ollama_name, self.target, caption['text'], self.time_str),
|
||||
args=(self.ollama_name, self.target, caption['text'], self.time_str, self.ollama_url, self.ollama_api_key),
|
||||
daemon=True
|
||||
)
|
||||
th.start()
|
||||
|
||||
109
engine/main.py
109
engine/main.py
@@ -8,6 +8,7 @@ from utils import merge_chunk_channels, resample_chunk_mono
|
||||
from audio2text import GummyRecognizer
|
||||
from audio2text import VoskRecognizer
|
||||
from audio2text import SosvRecognizer
|
||||
from audio2text import GlmRecognizer
|
||||
from sysaudio import AudioStream
|
||||
|
||||
|
||||
@@ -74,7 +75,7 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str, r: bool, rp: str):
|
||||
engine.stop()
|
||||
|
||||
|
||||
def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp: str):
|
||||
def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str):
|
||||
"""
|
||||
Parameters:
|
||||
a: Audio source: 0 for output, 1 for input
|
||||
@@ -83,14 +84,16 @@ def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp:
|
||||
t: Target language
|
||||
tm: Translation model type, ollama or google
|
||||
omn: Ollama model name
|
||||
ourl: Ollama Base URL
|
||||
okey: Ollama API Key
|
||||
r: Whether to record the audio
|
||||
rp: Path to save the recorded audio
|
||||
"""
|
||||
stream = AudioStream(a, c)
|
||||
if t == 'none':
|
||||
engine = VoskRecognizer(vosk, None, tm, omn)
|
||||
engine = VoskRecognizer(vosk, None, tm, omn, ourl, okey)
|
||||
else:
|
||||
engine = VoskRecognizer(vosk, t, tm, omn)
|
||||
engine = VoskRecognizer(vosk, t, tm, omn, ourl, okey)
|
||||
|
||||
engine.start()
|
||||
stream_thread = threading.Thread(
|
||||
@@ -106,7 +109,7 @@ def main_vosk(a: int, c: int, vosk: str, t: str, tm: str, omn: str, r: bool, rp:
|
||||
engine.stop()
|
||||
|
||||
|
||||
def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: bool, rp: str):
|
||||
def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str):
|
||||
"""
|
||||
Parameters:
|
||||
a: Audio source: 0 for output, 1 for input
|
||||
@@ -116,14 +119,16 @@ def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: b
|
||||
t: Target language
|
||||
tm: Translation model type, ollama or google
|
||||
omn: Ollama model name
|
||||
ourl: Ollama API URL
|
||||
okey: Ollama API Key
|
||||
r: Whether to record the audio
|
||||
rp: Path to save the recorded audio
|
||||
"""
|
||||
stream = AudioStream(a, c)
|
||||
if t == 'none':
|
||||
engine = SosvRecognizer(sosv, s, None, tm, omn)
|
||||
engine = SosvRecognizer(sosv, s, None, tm, omn, ourl, okey)
|
||||
else:
|
||||
engine = SosvRecognizer(sosv, s, t, tm, omn)
|
||||
engine = SosvRecognizer(sosv, s, t, tm, omn, ourl, okey)
|
||||
|
||||
engine.start()
|
||||
stream_thread = threading.Thread(
|
||||
@@ -139,16 +144,54 @@ def main_sosv(a: int, c: int, sosv: str, s: str, t: str, tm: str, omn: str, r: b
|
||||
engine.stop()
|
||||
|
||||
|
||||
def main_glm(a: int, c: int, url: str, model: str, key: str, s: str, t: str, tm: str, omn: str, ourl: str, okey: str, r: bool, rp: str):
|
||||
"""
|
||||
Parameters:
|
||||
a: Audio source
|
||||
c: Chunk rate
|
||||
url: GLM API URL
|
||||
model: GLM Model Name
|
||||
key: GLM API Key
|
||||
s: Source language
|
||||
t: Target language
|
||||
tm: Translation model
|
||||
omn: Ollama model name
|
||||
ourl: Ollama API URL
|
||||
okey: Ollama API Key
|
||||
r: Record
|
||||
rp: Record path
|
||||
"""
|
||||
stream = AudioStream(a, c)
|
||||
if t == 'none':
|
||||
engine = GlmRecognizer(url, model, key, s, None, tm, omn, ourl, okey)
|
||||
else:
|
||||
engine = GlmRecognizer(url, model, key, s, t, tm, omn, ourl, okey)
|
||||
|
||||
engine.start()
|
||||
stream_thread = threading.Thread(
|
||||
target=audio_recording,
|
||||
args=(stream, True, r, rp),
|
||||
daemon=True
|
||||
)
|
||||
stream_thread.start()
|
||||
try:
|
||||
engine.translate()
|
||||
except KeyboardInterrupt:
|
||||
stdout("Keyboard interrupt detected. Exiting...")
|
||||
engine.stop()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
||||
# all
|
||||
parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk or sosv')
|
||||
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
|
||||
parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')
|
||||
parser.add_argument('-p', '--port', default=0, help='The port to run the server on, 0 for no server')
|
||||
parser.add_argument('-d', '--display_caption', default=0, help='Display caption on terminal, 0 for no display, 1 for display')
|
||||
parser.add_argument('-a', '--audio_type', type=int, default=0, help='Audio stream source: 0 for output, 1 for input')
|
||||
parser.add_argument('-c', '--chunk_rate', type=int, default=10, help='Number of audio stream chunks collected per second')
|
||||
parser.add_argument('-p', '--port', type=int, default=0, help='The port to run the server on, 0 for no server')
|
||||
parser.add_argument('-d', '--display_caption', type=int, default=0, help='Display caption on terminal, 0 for no display, 1 for display')
|
||||
parser.add_argument('-t', '--target_language', default='none', help='Target language code, "none" for no translation')
|
||||
parser.add_argument('-r', '--record', default=0, help='Whether to record the audio, 0 for no recording, 1 for recording')
|
||||
parser.add_argument('-r', '--record', type=int, default=0, help='Whether to record the audio, 0 for no recording, 1 for recording')
|
||||
parser.add_argument('-rp', '--record_path', default='', help='Path to save the recorded audio')
|
||||
# gummy and sosv
|
||||
parser.add_argument('-s', '--source_language', default='auto', help='Source language code')
|
||||
@@ -157,20 +200,24 @@ if __name__ == "__main__":
|
||||
# vosk and sosv
|
||||
parser.add_argument('-tm', '--translation_model', default='ollama', help='Model for translation: ollama or google')
|
||||
parser.add_argument('-omn', '--ollama_name', default='', help='Ollama model name for translation')
|
||||
parser.add_argument('-ourl', '--ollama_url', default='', help='Ollama API URL')
|
||||
parser.add_argument('-okey', '--ollama_api_key', default='', help='Ollama API Key')
|
||||
# vosk only
|
||||
parser.add_argument('-vosk', '--vosk_model', default='', help='The path to the vosk model.')
|
||||
# sosv only
|
||||
parser.add_argument('-sosv', '--sosv_model', default=None, help='The SenseVoice model path')
|
||||
# glm only
|
||||
parser.add_argument('-gurl', '--glm_url', default='https://open.bigmodel.cn/api/paas/v4/audio/transcriptions', help='GLM API URL')
|
||||
parser.add_argument('-gmodel', '--glm_model', default='glm-asr-2512', help='GLM Model Name')
|
||||
parser.add_argument('-gkey', '--glm_api_key', default='', help='GLM API Key')
|
||||
|
||||
args = parser.parse_args()
|
||||
if int(args.port) == 0:
|
||||
shared_data.status = "running"
|
||||
else:
|
||||
start_server(int(args.port))
|
||||
|
||||
if int(args.display_caption) != 0:
|
||||
|
||||
if args.port != 0:
|
||||
threading.Thread(target=start_server, args=(args.port,), daemon=True).start()
|
||||
|
||||
if args.display_caption == '1':
|
||||
change_caption_display(True)
|
||||
print("Caption will be displayed on terminal")
|
||||
|
||||
if args.caption_engine == 'gummy':
|
||||
main_gummy(
|
||||
@@ -179,7 +226,7 @@ if __name__ == "__main__":
|
||||
int(args.audio_type),
|
||||
int(args.chunk_rate),
|
||||
args.api_key,
|
||||
True if int(args.record) == 1 else False,
|
||||
bool(int(args.record)),
|
||||
args.record_path
|
||||
)
|
||||
elif args.caption_engine == 'vosk':
|
||||
@@ -190,7 +237,9 @@ if __name__ == "__main__":
|
||||
args.target_language,
|
||||
args.translation_model,
|
||||
args.ollama_name,
|
||||
True if int(args.record) == 1 else False,
|
||||
args.ollama_url,
|
||||
args.ollama_api_key,
|
||||
bool(int(args.record)),
|
||||
args.record_path
|
||||
)
|
||||
elif args.caption_engine == 'sosv':
|
||||
@@ -202,7 +251,25 @@ if __name__ == "__main__":
|
||||
args.target_language,
|
||||
args.translation_model,
|
||||
args.ollama_name,
|
||||
True if int(args.record) == 1 else False,
|
||||
args.ollama_url,
|
||||
args.ollama_api_key,
|
||||
bool(int(args.record)),
|
||||
args.record_path
|
||||
)
|
||||
elif args.caption_engine == 'glm':
|
||||
main_glm(
|
||||
int(args.audio_type),
|
||||
int(args.chunk_rate),
|
||||
args.glm_url,
|
||||
args.glm_model,
|
||||
args.glm_api_key,
|
||||
args.source_language,
|
||||
args.target_language,
|
||||
args.translation_model,
|
||||
args.ollama_name,
|
||||
args.ollama_url,
|
||||
args.ollama_api_key,
|
||||
bool(int(args.record)),
|
||||
args.record_path
|
||||
)
|
||||
else:
|
||||
|
||||
@@ -6,11 +6,17 @@ import sys
|
||||
if sys.platform == 'win32':
|
||||
vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
|
||||
else:
|
||||
vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve())
|
||||
venv_lib = Path('./.venv/lib')
|
||||
python_dirs = list(venv_lib.glob('python*'))
|
||||
if python_dirs:
|
||||
vosk_path = str((python_dirs[0] / 'site-packages' / 'vosk').resolve())
|
||||
else:
|
||||
vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve())
|
||||
|
||||
a = Analysis(
|
||||
['main.py'],
|
||||
pathex=[],
|
||||
# binaries=[('portaudio/lib/.libs/libportaudio.2.dylib', '.')],
|
||||
binaries=[],
|
||||
datas=[(vosk_path, 'vosk')],
|
||||
hiddenimports=[],
|
||||
@@ -27,21 +33,27 @@ pyz = PYZ(a.pure)
|
||||
exe = EXE(
|
||||
pyz,
|
||||
a.scripts,
|
||||
a.binaries,
|
||||
a.datas,
|
||||
[],
|
||||
exclude_binaries=True,
|
||||
name='main',
|
||||
debug=False,
|
||||
bootloader_ignore_signals=False,
|
||||
strip=False,
|
||||
upx=True,
|
||||
upx_exclude=[],
|
||||
runtime_tmpdir=None,
|
||||
console=True,
|
||||
disable_windowed_traceback=False,
|
||||
argv_emulation=False,
|
||||
target_arch=None,
|
||||
codesign_identity=None,
|
||||
entitlements_file=None,
|
||||
onefile=True,
|
||||
)
|
||||
|
||||
coll = COLLECT(
|
||||
exe,
|
||||
a.binaries,
|
||||
a.datas,
|
||||
strip=False,
|
||||
upx=True,
|
||||
upx_exclude=[],
|
||||
name='main',
|
||||
)
|
||||
|
||||
@@ -7,4 +7,6 @@ pyaudio; sys_platform == 'darwin'
|
||||
pyaudiowpatch; sys_platform == 'win32'
|
||||
googletrans
|
||||
ollama
|
||||
sherpa_onnx
|
||||
sherpa_onnx
|
||||
requests
|
||||
openai
|
||||
|
||||
@@ -47,7 +47,6 @@ def translation_display(obj):
|
||||
|
||||
def stdout_obj(obj):
|
||||
global display_caption
|
||||
print(obj['command'], display_caption)
|
||||
if obj['command'] == 'caption' and display_caption:
|
||||
caption_display(obj)
|
||||
return
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
from ollama import chat
|
||||
from ollama import chat, Client
|
||||
from ollama import ChatResponse
|
||||
try:
|
||||
from openai import OpenAI
|
||||
except ImportError:
|
||||
OpenAI = None
|
||||
import asyncio
|
||||
from googletrans import Translator
|
||||
from .sysout import stdout_cmd, stdout_obj
|
||||
@@ -17,15 +21,43 @@ lang_map = {
|
||||
'zh-cn': 'Chinese'
|
||||
}
|
||||
|
||||
def ollama_translate(model: str, target: str, text: str, time_s: str):
|
||||
response: ChatResponse = chat(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
|
||||
{"role": "user", "content": text}
|
||||
]
|
||||
)
|
||||
content = response.message.content or ""
|
||||
def ollama_translate(model: str, target: str, text: str, time_s: str, url: str = '', key: str = ''):
|
||||
content = ""
|
||||
try:
|
||||
if url:
|
||||
if OpenAI:
|
||||
client = OpenAI(base_url=url, api_key=key if key else "ollama")
|
||||
openai_response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
|
||||
{"role": "user", "content": text}
|
||||
]
|
||||
)
|
||||
content = openai_response.choices[0].message.content or ""
|
||||
else:
|
||||
client = Client(host=url)
|
||||
response: ChatResponse = client.chat(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
|
||||
{"role": "user", "content": text}
|
||||
]
|
||||
)
|
||||
content = response.message.content or ""
|
||||
else:
|
||||
response: ChatResponse = chat(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
|
||||
{"role": "user", "content": text}
|
||||
]
|
||||
)
|
||||
content = response.message.content or ""
|
||||
except Exception as e:
|
||||
stdout_cmd("warn", f"Translation failed: {str(e)}")
|
||||
return
|
||||
|
||||
if content.startswith('<think>'):
|
||||
index = content.find('</think>')
|
||||
if index != -1:
|
||||
|
||||
67
package-lock.json
generated
67
package-lock.json
generated
@@ -110,6 +110,7 @@
|
||||
"integrity": "sha512-IaaGWsQqfsQWVLqMn9OB92MNN7zukfVA4s7KKAI0KfrrDsZ0yhi5uV4baBuLuN7n3vsZpwP8asPPcVwApxvjBQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@ampproject/remapping": "^2.2.0",
|
||||
"@babel/code-frame": "^7.27.1",
|
||||
@@ -2274,6 +2275,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/@types/node/-/node-22.15.17.tgz",
|
||||
"integrity": "sha512-wIX2aSZL5FE+MR0JlvF87BNVrtFWf6AE6rxSE9X7OwnVvoyCQjpzSRJ+M87se/4QCkCiebQAqrJ0y6fwIyi7nw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
@@ -2360,6 +2362,7 @@
|
||||
"integrity": "sha512-B2MdzyWxCE2+SqiZHAjPphft+/2x2FlO9YBx7eKE1BCb+rqBlQdhtAEhzIEdozHd55DXPmxBdpMygFJjfjjA9A==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.32.0",
|
||||
"@typescript-eslint/types": "8.32.0",
|
||||
@@ -2791,6 +2794,7 @@
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -2851,6 +2855,7 @@
|
||||
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"fast-json-stable-stringify": "^2.0.0",
|
||||
@@ -3064,7 +3069,6 @@
|
||||
"integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"archiver-utils": "^2.1.0",
|
||||
"async": "^3.2.4",
|
||||
@@ -3084,7 +3088,6 @@
|
||||
"integrity": "sha512-bEL/yUb/fNNiNTuUz979Z0Yg5L+LzLxGJz8x79lYmR54fmTIb6ob/hNQgkQnIUDWIFjZVQwl9Xs356I6BAMHfw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"glob": "^7.1.4",
|
||||
"graceful-fs": "^4.2.0",
|
||||
@@ -3107,7 +3110,6 @@
|
||||
"integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"core-util-is": "~1.0.0",
|
||||
"inherits": "~2.0.3",
|
||||
@@ -3123,8 +3125,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz",
|
||||
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/archiver-utils/node_modules/string_decoder": {
|
||||
"version": "1.1.1",
|
||||
@@ -3132,7 +3133,6 @@
|
||||
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"safe-buffer": "~5.1.0"
|
||||
}
|
||||
@@ -3351,6 +3351,7 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"caniuse-lite": "^1.0.30001716",
|
||||
"electron-to-chromium": "^1.5.149",
|
||||
@@ -3848,7 +3849,6 @@
|
||||
"integrity": "sha512-D3uMHtGc/fcO1Gt1/L7i1e33VOvD4A9hfQLP+6ewd+BvG/gQ84Yh4oftEhAdjSMgBgwGL+jsppT7JYNpo6MHHg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"buffer-crc32": "^0.2.13",
|
||||
"crc32-stream": "^4.0.2",
|
||||
@@ -3994,7 +3994,6 @@
|
||||
"integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"crc32": "bin/crc32.njs"
|
||||
},
|
||||
@@ -4008,7 +4007,6 @@
|
||||
"integrity": "sha512-NT7w2JVU7DFroFdYkeq8cywxrgjPHWkdX1wjpRQXPX5Asews3tA+Ght6lddQO5Mkumffp3X7GEqku3epj2toIw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"crc-32": "^1.2.0",
|
||||
"readable-stream": "^3.4.0"
|
||||
@@ -4248,6 +4246,7 @@
|
||||
"integrity": "sha512-NoXo6Liy2heSklTI5OIZbCgXC1RzrDQsZkeEwXhdOro3FT1VBOvbubvscdPnjVuQ4AMwwv61oaH96AbiYg9EnQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"app-builder-lib": "25.1.8",
|
||||
"builder-util": "25.1.7",
|
||||
@@ -4410,6 +4409,7 @@
|
||||
"integrity": "sha512-6dLslJrQYB1qvqVPYRv1PhAA/uytC66nUeiTcq2JXiBzrmTWCHppqtGUjZhvnSRVatBCT5/SFdizdzcBiEiYUg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@electron/get": "^2.0.0",
|
||||
"@types/node": "^22.7.7",
|
||||
@@ -4454,7 +4454,6 @@
|
||||
"integrity": "sha512-2ntkJ+9+0GFP6nAISiMabKt6eqBB0kX1QqHNWFWAXgi0VULKGisM46luRFpIBiU3u/TDmhZMM8tzvo2Abn3ayg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"app-builder-lib": "25.1.8",
|
||||
"archiver": "^5.3.1",
|
||||
@@ -4468,7 +4467,6 @@
|
||||
"integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"graceful-fs": "^4.2.0",
|
||||
"jsonfile": "^6.0.1",
|
||||
@@ -4484,7 +4482,6 @@
|
||||
"integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"universalify": "^2.0.0"
|
||||
},
|
||||
@@ -4498,7 +4495,6 @@
|
||||
"integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">= 10.0.0"
|
||||
}
|
||||
@@ -4813,6 +4809,7 @@
|
||||
"integrity": "sha512-LSehfdpgMeWcTZkWZVIJl+tkZ2nuSkyyB9C27MZqFWXuph7DvaowgcTvKqxvpLW1JZIk8PN7hFY3Rj9LQ7m7lg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.2.0",
|
||||
"@eslint-community/regexpp": "^4.12.1",
|
||||
@@ -4874,6 +4871,7 @@
|
||||
"integrity": "sha512-zc1UmCpNltmVY34vuLRV61r1K27sWuX39E+uyUnY8xS2Bex88VV9cugG+UZbRSRGtGyFboj+D8JODyme1plMpw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"eslint-config-prettier": "bin/cli.js"
|
||||
},
|
||||
@@ -5351,8 +5349,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/fs-constants/-/fs-constants-1.0.0.tgz",
|
||||
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/fs-extra": {
|
||||
"version": "8.1.0",
|
||||
@@ -6108,8 +6105,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/isarray/-/isarray-1.0.0.tgz",
|
||||
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/isbinaryfile": {
|
||||
"version": "5.0.4",
|
||||
@@ -6300,7 +6296,6 @@
|
||||
"integrity": "sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"readable-stream": "^2.0.5"
|
||||
},
|
||||
@@ -6314,7 +6309,6 @@
|
||||
"integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"core-util-is": "~1.0.0",
|
||||
"inherits": "~2.0.3",
|
||||
@@ -6330,8 +6324,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.1.2.tgz",
|
||||
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lazystream/node_modules/string_decoder": {
|
||||
"version": "1.1.1",
|
||||
@@ -6339,7 +6332,6 @@
|
||||
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"safe-buffer": "~5.1.0"
|
||||
}
|
||||
@@ -6391,32 +6383,28 @@
|
||||
"resolved": "https://registry.npmmirror.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.difference": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmmirror.com/lodash.difference/-/lodash.difference-4.5.0.tgz",
|
||||
"integrity": "sha512-dS2j+W26TQ7taQBGN8Lbbq04ssV3emRw4NY58WErlTO29pIqS0HmoT5aJ9+TUQ1N3G+JOZSji4eugsWwGp9yPA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.flatten": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmmirror.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz",
|
||||
"integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.isplainobject": {
|
||||
"version": "4.0.6",
|
||||
"resolved": "https://registry.npmmirror.com/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz",
|
||||
"integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.merge": {
|
||||
"version": "4.6.2",
|
||||
@@ -6430,8 +6418,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/lodash.union/-/lodash.union-4.6.0.tgz",
|
||||
"integrity": "sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/log-symbols": {
|
||||
"version": "4.1.0",
|
||||
@@ -6984,7 +6971,6 @@
|
||||
"integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
@@ -7408,6 +7394,7 @@
|
||||
"integrity": "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"prettier": "bin/prettier.cjs"
|
||||
},
|
||||
@@ -7436,8 +7423,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
|
||||
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/progress": {
|
||||
"version": "2.0.3",
|
||||
@@ -7556,7 +7542,6 @@
|
||||
"integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"minimatch": "^5.1.0"
|
||||
}
|
||||
@@ -7567,7 +7552,6 @@
|
||||
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
},
|
||||
@@ -8235,7 +8219,6 @@
|
||||
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"bl": "^4.0.3",
|
||||
"end-of-stream": "^1.4.1",
|
||||
@@ -8360,6 +8343,7 @@
|
||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -8462,6 +8446,7 @@
|
||||
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
|
||||
"devOptional": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
@@ -8611,6 +8596,7 @@
|
||||
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.4.4",
|
||||
@@ -8701,6 +8687,7 @@
|
||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -8720,6 +8707,7 @@
|
||||
"resolved": "https://registry.npmmirror.com/vue/-/vue-3.5.13.tgz",
|
||||
"integrity": "sha512-wmeiSMxkZCSc+PM2w2VRsOYAZC8GdipNFRTsLSfodVqI9mbejKeXEGr8SckuLnrQPGe3oJN5c3K0vpoU9q/wCQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@vue/compiler-dom": "3.5.13",
|
||||
"@vue/compiler-sfc": "3.5.13",
|
||||
@@ -8742,6 +8730,7 @@
|
||||
"integrity": "sha512-dbCBnd2e02dYWsXoqX5yKUZlOt+ExIpq7hmHKPb5ZqKcjf++Eo0hMseFTZMLKThrUk61m+Uv6A2YSBve6ZvuDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"debug": "^4.4.0",
|
||||
"eslint-scope": "^8.2.0",
|
||||
@@ -9046,7 +9035,6 @@
|
||||
"integrity": "sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"archiver-utils": "^3.0.4",
|
||||
"compress-commons": "^4.1.2",
|
||||
@@ -9062,7 +9050,6 @@
|
||||
"integrity": "sha512-KVgf4XQVrTjhyWmx6cte4RxonPLR9onExufI1jhvw/MQ4BB6IsZD5gT8Lq+u/+pRkWna/6JoHpiQioaqFP5Rzw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"glob": "^7.2.3",
|
||||
"graceful-fs": "^4.2.0",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "auto-caption",
|
||||
"productName": "Auto Caption",
|
||||
"version": "1.0.0",
|
||||
"version": "1.1.0",
|
||||
"description": "A cross-platform subtitle display software.",
|
||||
"main": "./out/main/index.js",
|
||||
"author": "himeditator",
|
||||
|
||||
@@ -8,6 +8,8 @@ export interface Controls {
|
||||
targetLang: string,
|
||||
transModel: string,
|
||||
ollamaName: string,
|
||||
ollamaUrl: string,
|
||||
ollamaApiKey: string,
|
||||
engine: string,
|
||||
audio: 0 | 1,
|
||||
translation: boolean,
|
||||
@@ -15,6 +17,9 @@ export interface Controls {
|
||||
API_KEY: string,
|
||||
voskModelPath: string,
|
||||
sosvModelPath: string,
|
||||
glmUrl: string,
|
||||
glmModel: string,
|
||||
glmApiKey: string,
|
||||
recordingPath: string,
|
||||
customized: boolean,
|
||||
customizedApp: string,
|
||||
|
||||
@@ -4,9 +4,10 @@ import {
|
||||
} from '../types'
|
||||
import { Log } from './Log'
|
||||
import { app, BrowserWindow } from 'electron'
|
||||
import { passwordMaskingForObject } from './UtilsFunc'
|
||||
import * as path from 'path'
|
||||
import * as fs from 'fs'
|
||||
import os from 'os'
|
||||
import * as os from 'os'
|
||||
|
||||
interface CaptionTranslation {
|
||||
time_s: string,
|
||||
@@ -44,13 +45,18 @@ const defaultControls: Controls = {
|
||||
sourceLang: 'en',
|
||||
targetLang: 'zh',
|
||||
transModel: 'ollama',
|
||||
ollamaName: '',
|
||||
ollamaName: 'qwen2.5:0.5b',
|
||||
ollamaUrl: 'http://localhost:11434',
|
||||
ollamaApiKey: '',
|
||||
engine: 'gummy',
|
||||
audio: 0,
|
||||
engineEnabled: false,
|
||||
API_KEY: '',
|
||||
voskModelPath: '',
|
||||
sosvModelPath: '',
|
||||
glmUrl: 'https://open.bigmodel.cn/api/paas/v4/audio/transcriptions',
|
||||
glmModel: 'glm-asr-2512',
|
||||
glmApiKey: '',
|
||||
recordingPath: getDesktopPath(),
|
||||
translation: true,
|
||||
recording: false,
|
||||
@@ -146,9 +152,7 @@ class AllConfig {
|
||||
}
|
||||
}
|
||||
this.controls.engineEnabled = engineEnabled
|
||||
let _controls = {...this.controls}
|
||||
_controls.API_KEY = _controls.API_KEY.replace(/./g, '*')
|
||||
Log.info('Set Controls:', _controls)
|
||||
Log.info('Set Controls:', passwordMaskingForObject(this.controls))
|
||||
}
|
||||
|
||||
public sendControls(window: BrowserWindow, info = true) {
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import { exec, spawn } from 'child_process'
|
||||
import { app } from 'electron'
|
||||
import { is } from '@electron-toolkit/utils'
|
||||
import path from 'path'
|
||||
import net from 'net'
|
||||
import * as path from 'path'
|
||||
import * as net from 'net'
|
||||
import { controlWindow } from '../ControlWindow'
|
||||
import { allConfig } from './AllConfig'
|
||||
import { i18n } from '../i18n'
|
||||
import { Log } from './Log'
|
||||
import { passwordMaskingForList } from './UtilsFunc'
|
||||
|
||||
export class CaptionEngine {
|
||||
appPath: string = ''
|
||||
@@ -60,7 +61,7 @@ export class CaptionEngine {
|
||||
this.appPath = path.join(process.resourcesPath, 'engine', 'main.exe')
|
||||
}
|
||||
else {
|
||||
this.appPath = path.join(process.resourcesPath, 'engine', 'main')
|
||||
this.appPath = path.join(process.resourcesPath, 'engine', 'main', 'main')
|
||||
}
|
||||
}
|
||||
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
||||
@@ -87,6 +88,8 @@ export class CaptionEngine {
|
||||
this.command.push('-vosk', `"${allConfig.controls.voskModelPath}"`)
|
||||
this.command.push('-tm', allConfig.controls.transModel)
|
||||
this.command.push('-omn', allConfig.controls.ollamaName)
|
||||
if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl)
|
||||
if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey)
|
||||
}
|
||||
else if(allConfig.controls.engine === 'sosv'){
|
||||
this.command.push('-e', 'sosv')
|
||||
@@ -94,15 +97,25 @@ export class CaptionEngine {
|
||||
this.command.push('-sosv', `"${allConfig.controls.sosvModelPath}"`)
|
||||
this.command.push('-tm', allConfig.controls.transModel)
|
||||
this.command.push('-omn', allConfig.controls.ollamaName)
|
||||
if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl)
|
||||
if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey)
|
||||
}
|
||||
else if(allConfig.controls.engine === 'glm'){
|
||||
this.command.push('-e', 'glm')
|
||||
this.command.push('-s', allConfig.controls.sourceLang)
|
||||
this.command.push('-gurl', allConfig.controls.glmUrl)
|
||||
this.command.push('-gmodel', allConfig.controls.glmModel)
|
||||
if(allConfig.controls.glmApiKey) {
|
||||
this.command.push('-gkey', allConfig.controls.glmApiKey)
|
||||
}
|
||||
this.command.push('-tm', allConfig.controls.transModel)
|
||||
this.command.push('-omn', allConfig.controls.ollamaName)
|
||||
if(allConfig.controls.ollamaUrl) this.command.push('-ourl', allConfig.controls.ollamaUrl)
|
||||
if(allConfig.controls.ollamaApiKey) this.command.push('-okey', allConfig.controls.ollamaApiKey)
|
||||
}
|
||||
}
|
||||
Log.info('Engine Path:', this.appPath)
|
||||
if(this.command.length > 2 && this.command.at(-2) === '-k') {
|
||||
const _command = [...this.command]
|
||||
_command[_command.length -1] = _command[_command.length -1].replace(/./g, '*')
|
||||
Log.info('Engine Command:', _command)
|
||||
}
|
||||
else Log.info('Engine Command:', this.command)
|
||||
Log.info('Engine Command:', passwordMaskingForList(this.command))
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -165,7 +178,7 @@ export class CaptionEngine {
|
||||
const data_obj = JSON.parse(line)
|
||||
handleEngineData(data_obj)
|
||||
} catch (e) {
|
||||
controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
|
||||
// controlWindow.sendErrorMessage(i18n('engine.output.parse.error') + e)
|
||||
Log.error('Error parsing JSON:', e)
|
||||
}
|
||||
}
|
||||
|
||||
24
src/main/utils/UtilsFunc.ts
Normal file
24
src/main/utils/UtilsFunc.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
function passwordMasking(pwd: string) {
|
||||
return pwd.replace(/./g, '*')
|
||||
}
|
||||
|
||||
export function passwordMaskingForList(args: string[]) {
|
||||
const maskedArgs = [...args]
|
||||
for(let i = 1; i < maskedArgs.length; i++) {
|
||||
if(maskedArgs[i-1] === '-k' || maskedArgs[i-1] === '-okey' || maskedArgs[i-1] === '-gkey') {
|
||||
maskedArgs[i] = passwordMasking(maskedArgs[i])
|
||||
}
|
||||
}
|
||||
return maskedArgs
|
||||
}
|
||||
|
||||
export function passwordMaskingForObject(args: Record<string, any>) {
|
||||
const maskedArgs = {...args}
|
||||
for(const key in maskedArgs) {
|
||||
const lKey = key.toLowerCase()
|
||||
if(lKey.includes('api') && lKey.includes('key')) {
|
||||
maskedArgs[key] = passwordMasking(maskedArgs[key])
|
||||
}
|
||||
}
|
||||
return maskedArgs
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>Auto Caption v1.0.0</title>
|
||||
<title>Auto Caption v1.1.0</title>
|
||||
<!-- https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP -->
|
||||
<meta
|
||||
http-equiv="Content-Security-Policy"
|
||||
|
||||
@@ -41,17 +41,63 @@
|
||||
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
|
||||
<a-popover placement="right">
|
||||
<template #content>
|
||||
<p class="label-hover-info">{{ $t('engine.ollamaNote') }}</p>
|
||||
<p class="label-hover-info">{{ $t('engine.modelNameNote') }}</p>
|
||||
</template>
|
||||
<span class="input-label info-label"
|
||||
:style="{color: uiColor}"
|
||||
>{{ $t('engine.ollama') }}</span>
|
||||
>{{ $t('engine.modelName') }}</span>
|
||||
</a-popover>
|
||||
<a-input
|
||||
class="input-area"
|
||||
v-model:value="currentOllamaName"
|
||||
></a-input>
|
||||
</div>
|
||||
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
|
||||
<a-popover placement="right">
|
||||
<template #content>
|
||||
<p class="label-hover-info">{{ $t('engine.baseURL') }}</p>
|
||||
</template>
|
||||
<span class="input-label info-label"
|
||||
:style="{color: uiColor}"
|
||||
>Base URL</span>
|
||||
</a-popover>
|
||||
<a-input
|
||||
class="input-area"
|
||||
v-model:value="currentOllamaUrl"
|
||||
placeholder="http://localhost:11434"
|
||||
></a-input>
|
||||
</div>
|
||||
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
|
||||
<a-popover placement="right">
|
||||
<template #content>
|
||||
<p class="label-hover-info">{{ $t('engine.apiKey') }}</p>
|
||||
</template>
|
||||
<span class="input-label info-label"
|
||||
:style="{color: uiColor}"
|
||||
>API Key</span>
|
||||
</a-popover>
|
||||
<a-input
|
||||
class="input-area"
|
||||
type="password"
|
||||
v-model:value="currentOllamaApiKey"
|
||||
/>
|
||||
</div>
|
||||
<div class="input-item" v-if="currentEngine === 'glm'">
|
||||
<span class="input-label">GLM API URL</span>
|
||||
<a-input
|
||||
class="input-area"
|
||||
v-model:value="currentGlmUrl"
|
||||
placeholder="https://open.bigmodel.cn/api/paas/v4/audio/transcriptions"
|
||||
></a-input>
|
||||
</div>
|
||||
<div class="input-item" v-if="currentEngine === 'glm'">
|
||||
<span class="input-label">GLM Model Name</span>
|
||||
<a-input
|
||||
class="input-area"
|
||||
v-model:value="currentGlmModel"
|
||||
placeholder="glm-asr-2512"
|
||||
></a-input>
|
||||
</div>
|
||||
<div class="input-item">
|
||||
<span class="input-label">{{ $t('engine.audioType') }}</span>
|
||||
<a-select
|
||||
@@ -115,7 +161,7 @@
|
||||
</template>
|
||||
<span class="input-label info-label"
|
||||
:style="{color: uiColor}"
|
||||
>{{ $t('engine.apikey') }}</span>
|
||||
>ALI {{ $t('engine.apikey') }}</span>
|
||||
</a-popover>
|
||||
<a-input
|
||||
class="input-area"
|
||||
@@ -123,6 +169,24 @@
|
||||
v-model:value="currentAPI_KEY"
|
||||
/>
|
||||
</div>
|
||||
<div class="input-item">
|
||||
<a-popover placement="right">
|
||||
<template #content>
|
||||
<p class="label-hover-info">{{ $t('engine.glmApikeyInfo') }}</p>
|
||||
<p><a href="https://open.bigmodel.cn/" target="_blank">
|
||||
https://open.bigmodel.cn
|
||||
</a></p>
|
||||
</template>
|
||||
<span class="input-label info-label"
|
||||
:style="{color: uiColor}"
|
||||
>GLM {{ $t('engine.apikey') }}</span>
|
||||
</a-popover>
|
||||
<a-input
|
||||
class="input-area"
|
||||
type="password"
|
||||
v-model:value="currentGlmApiKey"
|
||||
/>
|
||||
</div>
|
||||
<div class="input-item">
|
||||
<a-popover placement="right">
|
||||
<template #content>
|
||||
@@ -239,9 +303,14 @@ const currentTranslation = ref<boolean>(true)
|
||||
const currentRecording = ref<boolean>(false)
|
||||
const currentTransModel = ref('ollama')
|
||||
const currentOllamaName = ref('')
|
||||
const currentOllamaUrl = ref('')
|
||||
const currentOllamaApiKey = ref('')
|
||||
const currentAPI_KEY = ref<string>('')
|
||||
const currentVoskModelPath = ref<string>('')
|
||||
const currentSosvModelPath = ref<string>('')
|
||||
const currentGlmUrl = ref<string>('')
|
||||
const currentGlmModel = ref<string>('')
|
||||
const currentGlmApiKey = ref<string>('')
|
||||
const currentRecordingPath = ref<string>('')
|
||||
const currentCustomized = ref<boolean>(false)
|
||||
const currentCustomizedApp = ref('')
|
||||
@@ -294,12 +363,17 @@ function applyChange(){
|
||||
engineControl.transModel = currentTransModel.value
|
||||
engineControl.ollamaName = currentOllamaName.value
|
||||
engineControl.engine = currentEngine.value
|
||||
engineControl.ollamaUrl = currentOllamaUrl.value ?? "http://localhost:11434"
|
||||
engineControl.ollamaApiKey = currentOllamaApiKey.value
|
||||
engineControl.audio = currentAudio.value
|
||||
engineControl.translation = currentTranslation.value
|
||||
engineControl.recording = currentRecording.value
|
||||
engineControl.API_KEY = currentAPI_KEY.value
|
||||
engineControl.voskModelPath = currentVoskModelPath.value
|
||||
engineControl.sosvModelPath = currentSosvModelPath.value
|
||||
engineControl.glmUrl = currentGlmUrl.value ?? "https://open.bigmodel.cn/api/paas/v4/audio/transcriptions"
|
||||
engineControl.glmModel = currentGlmModel.value ?? "glm-asr-2512"
|
||||
engineControl.glmApiKey = currentGlmApiKey.value
|
||||
engineControl.recordingPath = currentRecordingPath.value
|
||||
engineControl.customized = currentCustomized.value
|
||||
engineControl.customizedApp = currentCustomizedApp.value
|
||||
@@ -320,6 +394,8 @@ function cancelChange(){
|
||||
currentTargetLang.value = engineControl.targetLang
|
||||
currentTransModel.value = engineControl.transModel
|
||||
currentOllamaName.value = engineControl.ollamaName
|
||||
currentOllamaUrl.value = engineControl.ollamaUrl
|
||||
currentOllamaApiKey.value = engineControl.ollamaApiKey
|
||||
currentEngine.value = engineControl.engine
|
||||
currentAudio.value = engineControl.audio
|
||||
currentTranslation.value = engineControl.translation
|
||||
@@ -327,6 +403,9 @@ function cancelChange(){
|
||||
currentAPI_KEY.value = engineControl.API_KEY
|
||||
currentVoskModelPath.value = engineControl.voskModelPath
|
||||
currentSosvModelPath.value = engineControl.sosvModelPath
|
||||
currentGlmUrl.value = engineControl.glmUrl
|
||||
currentGlmModel.value = engineControl.glmModel
|
||||
currentGlmApiKey.value = engineControl.glmApiKey
|
||||
currentRecordingPath.value = engineControl.recordingPath
|
||||
currentCustomized.value = engineControl.customized
|
||||
currentCustomizedApp.value = engineControl.customizedApp
|
||||
|
||||
@@ -101,7 +101,7 @@
|
||||
<p class="about-desc">{{ $t('status.about.desc') }}</p>
|
||||
<a-divider />
|
||||
<div class="about-info">
|
||||
<p><b>{{ $t('status.about.version') }}</b><a-tag color="green">v1.0.0</a-tag></p>
|
||||
<p><b>{{ $t('status.about.version') }}</b><a-tag color="green">v1.1.0</a-tag></p>
|
||||
<p>
|
||||
<b>{{ $t('status.about.author') }}</b>
|
||||
<a
|
||||
|
||||
@@ -34,7 +34,7 @@ export const engines = {
|
||||
{ value: 'it', type: 1, label: '意大利语' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama 本地模型' },
|
||||
{ value: 'ollama', label: 'Ollama 模型或 OpenAI 兼容模型' },
|
||||
{ value: 'google', label: 'Google API 调用' },
|
||||
]
|
||||
},
|
||||
@@ -55,7 +55,22 @@ export const engines = {
|
||||
{ value: 'it', type: 1, label: '意大利语' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama 本地模型' },
|
||||
{ value: 'ollama', label: 'Ollama 模型或 OpenAI 兼容模型' },
|
||||
{ value: 'google', label: 'Google API 调用' },
|
||||
]
|
||||
},
|
||||
{
|
||||
value: 'glm',
|
||||
label: '云端 / 智谱AI / GLM-ASR',
|
||||
languages: [
|
||||
{ value: 'auto', type: -1, label: '自动检测' },
|
||||
{ value: 'en', type: 0, label: '英语' },
|
||||
{ value: 'zh', type: 0, label: '中文' },
|
||||
{ value: 'ja', type: 0, label: '日语' },
|
||||
{ value: 'ko', type: 0, label: '韩语' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama 模型或 OpenAI 兼容模型' },
|
||||
{ value: 'google', label: 'Google API 调用' },
|
||||
]
|
||||
}
|
||||
@@ -94,7 +109,7 @@ export const engines = {
|
||||
{ value: 'it', type: 1, label: 'Italian' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama Local Model' },
|
||||
{ value: 'ollama', label: 'Ollama Model or OpenAI-compatible Model' },
|
||||
{ value: 'google', label: 'Google API Call' },
|
||||
]
|
||||
},
|
||||
@@ -115,7 +130,22 @@ export const engines = {
|
||||
{ value: 'it', type: 1, label: 'Italian' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama Local Model' },
|
||||
{ value: 'ollama', label: 'Ollama Model or OpenAI-compatible Model' },
|
||||
{ value: 'google', label: 'Google API Call' },
|
||||
]
|
||||
},
|
||||
{
|
||||
value: 'glm',
|
||||
label: 'Cloud / Zhipu AI / GLM-ASR',
|
||||
languages: [
|
||||
{ value: 'auto', type: -1, label: 'Auto Detect' },
|
||||
{ value: 'en', type: 0, label: 'English' },
|
||||
{ value: 'zh', type: 0, label: 'Chinese' },
|
||||
{ value: 'ja', type: 0, label: 'Japanese' },
|
||||
{ value: 'ko', type: 0, label: 'Korean' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama Model or OpenAI-compatible Model' },
|
||||
{ value: 'google', label: 'Google API Call' },
|
||||
]
|
||||
}
|
||||
@@ -154,7 +184,7 @@ export const engines = {
|
||||
{ value: 'it', type: 1, label: 'イタリア語' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama ローカルモデル' },
|
||||
{ value: 'ollama', label: 'Ollama モデルまたは OpenAI 互換モデル' },
|
||||
{ value: 'google', label: 'Google API 呼び出し' },
|
||||
]
|
||||
},
|
||||
@@ -175,7 +205,22 @@ export const engines = {
|
||||
{ value: 'it', type: 1, label: 'イタリア語' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama ローカルモデル' },
|
||||
{ value: 'ollama', label: 'Ollama モデルまたは OpenAI 互換モデル' },
|
||||
{ value: 'google', label: 'Google API 呼び出し' },
|
||||
]
|
||||
},
|
||||
{
|
||||
value: 'glm',
|
||||
label: 'クラウド / 智譜AI / GLM-ASR',
|
||||
languages: [
|
||||
{ value: 'auto', type: -1, label: '自動検出' },
|
||||
{ value: 'en', type: 0, label: '英語' },
|
||||
{ value: 'zh', type: 0, label: '中国語' },
|
||||
{ value: 'ja', type: 0, label: '日本語' },
|
||||
{ value: 'ko', type: 0, label: '韓国語' },
|
||||
],
|
||||
transModel: [
|
||||
{ value: 'ollama', label: 'Ollama モデルまたは OpenAI 互換モデル' },
|
||||
{ value: 'google', label: 'Google API 呼び出し' },
|
||||
]
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ export default {
|
||||
"stopped": "Caption Engine Stopped",
|
||||
"stoppedInfo": "The caption engine has stopped. You can click the 'Start Caption Engine' button to restart it.",
|
||||
"error": "An error occurred",
|
||||
"engineError": "The subtitle engine encountered an error and requested a forced exit.",
|
||||
"engineError": "The caption engine encountered an error and requested a forced exit.",
|
||||
"socketError": "The Socket connection between the main program and the caption engine failed",
|
||||
"engineChange": "Cpation Engine Configuration Changed",
|
||||
"changeInfo": "If the caption engine is already running, you need to restart it for the changes to take effect.",
|
||||
@@ -50,8 +50,10 @@ export default {
|
||||
"sourceLang": "Source",
|
||||
"transLang": "Translation",
|
||||
"transModel": "Model",
|
||||
"ollama": "Ollama",
|
||||
"ollamaNote": "To use for translation, the name of the local Ollama model that will call the service on the default port. It is recommended to use a non-inference model with less than 1B parameters.",
|
||||
"modelName": "Model Name",
|
||||
"modelNameNote": "Please enter the translation model name you wish to use, which can be either a local Ollama model or an OpenAI API compatible cloud model. If the Base URL field is left blank, the local Ollama service will be called by default; otherwise, the API service at the specified address will be called via the Python OpenAI library.",
|
||||
"baseURL": "The base request URL for calling OpenAI API. If left empty, the local default port Ollama model will be used.",
|
||||
"apiKey": "The API KEY required for the model corresponding to OpenAI API.",
|
||||
"captionEngine": "Engine",
|
||||
"audioType": "Audio Type",
|
||||
"systemOutput": "System Audio Output (Speaker)",
|
||||
@@ -65,9 +67,10 @@ export default {
|
||||
"recordingPath": "Save Path",
|
||||
"startTimeout": "Timeout",
|
||||
"seconds": "seconds",
|
||||
"apikeyInfo": "API KEY required for the Gummy subtitle engine, which needs to be obtained from the Alibaba Cloud Bailing platform. For more details, see the project user manual.",
|
||||
"voskModelPathInfo": "The folder path of the model required by the Vosk subtitle engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
|
||||
"sosvModelPathInfo": "The folder path of the model required by the SOSV subtitle engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
|
||||
"apikeyInfo": "API KEY required for the Gummy caption engine, which needs to be obtained from the Alibaba Cloud Bailing platform. For more details, see the project user manual.",
|
||||
"glmApikeyInfo": "API KEY required for GLM caption engine, which needs to be obtained from the Zhipu AI platform.",
|
||||
"voskModelPathInfo": "The folder path of the model required by the Vosk caption engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
|
||||
"sosvModelPathInfo": "The folder path of the model required by the SOSV caption engine. You need to download the required model to your local machine in advance. For more details, see the project user manual.",
|
||||
"recordingPathInfo": "The path to save recording files, requiring a folder path. The software will automatically name the recording file and save it as .wav file.",
|
||||
"modelDownload": "Model Download Link",
|
||||
"startTimeoutInfo": "Caption engine startup timeout duration. Engine will be forcefully stopped if startup exceeds this time. Recommended range: 10-120 seconds.",
|
||||
@@ -143,7 +146,7 @@ export default {
|
||||
"projLink": "Project Link",
|
||||
"manual": "User Manual",
|
||||
"engineDoc": "Caption Engine Manual",
|
||||
"date": "September 8th, 2025"
|
||||
"date": "January 10th, 2026"
|
||||
}
|
||||
},
|
||||
log: {
|
||||
|
||||
@@ -50,8 +50,10 @@ export default {
|
||||
"sourceLang": "ソース言語",
|
||||
"transLang": "翻訳言語",
|
||||
"transModel": "翻訳モデル",
|
||||
"ollama": "Ollama",
|
||||
"ollamaNote": "翻訳に使用する、デフォルトポートでサービスを呼び出すローカルOllamaモデルの名前。1B 未満のパラメータを持つ非推論モデルの使用を推奨します。",
|
||||
"modelName": "モデル名",
|
||||
"modelNameNote": "使用する翻訳モデル名を入力してください。Ollama のローカルモデルでも OpenAI API 互換のクラウドモデルでも可能です。Base URL フィールドが未入力の場合、デフォルトでローカルの Ollama サービスが呼び出され、それ以外の場合は Python OpenAI ライブラリ経由で指定されたアドレスの API サービスが呼び出されます。",
|
||||
"baseURL": "OpenAI API を呼び出すための基本リクエスト URL です。未記入の場合、ローカルのデフォルトポートの Ollama モデルが呼び出されます。",
|
||||
"apiKey": "OpenAI API に対応するモデルを使用するために必要な API キーです。",
|
||||
"captionEngine": "エンジン",
|
||||
"audioType": "オーディオ",
|
||||
"systemOutput": "システムオーディオ出力(スピーカー)",
|
||||
@@ -66,6 +68,7 @@ export default {
|
||||
"startTimeout": "時間制限",
|
||||
"seconds": "秒",
|
||||
"apikeyInfo": "Gummy 字幕エンジンに必要な API KEY は、アリババクラウド百煉プラットフォームから取得する必要があります。詳細情報はプロジェクトのユーザーマニュアルをご覧ください。",
|
||||
"glmApikeyInfo": "GLM 字幕エンジンに必要な API KEY で、智譜 AI プラットフォームから取得する必要があります。",
|
||||
"voskModelPathInfo": "Vosk 字幕エンジンに必要なモデルのフォルダパスです。必要なモデルを事前にローカルマシンにダウンロードする必要があります。詳細情報はプロジェクトのユーザーマニュアルをご覧ください。",
|
||||
"sosvModelPathInfo": "SOSV 字幕エンジンに必要なモデルのフォルダパスです。必要なモデルを事前にローカルマシンにダウンロードする必要があります。詳細情報はプロジェクトのユーザーマニュアルをご覧ください。",
|
||||
"recordingPathInfo": "録音ファイルの保存パスで、フォルダパスを指定する必要があります。ソフトウェアが自動的に録音ファイルに名前を付けて .wav ファイルとして保存します。",
|
||||
@@ -142,7 +145,7 @@ export default {
|
||||
"projLink": "プロジェクトリンク",
|
||||
"manual": "ユーザーマニュアル",
|
||||
"engineDoc": "字幕エンジンマニュアル",
|
||||
"date": "2025 年 9 月 8 日"
|
||||
"date": "2026 年 1 月 10 日"
|
||||
}
|
||||
},
|
||||
log: {
|
||||
|
||||
@@ -50,8 +50,10 @@ export default {
|
||||
"sourceLang": "源语言",
|
||||
"transLang": "翻译语言",
|
||||
"transModel": "翻译模型",
|
||||
"ollama": "Ollama",
|
||||
"ollamaNote": "要使用的进行翻译的本地 Ollama 模型的名称,将调用默认端口的服务,建议使用参数量小于 1B 的非推理模型。",
|
||||
"modelName": "模型名称",
|
||||
"modelNameNote": "请输入要使用的翻译模型名称,可以是 Ollama 本地模型,也可以是 OpenAI API 兼容的云端模型。若未填写 Base URL 字段,则默认调用本地 Ollama 服务,否则会通过 Python OpenAI 库调用该地址指向的 API 服务。",
|
||||
"baseURL": "调用 OpenAI API 的基础请求地址,如果不填写则调用本地默认端口的 Ollama 模型。",
|
||||
"apiKey": "调用 OpenAI API 对应的模型需要使用的 API KEY。",
|
||||
"captionEngine": "字幕引擎",
|
||||
"audioType": "音频类型",
|
||||
"systemOutput": "系统音频输出(扬声器)",
|
||||
@@ -66,6 +68,7 @@ export default {
|
||||
"startTimeout": "启动超时",
|
||||
"seconds": "秒",
|
||||
"apikeyInfo": "Gummy 字幕引擎需要的 API KEY,需要在阿里云百炼平台获取。详细信息见项目用户手册。",
|
||||
"glmApikeyInfo": "GLM 字幕引擎需要的 API KEY,需要在智谱 AI 平台获取。",
|
||||
"voskModelPathInfo": "Vosk 字幕引擎需要的模型的文件夹路径,需要提前下载需要的模型到本地。信息详情见项目用户手册。",
|
||||
"sosvModelPathInfo": "SOSV 字幕引擎需要的模型的文件夹路径,需要提前下载需要的模型到本地。信息详情见项目用户手册。",
|
||||
"recordingPathInfo": "录音文件保存路径,需要提供文件夹路径。软件会自动命名录音文件并保存为 .wav 文件。",
|
||||
@@ -142,7 +145,7 @@ export default {
|
||||
"projLink": "项目链接",
|
||||
"manual": "用户手册",
|
||||
"engineDoc": "字幕引擎手册",
|
||||
"date": "2025 年 9 月 8 日"
|
||||
"date": "2026 年 1 月 10 日"
|
||||
}
|
||||
},
|
||||
log: {
|
||||
|
||||
@@ -21,6 +21,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
const targetLang = ref<string>('zh')
|
||||
const transModel = ref<string>('ollama')
|
||||
const ollamaName = ref<string>('')
|
||||
const ollamaUrl = ref<string>('')
|
||||
const ollamaApiKey = ref<string>('')
|
||||
const engine = ref<string>('gummy')
|
||||
const audio = ref<0 | 1>(0)
|
||||
const translation = ref<boolean>(true)
|
||||
@@ -28,6 +30,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
const API_KEY = ref<string>('')
|
||||
const voskModelPath = ref<string>('')
|
||||
const sosvModelPath = ref<string>('')
|
||||
const glmUrl = ref<string>('https://open.bigmodel.cn/api/paas/v4/audio/transcriptions')
|
||||
const glmModel = ref<string>('glm-asr-2512')
|
||||
const glmApiKey = ref<string>('')
|
||||
const recordingPath = ref<string>('')
|
||||
const customized = ref<boolean>(false)
|
||||
const customizedApp = ref<string>('')
|
||||
@@ -44,6 +49,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
targetLang: targetLang.value,
|
||||
transModel: transModel.value,
|
||||
ollamaName: ollamaName.value,
|
||||
ollamaUrl: ollamaUrl.value,
|
||||
ollamaApiKey: ollamaApiKey.value,
|
||||
engine: engine.value,
|
||||
audio: audio.value,
|
||||
translation: translation.value,
|
||||
@@ -51,6 +58,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
API_KEY: API_KEY.value,
|
||||
voskModelPath: voskModelPath.value,
|
||||
sosvModelPath: sosvModelPath.value,
|
||||
glmUrl: glmUrl.value,
|
||||
glmModel: glmModel.value,
|
||||
glmApiKey: glmApiKey.value,
|
||||
recordingPath: recordingPath.value,
|
||||
customized: customized.value,
|
||||
customizedApp: customizedApp.value,
|
||||
@@ -80,6 +90,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
targetLang.value = controls.targetLang
|
||||
transModel.value = controls.transModel
|
||||
ollamaName.value = controls.ollamaName
|
||||
ollamaUrl.value = controls.ollamaUrl
|
||||
ollamaApiKey.value = controls.ollamaApiKey
|
||||
engine.value = controls.engine
|
||||
audio.value = controls.audio
|
||||
engineEnabled.value = controls.engineEnabled
|
||||
@@ -88,6 +100,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
API_KEY.value = controls.API_KEY
|
||||
voskModelPath.value = controls.voskModelPath
|
||||
sosvModelPath.value = controls.sosvModelPath
|
||||
glmUrl.value = controls.glmUrl || 'https://open.bigmodel.cn/api/paas/v4/audio/transcriptions'
|
||||
glmModel.value = controls.glmModel || 'glm-asr-2512'
|
||||
glmApiKey.value = controls.glmApiKey
|
||||
recordingPath.value = controls.recordingPath
|
||||
customized.value = controls.customized
|
||||
customizedApp.value = controls.customizedApp
|
||||
@@ -150,6 +165,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
targetLang, // 目标语言
|
||||
transModel, // 翻译模型
|
||||
ollamaName, // Ollama 模型
|
||||
ollamaUrl,
|
||||
ollamaApiKey,
|
||||
engine, // 字幕引擎
|
||||
audio, // 选择音频
|
||||
translation, // 是否启用翻译
|
||||
@@ -157,6 +174,9 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
API_KEY, // API KEY
|
||||
voskModelPath, // vosk 模型路径
|
||||
sosvModelPath, // sosv 模型路径
|
||||
glmUrl, // GLM API URL
|
||||
glmModel, // GLM 模型名称
|
||||
glmApiKey, // GLM API Key
|
||||
recordingPath, // 录音保存路径
|
||||
customized, // 是否使用自定义字幕引擎
|
||||
customizedApp, // 自定义字幕引擎的应用程序
|
||||
|
||||
@@ -8,6 +8,8 @@ export interface Controls {
|
||||
targetLang: string,
|
||||
transModel: string,
|
||||
ollamaName: string,
|
||||
ollamaUrl: string,
|
||||
ollamaApiKey: string,
|
||||
engine: string,
|
||||
audio: 0 | 1,
|
||||
translation: boolean,
|
||||
@@ -15,6 +17,9 @@ export interface Controls {
|
||||
API_KEY: string,
|
||||
voskModelPath: string,
|
||||
sosvModelPath: string,
|
||||
glmUrl: string,
|
||||
glmModel: string,
|
||||
glmApiKey: string,
|
||||
recordingPath: string,
|
||||
customized: boolean,
|
||||
customizedApp: string,
|
||||
|
||||
Reference in New Issue
Block a user