mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:14:42 +08:00
feat(engine): 添加 Vosk 本地离线引擎支持
- 新增 Vosk 引擎配置和识别逻辑
- 更新用户界面,增加 Vosk 引擎选项和模型路径设置
- 更新依赖,添加 vosk 库
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -7,5 +7,6 @@ out
|
|||||||
__pycache__
|
__pycache__
|
||||||
subenv
|
subenv
|
||||||
caption-engine/build
|
caption-engine/build
|
||||||
|
caption-engine/models
|
||||||
output.wav
|
output.wav
|
||||||
.venv
|
.venv
|
||||||
|
|||||||
12
README.md
12
README.md
@@ -2,11 +2,13 @@
|
|||||||
<img src="./build/icon.png" width="100px" height="100px"/>
|
<img src="./build/icon.png" width="100px" height="100px"/>
|
||||||
<h1 align="center">auto-caption</h1>
|
<h1 align="center">auto-caption</h1>
|
||||||
<p>Auto Caption 是一个跨平台的实时字幕显示软件。</p>
|
<p>Auto Caption 是一个跨平台的实时字幕显示软件。</p>
|
||||||
<img src="https://img.shields.io/badge/version-0.3.0-blue">
|
<p>
|
||||||
<img src="https://img.shields.io/github/issues/HiMeditator/auto-caption?color=orange">
|
<img src="https://img.shields.io/badge/version-0.3.0-blue">
|
||||||
<img src="https://img.shields.io/github/languages/top/HiMeditator/auto-caption?color=royalblue">
|
<img src="https://img.shields.io/github/issues/HiMeditator/auto-caption?color=orange">
|
||||||
<img src="https://img.shields.io/github/repo-size/HiMeditator/auto-caption?color=green">
|
<img src="https://img.shields.io/github/languages/top/HiMeditator/auto-caption?color=royalblue">
|
||||||
<img src="https://visitor-badge.laobi.icu/badge?page_id=himeditator.auto-caption">
|
<img src="https://img.shields.io/github/repo-size/HiMeditator/auto-caption?color=green">
|
||||||
|
<img src="https://visitor-badge.laobi.icu/badge?page_id=himeditator.auto-caption">
|
||||||
|
</p>
|
||||||
<p>
|
<p>
|
||||||
| <b>简体中文</b>
|
| <b>简体中文</b>
|
||||||
| <a href="./README_en.md">English</a>
|
| <a href="./README_en.md">English</a>
|
||||||
|
|||||||
12
README_en.md
12
README_en.md
@@ -2,11 +2,13 @@
|
|||||||
<img src="./build/icon.png" width="100px" height="100px"/>
|
<img src="./build/icon.png" width="100px" height="100px"/>
|
||||||
<h1 align="center">auto-caption</h1>
|
<h1 align="center">auto-caption</h1>
|
||||||
<p>Auto Caption is a cross-platform real-time caption display software.</p>
|
<p>Auto Caption is a cross-platform real-time caption display software.</p>
|
||||||
<img src="https://img.shields.io/badge/version-0.3.0-blue">
|
<p>
|
||||||
<img src="https://img.shields.io/github/issues/HiMeditator/auto-caption?color=orange">
|
<img src="https://img.shields.io/badge/version-0.3.0-blue">
|
||||||
<img src="https://img.shields.io/github/languages/top/HiMeditator/auto-caption?color=royalblue">
|
<img src="https://img.shields.io/github/issues/HiMeditator/auto-caption?color=orange">
|
||||||
<img src="https://img.shields.io/github/repo-size/HiMeditator/auto-caption?color=green">
|
<img src="https://img.shields.io/github/languages/top/HiMeditator/auto-caption?color=royalblue">
|
||||||
<img src="https://visitor-badge.laobi.icu/badge?page_id=himeditator.auto-caption">
|
<img src="https://img.shields.io/github/repo-size/HiMeditator/auto-caption?color=green">
|
||||||
|
<img src="https://visitor-badge.laobi.icu/badge?page_id=himeditator.auto-caption">
|
||||||
|
</p>
|
||||||
<p>
|
<p>
|
||||||
| <a href="./README.md">简体中文</a>
|
| <a href="./README.md">简体中文</a>
|
||||||
| <b>English</b>
|
| <b>English</b>
|
||||||
|
|||||||
12
README_ja.md
12
README_ja.md
@@ -2,11 +2,13 @@
|
|||||||
<img src="./build/icon.png" width="100px" height="100px"/>
|
<img src="./build/icon.png" width="100px" height="100px"/>
|
||||||
<h1 align="center">auto-caption</h1>
|
<h1 align="center">auto-caption</h1>
|
||||||
<p>Auto Caption はクロスプラットフォームのリアルタイム字幕表示ソフトウェアです。</p>
|
<p>Auto Caption はクロスプラットフォームのリアルタイム字幕表示ソフトウェアです。</p>
|
||||||
<img src="https://img.shields.io/badge/version-0.3.0-blue">
|
<p>
|
||||||
<img src="https://img.shields.io/github/issues/HiMeditator/auto-caption?color=orange">
|
<img src="https://img.shields.io/badge/version-0.3.0-blue">
|
||||||
<img src="https://img.shields.io/github/languages/top/HiMeditator/auto-caption?color=royalblue">
|
<img src="https://img.shields.io/github/issues/HiMeditator/auto-caption?color=orange">
|
||||||
<img src="https://img.shields.io/github/repo-size/HiMeditator/auto-caption?color=green">
|
<img src="https://img.shields.io/github/languages/top/HiMeditator/auto-caption?color=royalblue">
|
||||||
<img src="https://visitor-badge.laobi.icu/badge?page_id=himeditator.auto-caption">
|
<img src="https://img.shields.io/github/repo-size/HiMeditator/auto-caption?color=green">
|
||||||
|
<img src="https://visitor-badge.laobi.icu/badge?page_id=himeditator.auto-caption">
|
||||||
|
</p>
|
||||||
<p>
|
<p>
|
||||||
| <a href="./README.md">简体中文</a>
|
| <a href="./README.md">简体中文</a>
|
||||||
| <a href="./README_en.md">English</a>
|
| <a href="./README_en.md">English</a>
|
||||||
|
|||||||
@@ -47,3 +47,22 @@ def resampleRawChunk(chunk, channels, orig_sr, target_sr, mode="sinc_best"):
|
|||||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||||
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
||||||
return chunk_mono_r.tobytes()
|
return chunk_mono_r.tobytes()
|
||||||
|
|
||||||
|
def resampleMonoChunk(chunk, orig_sr, target_sr, mode="sinc_best"):
    """
    Resample a single-channel (mono) audio chunk to a new sample rate.

    Args:
        chunk: (bytes) mono audio data, read as 16-bit signed integer samples
        orig_sr: original sample rate
        target_sr: target sample rate
        mode: resampling mode, one of: 'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'

    Return:
        (bytes) resampled mono audio data as 16-bit signed integer samples
    """
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
    ratio = target_sr / orig_sr
    chunk_r = samplerate.resample(chunk_np, ratio, converter_type=mode)
    # Interpolated samples can land slightly outside the int16 range on loud
    # input. Clamp them before the cast so they saturate instead of wrapping
    # around to the opposite sign (which is heard as a loud click).
    int16_range = np.iinfo(np.int16)
    chunk_r = np.clip(np.round(chunk_r), int16_range.min, int16_range.max).astype(np.int16)
    return chunk_r.tobytes()
|
||||||
|
|||||||
83
caption-engine/main-vosk.py
Normal file
83
caption-engine/main-vosk.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime
|
||||||
|
import numpy.core.multiarray
|
||||||
|
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
from sysaudio.win import AudioStream
|
||||||
|
elif sys.platform == 'darwin':
|
||||||
|
from sysaudio.darwin import AudioStream
|
||||||
|
elif sys.platform == 'linux':
|
||||||
|
from sysaudio.linux import AudioStream
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(f"Unsupported platform: {sys.platform}")
|
||||||
|
|
||||||
|
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||||
|
from audioprcs import resampleRawChunk
|
||||||
|
|
||||||
|
SetLogLevel(-1)
|
||||||
|
|
||||||
|
def _now_str():
    """Return the current local time formatted as HH:MM:SS.mmm."""
    return datetime.now().strftime('%H:%M:%S.%f')[:-3]


def convert_audio_to_text(audio_type, chunk_rate, model_path):
    """
    Capture audio, run Vosk speech recognition on it and stream captions to stdout.

    Every caption is written to stdout as one JSON object per line with the keys
    'index', 'text', 'time_s', 'time_t' and 'translation'. Partial results reuse
    the index of the sentence in progress; a final result closes the sentence and
    the index is then incremented. 'translation' is always an empty string here.

    Args:
        audio_type: audio source selector, forwarded unchanged to AudioStream
        chunk_rate: number of audio chunks per second, forwarded unchanged to AudioStream
        model_path: path to the Vosk model; one leading and one trailing double
            quote are removed if present

    The function loops until stdout can no longer be written to (or the process
    is terminated); it does not return a value.
    """
    sys.stdout.reconfigure(line_buffering=True) # type: ignore

    # The path may arrive wrapped in literal double quotes; drop one on each side.
    if model_path.startswith('"'):
        model_path = model_path[1:]
    if model_path.endswith('"'):
        model_path = model_path[:-1]

    model = Model(model_path)
    # The recognizer is created for 16 kHz input, so every chunk is resampled
    # to 16000 Hz below before it is fed in.
    recognizer = KaldiRecognizer(model, 16000)

    stream = AudioStream(audio_type, chunk_rate)
    stream.openStream()

    time_str = ''       # start time of the sentence currently being recognized
    cur_id = 0          # index of the sentence currently being recognized
    prev_content = ''   # last partial text emitted for the current sentence

    while True:
        chunk = stream.read_chunk()
        chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, 16000)

        if recognizer.AcceptWaveform(chunk_mono):
            # Final result for the current sentence.
            content = json.loads(recognizer.Result()).get('text', '')
            if prev_content == '':
                if content == '':
                    # Nothing was shown for this sentence and nothing was
                    # recognized (e.g. silence): do not emit a blank caption
                    # or consume an index.
                    continue
                # No partial preceded this final result, so the start time
                # has not been set for this sentence yet.
                time_str = _now_str()
            caption = {
                'index': cur_id,
                'text': content,
                'time_s': time_str,
                'time_t': _now_str(),
                'translation': '',
            }
            prev_content = ''
            cur_id += 1
        else:
            # Partial (still changing) result for the current sentence.
            content = json.loads(recognizer.PartialResult()).get('partial', '')
            if content == '' or content == prev_content:
                continue
            if prev_content == '':
                # First text of a new sentence: remember when it started.
                time_str = _now_str()
            caption = {
                'index': cur_id,
                'text': content,
                'time_s': time_str,
                'time_t': _now_str(),
                'translation': '',
            }
            prev_content = content

        try:
            sys.stdout.write(json.dumps(caption) + '\n')
            sys.stdout.flush()
        except OSError:
            # stdout is no longer writable (the reader went away); there is
            # nobody left to receive captions, so stop.
            break
        except Exception as e:
            # stdout carries the JSON caption lines, so diagnostics must go
            # to stderr to keep that stream parseable.
            print(e, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and start captioning.
    parser = argparse.ArgumentParser(description='Convert system audio stream to text')
    # type=int lets argparse reject non-numeric values with a usage message
    # instead of a ValueError traceback from a later int() call.
    parser.add_argument('-a', '--audio_type', type=int, default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
    parser.add_argument('-c', '--chunk_rate', type=int, default=20, help='The number of audio stream chunks collected per second.')
    parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
    args = parser.parse_args()
    if args.chunk_rate <= 0:
        # chunk_rate is a per-second count and is used as a divisor downstream.
        parser.error('chunk_rate must be a positive integer')
    convert_audio_to_text(
        args.audio_type,
        args.chunk_rate,
        args.model_path
    )
|
||||||
42
caption-engine/main-vosk.spec
Normal file
42
caption-engine/main-vosk.spec
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import importlib.util


def find_package_dir(package_name):
    """
    Return the directory of an installed top-level package as a string.

    The location is looked up through the import system without importing
    the package itself. Returns None when the package cannot be found or
    has no file-based origin.
    """
    spec = importlib.util.find_spec(package_name)
    if spec is None or spec.origin is None:
        return None
    # origin points at the package's __init__.py; its parent is the package directory.
    return str(Path(spec.origin).resolve().parent)


# Bundle the vosk package directory as data. Resolve it from the active
# environment so the spec works regardless of the virtualenv layout; fall back
# to the Windows-style ./subenv path when vosk is not importable from here.
vosk_path = find_package_dir('vosk') or str(Path('./subenv/Lib/site-packages/vosk').resolve())
|
||||||
|
|
||||||
|
a = Analysis(
|
||||||
|
['main-vosk.py'],
|
||||||
|
pathex=[],
|
||||||
|
binaries=[],
|
||||||
|
datas=[(vosk_path, 'vosk')],
|
||||||
|
hiddenimports=[],
|
||||||
|
hookspath=[],
|
||||||
|
hooksconfig={},
|
||||||
|
runtime_hooks=[],
|
||||||
|
excludes=[],
|
||||||
|
noarchive=False,
|
||||||
|
optimize=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
pyz = PYZ(a.pure)
|
||||||
|
|
||||||
|
exe = EXE(
|
||||||
|
pyz,
|
||||||
|
a.scripts,
|
||||||
|
a.binaries,
|
||||||
|
a.datas,
|
||||||
|
[],
|
||||||
|
name='main-vosk',
|
||||||
|
debug=False,
|
||||||
|
bootloader_ignore_signals=False,
|
||||||
|
strip=False,
|
||||||
|
upx=True,
|
||||||
|
upx_exclude=[],
|
||||||
|
runtime_tmpdir=None,
|
||||||
|
console=True,
|
||||||
|
disable_windowed_traceback=False,
|
||||||
|
argv_emulation=False,
|
||||||
|
target_arch=None,
|
||||||
|
codesign_identity=None,
|
||||||
|
entitlements_file=None,
|
||||||
|
)
|
||||||
@@ -3,4 +3,5 @@ numpy
|
|||||||
samplerate
|
samplerate
|
||||||
PyAudio
|
PyAudio
|
||||||
PyAudioWPatch # Windows only
|
PyAudioWPatch # Windows only
|
||||||
|
vosk
|
||||||
pyinstaller
|
pyinstaller
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ class AudioStream:
|
|||||||
self.stream = None
|
self.stream = None
|
||||||
self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
|
self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
|
||||||
self.FORMAT = pyaudio.paInt16
|
self.FORMAT = pyaudio.paInt16
|
||||||
self.CHANNELS = self.device["maxInputChannels"]
|
self.CHANNELS = int(self.device["maxInputChannels"])
|
||||||
self.RATE = int(self.device["defaultSampleRate"])
|
self.RATE = int(self.device["defaultSampleRate"])
|
||||||
self.CHUNK = self.RATE // chunk_rate
|
self.CHUNK = self.RATE // chunk_rate
|
||||||
self.INDEX = self.device["index"]
|
self.INDEX = self.device["index"]
|
||||||
|
|||||||
@@ -10,8 +10,15 @@ files:
|
|||||||
- '!{.env,.env.*,.npmrc,pnpm-lock.yaml}'
|
- '!{.env,.env.*,.npmrc,pnpm-lock.yaml}'
|
||||||
- '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}'
|
- '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}'
|
||||||
extraResources:
|
extraResources:
|
||||||
from: ./caption-engine/dist/main-gummy.exe
|
- from: ./caption-engine/dist/main-gummy.exe
|
||||||
to: ./caption-engine/main-gummy.exe
|
to: ./caption-engine/main-gummy.exe
|
||||||
|
- from: ./caption-engine/dist/main-vosk.exe
|
||||||
|
to: ./caption-engine/main-vosk.exe
|
||||||
|
# For macOS and Linux
|
||||||
|
# - from: ./caption-engine/dist/main-gummy
|
||||||
|
# to: ./caption-engine/main-gummy
|
||||||
|
# - from: ./caption-engine/dist/main-vosk
|
||||||
|
# to: ./caption-engine/main-vosk
|
||||||
asarUnpack:
|
asarUnpack:
|
||||||
- resources/**
|
- resources/**
|
||||||
win:
|
win:
|
||||||
|
|||||||
124
engine-test/vosk.ipynb
Normal file
124
engine-test/vosk.ipynb
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "6fb12704",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"d:\\Projects\\auto-caption\\caption-engine\\subenv\\Lib\\site-packages\\vosk\\__init__.py\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import vosk\n",
|
||||||
|
"print(vosk.__file__)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "63a06f5c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
" 采样设备:\n",
|
||||||
|
" - 设备类型:音频输入\n",
|
||||||
|
" - 序号:1\n",
|
||||||
|
" - 名称:麦克风阵列 (Realtek(R) Audio)\n",
|
||||||
|
" - 最大输入通道数:2\n",
|
||||||
|
" - 默认低输入延迟:0.09s\n",
|
||||||
|
" - 默认高输入延迟:0.18s\n",
|
||||||
|
" - 默认采样率:44100.0Hz\n",
|
||||||
|
" - 是否回环设备:False\n",
|
||||||
|
"\n",
|
||||||
|
" 音频样本块大小:2205\n",
|
||||||
|
" 样本位宽:2\n",
|
||||||
|
" 采样格式:8\n",
|
||||||
|
" 音频通道数:2\n",
|
||||||
|
" 音频采样率:44100\n",
|
||||||
|
" \n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"import os\n",
|
||||||
|
"import json\n",
|
||||||
|
"from vosk import Model, KaldiRecognizer\n",
|
||||||
|
"\n",
|
||||||
|
"current_dir = os.getcwd() \n",
|
||||||
|
"sys.path.append(os.path.join(current_dir, '../caption-engine'))\n",
|
||||||
|
"\n",
|
||||||
|
"from sysaudio.win import AudioStream\n",
|
||||||
|
"from audioprcs import resampleRawChunk, mergeChunkChannels\n",
|
||||||
|
"\n",
|
||||||
|
"stream = AudioStream(1)\n",
|
||||||
|
"stream.printInfo()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "5d5a0afa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model = Model(os.path.join(\n",
|
||||||
|
" current_dir,\n",
|
||||||
|
" '../caption-engine/models/vosk-model-small-cn-0.22'\n",
|
||||||
|
"))\n",
|
||||||
|
"recognizer = KaldiRecognizer(model, 16000)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7e9d1530",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"stream.openStream()\n",
|
||||||
|
"\n",
|
||||||
|
"for i in range(200):\n",
|
||||||
|
" chunk = stream.read_chunk()\n",
|
||||||
|
" chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, 16000)\n",
|
||||||
|
" if recognizer.AcceptWaveform(chunk_mono):\n",
|
||||||
|
" result = json.loads(recognizer.Result())\n",
|
||||||
|
" print(\"acc:\", result.get(\"text\", \"\"))\n",
|
||||||
|
" else:\n",
|
||||||
|
" partial = json.loads(recognizer.PartialResult())\n",
|
||||||
|
" print(\"else:\", partial.get(\"partial\", \"\"))"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "subenv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
@@ -6,10 +6,11 @@ export interface Controls {
|
|||||||
engineEnabled: boolean,
|
engineEnabled: boolean,
|
||||||
sourceLang: string,
|
sourceLang: string,
|
||||||
targetLang: string,
|
targetLang: string,
|
||||||
engine: 'gummy',
|
engine: string,
|
||||||
audio: 0 | 1,
|
audio: 0 | 1,
|
||||||
translation: boolean,
|
translation: boolean,
|
||||||
API_KEY: string,
|
API_KEY: string,
|
||||||
|
modelPath: string,
|
||||||
customized: boolean,
|
customized: boolean,
|
||||||
customizedApp: string,
|
customizedApp: string,
|
||||||
customizedCommand: string
|
customizedCommand: string
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ const defaultControls: Controls = {
|
|||||||
audio: 0,
|
audio: 0,
|
||||||
engineEnabled: false,
|
engineEnabled: false,
|
||||||
API_KEY: '',
|
API_KEY: '',
|
||||||
|
modelPath: '',
|
||||||
translation: true,
|
translation: true,
|
||||||
customized: false,
|
customized: false,
|
||||||
customizedApp: '',
|
customizedApp: '',
|
||||||
|
|||||||
@@ -13,26 +13,20 @@ export class CaptionEngine {
|
|||||||
processStatus: 'running' | 'stopping' | 'stopped' = 'stopped'
|
processStatus: 'running' | 'stopping' | 'stopped' = 'stopped'
|
||||||
|
|
||||||
private getApp(): boolean {
|
private getApp(): boolean {
|
||||||
|
allConfig.controls.customized = false
|
||||||
if (allConfig.controls.customized && allConfig.controls.customizedApp) {
|
if (allConfig.controls.customized && allConfig.controls.customizedApp) {
|
||||||
this.appPath = allConfig.controls.customizedApp
|
this.appPath = allConfig.controls.customizedApp
|
||||||
this.command = [allConfig.controls.customizedCommand]
|
this.command = [allConfig.controls.customizedCommand]
|
||||||
|
allConfig.controls.customized = true
|
||||||
}
|
}
|
||||||
else if (allConfig.controls.engine === 'gummy') {
|
else if (allConfig.controls.engine === 'gummy') {
|
||||||
allConfig.controls.customized = false
|
|
||||||
if(!allConfig.controls.API_KEY && !process.env.DASHSCOPE_API_KEY) {
|
if(!allConfig.controls.API_KEY && !process.env.DASHSCOPE_API_KEY) {
|
||||||
controlWindow.sendErrorMessage(i18n('gummy.key.missing'))
|
controlWindow.sendErrorMessage(i18n('gummy.key.missing'))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
let gummyName = ''
|
let gummyName = 'main-gummy'
|
||||||
if (process.platform === 'win32') {
|
if (process.platform === 'win32') {
|
||||||
gummyName = 'main-gummy.exe'
|
gummyName += '.exe'
|
||||||
}
|
|
||||||
else if (process.platform === 'darwin' || process.platform === 'linux') {
|
|
||||||
gummyName = 'main-gummy'
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
controlWindow.sendErrorMessage(i18n('platform.unsupported') + process.platform)
|
|
||||||
throw new Error(i18n('platform.unsupported'))
|
|
||||||
}
|
}
|
||||||
if (is.dev) {
|
if (is.dev) {
|
||||||
this.appPath = path.join(
|
this.appPath = path.join(
|
||||||
@@ -55,10 +49,29 @@ export class CaptionEngine {
|
|||||||
if(allConfig.controls.API_KEY) {
|
if(allConfig.controls.API_KEY) {
|
||||||
this.command.push('-k', allConfig.controls.API_KEY)
|
this.command.push('-k', allConfig.controls.API_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[INFO] Engine Path:', this.appPath)
|
|
||||||
console.log('[INFO] Engine Command:', this.command)
|
|
||||||
}
|
}
|
||||||
|
else if(allConfig.controls.engine === 'vosk'){
|
||||||
|
let voskName = 'main-vosk'
|
||||||
|
if (process.platform === 'win32') {
|
||||||
|
voskName += '.exe'
|
||||||
|
}
|
||||||
|
if (is.dev) {
|
||||||
|
this.appPath = path.join(
|
||||||
|
app.getAppPath(),
|
||||||
|
'caption-engine', 'dist', voskName
|
||||||
|
)
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
this.appPath = path.join(
|
||||||
|
process.resourcesPath, 'caption-engine', voskName
|
||||||
|
)
|
||||||
|
}
|
||||||
|
this.command = []
|
||||||
|
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
||||||
|
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
|
||||||
|
}
|
||||||
|
console.log('[INFO] Engine Path:', this.appPath)
|
||||||
|
console.log('[INFO] Engine Command:', this.command)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
<div class="input-item">
|
<div class="input-item">
|
||||||
<span class="input-label">{{ $t('engine.transLang') }}</span>
|
<span class="input-label">{{ $t('engine.transLang') }}</span>
|
||||||
<a-select
|
<a-select
|
||||||
|
:disabled="currentEngine === 'vosk'"
|
||||||
class="input-area"
|
class="input-area"
|
||||||
v-model:value="currentTargetLang"
|
v-model:value="currentTargetLang"
|
||||||
:options="langList.filter((item) => item.value !== 'auto')"
|
:options="langList.filter((item) => item.value !== 'auto')"
|
||||||
@@ -47,7 +48,8 @@
|
|||||||
<a-switch v-model:checked="showMore" />
|
<a-switch v-model:checked="showMore" />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<a-card size="small" :title="$t('engine.custom.title')" v-show="showMore">
|
|
||||||
|
<a-card size="small" :title="$t('engine.showMore')" v-show="showMore">
|
||||||
<div class="input-item">
|
<div class="input-item">
|
||||||
<span class="input-label">{{ $t('engine.apikey') }}</span>
|
<span class="input-label">{{ $t('engine.apikey') }}</span>
|
||||||
<a-input
|
<a-input
|
||||||
@@ -56,6 +58,13 @@
|
|||||||
v-model:value="currentAPI_KEY"
|
v-model:value="currentAPI_KEY"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="input-item">
|
||||||
|
<span class="input-label">{{ $t('engine.modelPath') }}</span>
|
||||||
|
<a-input
|
||||||
|
class="input-area"
|
||||||
|
v-model:value="currentModelPath"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
<div class="input-item">
|
<div class="input-item">
|
||||||
<span style="margin-right:5px;">{{ $t('engine.customEngine') }}</span>
|
<span style="margin-right:5px;">{{ $t('engine.customEngine') }}</span>
|
||||||
<a-switch v-model:checked="currentCustomized" />
|
<a-switch v-model:checked="currentCustomized" />
|
||||||
@@ -85,9 +94,8 @@
|
|||||||
></a-input>
|
></a-input>
|
||||||
</div>
|
</div>
|
||||||
</a-card>
|
</a-card>
|
||||||
</div>
|
</div>
|
||||||
</a-card>
|
</a-card>
|
||||||
|
|
||||||
</a-card>
|
</a-card>
|
||||||
<div style="height: 20px;"></div>
|
<div style="height: 20px;"></div>
|
||||||
</template>
|
</template>
|
||||||
@@ -95,6 +103,7 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, computed, watch } from 'vue'
|
import { ref, computed, watch } from 'vue'
|
||||||
import { storeToRefs } from 'pinia'
|
import { storeToRefs } from 'pinia'
|
||||||
|
import { useGeneralSettingStore } from '@renderer/stores/generalSetting'
|
||||||
import { useEngineControlStore } from '@renderer/stores/engineControl'
|
import { useEngineControlStore } from '@renderer/stores/engineControl'
|
||||||
import { notification } from 'ant-design-vue'
|
import { notification } from 'ant-design-vue'
|
||||||
import { InfoCircleOutlined } from '@ant-design/icons-vue';
|
import { InfoCircleOutlined } from '@ant-design/icons-vue';
|
||||||
@@ -108,10 +117,11 @@ const { platform, captionEngine, audioType, changeSignal } = storeToRefs(engineC
|
|||||||
|
|
||||||
const currentSourceLang = ref('auto')
|
const currentSourceLang = ref('auto')
|
||||||
const currentTargetLang = ref('zh')
|
const currentTargetLang = ref('zh')
|
||||||
const currentEngine = ref<'gummy'>('gummy')
|
const currentEngine = ref<string>('gummy')
|
||||||
const currentAudio = ref<0 | 1>(0)
|
const currentAudio = ref<0 | 1>(0)
|
||||||
const currentTranslation = ref<boolean>(false)
|
const currentTranslation = ref<boolean>(false)
|
||||||
const currentAPI_KEY = ref<string>('')
|
const currentAPI_KEY = ref<string>('')
|
||||||
|
const currentModelPath = ref<string>('')
|
||||||
const currentCustomized = ref<boolean>(false)
|
const currentCustomized = ref<boolean>(false)
|
||||||
const currentCustomizedApp = ref('')
|
const currentCustomizedApp = ref('')
|
||||||
const currentCustomizedCommand = ref('')
|
const currentCustomizedCommand = ref('')
|
||||||
@@ -132,6 +142,7 @@ function applyChange(){
|
|||||||
engineControl.audio = currentAudio.value
|
engineControl.audio = currentAudio.value
|
||||||
engineControl.translation = currentTranslation.value
|
engineControl.translation = currentTranslation.value
|
||||||
engineControl.API_KEY = currentAPI_KEY.value
|
engineControl.API_KEY = currentAPI_KEY.value
|
||||||
|
engineControl.modelPath = currentModelPath.value
|
||||||
engineControl.customized = currentCustomized.value
|
engineControl.customized = currentCustomized.value
|
||||||
engineControl.customizedApp = currentCustomizedApp.value
|
engineControl.customizedApp = currentCustomizedApp.value
|
||||||
engineControl.customizedCommand = currentCustomizedCommand.value
|
engineControl.customizedCommand = currentCustomizedCommand.value
|
||||||
@@ -151,6 +162,7 @@ function cancelChange(){
|
|||||||
currentAudio.value = engineControl.audio
|
currentAudio.value = engineControl.audio
|
||||||
currentTranslation.value = engineControl.translation
|
currentTranslation.value = engineControl.translation
|
||||||
currentAPI_KEY.value = engineControl.API_KEY
|
currentAPI_KEY.value = engineControl.API_KEY
|
||||||
|
currentModelPath.value = engineControl.modelPath
|
||||||
currentCustomized.value = engineControl.customized
|
currentCustomized.value = engineControl.customized
|
||||||
currentCustomizedApp.value = engineControl.customizedApp
|
currentCustomizedApp.value = engineControl.customizedApp
|
||||||
currentCustomizedCommand.value = engineControl.customizedCommand
|
currentCustomizedCommand.value = engineControl.customizedCommand
|
||||||
@@ -162,6 +174,17 @@ watch(changeSignal, (val) => {
|
|||||||
engineControl.changeSignal = false;
|
engineControl.changeSignal = false;
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Reset the language selectors whenever a different caption engine is picked.
watch(currentEngine, (val) => {
  if (val === 'vosk') {
    // Vosk offers only the 'auto' source entry and its target-language
    // selector is disabled, so the target language is cleared.
    currentSourceLang.value = 'auto'
    currentTargetLang.value = ''
  }
  else if (val === 'gummy') {
    // Gummy: start from automatic source detection and default the
    // translation target to the current UI language.
    currentSourceLang.value = 'auto'
    currentTargetLang.value = useGeneralSettingStore().uiLanguage
  }
})
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style scoped>
|
<style scoped>
|
||||||
|
|||||||
@@ -16,6 +16,13 @@ export const engines = {
|
|||||||
{ value: 'it', label: '意大利语' },
|
{ value: 'it', label: '意大利语' },
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
value: 'vosk',
|
||||||
|
label: '本地 - Vosk',
|
||||||
|
languages: [
|
||||||
|
{ value: 'auto', label: '需要自行配置模型' },
|
||||||
|
]
|
||||||
|
}
|
||||||
],
|
],
|
||||||
en: [
|
en: [
|
||||||
{
|
{
|
||||||
@@ -34,6 +41,13 @@ export const engines = {
|
|||||||
{ value: 'it', label: 'Italian' },
|
{ value: 'it', label: 'Italian' },
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
value: 'vosk',
|
||||||
|
label: 'Local - Vosk',
|
||||||
|
languages: [
|
||||||
|
{ value: 'auto', label: 'Model needs to be configured manually' },
|
||||||
|
]
|
||||||
|
}
|
||||||
],
|
],
|
||||||
ja: [
|
ja: [
|
||||||
{
|
{
|
||||||
@@ -52,6 +66,13 @@ export const engines = {
|
|||||||
{ value: 'it', label: 'イタリア語' },
|
{ value: 'it', label: 'イタリア語' },
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
value: 'vosk',
|
||||||
|
label: 'ローカル - Vosk',
|
||||||
|
languages: [
|
||||||
|
{ value: 'auto', label: 'モデルを手動で設定する必要があります' },
|
||||||
|
]
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ export default {
|
|||||||
"enableTranslation": "启用翻译",
|
"enableTranslation": "启用翻译",
|
||||||
"showMore": "更多设置",
|
"showMore": "更多设置",
|
||||||
"apikey": "API KEY",
|
"apikey": "API KEY",
|
||||||
|
"modelPath": "模型路径",
|
||||||
"customEngine": "自定义引擎",
|
"customEngine": "自定义引擎",
|
||||||
custom: {
|
custom: {
|
||||||
"title": "自定义字幕引擎",
|
"title": "自定义字幕引擎",
|
||||||
|
|||||||
@@ -16,13 +16,14 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
|
|
||||||
const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage])
|
const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage])
|
||||||
const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage])
|
const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage])
|
||||||
const API_KEY = ref<string>('')
|
|
||||||
const engineEnabled = ref(false)
|
const engineEnabled = ref(false)
|
||||||
const sourceLang = ref<string>('en')
|
const sourceLang = ref<string>('en')
|
||||||
const targetLang = ref<string>('zh')
|
const targetLang = ref<string>('zh')
|
||||||
const engine = ref<'gummy'>('gummy')
|
const engine = ref<string>('gummy')
|
||||||
const audio = ref<0 | 1>(0)
|
const audio = ref<0 | 1>(0)
|
||||||
const translation = ref<boolean>(true)
|
const translation = ref<boolean>(true)
|
||||||
|
const API_KEY = ref<string>('')
|
||||||
|
const modelPath = ref<string>('')
|
||||||
const customized = ref<boolean>(false)
|
const customized = ref<boolean>(false)
|
||||||
const customizedApp = ref<string>('')
|
const customizedApp = ref<string>('')
|
||||||
const customizedCommand = ref<string>('')
|
const customizedCommand = ref<string>('')
|
||||||
@@ -38,6 +39,7 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
audio: audio.value,
|
audio: audio.value,
|
||||||
translation: translation.value,
|
translation: translation.value,
|
||||||
API_KEY: API_KEY.value,
|
API_KEY: API_KEY.value,
|
||||||
|
modelPath: modelPath.value,
|
||||||
customized: customized.value,
|
customized: customized.value,
|
||||||
customizedApp: customizedApp.value,
|
customizedApp: customizedApp.value,
|
||||||
customizedCommand: customizedCommand.value
|
customizedCommand: customizedCommand.value
|
||||||
@@ -53,6 +55,7 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
engineEnabled.value = controls.engineEnabled
|
engineEnabled.value = controls.engineEnabled
|
||||||
translation.value = controls.translation
|
translation.value = controls.translation
|
||||||
API_KEY.value = controls.API_KEY
|
API_KEY.value = controls.API_KEY
|
||||||
|
modelPath.value = controls.modelPath
|
||||||
customized.value = controls.customized
|
customized.value = controls.customized
|
||||||
customizedApp.value = controls.customizedApp
|
customizedApp.value = controls.customizedApp
|
||||||
customizedCommand.value = controls.customizedCommand
|
customizedCommand.value = controls.customizedCommand
|
||||||
@@ -102,7 +105,7 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
platform, // 系统平台
|
platform, // 系统平台
|
||||||
captionEngine, // 字幕引擎
|
captionEngine, // 字幕引擎列表
|
||||||
audioType, // 音频类型
|
audioType, // 音频类型
|
||||||
engineEnabled, // 字幕引擎是否启用
|
engineEnabled, // 字幕引擎是否启用
|
||||||
sourceLang, // 源语言
|
sourceLang, // 源语言
|
||||||
@@ -111,6 +114,7 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
audio, // 选择音频
|
audio, // 选择音频
|
||||||
translation, // 是否启用翻译
|
translation, // 是否启用翻译
|
||||||
API_KEY, // API KEY
|
API_KEY, // API KEY
|
||||||
|
modelPath, // vosk 模型路径
|
||||||
customized, // 是否使用自定义字幕引擎
|
customized, // 是否使用自定义字幕引擎
|
||||||
customizedApp, // 自定义字幕引擎的应用程序
|
customizedApp, // 自定义字幕引擎的应用程序
|
||||||
customizedCommand, // 自定义字幕引擎的命令
|
customizedCommand, // 自定义字幕引擎的命令
|
||||||
|
|||||||
@@ -6,10 +6,11 @@ export interface Controls {
|
|||||||
engineEnabled: boolean,
|
engineEnabled: boolean,
|
||||||
sourceLang: string,
|
sourceLang: string,
|
||||||
targetLang: string,
|
targetLang: string,
|
||||||
engine: 'gummy',
|
engine: string,
|
||||||
audio: 0 | 1,
|
audio: 0 | 1,
|
||||||
translation: boolean,
|
translation: boolean,
|
||||||
API_KEY: string,
|
API_KEY: string,
|
||||||
|
modelPath: string,
|
||||||
customized: boolean,
|
customized: boolean,
|
||||||
customizedApp: string,
|
customizedApp: string,
|
||||||
customizedCommand: string
|
customizedCommand: string
|
||||||
|
|||||||
Reference in New Issue
Block a user