mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-03-06 19:37:30 +08:00
feat(engine): 重构字幕引擎并实现 WebSocket 通信
- 重构了 Gummy 和 Vosk 字幕引擎的代码,提高了可扩展性和可读性 - 合并 Gummy 和 Vosk 引擎为单个可执行文件 - 实现了字幕引擎和主程序之间的 WebSocket 通信,避免了孤儿进程问题
This commit is contained in:
@@ -114,3 +114,18 @@
|
|||||||
|
|
||||||
- 修复无法调用自定义字幕引擎的 bug
|
- 修复无法调用自定义字幕引擎的 bug
|
||||||
- 修复自定义字幕引擎的参数失效 bug
|
- 修复自定义字幕引擎的参数失效 bug
|
||||||
|
|
||||||
|
## v0.6.0
|
||||||
|
|
||||||
|
2025-07-xx
|
||||||
|
|
||||||
|
### 新增功能
|
||||||
|
|
||||||
|
- 新增字幕记录排序功能,可选择字幕记录正序或倒序显示
|
||||||
|
|
||||||
|
### 项目优化
|
||||||
|
|
||||||
|
- 重构字幕引擎,提升字幕引擎代码的可扩展性和可读性
|
||||||
|
- 合并 Gummy 和 Vosk 引擎为单个可执行文件
|
||||||
|
- 字幕引擎和主程序添加 WebSocket 通信,完全避免字幕引擎成为孤儿进程
|
||||||
|
|
||||||
|
|||||||
@@ -16,10 +16,11 @@
|
|||||||
- [x] 可以导出 srt 格式的字幕记录 *2025/07/14*
|
- [x] 可以导出 srt 格式的字幕记录 *2025/07/14*
|
||||||
- [x] 可以获取字幕引擎的系统资源消耗情况 *2025/07/15*
|
- [x] 可以获取字幕引擎的系统资源消耗情况 *2025/07/15*
|
||||||
- [x] 添加字幕记录按时间降序排列选择 *2025/07/26*
|
- [x] 添加字幕记录按时间降序排列选择 *2025/07/26*
|
||||||
|
- [x] 重构字幕引擎 *2025/07/28*
|
||||||
|
|
||||||
## 待完成
|
## 待完成
|
||||||
|
|
||||||
- [ ] 重构字幕引擎
|
- [ ] 优化前端界面提示消息
|
||||||
- [ ] 验证 / 添加基于 sherpa-onnx 的字幕引擎
|
- [ ] 验证 / 添加基于 sherpa-onnx 的字幕引擎
|
||||||
|
|
||||||
## 后续计划
|
## 后续计划
|
||||||
|
|||||||
@@ -11,20 +11,15 @@ files:
|
|||||||
- '!{.env,.env.*,.npmrc,pnpm-lock.yaml}'
|
- '!{.env,.env.*,.npmrc,pnpm-lock.yaml}'
|
||||||
- '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}'
|
- '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}'
|
||||||
- '!engine/*'
|
- '!engine/*'
|
||||||
- '!engine-test/*'
|
|
||||||
- '!docs/*'
|
- '!docs/*'
|
||||||
- '!assets/*'
|
- '!assets/*'
|
||||||
extraResources:
|
extraResources:
|
||||||
# For Windows
|
# For Windows
|
||||||
- from: ./engine/dist/main-gummy.exe
|
- from: ./engine/dist/main.exe
|
||||||
to: ./engine/main-gummy.exe
|
to: ./engine/main.exe
|
||||||
- from: ./engine/dist/main-vosk.exe
|
|
||||||
to: ./engine/main-vosk.exe
|
|
||||||
# For macOS and Linux
|
# For macOS and Linux
|
||||||
# - from: ./engine/dist/main-gummy
|
# - from: ./engine/dist/main
|
||||||
# to: ./engine/main-gummy
|
# to: ./engine/main
|
||||||
# - from: ./engine/dist/main-vosk
|
|
||||||
# to: ./engine/main-vosk
|
|
||||||
win:
|
win:
|
||||||
executableName: auto-caption
|
executableName: auto-caption
|
||||||
icon: build/icon.png
|
icon: build/icon.png
|
||||||
|
|||||||
@@ -1,2 +1,3 @@
|
|||||||
from dashscope.common.error import InvalidParameter
|
from dashscope.common.error import InvalidParameter
|
||||||
from .gummy import GummyTranslator
|
from .gummy import GummyRecognizer
|
||||||
|
from .vosk import VoskRecognizer
|
||||||
@@ -62,7 +62,7 @@ class Callback(TranslationRecognizerCallback):
|
|||||||
stdout_obj(caption)
|
stdout_obj(caption)
|
||||||
|
|
||||||
|
|
||||||
class GummyTranslator:
|
class GummyRecognizer:
|
||||||
"""
|
"""
|
||||||
使用 Gummy 引擎流式处理的音频数据,并在标准输出中输出与 Auto Caption 软件可读取的 JSON 字符串数据
|
使用 Gummy 引擎流式处理的音频数据,并在标准输出中输出与 Auto Caption 软件可读取的 JSON 字符串数据
|
||||||
|
|
||||||
@@ -70,6 +70,7 @@ class GummyTranslator:
|
|||||||
rate: 音频采样率
|
rate: 音频采样率
|
||||||
source: 源语言代码字符串(zh, en, ja 等)
|
source: 源语言代码字符串(zh, en, ja 等)
|
||||||
target: 目标语言代码字符串(zh, en, ja 等)
|
target: 目标语言代码字符串(zh, en, ja 等)
|
||||||
|
api_key: 阿里云百炼平台 API KEY
|
||||||
"""
|
"""
|
||||||
def __init__(self, rate: int, source: str, target: str | None, api_key: str | None):
|
def __init__(self, rate: int, source: str, target: str | None, api_key: str | None):
|
||||||
if api_key:
|
if api_key:
|
||||||
|
|||||||
@@ -2,7 +2,8 @@ import json
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from vosk import Model, KaldiRecognizer, SetLogLevel
|
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||||
from utils import stdout_obj
|
from utils import stdout_cmd, stdout_obj
|
||||||
|
|
||||||
|
|
||||||
class VoskRecognizer:
|
class VoskRecognizer:
|
||||||
"""
|
"""
|
||||||
@@ -11,7 +12,7 @@ class VoskRecognizer:
|
|||||||
初始化参数:
|
初始化参数:
|
||||||
model_path: Vosk 识别模型路径
|
model_path: Vosk 识别模型路径
|
||||||
"""
|
"""
|
||||||
def __int__(self, model_path: str):
|
def __init__(self, model_path: str):
|
||||||
SetLogLevel(-1)
|
SetLogLevel(-1)
|
||||||
if model_path.startswith('"'):
|
if model_path.startswith('"'):
|
||||||
model_path = model_path[1:]
|
model_path = model_path[1:]
|
||||||
@@ -24,7 +25,11 @@ class VoskRecognizer:
|
|||||||
|
|
||||||
self.model = Model(self.model_path)
|
self.model = Model(self.model_path)
|
||||||
self.recognizer = KaldiRecognizer(self.model, 16000)
|
self.recognizer = KaldiRecognizer(self.model, 16000)
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
"""启动 Vosk 引擎"""
|
||||||
|
stdout_cmd('info', 'Vosk recognizer started.')
|
||||||
|
|
||||||
def send_audio_frame(self, data: bytes):
|
def send_audio_frame(self, data: bytes):
|
||||||
"""
|
"""
|
||||||
发送音频帧给 Vosk 引擎,引擎将自动识别并将识别结果输出到标准输出中
|
发送音频帧给 Vosk 引擎,引擎将自动识别并将识别结果输出到标准输出中
|
||||||
@@ -57,3 +62,7 @@ class VoskRecognizer:
|
|||||||
self.prev_content = content
|
self.prev_content = content
|
||||||
|
|
||||||
stdout_obj(caption)
|
stdout_obj(caption)
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
"""停止 Vosk 引擎"""
|
||||||
|
stdout_cmd('info', 'Vosk recognizer closed.')
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from sysaudio import AudioStream
|
|
||||||
from utils import merge_chunk_channels
|
|
||||||
from audio2text import InvalidParameter, GummyTranslator
|
|
||||||
|
|
||||||
|
|
||||||
def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate, api_key):
|
|
||||||
stream = AudioStream(audio_type, chunk_rate)
|
|
||||||
|
|
||||||
if t_lang == 'none':
|
|
||||||
gummy = GummyTranslator(stream.RATE, s_lang, None, api_key)
|
|
||||||
else:
|
|
||||||
gummy = GummyTranslator(stream.RATE, s_lang, t_lang, api_key)
|
|
||||||
|
|
||||||
stream.open_stream()
|
|
||||||
gummy.start()
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
chunk = stream.read_chunk()
|
|
||||||
if chunk is None: continue
|
|
||||||
chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS)
|
|
||||||
try:
|
|
||||||
gummy.send_audio_frame(chunk_mono)
|
|
||||||
except InvalidParameter:
|
|
||||||
gummy.start()
|
|
||||||
gummy.send_audio_frame(chunk_mono)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
stream.close_stream()
|
|
||||||
gummy.stop()
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
|
||||||
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
|
||||||
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
|
||||||
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
|
|
||||||
parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
|
|
||||||
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
|
|
||||||
args = parser.parse_args()
|
|
||||||
convert_audio_to_text(
|
|
||||||
args.source_language,
|
|
||||||
args.target_language,
|
|
||||||
int(args.audio_type),
|
|
||||||
int(args.chunk_rate),
|
|
||||||
args.api_key
|
|
||||||
)
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
# -*- mode: python ; coding: utf-8 -*-
|
|
||||||
|
|
||||||
|
|
||||||
a = Analysis(
|
|
||||||
['main-gummy.py'],
|
|
||||||
pathex=[],
|
|
||||||
binaries=[],
|
|
||||||
datas=[],
|
|
||||||
hiddenimports=[],
|
|
||||||
hookspath=[],
|
|
||||||
hooksconfig={},
|
|
||||||
runtime_hooks=[],
|
|
||||||
excludes=[],
|
|
||||||
noarchive=False,
|
|
||||||
optimize=0,
|
|
||||||
)
|
|
||||||
pyz = PYZ(a.pure)
|
|
||||||
|
|
||||||
exe = EXE(
|
|
||||||
pyz,
|
|
||||||
a.scripts,
|
|
||||||
a.binaries,
|
|
||||||
a.datas,
|
|
||||||
[],
|
|
||||||
name='main-gummy',
|
|
||||||
debug=False,
|
|
||||||
bootloader_ignore_signals=False,
|
|
||||||
strip=False,
|
|
||||||
upx=True,
|
|
||||||
upx_exclude=[],
|
|
||||||
runtime_tmpdir=None,
|
|
||||||
console=True,
|
|
||||||
disable_windowed_traceback=False,
|
|
||||||
argv_emulation=False,
|
|
||||||
target_arch=None,
|
|
||||||
codesign_identity=None,
|
|
||||||
entitlements_file=None,
|
|
||||||
onefile=True,
|
|
||||||
)
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
import sys
|
|
||||||
import json
|
|
||||||
import argparse
|
|
||||||
from datetime import datetime
|
|
||||||
import numpy.core.multiarray
|
|
||||||
|
|
||||||
from sysaudio import AudioStream
|
|
||||||
from vosk import Model, KaldiRecognizer, SetLogLevel
|
|
||||||
from utils import resample_chunk_mono
|
|
||||||
|
|
||||||
SetLogLevel(-1)
|
|
||||||
|
|
||||||
def convert_audio_to_text(audio_type, chunk_rate, model_path):
|
|
||||||
sys.stdout.reconfigure(line_buffering=True) # type: ignore
|
|
||||||
|
|
||||||
if model_path.startswith('"'):
|
|
||||||
model_path = model_path[1:]
|
|
||||||
if model_path.endswith('"'):
|
|
||||||
model_path = model_path[:-1]
|
|
||||||
|
|
||||||
model = Model(model_path)
|
|
||||||
recognizer = KaldiRecognizer(model, 16000)
|
|
||||||
|
|
||||||
stream = AudioStream(audio_type, chunk_rate)
|
|
||||||
stream.open_stream()
|
|
||||||
|
|
||||||
time_str = ''
|
|
||||||
cur_id = 0
|
|
||||||
prev_content = ''
|
|
||||||
|
|
||||||
while True:
|
|
||||||
chunk = stream.read_chunk()
|
|
||||||
if chunk is None: continue
|
|
||||||
chunk_mono = resample_chunk_mono(chunk, stream.CHANNELS, stream.RATE, 16000)
|
|
||||||
|
|
||||||
caption = {}
|
|
||||||
if recognizer.AcceptWaveform(chunk_mono):
|
|
||||||
content = json.loads(recognizer.Result()).get('text', '')
|
|
||||||
caption['index'] = cur_id
|
|
||||||
caption['text'] = content
|
|
||||||
caption['time_s'] = time_str
|
|
||||||
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
||||||
caption['translation'] = ''
|
|
||||||
prev_content = ''
|
|
||||||
cur_id += 1
|
|
||||||
else:
|
|
||||||
content = json.loads(recognizer.PartialResult()).get('partial', '')
|
|
||||||
if content == '' or content == prev_content:
|
|
||||||
continue
|
|
||||||
if prev_content == '':
|
|
||||||
time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
||||||
caption['command'] = 'caption'
|
|
||||||
caption['index'] = cur_id
|
|
||||||
caption['text'] = content
|
|
||||||
caption['time_s'] = time_str
|
|
||||||
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
||||||
caption['translation'] = ''
|
|
||||||
prev_content = content
|
|
||||||
try:
|
|
||||||
json_str = json.dumps(caption) + '\n'
|
|
||||||
sys.stdout.write(json_str)
|
|
||||||
sys.stdout.flush()
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
|
||||||
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
|
|
||||||
parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
|
|
||||||
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
|
|
||||||
args = parser.parse_args()
|
|
||||||
convert_audio_to_text(
|
|
||||||
int(args.audio_type),
|
|
||||||
int(args.chunk_rate),
|
|
||||||
args.model_path
|
|
||||||
)
|
|
||||||
@@ -1,10 +1,59 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
from utils import stdout_cmd
|
||||||
|
from utils import thread_data, start_server
|
||||||
|
from utils import merge_chunk_channels, resample_chunk_mono
|
||||||
|
from audio2text import InvalidParameter, GummyRecognizer
|
||||||
|
from audio2text import VoskRecognizer
|
||||||
|
from sysaudio import AudioStream
|
||||||
|
|
||||||
def gummy_engine(s, t, a, c, k):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def vosk_engine(a, c, m):
|
def main_gummy(s: str, t: str, a: int, c: int, k: str):
|
||||||
pass
|
stream = AudioStream(a, c)
|
||||||
|
if t == 'none':
|
||||||
|
engine = GummyRecognizer(stream.RATE, s, None, k)
|
||||||
|
else:
|
||||||
|
engine = GummyRecognizer(stream.RATE, s, t, k)
|
||||||
|
|
||||||
|
stream.open_stream()
|
||||||
|
engine.start()
|
||||||
|
|
||||||
|
while thread_data.status == "running":
|
||||||
|
try:
|
||||||
|
chunk = stream.read_chunk()
|
||||||
|
if chunk is None: continue
|
||||||
|
chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS)
|
||||||
|
try:
|
||||||
|
engine.send_audio_frame(chunk_mono)
|
||||||
|
except InvalidParameter:
|
||||||
|
stdout_cmd('info', 'Gummy engine stopped, restart engine')
|
||||||
|
engine.start()
|
||||||
|
engine.send_audio_frame(chunk_mono)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
break
|
||||||
|
|
||||||
|
stream.close_stream()
|
||||||
|
engine.stop()
|
||||||
|
|
||||||
|
|
||||||
|
def main_vosk(a: int, c: int, m: str):
|
||||||
|
stream = AudioStream(a, c)
|
||||||
|
engine = VoskRecognizer(m)
|
||||||
|
|
||||||
|
stream.open_stream()
|
||||||
|
engine.start()
|
||||||
|
|
||||||
|
while thread_data.status == "running":
|
||||||
|
try:
|
||||||
|
chunk = stream.read_chunk()
|
||||||
|
if chunk is None: continue
|
||||||
|
chunk_mono = resample_chunk_mono(chunk, stream.CHANNELS, stream.RATE, 16000)
|
||||||
|
engine.send_audio_frame(chunk_mono)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
break
|
||||||
|
|
||||||
|
stream.close_stream()
|
||||||
|
engine.stop()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
||||||
@@ -12,15 +61,23 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')
|
parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')
|
||||||
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
|
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
|
||||||
parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')
|
parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')
|
||||||
|
parser.add_argument('-p', '--port', default=7070, help='The port to run the server on, 0 for no server')
|
||||||
# gummy
|
# gummy
|
||||||
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
||||||
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
||||||
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
|
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
|
||||||
# vosk
|
# vosk
|
||||||
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
|
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
|
||||||
|
# for test
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if int(args.port) == 0:
|
||||||
|
thread_data.status = "running"
|
||||||
|
else:
|
||||||
|
start_server(int(args.port))
|
||||||
|
|
||||||
if args.caption_engine == 'gummy':
|
if args.caption_engine == 'gummy':
|
||||||
gummy_engine(
|
main_gummy(
|
||||||
args.source_language,
|
args.source_language,
|
||||||
args.target_language,
|
args.target_language,
|
||||||
int(args.audio_type),
|
int(args.audio_type),
|
||||||
@@ -28,7 +85,7 @@ if __name__ == "__main__":
|
|||||||
args.api_key
|
args.api_key
|
||||||
)
|
)
|
||||||
elif args.caption_engine == 'vosk':
|
elif args.caption_engine == 'vosk':
|
||||||
vosk_engine(
|
main_vosk(
|
||||||
int(args.audio_type),
|
int(args.audio_type),
|
||||||
int(args.chunk_rate),
|
int(args.chunk_rate),
|
||||||
args.model_path
|
args.model_path
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ else:
|
|||||||
vosk_path = str(Path('./subenv/lib/python3.12/site-packages/vosk').resolve())
|
vosk_path = str(Path('./subenv/lib/python3.12/site-packages/vosk').resolve())
|
||||||
|
|
||||||
a = Analysis(
|
a = Analysis(
|
||||||
['main-vosk.py'],
|
['main.py'],
|
||||||
pathex=[],
|
pathex=[],
|
||||||
binaries=[],
|
binaries=[],
|
||||||
datas=[(vosk_path, 'vosk')],
|
datas=[(vosk_path, 'vosk')],
|
||||||
@@ -30,7 +30,7 @@ exe = EXE(
|
|||||||
a.binaries,
|
a.binaries,
|
||||||
a.datas,
|
a.datas,
|
||||||
[],
|
[],
|
||||||
name='main-vosk',
|
name='main',
|
||||||
debug=False,
|
debug=False,
|
||||||
bootloader_ignore_signals=False,
|
bootloader_ignore_signals=False,
|
||||||
strip=False,
|
strip=False,
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
from .process import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
|
from .audioprcs import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
|
||||||
from .sysout import stdout, stdout_cmd, stdout_obj, stderr
|
from .sysout import stdout, stdout_cmd, stdout_obj, stderr
|
||||||
|
from .thdata import thread_data
|
||||||
|
from .server import start_server
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import samplerate
|
import samplerate
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import numpy.core.multiarray
|
||||||
|
|
||||||
def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
|
def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
|
||||||
"""
|
"""
|
||||||
@@ -13,6 +13,7 @@ def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
|
|||||||
Returns:
|
Returns:
|
||||||
单通道音频数据块
|
单通道音频数据块
|
||||||
"""
|
"""
|
||||||
|
if channels == 1: return chunk
|
||||||
# (length * channels,)
|
# (length * channels,)
|
||||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||||
# (length, channels)
|
# (length, channels)
|
||||||
@@ -37,13 +38,17 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
|
|||||||
Return:
|
Return:
|
||||||
单通道音频数据块
|
单通道音频数据块
|
||||||
"""
|
"""
|
||||||
# (length * channels,)
|
if channels == 1:
|
||||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
chunk_mono = chunk
|
||||||
# (length, channels)
|
else:
|
||||||
chunk_np = chunk_np.reshape(-1, channels)
|
# (length * channels,)
|
||||||
# (length,)
|
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||||
chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
|
# (length, channels)
|
||||||
chunk_mono = chunk_mono_f.astype(np.int16)
|
chunk_np = chunk_np.reshape(-1, channels)
|
||||||
|
# (length,)
|
||||||
|
chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||||
|
chunk_mono = chunk_mono_f.astype(np.int16)
|
||||||
|
|
||||||
ratio = target_sr / orig_sr
|
ratio = target_sr / orig_sr
|
||||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||||
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
||||||
37
engine/utils/server.py
Normal file
37
engine/utils/server.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import socket
|
||||||
|
import threading
|
||||||
|
import json
|
||||||
|
from utils import thread_data, stdout_cmd, stderr
|
||||||
|
|
||||||
|
|
||||||
|
def handle_client(client_socket):
|
||||||
|
global thread_data
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
data = client_socket.recv(4096).decode('utf-8')
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
data = json.loads(data)
|
||||||
|
|
||||||
|
if data['command'] == 'stop':
|
||||||
|
if thread_data.status == 'running':
|
||||||
|
thread_data.status = 'stop'
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
stderr(f'Communication error: {e}')
|
||||||
|
break
|
||||||
|
|
||||||
|
thread_data.status = 'stop'
|
||||||
|
client_socket.close()
|
||||||
|
|
||||||
|
|
||||||
|
def start_server(port: int):
|
||||||
|
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
server.bind(('localhost', port))
|
||||||
|
server.listen(1)
|
||||||
|
stdout_cmd('ready')
|
||||||
|
|
||||||
|
client, addr = server.accept()
|
||||||
|
client_handler = threading.Thread(target=handle_client, args=(client,))
|
||||||
|
client_handler.daemon = True
|
||||||
|
client_handler.start()
|
||||||
5
engine/utils/thdata.py
Normal file
5
engine/utils/thdata.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
class ThreadData:
|
||||||
|
def __init__(self):
|
||||||
|
self.status = "running"
|
||||||
|
|
||||||
|
thread_data = ThreadData()
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
import { spawn, exec } from 'child_process'
|
import { spawn } from 'child_process'
|
||||||
import { app } from 'electron'
|
import { app } from 'electron'
|
||||||
import { is } from '@electron-toolkit/utils'
|
import { is } from '@electron-toolkit/utils'
|
||||||
import path from 'path'
|
import path from 'path'
|
||||||
|
import net from 'net'
|
||||||
import { controlWindow } from '../ControlWindow'
|
import { controlWindow } from '../ControlWindow'
|
||||||
import { allConfig } from './AllConfig'
|
import { allConfig } from './AllConfig'
|
||||||
import { i18n } from '../i18n'
|
import { i18n } from '../i18n'
|
||||||
@@ -11,91 +12,87 @@ export class CaptionEngine {
|
|||||||
appPath: string = ''
|
appPath: string = ''
|
||||||
command: string[] = []
|
command: string[] = []
|
||||||
process: any | undefined
|
process: any | undefined
|
||||||
processStatus: 'running' | 'stopping' | 'stopped' = 'stopped'
|
client: net.Socket | undefined
|
||||||
|
status: 'running' | 'stopping' | 'stopped' = 'stopped'
|
||||||
|
|
||||||
private getApp(): boolean {
|
private getApp(): boolean {
|
||||||
if (allConfig.controls.customized && allConfig.controls.customizedApp) {
|
if (allConfig.controls.customized) {
|
||||||
Log.info('Using customized engine')
|
Log.info('Using customized engine')
|
||||||
this.appPath = allConfig.controls.customizedApp
|
this.appPath = allConfig.controls.customizedApp
|
||||||
this.command = allConfig.controls.customizedCommand.split(' ')
|
this.command = allConfig.controls.customizedCommand.split(' ')
|
||||||
}
|
}
|
||||||
else if (allConfig.controls.engine === 'gummy') {
|
else {
|
||||||
allConfig.controls.customized = false
|
if(allConfig.controls.engine === 'gummy' &&
|
||||||
if(!allConfig.controls.API_KEY && !process.env.DASHSCOPE_API_KEY) {
|
!allConfig.controls.API_KEY && !process.env.DASHSCOPE_API_KEY
|
||||||
|
) {
|
||||||
controlWindow.sendErrorMessage(i18n('gummy.key.missing'))
|
controlWindow.sendErrorMessage(i18n('gummy.key.missing'))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
let gummyName = 'main-gummy'
|
|
||||||
if (process.platform === 'win32') { gummyName += '.exe' }
|
|
||||||
this.command = []
|
this.command = []
|
||||||
if (is.dev) {
|
if (is.dev) {
|
||||||
this.appPath = path.join(
|
// this.appPath = path.join(
|
||||||
app.getAppPath(), 'engine',
|
// app.getAppPath(), 'engine',
|
||||||
'subenv', 'Scripts', 'python.exe'
|
// 'subenv', 'Scripts', 'python.exe'
|
||||||
)
|
// )
|
||||||
this.command.push(path.join(
|
// this.command.push(path.join(
|
||||||
app.getAppPath(), 'engine', 'main-gummy.py'
|
// app.getAppPath(), 'engine', 'main.py'
|
||||||
))
|
// ))
|
||||||
|
this.appPath = path.join(app.getAppPath(), 'engine', 'dist', 'main.exe')
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.appPath = path.join(
|
this.appPath = path.join(process.resourcesPath, 'engine', 'main.exe')
|
||||||
process.resourcesPath, 'engine', gummyName
|
}
|
||||||
|
|
||||||
|
if(allConfig.controls.engine === 'gummy') {
|
||||||
|
this.command.push('-e', 'gummy')
|
||||||
|
this.command.push('-s', allConfig.controls.sourceLang)
|
||||||
|
this.command.push(
|
||||||
|
'-t', allConfig.controls.translation ?
|
||||||
|
allConfig.controls.targetLang : 'none'
|
||||||
)
|
)
|
||||||
|
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
||||||
|
if(allConfig.controls.API_KEY) {
|
||||||
|
this.command.push('-k', allConfig.controls.API_KEY)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.command.push('-s', allConfig.controls.sourceLang)
|
else if(allConfig.controls.engine === 'vosk'){
|
||||||
this.command.push(
|
this.command.push('-e', 'vosk')
|
||||||
'-t', allConfig.controls.translation ?
|
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
||||||
allConfig.controls.targetLang : 'none'
|
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
|
||||||
)
|
|
||||||
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
|
||||||
if(allConfig.controls.API_KEY) {
|
|
||||||
this.command.push('-k', allConfig.controls.API_KEY)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(allConfig.controls.engine === 'vosk'){
|
|
||||||
allConfig.controls.customized = false
|
|
||||||
let voskName = 'main-vosk'
|
|
||||||
if (process.platform === 'win32') { voskName += '.exe' }
|
|
||||||
this.command = []
|
|
||||||
if (is.dev) {
|
|
||||||
this.appPath = path.join(
|
|
||||||
app.getAppPath(), 'engine',
|
|
||||||
'subenv', 'Scripts', 'python.exe'
|
|
||||||
)
|
|
||||||
this.command.push(path.join(
|
|
||||||
app.getAppPath(), 'engine', 'main-vosk.py'
|
|
||||||
))
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.appPath = path.join(
|
|
||||||
process.resourcesPath, 'engine', voskName
|
|
||||||
)
|
|
||||||
}
|
|
||||||
this.command.push('-a', allConfig.controls.audio ? '1' : '0')
|
|
||||||
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
|
|
||||||
}
|
|
||||||
Log.info('Engine Path:', this.appPath)
|
Log.info('Engine Path:', this.appPath)
|
||||||
Log.info('Engine Command:', this.command)
|
Log.info('Engine Command:', this.command)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public connect() {
|
||||||
|
if(this.client) { Log.warn('Client already exists, ignoring...') }
|
||||||
|
Log.info('Connecting to caption engine server...');
|
||||||
|
this.client = net.createConnection({ port: 7070 }, () => {
|
||||||
|
Log.info('Connected to caption engine server');
|
||||||
|
});
|
||||||
|
this.status = 'running'
|
||||||
|
}
|
||||||
|
|
||||||
|
public sendCommand(command: string, content: string = "") {
|
||||||
|
if(this.client === undefined) {
|
||||||
|
Log.error('Client not initialized yet')
|
||||||
|
return
|
||||||
|
}
|
||||||
|
const data = JSON.stringify({command, content})
|
||||||
|
this.client.write(data);
|
||||||
|
Log.info(`Send data to python server: ${data}`);
|
||||||
|
}
|
||||||
|
|
||||||
public start() {
|
public start() {
|
||||||
if (this.processStatus !== 'stopped') {
|
if (this.status !== 'stopped') {
|
||||||
Log.warn('Caption engine status is not stopped, cannot start')
|
Log.warn('Casption engine is not stopped, current status:', this.status)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if(!this.getApp()){ return }
|
if(!this.getApp()){ return }
|
||||||
|
|
||||||
try {
|
this.process = spawn(this.appPath, this.command)
|
||||||
this.process = spawn(this.appPath, this.command)
|
|
||||||
}
|
|
||||||
catch (e) {
|
|
||||||
controlWindow.sendErrorMessage(i18n('engine.start.error') + e)
|
|
||||||
Log.error('Error starting engine:', e)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
this.processStatus = 'running'
|
|
||||||
Log.info('Caption Engine Started, PID:', this.process.pid)
|
Log.info('Caption Engine Started, PID:', this.process.pid)
|
||||||
|
|
||||||
allConfig.controls.engineEnabled = true
|
allConfig.controls.engineEnabled = true
|
||||||
@@ -123,7 +120,7 @@ export class CaptionEngine {
|
|||||||
});
|
});
|
||||||
|
|
||||||
this.process.stderr.on('data', (data: any) => {
|
this.process.stderr.on('data', (data: any) => {
|
||||||
if(this.processStatus === 'stopping') return
|
if(this.status === 'stopping') return
|
||||||
controlWindow.sendErrorMessage(i18n('engine.error') + data)
|
controlWindow.sendErrorMessage(i18n('engine.error') + data)
|
||||||
Log.error(`Engine Error: ${data}`);
|
Log.error(`Engine Error: ${data}`);
|
||||||
});
|
});
|
||||||
@@ -135,54 +132,43 @@ export class CaptionEngine {
|
|||||||
allConfig.sendControls(controlWindow.window)
|
allConfig.sendControls(controlWindow.window)
|
||||||
controlWindow.window.webContents.send('control.engine.stopped')
|
controlWindow.window.webContents.send('control.engine.stopped')
|
||||||
}
|
}
|
||||||
this.processStatus = 'stopped'
|
this.status = 'stopped'
|
||||||
Log.info(`Engine exited with code ${code}`)
|
Log.info(`Engine exited with code ${code}`)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public stop() {
|
public stop() {
|
||||||
if(this.processStatus !== 'running') return
|
if(this.status !== 'running'){
|
||||||
if (this.process.pid) {
|
Log.warn('Engine is not running, current status:', this.status)
|
||||||
Log.info('Trying to stop process, PID:', this.process.pid)
|
|
||||||
let cmd = `kill ${this.process.pid}`;
|
|
||||||
if (process.platform === "win32") {
|
|
||||||
cmd = `taskkill /pid ${this.process.pid} /t /f`
|
|
||||||
}
|
|
||||||
exec(cmd, (error) => {
|
|
||||||
if (error) {
|
|
||||||
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
|
|
||||||
Log.error(`Failed to kill process: ${error}`)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.process = undefined;
|
|
||||||
allConfig.controls.engineEnabled = false
|
|
||||||
if(controlWindow.window){
|
|
||||||
allConfig.sendControls(controlWindow.window)
|
|
||||||
controlWindow.window.webContents.send('control.engine.stopped')
|
|
||||||
}
|
|
||||||
this.processStatus = 'stopped'
|
|
||||||
Log.info('Process PID undefined, caption engine process stopped')
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
this.processStatus = 'stopping'
|
this.sendCommand('stop')
|
||||||
Log.info('Caption engine process stopping')
|
if(this.client){
|
||||||
|
this.client.destroy()
|
||||||
|
this.client = undefined
|
||||||
|
}
|
||||||
|
this.status = 'stopping'
|
||||||
|
Log.info('Caption engine process stopping...')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function handleEngineData(data: any) {
|
function handleEngineData(data: any) {
|
||||||
if(data.command === 'caption') {
|
if(data.command === 'ready'){
|
||||||
|
captionEngine.connect()
|
||||||
|
}
|
||||||
|
else if(data.command === 'caption') {
|
||||||
allConfig.updateCaptionLog(data);
|
allConfig.updateCaptionLog(data);
|
||||||
}
|
}
|
||||||
else if(data.command === 'print') {
|
else if(data.command === 'print') {
|
||||||
Log.info('Engine print:', data.content)
|
console.log(data.content)
|
||||||
|
// Log.info('Engine Print:', data.content)
|
||||||
}
|
}
|
||||||
else if(data.command === 'info') {
|
else if(data.command === 'info') {
|
||||||
Log.info('Engine info:', data.content)
|
Log.info('Engine Info:', data.content)
|
||||||
}
|
}
|
||||||
else if(data.command === 'usage') {
|
else if(data.command === 'usage') {
|
||||||
Log.info('Caption engine usage: ', data.content)
|
console.error(data.content)
|
||||||
|
// Log.info('Gummy Engine Usage: ', data.content)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,10 +12,10 @@ export class Log {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static warn(...msg: any[]){
|
static warn(...msg: any[]){
|
||||||
console.log(`[WARN ${getTimeString()}]`, ...msg)
|
console.warn(`[WARN ${getTimeString()}]`, ...msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
static error(...msg: any[]){
|
static error(...msg: any[]){
|
||||||
console.log(`[ERROR ${getTimeString()}]`, ...msg)
|
console.error(`[ERROR ${getTimeString()}]`, ...msg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
<a-col :span="6">
|
<a-col :span="6">
|
||||||
<a-statistic
|
<a-statistic
|
||||||
:title="$t('status.engine')"
|
:title="$t('status.engine')"
|
||||||
:value="(customized && customizedApp)?$t('status.customized'):engine"
|
:value="customized?$t('status.customized'):engine"
|
||||||
/>
|
/>
|
||||||
</a-col>
|
</a-col>
|
||||||
<a-popover :title="$t('status.engineStatus')">
|
<a-popover :title="$t('status.engineStatus')">
|
||||||
@@ -130,7 +130,7 @@ const showAbout = ref(false)
|
|||||||
const captionLog = useCaptionLogStore()
|
const captionLog = useCaptionLogStore()
|
||||||
const { captionData } = storeToRefs(captionLog)
|
const { captionData } = storeToRefs(captionLog)
|
||||||
const engineControl = useEngineControlStore()
|
const engineControl = useEngineControlStore()
|
||||||
const { engineEnabled, engine, customized, customizedApp } = storeToRefs(engineControl)
|
const { engineEnabled, engine, customized } = storeToRefs(engineControl)
|
||||||
|
|
||||||
const pid = ref(0)
|
const pid = ref(0)
|
||||||
const ppid = ref(0)
|
const ppid = ref(0)
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
notification.open({
|
notification.open({
|
||||||
message: t('noti.started'),
|
message: t('noti.started'),
|
||||||
description:
|
description:
|
||||||
((customized.value && customizedApp.value) ? str1 : str0) +
|
(customized.value ? str1 : str0) +
|
||||||
`${t('noti.pidInfo')}${args}`
|
`${t('noti.pidInfo')}${args}`
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user