mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:06:09 +08:00
- 添加 Ollama 大模型翻译和 Google 翻译(非实时),支持多种语言 - 为 Vosk 引擎添加非实时翻译 - 为新增的翻译功能添加和修改接口 - 修改 Electron 构建配置,之后不同平台构建无需修改构建文件
88 lines
3.1 KiB
Python
88 lines
3.1 KiB
Python
import json
|
|
import threading
|
|
import time
|
|
from datetime import datetime
|
|
|
|
from vosk import Model, KaldiRecognizer, SetLogLevel
|
|
from utils import stdout_cmd, stdout_obj, google_translate
|
|
|
|
|
|
class VoskRecognizer:
|
|
"""
|
|
使用 Vosk 引擎流式处理的音频数据,并在标准输出中输出与 Auto Caption 软件可读取的 JSON 字符串数据
|
|
|
|
初始化参数:
|
|
model_path: Vosk 识别模型路径
|
|
target: 翻译目标语言
|
|
"""
|
|
def __init__(self, model_path: str, target: str | None):
|
|
SetLogLevel(-1)
|
|
if model_path.startswith('"'):
|
|
model_path = model_path[1:]
|
|
if model_path.endswith('"'):
|
|
model_path = model_path[:-1]
|
|
self.model_path = model_path
|
|
self.target = target
|
|
self.time_str = ''
|
|
self.trans_time = time.time()
|
|
self.cur_id = 0
|
|
self.prev_content = ''
|
|
|
|
self.model = Model(self.model_path)
|
|
self.recognizer = KaldiRecognizer(self.model, 16000)
|
|
|
|
def start(self):
|
|
"""启动 Vosk 引擎"""
|
|
stdout_cmd('info', 'Vosk recognizer started.')
|
|
|
|
def send_audio_frame(self, data: bytes):
|
|
"""
|
|
发送音频帧给 Vosk 引擎,引擎将自动识别并将识别结果输出到标准输出中
|
|
|
|
Args:
|
|
data: 音频帧数据,采样率必须为 16000Hz
|
|
"""
|
|
caption = {}
|
|
caption['command'] = 'caption'
|
|
caption['translation'] = ''
|
|
|
|
if self.recognizer.AcceptWaveform(data):
|
|
content = json.loads(self.recognizer.Result()).get('text', '')
|
|
caption['index'] = self.cur_id
|
|
caption['text'] = content
|
|
caption['time_s'] = self.time_str
|
|
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
self.prev_content = ''
|
|
if content == '': return
|
|
self.cur_id += 1
|
|
if self.target:
|
|
self.trans_time = time.time()
|
|
th = threading.Thread(
|
|
target=google_translate,
|
|
args=(caption['text'], self.target, self.time_str)
|
|
)
|
|
th.start()
|
|
else:
|
|
content = json.loads(self.recognizer.PartialResult()).get('partial', '')
|
|
if content == '' or content == self.prev_content:
|
|
return
|
|
if self.prev_content == '':
|
|
self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
caption['index'] = self.cur_id
|
|
caption['text'] = content
|
|
caption['time_s'] = self.time_str
|
|
caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
self.prev_content = content
|
|
|
|
stdout_obj(caption)
|
|
if self.target and time.time() - self.trans_time > 2.0:
|
|
self.trans_time = time.time()
|
|
th = threading.Thread(
|
|
target=google_translate,
|
|
args=(caption['text'], self.target, self.time_str)
|
|
)
|
|
th.start()
|
|
|
|
def stop(self):
|
|
"""停止 Vosk 引擎"""
|
|
stdout_cmd('info', 'Vosk recognizer closed.') |