mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-13 19:23:26 +08:00
feat(translation): 添加非实时翻译功能用户界面组件
This commit is contained in:
@@ -4,7 +4,7 @@ import time
|
||||
from datetime import datetime
|
||||
|
||||
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||
from utils import stdout_cmd, stdout_obj, google_translate
|
||||
from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate
|
||||
|
||||
|
||||
class VoskRecognizer:
|
||||
@@ -14,8 +14,10 @@ class VoskRecognizer:
|
||||
初始化参数:
|
||||
model_path: Vosk 识别模型路径
|
||||
target: 翻译目标语言
|
||||
trans_model: 翻译模型名称
|
||||
ollama_name: Ollama 模型名称
|
||||
"""
|
||||
def __init__(self, model_path: str, target: str | None):
|
||||
def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str):
|
||||
SetLogLevel(-1)
|
||||
if model_path.startswith('"'):
|
||||
model_path = model_path[1:]
|
||||
@@ -23,8 +25,12 @@ class VoskRecognizer:
|
||||
model_path = model_path[:-1]
|
||||
self.model_path = model_path
|
||||
self.target = target
|
||||
if trans_model == 'google':
|
||||
self.trans_func = google_translate
|
||||
else:
|
||||
self.trans_func = ollama_translate
|
||||
self.ollama_name = ollama_name
|
||||
self.time_str = ''
|
||||
self.trans_time = time.time()
|
||||
self.cur_id = 0
|
||||
self.prev_content = ''
|
||||
|
||||
@@ -58,8 +64,8 @@ class VoskRecognizer:
|
||||
if self.target:
|
||||
self.trans_time = time.time()
|
||||
th = threading.Thread(
|
||||
target=google_translate,
|
||||
args=(caption['text'], self.target, self.time_str)
|
||||
target=self.trans_func,
|
||||
args=(self.ollama_name, self.target, caption['text'], self.time_str)
|
||||
)
|
||||
th.start()
|
||||
else:
|
||||
@@ -75,13 +81,6 @@ class VoskRecognizer:
|
||||
self.prev_content = content
|
||||
|
||||
stdout_obj(caption)
|
||||
if self.target and time.time() - self.trans_time > 2.0:
|
||||
self.trans_time = time.time()
|
||||
th = threading.Thread(
|
||||
target=google_translate,
|
||||
args=(caption['text'], self.target, self.time_str)
|
||||
)
|
||||
th.start()
|
||||
|
||||
def stop(self):
|
||||
"""停止 Vosk 引擎"""
|
||||
|
||||
@@ -44,10 +44,13 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str):
|
||||
engine.stop()
|
||||
|
||||
|
||||
def main_vosk(a: int, c: int, m: str, t: str):
|
||||
def main_vosk(a: int, c: int, m: str, t: str, tm: str, on: str):
|
||||
global thread_data
|
||||
stream = AudioStream(a, c)
|
||||
engine = VoskRecognizer(m, None if t == 'none' else t)
|
||||
engine = VoskRecognizer(
|
||||
m, None if t == 'none' else t,
|
||||
tm, on
|
||||
)
|
||||
|
||||
stream.open_stream()
|
||||
engine.start()
|
||||
@@ -78,6 +81,8 @@ if __name__ == "__main__":
|
||||
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
|
||||
# vosk only
|
||||
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
|
||||
parser.add_argument('-tm', '--translation_model', default='', help='Google translate API KEY')
|
||||
parser.add_argument('-on', '--ollama_name', default='', help='Ollama model name for translation')
|
||||
|
||||
args = parser.parse_args()
|
||||
if int(args.port) == 0:
|
||||
@@ -98,7 +103,9 @@ if __name__ == "__main__":
|
||||
int(args.audio_type),
|
||||
int(args.chunk_rate),
|
||||
args.model_path,
|
||||
args.target_language
|
||||
args.target_language,
|
||||
args.translation_model,
|
||||
args.ollama_name
|
||||
)
|
||||
else:
|
||||
raise ValueError('Invalid caption engine specified.')
|
||||
|
||||
@@ -2,7 +2,7 @@ from ollama import chat
|
||||
from ollama import ChatResponse
|
||||
import asyncio
|
||||
from googletrans import Translator
|
||||
from .sysout import stdout, stdout_obj
|
||||
from .sysout import stdout_cmd, stdout_obj
|
||||
|
||||
lang_map = {
|
||||
'en': 'English',
|
||||
@@ -13,38 +13,29 @@ lang_map = {
|
||||
'ru': 'Russian',
|
||||
'ja': 'Japanese',
|
||||
'ko': 'Korean',
|
||||
'zh': 'Chinese'
|
||||
'zh-cn': 'Chinese'
|
||||
}
|
||||
|
||||
def ollama_translate(model: str, target: str, text: str, time_s: str):
    """Translate ``text`` with an Ollama model and emit the result.

    Sends one non-streaming chat request, removes a leading
    ``<think>...</think>`` section from the reply if present, and writes a
    ``translation`` object through ``stdout_obj``.

    Args:
        model: Name of the Ollama model to query.
        target: Target language code. It is looked up in ``lang_map``; a code
            that is not in the map is passed to the prompt unchanged instead
            of raising ``KeyError``.
        text: Source text to translate.
        time_s: Time string copied into the emitted object as ``time_s``.

    Returns:
        None. On request failure a ``warn`` command is emitted through
        ``stdout_cmd`` (same reporting style as ``google_translate``) and no
        translation object is written.
    """
    # Fall back to the raw code so an unmapped language does not raise.
    language = lang_map.get(target, target)
    try:
        response: ChatResponse = chat(
            model=model,
            messages=[
                {"role": "system", "content": f"/no_think Translate the following content into {language}, and do not output any additional information."},
                {"role": "user", "content": text}
            ]
        )
    except Exception as e:
        # The recognizer passes this function as a threading.Thread target;
        # report the failure as a warning instead of letting the exception
        # escape the thread.
        stdout_cmd("warn", f"Ollama translation request failed: {str(e)}")
        return

    content = response.message.content or ""
    # Drop the reasoning preamble when the reply starts with a <think> block.
    if content.startswith('<think>'):
        closing_tag = '</think>'
        index = content.find(closing_tag)
        if index != -1:
            content = content[index + len(closing_tag):]
    stdout_obj({
        "command": "translation",
        "time_s": time_s,
        "translation": content.strip()
    })
|
||||
|
||||
def google_translate(text: str, target: str, time_s: str):
|
||||
def google_translate(model: str, target: str, text: str, time_s: str):
|
||||
translator = Translator()
|
||||
try:
|
||||
res = asyncio.run(translator.translate(text, dest=target))
|
||||
@@ -54,4 +45,4 @@ def google_translate(text: str, target: str, time_s: str):
|
||||
"translation": res.text
|
||||
})
|
||||
except Exception as e:
|
||||
stdout(f"Google Translation Request failed: {str(e)}")
|
||||
stdout_cmd("warn", f"Google translation request failed, please check your network connection...")
|
||||
|
||||
Reference in New Issue
Block a user