feat(translation): 添加非实时翻译功能用户界面组件

This commit is contained in:
himeditator
2025-09-04 23:41:22 +08:00
parent 14987cbfc5
commit 2b7ce06f04
15 changed files with 193 additions and 77 deletions

View File

@@ -153,4 +153,18 @@
### 优化体验
- 优化软件用户界面的部分组件
- 更清晰的日志输出
- 更清晰的日志输出
## v0.8.0
2025-09-??
### 新增功能
- 字幕引擎添加超时关闭功能:如果在规定时间内字幕引擎没有启动成功会自动关闭;在字幕引擎启动过程中也可选择关闭字幕引擎
- 添加非实时翻译功能:支持调用 Ollama 本地模型进行翻译;支持调用 Google 翻译 API 进行翻译
### 优化体验
- 带有额外信息的标签颜色改为与主题色一致

View File

@@ -92,6 +92,17 @@ Python 端监听到的音频流转换为的字幕数据。
Python 端打印的提示信息,会计入日志。
### `warn`
```js
{
command: "warn",
content: string
}
```
Python 端打印的警告信息,会计入日志。
### `error`
```js
@@ -101,7 +112,7 @@ Python 端打印的提示信息,会计入日志。
}
```
Python 端打印的错误信息,该错误信息需要在前端弹窗显示。
Python 端打印的错误信息,该错误信息在前端弹窗显示。
### `usage`

View File

@@ -4,7 +4,7 @@ import time
from datetime import datetime
from vosk import Model, KaldiRecognizer, SetLogLevel
from utils import stdout_cmd, stdout_obj, google_translate
from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate
class VoskRecognizer:
@@ -14,8 +14,10 @@ class VoskRecognizer:
初始化参数:
model_path: Vosk 识别模型路径
target: 翻译目标语言
trans_model: 翻译模型名称
ollama_name: Ollama 模型名称
"""
def __init__(self, model_path: str, target: str | None):
def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str):
SetLogLevel(-1)
if model_path.startswith('"'):
model_path = model_path[1:]
@@ -23,8 +25,12 @@ class VoskRecognizer:
model_path = model_path[:-1]
self.model_path = model_path
self.target = target
if trans_model == 'google':
self.trans_func = google_translate
else:
self.trans_func = ollama_translate
self.ollama_name = ollama_name
self.time_str = ''
self.trans_time = time.time()
self.cur_id = 0
self.prev_content = ''
@@ -58,8 +64,8 @@ class VoskRecognizer:
if self.target:
self.trans_time = time.time()
th = threading.Thread(
target=google_translate,
args=(caption['text'], self.target, self.time_str)
target=self.trans_func,
args=(self.ollama_name, self.target, caption['text'], self.time_str)
)
th.start()
else:
@@ -75,13 +81,6 @@ class VoskRecognizer:
self.prev_content = content
stdout_obj(caption)
if self.target and time.time() - self.trans_time > 2.0:
self.trans_time = time.time()
th = threading.Thread(
target=google_translate,
args=(caption['text'], self.target, self.time_str)
)
th.start()
def stop(self):
"""停止 Vosk 引擎"""

View File

@@ -44,10 +44,13 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str):
engine.stop()
def main_vosk(a: int, c: int, m: str, t: str):
def main_vosk(a: int, c: int, m: str, t: str, tm: str, on: str):
global thread_data
stream = AudioStream(a, c)
engine = VoskRecognizer(m, None if t == 'none' else t)
engine = VoskRecognizer(
m, None if t == 'none' else t,
tm, on
)
stream.open_stream()
engine.start()
@@ -78,6 +81,8 @@ if __name__ == "__main__":
parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
# vosk only
parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
# Selects the translation backend for the Vosk engine: 'google' uses Google Translate; any other value falls back to the Ollama model given by --ollama_name.
parser.add_argument('-tm', '--translation_model', default='', help="Translation model to use: 'google' or 'ollama'")
parser.add_argument('-on', '--ollama_name', default='', help='Ollama model name for translation')
args = parser.parse_args()
if int(args.port) == 0:
@@ -98,7 +103,9 @@ if __name__ == "__main__":
int(args.audio_type),
int(args.chunk_rate),
args.model_path,
args.target_language
args.target_language,
args.translation_model,
args.ollama_name
)
else:
raise ValueError('Invalid caption engine specified.')

View File

@@ -2,7 +2,7 @@ from ollama import chat
from ollama import ChatResponse
import asyncio
from googletrans import Translator
from .sysout import stdout, stdout_obj
from .sysout import stdout_cmd, stdout_obj
lang_map = {
'en': 'English',
@@ -13,38 +13,29 @@ lang_map = {
'ru': 'Russian',
'ja': 'Japanese',
'ko': 'Korean',
'zh': 'Chinese'
'zh-cn': 'Chinese'
}
def ollama_translate(model: str, target: str, text: str, chunk_size = 3):
stream = chat(
def ollama_translate(model: str, target: str, text: str, time_s: str):
response: ChatResponse = chat(
model=model,
messages=[
{"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
{"role": "user", "content": text}
],
stream=True
]
)
chunk_content = ""
in_thinking = False
count = 0
for chunk in stream:
if count == 0 and chunk['message']['content'].startswith("<think>"):
in_thinking = True
if in_thinking:
if "</think>" in chunk['message']['content']:
in_thinking = False
continue
chunk_content += ' '.join(chunk['message']['content'].split('\n'))
count += 1
if count % chunk_size == 0:
print(chunk_content, end='')
chunk_content = ""
count = 0
if chunk_content:
print(chunk_content)
content = response.message.content or ""
if content.startswith('<think>'):
index = content.find('</think>')
if index != -1:
content = content[index+8:]
stdout_obj({
"command": "translation",
"time_s": time_s,
"translation": content.strip()
})
def google_translate(text: str, target: str, time_s: str):
def google_translate(model: str, target: str, text: str, time_s: str):
translator = Translator()
try:
res = asyncio.run(translator.translate(text, dest=target))
@@ -54,4 +45,4 @@ def google_translate(text: str, target: str, time_s: str):
"translation": res.text
})
except Exception as e:
stdout(f"Google Translation Request failed: {str(e)}")
stdout_cmd("warn", f"Google translation request failed, please check your network connection...")

View File

@@ -6,6 +6,8 @@ export interface Controls {
engineEnabled: boolean,
sourceLang: string,
targetLang: string,
transModel: string,
ollamaName: string,
engine: string,
audio: 0 | 1,
translation: boolean,
@@ -46,11 +48,6 @@ export interface CaptionItem {
translation: string
}
export interface CaptionTranslation {
time_s: string,
translation: string
}
export interface SoftwareLogItem {
type: "INFO" | "WARN" | "ERROR",
index: number,

View File

@@ -1,13 +1,17 @@
import {
UILanguage, UITheme, Styles, Controls,
CaptionItem, CaptionTranslation,
FullConfig, SoftwareLogItem
CaptionItem, FullConfig, SoftwareLogItem
} from '../types'
import { Log } from './Log'
import { app, BrowserWindow } from 'electron'
import * as path from 'path'
import * as fs from 'fs'
interface CaptionTranslation {
time_s: string,
translation: string
}
const defaultStyles: Styles = {
lineBreak: 1,
fontFamily: 'sans-serif',
@@ -32,6 +36,8 @@ const defaultStyles: Styles = {
const defaultControls: Controls = {
sourceLang: 'en',
targetLang: 'zh',
transModel: 'ollama',
ollamaName: '',
engine: 'gummy',
audio: 0,
engineEnabled: false,

View File

@@ -81,7 +81,9 @@ export class CaptionEngine {
}
else if(allConfig.controls.engine === 'vosk'){
this.command.push('-e', 'vosk')
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
this.command.push('-m', `"${allConfig.controls.modelPath}"`)
this.command.push('-tm', allConfig.controls.transModel)
this.command.push('-on', allConfig.controls.ollamaName)
}
}
Log.info('Engine Path:', this.appPath)
@@ -257,6 +259,9 @@ function handleEngineData(data: any) {
else if(data.command === 'info') {
Log.info('Engine Info:', data.content)
}
else if(data.command === 'warn') {
Log.warn('Engine Warn:', data.content)
}
else if(data.command === 'error') {
Log.error('Engine Error:', data.content)
controlWindow.sendErrorMessage(/*i18n('engine.error') +*/ data.content)

View File

@@ -5,6 +5,14 @@
<a @click="applyChange">{{ $t('engine.applyChange') }}</a> |
<a @click="cancelChange">{{ $t('engine.cancelChange') }}</a>
</template>
<div class="input-item">
<span class="input-label">{{ $t('engine.captionEngine') }}</span>
<a-select
class="input-area"
v-model:value="currentEngine"
:options="captionEngine"
></a-select>
</div>
<div class="input-item">
<span class="input-label">{{ $t('engine.sourceLang') }}</span>
<a-select
@@ -22,14 +30,28 @@
:options="langList.filter((item) => item.value !== 'auto')"
></a-select>
</div>
<div class="input-item">
<span class="input-label">{{ $t('engine.captionEngine') }}</span>
<div class="input-item" v-if="transModel">
<span class="input-label">{{ $t('engine.transModel') }}</span>
<a-select
class="input-area"
v-model:value="currentEngine"
:options="captionEngine"
v-model:value="currentTransModel"
:options="transModel"
></a-select>
</div>
<div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.ollamaNote') }}</p>
</template>
<span class="input-label info-label"
:style="{color: uiColor}"
>{{ $t('engine.ollama') }}</span>
</a-popover>
<a-input
class="input-area"
v-model:value="currentOllamaName"
></a-input>
</div>
<div class="input-item">
<span class="input-label">{{ $t('engine.audioType') }}</span>
<a-select
@@ -80,11 +102,13 @@
<a-card size="small" :title="$t('engine.showMore')" v-show="showMore" style="margin-top:10px;">
<div class="input-item">
<a-popover>
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.apikeyInfo') }}</p>
</template>
<span class="input-label info-label">{{ $t('engine.apikey') }}</span>
<span class="input-label info-label"
:style="{color: uiColor}"
>{{ $t('engine.apikey') }}</span>
</a-popover>
<a-input
class="input-area"
@@ -93,14 +117,17 @@
/>
</div>
<div class="input-item">
<a-popover>
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.modelPathInfo') }}</p>
</template>
<span class="input-label info-label">{{ $t('engine.modelPath') }}</span>
<span class="input-label info-label"
:style="{color: uiColor}"
>{{ $t('engine.modelPath') }}</span>
</a-popover>
<span
class="input-folder"
:style="{color: uiColor}"
@click="selectFolderPath"
><span><FolderOpenOutlined /></span></span>
<a-input
@@ -110,13 +137,13 @@
/>
</div>
<div class="input-item">
<a-popover>
<a-popover placement="right">
<template #content>
<p class="label-hover-info">{{ $t('engine.startTimeoutInfo') }}</p>
</template>
<span
class="input-label info-label"
style="vertical-align: middle;"
:style="{color: uiColor, verticalAlign: 'middle'}"
>{{ $t('engine.startTimeout') }}</span>
</a-popover>
<a-input-number
@@ -134,12 +161,12 @@
</template>
<script setup lang="ts">
import { ref, computed, watch } from 'vue'
import { ref, computed, watch, h } from 'vue'
import { storeToRefs } from 'pinia'
import { useGeneralSettingStore } from '@renderer/stores/generalSetting'
import { useEngineControlStore } from '@renderer/stores/engineControl'
import { notification } from 'ant-design-vue'
import { FolderOpenOutlined ,InfoCircleOutlined } from '@ant-design/icons-vue';
import { ExclamationCircleOutlined, FolderOpenOutlined ,InfoCircleOutlined } from '@ant-design/icons-vue';
import { useI18n } from 'vue-i18n'
const { t } = useI18n()
@@ -148,11 +175,16 @@ const showMore = ref(false)
const engineControl = useEngineControlStore()
const { captionEngine, audioType, changeSignal } = storeToRefs(engineControl)
const generalSetting = useGeneralSettingStore()
const { uiColor } = storeToRefs(generalSetting)
const currentSourceLang = ref('auto')
const currentTargetLang = ref('zh')
const currentEngine = ref<string>('gummy')
const currentAudio = ref<0 | 1>(0)
const currentTranslation = ref<boolean>(false)
const currentTranslation = ref<boolean>(true)
const currentTransModel = ref('ollama')
const currentOllamaName = ref('')
const currentAPI_KEY = ref<string>('')
const currentModelPath = ref<string>('')
const currentCustomized = ref<boolean>(false)
@@ -169,9 +201,33 @@ const langList = computed(() => {
return []
})
const transModel = computed(() => {
for(let item of captionEngine.value){
if(item.value === currentEngine.value) {
return item.transModel
}
}
return []
})
function applyChange(){
if(
currentTranslation.value && transModel.value &&
currentTransModel.value === 'ollama' && !currentOllamaName.value.trim()
) {
notification.open({
message: t('noti.ollamaNameNull'),
description: t('noti.ollamaNameNullNote'),
duration: null,
icon: () => h(ExclamationCircleOutlined, { style: 'color: #ff4d4f' })
})
return
}
engineControl.sourceLang = currentSourceLang.value
engineControl.targetLang = currentTargetLang.value
engineControl.transModel = currentTransModel.value
engineControl.ollamaName = currentOllamaName.value
engineControl.engine = currentEngine.value
engineControl.audio = currentAudio.value
engineControl.translation = currentTranslation.value
@@ -194,6 +250,8 @@ function applyChange(){
function cancelChange(){
currentSourceLang.value = engineControl.sourceLang
currentTargetLang.value = engineControl.targetLang
currentTransModel.value = engineControl.transModel
currentOllamaName.value = engineControl.ollamaName
currentEngine.value = engineControl.engine
currentAudio.value = engineControl.audio
currentTranslation.value = engineControl.translation
@@ -243,8 +301,8 @@ watch(currentEngine, (val) => {
}
.info-label {
color: #1677ff;
cursor: pointer;
font-style: italic;
}
.input-folder {
@@ -255,20 +313,12 @@ watch(currentEngine, (val) => {
transition: all 0.25s;
}
.input-folder>span {
padding: 0 2px;
border: 2px solid #1677ff;
color: #1677ff;
border-radius: 30%;
}
.input-folder:hover {
transform: scale(1.1);
}
.customize-note {
padding: 10px 10px 0;
color: red;
max-width: min(40vw, 480px);
}
</style>

View File

@@ -30,6 +30,10 @@ export const engines = {
{ value: 'ru', label: '俄语' },
{ value: 'es', label: '西班牙语' },
{ value: 'it', label: '意大利语' },
],
transModel: [
{ value: 'ollama', label: 'Ollama 本地模型' },
{ value: 'google', label: 'Google API 调用' },
]
}
],
@@ -64,6 +68,10 @@ export const engines = {
{ value: 'ru', label: 'Russian' },
{ value: 'es', label: 'Spanish' },
{ value: 'it', label: 'Italian' },
],
transModel: [
{ value: 'ollama', label: 'Ollama Local Model' },
{ value: 'google', label: 'Google API Call' },
]
}
],
@@ -98,8 +106,11 @@ export const engines = {
{ value: 'ru', label: 'ロシア語' },
{ value: 'es', label: 'スペイン語' },
{ value: 'it', label: 'イタリア語' },
],
transModel: [
{ value: 'ollama', label: 'Ollama ローカルモデル' },
{ value: 'google', label: 'Google API 呼び出し' },
]
}
]
}

View File

@@ -28,7 +28,9 @@ export default {
"changeInfo": "If the caption engine is already running, you need to restart it for the changes to take effect.",
"styleChange": "Caption Style Changed",
"styleInfo": "Caption style changes have been saved and applied.",
"engineStartTimeout": "Caption engine startup timeout, automatically force stopped"
"engineStartTimeout": "Caption engine startup timeout, automatically force stopped",
"ollamaNameNull": "'Ollama' Field is Empty",
"ollamaNameNullNote": "When selecting Ollama model as the translation model, the 'Ollama' field cannot be empty and must be filled with the name of a locally configured Ollama model."
},
general: {
"title": "General Settings",
@@ -47,6 +49,9 @@ export default {
"cancelChange": "Cancel Changes",
"sourceLang": "Source",
"transLang": "Translation",
"transModel": "Model",
"ollama": "Ollama",
"ollamaNote": "To use for translation, the name of the local Ollama model that will call the service on the default port. It is recommended to use a non-inference model with less than 1B parameters.",
"captionEngine": "Engine",
"audioType": "Audio Type",
"systemOutput": "System Audio Output (Speaker)",

View File

@@ -28,7 +28,9 @@ export default {
"changeInfo": "字幕エンジンがすでに起動している場合、変更を有効にするには再起動が必要です。",
"styleChange": "字幕のスタイルが変更されました",
"styleInfo": "字幕のスタイル変更が保存され、適用されました",
"engineStartTimeout": "字幕エンジンの起動がタイムアウトしました。自動的に強制停止しました"
"engineStartTimeout": "字幕エンジンの起動がタイムアウトしました。自動的に強制停止しました",
"ollamaNameNull": "Ollama フィールドが空です",
"ollamaNameNullNote": "Ollama モデルを翻訳モデルとして選択する場合、Ollama フィールドは空にできません。ローカルで設定された Ollama モデルの名前を入力してください。"
},
general: {
"title": "一般設定",
@@ -47,6 +49,9 @@ export default {
"cancelChange": "変更をキャンセル",
"sourceLang": "ソース言語",
"transLang": "翻訳言語",
"transModel": "翻訳モデル",
"ollama": "Ollama",
"ollamaNote": "翻訳に使用する、デフォルトポートでサービスを呼び出すローカルOllamaモデルの名前。1B 未満のパラメータを持つ非推論モデルの使用を推奨します。",
"captionEngine": "エンジン",
"audioType": "オーディオ",
"systemOutput": "システムオーディオ出力(スピーカー)",

View File

@@ -28,7 +28,9 @@ export default {
"changeInfo": "如果字幕引擎已经启动,需要重启字幕引擎修改才会生效",
"styleChange": "字幕样式已修改",
"styleInfo": "字幕样式修改已经保存并生效",
"engineStartTimeout": "字幕引擎启动超时,已自动强制停止"
"engineStartTimeout": "字幕引擎启动超时,已自动强制停止",
"ollamaNameNull": "Ollama 字段为空",
"ollamaNameNullNote": "选择 Ollama 模型作为翻译模型时Ollama 字段不能为空,需要填写本地已经配置好的 Ollama 模型的名称。"
},
general: {
"title": "通用设置",
@@ -47,6 +49,9 @@ export default {
"cancelChange": "取消更改",
"sourceLang": "源语言",
"transLang": "翻译语言",
"transModel": "翻译模型",
"ollama": "Ollama",
"ollamaNote": "要使用的进行翻译的本地 Ollama 模型的名称,将调用默认端口的服务,建议使用参数量小于 1B 的非推理模型。",
"captionEngine": "字幕引擎",
"audioType": "音频类型",
"systemOutput": "系统音频输出(扬声器)",

View File

@@ -19,6 +19,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
const engineEnabled = ref(false)
const sourceLang = ref<string>('en')
const targetLang = ref<string>('zh')
const transModel = ref<string>('ollama')
const ollamaName = ref<string>('')
const engine = ref<string>('gummy')
const audio = ref<0 | 1>(0)
const translation = ref<boolean>(true)
@@ -37,6 +39,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
engineEnabled: engineEnabled.value,
sourceLang: sourceLang.value,
targetLang: targetLang.value,
transModel: transModel.value,
ollamaName: ollamaName.value,
engine: engine.value,
audio: audio.value,
translation: translation.value,
@@ -68,6 +72,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
}
sourceLang.value = controls.sourceLang
targetLang.value = controls.targetLang
transModel.value = controls.transModel
ollamaName.value = controls.ollamaName
engine.value = controls.engine
audio.value = controls.audio
engineEnabled.value = controls.engineEnabled
@@ -132,6 +138,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
engineEnabled, // 字幕引擎是否启用
sourceLang, // 源语言
targetLang, // 目标语言
transModel, // 翻译模型
ollamaName, // Ollama 模型
engine, // 字幕引擎
audio, // 选择音频
translation, // 是否启用翻译

View File

@@ -6,6 +6,8 @@ export interface Controls {
engineEnabled: boolean,
sourceLang: string,
targetLang: string,
transModel: string,
ollamaName: string,
engine: string,
audio: 0 | 1,
translation: boolean,