feat(translation): 添加非实时翻译功能用户界面组件

2026-02-04 04:14:42 +08:00 · 2025-09-04 23:41:22 +08:00
parent 14987cbfc5
commit 2b7ce06f04
15 changed files with 193 additions and 77 deletions
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -153,4 +153,18 @@
 ### 优化体验

 - 优化软件用户界面的部分组件
- 更清晰的日志输出
+- 更清晰的日志输出
+
+
+## v0.8.0
+
+2025-09-??
+
+### 新增功能
+
+- 字幕引擎添加超时关闭功能：如果在规定时间字幕引擎没有启动成功会自动关闭、在字幕引擎启动过程中也可选择关闭字幕引擎
+- 添加非实时翻译功能：支持调用 Ollama 本地模型进行翻译、支持调用 Google 翻译 API 进行翻译
+
+### 优化体验
+
+- 带有额外信息的标签颜色改为与主题色一致
--- a/docs/api-docs/caption-engine.md
+++ b/docs/api-docs/caption-engine.md
@@ -92,6 +92,17 @@ Python 端监听到的音频流转换为的字幕数据。

 Python 端打印的提示信息，会计入日志。

+### `warn`
+
+```js
+{
+  command: "warn",
+  content: string
+}
+```
+
+Python 端打印的警告信息，会计入日志。
+
 ### `error`

 ```js
@@ -101,7 +112,7 @@ Python 端打印的提示信息，会计入日志。
 }
 ```

-Python 端打印的错误信息，该错误信息需要在前端弹窗显示。
+Python 端打印的错误信息，该错误信息会在前端弹窗显示。

 ### `usage`

--- a/engine/audio2text/vosk.py
+++ b/engine/audio2text/vosk.py
@@ -4,7 +4,7 @@ import time
 from datetime import datetime

 from vosk import Model, KaldiRecognizer, SetLogLevel
-from utils import stdout_cmd, stdout_obj, google_translate
+from utils import stdout_cmd, stdout_obj, google_translate, ollama_translate


 class VoskRecognizer:
@@ -14,8 +14,10 @@ class VoskRecognizer:
    初始化参数：
        model_path: Vosk 识别模型路径
        target: 翻译目标语言
+        trans_model: 翻译模型名称
+        ollama_name: Ollama 模型名称
    """
-    def __init__(self, model_path: str, target: str | None):
+    def __init__(self, model_path: str, target: str | None, trans_model: str, ollama_name: str):
        SetLogLevel(-1)
        if model_path.startswith('"'):
            model_path = model_path[1:]
@@ -23,8 +25,12 @@ class VoskRecognizer:
            model_path = model_path[:-1]
        self.model_path = model_path
        self.target = target
+        if trans_model == 'google':
+            self.trans_func = google_translate
+        else:
+            self.trans_func = ollama_translate
+        self.ollama_name = ollama_name
        self.time_str = ''
-        self.trans_time = time.time()
        self.cur_id = 0
        self.prev_content = ''

@@ -58,8 +64,8 @@ class VoskRecognizer:
            if self.target:
                self.trans_time = time.time()
                th = threading.Thread(
-                    target=google_translate,
-                    args=(caption['text'], self.target, self.time_str)
+                    target=self.trans_func,
+                    args=(self.ollama_name, self.target, caption['text'], self.time_str)
                )
                th.start()
        else:
@@ -75,13 +81,6 @@ class VoskRecognizer:
            self.prev_content = content
        
        stdout_obj(caption)
-        if self.target and time.time() - self.trans_time > 2.0:
-            self.trans_time = time.time()
-            th = threading.Thread(
-                target=google_translate,
-                args=(caption['text'], self.target, self.time_str)
-            )
-            th.start()

    def stop(self):
        """停止 Vosk 引擎"""
--- a/engine/main.py
+++ b/engine/main.py
@@ -44,10 +44,13 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str):
    engine.stop()


-def main_vosk(a: int, c: int, m: str, t: str):
+def main_vosk(a: int, c: int, m: str, t: str, tm: str, on: str):
    global thread_data
    stream = AudioStream(a, c)
-    engine = VoskRecognizer(m, None if t == 'none' else t)
+    engine = VoskRecognizer(
+        m, None if t == 'none' else t,
+        tm, on
+    )

    stream.open_stream()
    engine.start()
@@ -78,6 +81,8 @@ if __name__ == "__main__":
    parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
    # vosk only
    parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
+    parser.add_argument('-tm', '--translation_model', default='', help='Translation model to use: google or ollama')
+    parser.add_argument('-on', '--ollama_name', default='', help='Ollama model name for translation')

    args = parser.parse_args()
    if int(args.port) == 0:
@@ -98,7 +103,9 @@ if __name__ == "__main__":
            int(args.audio_type),
            int(args.chunk_rate),
            args.model_path,
-            args.target_language
+            args.target_language,
+            args.translation_model,
+            args.ollama_name
        )
    else:
        raise ValueError('Invalid caption engine specified.')
--- a/engine/utils/translation.py
+++ b/engine/utils/translation.py
@@ -2,7 +2,7 @@ from ollama import chat
 from ollama import ChatResponse
 import asyncio
 from googletrans import Translator
-from .sysout import stdout, stdout_obj
+from .sysout import stdout_cmd, stdout_obj

 lang_map = {
    'en': 'English',
@@ -13,38 +13,29 @@ lang_map = {
    'ru': 'Russian',
    'ja': 'Japanese',
    'ko': 'Korean',
-    'zh': 'Chinese'
+    'zh-cn': 'Chinese'
 }

-def ollama_translate(model: str, target: str, text: str, chunk_size = 3):
-    stream = chat(
+def ollama_translate(model: str, target: str, text: str, time_s: str):
+    response: ChatResponse = chat(
        model=model,
        messages=[
            {"role": "system", "content": f"/no_think Translate the following content into {lang_map[target]}, and do not output any additional information."},
            {"role": "user", "content": text}
-        ],
-        stream=True
+        ]
    )
-    chunk_content = ""
-    in_thinking = False
-    count = 0
-    for chunk in stream:
-        if count == 0 and chunk['message']['content'].startswith("<think>"):
-            in_thinking = True
-        if in_thinking:
-            if "</think>" in chunk['message']['content']:
-                in_thinking = False
-            continue
-        chunk_content += ' '.join(chunk['message']['content'].split('\n'))
-        count += 1
-        if count % chunk_size == 0:
-            print(chunk_content, end='')
-            chunk_content = ""
-            count = 0
-    if chunk_content:
-        print(chunk_content)
+    content = response.message.content or ""
+    if content.startswith('<think>'):
+        index = content.find('</think>')
+        if index != -1:
+            content = content[index+8:]
+    stdout_obj({
+        "command": "translation",
+        "time_s": time_s,
+        "translation": content.strip()
+    })

-def google_translate(text: str, target: str, time_s: str):
+def google_translate(model: str, target: str, text: str, time_s: str):
    translator = Translator()
    try:
        res = asyncio.run(translator.translate(text, dest=target))
@@ -54,4 +45,4 @@ def google_translate(text: str, target: str, time_s: str):
            "translation": res.text
        })
    except Exception as e:
-        stdout(f"Google Translation Request failed: {str(e)}")
+        stdout_cmd("warn", f"Google translation request failed, please check your network connection...")
--- a/src/main/types/index.ts
+++ b/src/main/types/index.ts
@@ -6,6 +6,8 @@ export interface Controls {
  engineEnabled: boolean,
  sourceLang: string,
  targetLang: string,
+  transModel: string,
+  ollamaName: string,
  engine: string,
  audio: 0 | 1,
  translation: boolean,
@@ -46,11 +48,6 @@ export interface CaptionItem {
  translation: string
 }

-export interface CaptionTranslation {
-  time_s: string,
-  translation: string
-}
-
 export interface SoftwareLogItem {
  type: "INFO" | "WARN" | "ERROR",
  index: number,
--- a/src/main/utils/AllConfig.ts
+++ b/src/main/utils/AllConfig.ts
@@ -1,13 +1,17 @@
 import {
  UILanguage, UITheme, Styles, Controls,
-  CaptionItem, CaptionTranslation,
-  FullConfig, SoftwareLogItem
+  CaptionItem, FullConfig, SoftwareLogItem
 } from '../types'
 import { Log } from './Log'
 import { app, BrowserWindow } from 'electron'
 import * as path from 'path'
 import * as fs from 'fs'

+interface CaptionTranslation {
+  time_s: string,
+  translation: string
+}
+
 const defaultStyles: Styles = {
  lineBreak: 1,
  fontFamily: 'sans-serif',
@@ -32,6 +36,8 @@ const defaultStyles: Styles = {
 const defaultControls: Controls = {
  sourceLang: 'en',
  targetLang: 'zh',
+  transModel: 'ollama',
+  ollamaName: '',
  engine: 'gummy',
  audio: 0,
  engineEnabled: false,
--- a/src/main/utils/CaptionEngine.ts
+++ b/src/main/utils/CaptionEngine.ts
@@ -81,7 +81,9 @@ export class CaptionEngine {
      }
      else if(allConfig.controls.engine === 'vosk'){
        this.command.push('-e', 'vosk')
-        this.command.push('-m', `"${allConfig.controls.modelPath}"`)        
+        this.command.push('-m', `"${allConfig.controls.modelPath}"`)
+        this.command.push('-tm', allConfig.controls.transModel)
+        this.command.push('-on', allConfig.controls.ollamaName)
      }
    }
    Log.info('Engine Path:', this.appPath)
@@ -257,6 +259,9 @@ function handleEngineData(data: any) {
  else if(data.command === 'info') {
    Log.info('Engine Info:', data.content)
  }
+  else if(data.command === 'warn') {
+    Log.warn('Engine Warn:', data.content)
+  }
  else if(data.command === 'error') {
    Log.error('Engine Error:', data.content)
    controlWindow.sendErrorMessage(/*i18n('engine.error') +*/ data.content)
--- a/src/renderer/src/components/EngineControl.vue
+++ b/src/renderer/src/components/EngineControl.vue
@@ -5,6 +5,14 @@
      <a @click="applyChange">{{ $t('engine.applyChange') }}</a> |
      <a @click="cancelChange">{{ $t('engine.cancelChange') }}</a>
    </template>
+    <div class="input-item">
+      <span class="input-label">{{ $t('engine.captionEngine') }}</span>
+      <a-select
+        class="input-area"
+        v-model:value="currentEngine"
+        :options="captionEngine"
+      ></a-select>
+    </div>
    <div class="input-item">
      <span class="input-label">{{ $t('engine.sourceLang') }}</span>
      <a-select
@@ -22,14 +30,28 @@
        :options="langList.filter((item) => item.value !== 'auto')"
      ></a-select>
    </div>
-    <div class="input-item">
-      <span class="input-label">{{ $t('engine.captionEngine') }}</span>
+    <div class="input-item" v-if="transModel">
+      <span class="input-label">{{ $t('engine.transModel') }}</span>
      <a-select
        class="input-area"
-        v-model:value="currentEngine"
-        :options="captionEngine"
+        v-model:value="currentTransModel"
+        :options="transModel"
      ></a-select>
    </div>
+    <div class="input-item" v-if="transModel && currentTransModel === 'ollama'">
+      <a-popover placement="right">
+        <template #content>
+          <p class="label-hover-info">{{ $t('engine.ollamaNote') }}</p>
+        </template>
+        <span class="input-label info-label"
+          :style="{color: uiColor}"
+        >{{ $t('engine.ollama') }}</span>
+      </a-popover>
+      <a-input
+        class="input-area"
+        v-model:value="currentOllamaName"
+      ></a-input>
+    </div>
    <div class="input-item">
      <span class="input-label">{{ $t('engine.audioType') }}</span>
      <a-select
@@ -80,11 +102,13 @@

    <a-card size="small" :title="$t('engine.showMore')" v-show="showMore" style="margin-top:10px;">
      <div class="input-item">
-        <a-popover>
+        <a-popover placement="right">
          <template #content>
            <p class="label-hover-info">{{ $t('engine.apikeyInfo') }}</p>
          </template>
-          <span class="input-label info-label">{{ $t('engine.apikey') }}</span>
+          <span class="input-label info-label"
+            :style="{color: uiColor}"
+          >{{ $t('engine.apikey') }}</span>
        </a-popover>
        <a-input
          class="input-area"
@@ -93,14 +117,17 @@
        />
      </div>
      <div class="input-item">
-        <a-popover>
+        <a-popover placement="right">
          <template #content>
            <p class="label-hover-info">{{ $t('engine.modelPathInfo') }}</p>
          </template>
-          <span class="input-label info-label">{{ $t('engine.modelPath') }}</span>
+          <span class="input-label info-label"
+            :style="{color: uiColor}"
+          >{{ $t('engine.modelPath') }}</span>
        </a-popover>
        <span
          class="input-folder"
+          :style="{color: uiColor}"
          @click="selectFolderPath"
        ><span><FolderOpenOutlined /></span></span>
        <a-input
@@ -110,13 +137,13 @@
        />
      </div>
      <div class="input-item">
-        <a-popover>
+        <a-popover placement="right">
          <template #content>
            <p class="label-hover-info">{{ $t('engine.startTimeoutInfo') }}</p>
          </template>
          <span
            class="input-label info-label"
-            style="vertical-align: middle;"
+            :style="{color: uiColor, verticalAlign: 'middle'}"
          >{{ $t('engine.startTimeout') }}</span>
        </a-popover>
        <a-input-number
@@ -134,12 +161,12 @@
 </template>

 <script setup lang="ts">
-import { ref, computed, watch } from 'vue'
+import { ref, computed, watch, h } from 'vue'
 import { storeToRefs } from 'pinia'
 import { useGeneralSettingStore } from '@renderer/stores/generalSetting'
 import { useEngineControlStore } from '@renderer/stores/engineControl'
 import { notification } from 'ant-design-vue'
-import { FolderOpenOutlined ,InfoCircleOutlined } from '@ant-design/icons-vue';
+import { ExclamationCircleOutlined, FolderOpenOutlined ,InfoCircleOutlined } from '@ant-design/icons-vue';
 import { useI18n } from 'vue-i18n'

 const { t } = useI18n()
@@ -148,11 +175,16 @@ const showMore = ref(false)
 const engineControl = useEngineControlStore()
 const { captionEngine, audioType, changeSignal } = storeToRefs(engineControl)

+const generalSetting = useGeneralSettingStore()
+const { uiColor } = storeToRefs(generalSetting)
+
 const currentSourceLang = ref('auto')
 const currentTargetLang = ref('zh')
 const currentEngine = ref<string>('gummy')
 const currentAudio = ref<0 | 1>(0)
-const currentTranslation = ref<boolean>(false)
+const currentTranslation = ref<boolean>(true)
+const currentTransModel = ref('ollama')
+const currentOllamaName = ref('')
 const currentAPI_KEY = ref<string>('')
 const currentModelPath = ref<string>('')
 const currentCustomized = ref<boolean>(false)
@@ -169,9 +201,33 @@ const langList = computed(() => {
  return []
 })

+const transModel = computed(() => {
+  for(let item of captionEngine.value){
+    if(item.value === currentEngine.value) {
+      return item.transModel
+    }
+  }
+  return []
+})
+
 function applyChange(){
+  if(
+    currentTranslation.value && transModel.value &&
+    currentTransModel.value === 'ollama' && !currentOllamaName.value.trim()
+  ) {
+    notification.open({
+      message: t('noti.ollamaNameNull'),
+      description: t('noti.ollamaNameNullNote'),
+      duration: null,
+      icon: () => h(ExclamationCircleOutlined, { style: 'color: #ff4d4f' })
+    })
+    return
+  }
+
  engineControl.sourceLang = currentSourceLang.value
  engineControl.targetLang = currentTargetLang.value
+  engineControl.transModel = currentTransModel.value
+  engineControl.ollamaName = currentOllamaName.value
  engineControl.engine = currentEngine.value
  engineControl.audio = currentAudio.value
  engineControl.translation = currentTranslation.value
@@ -194,6 +250,8 @@ function applyChange(){
 function cancelChange(){
  currentSourceLang.value = engineControl.sourceLang
  currentTargetLang.value = engineControl.targetLang
+  currentTransModel.value = engineControl.transModel
+  currentOllamaName.value = engineControl.ollamaName
  currentEngine.value = engineControl.engine
  currentAudio.value = engineControl.audio
  currentTranslation.value = engineControl.translation
@@ -243,8 +301,8 @@ watch(currentEngine, (val) => {
 }

 .info-label {
-  color: #1677ff;
  cursor: pointer;
+  font-style: italic;
 }

 .input-folder {
@@ -255,20 +313,12 @@ watch(currentEngine, (val) => {
  transition: all 0.25s;
 }

-.input-folder>span {
-  padding: 0 2px;
-  border: 2px solid #1677ff;
-  color: #1677ff;
-  border-radius: 30%;
-}
-
 .input-folder:hover {
  transform: scale(1.1);
 }

 .customize-note {
  padding: 10px 10px 0;
-  color: red;
  max-width: min(40vw, 480px);
 }
 </style>
--- a/src/renderer/src/i18n/config/engine.ts
+++ b/src/renderer/src/i18n/config/engine.ts
@@ -30,6 +30,10 @@ export const engines = {
        { value: 'ru', label: '俄语' },
        { value: 'es', label: '西班牙语' },
        { value: 'it', label: '意大利语' },
+      ],
+      transModel: [
+        { value: 'ollama', label: 'Ollama 本地模型' },
+        { value: 'google', label: 'Google API 调用' },
      ]
    }
  ],
@@ -64,6 +68,10 @@ export const engines = {
        { value: 'ru', label: 'Russian' },
        { value: 'es', label: 'Spanish' },
        { value: 'it', label: 'Italian' },
+      ],
+      transModel: [
+        { value: 'ollama', label: 'Ollama Local Model' },
+        { value: 'google', label: 'Google API Call' },
      ]
    }
  ],
@@ -98,8 +106,11 @@ export const engines = {
        { value: 'ru', label: 'ロシア語' },
        { value: 'es', label: 'スペイン語' },
        { value: 'it', label: 'イタリア語' },
+      ],
+      transModel: [
+        { value: 'ollama', label: 'Ollama ローカルモデル' },
+        { value: 'google', label: 'Google API 呼び出し' },
      ]
    }
  ]
 }
-
--- a/src/renderer/src/i18n/lang/en.ts
+++ b/src/renderer/src/i18n/lang/en.ts
@@ -28,7 +28,9 @@ export default {
    "changeInfo": "If the caption engine is already running, you need to restart it for the changes to take effect.",
    "styleChange": "Caption Style Changed",
    "styleInfo": "Caption style changes have been saved and applied.",
-    "engineStartTimeout": "Caption engine startup timeout, automatically force stopped"
+    "engineStartTimeout": "Caption engine startup timeout, automatically force stopped",
+    "ollamaNameNull": "'Ollama' Field is Empty",
+    "ollamaNameNullNote": "When selecting Ollama model as the translation model, the 'Ollama' field cannot be empty and must be filled with the name of a locally configured Ollama model."
  },
  general: {
    "title": "General Settings",
@@ -47,6 +49,9 @@ export default {
    "cancelChange": "Cancel Changes",
    "sourceLang": "Source",
    "transLang": "Translation",
+    "transModel": "Model",
+    "ollama": "Ollama",
+    "ollamaNote": "The name of the local Ollama model to use for translation. The service on the default port will be called. A non-reasoning model with fewer than 1B parameters is recommended.",
    "captionEngine": "Engine",
    "audioType": "Audio Type",
    "systemOutput": "System Audio Output (Speaker)",
--- a/src/renderer/src/i18n/lang/ja.ts
+++ b/src/renderer/src/i18n/lang/ja.ts
@@ -28,7 +28,9 @@ export default {
    "changeInfo": "字幕エンジンがすでに起動している場合、変更を有効にするには再起動が必要です。",
    "styleChange": "字幕のスタイルが変更されました",
    "styleInfo": "字幕のスタイル変更が保存され、適用されました",
-    "engineStartTimeout": "字幕エンジンの起動がタイムアウトしました。自動的に強制停止しました"
+    "engineStartTimeout": "字幕エンジンの起動がタイムアウトしました。自動的に強制停止しました",
+    "ollamaNameNull": "Ollama フィールドが空です",
+    "ollamaNameNullNote": "Ollama モデルを翻訳モデルとして選択する場合、Ollama フィールドは空にできません。ローカルで設定された Ollama モデルの名前を入力してください。"
  },
  general: {
    "title": "一般設定",
@@ -47,6 +49,9 @@ export default {
    "cancelChange": "変更をキャンセル",
    "sourceLang": "ソース言語",
    "transLang": "翻訳言語",
+    "transModel": "翻訳モデル",
+    "ollama": "Ollama",
+    "ollamaNote": "翻訳に使用する、デフォルトポートでサービスを呼び出すローカルOllamaモデルの名前。1B 未満のパラメータを持つ非推論モデルの使用を推奨します。",
    "captionEngine": "エンジン",
    "audioType": "オーディオ",
    "systemOutput": "システムオーディオ出力（スピーカー）",
--- a/src/renderer/src/i18n/lang/zh.ts
+++ b/src/renderer/src/i18n/lang/zh.ts
@@ -28,7 +28,9 @@ export default {
    "changeInfo": "如果字幕引擎已经启动，需要重启字幕引擎修改才会生效",
    "styleChange": "字幕样式已修改",
    "styleInfo": "字幕样式修改已经保存并生效",
-    "engineStartTimeout": "字幕引擎启动超时，已自动强制停止"
+    "engineStartTimeout": "字幕引擎启动超时，已自动强制停止",
+    "ollamaNameNull": "Ollama 字段为空",
+    "ollamaNameNullNote": "选择 Ollama 模型作为翻译模型时，Ollama 字段不能为空，需要填写本地已经配置好的 Ollama 模型的名称。"
  },
  general: {
    "title": "通用设置",
@@ -47,6 +49,9 @@ export default {
    "cancelChange": "取消更改",
    "sourceLang": "源语言",
    "transLang": "翻译语言",
+    "transModel": "翻译模型",
+    "ollama": "Ollama",
+    "ollamaNote": "要使用的进行翻译的本地 Ollama 模型的名称，将调用默认端口的服务，建议使用参数量小于 1B 的非推理模型。",
    "captionEngine": "字幕引擎",
    "audioType": "音频类型",
    "systemOutput": "系统音频输出（扬声器）",
--- a/src/renderer/src/stores/engineControl.ts
+++ b/src/renderer/src/stores/engineControl.ts
@@ -19,6 +19,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
  const engineEnabled = ref(false)
  const sourceLang = ref<string>('en')
  const targetLang = ref<string>('zh')
+  const transModel = ref<string>('ollama')
+  const ollamaName = ref<string>('')
  const engine = ref<string>('gummy')
  const audio = ref<0 | 1>(0)
  const translation = ref<boolean>(true)
@@ -37,6 +39,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
      engineEnabled: engineEnabled.value,
      sourceLang: sourceLang.value,
      targetLang: targetLang.value,
+      transModel: transModel.value,
+      ollamaName: ollamaName.value,
      engine: engine.value,
      audio: audio.value,
      translation: translation.value,
@@ -68,6 +72,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
    }
    sourceLang.value = controls.sourceLang
    targetLang.value = controls.targetLang
+    transModel.value = controls.transModel
+    ollamaName.value = controls.ollamaName
    engine.value = controls.engine
    audio.value = controls.audio
    engineEnabled.value = controls.engineEnabled
@@ -132,6 +138,8 @@ export const useEngineControlStore = defineStore('engineControl', () => {
    engineEnabled,      // 字幕引擎是否启用
    sourceLang,         // 源语言
    targetLang,         // 目标语言
+    transModel,         // 翻译模型
+    ollamaName,        // Ollama 模型
    engine,             // 字幕引擎
    audio,              // 选择音频
    translation,        // 是否启用翻译
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@@ -6,6 +6,8 @@ export interface Controls {
  engineEnabled: boolean,
  sourceLang: string,
  targetLang: string,
+  transModel: string,
+  ollamaName: string,
  engine: string,
  audio: 0 | 1,
  translation: boolean,