From cbbaaa95a37073a3285016b7d21430c9f5dd3132 Mon Sep 17 00:00:00 2001 From: himeditator mac Date: Tue, 8 Jul 2025 21:05:43 +0800 Subject: [PATCH] =?UTF-8?q?feat(gummy):=20=E6=94=AF=E6=8C=81=E9=80=9A?= =?UTF-8?q?=E8=BF=87=E8=AE=BE=E7=BD=AE=E6=B7=BB=E5=8A=A0=20API=20KEY?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 更新 main-gummy.py 以支持 API KEY 参数 - 修改 electron-builder.yml 以调整 Gummy 可执行文件路径 --- caption-engine/audio2text/gummy.py | 5 +- caption-engine/main-gummy.py | 10 ++- electron-builder.yml | 4 +- package-lock.json | 4 +- package.json | 2 +- src/main/i18n/lang/en.ts | 2 +- src/main/i18n/lang/ja.ts | 2 +- src/main/i18n/lang/zh.ts | 2 +- src/main/types/index.ts | 1 + src/main/utils/AllConfig.ts | 5 +- src/main/utils/CaptionEngine.ts | 25 ++++-- src/renderer/src/components/EngineControl.vue | 78 +++++++++++-------- src/renderer/src/i18n/lang/en.ts | 2 + src/renderer/src/i18n/lang/ja.ts | 2 + src/renderer/src/i18n/lang/zh.ts | 2 + src/renderer/src/stores/engineControl.ts | 5 +- src/renderer/src/types/index.ts | 1 + 17 files changed, 99 insertions(+), 53 deletions(-) diff --git a/caption-engine/audio2text/gummy.py b/caption-engine/audio2text/gummy.py index 0b79d98..ceca937 100644 --- a/caption-engine/audio2text/gummy.py +++ b/caption-engine/audio2text/gummy.py @@ -4,6 +4,7 @@ from dashscope.audio.asr import ( TranslationResult, TranslationRecognizerRealtime ) +import dashscope from datetime import datetime import json import sys @@ -77,7 +78,9 @@ class GummyTranslator: source: 源语言代码字符串(zh, en, ja 等) target: 目标语言代码字符串(zh, en, ja 等) """ - def __init__(self, rate, source, target): + def __init__(self, rate, source, target, api_key): + if api_key: + dashscope.api_key = api_key self.translator = TranslationRecognizerRealtime( model = "gummy-realtime-v1", format = "pcm", diff --git a/caption-engine/main-gummy.py b/caption-engine/main-gummy.py index c4b31b1..d4f0ea9 100644 --- a/caption-engine/main-gummy.py +++ b/caption-engine/main-gummy.py @@ -14,14 +14,14 @@ from audioprcs import mergeChunkChannels from audio2text import InvalidParameter, GummyTranslator -def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate): +def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate, api_key): sys.stdout.reconfigure(line_buffering=True) # type: ignore stream = AudioStream(audio_type, chunk_rate) if t_lang == 'none': - gummy = GummyTranslator(stream.RATE, s_lang, None) + gummy = GummyTranslator(stream.RATE, s_lang, None, api_key) else: - gummy = GummyTranslator(stream.RATE, s_lang, t_lang) + gummy = GummyTranslator(stream.RATE, s_lang, t_lang, api_key) stream.openStream() gummy.start() @@ -47,10 +47,12 @@ if __name__ == "__main__": parser.add_argument('-t', '--target_language', default='zh', help='Target language code') parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream') parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.') + parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model') args = parser.parse_args() convert_audio_to_text( args.source_language, args.target_language, int(args.audio_type), - int(args.chunk_rate) + int(args.chunk_rate), + args.api_key ) diff --git a/electron-builder.yml b/electron-builder.yml index fc3a4cc..b685454 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -10,8 +10,8 @@ files: - '!{.env,.env.*,.npmrc,pnpm-lock.yaml}' - '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}' extraResources: - from: ./caption-engine/dist/main-gummy.exe - to: ./caption-engine/dist/main-gummy.exe + from: ./caption-engine/dist/main-gummy + to: ./caption-engine/main-gummy asarUnpack: - resources/** win: diff --git a/package-lock.json b/package-lock.json index fa0e63a..b39ea1d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "auto-caption", - "version": "0.2.1", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "auto-caption", - "version": "0.2.1", + "version": "0.3.0", "hasInstallScript": true, "dependencies": { "@electron-toolkit/preload": "^3.0.1", diff --git a/package.json b/package.json index b2611ef..af36fcc 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "auto-caption", "productName": "Auto Caption", - "version": "0.2.1", + "version": "0.3.0", "description": "A cross-platform subtitle display software.", "main": "./out/main/index.js", "author": "himeditator", diff --git a/src/main/i18n/lang/en.ts b/src/main/i18n/lang/en.ts index 87a969e..ad8adde 100644 --- a/src/main/i18n/lang/en.ts +++ b/src/main/i18n/lang/en.ts @@ -1,5 +1,5 @@ export default { - "gummy.env.missing": "DASHSCOPE_API_KEY environment variable not detected. To use the gummy engine, you need to obtain an API Key from Alibaba Cloud's Bailian platform and add it to your local environment variables.", + "gummy.key.missing": "API KEY is not set, and the DASHSCOPE_API_KEY environment variable is not detected. To use the gummy engine, you need to obtain an API KEY from the Alibaba Cloud Bailian platform and add it to the settings or configure it in the local environment variables.", "platform.unsupported": "Unsupported platform: ", "engine.start.error": "Caption engine failed to start: ", "engine.output.parse.error": "Unable to parse caption engine output as a JSON object: ", diff --git a/src/main/i18n/lang/ja.ts b/src/main/i18n/lang/ja.ts index 13122bc..60595cc 100644 --- a/src/main/i18n/lang/ja.ts +++ b/src/main/i18n/lang/ja.ts @@ -1,5 +1,5 @@ export default { - "gummy.env.missing": "DASHSCOPE_API_KEY 環境変数が検出されませんでした。Gummy エンジンを使用するには、Alibaba Cloud の百煉プラットフォームから API Key を取得し、ローカル環境変数に追加する必要があります。", + "gummy.key.missing": "API KEY が設定されておらず、DASHSCOPE_API_KEY 環境変数も検出されていません。Gummy エンジンを使用するには、Alibaba Cloud Bailian プラットフォームから API KEY を取得し、設定に追加するか、ローカルの環境変数に設定する必要があります。", "platform.unsupported": "サポートされていないプラットフォーム: ", "engine.start.error": "字幕エンジンの起動に失敗しました: ", "engine.output.parse.error": "字幕エンジンの出力を JSON オブジェクトとして解析できませんでした: ", diff --git a/src/main/i18n/lang/zh.ts b/src/main/i18n/lang/zh.ts index 608c6d0..61fbdd2 100644 --- a/src/main/i18n/lang/zh.ts +++ b/src/main/i18n/lang/zh.ts @@ -1,5 +1,5 @@ export default { - "gummy.env.missing": "没有检测到 DASHSCOPE_API_KEY 环境变量,如果要使用 gummy 引擎,需要在阿里云百炼平台获取 API Key 并添加到本机环境变量", + "gummy.key.missing": "没有设置 API KEY,也没有检测到 DASHSCOPE_API_KEY 环境变量。如果要使用 gummy 引擎,需要在阿里云百炼平台获取 API KEY,并在添加到设置中或者配置到本机环境变量。", "platform.unsupported": "不支持的平台:", "engine.start.error": "字幕引擎启动失败:", "engine.output.parse.error": "字幕引擎输出内容无法解析为 JSON 对象:", diff --git a/src/main/types/index.ts b/src/main/types/index.ts index bc2632f..460e634 100644 --- a/src/main/types/index.ts +++ b/src/main/types/index.ts @@ -9,6 +9,7 @@ export interface Controls { engine: 'gummy', audio: 0 | 1, translation: boolean, + API_KEY: string, customized: boolean, customizedApp: string, customizedCommand: string diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index 8b28d7d..74204e3 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -26,6 +26,7 @@ const defaultControls: Controls = { engine: 'gummy', audio: 0, engineEnabled: false, + API_KEY: '', translation: true, customized: false, customizedApp: '', @@ -82,7 +83,7 @@ class AllConfig { } } - public setStyles(args: Styles) { + public setStyles(args: Object) { for(let key in this.styles) { if(key in args) { this.styles[key] = args[key] @@ -100,7 +101,7 @@ class AllConfig { console.log(`[INFO] Send Styles to #${window.id}:`, this.styles) } - public setControls(args: Controls) { + public setControls(args: Object) { const engineEnabled = this.controls.engineEnabled for(let key in this.controls){ if(key in args) { diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 668c4a0..6ca6595 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -19,8 +19,8 @@ export class CaptionEngine { } else if (allConfig.controls.engine === 'gummy') { allConfig.controls.customized = false - if(!process.env.DASHSCOPE_API_KEY) { - controlWindow.sendErrorMessage(i18n('gummy.env.missing')) + if(!allConfig.controls.API_KEY && !process.env.DASHSCOPE_API_KEY) { + controlWindow.sendErrorMessage(i18n('gummy.key.missing')) return false } let gummyName = '' @@ -42,8 +42,7 @@ export class CaptionEngine { } else { this.appPath = path.join( - process.resourcesPath, - 'caption-engine', 'dist', gummyName + process.resourcesPath, 'caption-engine', gummyName ) } this.command = [] @@ -53,6 +52,9 @@ export class CaptionEngine { allConfig.controls.targetLang : 'none' ) this.command.push('-a', allConfig.controls.audio ? '1' : '0') + if(allConfig.controls.API_KEY) { + this.command.push('-k', allConfig.controls.API_KEY) + } console.log('[INFO] Engine Path:', this.appPath) console.log('[INFO] Engine Command:', this.command) @@ -61,7 +63,7 @@ export class CaptionEngine { } public start() { - if (this.processStatus!== 'stopped') { + if (this.processStatus !== 'stopped') { return } if(!this.getApp()){ return } @@ -122,7 +124,7 @@ export class CaptionEngine { public stop() { if(this.processStatus !== 'running') return - if (this.process) { + if (this.process.pid) { console.log('[INFO] Trying to stop process, PID:', this.process.pid) let cmd = `kill ${this.process.pid}`; if (process.platform === "win32") { @@ -135,6 +137,17 @@ export class CaptionEngine { } }) } + else { + this.process = undefined; + allConfig.controls.engineEnabled = false + if(controlWindow.window){ + allConfig.sendControls(controlWindow.window) + controlWindow.window.webContents.send('control.engine.stopped') + } + this.processStatus = 'stopped' + console.log('[INFO] Process PID undefined, caption engine process stopped') + return + } this.processStatus = 'stopping' console.log('[INFO] Caption engine process stopping') } diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue index 26deb28..b459f46 100644 --- a/src/renderer/src/components/EngineControl.vue +++ b/src/renderer/src/components/EngineControl.vue @@ -43,36 +43,51 @@
- {{ $t('engine.customEngine') }} - + {{ $t('engine.showMore') }} +
-
- - -
- {{ $t('engine.custom.app') }} - -
-
- {{ $t('engine.custom.command') }} - -
-
-
+ +
+ {{ $t('engine.apikey') }} + +
+
+ {{ $t('engine.customEngine') }} + +
+
+ + +
+ {{ $t('engine.custom.app') }} + +
+
+ {{ $t('engine.custom.command') }} + +
+
+
+
+
@@ -86,6 +101,7 @@ import { InfoCircleOutlined } from '@ant-design/icons-vue'; import { useI18n } from 'vue-i18n' const { t } = useI18n() +const showMore = ref(false) const engineControl = useEngineControlStore() const { platform, captionEngine, audioType, changeSignal } = storeToRefs(engineControl) @@ -95,7 +111,7 @@ const currentTargetLang = ref('zh') const currentEngine = ref<'gummy'>('gummy') const currentAudio = ref<0 | 1>(0) const currentTranslation = ref(false) - +const currentAPI_KEY = ref('') const currentCustomized = ref(false) const currentCustomizedApp = ref('') const currentCustomizedCommand = ref('') @@ -115,7 +131,7 @@ function applyChange(){ engineControl.engine = currentEngine.value engineControl.audio = currentAudio.value engineControl.translation = currentTranslation.value - + engineControl.API_KEY = currentAPI_KEY.value engineControl.customized = currentCustomized.value engineControl.customizedApp = currentCustomizedApp.value engineControl.customizedCommand = currentCustomizedCommand.value @@ -134,7 +150,7 @@ function cancelChange(){ currentEngine.value = engineControl.engine currentAudio.value = engineControl.audio currentTranslation.value = engineControl.translation - + currentAPI_KEY.value = engineControl.API_KEY currentCustomized.value = engineControl.customized currentCustomizedApp.value = engineControl.customizedApp currentCustomizedCommand.value = engineControl.customizedCommand diff --git a/src/renderer/src/i18n/lang/en.ts b/src/renderer/src/i18n/lang/en.ts index 143a3d0..549f06c 100644 --- a/src/renderer/src/i18n/lang/en.ts +++ b/src/renderer/src/i18n/lang/en.ts @@ -46,6 +46,8 @@ export default { "systemOutput": "System Audio Output (Speaker)", "systemInput": "System Audio Input (Microphone)", "enableTranslation": "Translation", + "showMore": "More Settings", + "apikey": "API KEY", "customEngine": "Custom Engine", custom: { "title": "Custom Caption Engine", diff --git a/src/renderer/src/i18n/lang/ja.ts b/src/renderer/src/i18n/lang/ja.ts index 028d88c..8a88d30 100644 --- a/src/renderer/src/i18n/lang/ja.ts +++ b/src/renderer/src/i18n/lang/ja.ts @@ -46,6 +46,8 @@ export default { "systemOutput": "システムオーディオ出力(スピーカー)", "systemInput": "システムオーディオ入力(マイク)", "enableTranslation": "翻訳", + "showMore": "詳細設定", + "apikey": "API KEY", "customEngine": "カスタムエンジン", custom: { "title": "カスタムキャプションエンジン", diff --git a/src/renderer/src/i18n/lang/zh.ts b/src/renderer/src/i18n/lang/zh.ts index 72cdc37..d2af5ef 100644 --- a/src/renderer/src/i18n/lang/zh.ts +++ b/src/renderer/src/i18n/lang/zh.ts @@ -46,6 +46,8 @@ export default { "systemOutput": "系统音频输出(扬声器)", "systemInput": "系统音频输入(麦克风)", "enableTranslation": "启用翻译", + "showMore": "更多设置", + "apikey": "API KEY", "customEngine": "自定义引擎", custom: { "title": "自定义字幕引擎", diff --git a/src/renderer/src/stores/engineControl.ts b/src/renderer/src/stores/engineControl.ts index eb7a3fb..17e62ce 100644 --- a/src/renderer/src/stores/engineControl.ts +++ b/src/renderer/src/stores/engineControl.ts @@ -16,7 +16,7 @@ export const useEngineControlStore = defineStore('engineControl', () => { const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage]) const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage]) - + const API_KEY = ref('') const engineEnabled = ref(false) const sourceLang = ref('en') const targetLang = ref('zh') @@ -37,6 +37,7 @@ export const useEngineControlStore = defineStore('engineControl', () => { engine: engine.value, audio: audio.value, translation: translation.value, + API_KEY: API_KEY.value, customized: customized.value, customizedApp: customizedApp.value, customizedCommand: customizedCommand.value @@ -51,6 +52,7 @@ export const useEngineControlStore = defineStore('engineControl', () => { audio.value = controls.audio engineEnabled.value = controls.engineEnabled translation.value = controls.translation + API_KEY.value = controls.API_KEY customized.value = controls.customized customizedApp.value = controls.customizedApp customizedCommand.value = controls.customizedCommand @@ -108,6 +110,7 @@ export const useEngineControlStore = defineStore('engineControl', () => { engine, // 字幕引擎 audio, // 选择音频 translation, // 是否启用翻译 + API_KEY, // API KEY customized, // 是否使用自定义字幕引擎 customizedApp, // 自定义字幕引擎的应用程序 customizedCommand, // 自定义字幕引擎的命令 diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index bc2632f..460e634 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -9,6 +9,7 @@ export interface Controls { engine: 'gummy', audio: 0 | 1, translation: boolean, + API_KEY: string, customized: boolean, customizedApp: string, customizedCommand: string