diff --git a/README.md b/README.md index 94da6f0..dcd2f21 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,8 @@

auto-caption

Auto Caption 是一个跨平台的实时字幕显示软件。

- - - - - - + + @@ -77,7 +73,7 @@ macOS 平台和 Linux 平台获取系统音频输出需要进行额外设置, ## ⚙️ 自带字幕引擎说明 -目前软件自带 2 个字幕引擎,正在规划 1 个新的引擎。它们的详细信息如下。 +目前软件自带 2 个字幕引擎,正在规划新的引擎。它们的详细信息如下。 ### Gummy 字幕引擎(云端) @@ -108,9 +104,15 @@ $$ 基于 [vosk-api](https://github.com/alphacep/vosk-api) 开发。目前只支持生成音频对应的原文,不支持生成翻译内容。 -### FunASR 字幕引擎(本地) +### 新规划字幕引擎 + +以下为备选模型,将根据模型效果和集成难易程度选择。 + +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) +- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) +- [FunASR](https://github.com/modelscope/FunASR) -如果可行,将基于 [FunASR](https://github.com/modelscope/FunASR) 进行开发。还未进行调研和可行性验证。 ## 🚀 项目运行 diff --git a/README_en.md b/README_en.md index b51f799..c1fb451 100644 --- a/README_en.md +++ b/README_en.md @@ -3,12 +3,8 @@

auto-caption

Auto Caption is a cross-platform real-time caption display software.

- - - - - - + + @@ -77,7 +73,7 @@ To use the Vosk local caption engine, first download your required model from [V ## ⚙️ Built-in Subtitle Engines -Currently, the software comes with 2 subtitle engines, with 1 new engine planned. Details are as follows. +Currently, the software comes with 2 subtitle engines, and new engines are being planned. Details are as follows. ### Gummy Subtitle Engine (Cloud) @@ -108,9 +104,14 @@ The engine only uploads data when receiving audio streams, so the actual upload Developed based on [vosk-api](https://github.com/alphacep/vosk-api). Currently only supports generating original text from audio, does not support translation content. -### FunASR Subtitle Engine (Local) +### Planned New Subtitle Engines -If feasible, will be developed based on [FunASR](https://github.com/modelscope/FunASR). Not yet researched or verified for feasibility. +The following are candidate models that will be selected based on model performance and ease of integration. + +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) +- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) +- [FunASR](https://github.com/modelscope/FunASR) ## 🚀 Project Setup diff --git a/README_ja.md b/README_ja.md index d046119..f82c976 100644 --- a/README_ja.md +++ b/README_ja.md @@ -3,12 +3,8 @@

auto-caption

Auto Caption はクロスプラットフォームのリアルタイム字幕表示ソフトウェアです。

- - - - - - + + @@ -77,7 +73,7 @@ Vosk ローカル字幕エンジンを使用するには、まず [Vosk Models]( ## ⚙️ 字幕エンジン説明 -現在ソフトウェアには2つの字幕エンジンが組み込まれており、1つの新しいエンジンを計画中です。詳細は以下の通りです。 +現在、ソフトウェアには2つの字幕エンジンが搭載されており、新しいエンジンが計画されています。それらの詳細情報は以下の通りです。 ### Gummy 字幕エンジン(クラウド) @@ -108,9 +104,14 @@ $$ [vosk-api](https://github.com/alphacep/vosk-api) をベースに開発されています。現在は音声に対応する原文の生成のみをサポートしており、翻訳コンテンツはサポートしていません。 -### FunASR字幕エンジン(ローカル) +### 新規計画字幕エンジン -可能であれば、[FunASR](https://github.com/modelscope/FunASR) をベースに開発予定です。まだ調査と実現可能性の検証を行っていません。 +以下は候補モデルであり、モデルの性能と統合の容易さに基づいて選択されます。 + +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) +- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) +- [FunASR](https://github.com/modelscope/FunASR) ## 🚀 プロジェクト実行 diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e6de0aa..72fd4c3 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -137,3 +137,11 @@ - 合并 Gummy 和 Vosk 引擎为单个可执行文件 - 字幕引擎和主程序添加 Socket 通信,完全避免字幕引擎成为孤儿进程 +## v0.7.0 + +2025-08-xx + +### 新增功能 + +- 添加字幕窗口宽度记忆,重新打开时与上次字幕窗口宽度一致 +- 在尝试关闭字幕引擎 4s 后字幕引擎仍未关闭,则强制关闭字幕引擎 \ No newline at end of file diff --git a/engine/main.py b/engine/main.py index e8ec17d..9b25b1a 100644 --- a/engine/main.py +++ b/engine/main.py @@ -40,6 +40,7 @@ def main_gummy(s: str, t: str, a: int, c: int, k: str): stream.close_stream() engine.stop() + def main_vosk(a: int, c: int, m: str): global thread_data stream = AudioStream(a, c) @@ -99,4 +100,4 @@ if __name__ == "__main__": raise ValueError('Invalid caption engine specified.') if thread_data.status == "kill": - stdout_cmd('kill') \ No newline at end of file + stdout_cmd('kill') diff --git a/engine/test.py b/engine/test.py deleted file mode 100644 index 3f56bab..0000000 --- a/engine/test.py +++ /dev/null @@ -1,101 +0,0 @@ -import argparse -from utils import stdout_cmd, stderr -from utils import thread_data, start_server -from utils import merge_chunk_channels, resample_chunk_mono -from audio2text import 
InvalidParameter, GummyRecognizer -from audio2text import VoskRecognizer -from sysaudio import AudioStream - -def main_gummy(s: str, t: str, a: int, c: int, k: str): - global thread_data - stream = AudioStream(a, c) - if t == 'none': - engine = GummyRecognizer(stream.RATE, s, None, k) - else: - engine = GummyRecognizer(stream.RATE, s, t, k) - - stream.open_stream() - engine.start() - - restart_count = 0 - while thread_data.status == "running": - try: - chunk = stream.read_chunk() - if chunk is None: continue - chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS) - try: - engine.send_audio_frame(chunk_mono) - except InvalidParameter as e: - restart_count += 1 - if restart_count > 8: - stderr(str(e)) - thread_data.status = "kill" - break - else: - stdout_cmd('info', f'Gummy engine stopped, trying to restart #{restart_count}') - except KeyboardInterrupt: - break - - stream.close_stream() - engine.stop() - -def main_vosk(a: int, c: int, m: str): - global thread_data - stream = AudioStream(a, c) - engine = VoskRecognizer(m) - - stream.open_stream() - engine.start() - - while thread_data.status == "running": - try: - chunk = stream.read_chunk() - if chunk is None: continue - chunk_mono = resample_chunk_mono(chunk, stream.CHANNELS, stream.RATE, 16000) - engine.send_audio_frame(chunk_mono) - except KeyboardInterrupt: - break - - stream.close_stream() - engine.stop() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Convert system audio stream to text') - # both - parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk') - parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') - parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second') - parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server') - # gummy - parser.add_argument('-s', 
'--source_language', default='en', help='Source language code') - parser.add_argument('-t', '--target_language', default='zh', help='Target language code') - parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model') - # vosk - parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.') - - args = parser.parse_args() - if int(args.port) == 0: - thread_data.status = "running" - else: - start_server(int(args.port)) - - if args.caption_engine == 'gummy': - main_gummy( - args.source_language, - args.target_language, - int(args.audio_type), - int(args.chunk_rate), - args.api_key - ) - elif args.caption_engine == 'vosk': - main_vosk( - int(args.audio_type), - int(args.chunk_rate), - args.model_path - ) - else: - raise ValueError('Invalid caption engine specified.') - - if thread_data.status == "kill": - stdout_cmd('kill') \ No newline at end of file diff --git a/src/main/CaptionWindow.ts b/src/main/CaptionWindow.ts index e460e9e..cb74311 100644 --- a/src/main/CaptionWindow.ts +++ b/src/main/CaptionWindow.ts @@ -3,6 +3,7 @@ import path from 'path' import { is } from '@electron-toolkit/utils' import icon from '../../build/icon.png?asset' import { controlWindow } from './ControlWindow' +import { allConfig } from './utils/AllConfig' class CaptionWindow { window: BrowserWindow | undefined; @@ -10,7 +11,7 @@ class CaptionWindow { public createWindow(): void { this.window = new BrowserWindow({ icon: icon, - width: 900, + width: allConfig.captionWindowWidth, height: 100, minWidth: 480, show: false, @@ -30,6 +31,12 @@ class CaptionWindow { this.window?.show() }) + this.window.on('close', () => { + if(this.window) { + allConfig.captionWindowWidth = this.window?.getBounds().width; + } + }) + this.window.on('closed', () => { this.window = undefined }) diff --git a/src/main/index.ts b/src/main/index.ts index fd27cac..94039a9 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -25,7 +25,7 @@ app.whenReady().then(() => { }) 
app.on('will-quit', async () => { - captionEngine.stop() + captionEngine.kill() allConfig.writeConfig() }); diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index ae8f104..310564a 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -44,11 +44,14 @@ const defaultControls: Controls = { class AllConfig { + captionWindowWidth: number = 900; + uiLanguage: UILanguage = 'zh'; leftBarWidth: number = 8; uiTheme: UITheme = 'system'; styles: Styles = {...defaultStyles}; controls: Controls = {...defaultControls}; + lastLogIndex: number = -1; captionLog: CaptionItem[] = []; @@ -58,6 +61,7 @@ class AllConfig { const configPath = path.join(app.getPath('userData'), 'config.json') if(fs.existsSync(configPath)){ const config = JSON.parse(fs.readFileSync(configPath, 'utf-8')) + if(config.captionWindowWidth) this.captionWindowWidth = config.captionWindowWidth if(config.uiLanguage) this.uiLanguage = config.uiLanguage if(config.uiTheme) this.uiTheme = config.uiTheme if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth @@ -69,6 +73,7 @@ class AllConfig { public writeConfig() { const config = { + captionWindowWidth: this.captionWindowWidth, uiLanguage: this.uiLanguage, uiTheme: this.uiTheme, leftBarWidth: this.leftBarWidth, diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 3d164e1..451ddfa 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -15,6 +15,7 @@ export class CaptionEngine { client: net.Socket | undefined port: number = 8080 status: 'running' | 'starting' | 'stopping' | 'stopped' = 'stopped' + timerID: NodeJS.Timeout | undefined private getApp(): boolean { if (allConfig.controls.customized) { @@ -160,6 +161,7 @@ export class CaptionEngine { controlWindow.window.webContents.send('control.engine.stopped') } this.status = 'stopped' + clearInterval(this.timerID) Log.info(`Engine exited with code ${code}`) }); } @@ -176,9 +178,15 @@ export class CaptionEngine 
{ } this.status = 'stopping' Log.info('Caption engine process stopping...') + this.timerID = setTimeout(() => { + if(this.status !== 'stopping') return + Log.warn('Engine process still not stopped, trying to kill...') + this.kill() + }, 4000); } public kill(){ + if(!this.process || !this.process.pid) return if(this.status !== 'running'){ Log.warn('Trying to kill engine which is not running, current status:', this.status) }