From 1c0bf1f9c4740b517492a60473594924b844e56a Mon Sep 17 00:00:00 2001 From: himeditator Date: Sun, 3 Aug 2025 16:40:26 +0800 Subject: [PATCH] =?UTF-8?q?refactor(engine):=20=E4=BF=AE=E6=94=B9=E8=99=9A?= =?UTF-8?q?=E6=8B=9F=E7=8E=AF=E5=A2=83=E8=AE=BE=E7=BD=AE=EF=BC=8C=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E9=9F=B3=E9=A2=91=E5=B7=A5=E5=85=B7=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 更新虚拟环境目录名为 .venv - 调整音频块采集速率默认值为 10 - 为 AudioStream 类添加重设音频块大小的方法 - 更新依赖文件 requirements.txt --- .gitignore | 2 +- README.md | 21 ++++++++------------- README_en.md | 19 +++++++------------ README_ja.md | 19 +++++++------------ docs/engine-manual/en.md | 2 +- docs/engine-manual/ja.md | 2 +- docs/engine-manual/zh.md | 2 +- engine/main.py | 2 +- engine/main.spec | 4 ++-- engine/requirements.txt | 7 +++++++ engine/requirements_darwin.txt | 6 ------ engine/requirements_linux.txt | 5 ----- engine/requirements_win.txt | 6 ------ engine/sysaudio/darwin.py | 10 ++++++++-- engine/sysaudio/linux.py | 10 ++++++++-- engine/sysaudio/win.py | 10 ++++++++-- engine/utils/__init__.py | 7 ++++++- engine/utils/audioprcs.py | 31 +++++++++++++++++++++++++++++++ src/main/utils/CaptionEngine.ts | 4 ++-- 19 files changed, 99 insertions(+), 70 deletions(-) create mode 100644 engine/requirements.txt delete mode 100644 engine/requirements_darwin.txt delete mode 100644 engine/requirements_linux.txt delete mode 100644 engine/requirements_win.txt diff --git a/.gitignore b/.gitignore index 81ae53f..42e7241 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ out *.log* __pycache__ .venv -subenv +test.py engine/build engine/models engine/notebook diff --git a/README.md b/README.md index 4ea7f7b..83e514f 100644 --- a/README.md +++ b/README.md @@ -129,29 +129,24 @@ npm install ```bash # in ./engine folder -python -m venv subenv +python -m venv .venv # or -python3 -m venv subenv +python3 -m venv .venv ``` 然后激活虚拟环境: ```bash # Windows -subenv/Scripts/activate +.venv/Scripts/activate # Linux or macOS -source subenv/bin/activate +source .venv/bin/activate ``` 然后安装依赖(这一步在 macOS 和 Linux 可能会报错,一般是因为构建失败,需要根据报错信息进行处理): ```bash -# Windows -pip install -r requirements_win.txt -# macOS -pip install -r requirements_darwin.txt -# Linux -pip install -r requirements_linux.txt +pip install -r requirements.txt ``` 如果在 Linux 系统上安装 `samplerate` 模块报错,可以尝试使用以下命令单独安装: @@ -170,12 +165,12 @@ pyinstaller ./main.spec ``` # Windows -vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve()) +vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve()) # Linux or macOS -vosk_path = str(Path('./subenv/lib/python3.x/site-packages/vosk').resolve()) +vosk_path = str(Path('./.venv/lib/python3.x/site-packages/vosk').resolve()) ``` -此时项目构建完成,在进入 `engine/dist` 文件夹可见对应的可执行文件。即可进行后续操作。 +此时项目构建完成,进入 `engine/dist` 文件夹可见对应的可执行文件。即可进行后续操作。 ### 运行项目 diff --git a/README_en.md b/README_en.md index c1fb451..6b5278c 100644 --- a/README_en.md +++ b/README_en.md @@ -129,29 +129,24 @@ First enter the `engine` folder and execute the following commands to create a v ```bash # in ./engine folder -python -m venv subenv +python -m venv .venv # or -python3 -m venv subenv +python3 -m venv .venv ``` Then activate the virtual environment: ```bash # Windows -subenv/Scripts/activate +.venv/Scripts/activate # Linux or macOS -source subenv/bin/activate +source .venv/bin/activate ``` Then install dependencies (this step might result in errors on macOS and Linux, usually due to build failures, and you need to handle them based on the error messages): ```bash -# Windows -pip install -r requirements_win.txt -# macOS -pip install -r requirements_darwin.txt -# Linux -pip install -r requirements_linux.txt +pip install -r requirements.txt ``` If you encounter errors when installing the `samplerate` module on Linux systems, you can try installing it separately with this command: @@ -170,9 +165,9 @@ Note that the path to the `vosk` library in `main-vosk.spec` might be incorrect ``` # Windows -vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve()) +vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve()) # Linux or macOS -vosk_path = str(Path('./subenv/lib/python3.x/site-packages/vosk').resolve()) +vosk_path = str(Path('./.venv/lib/python3.x/site-packages/vosk').resolve()) ``` After the build completes, you can find the executable file in the `engine/dist` folder. Then proceed with subsequent operations. diff --git a/README_ja.md b/README_ja.md index f82c976..e16fb29 100644 --- a/README_ja.md +++ b/README_ja.md @@ -129,29 +129,24 @@ npm install ```bash # ./engine フォルダ内 -python -m venv subenv +python -m venv .venv # または -python3 -m venv subenv +python3 -m venv .venv ``` 次に仮想環境をアクティブにします: ```bash # Windows -subenv/Scripts/activate +.venv/Scripts/activate # Linux または macOS -source subenv/bin/activate +source .venv/bin/activate ``` 次に依存関係をインストールします(このステップでは macOS と Linux でエラーが発生する可能性があります。通常はビルド失敗によるもので、エラーメッセージに基づいて対処する必要があります): ```bash -# Windows -pip install -r requirements_win.txt -# macOS -pip install -r requirements_darwin.txt -# Linux -pip install -r requirements_linux.txt +pip install -r requirements.txt ``` Linux システムで `samplerate` モジュールのインストールに問題が発生した場合、以下のコマンドで個別にインストールを試すことができます: @@ -170,9 +165,9 @@ pyinstaller ./main.spec ``` # Windows -vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve()) +vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve()) # Linux または macOS -vosk_path = str(Path('./subenv/lib/python3.x/site-packages/vosk').resolve()) +vosk_path = str(Path('./.venv/lib/python3.x/site-packages/vosk').resolve()) ``` これでプロジェクトのビルドが完了し、`engine/dist` フォルダ内に対応する実行可能ファイルが確認できます。その後、次の操作に進むことができます。 diff --git a/docs/engine-manual/en.md b/docs/engine-manual/en.md index aa9afd0..226c6d0 100644 --- a/docs/engine-manual/en.md +++ b/docs/engine-manual/en.md @@ -155,7 +155,7 @@ if __name__ == "__main__": # Common parameters parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk') parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') - parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second') + parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second') parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server') # Gummy-specific parameters parser.add_argument('-s', '--source_language', default='en', help='Source language code') diff --git a/docs/engine-manual/ja.md b/docs/engine-manual/ja.md index da5b8a3..fb1c093 100644 --- a/docs/engine-manual/ja.md +++ b/docs/engine-manual/ja.md @@ -157,7 +157,7 @@ if __name__ == "__main__": # 共通 parser.add_argument('-e', '--caption_engine', default='gummy', help='字幕エンジン: gummyまたはvosk') parser.add_argument('-a', '--audio_type', default=0, help='オーディオストリームソース: 0は出力、1は入力') - parser.add_argument('-c', '--chunk_rate', default=20, help='1秒あたりに収集するオーディオストリームブロックの数') + parser.add_argument('-c', '--chunk_rate', default=10, help='1秒あたりに収集するオーディオストリームブロックの数') parser.add_argument('-p', '--port', default=8080, help='サーバーを実行するポート、0はサーバーなし') # gummy専用 parser.add_argument('-s', '--source_language', default='en', help='ソース言語コード') diff --git a/docs/engine-manual/zh.md b/docs/engine-manual/zh.md index f3bae27..93df5f8 100644 --- a/docs/engine-manual/zh.md +++ b/docs/engine-manual/zh.md @@ -156,7 +156,7 @@ if __name__ == "__main__": # both parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk') parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') - parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second') + parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second') parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server') # gummy only parser.add_argument('-s', '--source_language', default='en', help='Source language code') diff --git a/engine/main.py b/engine/main.py index 9b25b1a..063ed71 100644 --- a/engine/main.py +++ b/engine/main.py @@ -67,7 +67,7 @@ if __name__ == "__main__": # both parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk') parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input') - parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second') + parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second') parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server') # gummy only parser.add_argument('-s', '--source_language', default='en', help='Source language code') diff --git a/engine/main.spec b/engine/main.spec index 81fdd2e..5475c62 100644 --- a/engine/main.spec +++ b/engine/main.spec @@ -4,9 +4,9 @@ from pathlib import Path import sys if sys.platform == 'win32': - vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve()) + vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve()) else: - vosk_path = str(Path('./subenv/lib/python3.12/site-packages/vosk').resolve()) + vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve()) a = Analysis( ['main.py'], diff --git a/engine/requirements.txt b/engine/requirements.txt new file mode 100644 index 0000000..894ed59 --- /dev/null +++ b/engine/requirements.txt @@ -0,0 +1,7 @@ +dashscope +numpy +samplerate +vosk +pyinstaller +pyaudio; sys_platform == 'darwin' +pyaudiowpatch; sys_platform == 'win32' diff --git a/engine/requirements_darwin.txt b/engine/requirements_darwin.txt deleted file mode 100644 index 0115a46..0000000 --- a/engine/requirements_darwin.txt +++ /dev/null @@ -1,6 +0,0 @@ -dashscope -numpy -samplerate -PyAudio -vosk -pyinstaller diff --git a/engine/requirements_linux.txt b/engine/requirements_linux.txt deleted file mode 100644 index 3574b4b..0000000 --- a/engine/requirements_linux.txt +++ /dev/null @@ -1,5 +0,0 @@ -dashscope -numpy -vosk -pyinstaller -samplerate # pip install samplerate --only-binary=:all: diff --git a/engine/requirements_win.txt b/engine/requirements_win.txt deleted file mode 100644 index 46e134d..0000000 --- a/engine/requirements_win.txt +++ /dev/null @@ -1,6 +0,0 @@ -dashscope -numpy -samplerate -PyAudioWPatch -vosk -pyinstaller diff --git a/engine/sysaudio/darwin.py b/engine/sysaudio/darwin.py index a3a8d6e..6f32487 100644 --- a/engine/sysaudio/darwin.py +++ b/engine/sysaudio/darwin.py @@ -22,9 +22,9 @@ class AudioStream: 初始化参数: audio_type: 0-系统音频输出流(需配合 BlackHole),1-系统音频输入流 - chunk_rate: 每秒采集音频块的数量,默认为20 + chunk_rate: 每秒采集音频块的数量,默认为10 """ - def __init__(self, audio_type=0, chunk_rate=20): + def __init__(self, audio_type=0, chunk_rate=10): self.audio_type = audio_type self.mic = pyaudio.PyAudio() if self.audio_type == 0: @@ -40,6 +40,12 @@ class AudioStream: self.RATE = int(self.device["defaultSampleRate"]) self.CHUNK = self.RATE // chunk_rate + def reset_chunk_size(self, chunk_size: int): + """ + 重新设置音频块大小 + """ + self.CHUNK = chunk_size + def get_info(self): dev_info = f""" 采样设备: diff --git a/engine/sysaudio/linux.py b/engine/sysaudio/linux.py index 0a5644a..4599674 100644 --- a/engine/sysaudio/linux.py +++ b/engine/sysaudio/linux.py @@ -41,9 +41,9 @@ class AudioStream: 初始化参数: audio_type: 0-系统音频输出流(不支持,不会生效),1-系统音频输入流(默认) - chunk_rate: 每秒采集音频块的数量,默认为20 + chunk_rate: 每秒采集音频块的数量,默认为10 """ - def __init__(self, audio_type=1, chunk_rate=20): + def __init__(self, audio_type=1, chunk_rate=10): self.audio_type = audio_type if self.audio_type == 0: @@ -58,6 +58,12 @@ class AudioStream: self.RATE = 48000 self.CHUNK = self.RATE // chunk_rate + def reset_chunk_size(self, chunk_size: int): + """ + 重新设置音频块大小 + """ + self.CHUNK = chunk_size + def get_info(self): dev_info = f""" 音频捕获进程: diff --git a/engine/sysaudio/win.py b/engine/sysaudio/win.py index 247b434..fcf5b49 100644 --- a/engine/sysaudio/win.py +++ b/engine/sysaudio/win.py @@ -46,9 +46,9 @@ class AudioStream: 初始化参数: audio_type: 0-系统音频输出流(默认),1-系统音频输入流 - chunk_rate: 每秒采集音频块的数量,默认为20 + chunk_rate: 每秒采集音频块的数量,默认为10 """ - def __init__(self, audio_type=0, chunk_rate=20): + def __init__(self, audio_type=0, chunk_rate=10, chunk_size=-1): self.audio_type = audio_type self.mic = pyaudio.PyAudio() if self.audio_type == 0: @@ -64,6 +64,12 @@ class AudioStream: self.RATE = int(self.device["defaultSampleRate"]) self.CHUNK = self.RATE // chunk_rate + def reset_chunk_size(self, chunk_size: int): + """ + 重新设置音频块大小 + """ + self.CHUNK = chunk_size + def get_info(self): dev_info = f""" 采样设备: diff --git a/engine/utils/__init__.py b/engine/utils/__init__.py index 5de3464..35cf6d1 100644 --- a/engine/utils/__init__.py +++ b/engine/utils/__init__.py @@ -1,4 +1,9 @@ -from .audioprcs import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk +from .audioprcs import ( + merge_chunk_channels, + resample_chunk_mono, + resample_chunk_mono_np, + resample_mono_chunk +) from .sysout import stdout, stdout_cmd, stdout_obj, stderr from .thdata import thread_data from .server import start_server \ No newline at end of file diff --git a/engine/utils/audioprcs.py b/engine/utils/audioprcs.py index e083c5e..1169bb8 100644 --- a/engine/utils/audioprcs.py +++ b/engine/utils/audioprcs.py @@ -55,6 +55,37 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in return chunk_mono_r.tobytes() +def resample_chunk_mono_np(chunk: bytes, channels: int, orig_sr: int, target_sr: int, mode="sinc_best") -> np.ndarray: + """ + 将当前多通道音频数据块转换成单通道音频数据块,然后进行重采样,返回 Numpy 数组 + + Args: + chunk: 多通道音频数据块 + channels: 通道数 + orig_sr: 原始采样率 + target_sr: 目标采样率 + mode: 重采样模式,可选:'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear' + + Return: + 单通道音频数据块 + """ + if channels == 1: + chunk_mono = np.frombuffer(chunk, dtype=np.int16) + chunk_mono = chunk_mono.astype(np.float32) + else: + # (length * channels,) + chunk_np = np.frombuffer(chunk, dtype=np.int16) + # (length, channels) + chunk_np = chunk_np.reshape(-1, channels) + # (length,) + chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1) + + ratio = target_sr / orig_sr + chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode) + chunk_mono_r = np.round(chunk_mono_r).astype(np.int16) + return chunk_mono_r + + def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_best") -> bytes: """ 将当前单通道音频块进行重采样 diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 451ddfa..2ea1a8a 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -37,7 +37,7 @@ export class CaptionEngine { if(process.platform === "win32") { this.appPath = path.join( app.getAppPath(), 'engine', - 'subenv', 'Scripts', 'python.exe' + '.venv', 'Scripts', 'python.exe' ) this.command.push(path.join( app.getAppPath(), 'engine', 'main.py' @@ -47,7 +47,7 @@ export class CaptionEngine { else { this.appPath = path.join( app.getAppPath(), 'engine', - 'subenv', 'bin', 'python3' + '.venv', 'bin', 'python3' ) this.command.push(path.join( app.getAppPath(), 'engine', 'main.py'