From 1c0bf1f9c4740b517492a60473594924b844e56a Mon Sep 17 00:00:00 2001
From: himeditator <hironin@foxmail.com>
Date: Sun, 3 Aug 2025 16:40:26 +0800
Subject: [PATCH] =?UTF-8?q?refactor(engine):=20=E4=BF=AE=E6=94=B9=E8=99=9A?=
 =?UTF-8?q?=E6=8B=9F=E7=8E=AF=E5=A2=83=E8=AE=BE=E7=BD=AE=EF=BC=8C=E4=BF=AE?=
 =?UTF-8?q?=E6=94=B9=E9=9F=B3=E9=A2=91=E5=B7=A5=E5=85=B7=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 更新虚拟环境目录名为 .venv
- 调整音频块采集速率默认值为 10
- 为 AudioStream 类添加重设音频块大小的方法
- 更新依赖文件 requirements.txt
---
 .gitignore                      |  2 +-
 README.md                       | 21 ++++++++-------------
 README_en.md                    | 19 +++++++------------
 README_ja.md                    | 19 +++++++------------
 docs/engine-manual/en.md        |  2 +-
 docs/engine-manual/ja.md        |  2 +-
 docs/engine-manual/zh.md        |  2 +-
 engine/main.py                  |  2 +-
 engine/main.spec                |  4 ++--
 engine/requirements.txt         |  7 +++++++
 engine/requirements_darwin.txt  |  6 ------
 engine/requirements_linux.txt   |  5 -----
 engine/requirements_win.txt     |  6 ------
 engine/sysaudio/darwin.py       | 10 ++++++++--
 engine/sysaudio/linux.py        | 10 ++++++++--
 engine/sysaudio/win.py          | 10 ++++++++--
 engine/utils/__init__.py        |  7 ++++++-
 engine/utils/audioprcs.py       | 31 +++++++++++++++++++++++++++++++
 src/main/utils/CaptionEngine.ts |  4 ++--
 19 files changed, 99 insertions(+), 70 deletions(-)
 create mode 100644 engine/requirements.txt
 delete mode 100644 engine/requirements_darwin.txt
 delete mode 100644 engine/requirements_linux.txt
 delete mode 100644 engine/requirements_win.txt

diff --git a/.gitignore b/.gitignore
index 81ae53f..42e7241 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ out
 *.log*
 __pycache__
 .venv
-subenv
+test.py
 engine/build
 engine/models
 engine/notebook
diff --git a/README.md b/README.md
index 4ea7f7b..83e514f 100644
--- a/README.md
+++ b/README.md
@@ -129,29 +129,24 @@ npm install
 
 ```bash
 # in ./engine folder
-python -m venv subenv
+python -m venv .venv
 # or
-python3 -m venv subenv
+python3 -m venv .venv
 ```
 
 然后激活虚拟环境：
 
 ```bash
 # Windows
-subenv/Scripts/activate
+.venv/Scripts/activate
 # Linux or macOS
-source subenv/bin/activate
+source .venv/bin/activate
 ```
 
 然后安装依赖（这一步在 macOS 和 Linux 可能会报错，一般是因为构建失败，需要根据报错信息进行处理）：
 
 ```bash
-# Windows
-pip install -r requirements_win.txt
-# macOS
-pip install -r requirements_darwin.txt
-# Linux
-pip install -r requirements_linux.txt
+pip install -r requirements.txt
 ```
 
 如果在 Linux 系统上安装 `samplerate` 模块报错，可以尝试使用以下命令单独安装：
@@ -170,12 +165,12 @@ pyinstaller ./main.spec
 
 ```
 # Windows
-vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve())
+vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
 # Linux or macOS
-vosk_path = str(Path('./subenv/lib/python3.x/site-packages/vosk').resolve())
+vosk_path = str(Path('./.venv/lib/python3.x/site-packages/vosk').resolve())
 ```
 
-此时项目构建完成，在进入 `engine/dist` 文件夹可见对应的可执行文件。即可进行后续操作。
+此时项目构建完成，进入 `engine/dist` 文件夹可见对应的可执行文件。即可进行后续操作。
 
 ### 运行项目
 
diff --git a/README_en.md b/README_en.md
index c1fb451..6b5278c 100644
--- a/README_en.md
+++ b/README_en.md
@@ -129,29 +129,24 @@ First enter the `engine` folder and execute the following commands to create a v
 
 ```bash
 # in ./engine folder
-python -m venv subenv
+python -m venv .venv
 # or
-python3 -m venv subenv
+python3 -m venv .venv
 ```
 
 Then activate the virtual environment:
 
 ```bash
 # Windows
-subenv/Scripts/activate
+.venv/Scripts/activate
 # Linux or macOS
-source subenv/bin/activate
+source .venv/bin/activate
 ```
 
 Then install dependencies (this step might result in errors on macOS and Linux, usually due to build failures, and you need to handle them based on the error messages):
 
 ```bash
-# Windows
-pip install -r requirements_win.txt
-# macOS
-pip install -r requirements_darwin.txt
-# Linux
-pip install -r requirements_linux.txt
+pip install -r requirements.txt
 ```
 
 If you encounter errors when installing the `samplerate` module on Linux systems, you can try installing it separately with this command:
@@ -170,9 +165,9 @@ Note that the path to the `vosk` library in `main-vosk.spec` might be incorrect
 
 ```
 # Windows
-vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve())
+vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
 # Linux or macOS
-vosk_path = str(Path('./subenv/lib/python3.x/site-packages/vosk').resolve())
+vosk_path = str(Path('./.venv/lib/python3.x/site-packages/vosk').resolve())
 ```
 
 After the build completes, you can find the executable file in the `engine/dist` folder. Then proceed with subsequent operations.
diff --git a/README_ja.md b/README_ja.md
index f82c976..e16fb29 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -129,29 +129,24 @@ npm install
 
 ```bash
 # ./engine フォルダ内
-python -m venv subenv
+python -m venv .venv
 # または
-python3 -m venv subenv
+python3 -m venv .venv
 ```
 
 次に仮想環境をアクティブにします：
 
 ```bash
 # Windows
-subenv/Scripts/activate
+.venv/Scripts/activate
 # Linux または macOS
-source subenv/bin/activate
+source .venv/bin/activate
 ```
 
 次に依存関係をインストールします（このステップでは macOS と Linux でエラーが発生する可能性があります。通常はビルド失敗によるもので、エラーメッセージに基づいて対処する必要があります）：
 
 ```bash
-# Windows
-pip install -r requirements_win.txt
-# macOS
-pip install -r requirements_darwin.txt
-# Linux
-pip install -r requirements_linux.txt
+pip install -r requirements.txt
 ```
 
 Linux システムで `samplerate` モジュールのインストールに問題が発生した場合、以下のコマンドで個別にインストールを試すことができます：
@@ -170,9 +165,9 @@ pyinstaller ./main.spec
 
 ```
 # Windows
-vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve())
+vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
 # Linux または macOS
-vosk_path = str(Path('./subenv/lib/python3.x/site-packages/vosk').resolve())
+vosk_path = str(Path('./.venv/lib/python3.x/site-packages/vosk').resolve())
 ```
 
 これでプロジェクトのビルドが完了し、`engine/dist` フォルダ内に対応する実行可能ファイルが確認できます。その後、次の操作に進むことができます。
diff --git a/docs/engine-manual/en.md b/docs/engine-manual/en.md
index aa9afd0..226c6d0 100644
--- a/docs/engine-manual/en.md
+++ b/docs/engine-manual/en.md
@@ -155,7 +155,7 @@ if __name__ == "__main__":
     # Common parameters  
     parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')  
     parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')  
-    parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')  
+    parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')  
     parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server')  
     # Gummy-specific parameters  
     parser.add_argument('-s', '--source_language', default='en', help='Source language code')  
diff --git a/docs/engine-manual/ja.md b/docs/engine-manual/ja.md
index da5b8a3..fb1c093 100644
--- a/docs/engine-manual/ja.md
+++ b/docs/engine-manual/ja.md
@@ -157,7 +157,7 @@ if __name__ == "__main__":
     # 共通
     parser.add_argument('-e', '--caption_engine', default='gummy', help='字幕エンジン: gummyまたはvosk')
     parser.add_argument('-a', '--audio_type', default=0, help='オーディオストリームソース: 0は出力、1は入力')
-    parser.add_argument('-c', '--chunk_rate', default=20, help='1秒あたりに収集するオーディオストリームブロックの数')
+    parser.add_argument('-c', '--chunk_rate', default=10, help='1秒あたりに収集するオーディオストリームブロックの数')
     parser.add_argument('-p', '--port', default=8080, help='サーバーを実行するポート、0はサーバーなし')
     # gummy専用
     parser.add_argument('-s', '--source_language', default='en', help='ソース言語コード')
diff --git a/docs/engine-manual/zh.md b/docs/engine-manual/zh.md
index f3bae27..93df5f8 100644
--- a/docs/engine-manual/zh.md
+++ b/docs/engine-manual/zh.md
@@ -156,7 +156,7 @@ if __name__ == "__main__":
     # both
     parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')
     parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
-    parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')
+    parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')
     parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server')
     # gummy only
     parser.add_argument('-s', '--source_language', default='en', help='Source language code')
diff --git a/engine/main.py b/engine/main.py
index 9b25b1a..063ed71 100644
--- a/engine/main.py
+++ b/engine/main.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
     # both
     parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')
     parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
-    parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')
+    parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')
     parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server')
     # gummy only
     parser.add_argument('-s', '--source_language', default='en', help='Source language code')
diff --git a/engine/main.spec b/engine/main.spec
index 81fdd2e..5475c62 100644
--- a/engine/main.spec
+++ b/engine/main.spec
@@ -4,9 +4,9 @@ from pathlib import Path
 import sys
 
 if sys.platform == 'win32':
-    vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve())
+    vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
 else:
-    vosk_path = str(Path('./subenv/lib/python3.12/site-packages/vosk').resolve())
+    vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve())
 
 a = Analysis(
     ['main.py'],
diff --git a/engine/requirements.txt b/engine/requirements.txt
new file mode 100644
index 0000000..894ed59
--- /dev/null
+++ b/engine/requirements.txt
@@ -0,0 +1,7 @@
+dashscope
+numpy
+samplerate
+vosk
+pyinstaller
+pyaudio; sys_platform == 'darwin'
+pyaudiowpatch; sys_platform == 'win32'
diff --git a/engine/requirements_darwin.txt b/engine/requirements_darwin.txt
deleted file mode 100644
index 0115a46..0000000
--- a/engine/requirements_darwin.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-dashscope
-numpy
-samplerate
-PyAudio
-vosk
-pyinstaller
diff --git a/engine/requirements_linux.txt b/engine/requirements_linux.txt
deleted file mode 100644
index 3574b4b..0000000
--- a/engine/requirements_linux.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-dashscope
-numpy
-vosk
-pyinstaller
-samplerate # pip install samplerate --only-binary=:all:
diff --git a/engine/requirements_win.txt b/engine/requirements_win.txt
deleted file mode 100644
index 46e134d..0000000
--- a/engine/requirements_win.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-dashscope
-numpy
-samplerate
-PyAudioWPatch
-vosk
-pyinstaller
diff --git a/engine/sysaudio/darwin.py b/engine/sysaudio/darwin.py
index a3a8d6e..6f32487 100644
--- a/engine/sysaudio/darwin.py
+++ b/engine/sysaudio/darwin.py
@@ -22,9 +22,9 @@ class AudioStream:
 
     初始化参数：
         audio_type: 0-系统音频输出流（需配合 BlackHole），1-系统音频输入流
-        chunk_rate: 每秒采集音频块的数量，默认为20
+        chunk_rate: 每秒采集音频块的数量，默认为10
     """
-    def __init__(self, audio_type=0, chunk_rate=20):
+    def __init__(self, audio_type=0, chunk_rate=10):
         self.audio_type = audio_type
         self.mic = pyaudio.PyAudio()
         if self.audio_type == 0:
@@ -40,6 +40,12 @@ class AudioStream:
         self.RATE = int(self.device["defaultSampleRate"])
         self.CHUNK = self.RATE // chunk_rate
 
+    def reset_chunk_size(self, chunk_size: int):
+        """
+        Overwrite self.CHUNK (set in __init__ to RATE // chunk_rate) with the given chunk_size.
+        """
+        self.CHUNK = chunk_size
+
     def get_info(self):
         dev_info = f"""
         采样设备：
diff --git a/engine/sysaudio/linux.py b/engine/sysaudio/linux.py
index 0a5644a..4599674 100644
--- a/engine/sysaudio/linux.py
+++ b/engine/sysaudio/linux.py
@@ -41,9 +41,9 @@ class AudioStream:
 
     初始化参数：
         audio_type: 0-系统音频输出流（不支持，不会生效），1-系统音频输入流（默认）
-        chunk_rate: 每秒采集音频块的数量，默认为20
+        chunk_rate: 每秒采集音频块的数量，默认为10
     """
-    def __init__(self, audio_type=1,  chunk_rate=20):
+    def __init__(self, audio_type=1,  chunk_rate=10):
         self.audio_type = audio_type
 
         if self.audio_type == 0:
@@ -58,6 +58,12 @@ class AudioStream:
         self.RATE = 48000
         self.CHUNK = self.RATE // chunk_rate
 
+    def reset_chunk_size(self, chunk_size: int):
+        """
+        Overwrite self.CHUNK (set in __init__ to RATE // chunk_rate) with the given chunk_size.
+        """
+        self.CHUNK = chunk_size
+
     def get_info(self):
         dev_info = f"""
         音频捕获进程：
diff --git a/engine/sysaudio/win.py b/engine/sysaudio/win.py
index 247b434..fcf5b49 100644
--- a/engine/sysaudio/win.py
+++ b/engine/sysaudio/win.py
@@ -46,9 +46,9 @@ class AudioStream:
 
     初始化参数：
         audio_type: 0-系统音频输出流（默认），1-系统音频输入流
-        chunk_rate: 每秒采集音频块的数量，默认为20
+        chunk_rate: 每秒采集音频块的数量，默认为10
     """
-    def __init__(self, audio_type=0, chunk_rate=20):
+    def __init__(self, audio_type=0, chunk_rate=10, chunk_size=-1):
         self.audio_type = audio_type
         self.mic = pyaudio.PyAudio()
         if self.audio_type == 0:
@@ -64,6 +64,12 @@ class AudioStream:
         self.RATE = int(self.device["defaultSampleRate"])
         self.CHUNK = self.RATE // chunk_rate
 
+    def reset_chunk_size(self, chunk_size: int):
+        """
+        Overwrite self.CHUNK (RATE // chunk_rate from __init__; its chunk_size arg looks unused - TODO confirm).
+        """
+        self.CHUNK = chunk_size
+
     def get_info(self):
         dev_info = f"""
         采样设备：
diff --git a/engine/utils/__init__.py b/engine/utils/__init__.py
index 5de3464..35cf6d1 100644
--- a/engine/utils/__init__.py
+++ b/engine/utils/__init__.py
@@ -1,4 +1,9 @@
-from .audioprcs import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
+from .audioprcs import (
+    merge_chunk_channels,
+    resample_chunk_mono,
+    resample_chunk_mono_np,
+    resample_mono_chunk
+)
 from .sysout import stdout, stdout_cmd, stdout_obj, stderr
 from .thdata import thread_data
 from .server import start_server
\ No newline at end of file
diff --git a/engine/utils/audioprcs.py b/engine/utils/audioprcs.py
index e083c5e..1169bb8 100644
--- a/engine/utils/audioprcs.py
+++ b/engine/utils/audioprcs.py
@@ -55,6 +55,37 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
     return chunk_mono_r.tobytes()
 
 
+def resample_chunk_mono_np(chunk: bytes, channels: int, orig_sr: int, target_sr: int, mode="sinc_best") -> np.ndarray:
+    """
+    Downmix an interleaved 16-bit PCM chunk to mono, resample it, and return a NumPy int16 array.
+
+    Args:
+        chunk: raw audio bytes, read as interleaved int16 samples
+        channels: number of interleaved channels in `chunk`
+        orig_sr: sample rate of `chunk`
+        target_sr: sample rate of the returned audio
+        mode: converter type: 'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'
+
+    Return:
+        1-D np.int16 array holding the resampled mono audio
+    """
+    if channels == 1:
+        chunk_mono = np.frombuffer(chunk, dtype=np.int16)
+        chunk_mono = chunk_mono.astype(np.float32)
+    else:
+        # (length * channels,)
+        chunk_np = np.frombuffer(chunk, dtype=np.int16)
+        # (length, channels)
+        chunk_np = chunk_np.reshape(-1, channels)
+        # (length,) - channels are averaged in float32, so the mean cannot overflow int16
+        chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
+
+    ratio = target_sr / orig_sr
+    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
+    # Resampler overshoot can leave the int16 range; saturate instead of letting the cast wrap around.
+    return np.clip(np.round(chunk_mono_r), -32768, 32767).astype(np.int16)
+
+
 def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_best") -> bytes:
     """
     将当前单通道音频块进行重采样
diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts
index 451ddfa..2ea1a8a 100644
--- a/src/main/utils/CaptionEngine.ts
+++ b/src/main/utils/CaptionEngine.ts
@@ -37,7 +37,7 @@ export class CaptionEngine {
         if(process.platform === "win32") {
           this.appPath = path.join(
             app.getAppPath(), 'engine',
-            'subenv', 'Scripts', 'python.exe'
+            '.venv', 'Scripts', 'python.exe'
           )
           this.command.push(path.join(
             app.getAppPath(), 'engine', 'main.py'
@@ -47,7 +47,7 @@ export class CaptionEngine {
         else {
           this.appPath = path.join(
             app.getAppPath(), 'engine',
-            'subenv', 'bin', 'python3'
+            '.venv', 'bin', 'python3'
           )
           this.command.push(path.join(
             app.getAppPath(), 'engine', 'main.py'