refactor(engine): 修改虚拟环境设置，修改音频工具函数

- 更新虚拟环境目录名为 .venv
- 调整音频块采集速率默认值为 10
- 为 AudioStream 类添加重设音频块大小的方法
- 更新依赖文件 requirements.txt
2026-02-04 04:14:42 +08:00 · 2025-08-03 16:40:26 +08:00
parent 38b4b15cec
commit 1c0bf1f9c4
19 changed files with 99 additions and 70 deletions
--- a/engine/main.py
+++ b/engine/main.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
    # both
    parser.add_argument('-e', '--caption_engine', default='gummy', help='Caption engine: gummy or vosk')
    parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output, 1 for input')
-    parser.add_argument('-c', '--chunk_rate', default=20, help='Number of audio stream chunks collected per second')
+    parser.add_argument('-c', '--chunk_rate', default=10, help='Number of audio stream chunks collected per second')
    parser.add_argument('-p', '--port', default=8080, help='The port to run the server on, 0 for no server')
    # gummy only
    parser.add_argument('-s', '--source_language', default='en', help='Source language code')
--- a/engine/main.spec
+++ b/engine/main.spec
@@ -4,9 +4,9 @@ from pathlib import Path
 import sys

 if sys.platform == 'win32':
-    vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve())
+    vosk_path = str(Path('./.venv/Lib/site-packages/vosk').resolve())
 else:
-    vosk_path = str(Path('./subenv/lib/python3.12/site-packages/vosk').resolve())
+    vosk_path = str(Path('./.venv/lib/python3.12/site-packages/vosk').resolve())

 a = Analysis(
    ['main.py'],
--- a/engine/requirements.txt
+++ b/engine/requirements.txt
@@ -0,0 +1,7 @@
+dashscope
+numpy
+samplerate
+vosk
+pyinstaller
+pyaudio; sys_platform == 'darwin'
+pyaudiowpatch; sys_platform == 'win32'
--- a/engine/requirements_darwin.txt
+++ b/engine/requirements_darwin.txt
@@ -1,6 +0,0 @@
-dashscope
-numpy
-samplerate
-PyAudio
-vosk
-pyinstaller
--- a/engine/requirements_linux.txt
+++ b/engine/requirements_linux.txt
@@ -1,5 +0,0 @@
-dashscope
-numpy
-vosk
-pyinstaller
-samplerate # pip install samplerate --only-binary=:all:
--- a/engine/requirements_win.txt
+++ b/engine/requirements_win.txt
@@ -1,6 +0,0 @@
-dashscope
-numpy
-samplerate
-PyAudioWPatch
-vosk
-pyinstaller
--- a/engine/sysaudio/darwin.py
+++ b/engine/sysaudio/darwin.py
@@ -22,9 +22,9 @@ class AudioStream:

    初始化参数：
        audio_type: 0-系统音频输出流（需配合 BlackHole），1-系统音频输入流
-        chunk_rate: 每秒采集音频块的数量，默认为20
+        chunk_rate: 每秒采集音频块的数量，默认为10
    """
-    def __init__(self, audio_type=0, chunk_rate=20):
+    def __init__(self, audio_type=0, chunk_rate=10):
        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        if self.audio_type == 0:
@@ -40,6 +40,12 @@ class AudioStream:
        self.RATE = int(self.device["defaultSampleRate"])
        self.CHUNK = self.RATE // chunk_rate

+    def reset_chunk_size(self, chunk_size: int):
+        """
+        Override the audio chunk size (self.CHUNK) that __init__ derived from RATE // chunk_rate.
+        """
+        self.CHUNK = chunk_size
+
    def get_info(self):
        dev_info = f"""
        采样设备：
--- a/engine/sysaudio/linux.py
+++ b/engine/sysaudio/linux.py
@@ -41,9 +41,9 @@ class AudioStream:

    初始化参数：
        audio_type: 0-系统音频输出流（不支持，不会生效），1-系统音频输入流（默认）
-        chunk_rate: 每秒采集音频块的数量，默认为20
+        chunk_rate: 每秒采集音频块的数量，默认为10
    """
-    def __init__(self, audio_type=1,  chunk_rate=20):
+    def __init__(self, audio_type=1,  chunk_rate=10):
        self.audio_type = audio_type

        if self.audio_type == 0:
@@ -58,6 +58,12 @@ class AudioStream:
        self.RATE = 48000
        self.CHUNK = self.RATE // chunk_rate

+    def reset_chunk_size(self, chunk_size: int):
+        """
+        Override the audio chunk size (self.CHUNK) that __init__ derived from RATE // chunk_rate.
+        """
+        self.CHUNK = chunk_size
+
    def get_info(self):
        dev_info = f"""
        音频捕获进程：
--- a/engine/sysaudio/win.py
+++ b/engine/sysaudio/win.py
@@ -46,9 +46,9 @@ class AudioStream:

    初始化参数：
        audio_type: 0-系统音频输出流（默认），1-系统音频输入流
-        chunk_rate: 每秒采集音频块的数量，默认为20
+        chunk_rate: 每秒采集音频块的数量，默认为10
    """
-    def __init__(self, audio_type=0, chunk_rate=20):
+    def __init__(self, audio_type=0, chunk_rate=10, chunk_size=-1):
        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        if self.audio_type == 0:
@@ -64,6 +64,12 @@ class AudioStream:
        self.RATE = int(self.device["defaultSampleRate"])
        self.CHUNK = self.RATE // chunk_rate

+    def reset_chunk_size(self, chunk_size: int):
+        """
+        Override the audio chunk size (self.CHUNK) that __init__ derived from RATE // chunk_rate.
+        """
+        self.CHUNK = chunk_size
+
    def get_info(self):
        dev_info = f"""
        采样设备：
--- a/engine/utils/__init__.py
+++ b/engine/utils/__init__.py
@@ -1,4 +1,9 @@
-from .audioprcs import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
+from .audioprcs import (
+    merge_chunk_channels,
+    resample_chunk_mono,
+    resample_chunk_mono_np,
+    resample_mono_chunk
+)
 from .sysout import stdout, stdout_cmd, stdout_obj, stderr
 from .thdata import thread_data
 from .server import start_server
--- a/engine/utils/audioprcs.py
+++ b/engine/utils/audioprcs.py
@@ -55,6 +55,37 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
    return chunk_mono_r.tobytes()


+def resample_chunk_mono_np(chunk: bytes, channels: int, orig_sr: int, target_sr: int, mode="sinc_best") -> np.ndarray:
+    """
+    Convert a multi-channel audio chunk to a single channel, resample it, and return a NumPy array.
+
+    Args:
+        chunk: multi-channel audio chunk (raw bytes, interpreted as int16 samples)
+        channels: number of channels
+        orig_sr: original sample rate
+        target_sr: target sample rate
+        mode: resampling mode, one of: 'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'
+
+    Return:
+        Single-channel audio chunk as an int16 NumPy array
+    """
+    if channels == 1:
+        chunk_mono = np.frombuffer(chunk, dtype=np.int16)
+        chunk_mono = chunk_mono.astype(np.float32)
+    else:
+        # flat sample buffer: (length * channels,)
+        chunk_np = np.frombuffer(chunk, dtype=np.int16)
+        # one row per frame: (length, channels)
+        chunk_np = chunk_np.reshape(-1, channels)
+        # average the channels of each frame: (length,)
+        chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
+
+    ratio = target_sr / orig_sr
+    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
+    chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)  # NOTE(review): no clipping before the int16 cast — confirm resampler overshoot cannot exceed the int16 range
+    return chunk_mono_r
+
+
 def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_best") -> bytes:
    """
    将当前单通道音频块进行重采样