feat(audio): 重构音频处理模块、音频流重采样测试成功

This commit is contained in:
himeditator
2025-07-07 00:47:36 +08:00
parent f2aa075e65
commit 0696651f04
11 changed files with 253 additions and 115 deletions

View File

@@ -1 +1 @@
from .streamchnl import mergeStreamChannels
from .process import mergeChunkChannels, resampleRawChunk

View File

@@ -0,0 +1,49 @@
import samplerate
import numpy as np
def mergeChunkChannels(chunk, channels):
    """
    Down-mix an interleaved multi-channel audio chunk to a single channel.

    The raw bytes are read as 16-bit signed integer samples, the samples of
    each frame are averaged in float32, and the average is rounded back to
    16-bit integers.

    Args:
        chunk: (bytes) multi-channel audio chunk
        channels: number of channels

    Returns:
        (bytes) single-channel audio chunk
    """
    # Group the flat int16 samples frame by frame: (length, channels)
    frames = np.frombuffer(chunk, dtype=np.int16).reshape(-1, channels)
    # Average across the channel axis in float32: (length,)
    averaged = frames.astype(np.float32).mean(axis=1)
    return np.round(averaged).astype(np.int16).tobytes()
def resampleRawChunk(chunk, channels, orig_sr, target_sr, mode="sinc_best"):
    """
    Down-mix a multi-channel audio chunk to a single channel, then resample it.

    The raw bytes are read as 16-bit signed integer samples, averaged per
    frame in float32, resampled by the ratio ``target_sr / orig_sr`` and
    converted back to 16-bit integers.

    Args:
        chunk: (bytes) multi-channel audio chunk
        channels: number of channels
        orig_sr: original sample rate
        target_sr: target sample rate
        mode: resampling mode, one of
            'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'

    Returns:
        (bytes) single-channel audio chunk at the target sample rate
    """
    # Nothing to resample; avoid handing an empty signal to the resampler.
    if len(chunk) == 0:
        return b""

    # (length * channels,)
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
    # (length, channels)
    chunk_np = chunk_np.reshape(-1, channels)
    # (length,) -- kept in float32: quantizing to int16 before resampling
    # would only discard precision that the final rounding step handles anyway.
    chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)

    ratio = target_sr / orig_sr
    # NOTE(review): every call resamples its chunk independently; for a
    # continuous stream a stateful resampler may be preferable -- confirm.
    chunk_mono_r = samplerate.resample(chunk_mono_f, ratio, converter_type=mode)

    # The resampled signal may exceed the int16 range; saturate instead of
    # letting the integer cast wrap around.
    int16_range = np.iinfo(np.int16)
    chunk_mono_r = np.clip(np.round(chunk_mono_r), int16_range.min, int16_range.max)
    return chunk_mono_r.astype(np.int16).tobytes()

View File

@@ -1,22 +0,0 @@
import numpy as np
def mergeStreamChannels(data, channels):
    """
    Merge interleaved multi-channel stream data into single-channel data.

    The raw bytes are read as 16-bit signed integer samples and the samples
    of each frame are averaged in float32. The average is cast straight back
    to int16, so fractional parts are truncated rather than rounded.

    Args:
        data: multi-channel data
        channels: number of channels

    Returns:
        mono_data_bytes: single-channel data
    """
    # Group the flat int16 samples frame by frame: (length, channels)
    frames = np.frombuffer(data, dtype=np.int16).reshape(-1, channels)
    # Per-frame average, then a plain cast back to int16: (length,)
    mono = frames.astype(np.float32).mean(axis=1).astype(np.int16)
    return mono.tobytes()

View File

@@ -8,7 +8,7 @@ elif sys.platform == 'linux':
else:
raise NotImplementedError(f"Unsupported platform: {sys.platform}")
from audioprcs import mergeStreamChannels
from audioprcs import mergeChunkChannels
from audio2text import InvalidParameter, GummyTranslator
@@ -26,13 +26,13 @@ def convert_audio_to_text(s_lang, t_lang, audio_type):
while True:
try:
data = stream.read_chunk()
data = mergeStreamChannels(data, stream.CHANNELS)
chunk = stream.read_chunk()
chunk_mono = mergeChunkChannels(chunk, stream.CHANNELS)
try:
gummy.send_audio_frame(data)
gummy.send_audio_frame(chunk_mono)
except InvalidParameter:
gummy.start()
gummy.send_audio_frame(data)
gummy.send_audio_frame(chunk_mono)
except KeyboardInterrupt:
stream.closeStream()
gummy.stop()

View File

@@ -1,5 +1,6 @@
dashscope==1.23.5
numpy==2.2.6
samplerate==0.2.1
PyAudio==0.2.14
PyAudioWPatch==0.2.12.7 # Windows only
pyinstaller==6.14.1

View File

@@ -34,7 +34,7 @@ class AudioStream:
音频样本块大小:{self.CHUNK}
样本位宽:{self.SAMP_WIDTH}
音频数据格式:{self.FORMAT}
采样格式:{self.FORMAT}
音频通道数:{self.CHANNELS}
音频采样率:{self.RATE}
"""

View File

@@ -65,7 +65,7 @@ class AudioStream:
def printInfo(self):
dev_info = f"""
采样设备:
- 设备类型:{ "音频输" if self.audio_type == 0 else "音频输" }
- 设备类型:{ "音频输" if self.audio_type == 0 else "音频输" }
- 序号:{self.device['index']}
- 名称:{self.device['name']}
- 最大输入通道数:{self.device['maxInputChannels']}
@@ -76,7 +76,7 @@ class AudioStream:
音频样本块大小:{self.CHUNK}
样本位宽:{self.SAMP_WIDTH}
音频数据格式:{self.FORMAT}
采样格式:{self.FORMAT}
音频通道数:{self.CHANNELS}
音频采样率:{self.RATE}
"""