mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-03-07 03:47:29 +08:00
feat(audio): 重构音频处理模块、音频流重采样测试成功
This commit is contained in:
@@ -1 +1 @@
|
||||
from .streamchnl import mergeStreamChannels
|
||||
from .process import mergeChunkChannels, resampleRawChunk
|
||||
|
||||
49
caption-engine/audioprcs/process.py
Normal file
49
caption-engine/audioprcs/process.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import samplerate
|
||||
import numpy as np
|
||||
|
||||
def mergeChunkChannels(chunk, channels):
|
||||
"""
|
||||
将当前多通道音频数据块转换为单通道音频数据块
|
||||
|
||||
Args:
|
||||
chunk: (bytes)多通道音频数据块
|
||||
channels: 通道数
|
||||
|
||||
Returns:
|
||||
(bytes)单通道音频数据块
|
||||
"""
|
||||
# (length * channels,)
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
# (length, channels)
|
||||
chunk_np = chunk_np.reshape(-1, channels)
|
||||
# (length,)
|
||||
chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
chunk_mono = np.round(chunk_mono_f).astype(np.int16)
|
||||
return chunk_mono.tobytes()
|
||||
|
||||
|
||||
def resampleRawChunk(chunk, channels, orig_sr, target_sr, mode="sinc_best"):
|
||||
"""
|
||||
将当前多通道音频数据块转换成单通道音频数据块,然后进行重采样
|
||||
|
||||
Args:
|
||||
chunk: (bytes)多通道音频数据块
|
||||
channels: 通道数
|
||||
orig_sr: 原始采样率
|
||||
target_sr: 目标采样率
|
||||
mode: 重采样模式,可选:'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'
|
||||
|
||||
Return:
|
||||
(bytes)单通道音频数据块
|
||||
"""
|
||||
# (length * channels,)
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
# (length, channels)
|
||||
chunk_np = chunk_np.reshape(-1, channels)
|
||||
# (length,)
|
||||
chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
chunk_mono = chunk_mono_f.astype(np.int16)
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
||||
return chunk_mono_r.tobytes()
|
||||
@@ -1,22 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
def mergeStreamChannels(data, channels):
|
||||
"""
|
||||
将当前多通道流数据合并为单通道流数据
|
||||
|
||||
Args:
|
||||
data: 多通道数据
|
||||
channels: 通道数
|
||||
|
||||
Returns:
|
||||
mono_data_bytes: 单通道数据
|
||||
"""
|
||||
# (length * channels,)
|
||||
data_np = np.frombuffer(data, dtype=np.int16)
|
||||
# (length, channels)
|
||||
data_np_r = data_np.reshape(-1, channels)
|
||||
# (length,)
|
||||
mono_data = np.mean(data_np_r.astype(np.float32), axis=1)
|
||||
mono_data = mono_data.astype(np.int16)
|
||||
mono_data_bytes = mono_data.tobytes()
|
||||
return mono_data_bytes
|
||||
Reference in New Issue
Block a user