mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-17 14:14:53 +08:00
feat(engine): 重构字幕引擎,新增 Sherpa-ONNX SenseVoice 语音识别模型
- 重构字幕引擎,将音频采集改为在新线程上进行
- 重构 audio2text 中的类,调整运行逻辑
- 更新 main 函数,添加对 Sosv 模型的支持
- 修改 AudioStream 类,默认使用 16000Hz 采样率
This commit is contained in:
@@ -49,9 +49,18 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
|
||||
# (length,)
|
||||
chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
|
||||
if orig_sr == target_sr:
|
||||
return chunk_mono.astype(np.int16).tobytes()
|
||||
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
||||
real_len = round(chunk_mono.shape[0] * ratio)
|
||||
if(chunk_mono_r.shape[0] > real_len):
|
||||
chunk_mono_r = chunk_mono_r[:real_len]
|
||||
else:
|
||||
while chunk_mono_r.shape[0] < real_len:
|
||||
chunk_mono_r = np.append(chunk_mono_r, chunk_mono_r[-1])
|
||||
return chunk_mono_r.tobytes()
|
||||
|
||||
|
||||
@@ -81,9 +90,18 @@ def resample_chunk_mono_np(chunk: bytes, channels: int, orig_sr: int, target_sr:
|
||||
# (length,)
|
||||
chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
|
||||
if orig_sr == target_sr:
|
||||
return chunk_mono.astype(dtype)
|
||||
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||
chunk_mono_r = chunk_mono_r.astype(dtype)
|
||||
real_len = round(chunk_mono.shape[0] * ratio)
|
||||
if(chunk_mono_r.shape[0] > real_len):
|
||||
chunk_mono_r = chunk_mono_r[:real_len]
|
||||
else:
|
||||
while chunk_mono_r.shape[0] < real_len:
|
||||
chunk_mono_r = np.append(chunk_mono_r, chunk_mono_r[-1])
|
||||
return chunk_mono_r
|
||||
|
||||
|
||||
@@ -100,9 +118,16 @@ def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_b
|
||||
Return:
|
||||
单通道音频数据块
|
||||
"""
|
||||
if orig_sr == target_sr: return chunk
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
chunk_np = chunk_np.astype(np.float32)
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_r = samplerate.resample(chunk_np, ratio, converter_type=mode)
|
||||
chunk_r = np.round(chunk_r).astype(np.int16)
|
||||
real_len = round(chunk_np.shape[0] * ratio)
|
||||
if(chunk_r.shape[0] > real_len):
|
||||
chunk_r = chunk_r[:real_len]
|
||||
else:
|
||||
while chunk_r.shape[0] < real_len:
|
||||
chunk_r = np.append(chunk_r, chunk_r[-1])
|
||||
return chunk_r.tobytes()
|
||||
|
||||
Reference in New Issue
Block a user