mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 12:24:42 +08:00
feat(engine): 重构字幕引擎,新增 Sherpa-ONNX SenseVoice 语音识别模型
- 重构字幕引擎,将音频采集改为在新线程上进行
- 重构 audio2text 中的类,调整运行逻辑
- 更新 main 函数,添加对 Sosv 模型的支持
- 修改 AudioStream 类,默认使用 16000Hz 采样率
This commit is contained in:
@@ -5,6 +5,6 @@ from .audioprcs import (
|
||||
resample_mono_chunk
|
||||
)
|
||||
from .sysout import stdout, stdout_err, stdout_cmd, stdout_obj, stderr
|
||||
from .thdata import thread_data
|
||||
from .shared import shared_data
|
||||
from .server import start_server
|
||||
from .translation import ollama_translate, google_translate
|
||||
@@ -49,9 +49,18 @@ def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: in
|
||||
# (length,)
|
||||
chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
|
||||
if orig_sr == target_sr:
|
||||
return chunk_mono.astype(np.int16).tobytes()
|
||||
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||
chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
|
||||
real_len = round(chunk_mono.shape[0] * ratio)
|
||||
if(chunk_mono_r.shape[0] > real_len):
|
||||
chunk_mono_r = chunk_mono_r[:real_len]
|
||||
else:
|
||||
while chunk_mono_r.shape[0] < real_len:
|
||||
chunk_mono_r = np.append(chunk_mono_r, chunk_mono_r[-1])
|
||||
return chunk_mono_r.tobytes()
|
||||
|
||||
|
||||
@@ -81,9 +90,18 @@ def resample_chunk_mono_np(chunk: bytes, channels: int, orig_sr: int, target_sr:
|
||||
# (length,)
|
||||
chunk_mono = np.mean(chunk_np.astype(np.float32), axis=1)
|
||||
|
||||
if orig_sr == target_sr:
|
||||
return chunk_mono.astype(dtype)
|
||||
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
|
||||
chunk_mono_r = chunk_mono_r.astype(dtype)
|
||||
real_len = round(chunk_mono.shape[0] * ratio)
|
||||
if(chunk_mono_r.shape[0] > real_len):
|
||||
chunk_mono_r = chunk_mono_r[:real_len]
|
||||
else:
|
||||
while chunk_mono_r.shape[0] < real_len:
|
||||
chunk_mono_r = np.append(chunk_mono_r, chunk_mono_r[-1])
|
||||
return chunk_mono_r
|
||||
|
||||
|
||||
@@ -100,9 +118,16 @@ def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_b
|
||||
Return:
|
||||
单通道音频数据块
|
||||
"""
|
||||
if orig_sr == target_sr: return chunk
|
||||
chunk_np = np.frombuffer(chunk, dtype=np.int16)
|
||||
chunk_np = chunk_np.astype(np.float32)
|
||||
ratio = target_sr / orig_sr
|
||||
chunk_r = samplerate.resample(chunk_np, ratio, converter_type=mode)
|
||||
chunk_r = np.round(chunk_r).astype(np.int16)
|
||||
real_len = round(chunk_np.shape[0] * ratio)
|
||||
if(chunk_r.shape[0] > real_len):
|
||||
chunk_r = chunk_r[:real_len]
|
||||
else:
|
||||
while chunk_r.shape[0] < real_len:
|
||||
chunk_r = np.append(chunk_r, chunk_r[-1])
|
||||
return chunk_r.tobytes()
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import socket
|
||||
import threading
|
||||
import json
|
||||
# import time
|
||||
from utils import thread_data, stdout_cmd, stderr
|
||||
from utils import shared_data, stdout_cmd, stderr
|
||||
|
||||
|
||||
def handle_client(client_socket):
|
||||
global thread_data
|
||||
while thread_data.status == 'running':
|
||||
global shared_data
|
||||
while shared_data.status == 'running':
|
||||
try:
|
||||
data = client_socket.recv(4096).decode('utf-8')
|
||||
if not data:
|
||||
@@ -15,13 +14,13 @@ def handle_client(client_socket):
|
||||
data = json.loads(data)
|
||||
|
||||
if data['command'] == 'stop':
|
||||
thread_data.status = 'stop'
|
||||
shared_data.status = 'stop'
|
||||
break
|
||||
except Exception as e:
|
||||
stderr(f'Communication error: {e}')
|
||||
break
|
||||
|
||||
thread_data.status = 'stop'
|
||||
shared_data.status = 'stop'
|
||||
client_socket.close()
|
||||
|
||||
|
||||
@@ -34,7 +33,6 @@ def start_server(port: int):
|
||||
stderr(str(e))
|
||||
stdout_cmd('kill')
|
||||
return
|
||||
# time.sleep(20)
|
||||
stdout_cmd('connect')
|
||||
|
||||
client, addr = server.accept()
|
||||
|
||||
8
engine/utils/shared.py
Normal file
8
engine/utils/shared.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import queue
|
||||
|
||||
class SharedData:
    """Mutable state shared between the engine's threads.

    A single instance (``shared_data`` below) is created at import time and
    imported by other modules, so every importer sees the same object.
    """

    def __init__(self) -> None:
        # Lifecycle flag. The server's client handler loops while this equals
        # "running" and sets it to "stop" on a stop command, on a
        # communication error, and when the handler exits.
        self.status: str = "running"
        # Unbounded FIFO queue (no maxsize given). Presumably carries audio
        # chunks from the capture thread to the recognizer -- confirm against
        # the producers/consumers, which are not visible in this file.
        self.chunk_queue: queue.Queue = queue.Queue()


# Module-level singleton; import this object rather than instantiating
# SharedData again, otherwise the state is no longer shared.
shared_data = SharedData()
|
||||
@@ -1,5 +0,0 @@
|
||||
class ThreadData:
    """Minimal holder for the engine's run state, shared across threads.

    A single instance (``thread_data`` below) is created at import time and
    imported by other modules, so every importer sees the same object.
    """

    def __init__(self) -> None:
        # Lifecycle flag. The server's client handler loops while this equals
        # "running" and sets it to "stop" to request shutdown.
        self.status: str = "running"


# Module-level singleton; import this object rather than creating a new
# ThreadData, otherwise the flag is no longer shared.
thread_data = ThreadData()
|
||||
Reference in New Issue
Block a user