Mirror of https://github.com/HiMeditator/auto-caption.git
Synced 2026-02-16 05:01:07 +08:00
refactor(engine): refactor the caption engine

- Update the GummyTranslator class and streamline caption generation logic
- Remove the audioprcs module; its audio-processing functions move to the utils module
- Refactor the sysaudio module for more flexible and robust audio stream management
- Update TODO.md: sorting caption records in descending time order is done
- Update the docs to note that the English and Japanese documentation will no longer be maintained due to limited resources
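
As a quick orientation before the diff, the sketch below wires the refactored pieces together the way the updated entry script does: capture a chunk, fold it to mono, feed it to the Gummy translator, and let the callback emit captions on stdout. It is a minimal sketch based only on names visible in this commit (AudioStream, merge_chunk_channels, GummyTranslator, InvalidParameter); the audio type, chunk rate, and language codes are illustrative values, not taken from the repository.

# Hedged sketch of the refactored capture -> transcribe -> emit pipeline (values illustrative).
from sysaudio import AudioStream                      # platform-dispatching package (new __init__.py)
from utils import merge_chunk_channels
from audio2text import InvalidParameter, GummyTranslator

stream = AudioStream(0, 10)                           # 0 = system output, 10 chunks/s (example values)
gummy = GummyTranslator(stream.RATE, "en", "zh", None)

stream.open_stream()
gummy.start()
try:
    while True:
        chunk = stream.read_chunk()
        if chunk is None:
            continue
        mono = merge_chunk_channels(chunk, stream.CHANNELS)
        try:
            gummy.send_audio_frame(mono)
        except InvalidParameter:                      # translator was stopped: restart and resend
            gummy.start()
            gummy.send_audio_frame(mono)
except KeyboardInterrupt:
    stream.close_stream()
    gummy.stop()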
@@ -6,8 +6,8 @@ from dashscope.audio.asr import (
)
import dashscope
from datetime import datetime
import json
import sys
from utils import stdout_cmd, stdout_obj


class Callback(TranslationRecognizerCallback):
    """
@@ -17,15 +17,16 @@ class Callback(TranslationRecognizerCallback):
        super().__init__()
        self.usage = 0
        self.cur_id = -1
        self.index = 0
        self.time_str = ''

    def on_open(self) -> None:
        # print("on_open")
        pass
        self.cur_id = -1
        self.time_str = ''
        stdout_cmd('info', 'Gummy translator started.')

    def on_close(self) -> None:
        # print("on_close")
        pass
        stdout_cmd('info', 'Gummy translator closed.')

    def on_event(
        self,
@@ -35,17 +36,18 @@ class Callback(TranslationRecognizerCallback):
        usage
    ) -> None:
        caption = {}

        if transcription_result is not None:
            caption['index'] = transcription_result.sentence_id
            caption['text'] = transcription_result.text
            if caption['index'] != self.cur_id:
                self.cur_id = caption['index']
                cur_time = datetime.now().strftime('%H:%M:%S.%f')[:-3]
                caption['time_s'] = cur_time
                self.time_str = cur_time
            else:
                caption['time_s'] = self.time_str
            if self.cur_id != transcription_result.sentence_id:
                self.time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
                self.cur_id = transcription_result.sentence_id
                self.index += 1
            caption['command'] = 'caption'
            caption['index'] = self.index
            caption['time_s'] = self.time_str
            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            caption['end'] = transcription_result.is_sentence_end
            caption['text'] = transcription_result.text
            caption['translation'] = ""

        if translation_result is not None:
@@ -55,19 +57,8 @@ class Callback(TranslationRecognizerCallback):
        if usage:
            self.usage += usage['duration']

        # print(caption)
        self.send_to_node(caption)
        stdout_obj(caption)

    def send_to_node(self, data):
        """
        Send data to the Node.js process
        """
        try:
            json_data = json.dumps(data) + '\n'
            sys.stdout.write(json_data)
            sys.stdout.flush()
        except Exception as e:
            print(f"Error sending data to Node.js: {e}", file=sys.stderr)

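For reference, each stdout_obj(caption) call above writes one JSON object per line to stdout. A line produced by the new code path would look roughly like the following; the field set matches the dict built in on_event, but the values are invented for illustration:

{"command": "caption", "index": 3, "time_s": "14:03:21.512", "time_t": "14:03:22.087", "end": false, "text": "partial transcription text", "translation": ""}

time_s marks when the current sentence started and time_t when this update was emitted, both in the %H:%M:%S.%f format truncated to milliseconds.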
class GummyTranslator:
    """
@@ -78,7 +69,7 @@ class GummyTranslator:
        source: source language code string (zh, en, ja, etc.)
        target: target language code string (zh, en, ja, etc.)
    """
    def __init__(self, rate, source, target, api_key):
    def __init__(self, rate: int, source: str, target: str | None, api_key: str | None):
        if api_key:
            dashscope.api_key = api_key
        self.translator = TranslationRecognizerRealtime(

@@ -1 +0,0 @@
from .process import mergeChunkChannels, resampleRawChunk, resampleMonoChunk
@@ -1,21 +1,11 @@
import sys
import argparse

if sys.platform == 'win32':
    from sysaudio.win import AudioStream
elif sys.platform == 'darwin':
    from sysaudio.darwin import AudioStream
elif sys.platform == 'linux':
    from sysaudio.linux import AudioStream
else:
    raise NotImplementedError(f"Unsupported platform: {sys.platform}")

from audioprcs import mergeChunkChannels
from sysaudio import AudioStream
from utils import merge_chunk_channels
from audio2text import InvalidParameter, GummyTranslator


def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate, api_key):
    sys.stdout.reconfigure(line_buffering=True) # type: ignore
    stream = AudioStream(audio_type, chunk_rate)

    if t_lang == 'none':
@@ -23,20 +13,21 @@ def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate, api_key):
    else:
        gummy = GummyTranslator(stream.RATE, s_lang, t_lang, api_key)

    stream.openStream()
    stream.open_stream()
    gummy.start()

    while True:
        try:
            chunk = stream.read_chunk()
            chunk_mono = mergeChunkChannels(chunk, stream.CHANNELS)
            if chunk is None: continue
            chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS)
            try:
                gummy.send_audio_frame(chunk_mono)
            except InvalidParameter:
                gummy.start()
                gummy.send_audio_frame(chunk_mono)
        except KeyboardInterrupt:
            stream.closeStream()
            stream.close_stream()
            gummy.stop()
            break

@@ -4,17 +4,9 @@ import argparse
from datetime import datetime
import numpy.core.multiarray

if sys.platform == 'win32':
    from sysaudio.win import AudioStream
elif sys.platform == 'darwin':
    from sysaudio.darwin import AudioStream
elif sys.platform == 'linux':
    from sysaudio.linux import AudioStream
else:
    raise NotImplementedError(f"Unsupported platform: {sys.platform}")

from sysaudio import AudioStream
from vosk import Model, KaldiRecognizer, SetLogLevel
from audioprcs import resampleRawChunk
from utils import resample_chunk_mono

SetLogLevel(-1)

@@ -30,7 +22,7 @@ def convert_audio_to_text(audio_type, chunk_rate, model_path):
    recognizer = KaldiRecognizer(model, 16000)

    stream = AudioStream(audio_type, chunk_rate)
    stream.openStream()
    stream.open_stream()

    time_str = ''
    cur_id = 0
@@ -38,7 +30,8 @@ def convert_audio_to_text(audio_type, chunk_rate, model_path):

    while True:
        chunk = stream.read_chunk()
        chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, 16000)
        if chunk is None: continue
        chunk_mono = resample_chunk_mono(chunk, stream.CHANNELS, stream.RATE, 16000)

        caption = {}
        if recognizer.AcceptWaveform(chunk_mono):

@@ -0,0 +1,10 @@
import sys

if sys.platform == "win32":
    from .win import AudioStream
elif sys.platform == "darwin":
    from .darwin import AudioStream
elif sys.platform == "linux":
    from .linux import AudioStream
else:
    raise NotImplementedError(f"Unsupported platform: {sys.platform}")
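With this new sysaudio/__init__.py dispatcher, callers no longer select a platform module themselves; they simply import AudioStream from the package, as the updated entry scripts earlier in the diff do. A minimal usage sketch, where the constructor arguments are example values rather than anything taken from the repository:

# Hedged usage sketch of the platform-dispatching package (argument values are examples only).
from sysaudio import AudioStream    # resolves to the win/darwin/linux implementation automatically

stream = AudioStream(1, 10)         # 1 = microphone input, 10 chunks per second (example values)
stream.open_stream()
chunk = stream.read_chunk()         # bytes of interleaved int16 samples, or None
print(stream.CHANNELS, stream.RATE, stream.CHUNK)
stream.close_stream()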
@@ -1,11 +1,24 @@
"""Get the macOS system audio input/output stream"""

import pyaudio
from textwrap import dedent


def get_blackhole_device(mic: pyaudio.PyAudio):
    """
    Get the BlackHole device
    """
    device_count = mic.get_device_count()
    for i in range(device_count):
        dev_info = mic.get_device_info_by_index(i)
        if 'blackhole' in str(dev_info["name"]).lower():
            return dev_info
    raise Exception("The device containing BlackHole was not found.")


class AudioStream:
    """
    Get the system audio stream (supports BlackHole for capturing system audio output)
    Get the system audio stream (to capture output audio, only BlackHole is supported for system audio output capture)

    Init parameters:
        audio_type: 0 - system audio output stream (requires BlackHole), 1 - system audio input stream
@@ -15,46 +28,40 @@ class AudioStream:
        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        if self.audio_type == 0:
            self.device = self.getOutputDeviceInfo()
            self.device = get_blackhole_device(self.mic)
        else:
            self.device = self.mic.get_default_input_device_info()
        self.stop_signal = False
        self.stream = None
        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
        self.INDEX = self.device["index"]
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = self.device["maxInputChannels"]
        self.SAMP_WIDTH = pyaudio.get_sample_size(self.FORMAT)
        self.CHANNELS = int(self.device["maxInputChannels"])
        self.RATE = int(self.device["defaultSampleRate"])
        self.CHUNK = self.RATE // chunk_rate
        self.INDEX = self.device["index"]

    def getOutputDeviceInfo(self):
        """Find the input device matching the given keyword"""
        device_count = self.mic.get_device_count()
        for i in range(device_count):
            dev_info = self.mic.get_device_info_by_index(i)
            if 'blackhole' in dev_info["name"].lower():
                return dev_info
        raise Exception("The device containing BlackHole was not found.")

    def printInfo(self):
    def get_info(self):
        dev_info = f"""
        Sampling input device:
        Sampling device:
        - Device type: { "audio output" if self.audio_type == 0 else "audio input" }
        - Index: {self.device['index']}
        - Name: {self.device['name']}
        - Device index: {self.device['index']}
        - Device name: {self.device['name']}
        - Max input channels: {self.device['maxInputChannels']}
        - Default low input latency: {self.device['defaultLowInputLatency']}s
        - Default high input latency: {self.device['defaultHighInputLatency']}s
        - Default sample rate: {self.device['defaultSampleRate']}Hz
        - Loopback device: {self.device['isLoopbackDevice']}

        Audio chunk size: {self.CHUNK}
        Device index: {self.INDEX}
        Sample format: {self.FORMAT}
        Sample width: {self.SAMP_WIDTH}
        Sampling format: {self.FORMAT}
        Audio channels: {self.CHANNELS}
        Audio sample rate: {self.RATE}
        Sample channels: {self.CHANNELS}
        Sample rate: {self.RATE}
        Chunk size: {self.CHUNK}
        """
        print(dev_info)
        return dedent(dev_info).strip()

    def openStream(self):
    def open_stream(self):
        """
        Open and return the system audio output stream
        """
@@ -72,14 +79,24 @@ class AudioStream:
        """
        Read audio data
        """
        if self.stop_signal:
            self.close_stream()
            return None
        if not self.stream: return None
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def closeStream(self):
    def close_stream_signal(self):
        """
        Close the system audio output stream
        Thread-safe request to close the system audio input stream; it may not close immediately
        """
        if self.stream is None: return
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
        self.stop_signal = True

    def close_stream(self):
        """
        Close the system audio input stream immediately
        """
        if self.stream is not None:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None
        self.stop_signal = False

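The new close_stream_signal/read_chunk pair above exists so that another thread can request shutdown without touching the PyAudio stream directly: the reader thread notices stop_signal on its next read_chunk call, closes the stream itself, and starts returning None. A minimal sketch of that pattern; the consumer loop, thread handling, and constructor arguments are illustrative, not taken from the repository:

# Hedged sketch: one thread reads audio, another requests shutdown via close_stream_signal().
import threading
import time
from sysaudio import AudioStream

stream = AudioStream(1, 10)          # example values: microphone input, 10 chunks per second
stream.open_stream()

def reader():
    while True:
        chunk = stream.read_chunk()  # returns None once stop_signal has been handled
        if chunk is None:
            break
        # ... hand the chunk to the recognizer here ...

t = threading.Thread(target=reader)
t.start()
time.sleep(5)                        # capture for a few seconds
stream.close_stream_signal()         # thread-safe: only flips the flag
t.join()                             # the reader closes the stream and exits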
@@ -1,8 +1,10 @@
"""Get the Linux system audio input stream"""

import subprocess
from textwrap import dedent

def findMonitorSource():

def find_monitor_source():
    result = subprocess.run(
        ["pactl", "list", "short", "sources"],
        stdout=subprocess.PIPE, text=True
@@ -16,7 +18,8 @@ def findMonitorSource():

    raise RuntimeError("System output monitor device not found")

def findInputSource():

def find_input_source():
    result = subprocess.run(
        ["pactl", "list", "short", "sources"],
        stdout=subprocess.PIPE, text=True
@@ -28,8 +31,10 @@ def findInputSource():
            name = parts[1]
            if ".monitor" not in name:
                return name

    raise RuntimeError("Microphone input device not found")


class AudioStream:
    """
    Get the system audio stream
@@ -42,34 +47,33 @@ class AudioStream:
        self.audio_type = audio_type

        if self.audio_type == 0:
            self.source = findMonitorSource()
            self.source = find_monitor_source()
        else:
            self.source = findInputSource()

            self.source = find_input_source()
        self.stop_signal = False
        self.process = None

        self.SAMP_WIDTH = 2
        self.FORMAT = 16
        self.SAMP_WIDTH = 2
        self.CHANNELS = 2
        self.RATE = 48000
        self.CHUNK = self.RATE // chunk_rate

    def printInfo(self):
    def get_info(self):
        dev_info = f"""
        Audio capture process:
        - Capture type: {"audio output" if self.audio_type == 0 else "audio input"}
        - Device source: {self.source}
        - Capture process PID: {self.process.pid if self.process else "None"}
        - Capture process PID: {self.process.pid if self.process else "None"}

        Audio chunk size: {self.CHUNK}
        Sample format: {self.FORMAT}
        Sample width: {self.SAMP_WIDTH}
        Sampling format: {self.FORMAT}
        Audio channels: {self.CHANNELS}
        Audio sample rate: {self.RATE}
        Sample channels: {self.CHANNELS}
        Sample rate: {self.RATE}
        Chunk size: {self.CHUNK}
        """
        print(dev_info)

    def openStream(self):
    def open_stream(self):
        """
        Start the audio capture process
        """
@@ -82,13 +86,23 @@ class AudioStream:
        """
        Read audio data
        """
        if self.process:
        if self.stop_signal:
            self.close_stream()
            return None
        if self.process and self.process.stdout:
            return self.process.stdout.read(self.CHUNK)
        return None

    def closeStream(self):
    def close_stream_signal(self):
        """
        Thread-safe request to close the system audio input stream; it may not close immediately
        """
        self.stop_signal = True

    def close_stream(self):
        """
        Close the system audio capture process
        """
        if self.process:
            self.process.terminate()
        self.stop_signal = False

@@ -1,14 +1,15 @@
"""Get the Windows system audio input/output stream"""

import pyaudiowpatch as pyaudio
from textwrap import dedent


def getDefaultLoopbackDevice(mic: pyaudio.PyAudio, info = True)->dict:
def get_default_loopback_device(mic: pyaudio.PyAudio, info = True)->dict:
    """
    Get the default loopback device for the system audio output
    Args:
        mic (pyaudio.PyAudio): pyaudio object
        info (bool, optional): whether to print device info
        mic: pyaudio object
        info: whether to print device info

    Returns:
        dict: loopback device for the system audio output
@@ -51,38 +52,40 @@ class AudioStream:
        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        if self.audio_type == 0:
            self.device = getDefaultLoopbackDevice(self.mic, False)
            self.device = get_default_loopback_device(self.mic, False)
        else:
            self.device = self.mic.get_default_input_device_info()
        self.stop_signal = False
        self.stream = None
        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
        self.INDEX = self.device["index"]
        self.FORMAT = pyaudio.paInt16
        self.SAMP_WIDTH = pyaudio.get_sample_size(self.FORMAT)
        self.CHANNELS = int(self.device["maxInputChannels"])
        self.RATE = int(self.device["defaultSampleRate"])
        self.CHUNK = self.RATE // chunk_rate
        self.INDEX = self.device["index"]

    def printInfo(self):
    def get_info(self):
        dev_info = f"""
        Sampling device:
        - Device type: { "audio output" if self.audio_type == 0 else "audio input" }
        - Index: {self.device['index']}
        - Name: {self.device['name']}
        - Device index: {self.device['index']}
        - Device name: {self.device['name']}
        - Max input channels: {self.device['maxInputChannels']}
        - Default low input latency: {self.device['defaultLowInputLatency']}s
        - Default high input latency: {self.device['defaultHighInputLatency']}s
        - Default sample rate: {self.device['defaultSampleRate']}Hz
        - Loopback device: {self.device['isLoopbackDevice']}

        Audio chunk size: {self.CHUNK}
        Device index: {self.INDEX}
        Sample format: {self.FORMAT}
        Sample width: {self.SAMP_WIDTH}
        Sampling format: {self.FORMAT}
        Audio channels: {self.CHANNELS}
        Audio sample rate: {self.RATE}
        Sample channels: {self.CHANNELS}
        Sample rate: {self.RATE}
        Chunk size: {self.CHUNK}
        """
        print(dev_info)
        return dedent(dev_info).strip()

    def openStream(self):
    def open_stream(self):
        """
        Open and return the system audio output stream
        """
@@ -96,18 +99,28 @@ class AudioStream:
        )
        return self.stream

    def read_chunk(self):
    def read_chunk(self) -> bytes | None:
        """
        Read audio data
        """
        if self.stop_signal:
            self.close_stream()
            return None
        if not self.stream: return None
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def closeStream(self):
    def close_stream_signal(self):
        """
        Close the system audio output stream
        Thread-safe request to close the system audio input stream; it may not close immediately
        """
        if self.stream is None: return
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
        self.stop_signal = True

    def close_stream(self):
        """
        Close the system audio input stream
        """
        if self.stream is not None:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None
        self.stop_signal = False

New file: engine/utils/__init__.py (+2 lines)
@@ -0,0 +1,2 @@
from .process import merge_chunk_channels, resample_chunk_mono, resample_mono_chunk
from .sysout import stdout, stdout_cmd, stdout_obj, stderr
@@ -1,16 +1,17 @@
import samplerate
import numpy as np

def mergeChunkChannels(chunk, channels):

def merge_chunk_channels(chunk: bytes, channels: int) -> bytes:
    """
    Convert the current multi-channel audio chunk into a mono audio chunk

    Args:
        chunk: (bytes) multi-channel audio chunk
        chunk: multi-channel audio chunk
        channels: number of channels

    Returns:
        (bytes) mono audio chunk
        mono audio chunk
    """
    # (length * channels,)
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
@@ -22,19 +23,19 @@ def mergeChunkChannels(chunk, channels):
    return chunk_mono.tobytes()


def resampleRawChunk(chunk, channels, orig_sr, target_sr, mode="sinc_best"):

def resample_chunk_mono(chunk: bytes, channels: int, orig_sr: int, target_sr: int, mode="sinc_best") -> bytes:
    """
    Convert the current multi-channel audio chunk into a mono chunk, then resample it

    Args:
        chunk: (bytes) multi-channel audio chunk
        chunk: multi-channel audio chunk
        channels: number of channels
        orig_sr: original sample rate
        target_sr: target sample rate
        mode: resampling mode, one of: 'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'

    Return:
        (bytes) mono audio chunk
        mono audio chunk
    """
    # (length * channels,)
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
@@ -44,22 +45,23 @@ def resampleRawChunk(chunk, channels, orig_sr, target_sr, mode="sinc_best"):
    chunk_mono_f = np.mean(chunk_np.astype(np.float32), axis=1)
    chunk_mono = chunk_mono_f.astype(np.int16)
    ratio = target_sr / orig_sr
    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
    chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
    chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
    return chunk_mono_r.tobytes()

def resampleMonoChunk(chunk, orig_sr, target_sr, mode="sinc_best"):

def resample_mono_chunk(chunk: bytes, orig_sr: int, target_sr: int, mode="sinc_best") -> bytes:
    """
    Resample the current mono audio chunk

    Args:
        chunk: (bytes) mono audio chunk
        chunk: mono audio chunk
        orig_sr: original sample rate
        target_sr: target sample rate
        mode: resampling mode, one of: 'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'

    Return:
        (bytes) mono audio chunk
        mono audio chunk
    """
    chunk_np = np.frombuffer(chunk, dtype=np.int16)
    ratio = target_sr / orig_sr
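As a quick sanity check of the renamed helpers above, the sketch below builds one second of stereo int16 audio and runs it through merge_chunk_channels and resample_chunk_mono. It assumes it is run from the engine/ directory so the utils package and its numpy/samplerate dependencies are importable; the sample rates and tone are example values.

# Hedged usage sketch for the renamed helpers in utils.process (run from the engine/ directory).
import numpy as np
from utils import merge_chunk_channels, resample_chunk_mono

rate, channels = 48000, 2
t = np.linspace(0, 1, rate, endpoint=False)
tone = (0.3 * np.iinfo(np.int16).max * np.sin(2 * np.pi * 440 * t)).astype(np.int16)
stereo = np.column_stack([tone, tone]).tobytes()               # interleaved L/R int16 samples

mono = merge_chunk_channels(stereo, channels)                  # channels averaged: half as many bytes
mono_16k = resample_chunk_mono(stereo, channels, rate, 16000)  # mono, resampled to 16 kHz

print(len(stereo), len(mono), len(mono_16k))                   # 192000 96000 ~32000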
New file: engine/utils/sysout.py (+18 lines)
@@ -0,0 +1,18 @@
import sys
import json

def stdout(text: str):
    stdout_cmd("print", text)

def stdout_cmd(command: str, content = ""):
    msg = { "command": command, "content": content }
    sys.stdout.write(json.dumps(msg) + "\n")
    sys.stdout.flush()

def stdout_obj(obj):
    sys.stdout.write(json.dumps(obj) + "\n")
    sys.stdout.flush()

def stderr(text: str):
    sys.stderr.write(text + "\n")
    sys.stderr.flush()
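These helpers define a simple JSON-lines protocol on stdout: every message is a single json.dumps(...) object terminated by a newline, which the Node.js side can read line by line. Below is a minimal Python sketch of a consumer at the other end of the pipe; the subprocess command is an illustrative placeholder, not the project's actual launch command.

# Hedged sketch of a line-by-line consumer for the JSON messages written by utils.sysout.
import json
import subprocess

proc = subprocess.Popen(
    ["python", "main_gummy.py"],           # placeholder: however the caption engine is actually started
    stdout=subprocess.PIPE, text=True
)

for line in proc.stdout:                   # one JSON object per line
    msg = json.loads(line)
    if msg.get("command") == "caption":    # caption records built by Callback.on_event
        print(msg["time_s"], msg["text"], msg.get("translation", ""))
    else:                                  # e.g. {"command": "info", "content": "..."} from stdout_cmd
        print("engine:", msg.get("command"), msg.get("content"))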