mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-12 18:44:57 +08:00
feat(python-subprocess): 尝试字幕显示新解决方案
- 使用 python 子进程解析字幕
- 通过 websocket 通信将字幕传递给软件
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
"""获取 Windows 系统音频输出流"""
|
||||
|
||||
import pyaudiowpatch as pyaudio
|
||||
import numpy as np
|
||||
|
||||
|
||||
def getDefaultLoopbackDevice(mic: pyaudio.PyAudio, info = True)->dict:
|
||||
"""
|
||||
@@ -38,16 +40,77 @@ def getDefaultLoopbackDevice(mic: pyaudio.PyAudio, info = True)->dict:
|
||||
return default_speaker
|
||||
|
||||
|
||||
def getOutputStream():
    """
    Open an input stream on the default loopback device and return it.

    A new ``pyaudio.PyAudio`` instance is created on every call and the
    device description is obtained from ``getDefaultLoopbackDevice``. The
    stream is opened as 16-bit integer input using the device's
    ``maxInputChannels``, ``defaultSampleRate`` and ``index`` entries.

    NOTE: only the stream is returned; the ``PyAudio`` instance created here
    is not terminated by this function.

    Returns:
        The stream object returned by ``PyAudio.open``.
    """
    mic = pyaudio.PyAudio()
    default_speaker = getDefaultLoopbackDevice(mic, False)

    stream = mic.open(
        format=pyaudio.paInt16,
        channels=default_speaker["maxInputChannels"],
        rate=int(default_speaker["defaultSampleRate"]),
        input=True,
        input_device_index=default_speaker["index"],
    )

    return stream


def mergeStreamChannels(data: bytes, channels: int) -> bytes:
    """
    Merge interleaved multi-channel 16-bit audio data into a single channel.

    The buffer is read as int16 samples, grouped into frames of ``channels``
    consecutive samples, and each frame is replaced by the mean of its
    samples. The mean is computed in float32 and then converted back to
    int16, which drops the fractional part.

    Args:
        data: Multi-channel sample data as a bytes-like buffer of int16
            values; its sample count must be a multiple of ``channels``.
        channels: Number of channels in ``data``; must be at least 1.

    Returns:
        The single-channel int16 sample data as bytes.

    Raises:
        ValueError: If ``channels`` is smaller than 1, or if the buffer
            cannot be split evenly into frames of ``channels`` int16 samples.
    """
    if channels < 1:
        raise ValueError(
            f"channels must be a positive integer, got {channels!r}"
        )
    # (length * channels,)
    samples = np.frombuffer(data, dtype=np.int16)
    # (length, channels)
    frames = samples.reshape(-1, channels)
    # (length,) -- averaged in float32 so intermediate values are not limited
    # to the int16 range
    mono = np.mean(frames.astype(np.float32), axis=1)
    return mono.astype(np.int16).tobytes()
|
||||
|
||||
class LoopbackStream:
    """
    Holder for a PyAudio input stream on the default loopback device.

    The constructor creates a ``pyaudio.PyAudio`` instance, asks
    ``getDefaultLoopbackDevice`` for the device description and derives the
    stream parameters from it. The stream itself is opened lazily by
    ``openStream`` and released by ``closeStream``.
    """

    def __init__(self):
        self.mic = pyaudio.PyAudio()
        self.loopback = getDefaultLoopbackDevice(self.mic, False)
        # No stream is open until openStream() is called.
        self.stream = None
        # Size in bytes of one paInt16 sample.
        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = self.loopback["maxInputChannels"]
        self.RATE = int(self.loopback["defaultSampleRate"])
        # One tenth of the sample rate, i.e. the frame count of 0.1 s of audio.
        self.CHUNK = self.RATE // 10
        self.INDEX = self.loopback["index"]

    def printInfo(self):
        """Print the loopback device description and the stream parameters."""
        # The text lines are flush-left so the printed report carries no
        # leading indentation.
        dev_info = f"""
采样输入设备:
- 序号:{self.loopback['index']}
- 名称:{self.loopback['name']}
- 最大输入通道数:{self.loopback['maxInputChannels']}
- 默认低输入延迟:{self.loopback['defaultLowInputLatency']}s
- 默认高输入延迟:{self.loopback['defaultHighInputLatency']}s
- 默认采样率:{self.loopback['defaultSampleRate']}Hz
- 是否回环设备:{self.loopback['isLoopbackDevice']}

音频样本块大小:{self.CHUNK}
样本位宽:{self.SAMP_WIDTH}
音频数据格式:{self.FORMAT}
音频通道数:{self.CHANNELS}
音频采样率:{self.RATE}
"""
        print(dev_info)

    def openStream(self):
        """
        Open the system audio output (loopback) stream and return it.

        If a stream is already open, that stream is returned and no new one
        is created.

        Returns:
            The stream object returned by ``PyAudio.open``.
        """
        if self.stream is not None:
            return self.stream
        self.stream = self.mic.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            input=True,
            input_device_index=self.INDEX,
        )
        return self.stream

    def closeStream(self):
        """
        Stop and close the system audio output (loopback) stream.

        Does nothing when no stream is open. An exception raised while
        stopping the stream still propagates to the caller, but the stream
        is closed and the reference cleared first, so the object never keeps
        a half-closed stream around.
        """
        if self.stream is None:
            return
        stream = self.stream
        # Clear the reference up front: whatever happens below, a later
        # openStream() must create a fresh stream instead of reusing this one.
        self.stream = None
        try:
            stream.stop_stream()
        finally:
            stream.close()
|
||||
Reference in New Issue
Block a user