feat(python-subprocess): 尝试字幕显示新解决方案

- 使用 python 子进程解析字幕
- 通过 websocket 通信将字幕传递给软件
This commit is contained in:
himeditator
2025-06-15 12:43:57 +08:00
parent f10530eb67
commit 8858189bf6
18 changed files with 572 additions and 64 deletions

View File

@@ -1,41 +1,17 @@
from sysaudio.win import getDefaultLoopbackDevice
from audio2text.gummy import getGummpyTranslator
import pyaudiowpatch as pyaudio
import numpy as np
from sysaudio.win import LoopbackStream, mergeStreamChannels
from audio2text.gummy import GummyTranslator
# NOTE(review): this span looks like a diff hunk whose +/- markers, blank lines
# and indentation were stripped, so statements from two versions of the script
# appear interleaved. Evidence: `loopback` is assigned twice in a row below and
# is later used both as a mapping (loopback["..."]) and as an object with
# attributes (loopback.RATE, loopback.stream). Confirm against the repository
# which lines belong to which version before running any of this.
mic = pyaudio.PyAudio()
# Two consecutive assignments to `loopback`; the second overwrites the first.
loopback = getDefaultLoopbackDevice(mic)
loopback = LoopbackStream()
loopback.openStream()
# Capture parameters. The subscript lookups treat `loopback` as a mapping of
# device info — presumably the value returned by getDefaultLoopbackDevice;
# verify.
SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
FORMAT = pyaudio.paInt16
CHANNELS = loopback["maxInputChannels"]
RATE = int(loopback["defaultSampleRate"])
# One tenth of the sample rate, i.e. the frame count for 0.1 s of audio.
CHUNK = RATE // 10
INDEX = loopback["index"]
# Translator built from the stream's RATE attribute and the codes "ja", "zh"
# (presumably source and target language — TODO confirm argument order).
gummy = GummyTranslator(loopback.RATE, "ja", "zh")
gummy.translator.start()
# Runs 100 iterations. The following four statements are presumably the loop
# body (indentation is missing here): skip the iteration when no stream is
# open, otherwise read one chunk, pass it through mergeStreamChannels with the
# channel count, and send the result to the translator.
for i in range(0, 100):
if not loopback.stream: continue
data = loopback.stream.read(loopback.CHUNK)
data = mergeStreamChannels(data, loopback.CHANNELS)
gummy.translator.send_audio_frame(data)
RECORD_SECONDS = 20 # listening duration (s)
# Open an input stream on the device index read above.
stream = mic.open(
format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
input_device_index = INDEX
)
translator = getGummpyTranslator(rate=RATE)
translator.start()
# Iteration count = chunks per second (RATE / CHUNK) times RECORD_SECONDS.
# The statements up to send_audio_frame are presumably the loop body
# (indentation is missing here).
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
# Interpret the raw bytes as int16 samples and reshape to one row per frame
# with CHANNELS columns.
data_np = np.frombuffer(data, dtype=np.int16)
data_np_r = data_np.reshape(-1, CHANNELS)
# Average across channels in float32, then cast back to int16 to get a
# single-channel signal as bytes.
mono_data = np.mean(data_np_r.astype(np.float32), axis=1)
mono_data = mono_data.astype(np.int16)
mono_data_bytes = mono_data.tobytes()
translator.send_audio_frame(mono_data_bytes)
# Shutdown calls for both variants appear below: translator/stream first, then
# gummy/loopback.
translator.stop()
stream.stop_stream()
stream.close()
gummy.translator.stop()
loopback.closeStream()