feat(sysaudio): 支持 macOS 系统音频流采集

- 新增 darwin.py 文件实现 macOS 音频流采集功能
- 修改 main-gummy.py 以支持 macOS 平台
- 更新 AllConfig 和 CaptionEngine 以适配新平台
This commit is contained in:
himeditator mac
2025-07-08 17:04:15 +08:00
parent 65da30f83d
commit 7e953db6bd
14 changed files with 141 additions and 36 deletions

View File

@@ -3,6 +3,8 @@ import argparse
if sys.platform == 'win32':
from sysaudio.win import AudioStream
elif sys.platform == 'darwin':
from sysaudio.darwin import AudioStream
elif sys.platform == 'linux':
from sysaudio.linux import AudioStream
else:
@@ -12,9 +14,9 @@ from audioprcs import mergeChunkChannels
from audio2text import InvalidParameter, GummyTranslator
def convert_audio_to_text(s_lang, t_lang, audio_type):
def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate):
sys.stdout.reconfigure(line_buffering=True) # type: ignore
stream = AudioStream(audio_type)
stream = AudioStream(audio_type, chunk_rate)
if t_lang == 'none':
gummy = GummyTranslator(stream.RATE, s_lang, None)
@@ -43,10 +45,12 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
parser.add_argument('-a', '--audio_type', default='0', help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
args = parser.parse_args()
convert_audio_to_text(
args.source_language,
args.target_language,
int(args.audio_type)
int(args.audio_type),
int(args.chunk_rate)
)

View File

@@ -1,6 +1,6 @@
dashscope==1.23.5
numpy==2.2.6
samplerate==0.2.1
PyAudio==0.2.14
PyAudioWPatch==0.2.12.7 # Windows only
pyinstaller==6.14.1
dashscope
numpy
samplerate
PyAudio
PyAudioWPatch # Windows only
pyinstaller

View File

@@ -0,0 +1,85 @@
"""获取 MacOS 系统音频输入/输出流"""
import pyaudio
class AudioStream:
"""
获取系统音频流(支持 BlackHole 作为系统音频输出捕获)
初始化参数:
audio_type: 0-系统音频输出流(需配合 BlackHole1-系统音频输入流
chunk_rate: 每秒采集音频块的数量默认为20
"""
def __init__(self, audio_type=0, chunk_rate=20):
self.audio_type = audio_type
self.mic = pyaudio.PyAudio()
if self.audio_type == 0:
self.device = self.getOutputDeviceInfo()
else:
self.device = self.mic.get_default_input_device_info()
self.stream = None
self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
self.FORMAT = pyaudio.paInt16
self.CHANNELS = self.device["maxInputChannels"]
self.RATE = int(self.device["defaultSampleRate"])
self.CHUNK = self.RATE // chunk_rate
self.INDEX = self.device["index"]
def getOutputDeviceInfo(self):
"""查找指定关键词的输入设备"""
device_count = self.mic.get_device_count()
for i in range(device_count):
dev_info = self.mic.get_device_info_by_index(i)
if 'blackhole' in dev_info["name"].lower():
return dev_info
raise Exception("The device containing BlackHole was not found.")
def printInfo(self):
dev_info = f"""
采样输入设备:
- 设备类型:{ "音频输出" if self.audio_type == 0 else "音频输入" }
- 序号:{self.device['index']}
- 名称:{self.device['name']}
- 最大输入通道数:{self.device['maxInputChannels']}
- 默认低输入延迟:{self.device['defaultLowInputLatency']}s
- 默认高输入延迟:{self.device['defaultHighInputLatency']}s
- 默认采样率:{self.device['defaultSampleRate']}Hz
音频样本块大小:{self.CHUNK}
样本位宽:{self.SAMP_WIDTH}
采样格式:{self.FORMAT}
音频通道数:{self.CHANNELS}
音频采样率:{self.RATE}
"""
print(dev_info)
def openStream(self):
"""
打开并返回系统音频输出流
"""
if self.stream: return self.stream
self.stream = self.mic.open(
format = self.FORMAT,
channels = int(self.CHANNELS),
rate = self.RATE,
input = True,
input_device_index = int(self.INDEX)
)
return self.stream
def read_chunk(self):
"""
读取音频数据
"""
if not self.stream: return None
return self.stream.read(self.CHUNK, exception_on_overflow=False)
def closeStream(self):
"""
关闭系统音频输出流
"""
if self.stream is None: return
self.stream.stop_stream()
self.stream.close()
self.stream = None

View File

@@ -1,3 +1,5 @@
"""获取 Linux 系统音频输入流"""
import pyaudio

View File

@@ -1,4 +1,4 @@
"""获取 Windows 系统音频输出流"""
"""获取 Windows 系统音频输入/输出流"""
import pyaudiowpatch as pyaudio
@@ -101,7 +101,7 @@ class AudioStream:
读取音频数据
"""
if not self.stream: return None
return self.stream.read(self.CHUNK)
return self.stream.read(self.CHUNK, exception_on_overflow=False)
def closeStream(self):
"""