feat(sysaudio): 支持 macOS 系统音频流采集

- 新增 darwin.py 文件实现 macOS 音频流采集功能
- 修改 main-gummy.py 以支持 macOS 平台
- 更新 AllConfig 和 CaptionEngine 以适配新平台
This commit is contained in:
himeditator mac
2025-07-08 17:04:15 +08:00
parent 65da30f83d
commit 7e953db6bd
14 changed files with 141 additions and 36 deletions

1
.gitignore vendored
View File

@@ -8,3 +8,4 @@ __pycache__
subenv subenv
caption-engine/build caption-engine/build
output.wav output.wav
.venv

View File

@@ -3,6 +3,8 @@ import argparse
if sys.platform == 'win32': if sys.platform == 'win32':
from sysaudio.win import AudioStream from sysaudio.win import AudioStream
elif sys.platform == 'darwin':
from sysaudio.darwin import AudioStream
elif sys.platform == 'linux': elif sys.platform == 'linux':
from sysaudio.linux import AudioStream from sysaudio.linux import AudioStream
else: else:
@@ -12,9 +14,9 @@ from audioprcs import mergeChunkChannels
from audio2text import InvalidParameter, GummyTranslator from audio2text import InvalidParameter, GummyTranslator
def convert_audio_to_text(s_lang, t_lang, audio_type): def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate):
sys.stdout.reconfigure(line_buffering=True) # type: ignore sys.stdout.reconfigure(line_buffering=True) # type: ignore
stream = AudioStream(audio_type) stream = AudioStream(audio_type, chunk_rate)
if t_lang == 'none': if t_lang == 'none':
gummy = GummyTranslator(stream.RATE, s_lang, None) gummy = GummyTranslator(stream.RATE, s_lang, None)
@@ -43,10 +45,12 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert system audio stream to text') parser = argparse.ArgumentParser(description='Convert system audio stream to text')
parser.add_argument('-s', '--source_language', default='en', help='Source language code') parser.add_argument('-s', '--source_language', default='en', help='Source language code')
parser.add_argument('-t', '--target_language', default='zh', help='Target language code') parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
parser.add_argument('-a', '--audio_type', default='0', help='Audio stream source: 0 for output audio stream, 1 for input audio stream') parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
args = parser.parse_args() args = parser.parse_args()
convert_audio_to_text( convert_audio_to_text(
args.source_language, args.source_language,
args.target_language, args.target_language,
int(args.audio_type) int(args.audio_type),
int(args.chunk_rate)
) )

View File

@@ -1,6 +1,6 @@
dashscope==1.23.5 dashscope
numpy==2.2.6 numpy
samplerate==0.2.1 samplerate
PyAudio==0.2.14 PyAudio
PyAudioWPatch==0.2.12.7 # Windows only PyAudioWPatch # Windows only
pyinstaller==6.14.1 pyinstaller

View File

@@ -0,0 +1,85 @@
"""获取 MacOS 系统音频输入/输出流"""
import pyaudio
class AudioStream:
    """
    Capture a macOS audio stream through PyAudio.

    System output audio is captured through a device whose name contains
    "BlackHole"; input audio uses PyAudio's default input device.

    Args:
        audio_type: 0 selects the system audio output stream (requires a
            BlackHole device); any other value selects the default audio
            input stream.
        chunk_rate: Number of audio chunks collected per second
            (default 20). Must be positive.

    Raises:
        ValueError: If ``chunk_rate`` is not positive.
        RuntimeError: If ``audio_type`` is 0 and no usable BlackHole
            device is found.
    """

    def __init__(self, audio_type=0, chunk_rate=20):
        # Validate before creating any audio resources so that bad CLI input
        # fails with a clear message instead of a ZeroDivisionError (or a
        # negative chunk size) further down.
        if chunk_rate <= 0:
            raise ValueError(
                f"chunk_rate must be a positive number, got {chunk_rate!r}"
            )

        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        if self.audio_type == 0:
            self.device = self.getOutputDeviceInfo()
        else:
            self.device = self.mic.get_default_input_device_info()
        self.stream = None
        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = self.device["maxInputChannels"]
        self.RATE = int(self.device["defaultSampleRate"])
        # Never let the chunk size collapse to 0 frames when chunk_rate is
        # larger than the sample rate; a 0-frame read would return no data.
        self.CHUNK = max(1, self.RATE // chunk_rate)
        self.INDEX = self.device["index"]

    def getOutputDeviceInfo(self):
        """
        Find the capture device used for system output audio.

        Returns:
            The PyAudio device-info mapping of the first device whose name
            contains "blackhole" (case-insensitive) and that reports at
            least one input channel.

        Raises:
            RuntimeError: If no such device exists.
        """
        for i in range(self.mic.get_device_count()):
            dev_info = self.mic.get_device_info_by_index(i)
            if 'blackhole' not in dev_info["name"].lower():
                continue
            # The stream is opened with channels=maxInputChannels, so a
            # device without input channels cannot be recorded from.
            if int(dev_info["maxInputChannels"]) > 0:
                return dev_info
        raise RuntimeError("The device containing BlackHole was not found.")

    def printInfo(self):
        """Print the selected device's properties and the stream parameters."""
        dev_info = f"""
采样输入设备:
- 设备类型:{ "音频输出" if self.audio_type == 0 else "音频输入" }
- 序号:{self.device['index']}
- 名称:{self.device['name']}
- 最大输入通道数:{self.device['maxInputChannels']}
- 默认低输入延迟:{self.device['defaultLowInputLatency']}s
- 默认高输入延迟:{self.device['defaultHighInputLatency']}s
- 默认采样率:{self.device['defaultSampleRate']}Hz
音频样本块大小:{self.CHUNK}
样本位宽:{self.SAMP_WIDTH}
采样格式:{self.FORMAT}
音频通道数:{self.CHANNELS}
音频采样率:{self.RATE}
"""
        print(dev_info)

    def openStream(self):
        """
        Open the capture stream on the selected device and return it.

        If a stream is already open, that stream is returned unchanged.
        """
        if self.stream:
            return self.stream
        self.stream = self.mic.open(
            format=self.FORMAT,
            channels=int(self.CHANNELS),
            rate=self.RATE,
            input=True,
            input_device_index=int(self.INDEX),
        )
        return self.stream

    def read_chunk(self):
        """
        Read one chunk of ``self.CHUNK`` frames from the open stream.

        Returns:
            The data returned by the stream's ``read`` call, or None when
            no stream is open.
        """
        if not self.stream:
            return None
        # Input overflows are tolerated rather than raised.
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def closeStream(self):
        """Stop and close the capture stream; do nothing if none is open."""
        if self.stream is None:
            return
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None

View File

@@ -1,3 +1,5 @@
"""获取 Linux 系统音频输入流"""
import pyaudio import pyaudio

View File

@@ -1,4 +1,4 @@
"""获取 Windows 系统音频输出流""" """获取 Windows 系统音频输入/输出流"""
import pyaudiowpatch as pyaudio import pyaudiowpatch as pyaudio
@@ -101,7 +101,7 @@ class AudioStream:
读取音频数据 读取音频数据
""" """
if not self.stream: return None if not self.stream: return None
return self.stream.read(self.CHUNK) return self.stream.read(self.CHUNK, exception_on_overflow=False)
def closeStream(self): def closeStream(self):
""" """

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 7,
"id": "1e12f3ef", "id": "1e12f3ef",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -11,15 +11,14 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n", "\n",
" 采样设备:\n", " 采样输入设备:\n",
" - 设备类型:音频输出\n", " - 设备类型:音频输出\n",
" - 序号:26\n", " - 序号:0\n",
" - 名称:耳机 (HUAWEI FreeLace 活力版) [Loopback]\n", " - 名称:BlackHole 2ch\n",
" - 最大输入通道数2\n", " - 最大输入通道数2\n",
" - 默认低输入延迟0.003s\n", " - 默认低输入延迟0.01s\n",
" - 默认高输入延迟0.01s\n", " - 默认高输入延迟0.1s\n",
" - 默认采样率48000.0Hz\n", " - 默认采样率48000.0Hz\n",
" - 是否回环设备True\n",
"\n", "\n",
" 音频样本块大小2400\n", " 音频样本块大小2400\n",
" 样本位宽2\n", " 样本位宽2\n",
@@ -38,7 +37,7 @@
"current_dir = os.getcwd() \n", "current_dir = os.getcwd() \n",
"sys.path.append(os.path.join(current_dir, '../caption-engine'))\n", "sys.path.append(os.path.join(current_dir, '../caption-engine'))\n",
"\n", "\n",
"from sysaudio.win import AudioStream\n", "from sysaudio.darwin import AudioStream\n",
"from audioprcs import resampleRawChunk, mergeChunkChannels\n", "from audioprcs import resampleRawChunk, mergeChunkChannels\n",
"\n", "\n",
"stream = AudioStream(0)\n", "stream = AudioStream(0)\n",
@@ -47,7 +46,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 5,
"id": "a72914f4", "id": "a72914f4",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -84,7 +83,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"id": "a6e8a098", "id": "a6e8a098",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -168,7 +167,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "mystd", "display_name": ".venv",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -182,7 +181,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.12" "version": "3.9.6"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -37,6 +37,7 @@ export interface CaptionItem {
} }
export interface FullConfig { export interface FullConfig {
platform: string,
uiLanguage: UILanguage, uiLanguage: UILanguage,
uiTheme: UITheme, uiTheme: UITheme,
leftBarWidth: number, leftBarWidth: number,

View File

@@ -51,6 +51,7 @@ class AllConfig {
if(config.uiTheme) this.uiTheme = config.uiTheme if(config.uiTheme) this.uiTheme = config.uiTheme
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
if(config.styles) this.setStyles(config.styles) if(config.styles) this.setStyles(config.styles)
if(process.platform !== 'win32' && process.platform !== 'darwin') config.controls.audio = 1
if(config.controls) this.setControls(config.controls) if(config.controls) this.setControls(config.controls)
console.log('[INFO] Read Config from:', configPath) console.log('[INFO] Read Config from:', configPath)
} }
@@ -71,6 +72,7 @@ class AllConfig {
public getFullConfig(): FullConfig { public getFullConfig(): FullConfig {
return { return {
platform: process.platform,
uiLanguage: this.uiLanguage, uiLanguage: this.uiLanguage,
uiTheme: this.uiTheme, uiTheme: this.uiTheme,
leftBarWidth: this.leftBarWidth, leftBarWidth: this.leftBarWidth,

View File

@@ -27,7 +27,7 @@ export class CaptionEngine {
if (process.platform === 'win32') { if (process.platform === 'win32') {
gummyName = 'main-gummy.exe' gummyName = 'main-gummy.exe'
} }
else if (process.platform === 'linux') { else if (process.platform === 'darwin' || process.platform === 'linux') {
gummyName = 'main-gummy' gummyName = 'main-gummy'
} }
else { else {
@@ -124,16 +124,16 @@ export class CaptionEngine {
if(this.processStatus !== 'running') return if(this.processStatus !== 'running') return
if (this.process) { if (this.process) {
console.log('[INFO] Trying to stop process, PID:', this.process.pid) console.log('[INFO] Trying to stop process, PID:', this.process.pid)
if (process.platform === "win32" && this.process.pid) { let cmd = `kill ${this.process.pid}`;
exec(`taskkill /pid ${this.process.pid} /t /f`, (error) => { if (process.platform === "win32") {
if (error) { cmd = `taskkill /pid ${this.process.pid} /t /f`
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
console.error(`[ERROR] Failed to kill process: ${error}`)
}
});
} else {
this.process.kill('SIGKILL');
} }
exec(cmd, (error) => {
if (error) {
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
console.error(`[ERROR] Failed to kill process: ${error}`)
}
})
} }
this.processStatus = 'stopping' this.processStatus = 'stopping'
console.log('[INFO] Caption engine process stopping') console.log('[INFO] Caption engine process stopping')

View File

@@ -16,6 +16,7 @@ onMounted(() => {
useGeneralSettingStore().uiTheme = data.uiTheme useGeneralSettingStore().uiTheme = data.uiTheme
useGeneralSettingStore().leftBarWidth = data.leftBarWidth useGeneralSettingStore().leftBarWidth = data.leftBarWidth
useCaptionStyleStore().setStyles(data.styles) useCaptionStyleStore().setStyles(data.styles)
useEngineControlStore().platform = data.platform
useEngineControlStore().setControls(data.controls) useEngineControlStore().setControls(data.controls)
useCaptionLogStore().captionData = data.captionLog useCaptionLogStore().captionData = data.captionLog
}) })

View File

@@ -32,6 +32,7 @@
<div class="input-item"> <div class="input-item">
<span class="input-label">{{ $t('engine.audioType') }}</span> <span class="input-label">{{ $t('engine.audioType') }}</span>
<a-select <a-select
:disabled="platform !== 'win32' && platform !== 'darwin'"
class="input-area" class="input-area"
v-model:value="currentAudio" v-model:value="currentAudio"
:options="audioType" :options="audioType"
@@ -87,7 +88,7 @@ import { useI18n } from 'vue-i18n'
const { t } = useI18n() const { t } = useI18n()
const engineControl = useEngineControlStore() const engineControl = useEngineControlStore()
const { captionEngine, audioType, changeSignal } = storeToRefs(engineControl) const { platform, captionEngine, audioType, changeSignal } = storeToRefs(engineControl)
const currentSourceLang = ref('auto') const currentSourceLang = ref('auto')
const currentTargetLang = ref('zh') const currentTargetLang = ref('zh')

View File

@@ -1,4 +1,4 @@
import { ref } from 'vue' import { ref, watch } from 'vue'
import { defineStore } from 'pinia' import { defineStore } from 'pinia'
import { notification } from 'ant-design-vue' import { notification } from 'ant-design-vue'
@@ -12,6 +12,7 @@ import { useGeneralSettingStore } from './generalSetting'
export const useEngineControlStore = defineStore('engineControl', () => { export const useEngineControlStore = defineStore('engineControl', () => {
const { t } = useI18n() const { t } = useI18n()
const platform = ref('unknown')
const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage]) const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage])
const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage]) const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage])
@@ -91,7 +92,14 @@ export const useEngineControlStore = defineStore('engineControl', () => {
}); });
}) })
watch(platform, (newValue) => {
if(newValue !== 'win32' && newValue !== 'darwin') {
audio.value = 1
}
})
return { return {
platform, // 系统平台
captionEngine, // 字幕引擎 captionEngine, // 字幕引擎
audioType, // 音频类型 audioType, // 音频类型
engineEnabled, // 字幕引擎是否启用 engineEnabled, // 字幕引擎是否启用

View File

@@ -37,6 +37,7 @@ export interface CaptionItem {
} }
export interface FullConfig { export interface FullConfig {
platform: string,
uiLanguage: UILanguage, uiLanguage: UILanguage,
uiTheme: UITheme, uiTheme: UITheme,
leftBarWidth: number, leftBarWidth: number,