mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:14:42 +08:00
feat(sysaudio): 支持 macOS 系统音频流采集
- 新增 darwin.py 文件实现 macOS 音频流采集功能 - 修改 main-gummy.py 以支持 macOS 平台 - 更新 AllConfig 和 CaptionEngine 以适配新平台
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,3 +8,4 @@ __pycache__
|
||||
subenv
|
||||
caption-engine/build
|
||||
output.wav
|
||||
.venv
|
||||
@@ -3,6 +3,8 @@ import argparse
|
||||
|
||||
if sys.platform == 'win32':
|
||||
from sysaudio.win import AudioStream
|
||||
elif sys.platform == 'darwin':
|
||||
from sysaudio.darwin import AudioStream
|
||||
elif sys.platform == 'linux':
|
||||
from sysaudio.linux import AudioStream
|
||||
else:
|
||||
@@ -12,9 +14,9 @@ from audioprcs import mergeChunkChannels
|
||||
from audio2text import InvalidParameter, GummyTranslator
|
||||
|
||||
|
||||
def convert_audio_to_text(s_lang, t_lang, audio_type):
|
||||
def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate):
|
||||
sys.stdout.reconfigure(line_buffering=True) # type: ignore
|
||||
stream = AudioStream(audio_type)
|
||||
stream = AudioStream(audio_type, chunk_rate)
|
||||
|
||||
if t_lang == 'none':
|
||||
gummy = GummyTranslator(stream.RATE, s_lang, None)
|
||||
@@ -43,10 +45,12 @@ if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
||||
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
||||
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
||||
parser.add_argument('-a', '--audio_type', default='0', help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
|
||||
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
|
||||
parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
|
||||
args = parser.parse_args()
|
||||
convert_audio_to_text(
|
||||
args.source_language,
|
||||
args.target_language,
|
||||
int(args.audio_type)
|
||||
int(args.audio_type),
|
||||
int(args.chunk_rate)
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
dashscope==1.23.5
|
||||
numpy==2.2.6
|
||||
samplerate==0.2.1
|
||||
PyAudio==0.2.14
|
||||
PyAudioWPatch==0.2.12.7 # Windows only
|
||||
pyinstaller==6.14.1
|
||||
dashscope
|
||||
numpy
|
||||
samplerate
|
||||
PyAudio
|
||||
PyAudioWPatch # Windows only
|
||||
pyinstaller
|
||||
|
||||
85
caption-engine/sysaudio/darwin.py
Normal file
85
caption-engine/sysaudio/darwin.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""获取 MacOS 系统音频输入/输出流"""
|
||||
|
||||
import pyaudio
|
||||
|
||||
|
||||
class AudioStream:
    """Capture a macOS audio stream through PyAudio.

    System audio *output* can only be captured through a loopback driver,
    so ``audio_type == 0`` looks for a BlackHole device and records from
    its input side. Any other ``audio_type`` records from the default
    input device.

    Args:
        audio_type: 0 for the system audio output stream (requires
            BlackHole), 1 for the system audio input stream.
        chunk_rate: Number of audio chunks collected per second
            (default 20). Must be positive.

    Raises:
        ValueError: If ``chunk_rate`` is not positive.
        RuntimeError: If ``audio_type`` is 0 and no BlackHole device with
            input channels is available.
    """

    def __init__(self, audio_type=0, chunk_rate=20):
        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        self.stream = None
        try:
            if self.audio_type == 0:
                self.device = self.getOutputDeviceInfo()
            else:
                self.device = self.mic.get_default_input_device_info()
            self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
            self.FORMAT = pyaudio.paInt16
            self.CHANNELS = self.device["maxInputChannels"]
            self.RATE = int(self.device["defaultSampleRate"])
            self.CHUNK = self._frames_per_chunk(self.RATE, chunk_rate)
            self.INDEX = self.device["index"]
        except Exception:
            # Do not leak the PortAudio handle when setup fails half-way.
            self.mic.terminate()
            raise

    @staticmethod
    def _frames_per_chunk(rate, chunk_rate):
        """Return the number of frames per chunk for the given rates."""
        if chunk_rate <= 0:
            raise ValueError(
                f"chunk_rate must be a positive number, got {chunk_rate!r}"
            )
        # Never return 0: a zero-frame read would yield no audio at all.
        return max(1, rate // chunk_rate)

    @staticmethod
    def _is_blackhole_input(dev_info):
        """Return True if the device is a BlackHole device usable as input."""
        if 'blackhole' not in dev_info["name"].lower():
            return False
        # The stream is opened with input=True, so the device must expose
        # at least one input channel.
        return dev_info.get("maxInputChannels", 0) > 0

    def getOutputDeviceInfo(self):
        """Return the info dict of the first BlackHole device with inputs.

        Raises:
            RuntimeError: If no such device is present.
        """
        for i in range(self.mic.get_device_count()):
            dev_info = self.mic.get_device_info_by_index(i)
            if self._is_blackhole_input(dev_info):
                return dev_info
        raise RuntimeError("The device containing BlackHole was not found.")

    def printInfo(self):
        """Print the selected device and the stream parameters."""
        dev_info = f"""
        采样输入设备:
            - 设备类型:{ "音频输出" if self.audio_type == 0 else "音频输入" }
            - 序号:{self.device['index']}
            - 名称:{self.device['name']}
            - 最大输入通道数:{self.device['maxInputChannels']}
            - 默认低输入延迟:{self.device['defaultLowInputLatency']}s
            - 默认高输入延迟:{self.device['defaultHighInputLatency']}s
            - 默认采样率:{self.device['defaultSampleRate']}Hz

        音频样本块大小:{self.CHUNK}
        样本位宽:{self.SAMP_WIDTH}
        采样格式:{self.FORMAT}
        音频通道数:{self.CHANNELS}
        音频采样率:{self.RATE}
        """
        print(dev_info)

    def openStream(self):
        """Open the audio stream if needed and return it.

        Calling this again while a stream is open returns the same stream.
        """
        if self.stream:
            return self.stream
        self.stream = self.mic.open(
            format=self.FORMAT,
            channels=int(self.CHANNELS),
            rate=self.RATE,
            input=True,
            input_device_index=int(self.INDEX),
        )
        return self.stream

    def read_chunk(self):
        """Read one chunk of ``self.CHUNK`` frames.

        Returns:
            The raw audio data, or None when no stream is open.
        """
        if not self.stream:
            return None
        # Input overflows are tolerated rather than raised.
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def closeStream(self):
        """Stop and close the audio stream; do nothing if none is open."""
        if self.stream is None:
            return
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
|
||||
@@ -1,3 +1,5 @@
|
||||
"""获取 Linux 系统音频输入流"""
|
||||
|
||||
import pyaudio
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""获取 Windows 系统音频输出流"""
|
||||
"""获取 Windows 系统音频输入/输出流"""
|
||||
|
||||
import pyaudiowpatch as pyaudio
|
||||
|
||||
@@ -101,7 +101,7 @@ class AudioStream:
|
||||
读取音频数据
|
||||
"""
|
||||
if not self.stream: return None
|
||||
return self.stream.read(self.CHUNK)
|
||||
return self.stream.read(self.CHUNK, exception_on_overflow=False)
|
||||
|
||||
def closeStream(self):
|
||||
"""
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "1e12f3ef",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -11,15 +11,14 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
" 采样设备:\n",
|
||||
" 采样输入设备:\n",
|
||||
" - 设备类型:音频输出\n",
|
||||
" - 序号:26\n",
|
||||
" - 名称:耳机 (HUAWEI FreeLace 活力版) [Loopback]\n",
|
||||
" - 序号:0\n",
|
||||
" - 名称:BlackHole 2ch\n",
|
||||
" - 最大输入通道数:2\n",
|
||||
" - 默认低输入延迟:0.003s\n",
|
||||
" - 默认高输入延迟:0.01s\n",
|
||||
" - 默认低输入延迟:0.01s\n",
|
||||
" - 默认高输入延迟:0.1s\n",
|
||||
" - 默认采样率:48000.0Hz\n",
|
||||
" - 是否回环设备:True\n",
|
||||
"\n",
|
||||
" 音频样本块大小:2400\n",
|
||||
" 样本位宽:2\n",
|
||||
@@ -38,7 +37,7 @@
|
||||
"current_dir = os.getcwd() \n",
|
||||
"sys.path.append(os.path.join(current_dir, '../caption-engine'))\n",
|
||||
"\n",
|
||||
"from sysaudio.win import AudioStream\n",
|
||||
"from sysaudio.darwin import AudioStream\n",
|
||||
"from audioprcs import resampleRawChunk, mergeChunkChannels\n",
|
||||
"\n",
|
||||
"stream = AudioStream(0)\n",
|
||||
@@ -47,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "a72914f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -84,7 +83,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"id": "a6e8a098",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -168,7 +167,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "mystd",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -182,7 +181,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -37,6 +37,7 @@ export interface CaptionItem {
|
||||
}
|
||||
|
||||
export interface FullConfig {
|
||||
platform: string,
|
||||
uiLanguage: UILanguage,
|
||||
uiTheme: UITheme,
|
||||
leftBarWidth: number,
|
||||
|
||||
@@ -51,6 +51,7 @@ class AllConfig {
|
||||
if(config.uiTheme) this.uiTheme = config.uiTheme
|
||||
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
|
||||
if(config.styles) this.setStyles(config.styles)
|
||||
if(process.platform !== 'win32' && process.platform !== 'darwin') config.controls.audio = 1
|
||||
if(config.controls) this.setControls(config.controls)
|
||||
console.log('[INFO] Read Config from:', configPath)
|
||||
}
|
||||
@@ -71,6 +72,7 @@ class AllConfig {
|
||||
|
||||
public getFullConfig(): FullConfig {
|
||||
return {
|
||||
platform: process.platform,
|
||||
uiLanguage: this.uiLanguage,
|
||||
uiTheme: this.uiTheme,
|
||||
leftBarWidth: this.leftBarWidth,
|
||||
|
||||
@@ -27,7 +27,7 @@ export class CaptionEngine {
|
||||
if (process.platform === 'win32') {
|
||||
gummyName = 'main-gummy.exe'
|
||||
}
|
||||
else if (process.platform === 'linux') {
|
||||
else if (process.platform === 'darwin' || process.platform === 'linux') {
|
||||
gummyName = 'main-gummy'
|
||||
}
|
||||
else {
|
||||
@@ -124,16 +124,16 @@ export class CaptionEngine {
|
||||
if(this.processStatus !== 'running') return
|
||||
if (this.process) {
|
||||
console.log('[INFO] Trying to stop process, PID:', this.process.pid)
|
||||
if (process.platform === "win32" && this.process.pid) {
|
||||
exec(`taskkill /pid ${this.process.pid} /t /f`, (error) => {
|
||||
let cmd = `kill ${this.process.pid}`;
|
||||
if (process.platform === "win32") {
|
||||
cmd = `taskkill /pid ${this.process.pid} /t /f`
|
||||
}
|
||||
exec(cmd, (error) => {
|
||||
if (error) {
|
||||
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
|
||||
console.error(`[ERROR] Failed to kill process: ${error}`)
|
||||
}
|
||||
});
|
||||
} else {
|
||||
this.process.kill('SIGKILL');
|
||||
}
|
||||
})
|
||||
}
|
||||
this.processStatus = 'stopping'
|
||||
console.log('[INFO] Caption engine process stopping')
|
||||
|
||||
@@ -16,6 +16,7 @@ onMounted(() => {
|
||||
useGeneralSettingStore().uiTheme = data.uiTheme
|
||||
useGeneralSettingStore().leftBarWidth = data.leftBarWidth
|
||||
useCaptionStyleStore().setStyles(data.styles)
|
||||
useEngineControlStore().platform = data.platform
|
||||
useEngineControlStore().setControls(data.controls)
|
||||
useCaptionLogStore().captionData = data.captionLog
|
||||
})
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
<div class="input-item">
|
||||
<span class="input-label">{{ $t('engine.audioType') }}</span>
|
||||
<a-select
|
||||
:disabled="platform !== 'win32' && platform !== 'darwin'"
|
||||
class="input-area"
|
||||
v-model:value="currentAudio"
|
||||
:options="audioType"
|
||||
@@ -87,7 +88,7 @@ import { useI18n } from 'vue-i18n'
|
||||
const { t } = useI18n()
|
||||
|
||||
const engineControl = useEngineControlStore()
|
||||
const { captionEngine, audioType, changeSignal } = storeToRefs(engineControl)
|
||||
const { platform, captionEngine, audioType, changeSignal } = storeToRefs(engineControl)
|
||||
|
||||
const currentSourceLang = ref('auto')
|
||||
const currentTargetLang = ref('zh')
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { ref } from 'vue'
|
||||
import { ref, watch } from 'vue'
|
||||
import { defineStore } from 'pinia'
|
||||
|
||||
import { notification } from 'ant-design-vue'
|
||||
@@ -12,6 +12,7 @@ import { useGeneralSettingStore } from './generalSetting'
|
||||
|
||||
export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
const { t } = useI18n()
|
||||
const platform = ref('unknown')
|
||||
|
||||
const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage])
|
||||
const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage])
|
||||
@@ -91,7 +92,14 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
||||
});
|
||||
})
|
||||
|
||||
watch(platform, (newValue) => {
|
||||
if(newValue !== 'win32' && newValue !== 'darwin') {
|
||||
audio.value = 1
|
||||
}
|
||||
})
|
||||
|
||||
return {
|
||||
platform, // 系统平台
|
||||
captionEngine, // 字幕引擎
|
||||
audioType, // 音频类型
|
||||
engineEnabled, // 字幕引擎是否启用
|
||||
|
||||
@@ -37,6 +37,7 @@ export interface CaptionItem {
|
||||
}
|
||||
|
||||
export interface FullConfig {
|
||||
platform: string,
|
||||
uiLanguage: UILanguage,
|
||||
uiTheme: UITheme,
|
||||
leftBarWidth: number,
|
||||
|
||||
Reference in New Issue
Block a user