mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:14:42 +08:00
feat(sysaudio): 支持 macOS 系统音频流采集
- 新增 darwin.py 文件实现 macOS 音频流采集功能
- 修改 main-gummy.py 以支持 macOS 平台
- 更新 AllConfig 和 CaptionEngine 以适配新平台
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,3 +8,4 @@ __pycache__
|
|||||||
subenv
|
subenv
|
||||||
caption-engine/build
|
caption-engine/build
|
||||||
output.wav
|
output.wav
|
||||||
|
.venv
|
||||||
@@ -3,6 +3,8 @@ import argparse
|
|||||||
|
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
from sysaudio.win import AudioStream
|
from sysaudio.win import AudioStream
|
||||||
|
elif sys.platform == 'darwin':
|
||||||
|
from sysaudio.darwin import AudioStream
|
||||||
elif sys.platform == 'linux':
|
elif sys.platform == 'linux':
|
||||||
from sysaudio.linux import AudioStream
|
from sysaudio.linux import AudioStream
|
||||||
else:
|
else:
|
||||||
@@ -12,9 +14,9 @@ from audioprcs import mergeChunkChannels
|
|||||||
from audio2text import InvalidParameter, GummyTranslator
|
from audio2text import InvalidParameter, GummyTranslator
|
||||||
|
|
||||||
|
|
||||||
def convert_audio_to_text(s_lang, t_lang, audio_type):
|
def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate):
|
||||||
sys.stdout.reconfigure(line_buffering=True) # type: ignore
|
sys.stdout.reconfigure(line_buffering=True) # type: ignore
|
||||||
stream = AudioStream(audio_type)
|
stream = AudioStream(audio_type, chunk_rate)
|
||||||
|
|
||||||
if t_lang == 'none':
|
if t_lang == 'none':
|
||||||
gummy = GummyTranslator(stream.RATE, s_lang, None)
|
gummy = GummyTranslator(stream.RATE, s_lang, None)
|
||||||
@@ -43,10 +45,12 @@ if __name__ == "__main__":
|
|||||||
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
parser = argparse.ArgumentParser(description='Convert system audio stream to text')
|
||||||
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
parser.add_argument('-s', '--source_language', default='en', help='Source language code')
|
||||||
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
|
||||||
parser.add_argument('-a', '--audio_type', default='0', help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
|
parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
|
||||||
|
parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
convert_audio_to_text(
|
convert_audio_to_text(
|
||||||
args.source_language,
|
args.source_language,
|
||||||
args.target_language,
|
args.target_language,
|
||||||
int(args.audio_type)
|
int(args.audio_type),
|
||||||
|
int(args.chunk_rate)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
dashscope==1.23.5
|
dashscope
|
||||||
numpy==2.2.6
|
numpy
|
||||||
samplerate==0.2.1
|
samplerate
|
||||||
PyAudio==0.2.14
|
PyAudio
|
||||||
PyAudioWPatch==0.2.12.7 # Windows only
|
PyAudioWPatch # Windows only
|
||||||
pyinstaller==6.14.1
|
pyinstaller
|
||||||
|
|||||||
85
caption-engine/sysaudio/darwin.py
Normal file
85
caption-engine/sysaudio/darwin.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""获取 MacOS 系统音频输入/输出流"""
|
||||||
|
|
||||||
|
import pyaudio
|
||||||
|
|
||||||
|
|
||||||
|
class AudioStream:
    """
    Capture a system audio stream on macOS through PyAudio.

    System audio *output* is captured by recording from a BlackHole
    loopback device; any other ``audio_type`` records from the default
    input device reported by PyAudio.

    The PyAudio instance stored in ``self.mic`` is kept for the lifetime of
    the object so that the stream can be reopened after ``closeStream()``.

    Args:
        audio_type: 0 - system audio output stream (requires BlackHole),
            1 - system audio input stream.
        chunk_rate: Number of audio chunks collected per second
            (default 20). Must be positive.

    Raises:
        ValueError: If ``chunk_rate`` is not a positive number.
        RuntimeError: If ``audio_type`` is 0 and no usable BlackHole
            device is found.
    """

    def __init__(self, audio_type=0, chunk_rate=20):
        # Validate before touching the audio backend so a bad argument
        # fails fast instead of surfacing later as ZeroDivisionError or a
        # negative chunk size.
        if chunk_rate <= 0:
            raise ValueError(
                f"chunk_rate must be a positive number, got {chunk_rate!r}"
            )

        self.audio_type = audio_type
        self.stream = None
        self.mic = pyaudio.PyAudio()
        try:
            if self.audio_type == 0:
                self.device = self.getOutputDeviceInfo()
            else:
                self.device = self.mic.get_default_input_device_info()
        except Exception:
            # Do not leak the PyAudio instance when no device is usable.
            self.mic.terminate()
            raise

        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = self.device["maxInputChannels"]
        self.RATE = int(self.device["defaultSampleRate"])
        # Frames per chunk; never let it drop to 0 when chunk_rate exceeds
        # the sample rate.
        self.CHUNK = max(1, self.RATE // chunk_rate)
        self.INDEX = self.device["index"]

    def getOutputDeviceInfo(self):
        """
        Find the BlackHole device used to capture system audio output.

        Returns:
            The PyAudio device-info dict of the first device whose name
            contains "blackhole" (case-insensitive) and that exposes at
            least one input channel.

        Raises:
            RuntimeError: If no such device exists.
        """
        for i in range(self.mic.get_device_count()):
            dev_info = self.mic.get_device_info_by_index(i)
            if 'blackhole' not in dev_info["name"].lower():
                continue
            # The stream is opened with channels=maxInputChannels, so an
            # endpoint without input channels cannot be recorded from.
            if dev_info["maxInputChannels"] > 0:
                return dev_info
        raise RuntimeError("The device containing BlackHole was not found.")

    def printInfo(self):
        """Print the selected capture device and the stream parameters."""
        dev_info = f"""
        采样输入设备:
            - 设备类型:{ "音频输出" if self.audio_type == 0 else "音频输入" }
            - 序号:{self.device['index']}
            - 名称:{self.device['name']}
            - 最大输入通道数:{self.device['maxInputChannels']}
            - 默认低输入延迟:{self.device['defaultLowInputLatency']}s
            - 默认高输入延迟:{self.device['defaultHighInputLatency']}s
            - 默认采样率:{self.device['defaultSampleRate']}Hz

        音频样本块大小:{self.CHUNK}
        样本位宽:{self.SAMP_WIDTH}
        采样格式:{self.FORMAT}
        音频通道数:{self.CHANNELS}
        音频采样率:{self.RATE}
        """
        print(dev_info)

    def openStream(self):
        """
        Open the capture stream (if not already open) and return it.

        Calling this repeatedly returns the same stream object until
        ``closeStream()`` is called.
        """
        if self.stream is not None:
            return self.stream
        self.stream = self.mic.open(
            format=self.FORMAT,
            channels=int(self.CHANNELS),
            rate=self.RATE,
            input=True,
            input_device_index=int(self.INDEX),
        )
        return self.stream

    def read_chunk(self):
        """
        Read one chunk of ``self.CHUNK`` frames from the stream.

        Returns:
            The data returned by the stream's ``read``, or ``None`` when
            the stream has not been opened.
        """
        if self.stream is None:
            return None
        # Input overflows are tolerated instead of raising, so a slow
        # consumer does not abort the capture.
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def closeStream(self):
        """
        Stop and close the capture stream; a no-op if it is not open.
        """
        if self.stream is None:
            return
        # Detach first so the object is never left holding a half-closed
        # stream, and always release it even if stop_stream() fails.
        stream, self.stream = self.stream, None
        try:
            stream.stop_stream()
        finally:
            stream.close()
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
"""获取 Linux 系统音频输入流"""
|
||||||
|
|
||||||
import pyaudio
|
import pyaudio
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
"""获取 Windows 系统音频输出流"""
|
"""获取 Windows 系统音频输入/输出流"""
|
||||||
|
|
||||||
import pyaudiowpatch as pyaudio
|
import pyaudiowpatch as pyaudio
|
||||||
|
|
||||||
@@ -101,7 +101,7 @@ class AudioStream:
|
|||||||
读取音频数据
|
读取音频数据
|
||||||
"""
|
"""
|
||||||
if not self.stream: return None
|
if not self.stream: return None
|
||||||
return self.stream.read(self.CHUNK)
|
return self.stream.read(self.CHUNK, exception_on_overflow=False)
|
||||||
|
|
||||||
def closeStream(self):
|
def closeStream(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 7,
|
||||||
"id": "1e12f3ef",
|
"id": "1e12f3ef",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -11,15 +11,14 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"\n",
|
"\n",
|
||||||
" 采样设备:\n",
|
" 采样输入设备:\n",
|
||||||
" - 设备类型:音频输出\n",
|
" - 设备类型:音频输出\n",
|
||||||
" - 序号:26\n",
|
" - 序号:0\n",
|
||||||
" - 名称:耳机 (HUAWEI FreeLace 活力版) [Loopback]\n",
|
" - 名称:BlackHole 2ch\n",
|
||||||
" - 最大输入通道数:2\n",
|
" - 最大输入通道数:2\n",
|
||||||
" - 默认低输入延迟:0.003s\n",
|
" - 默认低输入延迟:0.01s\n",
|
||||||
" - 默认高输入延迟:0.01s\n",
|
" - 默认高输入延迟:0.1s\n",
|
||||||
" - 默认采样率:48000.0Hz\n",
|
" - 默认采样率:48000.0Hz\n",
|
||||||
" - 是否回环设备:True\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" 音频样本块大小:2400\n",
|
" 音频样本块大小:2400\n",
|
||||||
" 样本位宽:2\n",
|
" 样本位宽:2\n",
|
||||||
@@ -38,7 +37,7 @@
|
|||||||
"current_dir = os.getcwd() \n",
|
"current_dir = os.getcwd() \n",
|
||||||
"sys.path.append(os.path.join(current_dir, '../caption-engine'))\n",
|
"sys.path.append(os.path.join(current_dir, '../caption-engine'))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from sysaudio.win import AudioStream\n",
|
"from sysaudio.darwin import AudioStream\n",
|
||||||
"from audioprcs import resampleRawChunk, mergeChunkChannels\n",
|
"from audioprcs import resampleRawChunk, mergeChunkChannels\n",
|
||||||
"\n",
|
"\n",
|
||||||
"stream = AudioStream(0)\n",
|
"stream = AudioStream(0)\n",
|
||||||
@@ -47,7 +46,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 5,
|
||||||
"id": "a72914f4",
|
"id": "a72914f4",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -84,7 +83,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 8,
|
||||||
"id": "a6e8a098",
|
"id": "a6e8a098",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -168,7 +167,7 @@
|
|||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "mystd",
|
"display_name": ".venv",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@@ -182,7 +181,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.12"
|
"version": "3.9.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ export interface CaptionItem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface FullConfig {
|
export interface FullConfig {
|
||||||
|
platform: string,
|
||||||
uiLanguage: UILanguage,
|
uiLanguage: UILanguage,
|
||||||
uiTheme: UITheme,
|
uiTheme: UITheme,
|
||||||
leftBarWidth: number,
|
leftBarWidth: number,
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ class AllConfig {
|
|||||||
if(config.uiTheme) this.uiTheme = config.uiTheme
|
if(config.uiTheme) this.uiTheme = config.uiTheme
|
||||||
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
|
if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth
|
||||||
if(config.styles) this.setStyles(config.styles)
|
if(config.styles) this.setStyles(config.styles)
|
||||||
|
if(process.platform !== 'win32' && process.platform !== 'darwin') config.controls.audio = 1
|
||||||
if(config.controls) this.setControls(config.controls)
|
if(config.controls) this.setControls(config.controls)
|
||||||
console.log('[INFO] Read Config from:', configPath)
|
console.log('[INFO] Read Config from:', configPath)
|
||||||
}
|
}
|
||||||
@@ -71,6 +72,7 @@ class AllConfig {
|
|||||||
|
|
||||||
public getFullConfig(): FullConfig {
|
public getFullConfig(): FullConfig {
|
||||||
return {
|
return {
|
||||||
|
platform: process.platform,
|
||||||
uiLanguage: this.uiLanguage,
|
uiLanguage: this.uiLanguage,
|
||||||
uiTheme: this.uiTheme,
|
uiTheme: this.uiTheme,
|
||||||
leftBarWidth: this.leftBarWidth,
|
leftBarWidth: this.leftBarWidth,
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ export class CaptionEngine {
|
|||||||
if (process.platform === 'win32') {
|
if (process.platform === 'win32') {
|
||||||
gummyName = 'main-gummy.exe'
|
gummyName = 'main-gummy.exe'
|
||||||
}
|
}
|
||||||
else if (process.platform === 'linux') {
|
else if (process.platform === 'darwin' || process.platform === 'linux') {
|
||||||
gummyName = 'main-gummy'
|
gummyName = 'main-gummy'
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -124,16 +124,16 @@ export class CaptionEngine {
|
|||||||
if(this.processStatus !== 'running') return
|
if(this.processStatus !== 'running') return
|
||||||
if (this.process) {
|
if (this.process) {
|
||||||
console.log('[INFO] Trying to stop process, PID:', this.process.pid)
|
console.log('[INFO] Trying to stop process, PID:', this.process.pid)
|
||||||
if (process.platform === "win32" && this.process.pid) {
|
let cmd = `kill ${this.process.pid}`;
|
||||||
exec(`taskkill /pid ${this.process.pid} /t /f`, (error) => {
|
if (process.platform === "win32") {
|
||||||
if (error) {
|
cmd = `taskkill /pid ${this.process.pid} /t /f`
|
||||||
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
|
|
||||||
console.error(`[ERROR] Failed to kill process: ${error}`)
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
this.process.kill('SIGKILL');
|
|
||||||
}
|
}
|
||||||
|
exec(cmd, (error) => {
|
||||||
|
if (error) {
|
||||||
|
controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error)
|
||||||
|
console.error(`[ERROR] Failed to kill process: ${error}`)
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
this.processStatus = 'stopping'
|
this.processStatus = 'stopping'
|
||||||
console.log('[INFO] Caption engine process stopping')
|
console.log('[INFO] Caption engine process stopping')
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ onMounted(() => {
|
|||||||
useGeneralSettingStore().uiTheme = data.uiTheme
|
useGeneralSettingStore().uiTheme = data.uiTheme
|
||||||
useGeneralSettingStore().leftBarWidth = data.leftBarWidth
|
useGeneralSettingStore().leftBarWidth = data.leftBarWidth
|
||||||
useCaptionStyleStore().setStyles(data.styles)
|
useCaptionStyleStore().setStyles(data.styles)
|
||||||
|
useEngineControlStore().platform = data.platform
|
||||||
useEngineControlStore().setControls(data.controls)
|
useEngineControlStore().setControls(data.controls)
|
||||||
useCaptionLogStore().captionData = data.captionLog
|
useCaptionLogStore().captionData = data.captionLog
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -32,6 +32,7 @@
|
|||||||
<div class="input-item">
|
<div class="input-item">
|
||||||
<span class="input-label">{{ $t('engine.audioType') }}</span>
|
<span class="input-label">{{ $t('engine.audioType') }}</span>
|
||||||
<a-select
|
<a-select
|
||||||
|
:disabled="platform !== 'win32' && platform !== 'darwin'"
|
||||||
class="input-area"
|
class="input-area"
|
||||||
v-model:value="currentAudio"
|
v-model:value="currentAudio"
|
||||||
:options="audioType"
|
:options="audioType"
|
||||||
@@ -87,7 +88,7 @@ import { useI18n } from 'vue-i18n'
|
|||||||
const { t } = useI18n()
|
const { t } = useI18n()
|
||||||
|
|
||||||
const engineControl = useEngineControlStore()
|
const engineControl = useEngineControlStore()
|
||||||
const { captionEngine, audioType, changeSignal } = storeToRefs(engineControl)
|
const { platform, captionEngine, audioType, changeSignal } = storeToRefs(engineControl)
|
||||||
|
|
||||||
const currentSourceLang = ref('auto')
|
const currentSourceLang = ref('auto')
|
||||||
const currentTargetLang = ref('zh')
|
const currentTargetLang = ref('zh')
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { ref } from 'vue'
|
import { ref, watch } from 'vue'
|
||||||
import { defineStore } from 'pinia'
|
import { defineStore } from 'pinia'
|
||||||
|
|
||||||
import { notification } from 'ant-design-vue'
|
import { notification } from 'ant-design-vue'
|
||||||
@@ -12,6 +12,7 @@ import { useGeneralSettingStore } from './generalSetting'
|
|||||||
|
|
||||||
export const useEngineControlStore = defineStore('engineControl', () => {
|
export const useEngineControlStore = defineStore('engineControl', () => {
|
||||||
const { t } = useI18n()
|
const { t } = useI18n()
|
||||||
|
const platform = ref('unknown')
|
||||||
|
|
||||||
const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage])
|
const captionEngine = ref(engines[useGeneralSettingStore().uiLanguage])
|
||||||
const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage])
|
const audioType = ref(audioTypes[useGeneralSettingStore().uiLanguage])
|
||||||
@@ -91,7 +92,14 @@ export const useEngineControlStore = defineStore('engineControl', () => {
|
|||||||
});
|
});
|
||||||
})
|
})
|
||||||
|
|
||||||
|
watch(platform, (newValue) => {
|
||||||
|
if(newValue !== 'win32' && newValue !== 'darwin') {
|
||||||
|
audio.value = 1
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
platform, // 系统平台
|
||||||
captionEngine, // 字幕引擎
|
captionEngine, // 字幕引擎
|
||||||
audioType, // 音频类型
|
audioType, // 音频类型
|
||||||
engineEnabled, // 字幕引擎是否启用
|
engineEnabled, // 字幕引擎是否启用
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ export interface CaptionItem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface FullConfig {
|
export interface FullConfig {
|
||||||
|
platform: string,
|
||||||
uiLanguage: UILanguage,
|
uiLanguage: UILanguage,
|
||||||
uiTheme: UITheme,
|
uiTheme: UITheme,
|
||||||
leftBarWidth: number,
|
leftBarWidth: number,
|
||||||
|
|||||||
Reference in New Issue
Block a user