diff --git a/.gitignore b/.gitignore index d4eaf7e..75d2a08 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ __pycache__ subenv caption-engine/build output.wav +.venv \ No newline at end of file diff --git a/caption-engine/main-gummy.py b/caption-engine/main-gummy.py index e3981fb..c4b31b1 100644 --- a/caption-engine/main-gummy.py +++ b/caption-engine/main-gummy.py @@ -3,6 +3,8 @@ import argparse if sys.platform == 'win32': from sysaudio.win import AudioStream +elif sys.platform == 'darwin': + from sysaudio.darwin import AudioStream elif sys.platform == 'linux': from sysaudio.linux import AudioStream else: @@ -12,9 +14,9 @@ from audioprcs import mergeChunkChannels from audio2text import InvalidParameter, GummyTranslator -def convert_audio_to_text(s_lang, t_lang, audio_type): +def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate): sys.stdout.reconfigure(line_buffering=True) # type: ignore - stream = AudioStream(audio_type) + stream = AudioStream(audio_type, chunk_rate) if t_lang == 'none': gummy = GummyTranslator(stream.RATE, s_lang, None) @@ -43,10 +45,12 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description='Convert system audio stream to text') parser.add_argument('-s', '--source_language', default='en', help='Source language code') parser.add_argument('-t', '--target_language', default='zh', help='Target language code') - parser.add_argument('-a', '--audio_type', default='0', help='Audio stream source: 0 for output audio stream, 1 for input audio stream') + parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream') + parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.') args = parser.parse_args() convert_audio_to_text( args.source_language, args.target_language, - int(args.audio_type) + int(args.audio_type), + int(args.chunk_rate) ) diff --git a/caption-engine/requirements.txt 
class AudioStream:
    """
    Capture a macOS system audio stream through PyAudio.

    System *output* audio is captured through a device whose name contains
    "BlackHole"; system *input* audio uses the default input device.

    Args:
        audio_type: 0 for the system audio output stream (looked up by the
            "BlackHole" device name), any other value for the default
            system audio input stream.
        chunk_rate: number of audio chunks collected per second
            (default 20). Must be greater than zero.

    Raises:
        ValueError: if ``chunk_rate`` is not positive.
        RuntimeError: if ``audio_type`` is 0 and no usable BlackHole input
            device is found.
    """

    def __init__(self, audio_type=0, chunk_rate=20):
        # Reject bad rates before any audio resource is acquired; otherwise
        # the integer division below fails with ZeroDivisionError (rate 0)
        # or yields a negative chunk size (negative rate).
        if chunk_rate <= 0:
            raise ValueError(
                f"chunk_rate must be a positive number, got {chunk_rate!r}"
            )

        # Local import: PyAudio is only needed once an instance is built.
        import pyaudio

        self.audio_type = audio_type
        self.mic = pyaudio.PyAudio()
        try:
            if self.audio_type == 0:
                self.device = self.getOutputDeviceInfo()
            else:
                self.device = self.mic.get_default_input_device_info()
        except Exception:
            # Do not leak the PyAudio instance when device lookup fails.
            self.mic.terminate()
            raise

        self.stream = None
        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = self.device["maxInputChannels"]
        self.RATE = int(self.device["defaultSampleRate"])
        self.CHUNK = self._chunkSize(self.RATE, chunk_rate)
        self.INDEX = self.device["index"]

    @staticmethod
    def _chunkSize(rate, chunk_rate):
        """Return the frames per chunk for ``rate`` Hz, never less than 1."""
        return max(1, rate // chunk_rate)

    def getOutputDeviceInfo(self):
        """
        Return the info dict of the first BlackHole device usable as input.

        Devices are scanned in index order. A device qualifies when its name
        contains "blackhole" (case-insensitive) and it reports at least one
        input channel, since the stream is opened with ``input=True``.

        Raises:
            RuntimeError: if no such device exists.
        """
        for i in range(self.mic.get_device_count()):
            dev_info = self.mic.get_device_info_by_index(i)
            if 'blackhole' not in dev_info["name"].lower():
                continue
            # A device with zero input channels cannot be opened for capture.
            if dev_info["maxInputChannels"] > 0:
                return dev_info
        raise RuntimeError("The device containing BlackHole was not found.")

    def printInfo(self):
        """Print the selected device's properties and the stream parameters."""
        dev_info = f"""
        采样输入设备:
            - 设备类型:{ "音频输出" if self.audio_type == 0 else "音频输入" }
            - 序号:{self.device['index']}
            - 名称:{self.device['name']}
            - 最大输入通道数:{self.device['maxInputChannels']}
            - 默认低输入延迟:{self.device['defaultLowInputLatency']}s
            - 默认高输入延迟:{self.device['defaultHighInputLatency']}s
            - 默认采样率:{self.device['defaultSampleRate']}Hz

        音频样本块大小:{self.CHUNK}
        样本位宽:{self.SAMP_WIDTH}
        采样格式:{self.FORMAT}
        音频通道数:{self.CHANNELS}
        音频采样率:{self.RATE}
        """
        print(dev_info)

    def openStream(self):
        """
        Open the capture stream and return it.

        If a stream is already open, that same stream is returned.
        """
        if self.stream is not None:
            return self.stream
        self.stream = self.mic.open(
            format=self.FORMAT,
            channels=int(self.CHANNELS),
            rate=self.RATE,
            input=True,
            input_device_index=int(self.INDEX),
        )
        return self.stream

    def read_chunk(self):
        """
        Read one chunk (``self.CHUNK`` frames) of audio data.

        Returns:
            The data returned by the stream's ``read``, or ``None`` when no
            stream is open. Input overflows do not raise
            (``exception_on_overflow=False``).
        """
        if self.stream is None:
            return None
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def closeStream(self):
        """
        Stop and close the capture stream; do nothing if none is open.
        """
        if self.stream is None:
            return
        # Detach first so the instance never keeps a half-closed stream,
        # and make sure close() runs even if stop_stream() raises.
        stream, self.stream = self.stream, None
        try:
            stream.stop_stream()
        finally:
            stream.close()
最大输入通道数:2\n", - " - 默认低输入延迟:0.003s\n", - " - 默认高输入延迟:0.01s\n", + " - 默认低输入延迟:0.01s\n", + " - 默认高输入延迟:0.1s\n", " - 默认采样率:48000.0Hz\n", - " - 是否回环设备:True\n", "\n", " 音频样本块大小:2400\n", " 样本位宽:2\n", @@ -38,7 +37,7 @@ "current_dir = os.getcwd() \n", "sys.path.append(os.path.join(current_dir, '../caption-engine'))\n", "\n", - "from sysaudio.win import AudioStream\n", + "from sysaudio.darwin import AudioStream\n", "from audioprcs import resampleRawChunk, mergeChunkChannels\n", "\n", "stream = AudioStream(0)\n", @@ -47,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "id": "a72914f4", "metadata": {}, "outputs": [ @@ -84,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "a6e8a098", "metadata": {}, "outputs": [ @@ -168,7 +167,7 @@ ], "metadata": { "kernelspec": { - "display_name": "mystd", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -182,7 +181,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/src/main/types/index.ts b/src/main/types/index.ts index 571145d..bc2632f 100644 --- a/src/main/types/index.ts +++ b/src/main/types/index.ts @@ -37,6 +37,7 @@ export interface CaptionItem { } export interface FullConfig { + platform: string, uiLanguage: UILanguage, uiTheme: UITheme, leftBarWidth: number, diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts index 5decb3e..8b28d7d 100644 --- a/src/main/utils/AllConfig.ts +++ b/src/main/utils/AllConfig.ts @@ -51,6 +51,7 @@ class AllConfig { if(config.uiTheme) this.uiTheme = config.uiTheme if(config.leftBarWidth) this.leftBarWidth = config.leftBarWidth if(config.styles) this.setStyles(config.styles) + if(process.platform !== 'win32' && process.platform !== 'darwin') config.controls.audio = 1 if(config.controls) this.setControls(config.controls) console.log('[INFO] Read Config from:', configPath) } @@ -71,6 
+72,7 @@ class AllConfig { public getFullConfig(): FullConfig { return { + platform: process.platform, uiLanguage: this.uiLanguage, uiTheme: this.uiTheme, leftBarWidth: this.leftBarWidth, diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts index 4581e4c..668c4a0 100644 --- a/src/main/utils/CaptionEngine.ts +++ b/src/main/utils/CaptionEngine.ts @@ -27,7 +27,7 @@ export class CaptionEngine { if (process.platform === 'win32') { gummyName = 'main-gummy.exe' } - else if (process.platform === 'linux') { + else if (process.platform === 'darwin' || process.platform === 'linux') { gummyName = 'main-gummy' } else { @@ -124,16 +124,16 @@ export class CaptionEngine { if(this.processStatus !== 'running') return if (this.process) { console.log('[INFO] Trying to stop process, PID:', this.process.pid) - if (process.platform === "win32" && this.process.pid) { - exec(`taskkill /pid ${this.process.pid} /t /f`, (error) => { - if (error) { - controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error) - console.error(`[ERROR] Failed to kill process: ${error}`) - } - }); - } else { - this.process.kill('SIGKILL'); + let cmd = `kill ${this.process.pid}`; + if (process.platform === "win32") { + cmd = `taskkill /pid ${this.process.pid} /t /f` } + exec(cmd, (error) => { + if (error) { + controlWindow.sendErrorMessage(i18n('engine.shutdown.error') + error) + console.error(`[ERROR] Failed to kill process: ${error}`) + } + }) } this.processStatus = 'stopping' console.log('[INFO] Caption engine process stopping') diff --git a/src/renderer/src/App.vue b/src/renderer/src/App.vue index 3189e9e..637ffe3 100644 --- a/src/renderer/src/App.vue +++ b/src/renderer/src/App.vue @@ -16,6 +16,7 @@ onMounted(() => { useGeneralSettingStore().uiTheme = data.uiTheme useGeneralSettingStore().leftBarWidth = data.leftBarWidth useCaptionStyleStore().setStyles(data.styles) + useEngineControlStore().platform = data.platform useEngineControlStore().setControls(data.controls) 
useCaptionLogStore().captionData = data.captionLog }) diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue index ea8dd29..26deb28 100644 --- a/src/renderer/src/components/EngineControl.vue +++ b/src/renderer/src/components/EngineControl.vue @@ -32,6 +32,7 @@