diff --git a/.gitignore b/.gitignore index 4a5feda..f385eb0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ out *.log* __pycache__ subenv -build \ No newline at end of file +python-subprocess/build \ No newline at end of file diff --git a/README.md b/README.md index 2fee440..cedbaa8 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,9 @@ - 灵活的字幕引擎选择 - 多语言识别与翻译 - 字幕记录展示与导出 +- 生成音频输出和麦克风输入的字幕 + +说明:Windows 平台支持生成音频输出和麦克风输入的字幕,Linux 平台仅支持生成麦克风输入的字幕。 ## 🚀 项目运行 @@ -32,11 +35,42 @@ npm install ### 构建字幕引擎 -字幕引擎原理:所谓的字幕引擎实际上是一个子程序,它会实时获取系统音频输入(录音)或输出(播放声音)的流式数据,并调用音频转文字的模型生成对应音频的字幕。生成的字幕通过 IPC 输出为转换为字符串的 JSON 数据,并返回给主程序。主程序读取字幕数据,处理后显示在窗口上。 +> #### 背景介绍 +> +> 所谓的字幕引擎实际上是一个子程序,它会实时获取系统音频输入(录音)或输出(播放声音)的流式数据,并调用音频转文字的模型生成对应音频的字幕。生成的字幕通过 IPC 输出为转换为字符串的 JSON 数据,并返回给主程序。主程序读取字幕数据,处理后显示在窗口上。 +> +>目前项目默认使用[阿里云 Gummy 模型](https://help.aliyun.com/zh/model-studio/gummy-speech-recognition-translation/),需要获取阿里云百炼平台的 API KEY 并配置到环境变量中才能正常使用该模型,相关介绍:[获取API KEY](https://help.aliyun.com/zh/model-studio/get-api-key)、[将API Key配置到环境变量](https://help.aliyun.com/zh/model-studio/configure-api-key-through-environment-variables)。 +> +> 本项目的 gummy 字幕引擎是一个 python 子程序,通过 pyinstaller 打包为可执行文件。 运行字幕引擎子程序的代码在 `src\main\utils\engine.ts` 文件中。 -目前项目默认使用 [阿里云 Gummy 模型](https://help.aliyun.com/zh/model-studio/gummy-speech-recognition-translation/),需要有阿里云百炼平台的 API KEY 才能正常使用该模型。 首先进入 `python-subprocess` 文件夹,执行如下指令创建虚拟环境: -gummy 字幕引擎是一个 python 子程序,可以选择配置好 python 环境后直接运行该程序,也可以使用 pyinstaller 构建一个可执行文件。 运行字幕引擎子程序的代码在 `src\main\utils\engine.ts` 文件中 +```bash +python -m venv subenv +``` + +然后激活虚拟环境: + +```bash +# Windows +subenv/Scripts/activate +# Linux +source subenv/bin/activate +``` + +然后安装依赖: + +```bash +pip install -r requirements.txt +``` + +然后使用 `pyinstaller` 构建项目: + +```bash +pyinstaller --onefile main-gummy.py +``` + +此时项目构建完成,在进入 `python-subprocess/dist` 文件夹可见对应的可执行文件。即可进行后续操作。 ### 运行项目 diff --git a/electron-builder.yml b/electron-builder.yml index d222608..ef543cc 100644 --- 
a/electron-builder.yml +++ b/electron-builder.yml @@ -1,4 +1,4 @@ -appId: com.electron.app +appId: com.himeditator.autocaption productName: auto-caption directories: buildResources: build @@ -9,10 +9,14 @@ files: - '!{.eslintcache,eslint.config.mjs,.prettierignore,.prettierrc.yaml,dev-app-update.yml,CHANGELOG.md,README.md}' - '!{.env,.env.*,.npmrc,pnpm-lock.yaml}' - '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}' +extraResources: + from: ./python-subprocess/dist/main-gummy.exe + to: ./python-subprocess/dist/main-gummy.exe asarUnpack: - resources/** win: executableName: auto-caption + icon: resources/icon.png nsis: artifactName: ${name}-${version}-setup.${ext} shortcutName: ${productName} diff --git a/python-subprocess/main-gummy.py b/python-subprocess/main-gummy.py new file mode 100644 index 0000000..a169a34 --- /dev/null +++ b/python-subprocess/main-gummy.py @@ -0,0 +1,48 @@ +import sys + +if sys.platform == 'win32': + from sysaudio.win import AudioStream, mergeStreamChannels +elif sys.platform == 'linux': + from sysaudio.linux import AudioStream, mergeStreamChannels +else: + raise NotImplementedError(f"Unsupported platform: {sys.platform}") + +from audio2text.gummy import GummyTranslator +import sys +import argparse + +def convert_audio_to_text(s_lang, t_lang, audio_type): + sys.stdout.reconfigure(line_buffering=True) + stream = AudioStream(audio_type) + stream.openStream() + + if t_lang == 'none': + gummy = GummyTranslator(stream.RATE, s_lang, None) + else: + gummy = GummyTranslator(stream.RATE, s_lang, t_lang) + gummy.translator.start() + + while True: + try: + if not stream.stream: continue + data = stream.stream.read(stream.CHUNK) + data = mergeStreamChannels(data, stream.CHANNELS) + gummy.translator.send_audio_frame(data) + except KeyboardInterrupt: + stream.closeStream() + gummy.translator.stop() + break + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Convert system audio stream to text') + parser.add_argument('-s', 
'--source_language', default='en', help='Source language code') + parser.add_argument('-t', '--target_language', default='zh', help='Target language code') + parser.add_argument('-a', '--audio_type', default='0', help='Audio stream source: 0 for output audio stream, 1 for input audio stream') + args = parser.parse_args() + convert_audio_to_text( + args.source_language, + args.target_language, + 0 if args.audio_type == '0' else 1 + ) + \ No newline at end of file diff --git a/python-subprocess/main.spec b/python-subprocess/main-gummy.spec similarity index 93% rename from python-subprocess/main.spec rename to python-subprocess/main-gummy.spec index 2ba8dd9..bb6ac23 100644 --- a/python-subprocess/main.spec +++ b/python-subprocess/main-gummy.spec @@ -2,7 +2,7 @@ a = Analysis( - ['main.py'], + ['main-gummy.py'], pathex=[], binaries=[], datas=[], @@ -22,7 +22,7 @@ exe = EXE( a.binaries, a.datas, [], - name='main', + name='main-gummy', debug=False, bootloader_ignore_signals=False, strip=False, diff --git a/python-subprocess/main.py b/python-subprocess/main.py deleted file mode 100644 index 6edf167..0000000 --- a/python-subprocess/main.py +++ /dev/null @@ -1,27 +0,0 @@ -from sysaudio.win import LoopbackStream, mergeStreamChannels -from audio2text.gummy import GummyTranslator -import sys -import argparse - -def convert_audio_to_text(s_lang, t_lang, audio_source): - sys.stdout.reconfigure(line_buffering=True) - loopback = LoopbackStream() - loopback.openStream() - - gummy = GummyTranslator(loopback.RATE, s_lang, t_lang) - gummy.translator.start() - - while True: - if not loopback.stream: continue - data = loopback.stream.read(loopback.CHUNK) - data = mergeStreamChannels(data, loopback.CHANNELS) - gummy.translator.send_audio_frame(data) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Convert system audio stream to text') - parser.add_argument('-s', '--s_lang', default='en', help='Source language code') - parser.add_argument('-t', '--t_lang', 
default='zh', help='Target language code') - parser.add_argument('-a', '--audio', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream') - args = parser.parse_args() - convert_audio_to_text(args.s_lang, args.t_lang, args.audio) \ No newline at end of file diff --git a/python-subprocess/requirements.txt b/python-subprocess/requirements.txt index c70e828..82df817 100644 Binary files a/python-subprocess/requirements.txt and b/python-subprocess/requirements.txt differ diff --git a/python-subprocess/sysaudio/linux.py b/python-subprocess/sysaudio/linux.py new file mode 100644 index 0000000..3473515 --- /dev/null +++ b/python-subprocess/sysaudio/linux.py @@ -0,0 +1,79 @@ +import pyaudio +import numpy as np + +def mergeStreamChannels(data, channels): + """ + 将当前多通道流数据合并为单通道流数据 + + Args: + data: 多通道数据 + channels: 通道数 + + Returns: + mono_data_bytes: 单通道数据 + """ + # (length * channels,) + data_np = np.frombuffer(data, dtype=np.int16) + # (length, channels) + data_np_r = data_np.reshape(-1, channels) + # (length,) + mono_data = np.mean(data_np_r.astype(np.float32), axis=1) + mono_data = mono_data.astype(np.int16) + mono_data_bytes = mono_data.tobytes() + return mono_data_bytes + + +class AudioStream: + def __init__(self, audio_type=1): + self.audio_type = audio_type + self.mic = pyaudio.PyAudio() + self.device = self.mic.get_default_input_device_info() + self.stream = None + self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16) + self.FORMAT = pyaudio.paInt16 + self.CHANNELS = self.device["maxInputChannels"] + self.RATE = int(self.device["defaultSampleRate"]) + self.CHUNK = self.RATE // 20 + self.INDEX = self.device["index"] + + def printInfo(self): + dev_info = f""" + 采样输入设备: + - 设备类型:{ "音频输入(Linux平台目前仅支持该项)" } + - 序号:{self.device['index']} + - 名称:{self.device['name']} + - 最大输入通道数:{self.device['maxInputChannels']} + - 默认低输入延迟:{self.device['defaultLowInputLatency']}s + - 默认高输入延迟:{self.device['defaultHighInputLatency']}s + - 
默认采样率:{self.device['defaultSampleRate']}Hz + + 音频样本块大小:{self.CHUNK} + 样本位宽:{self.SAMP_WIDTH} + 音频数据格式:{self.FORMAT} + 音频通道数:{self.CHANNELS} + 音频采样率:{self.RATE} + """ + print(dev_info) + + def openStream(self): + """ + 打开并返回系统音频输出流 + """ + if self.stream: return self.stream + self.stream = self.mic.open( + format = self.FORMAT, + channels = self.CHANNELS, + rate = self.RATE, + input = True, + input_device_index = self.INDEX + ) + return self.stream + + def closeStream(self): + """ + 关闭系统音频输出流 + """ + if self.stream is None: return + self.stream.stop_stream() + self.stream.close() + self.stream = None \ No newline at end of file diff --git a/python-subprocess/sysaudio/win.py b/python-subprocess/sysaudio/win.py index 6363720..f7c7af6 100644 --- a/python-subprocess/sysaudio/win.py +++ b/python-subprocess/sysaudio/win.py @@ -61,28 +61,39 @@ def mergeStreamChannels(data, channels): mono_data_bytes = mono_data.tobytes() return mono_data_bytes -class LoopbackStream: - def __init__(self): +class AudioStream: + """ + 获取系统音频流 + + 参数: + audio_type: (默认)0-系统音频输出流,1-系统音频输入流 + """ + def __init__(self, audio_type=0): + self.audio_type = audio_type self.mic = pyaudio.PyAudio() - self.loopback = getDefaultLoopbackDevice(self.mic, False) + if self.audio_type == 0: + self.device = getDefaultLoopbackDevice(self.mic, False) + else: + self.device = self.mic.get_default_input_device_info() self.stream = None self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16) self.FORMAT = pyaudio.paInt16 - self.CHANNELS = self.loopback["maxInputChannels"] - self.RATE = int(self.loopback["defaultSampleRate"]) + self.CHANNELS = self.device["maxInputChannels"] + self.RATE = int(self.device["defaultSampleRate"]) self.CHUNK = self.RATE // 20 - self.INDEX = self.loopback["index"] + self.INDEX = self.device["index"] def printInfo(self): dev_info = f""" - 采样输入设备: - - 序号:{self.loopback['index']} - - 名称:{self.loopback['name']} - - 最大输入通道数:{self.loopback['maxInputChannels']} - - 
默认低输入延迟:{self.loopback['defaultLowInputLatency']}s - - 默认高输入延迟:{self.loopback['defaultHighInputLatency']}s - - 默认采样率:{self.loopback['defaultSampleRate']}Hz - - 是否回环设备:{self.loopback['isLoopbackDevice']} + 采样设备: + - 设备类型:{ "音频输出" if self.audio_type == 0 else "音频输入" } + - 序号:{self.device['index']} + - 名称:{self.device['name']} + - 最大输入通道数:{self.device['maxInputChannels']} + - 默认低输入延迟:{self.device['defaultLowInputLatency']}s + - 默认高输入延迟:{self.device['defaultHighInputLatency']}s + - 默认采样率:{self.device['defaultSampleRate']}Hz + - 是否回环设备:{self.device['isLoopbackDevice']} 音频样本块大小:{self.CHUNK} 样本位宽:{self.SAMP_WIDTH} diff --git a/src/main/index.ts b/src/main/index.ts index fc084a9..aeb76db 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -2,6 +2,7 @@ import { app, BrowserWindow } from 'electron' import { electronApp, optimizer } from '@electron-toolkit/utils' import { controlWindow } from './control' import { captionWindow } from './caption' +import { captionEngine } from './utils/config' app.whenReady().then(() => { electronApp.setAppUserModelId('com.himeditator.autocaption') @@ -22,6 +23,10 @@ app.whenReady().then(() => { }) }) +app.on('will-quit', async () => { + captionEngine.stop() +}); + app.on('window-all-closed', () => { if (process.platform !== 'darwin') { app.quit() diff --git a/src/main/types/index.ts b/src/main/types/index.ts index 033a285..90e76e5 100644 --- a/src/main/types/index.ts +++ b/src/main/types/index.ts @@ -23,6 +23,7 @@ export interface Controls { sourceLang: string, targetLang: string, engine: string, + audio: 0 | 1, translation: boolean, customized: boolean, customizedApp: string, diff --git a/src/main/utils/config.ts b/src/main/utils/config.ts index 86d08e3..d1b5212 100644 --- a/src/main/utils/config.ts +++ b/src/main/utils/config.ts @@ -22,6 +22,7 @@ export const controls: Controls = { sourceLang: 'en', targetLang: 'zh', engine: 'gummy', + audio: 0, engineEnabled: false, translation: true, customized: false, @@ -74,6 +75,7 @@ export 
function setControls(args: any) { controls.sourceLang = args.sourceLang controls.targetLang = args.targetLang controls.engine = args.engine + controls.audio = args.audio controls.translation = args.translation controls.customized = args.customized controls.customizedApp = args.customizedApp diff --git a/src/main/utils/engine.ts b/src/main/utils/engine.ts index 2ac9aff..56992c7 100644 --- a/src/main/utils/engine.ts +++ b/src/main/utils/engine.ts @@ -1,5 +1,6 @@ -import { spawn } from 'child_process' +import { spawn, exec } from 'child_process' import { app } from 'electron' +import { is } from '@electron-toolkit/utils' import path from 'path' import { addCaptionLog, controls } from './config' @@ -14,24 +15,29 @@ export class CaptionEngine { this.command = [ controls.customizedCommand ] } else if(controls.engine === 'gummy'){ - this.appPath = path.join( - app.getAppPath(), - 'python-subprocess', 'subenv', 'Scripts', 'python.exe' - ) + if(is.dev){ + this.appPath = path.join( + app.getAppPath(), + 'python-subprocess', 'dist', 'main-gummy.exe' + ) + } + else{ + this.appPath = path.join( + process.resourcesPath, + 'python-subprocess', 'dist', 'main-gummy.exe' + ) + } this.command = [] - this.command.push(path.join( - app.getAppPath(), - 'python-subprocess', 'main.py' - )) this.command.push('-s', controls.sourceLang) this.command.push('-t', controls.translation ? controls.targetLang : 'none') + this.command.push('-a', controls.audio ? 
'1' : '0') - console.log(this.appPath) - console.log(this.command) + console.log('[INFO] engine', this.appPath) + console.log('[INFO] engine command',this.command) } } - public start() { + public start() { if (this.process) { this.stop(); } @@ -70,7 +76,15 @@ export class CaptionEngine { public stop() { if (this.process) { - this.process.kill(); + if (process.platform === "win32" && this.process.pid) { + exec(`taskkill /pid ${this.process.pid} /t /f`, (error) => { + if (error) { + console.error(`Failed to kill process: ${error}`); + } + }); + } else { + this.process.kill('SIGKILL'); + } this.process = undefined; controls.engineEnabled = false; console.log('[INFO] Caption engine process stopped'); diff --git a/src/renderer/src/components/CaptionControl.vue b/src/renderer/src/components/CaptionControl.vue index cc4e1db..da01cef 100644 --- a/src/renderer/src/components/CaptionControl.vue +++ b/src/renderer/src/components/CaptionControl.vue @@ -29,6 +29,14 @@ :options="captionEngine" > +
+ 音频选择 + +
启用翻译 @@ -62,13 +70,15 @@ import { ref, computed, watch } from 'vue' import { storeToRefs } from 'pinia' import { useCaptionControlStore } from '@renderer/stores/captionControl' +import { notification } from 'ant-design-vue' const captionControl = useCaptionControlStore() -const { captionEngine, changeSignal } = storeToRefs(captionControl) +const { captionEngine, audioType, changeSignal } = storeToRefs(captionControl) const currentSourceLang = ref('auto') const currentTargetLang = ref('zh') const currentEngine = ref('gummy') +const currentAudio = ref<0 | 1>(0) const currentTranslation = ref(false) const currentCustomized = ref(false) @@ -88,6 +98,7 @@ function applyChange(){ captionControl.sourceLang = currentSourceLang.value captionControl.targetLang = currentTargetLang.value captionControl.engine = currentEngine.value + captionControl.audio = currentAudio.value captionControl.translation = currentTranslation.value captionControl.customized = currentCustomized.value @@ -95,12 +106,18 @@ function applyChange(){ captionControl.customizedCommand = currentCustomizedCommand.value captionControl.sendControlChange() + + notification.open({ + message: '字幕控制已更改', + description: '如果字幕引擎已经启动,需要关闭后重启才会生效' + }); } function cancelChange(){ currentSourceLang.value = captionControl.sourceLang currentTargetLang.value = captionControl.targetLang currentEngine.value = captionControl.engine + currentAudio.value = captionControl.audio currentTranslation.value = captionControl.translation currentCustomized.value = captionControl.customized diff --git a/src/renderer/src/stores/captionControl.ts b/src/renderer/src/stores/captionControl.ts index 4db7dec..93cfb17 100644 --- a/src/renderer/src/stores/captionControl.ts +++ b/src/renderer/src/stores/captionControl.ts @@ -16,11 +16,23 @@ export const useCaptionControlStore = defineStore('captionControl', () => { ] }, ]) + const audioType = ref([ + { + value: 0, + label: '系统音频输出(扬声器)' + }, + { + value: 1, + label: '系统音频输入(麦克风)' + } + ]) + const 
engineEnabled = ref(false) const sourceLang = ref('en') const targetLang = ref('zh') const engine = ref('gummy') + const audio = ref<0 | 1>(0) const translation = ref(true) const customized = ref(false) const customizedApp = ref('') @@ -34,6 +46,7 @@ export const useCaptionControlStore = defineStore('captionControl', () => { sourceLang: sourceLang.value, targetLang: targetLang.value, engine: engine.value, + audio: audio.value, translation: translation.value, customized: customized.value, customizedApp: customizedApp.value, @@ -54,6 +67,7 @@ export const useCaptionControlStore = defineStore('captionControl', () => { sourceLang.value = controls.sourceLang targetLang.value = controls.targetLang engine.value = controls.engine + audio.value = controls.audio translation.value = controls.translation customized.value = controls.customized customizedApp.value = controls.customizedApp @@ -73,7 +87,8 @@ export const useCaptionControlStore = defineStore('captionControl', () => { engineEnabled.value = true notification.open({ message: '字幕引擎启动', - description: `原语言:${sourceLang.value},是否翻译:${translation.value?'是':'否'}` + + description: `原语言:${sourceLang.value},是否翻译:${translation.value?'是':'否'},` + + `字幕引擎:${engine.value},音频类型:${audio.value ? '输入音频' : '输出音频'}` + (translation.value ? `,翻译语言:${targetLang.value}` : '') }); }) @@ -88,10 +103,12 @@ export const useCaptionControlStore = defineStore('captionControl', () => { return { captionEngine, // 字幕引擎 + audioType, // 音频类型 engineEnabled, // 字幕引擎是否启用 sourceLang, // 源语言 targetLang, // 目标语言 engine, // 字幕引擎 + audio, // 选择音频 translation, // 是否启用翻译 customized, // 是否使用自定义字幕引擎 customizedApp, // 自定义字幕引擎的应用程序