diff --git a/.gitignore b/.gitignore
index 75d2a08..0dd50b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,5 +7,6 @@ out
__pycache__
subenv
caption-engine/build
+caption-engine/models
output.wav
-.venv
\ No newline at end of file
+.venv
diff --git a/README.md b/README.md
index 36c77c2..1ccfe38 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,13 @@
Auto Caption 是一个跨平台的实时字幕显示软件。
-
+
+
+
+
+
+
| 简体中文
| English
diff --git a/README_en.md b/README_en.md
index a1e302d..7c3be90 100644
--- a/README_en.md
+++ b/README_en.md
@@ -2,11 +2,13 @@
Auto Caption is a cross-platform real-time caption display software.
-
+
+
+
+
+
+
| 简体中文
| English
diff --git a/README_ja.md b/README_ja.md
index d5ed76a..e0a9431 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -2,11 +2,13 @@
Auto Caption はクロスプラットフォームのリアルタイム字幕表示ソフトウェアです。
-
+
+
+
+
+
+
 | 简体中文
 | English
diff --git a/caption-engine/audioprcs/process.py b/caption-engine/audioprcs/process.py
index ff9c61a..650081b 100644
--- a/caption-engine/audioprcs/process.py
+++ b/caption-engine/audioprcs/process.py
@@ -47,3 +47,22 @@ def resampleRawChunk(chunk, channels, orig_sr, target_sr, mode="sinc_best"):
     chunk_mono_r = samplerate.resample(chunk_mono, ratio, converter_type=mode)
     chunk_mono_r = np.round(chunk_mono_r).astype(np.int16)
     return chunk_mono_r.tobytes()
+
+def resampleMonoChunk(chunk, orig_sr, target_sr, mode="sinc_best"):
+    """
+    将当前单通道进行重采样
+
+    Args:
+        chunk: (bytes)单通道音频数据块
+        orig_sr: 原始采样率
+        target_sr: 目标采样率
+        mode: 重采样模式,可选:'sinc_best' | 'sinc_medium' | 'sinc_fastest' | 'zero_order_hold' | 'linear'
+
+    Return:
+        (bytes)单通道音频数据块
+    """
+    chunk_np = np.frombuffer(chunk, dtype=np.int16)
+    ratio = target_sr / orig_sr
+    chunk_r = samplerate.resample(chunk_np, ratio, converter_type=mode)
+    chunk_r = np.round(chunk_r).astype(np.int16)
+    return chunk_r.tobytes()
diff --git a/caption-engine/main-vosk.py b/caption-engine/main-vosk.py
new file mode 100644
index 0000000..cf407f7
--- /dev/null
+++ b/caption-engine/main-vosk.py
@@ -0,0 +1,83 @@
+import sys
+import json
+import argparse
+from datetime import datetime
+import numpy.core.multiarray
+
+if sys.platform == 'win32':
+    from sysaudio.win import AudioStream
+elif sys.platform == 'darwin':
+    from sysaudio.darwin import AudioStream
+elif sys.platform == 'linux':
+    from sysaudio.linux import AudioStream
+else:
+    raise NotImplementedError(f"Unsupported platform: {sys.platform}")
+
+from vosk import Model, KaldiRecognizer, SetLogLevel
+from audioprcs import resampleRawChunk
+
+SetLogLevel(-1)
+
+def convert_audio_to_text(audio_type, chunk_rate, model_path):
+    sys.stdout.reconfigure(line_buffering=True) # type: ignore
+
+    if model_path.startswith('"'):
+        model_path = model_path[1:]
+    if model_path.endswith('"'):
+        model_path = model_path[:-1]
+
+    model = Model(model_path)
+    recognizer = KaldiRecognizer(model, 16000)
+
+    stream = AudioStream(audio_type, chunk_rate)
+    stream.openStream()
+
+    time_str = ''
+    cur_id = 0
+    prev_content = ''
+
+    while True:
+        chunk = stream.read_chunk()
+        chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, 16000)
+
+        caption = {}
+        if recognizer.AcceptWaveform(chunk_mono):
+            content = json.loads(recognizer.Result()).get('text', '')
+            caption['index'] = cur_id
+            caption['text'] = content
+            caption['time_s'] = time_str
+            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+            caption['translation'] = ''
+            prev_content = ''
+            cur_id += 1
+        else:
+            content = json.loads(recognizer.PartialResult()).get('partial', '')
+            if content == '' or content == prev_content:
+                continue
+            if prev_content == '':
+                time_str = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+            caption['index'] = cur_id
+            caption['text'] = content
+            caption['time_s'] = time_str
+            caption['time_t'] = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+            caption['translation'] = ''
+            prev_content = content
+        try:
+            json_str = json.dumps(caption) + '\n'
+            sys.stdout.write(json_str)
+            sys.stdout.flush()
+        except Exception as e:
+            print(e)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Convert system audio stream to text')
+    parser.add_argument('-a', '--audio_type', default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
+    parser.add_argument('-c', '--chunk_rate', default=20, help='The number of audio stream chunks collected per second.')
+    parser.add_argument('-m', '--model_path', default='', help='The path to the vosk model.')
+    args = parser.parse_args()
+    convert_audio_to_text(
+        int(args.audio_type),
+        int(args.chunk_rate),
+        args.model_path
+    )
diff --git a/caption-engine/main-vosk.spec b/caption-engine/main-vosk.spec
new file mode 100644
index 0000000..aab7f83
--- /dev/null
+++ b/caption-engine/main-vosk.spec
@@ -0,0 +1,42 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+from pathlib import Path
+
+vosk_path = str(Path('./subenv/Lib/site-packages/vosk').resolve())
+
+a = Analysis(
+    ['main-vosk.py'],
+    pathex=[],
+    binaries=[],
+    datas=[(vosk_path, 'vosk')],
+    hiddenimports=[],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    noarchive=False,
+    optimize=0,
+)
+
+pyz = PYZ(a.pure)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.datas,
+    [],
+    name='main-vosk',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
diff --git a/caption-engine/requirements.txt b/caption-engine/requirements.txt
index 981c2fb..0f168fb 100644
--- a/caption-engine/requirements.txt
+++ b/caption-engine/requirements.txt
@@ -3,4 +3,5 @@ numpy
 samplerate
 PyAudio
 PyAudioWPatch # Windows only
+vosk
 pyinstaller
diff --git a/caption-engine/sysaudio/win.py b/caption-engine/sysaudio/win.py
index 5db1d9d..c6765ce 100644
--- a/caption-engine/sysaudio/win.py
+++ b/caption-engine/sysaudio/win.py
@@ -57,7 +57,7 @@ class AudioStream:
         self.stream = None
         self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
         self.FORMAT = pyaudio.paInt16
-        self.CHANNELS = self.device["maxInputChannels"]
+        self.CHANNELS = int(self.device["maxInputChannels"])
         self.RATE = int(self.device["defaultSampleRate"])
         self.CHUNK = self.RATE // chunk_rate
         self.INDEX = self.device["index"]
diff --git a/electron-builder.yml b/electron-builder.yml
index a80df13..c3efa72 100644
--- a/electron-builder.yml
+++ b/electron-builder.yml
@@ -10,8 +10,15 @@ files:
   - '!{.env,.env.*,.npmrc,pnpm-lock.yaml}'
   - '!{tsconfig.json,tsconfig.node.json,tsconfig.web.json}'
 extraResources:
-  from: ./caption-engine/dist/main-gummy.exe
-  to: ./caption-engine/main-gummy.exe
+  - from: ./caption-engine/dist/main-gummy.exe
+    to: ./caption-engine/main-gummy.exe
+  - from: ./caption-engine/dist/main-vosk.exe
+    to: ./caption-engine/main-vosk.exe
+  # For macOS and Linux
+  # - from: ./caption-engine/dist/main-gummy
+  #   to: ./caption-engine/main-gummy
+  # - from: ./caption-engine/dist/main-vosk
+  #   to: ./caption-engine/main-vosk
 asarUnpack:
   - resources/**
 win:
diff --git a/engine-test/vosk.ipynb b/engine-test/vosk.ipynb
new file mode 100644
index 0000000..6238a68
--- /dev/null
+++ b/engine-test/vosk.ipynb
@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6fb12704",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "d:\\Projects\\auto-caption\\caption-engine\\subenv\\Lib\\site-packages\\vosk\\__init__.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "import vosk\n",
+    "print(vosk.__file__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "63a06f5c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "    采样设备:\n",
+      "      - 设备类型:音频输入\n",
+      "      - 序号:1\n",
+      "      - 名称:麦克风阵列 (Realtek(R) Audio)\n",
+      "      - 最大输入通道数:2\n",
+      "      - 默认低输入延迟:0.09s\n",
+      "      - 默认高输入延迟:0.18s\n",
+      "      - 默认采样率:44100.0Hz\n",
+      "      - 是否回环设备:False\n",
+      "\n",
+      "    音频样本块大小:2205\n",
+      "    样本位宽:2\n",
+      "    采样格式:8\n",
+      "    音频通道数:2\n",
+      "    音频采样率:44100\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "import json\n",
+    "from vosk import Model, KaldiRecognizer\n",
+    "\n",
+    "current_dir = os.getcwd() \n",
+    "sys.path.append(os.path.join(current_dir, '../caption-engine'))\n",
+    "\n",
+    "from sysaudio.win import AudioStream\n",
+    "from audioprcs import resampleRawChunk, mergeChunkChannels\n",
+    "\n",
+    "stream = AudioStream(1)\n",
+    "stream.printInfo()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "5d5a0afa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = Model(os.path.join(\n",
+    "    current_dir,\n",
+    "    '../caption-engine/models/vosk-model-small-cn-0.22'\n",
+    "))\n",
+    "recognizer = KaldiRecognizer(model, 16000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e9d1530",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stream.openStream()\n",
+    "\n",
+    "for i in range(200):\n",
+    "    chunk = stream.read_chunk()\n",
+    "    chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, 16000)\n",
+    "    if recognizer.AcceptWaveform(chunk_mono):\n",
+    "        result = json.loads(recognizer.Result())\n",
+    "        print(\"acc:\", result.get(\"text\", \"\"))\n",
+    "    else:\n",
+    "        partial = json.loads(recognizer.PartialResult())\n",
+    "        print(\"else:\", partial.get(\"partial\", \"\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "subenv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/main/types/index.ts b/src/main/types/index.ts
index 9bdb881..59ca5a4 100644
--- a/src/main/types/index.ts
+++ b/src/main/types/index.ts
@@ -6,10 +6,11 @@ export interface Controls {
   engineEnabled: boolean,
   sourceLang: string,
   targetLang: string,
-  engine: 'gummy',
+  engine: string,
   audio: 0 | 1,
   translation: boolean,
   API_KEY: string,
+  modelPath: string,
   customized: boolean,
   customizedApp: string,
   customizedCommand: string
diff --git a/src/main/utils/AllConfig.ts b/src/main/utils/AllConfig.ts
index 8d16045..d3318ab 100644
--- a/src/main/utils/AllConfig.ts
+++ b/src/main/utils/AllConfig.ts
@@ -34,6 +34,7 @@ const defaultControls: Controls = {
   audio: 0,
   engineEnabled: false,
   API_KEY: '',
+  modelPath: '',
   translation: true,
   customized: false,
   customizedApp: '',
diff --git a/src/main/utils/CaptionEngine.ts b/src/main/utils/CaptionEngine.ts
index 6ca6595..c254de9 100644
--- a/src/main/utils/CaptionEngine.ts
+++ b/src/main/utils/CaptionEngine.ts
@@ -13,26 +13,20 @@ export class CaptionEngine {
   processStatus: 'running' | 'stopping' | 'stopped' = 'stopped'
 
   private getApp(): boolean {
+    allConfig.controls.customized = false
     if (allConfig.controls.customized && allConfig.controls.customizedApp) {
       this.appPath = allConfig.controls.customizedApp
       this.command = [allConfig.controls.customizedCommand]
+      allConfig.controls.customized = true
     }
     else if (allConfig.controls.engine === 'gummy') {
-      allConfig.controls.customized = false
       if(!allConfig.controls.API_KEY && !process.env.DASHSCOPE_API_KEY) {
         controlWindow.sendErrorMessage(i18n('gummy.key.missing'))
         return false
       }
-      let gummyName = ''
+      let gummyName = 'main-gummy'
      if (process.platform === 'win32') {
-        gummyName = 'main-gummy.exe'
-      }
-      else if (process.platform === 'darwin' || process.platform === 'linux') {
-        gummyName = 'main-gummy'
-      }
-      else {
-        controlWindow.sendErrorMessage(i18n('platform.unsupported') + process.platform)
-        throw new Error(i18n('platform.unsupported'))
+        gummyName += '.exe'
       }
       if (is.dev) {
         this.appPath = path.join(
@@ -55,10 +49,29 @@ export class CaptionEngine {
       if(allConfig.controls.API_KEY) {
         this.command.push('-k', allConfig.controls.API_KEY)
       }
-
-      console.log('[INFO] Engine Path:', this.appPath)
-      console.log('[INFO] Engine Command:', this.command)
     }
+    else if(allConfig.controls.engine === 'vosk'){
+      let voskName = 'main-vosk'
+      if (process.platform === 'win32') {
+        voskName += '.exe'
+      }
+      if (is.dev) {
+        this.appPath = path.join(
+          app.getAppPath(),
+          'caption-engine', 'dist', voskName
+        )
+      }
+      else {
+        this.appPath = path.join(
+          process.resourcesPath, 'caption-engine', voskName
+        )
+      }
+      this.command = []
+      this.command.push('-a', allConfig.controls.audio ? '1' : '0')
+      this.command.push('-m', `"${allConfig.controls.modelPath}"`)
+    }
+    console.log('[INFO] Engine Path:', this.appPath)
+    console.log('[INFO] Engine Command:', this.command)
     return true
   }
 
diff --git a/src/renderer/src/components/EngineControl.vue b/src/renderer/src/components/EngineControl.vue
index b459f46..06b85cf 100644
--- a/src/renderer/src/components/EngineControl.vue
+++ b/src/renderer/src/components/EngineControl.vue
@@ -16,6 +16,7 @@