From 8858189bf68be75b48a1482a58a234aa75485620 Mon Sep 17 00:00:00 2001
From: himeditator <hironin@foxmail.com>
Date: Sun, 15 Jun 2025 12:43:57 +0800
Subject: [PATCH] =?UTF-8?q?feat(python-subprocess):=20=E5=B0=9D=E8=AF=95?=
 =?UTF-8?q?=E5=AD=97=E5=B9=95=E6=98=BE=E7=A4=BA=E6=96=B0=E8=A7=A3=E5=86=B3?=
 =?UTF-8?q?=E6=96=B9=E6=A1=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 使用 python 子进程解析字幕
- 通过 websocket 通信将字幕传递给软件
---
 package-lock.json                             |  35 ++-
 package.json                                  |   4 +-
 python-prototype/webscoket.ipynb              |  66 ++++++
 python-prototype/wstest.py                    |  21 ++
 python-subprocess/README.md                   |   8 +-
 python-subprocess/audio2caption.py            | 223 ++++++++++++++++++
 python-subprocess/audio2text/gummy.py         |  23 +-
 python-subprocess/main.py                     |  50 +---
 python-subprocess/sysaudio/win.py             |  85 ++++++-
 src/main/index.ts                             |  12 +-
 src/main/pyComm.ts                            |  52 ++++
 src/renderer/caption/index.html               |  17 ++
 src/renderer/caption/src/App.vue              |   6 +
 src/renderer/caption/src/assets/Template.vue  |  11 +
 .../caption/src/assets/styles/reset.css       |   5 +
 src/renderer/caption/src/env.d.ts             |   1 +
 src/renderer/caption/src/main.ts              |   8 +
 src/renderer/caption/src/stores/caption.ts    |   9 +
 18 files changed, 572 insertions(+), 64 deletions(-)
 create mode 100644 python-prototype/webscoket.ipynb
 create mode 100644 python-prototype/wstest.py
 create mode 100644 python-subprocess/audio2caption.py
 create mode 100644 src/main/pyComm.ts
 create mode 100644 src/renderer/caption/index.html
 create mode 100644 src/renderer/caption/src/App.vue
 create mode 100644 src/renderer/caption/src/assets/Template.vue
 create mode 100644 src/renderer/caption/src/assets/styles/reset.css
 create mode 100644 src/renderer/caption/src/env.d.ts
 create mode 100644 src/renderer/caption/src/main.ts
 create mode 100644 src/renderer/caption/src/stores/caption.ts

diff --git a/package-lock.json b/package-lock.json
index d17b239..a6ac7c7 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -11,13 +11,15 @@
       "dependencies": {
         "@electron-toolkit/preload": "^3.0.1",
         "@electron-toolkit/utils": "^4.0.0",
-        "pinia": "^3.0.2"
+        "pinia": "^3.0.2",
+        "ws": "^8.18.2"
       },
       "devDependencies": {
         "@electron-toolkit/eslint-config-prettier": "3.0.0",
         "@electron-toolkit/eslint-config-ts": "^3.0.0",
         "@electron-toolkit/tsconfig": "^1.0.1",
         "@types/node": "^22.14.1",
+        "@types/ws": "^8.18.1",
         "@vitejs/plugin-vue": "^5.2.3",
         "electron": "^35.1.5",
         "electron-builder": "^25.1.8",
@@ -2209,6 +2211,16 @@
       "license": "MIT",
       "optional": true
     },
+    "node_modules/@types/ws": {
+      "version": "8.18.1",
+      "resolved": "https://registry.npmmirror.com/@types/ws/-/ws-8.18.1.tgz",
+      "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
     "node_modules/@types/yauzl": {
       "version": "2.10.3",
       "resolved": "https://registry.npmmirror.com/@types/yauzl/-/yauzl-2.10.3.tgz",
@@ -9300,6 +9312,27 @@
       "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
       "license": "ISC"
     },
+    "node_modules/ws": {
+      "version": "8.18.2",
+      "resolved": "https://registry.npmmirror.com/ws/-/ws-8.18.2.tgz",
+      "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/xml-name-validator": {
       "version": "4.0.0",
       "resolved": "https://registry.npmmirror.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz",
diff --git a/package.json b/package.json
index 7c189f6..e533d54 100644
--- a/package.json
+++ b/package.json
@@ -23,13 +23,15 @@
   "dependencies": {
     "@electron-toolkit/preload": "^3.0.1",
     "@electron-toolkit/utils": "^4.0.0",
-    "pinia": "^3.0.2"
+    "pinia": "^3.0.2",
+    "ws": "^8.18.2"
   },
   "devDependencies": {
     "@electron-toolkit/eslint-config-prettier": "3.0.0",
     "@electron-toolkit/eslint-config-ts": "^3.0.0",
     "@electron-toolkit/tsconfig": "^1.0.1",
     "@types/node": "^22.14.1",
+    "@types/ws": "^8.18.1",
     "@vitejs/plugin-vue": "^5.2.3",
     "electron": "^35.1.5",
     "electron-builder": "^25.1.8",
diff --git a/python-prototype/webscoket.ipynb b/python-prototype/webscoket.ipynb
new file mode 100644
index 0000000..62a1cbf
--- /dev/null
+++ b/python-prototype/webscoket.ipynb
@@ -0,0 +1,66 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4604aefd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Received: {\"message\":\"Electron Initialized\"}\n",
+      "Received: {\"command\":\"process_data\",\"payload\":{\"some\":\"data\"}}\n",
+      "Client disconnected\n",
+      "Received: {\"message\":\"Electron Initialized\"}\n",
+      "Received: {\"command\":\"process_data\",\"payload\":{\"some\":\"data\"}}\n",
+      "Client disconnected\n"
+     ]
+    }
+   ],
+   "source": [
+    "import asyncio\n",
+    "import websockets\n",
+    "import nest_asyncio\n",
+    "\n",
+    "# 应用补丁，允许在 Jupyter 中运行嵌套事件循环\n",
+    "nest_asyncio.apply()\n",
+    "\n",
+    "async def handle_client(websocket, path=\"/\"):\n",
+    "    try:\n",
+    "        async for message in websocket:\n",
+    "            print(f\"Received: {message}\")\n",
+    "            await websocket.send(f\"Echo: {message}\")\n",
+    "    except websockets.exceptions.ConnectionClosed:\n",
+    "        print(\"Client disconnected\")\n",
+    "\n",
+    "start_server = websockets.serve(handle_client, \"localhost\", 8765)\n",
+    "\n",
+    "asyncio.get_event_loop().run_until_complete(start_server)\n",
+    "asyncio.get_event_loop().run_forever()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mystd",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/python-prototype/wstest.py b/python-prototype/wstest.py
new file mode 100644
index 0000000..9e786a6
--- /dev/null
+++ b/python-prototype/wstest.py
@@ -0,0 +1,21 @@
+import asyncio
+import websockets
+import json  # 导入 json 模块
+
+# WebSocket server handler: answers every incoming message with a fixed JSON payload (it does not echo the message back)
+async def echo(websocket):
+    async for message in websocket:
+        print(f"收到客户端消息: {message}")
+        # Send a response back to the client
+        response = {"respond": "Hello, Client!"}
+        await websocket.send(json.dumps(response))  # serialize the dict to a JSON string
+        print(f"已发送响应: {response}")
+
+# Start the server on localhost:8765 with `echo` as the connection handler
+async def main():
+    async with websockets.serve(echo, "localhost", 8765):
+        await asyncio.Future()  # never completes, which keeps the server running
+
+if __name__ == "__main__":
+    print("WebSocket 服务器已启动，监听 ws://localhost:8765")  # printed before asyncio.run() actually starts the server
+    asyncio.run(main())
\ No newline at end of file
diff --git a/python-subprocess/README.md b/python-subprocess/README.md
index 7d84356..0bdd26b 100644
--- a/python-subprocess/README.md
+++ b/python-subprocess/README.md
@@ -2,7 +2,11 @@
 
 拟实现功能：
 
-- [ ] 可以获取 Windows 系统音频流
+- [x] 可以获取 Windows 系统音频流
+- [ ] 可以对音频流进行转换（调整声道数和采样率）
 - [ ] 可以获取 Linux 系统视频流
 - [ ] 添加字幕图形界面
-- [ ] 界面中可以实时显示当前系统音频对应的字幕
+  - [ ] 字幕显示
+  - [ ] 字幕样式设置
+  - [ ] 字幕页面删除标题栏
+- [ ] 界面中实时显示当前系统音频对应的字幕
diff --git a/python-subprocess/audio2caption.py b/python-subprocess/audio2caption.py
new file mode 100644
index 0000000..24afba3
--- /dev/null
+++ b/python-subprocess/audio2caption.py
@@ -0,0 +1,223 @@
+import pyaudiowpatch as pyaudio
+import numpy as np
+import tkinter as tk
+from tkinter import ttk
+from dashscope.audio.asr import (
+    TranslationRecognizerCallback,
+    TranslationRecognizerRealtime
+)
+import threading
+import queue
+
+class AudioCapture:
+    """Captures system audio from the default WASAPI loopback device via PyAudioWPatch."""
+    def __init__(self):
+        self.audio = pyaudio.PyAudio()
+        self.stream = None
+        self.is_running = False
+        self.setup_audio()
+
+    def setup_audio(self):
+        try:
+            wasapi_info = self.audio.get_host_api_info_by_type(pyaudio.paWASAPI)
+        except OSError:
+            raise Exception("WASAPI 不可用")
+        default_speaker = self.audio.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
+        # The default output may not be a loopback device itself; look up its loopback twin by name.
+        if not default_speaker["isLoopbackDevice"]:
+            for loopback in self.audio.get_loopback_device_info_generator():
+                if default_speaker["name"] in loopback["name"]:
+                    default_speaker = loopback
+                    break
+            else:
+                raise Exception("未找到默认回环输出设备")
+        self.device_info = default_speaker
+        self.channels = default_speaker["maxInputChannels"]
+        self.rate = int(default_speaker["defaultSampleRate"])
+        self.chunk = self.rate // 10  # frames per read: one tenth of a second of audio
+
+    def start_stream(self):
+        self.stream = self.audio.open(
+            format=pyaudio.paInt16,
+            channels=self.channels,
+            rate=self.rate,
+            input=True,
+            input_device_index=self.device_info["index"]
+        )
+        self.is_running = True
+
+    def stop_stream(self):
+        if self.stream:
+            self.is_running = False
+            self.stream.stop_stream()
+            self.stream.close()
+            # Do not terminate PyAudio here: a terminated instance cannot open() again, so Start after Stop failed.
+            self.stream = None
+
+class CaptionCallback(TranslationRecognizerCallback):  # pushes recognizer events onto a queue as (kind, text) tuples
+    def __init__(self, text_queue):
+        super().__init__()
+        self.text_queue = text_queue  # (kind, text) tuples are put here; kind is "status", "caption" or "translation"
+        self.usage = 0  # running sum of usage['duration'] values seen in on_event
+
+    def on_open(self) -> None:
+        self.text_queue.put(("status", "开始识别..."))
+
+    def on_close(self) -> None:
+        self.text_queue.put(("status", f"识别结束，消耗 Tokens: {self.usage}"))
+
+    def on_event(self, request_id, transcription_result, translation_result, usage) -> None:
+        if transcription_result is not None:
+            text = transcription_result.text
+            if transcription_result.stash is not None:  # append the stash text when one is present
+                text += transcription_result.stash.text
+            self.text_queue.put(("caption", text))
+
+        if translation_result is not None:
+            lang = translation_result.get_language_list()[0]  # only the first listed language is used
+            text = translation_result.get_translation(lang).text
+            if translation_result.get_translation(lang).stash is not None:
+                text += translation_result.get_translation(lang).stash.text
+            self.text_queue.put(("translation", text))
+
+        if usage:
+            self.usage += usage['duration']
+
+class CaptionApp:  # Tkinter window that shows live captions and translations of the captured system audio
+    def __init__(self):
+        self.root = tk.Tk()
+        self.root.title("实时字幕")
+        self.root.geometry("800x400")
+        
+        self.setup_ui()
+        self.text_queue = queue.Queue()
+        self.audio_capture = AudioCapture()
+        self.translator = None
+        self.is_running = False
+        # Caches holding the most recent caption and translation texts
+        self.caption_cache = []
+        self.translation_cache = []
+
+    def setup_ui(self):
+        # Status label
+        self.status_label = ttk.Label(self.root, text="就绪")
+        self.status_label.pack(pady=5)
+
+        # Caption display area
+        self.caption_frame = ttk.Frame(self.root)
+        self.caption_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
+
+        # Create the labels that show captions and translations (two of each)
+        self.caption_label1 = ttk.Label(self.caption_frame, text="", font=("Arial", 14))
+        self.caption_label1.pack(fill=tk.X, pady=5)
+        self.translation_label1 = ttk.Label(self.caption_frame, text="", font=("Arial", 14))
+        self.translation_label1.pack(fill=tk.X, pady=5)
+        
+        self.caption_label2 = ttk.Label(self.caption_frame, text="", font=("Arial", 14))
+        self.caption_label2.pack(fill=tk.X, pady=5)
+        self.translation_label2 = ttk.Label(self.caption_frame, text="", font=("Arial", 14))
+        self.translation_label2.pack(fill=tk.X, pady=5)
+
+        # Control buttons
+        self.control_frame = ttk.Frame(self.root)
+        self.control_frame.pack(pady=10)
+
+        self.start_button = ttk.Button(self.control_frame, text="开始", command=self.start_caption)
+        self.start_button.pack(side=tk.LEFT, padx=5)
+
+        self.stop_button = ttk.Button(self.control_frame, text="停止", command=self.stop_caption, state=tk.DISABLED)
+        self.stop_button.pack(side=tk.LEFT, padx=5)
+
+    def start_caption(self):
+        self.is_running = True
+        self.start_button.config(state=tk.DISABLED)
+        self.stop_button.config(state=tk.NORMAL)
+        
+        # Initialize the translator
+        self.translator = TranslationRecognizerRealtime(
+            model="gummy-realtime-v1",
+            format="pcm",
+            sample_rate=self.audio_capture.rate,
+            transcription_enabled=True,
+            translation_enabled=True,
+            source_language="ja",
+            translation_target_languages=["zh"],
+            callback=CaptionCallback(self.text_queue)
+        )
+        
+        # Start audio capture and translation
+        self.audio_capture.start_stream()
+        self.translator.start()
+        
+        # Start the worker threads
+        threading.Thread(target=self.process_audio, daemon=True).start()
+        threading.Thread(target=self.update_ui, daemon=True).start()  # NOTE(review): update_ui touches Tk widgets off the main thread - confirm this is safe
+
+    def stop_caption(self):
+        self.is_running = False
+        self.start_button.config(state=tk.NORMAL)
+        self.stop_button.config(state=tk.DISABLED)
+        
+        if self.translator:
+            self.translator.stop()
+        self.audio_capture.stop_stream()
+
+    def process_audio(self):
+        while self.is_running:
+            try:
+                data = self.audio_capture.stream.read(self.audio_capture.chunk)
+                data_np = np.frombuffer(data, dtype=np.int16)
+                data_np_r = data_np.reshape(-1, self.audio_capture.channels)  # one row per frame, one column per channel
+                mono_data = np.mean(data_np_r.astype(np.float32), axis=1)  # average the channels into a single one
+                mono_data = mono_data.astype(np.int16)
+                mono_data_bytes = mono_data.tobytes()
+                self.translator.send_audio_frame(mono_data_bytes)
+            except Exception as e:
+                self.text_queue.put(("error", str(e)))  # reported through the queue; the loop then ends
+                break
+
+    def update_caption_display(self):
+        # Update the caption display
+        if len(self.caption_cache) > 0:
+            self.caption_label1.config(text=self.caption_cache[-1])
+        if len(self.caption_cache) > 1:
+            self.caption_label2.config(text=self.caption_cache[-2])
+        else:
+            self.caption_label2.config(text="")
+
+        # Update the translation display
+        if len(self.translation_cache) > 0:
+            self.translation_label1.config(text=f"翻译: {self.translation_cache[-1]}")
+        if len(self.translation_cache) > 1:
+            self.translation_label2.config(text=f"翻译: {self.translation_cache[-2]}")
+        else:
+            self.translation_label2.config(text="")
+
+    def update_ui(self):
+        while self.is_running:
+            try:
+                msg_type, text = self.text_queue.get(timeout=0.1)  # short timeout so the loop re-checks is_running
+                if msg_type == "status":
+                    self.status_label.config(text=text)
+                elif msg_type == "caption":
+                    self.caption_cache.append(text)
+                    if len(self.caption_cache) > 2:  # keep only the two most recent entries
+                        self.caption_cache.pop(0)
+                    self.update_caption_display()
+                elif msg_type == "translation":
+                    self.translation_cache.append(text)
+                    if len(self.translation_cache) > 2:  # keep only the two most recent entries
+                        self.translation_cache.pop(0)
+                    self.update_caption_display()
+                elif msg_type == "error":
+                    self.status_label.config(text=f"错误: {text}")
+                    self.stop_caption()
+            except queue.Empty:
+                continue
+
+    def run(self):
+        self.root.mainloop()
+
+if __name__ == "__main__":
+    app = CaptionApp()
+    app.run() 
\ No newline at end of file
diff --git a/python-subprocess/audio2text/gummy.py b/python-subprocess/audio2text/gummy.py
index 8300846..3f2078d 100644
--- a/python-subprocess/audio2text/gummy.py
+++ b/python-subprocess/audio2text/gummy.py
@@ -1,8 +1,9 @@
-from dashscope.audio.asr import \
-    TranslationRecognizerCallback, \
-    TranscriptionResult, \
-    TranslationResult, \
-    TranslationRecognizerRealtime
+from dashscope.audio.asr import (
+    TranslationRecognizerCallback,
+    TranscriptionResult,
+    TranslationResult,
+    TranslationRecognizerRealtime    
+)
 
 class Callback(TranslationRecognizerCallback):
     """
@@ -53,15 +54,15 @@ class Callback(TranslationRecognizerCallback):
         if usage: self.usage += usage['duration']
 
 
-def getGummpyTranslator(rate) -> TranslationRecognizerRealtime:
-    translator = TranslationRecognizerRealtime(
+class GummyTranslator:
+    def __init__(self, rate, source, target):
+        self.translator = TranslationRecognizerRealtime(
         model = "gummy-realtime-v1",
         format = "pcm",
         sample_rate = rate,
         transcription_enabled = True,
         translation_enabled = True,
-        source_language = "ja",
-        translation_target_languages = ["zh"],
+        source_language = source,
+        translation_target_languages = [target],
         callback = Callback()
-    )
-    return translator
+    )
\ No newline at end of file
diff --git a/python-subprocess/main.py b/python-subprocess/main.py
index 2c58972..80cc171 100644
--- a/python-subprocess/main.py
+++ b/python-subprocess/main.py
@@ -1,41 +1,17 @@
-from sysaudio.win import getDefaultLoopbackDevice
-from audio2text.gummy import getGummpyTranslator
-import pyaudiowpatch as pyaudio
-import numpy as np
+from sysaudio.win import LoopbackStream, mergeStreamChannels
+from audio2text.gummy import GummyTranslator
 
-mic = pyaudio.PyAudio()
-loopback = getDefaultLoopbackDevice(mic)
+loopback = LoopbackStream()
+loopback.openStream()
 
-SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
-FORMAT = pyaudio.paInt16
-CHANNELS = loopback["maxInputChannels"]
-RATE = int(loopback["defaultSampleRate"])
-CHUNK = RATE // 10
-INDEX = loopback["index"]
+gummy = GummyTranslator(loopback.RATE, "ja", "zh")
+gummy.translator.start()
 
+for i in range(0, 100):
+    if not loopback.stream: continue
+    data = loopback.stream.read(loopback.CHUNK)
+    data = mergeStreamChannels(data, loopback.CHANNELS)
+    gummy.translator.send_audio_frame(data)
 
-RECORD_SECONDS = 20 # 监听时长(s)
-
-stream = mic.open(
-    format = FORMAT,
-    channels = CHANNELS,
-    rate = RATE,
-    input = True,
-    input_device_index = INDEX
-)
-
-translator = getGummpyTranslator(rate=RATE)
-translator.start()
-
-for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
-    data = stream.read(CHUNK)
-    data_np = np.frombuffer(data, dtype=np.int16)
-    data_np_r = data_np.reshape(-1, CHANNELS)
-    mono_data = np.mean(data_np_r.astype(np.float32), axis=1)
-    mono_data = mono_data.astype(np.int16)
-    mono_data_bytes = mono_data.tobytes()
-    translator.send_audio_frame(mono_data_bytes)
-
-translator.stop()
-stream.stop_stream()
-stream.close()
\ No newline at end of file
+gummy.translator.stop()
+loopback.closeStream()
\ No newline at end of file
diff --git a/python-subprocess/sysaudio/win.py b/python-subprocess/sysaudio/win.py
index 194bf29..d3acdb2 100644
--- a/python-subprocess/sysaudio/win.py
+++ b/python-subprocess/sysaudio/win.py
@@ -1,6 +1,8 @@
 """获取 Windows 系统音频输出流"""
 
 import pyaudiowpatch as pyaudio
+import numpy as np
+
 
 def getDefaultLoopbackDevice(mic: pyaudio.PyAudio, info = True)->dict:
     """
@@ -38,16 +40,77 @@ def getDefaultLoopbackDevice(mic: pyaudio.PyAudio, info = True)->dict:
     return default_speaker
 
 
-def getOutputStream():
-    mic = pyaudio.PyAudio()
-    default_speaker = getDefaultLoopbackDevice(mic, False)
+def mergeStreamChannels(data, channels):
+    """
+    将当前多通道流数据合并为单通道流数据
 
-    stream = mic.open(
-        format = pyaudio.paInt16,
-        channels = default_speaker["maxInputChannels"],
-        rate = int(default_speaker["defaultSampleRate"]),
-        input = True,
-        input_device_index = default_speaker["index"]
-    )
+    Args:
+        data: 多通道数据
+        channels: 通道数
 
-    return stream
\ No newline at end of file
+    Returns:
+        mono_data_bytes: 单通道数据
+    """
+    # (length * channels,)
+    data_np = np.frombuffer(data, dtype=np.int16)
+    # (length, channels)
+    data_np_r = data_np.reshape(-1, channels)
+    # (length,)
+    mono_data = np.mean(data_np_r.astype(np.float32), axis=1)
+    mono_data = mono_data.astype(np.int16)
+    mono_data_bytes = mono_data.tobytes()
+    return mono_data_bytes
+
+class LoopbackStream:  # wraps a PyAudio input stream opened on the default loopback device
+    def __init__(self):
+        self.mic = pyaudio.PyAudio()
+        self.loopback = getDefaultLoopbackDevice(self.mic, False)
+        self.stream = None  # set by openStream(), reset to None by closeStream()
+        self.SAMP_WIDTH = pyaudio.get_sample_size(pyaudio.paInt16)
+        self.FORMAT = pyaudio.paInt16
+        self.CHANNELS = self.loopback["maxInputChannels"]
+        self.RATE = int(self.loopback["defaultSampleRate"])
+        self.CHUNK = self.RATE // 10  # frames per read: one tenth of a second of audio
+        self.INDEX = self.loopback["index"]
+
+    def printInfo(self):  # prints the device info and the derived stream parameters
+        dev_info = f"""
+        采样输入设备：
+            - 序号：{self.loopback['index']}
+            - 名称：{self.loopback['name']}
+            - 最大输入通道数：{self.loopback['maxInputChannels']}
+            - 默认低输入延迟：{self.loopback['defaultLowInputLatency']}s
+            - 默认高输入延迟：{self.loopback['defaultHighInputLatency']}s
+            - 默认采样率：{self.loopback['defaultSampleRate']}Hz
+            - 是否回环设备：{self.loopback['isLoopbackDevice']}
+
+        音频样本块大小：{self.CHUNK}
+        样本位宽：{self.SAMP_WIDTH}
+        音频数据格式：{self.FORMAT}
+        音频通道数：{self.CHANNELS}
+        音频采样率：{self.RATE}
+        """
+        print(dev_info)
+
+    def openStream(self):
+        """
+        Open and return the system audio output stream (returns the existing stream if one is already open)
+        """
+        if self.stream: return self.stream
+        self.stream = self.mic.open(
+            format = self.FORMAT,
+            channels = self.CHANNELS,
+            rate = self.RATE,
+            input = True,
+            input_device_index = self.INDEX
+        )
+        return self.stream
+    
+    def closeStream(self):
+        """
+        Close the system audio output stream (no-op when no stream is open)
+        """
+        if self.stream is None: return
+        self.stream.stop_stream()
+        self.stream.close()
+        self.stream = None
\ No newline at end of file
diff --git a/src/main/index.ts b/src/main/index.ts
index 0e2eec8..37b0c37 100644
--- a/src/main/index.ts
+++ b/src/main/index.ts
@@ -3,6 +3,16 @@ import path from 'path'
 import { electronApp, optimizer, is } from '@electron-toolkit/utils'
 import icon from '../../resources/icon.png?asset'
 
+import { PythonConnector } from './pyComm';
+
+const pythonConnector = new PythonConnector(); // the constructor starts connecting immediately
+setTimeout(() => { // sends a sample command 2 s after this module loads
+  pythonConnector.send({
+    command: 'process_data',
+    payload: { some: 'data' }
+  });
+}, 2000); // NOTE(review): looks like prototype scaffolding - confirm before release
+
 let mainWindow: BrowserWindow | undefined
 
 function createMainWindow(): void {
@@ -32,7 +42,7 @@ function createMainWindow(): void {
   if (is.dev && process.env['ELECTRON_RENDERER_URL']) {
     mainWindow.loadURL(process.env['ELECTRON_RENDERER_URL'])
   } else {
-    mainWindow.loadFile(path.join(__dirname, '../renderer/index.html'))
+    mainWindow.loadFile(path.join(__dirname, '../renderer/main/index.html'))
   }
 }
 
diff --git a/src/main/pyComm.ts b/src/main/pyComm.ts
new file mode 100644
index 0000000..09cd897
--- /dev/null
+++ b/src/main/pyComm.ts
@@ -0,0 +1,52 @@
+import WebSocket from 'ws';
+
+export class PythonConnector { // WebSocket client linking Electron's main process to the Python server
+  ws: WebSocket | null;
+
+  constructor() {
+    this.ws = null;
+    this.connect();
+  }
+
+  connect() { // opens the socket and wires its handlers; a 'close' event retries after 3 s
+    this.ws = new WebSocket('ws://localhost:8765');
+    this.ws.on('open', () => {
+      console.log('Python server connected');
+      this.send({ message: 'Electron Initialized' });
+    });
+    this.ws.on('message', (data) => {
+      let message;
+      try {
+        message = JSON.parse(data.toString());
+      } catch (error) {
+        // A non-JSON frame must not throw out of the listener and take down the main process.
+        console.error('Invalid JSON from Python:', error);
+        return;
+      }
+      console.log('Get message from Python:', message);
+      if (message?.notification) {
+        this.handleNotification(message.notification);
+      }
+    });
+    this.ws.on('close', () => {
+      console.log('Connection closed. Reconnecting...');
+      setTimeout(() => this.connect(), 3000);
+    });
+    this.ws.on('error', (error) => {
+      console.error('WebSocket Error:', error);
+    });
+  }
+
+  send(data) { // JSON-encodes `data`; only logs an error when the socket is not open
+    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
+      this.ws.send(JSON.stringify(data));
+    } else {
+      console.error('WebSocket not connected');
+    }
+  }
+
+  handleNotification(notification) {
+    // Handles notifications pushed by Python; UI updates or other actions can be triggered here.
+    console.log('Handel notification:', notification);
+  }
+}
\ No newline at end of file
diff --git a/src/renderer/caption/index.html b/src/renderer/caption/index.html
new file mode 100644
index 0000000..00b0f01
--- /dev/null
+++ b/src/renderer/caption/index.html
@@ -0,0 +1,17 @@
+<!doctype html>
+<html>
+  <head>
+    <meta charset="UTF-8" />
+    <title>Auto Caption Player</title>
+    <!-- https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP -->
+    <meta
+      http-equiv="Content-Security-Policy"
+      content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data:"
+    />
+  </head>
+
+  <body>
+    <div id="app"></div>
+    <script type="module" src="/src/main.ts"></script>
+  </body>
+</html>
diff --git a/src/renderer/caption/src/App.vue b/src/renderer/caption/src/App.vue
new file mode 100644
index 0000000..ce66d92
--- /dev/null
+++ b/src/renderer/caption/src/App.vue
@@ -0,0 +1,6 @@
+<template>
+  <h1>Caption</h1>
+</template>
+
+<script setup lang="ts">
+</script>
diff --git a/src/renderer/caption/src/assets/Template.vue b/src/renderer/caption/src/assets/Template.vue
new file mode 100644
index 0000000..b802c0a
--- /dev/null
+++ b/src/renderer/caption/src/assets/Template.vue
@@ -0,0 +1,11 @@
+<template>
+
+</template>
+
+<script setup lang="ts">
+
+</script>
+
+<style scoped>
+
+</style>
\ No newline at end of file
diff --git a/src/renderer/caption/src/assets/styles/reset.css b/src/renderer/caption/src/assets/styles/reset.css
new file mode 100644
index 0000000..93152d2
--- /dev/null
+++ b/src/renderer/caption/src/assets/styles/reset.css
@@ -0,0 +1,5 @@
+body {
+    margin: 0;
+    padding: 0;
+    background-color: black;
+}
\ No newline at end of file
diff --git a/src/renderer/caption/src/env.d.ts b/src/renderer/caption/src/env.d.ts
new file mode 100644
index 0000000..11f02fe
--- /dev/null
+++ b/src/renderer/caption/src/env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/src/renderer/caption/src/main.ts b/src/renderer/caption/src/main.ts
new file mode 100644
index 0000000..d8c9746
--- /dev/null
+++ b/src/renderer/caption/src/main.ts
@@ -0,0 +1,8 @@
+import './assets/styles/reset.css'
+import { createPinia } from 'pinia'
+import { createApp } from 'vue'
+import App from './App.vue'
+
+const app = createApp(App)
+app.use(createPinia()) // install Pinia before mounting
+app.mount('#app') // mounts into the <div id="app"> of the caption index.html
\ No newline at end of file
diff --git a/src/renderer/caption/src/stores/caption.ts b/src/renderer/caption/src/stores/caption.ts
new file mode 100644
index 0000000..3027019
--- /dev/null
+++ b/src/renderer/caption/src/stores/caption.ts
@@ -0,0 +1,9 @@
+import { ref } from 'vue'
+import { defineStore } from 'pinia'
+
+export const useCaptionStore = defineStore('caption', () => { // setup-style Pinia store holding caption font settings
+  const captionFontFamily = ref<string>('sans-serif')
+  const captionFontSize = ref<number>(24) // NOTE(review): unit is not stated here (presumably px) - confirm
+  const captionFontColor = ref<string>('#ffffff')
+  return { captionFontFamily, captionFontSize, captionFontColor }
+})
\ No newline at end of file