{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6fb12704",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d:\\Projects\\auto-caption\\caption-engine\\subenv\\Lib\\site-packages\\vosk\\__init__.py\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: show which vosk installation this kernel actually imports.\n",
    "import vosk\n",
    "print(vosk.__file__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "63a06f5c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " 采样设备:\n",
      " - 设备类型:音频输入\n",
      " - 序号:1\n",
      " - 名称:麦克风阵列 (Realtek(R) Audio)\n",
      " - 最大输入通道数:2\n",
      " - 默认低输入延迟:0.09s\n",
      " - 默认高输入延迟:0.18s\n",
      " - 默认采样率:44100.0Hz\n",
      " - 是否回环设备:False\n",
      "\n",
      " 音频样本块大小:2205\n",
      " 样本位宽:2\n",
      " 采样格式:8\n",
      " 音频通道数:2\n",
      " 音频采样率:44100\n",
      " \n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "import json\n",
    "from vosk import Model, KaldiRecognizer\n",
    "\n",
    "current_dir = os.getcwd()\n",
    "# Project modules are resolved relative to the kernel's working directory,\n",
    "# so the kernel must be started from a directory that sits next to caption-engine.\n",
    "engine_dir = os.path.join(current_dir, '../caption-engine')\n",
    "# Guard the append so re-running this cell does not keep growing sys.path.\n",
    "if engine_dir not in sys.path:\n",
    "    sys.path.append(engine_dir)\n",
    "\n",
    "from sysaudio.win import AudioStream\n",
    "from audioprcs import resampleRawChunk, mergeChunkChannels\n",
    "\n",
    "# Rate handed to both the resampler and KaldiRecognizer; the two must agree.\n",
    "VOSK_SAMPLE_RATE = 16000\n",
    "# Number of chunks the recognition loop reads before stopping.\n",
    "NUM_CHUNKS = 200\n",
    "\n",
    "# In the recorded run below, AudioStream(1) selected an audio-input (microphone) device.\n",
    "stream = AudioStream(1)\n",
    "stream.printInfo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5d5a0afa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Vosk model from the caption-engine models directory (relative to current_dir).\n",
    "model = Model(os.path.join(\n",
    "    current_dir,\n",
    "    '../caption-engine/models/vosk-model-small-cn-0.22'\n",
    "))\n",
    "# The recognizer is told the sample rate of the audio it will be fed.\n",
    "recognizer = KaldiRecognizer(model, VOSK_SAMPLE_RATE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e9d1530",
   "metadata": {},
   "outputs": [],
   "source": [
    "stream.openStream()\n",
    "\n",
    "try:\n",
    "    for _ in range(NUM_CHUNKS):\n",
    "        chunk = stream.read_chunk()\n",
    "        # Convert from the capture format (stream.CHANNELS, stream.RATE) to the recognizer's rate.\n",
    "        chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, VOSK_SAMPLE_RATE)\n",
    "        if recognizer.AcceptWaveform(chunk_mono):\n",
    "            # AcceptWaveform returned true: a finished result is available.\n",
    "            result = json.loads(recognizer.Result())\n",
    "            print(\"acc:\", result.get(\"text\", \"\"))\n",
    "        else:\n",
    "            # Otherwise only a partial hypothesis is available so far.\n",
    "            partial = json.loads(recognizer.PartialResult())\n",
    "            print(\"else:\", partial.get(\"partial\", \"\"))\n",
    "finally:\n",
    "    # Flush audio still buffered in the recognizer so the tail of the last\n",
    "    # utterance is not lost when the loop ends or the cell is interrupted.\n",
    "    final = json.loads(recognizer.FinalResult())\n",
    "    print(\"final:\", final.get(\"text\", \"\"))\n",
    "    # TODO(review): release the capture device here; AudioStream's close\n",
    "    # method is not visible from this notebook, so no call is guessed."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "subenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}