mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-04 04:14:42 +08:00
125 lines
3.1 KiB
Plaintext
125 lines
3.1 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "6fb12704",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"d:\\Projects\\auto-caption\\caption-engine\\subenv\\Lib\\site-packages\\vosk\\__init__.py\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Sanity check: print the file the kernel loads the vosk package from.\n",
|
||
"import vosk\n",
|
||
"\n",
|
||
"vosk_module_path = vosk.__file__\n",
|
||
"print(vosk_module_path)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "63a06f5c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
" 采样设备:\n",
|
||
" - 设备类型:音频输入\n",
|
||
" - 序号:1\n",
|
||
" - 名称:麦克风阵列 (Realtek(R) Audio)\n",
|
||
" - 最大输入通道数:2\n",
|
||
" - 默认低输入延迟:0.09s\n",
|
||
" - 默认高输入延迟:0.18s\n",
|
||
" - 默认采样率:44100.0Hz\n",
|
||
" - 是否回环设备:False\n",
|
||
"\n",
|
||
" 音频样本块大小:2205\n",
|
||
" 样本位宽:2\n",
|
||
" 采样格式:8\n",
|
||
" 音频通道数:2\n",
|
||
" 音频采样率:44100\n",
|
||
" \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import json\n",
|
||
"import os\n",
|
||
"import sys\n",
|
||
"\n",
|
||
"from vosk import Model, KaldiRecognizer\n",
|
||
"\n",
|
||
"# Make the caption-engine sources importable from this notebook.\n",
|
||
"# The path is normalised and only added when missing, so re-running this\n",
|
||
"# cell does not keep appending duplicate entries to sys.path.\n",
|
||
"current_dir = os.getcwd()\n",
|
||
"engine_dir = os.path.abspath(os.path.join(current_dir, '../caption-engine'))\n",
|
||
"if engine_dir not in sys.path:\n",
|
||
"    sys.path.append(engine_dir)\n",
|
||
"\n",
|
||
"# These imports must come after the sys.path setup above.\n",
|
||
"from sysaudio.win import AudioStream\n",
|
||
"from audioprcs import resampleRawChunk, mergeChunkChannels\n",
|
||
"\n",
|
||
"# NOTE(review): in the recorded run, argument 1 selected a microphone\n",
|
||
"# (audio input) device; confirm the meaning against AudioStream itself.\n",
|
||
"stream = AudioStream(1)\n",
|
||
"stream.printInfo()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "5d5a0afa",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Load the vosk-model-small-cn-0.22 model from caption-engine/models.\n",
|
||
"model_path = os.path.join(\n",
|
||
"    current_dir,\n",
|
||
"    '../caption-engine/models/vosk-model-small-cn-0.22'\n",
|
||
")\n",
|
||
"model = Model(model_path)\n",
|
||
"\n",
|
||
"# 16000 is the same rate the capture loop resamples each chunk to.\n",
|
||
"recognizer = KaldiRecognizer(model, 16000)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7e9d1530",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Rate the audio is resampled to; must equal the rate given to KaldiRecognizer.\n",
|
||
"TARGET_RATE = 16000\n",
|
||
"# Number of chunks to capture (roughly 10 s with the 2205-sample chunks at\n",
|
||
"# 44100 Hz shown in the recorded device info).\n",
|
||
"NUM_CHUNKS = 200\n",
|
||
"\n",
|
||
"stream.openStream()\n",
|
||
"\n",
|
||
"for chunk_index in range(NUM_CHUNKS):\n",
|
||
"    chunk = stream.read_chunk()\n",
|
||
"    chunk_mono = resampleRawChunk(chunk, stream.CHANNELS, stream.RATE, TARGET_RATE)\n",
|
||
"    if recognizer.AcceptWaveform(chunk_mono):\n",
|
||
"        # The recognizer finished an utterance: print its final text.\n",
|
||
"        result = json.loads(recognizer.Result())\n",
|
||
"        print(\"acc:\", result.get(\"text\", \"\"))\n",
|
||
"    else:\n",
|
||
"        # Utterance still in progress: print the running hypothesis.\n",
|
||
"        partial = json.loads(recognizer.PartialResult())\n",
|
||
"        print(\"else:\", partial.get(\"partial\", \"\"))\n",
|
||
"\n",
|
||
"# Flush the audio still buffered in the recognizer so the tail of the last\n",
|
||
"# utterance is not silently dropped when the loop ends.\n",
|
||
"final = json.loads(recognizer.FinalResult())\n",
|
||
"print(\"final:\", final.get(\"text\", \"\"))\n",
|
||
"\n",
|
||
"# NOTE(review): the stream opened above is never closed in this notebook;\n",
|
||
"# release it here once the AudioStream close method has been confirmed."
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "subenv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.1"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|