mirror of
https://github.com/HiMeditator/auto-caption.git
synced 2026-02-14 19:48:18 +08:00
- 更新 GummyTranslator 类,优化字幕生成逻辑 - 移除 audioprcs 模块,音频处理功能转移到 utils 模块 - 重构 sysaudio 模块,提高音频流管理的灵活性和稳定性 - 修改 TODO.md,完成按时间降序排列字幕记录的功能 - 更新文档,说明因资源限制将不再维护英文和日文文档
50 lines
1.8 KiB
Python
50 lines
1.8 KiB
Python
import sys
|
|
import argparse
|
|
from sysaudio import AudioStream
|
|
from utils import merge_chunk_channels
|
|
from audio2text import InvalidParameter, GummyTranslator
|
|
|
|
|
|
def convert_audio_to_text(s_lang, t_lang, audio_type, chunk_rate, api_key):
    """Stream captured audio to the Gummy translator until interrupted.

    Opens an ``AudioStream``, starts a ``GummyTranslator`` and then loops
    forever: read a chunk, pass it through ``merge_chunk_channels`` and send
    the result to the translator. Pressing Ctrl+C (``KeyboardInterrupt``)
    ends the loop and returns normally.

    The audio stream is closed on every exit path, and the translator is
    stopped whenever it was successfully started, so an unexpected error does
    not leave either of them running.

    Args:
        s_lang: Source language code, forwarded to ``GummyTranslator``.
        t_lang: Target language code. The literal string ``'none'`` is
            forwarded as ``None`` (presumably this disables translation;
            confirm against ``GummyTranslator``).
        audio_type: First argument of ``AudioStream``; the command-line help
            describes 0 as the output audio stream and 1 as the input one.
        chunk_rate: Second argument of ``AudioStream``; the command-line help
            describes it as the number of chunks collected per second.
        api_key: API key forwarded to ``GummyTranslator``.

    Raises:
        Exception: Anything other than ``KeyboardInterrupt`` raised while
            starting or feeding the translator propagates after cleanup.
    """
    stream = AudioStream(audio_type, chunk_rate)

    # 'none' is the command-line spelling for "no target language".
    target_lang = None if t_lang == 'none' else t_lang
    gummy = GummyTranslator(stream.RATE, s_lang, target_lang, api_key)

    stream.open_stream()
    started = False
    try:
        gummy.start()
        started = True

        while True:
            chunk = stream.read_chunk()
            if chunk is None:
                # Nothing was captured this round; poll again.
                continue

            chunk_mono = merge_chunk_channels(chunk, stream.CHANNELS)
            try:
                gummy.send_audio_frame(chunk_mono)
            except InvalidParameter:
                # The translator rejected the frame (presumably because its
                # session is no longer running; confirm). Start it again and
                # resend the same chunk once; a second failure propagates.
                gummy.start()
                gummy.send_audio_frame(chunk_mono)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to end the capture loop.
        pass
    finally:
        # Same order as the original shutdown: close the stream, then stop
        # the translator. The nested finally makes sure the translator is
        # still stopped if closing the stream raises.
        try:
            stream.close_stream()
        finally:
            if started:
                gummy.stop()
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the capture loop.
    parser = argparse.ArgumentParser(description='Convert system audio stream to text')
    parser.add_argument('-s', '--source_language', default='en', help='Source language code')
    parser.add_argument('-t', '--target_language', default='zh', help='Target language code')
    # type=int makes argparse reject non-numeric values with a usage error
    # instead of letting a later int() call fail with a ValueError traceback.
    parser.add_argument('-a', '--audio_type', type=int, default=0, help='Audio stream source: 0 for output audio stream, 1 for input audio stream')
    parser.add_argument('-c', '--chunk_rate', type=int, default=20, help='The number of audio stream chunks collected per second.')
    parser.add_argument('-k', '--api_key', default='', help='API KEY for Gummy model')
    args = parser.parse_args()

    convert_audio_to_text(
        args.source_language,
        args.target_language,
        args.audio_type,
        args.chunk_rate,
        args.api_key,
    )
|