diff --git a/README.md b/README.md index 38acc2f..b971b2f 100755 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ cd <源码所在目录> ##### (4) macOS 运行 (Apple Silicon) - 适用于 macOS (Apple Silicon) 设备 - macOS (Intel) 请使用CPU, 强行使用GPU只会更慢 -- macOS (Apple Silicon)上字幕检测精准模式的模型(ch_det_fast/model.onnx)精度似乎不太理想, 推荐使用快速模式 +- macOS (Apple Silicon)上字幕检测PP-OCRv4-Server模型精度似乎不太理想, 推荐使用其他模型 ```shell pip install paddlepaddle==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ pip install torch==2.7.0 torchvision==0.22.0 diff --git a/README_en.md b/README_en.md index 1b532f4..888ee90 100755 --- a/README_en.md +++ b/README_en.md @@ -202,7 +202,7 @@ This project supports four running modes: CUDA (NVIDIA GPU acceleration), CPU (n ##### (4) Running on macOS (Apple Silicon) - Suitable for macOS (Apple Silicon) devices - For macOS (Intel), please use the CPU mode. Forcing GPU usage will only be slower. -- On macOS (Apple Silicon), the accuracy of the subtitle detection precision mode model (ch_det_fast/model.onnx) does not seem to be ideal. It is recommended to use the fast mode instead. +- On macOS (Apple Silicon), the accuracy of the PP-OCRv4-Server model for subtitle detection seems suboptimal. We recommend using an alternative model. 
```shell pip install paddlepaddle==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ pip install torch==2.7.0 torchvision==0.22.0 diff --git a/backend/config.py b/backend/config.py index 2d11076..97357f5 100644 --- a/backend/config.py +++ b/backend/config.py @@ -52,7 +52,7 @@ class Config(QConfig): # 【设置inpaint算法】 inpaintMode = OptionsConfigItem("Main", "InpaintMode", InpaintMode.STTN_AUTO, OptionsValidator(InpaintMode), EnumSerializer(InpaintMode)) - subtitleDetectMode = OptionsConfigItem("Main", "SubtitleDetectMode", SubtitleDetectMode.Accurate, OptionsValidator(SubtitleDetectMode), EnumSerializer(SubtitleDetectMode)) + subtitleDetectMode = OptionsConfigItem("Main", "SubtitleDetectMode", SubtitleDetectMode.PP_OCRv4_SERVER, OptionsValidator(SubtitleDetectMode), EnumSerializer(SubtitleDetectMode)) # 【设置像素点偏差】 # 用于判断是不是非字幕区域(一般认为字幕文本框的长度是要大于宽度的,如果字幕框的高大于宽,且大于的幅度超过指定像素点大小,则认为是错误检测) diff --git a/backend/interface/ch.ini b/backend/interface/ch.ini index 342a09c..228cbc6 100644 --- a/backend/interface/ch.ini +++ b/backend/interface/ch.ini @@ -76,7 +76,6 @@ DeleteABSection = 删除当前处理区块 DeleteSelection = 删除当前激活选区 [Main] -SubtitleDetectionAcceleratorON = 字幕检测使用{}进行加速 OnnxExectionProviderNotSupportedSkipped = ONNX 执行提供程序: {} 不支持,已跳过。 OnnxExecutionProviderDetected=检测到 ONNX 执行提供程序: {} OnnxRuntimeNotInstall = ONNX 运行环境未安装,已跳过。 @@ -85,7 +84,8 @@ DirectMLWarning = 警告: DirectML 加速仅适用于 STTN 模型,其他模型 ProcessingStartFindingSubtitles = [处理中] 开始查找字幕... FinishedFindingSubtitles = [结束] 查找字幕完成... ProcessingStartRemovingSubtitles = [处理中] 开始移除字幕... 
-UseModel = 去除字幕使用模型: {} +SubtitleRemoverModel = 去除字幕模型: {} +SubtitleDetectionModel = 字幕检测模型: {} FullScreenProcessingNote = 未设置字幕区域,将对全屏进行处理,最终效果可能不理想 ReadFileFailed = 读取文件 {} 失败 FinishedProcessing = [完成] 字幕移除成功, 文件已保存到 {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = 快速 -Accurate = 精准 - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/interface/chinese_cht.ini b/backend/interface/chinese_cht.ini index ff42f9a..5a17064 100644 --- a/backend/interface/chinese_cht.ini +++ b/backend/interface/chinese_cht.ini @@ -76,7 +76,6 @@ DeleteABSection = 刪除當前處理區塊 DeleteSelection = 刪除當前激活選區 [Main] -SubtitleDetectionAcceleratorON = 字幕檢測使用{}進行加速 OnnxExectionProviderNotSupportedSkipped = ONNX 執行提供者: {} 不支援,已略過。 OnnxExecutionProviderDetected = 偵測到 ONNX 執行提供者: {} OnnxRuntimeNotInstall = ONNX 執行環境未安裝,已略過。 @@ -85,7 +84,8 @@ DirectMLWarning = 警告:DirectML 加速僅適用於 STTN 模型,其他模 ProcessingStartFindingSubtitles = [處理中] 開始搜尋字幕... FinishedFindingSubtitles = [結束] 搜尋字幕完成... ProcessingStartRemovingSubtitles = [處理中] 開始移除字幕... -UseModel = 去除字幕使用模型: {} +SubtitleRemoverModel = 去除字幕模型: {} +SubtitleDetectionModel = 字幕檢測模型: {} FullScreenProcessingNote = 未設定字幕區域,將對全螢幕進行處理,最終效果可能不理想 ReadFileFailed = 讀取檔案 {} 失敗 FinishedProcessing = [完成] 字幕移除成功,檔案已儲存至 {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = 快速 -Accurate = 精準 - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/interface/en.ini b/backend/interface/en.ini index f440937..29886e9 100644 --- a/backend/interface/en.ini +++ b/backend/interface/en.ini @@ -76,7 +76,6 @@ DeleteABSection = Delete Section DeleteSelection = Delete Selection [Main] -SubtitleDetectionAcceleratorON = Subtitle detection accelerated with {} OnnxExectionProviderNotSupportedSkipped = ONNX provider: {} not supported, skipped. 
OnnxExecutionProviderDetected = Detected ONNX provider: {} OnnxRuntimeNotInstall = ONNX runtime not installed, skipped. @@ -85,7 +84,8 @@ DirectMLWarning = Warning: DirectML acceleration only works with STTN model. ProcessingStartFindingSubtitles = [Processing] Detecting subtitles... FinishedFindingSubtitles = [Complete] Subtitle detection finished. ProcessingStartRemovingSubtitles = [Processing] Removing subtitles... -UseModel = Use model for subtitle removal: {} +SubtitleRemoverModel = Subtitle removal model: {} +SubtitleDetectionModel = Subtitle detection model: {} FullScreenProcessingNote = Processing full screen (no area selected). Quality may vary. ReadFileFailed = Failed to read file: {} FinishedProcessing = [Complete] Subtitles removed. Output saved to: {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = Fast -Accurate = Accurate - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/interface/es.ini b/backend/interface/es.ini index 0580c94..87b25aa 100644 --- a/backend/interface/es.ini +++ b/backend/interface/es.ini @@ -76,7 +76,6 @@ DeleteABSection = Eliminar sección DeleteSelection = Eliminar selección [Main] -SubtitleDetectionAcceleratorON = Detección de subtítulos acelerada con {} OnnxExectionProviderNotSupportedSkipped = Proveedor ONNX: {} no soportado, omitido. OnnxExecutionProviderDetected = Proveedor ONNX detectado: {} OnnxRuntimeNotInstall = Entorno ONNX no instalado, omitido. @@ -85,7 +84,8 @@ DirectMLWarning = Advertencia: Aceleración DirectML solo funciona con modelo ST ProcessingStartFindingSubtitles = [Procesando] Detectando subtítulos... FinishedFindingSubtitles = [Completo] Detección finalizada. ProcessingStartRemovingSubtitles = [Procesando] Eliminando subtítulos... 
-UseModel = Usar modelo para eliminar subtítulos: {} +SubtitleRemoverModel = Modelo de eliminación de subtítulos: {} +SubtitleDetectionModel = Modelo de detección de subtítulos: {} FullScreenProcessingNote = Procesando pantalla completa (área no seleccionada). ReadFileFailed = Error al leer archivo: {} FinishedProcessing = [Completo] Subtítulos eliminados. Guardado en: {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = Rápido -Accurate = Preciso - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/interface/japan.ini b/backend/interface/japan.ini index b7ce6e9..58cc1ae 100644 --- a/backend/interface/japan.ini +++ b/backend/interface/japan.ini @@ -76,7 +76,6 @@ DeleteABSection = 現在の処理区間を削除 DeleteSelection = 現在のアクティブ選択範囲を削除 [Main] -SubtitleDetectionAcceleratorON = 字幕検出を{}で加速 OnnxExectionProviderNotSupportedSkipped = ONNXプロバイダ: {} 非対応 OnnxExecutionProviderDetected=ONNXプロバイダ検出: {} OnnxRuntimeNotInstall = ONNXランタイム未インストール @@ -85,7 +84,8 @@ DirectMLWarning = 警告: DirectML加速はSTTNモデルのみ ProcessingStartFindingSubtitles = [処理中] 字幕検索開始... FinishedFindingSubtitles = [完了] 字幕検索終了 ProcessingStartRemovingSubtitles = [処理中] 字幕削除開始... 
-UseModel = 字幕除去用モデル: {} +SubtitleRemoverModel = 字幕除去モデル: {} +SubtitleDetectionModel = 字幕検出モデル: {} FullScreenProcessingNote = 全画面処理(領域未選択) ReadFileFailed = ファイル読み込み失敗: {} FinishedProcessing = [完了] 字幕削除成功。保存先: {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = 高速 -Accurate = 高精度 - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/interface/ko.ini b/backend/interface/ko.ini index eac91d3..40b0eb8 100644 --- a/backend/interface/ko.ini +++ b/backend/interface/ko.ini @@ -76,7 +76,6 @@ DeleteABSection = 현재 처리 구간 삭제 DeleteSelection = 현재 활성 선택 영역 삭제 [Main] -SubtitleDetectionAcceleratorON = 자막 감지 {} 가속 사용 OnnxExectionProviderNotSupportedSkipped = ONNX 공급자: {} 지원 안됨 OnnxExecutionProviderDetected=ONNX 공급자 감지: {} OnnxRuntimeNotInstall = ONNX 런타임 미설치 @@ -85,7 +84,8 @@ DirectMLWarning = 경고: DirectML 가속은 STTN 모델 전용 ProcessingStartFindingSubtitles = [진행중] 자막 검색 시작... FinishedFindingSubtitles = [완료] 자막 검색 완료 ProcessingStartRemovingSubtitles = [진행중] 자막 제거 시작... -UseModel = 자막 제거 모델 사용: {} +SubtitleRemoverModel = 자막 제거 모델: {} +SubtitleDetectionModel = 자막 감지 모델: {} FullScreenProcessingNote = 전체 화면 처리 (영역 미선택) ReadFileFailed = 파일 읽기 실패: {} FinishedProcessing = [완료] 자막 제거 완료. 
저장 위치: {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = 빠름 -Accurate = 정확 - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/interface/vi.ini b/backend/interface/vi.ini index 7eb8f17..6b86c63 100644 --- a/backend/interface/vi.ini +++ b/backend/interface/vi.ini @@ -76,7 +76,6 @@ DeleteABSection = Xóa vùng xử lý hiện tại DeleteSelection = Xóa vùng chọn hiện tại [Main] -SubtitleDetectionAcceleratorON = Phát hiện phụ đề được tăng tốc bằng {} OnnxExectionProviderNotSupportedSkipped = ONNX provider: {} không hỗ trợ OnnxExecutionProviderDetected= Phát hiện ONNX provider: {} OnnxRuntimeNotInstall = Chưa cài đặt ONNX Runtime @@ -85,7 +84,8 @@ DirectMLWarning = Cảnh báo: DirectML chỉ hỗ trợ STTN ProcessingStartFindingSubtitles = [Đang xử lý] Bắt đầu tìm phụ đề... FinishedFindingSubtitles = [Hoàn thành] Tìm phụ đề xong ProcessingStartRemovingSubtitles = [Đang xử lý] Bắt đầu xóa phụ đề... -UseModel = Sử dụng mô hình xóa phụ đề: {} +SubtitleRemoverModel = Mô hình xóa phụ đề: {} +SubtitleDetectionModel = Mô hình phát hiện phụ đề: {} FullScreenProcessingNote = Xử lý toàn màn hình (không chọn vùng) ReadFileFailed = Lỗi đọc file: {} FinishedProcessing = [Hoàn thành] Xóa phụ đề thành công, lưu tại: {} @@ -124,10 +124,6 @@ LAMA = LAMA ProPainter = ProPainter OpenCV = OpenCV -[SubtitleDetectMode] -Fast = Nhanh -Accurate = Chính xác - [InterfaceLanguage] ChineseSimplified = 简体中文 ChineseTraditional = 繁體中文 diff --git a/backend/main.py b/backend/main.py index e729f55..afe37b4 100644 --- a/backend/main.py +++ b/backend/main.py @@ -334,7 +334,6 @@ class SubtitleRemover: # 如果使用GPU加速,则打印GPU加速提示 if self.hardware_accelerator.has_accelerator(): accelerator_name = self.hardware_accelerator.accelerator_name - self.append_output(tr['Main']['SubtitleDetectionAcceleratorON'].format(accelerator_name)) if accelerator_name == 'DirectML' and config.inpaintMode.value not in [InpaintMode.STTN_AUTO, 
InpaintMode.STTN_DET]: self.append_output(tr['Main']['DirectMLWarning']) os.makedirs(os.path.dirname(self.video_out_path), exist_ok=True) @@ -401,7 +400,8 @@ class SubtitleRemover: model_device = 'DirectML' if self.hardware_accelerator.has_cuda() or self.hardware_accelerator.has_mps(): model_device = accelerator_name - self.append_output(tr['Main']['UseModel'].format(f"{model_friendly_name} ({model_device})")) + self.append_output(tr['Main']['SubtitleRemoverModel'].format(f"{model_friendly_name} ({model_device})")) + self.append_output(tr['Main']['SubtitleDetectionModel'].format(f"{config.subtitleDetectMode.value.value} ({', '.join(self.hardware_accelerator.onnx_providers)})")) def merge_audio_to_video(self): # 创建音频临时对象,windows下delete=True会有permission denied的报错 diff --git a/backend/models/V4/ch_det/fs_manifest.csv b/backend/models/V4/ch_det/fs_manifest.csv new file mode 100644 index 0000000..8c89ff4 --- /dev/null +++ b/backend/models/V4/ch_det/fs_manifest.csv @@ -0,0 +1,4 @@ +filename,filesize,encoding,header +inference_1.onnx,50000000,, +inference_2.onnx,50000000,, +inference_3.onnx,13443410,, diff --git a/backend/models/V4/ch_det/inference_1.onnx b/backend/models/V4/ch_det/inference_1.onnx new file mode 100644 index 0000000..3c85877 Binary files /dev/null and b/backend/models/V4/ch_det/inference_1.onnx differ diff --git a/backend/models/V4/ch_det/inference_2.onnx b/backend/models/V4/ch_det/inference_2.onnx new file mode 100644 index 0000000..1d70fb8 Binary files /dev/null and b/backend/models/V4/ch_det/inference_2.onnx differ diff --git a/backend/models/V4/ch_det/inference_3.onnx b/backend/models/V4/ch_det/inference_3.onnx new file mode 100644 index 0000000..9b394fa Binary files /dev/null and b/backend/models/V4/ch_det/inference_3.onnx differ diff --git a/backend/models/V4/ch_det_fast/inference.onnx b/backend/models/V4/ch_det_fast/inference.onnx new file mode 100644 index 0000000..a4efe53 Binary files /dev/null and b/backend/models/V4/ch_det_fast/inference.onnx 
differ diff --git a/backend/tools/constant.py b/backend/tools/constant.py index 0645bc5..3eec677 100644 --- a/backend/tools/constant.py +++ b/backend/tools/constant.py @@ -16,5 +16,7 @@ class SubtitleDetectMode(Enum): """ 字幕检测算法枚举 """ - Fast = 0 - Accurate = 1 \ No newline at end of file + PP_OCRv4_SERVER = "PP-OCRv4-Server" + PP_OCRv4_MOBILE = "PP-OCRv4-Mobile" + PP_OCRv5_SERVER = "PP-OCRv5-Server" + PP_OCRv5_MOBILE = "PP-OCRv5-Mobile" \ No newline at end of file diff --git a/backend/tools/model_config.py b/backend/tools/model_config.py index 5ee43c9..9c0107d 100644 --- a/backend/tools/model_config.py +++ b/backend/tools/model_config.py @@ -9,10 +9,14 @@ class ModelConfig: self.STTN_AUTO_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn-auto', 'infer_model.pth') self.STTN_DET_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn-det', 'sttn.pth') self.PROPAINTER_MODEL_DIR = os.path.join(BASE_DIR,'models', 'propainter') - if config.subtitleDetectMode.value == SubtitleDetectMode.Fast: + if config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv5_MOBILE: self.DET_MODEL_DIR = os.path.join(BASE_DIR,'models', 'V5', 'ch_det_fast') - elif config.subtitleDetectMode.value == SubtitleDetectMode.Accurate: + elif config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv5_SERVER: self.DET_MODEL_DIR = os.path.join(BASE_DIR, 'models', 'V5', 'ch_det') + elif config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv4_MOBILE: + self.DET_MODEL_DIR = os.path.join(BASE_DIR,'models', 'V4', 'ch_det_fast') + elif config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv4_SERVER: + self.DET_MODEL_DIR = os.path.join(BASE_DIR, 'models', 'V4', 'ch_det') else: raise ValueError(f"Invalid subtitle detect mode: {config.subtitleDetectMode.value}") diff --git a/ui/setting_interface.py b/ui/setting_interface.py index 67527da..d83bc28 100644 --- a/ui/setting_interface.py +++ b/ui/setting_interface.py @@ -43,7 +43,7 @@ class SettingInterface(QtWidgets.QVBoxLayout): 
title=tr["SubtitleExtractorGUI"]["SubtitleDetectMode"], content="", parent=parent, - texts=[list(tr['SubtitleDetectMode'].values())[i] for i,_ in enumerate(config.subtitleDetectMode.validator.options)], + texts=[mode.value for mode in config.subtitleDetectMode.validator.options], ) self.addWidget(self.subtitle_detect_model_combo)