mirror of
https://github.com/YaoFANGUK/video-subtitle-remover.git
synced 2026-06-11 03:23:13 +08:00
使用PySide6-Fluent-Widgets重构整套UI
添加任务列表组件并优化视频加载逻辑;支持可视化显示字幕区域;整理所有模型,分别为STTN智能擦除、STTN字幕检测、LAMA、ProPainter、OpenCV;提高处理性能;新增CPU运行模式并优化多语言支持;修复ProPainter模式部分视频报错。本次提交新增了CPU运行模式,适用于无GPU加速的场景。同时,优化了多语言支持,新增了日语、韩语、越南语等语言配置文件,并更新了README文档以反映新的运行模式和多语言支持。此外,修复了部分代码逻辑,提升了系统的稳定性和兼容性。
This commit is contained in:
@@ -1,162 +1,122 @@
|
||||
import warnings
|
||||
from enum import Enum, unique
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
import os
|
||||
import torch
|
||||
import logging
|
||||
import platform
|
||||
import stat
|
||||
from fsplit.filesplit import Filesplit
|
||||
import onnxruntime as ort
|
||||
from pathlib import Path
|
||||
from qfluentwidgets import (qconfig, ConfigItem, QConfig, OptionsValidator, BoolValidator, OptionsConfigItem,
|
||||
EnumSerializer, RangeValidator, RangeConfigItem)
|
||||
from backend.tools.constant import InpaintMode, SubtitleDetectMode
|
||||
import configparser
|
||||
|
||||
# 项目版本号
|
||||
VERSION = "1.1.1"
|
||||
# ×××××××××××××××××××× [不要改] start ××××××××××××××××××××
|
||||
logging.disable(logging.DEBUG) # 关闭DEBUG日志的打印
|
||||
logging.disable(logging.WARNING) # 关闭WARNING日志的打印
|
||||
try:
|
||||
import torch_directml
|
||||
device = torch_directml.device(torch_directml.default_device())
|
||||
USE_DML = True
|
||||
except:
|
||||
USE_DML = False
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LAMA_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'big-lama')
|
||||
STTN_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn', 'infer_model.pth')
|
||||
VIDEO_INPAINT_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'video')
|
||||
MODEL_VERSION = 'V4'
|
||||
DET_MODEL_BASE = os.path.join(BASE_DIR, 'models')
|
||||
DET_MODEL_PATH = os.path.join(DET_MODEL_BASE, MODEL_VERSION, 'ch_det')
|
||||
VERSION = "1.2.2"
|
||||
PROJECT_HOME_URL = "https://github.com/YaoFANGUK/video-subtitle-remover"
|
||||
PROJECT_ISSUES_URL = PROJECT_HOME_URL + "/issues"
|
||||
PROJECT_RELEASES_URL = PROJECT_HOME_URL + "/releases"
|
||||
PROJECT_UPDATE_URLS = [
|
||||
"https://api.github.com/repos/YaoFANGUK/video-subtitle-remover/releases/latest",
|
||||
"https://accelerate.xdow.net/api/repos/YaoFANGUK/video-subtitle-remover/releases/latest",
|
||||
]
|
||||
|
||||
# 查看该路径下是否有模型完整文件,没有的话合并小文件生成完整文件
|
||||
if 'big-lama.pt' not in (os.listdir(LAMA_MODEL_PATH)):
|
||||
fs = Filesplit()
|
||||
fs.merge(input_dir=LAMA_MODEL_PATH)
|
||||
# 硬件加速选项开关
|
||||
HARDWARD_ACCELERATION_OPTION = True
|
||||
|
||||
if 'inference.pdiparams' not in os.listdir(DET_MODEL_PATH):
|
||||
fs = Filesplit()
|
||||
fs.merge(input_dir=DET_MODEL_PATH)
|
||||
class Config(QConfig):
|
||||
# 界面语言设置
|
||||
intefaceTexts = {
|
||||
'简体中文': 'ch',
|
||||
'繁體中文': 'chinese_cht',
|
||||
'English': 'en',
|
||||
'한국어': 'ko',
|
||||
'日本語': 'japan',
|
||||
'Tiếng Việt': 'vi',
|
||||
'Español': 'es'
|
||||
}
|
||||
interface = OptionsConfigItem("Window", "Interface", "ChineseSimplified", OptionsValidator(intefaceTexts.values()), restart = True)
|
||||
|
||||
# 窗口位置和大小
|
||||
windowX = ConfigItem("Window", "X", None)
|
||||
windowY = ConfigItem("Window", "Y", None)
|
||||
windowW = ConfigItem("Window", "Width", 1200)
|
||||
windowH = ConfigItem("Window", "Height", 1200)
|
||||
|
||||
if 'ProPainter.pth' not in os.listdir(VIDEO_INPAINT_MODEL_PATH):
|
||||
fs = Filesplit()
|
||||
fs.merge(input_dir=VIDEO_INPAINT_MODEL_PATH)
|
||||
subtitleSelectionAreaX = ConfigItem("Main", "SubtitleSelectionAreaX", 0.15)
|
||||
subtitleSelectionAreaY = ConfigItem("Main", "SubtitleSelectionAreaY", 0.88)
|
||||
subtitleSelectionAreaW = ConfigItem("Main", "SubtitleSelectionAreaW", 0.70)
|
||||
subtitleSelectionAreaH = ConfigItem("Main", "SubtitleSelectionAreaH", 0.11)
|
||||
|
||||
# 指定ffmpeg可执行程序路径
|
||||
sys_str = platform.system()
|
||||
if sys_str == "Windows":
|
||||
ffmpeg_bin = os.path.join('win_x64', 'ffmpeg.exe')
|
||||
elif sys_str == "Linux":
|
||||
ffmpeg_bin = os.path.join('linux_x64', 'ffmpeg')
|
||||
else:
|
||||
ffmpeg_bin = os.path.join('macos', 'ffmpeg')
|
||||
FFMPEG_PATH = os.path.join(BASE_DIR, '', 'ffmpeg', ffmpeg_bin)
|
||||
|
||||
if 'ffmpeg.exe' not in os.listdir(os.path.join(BASE_DIR, '', 'ffmpeg', 'win_x64')):
|
||||
fs = Filesplit()
|
||||
fs.merge(input_dir=os.path.join(BASE_DIR, '', 'ffmpeg', 'win_x64'))
|
||||
# 将ffmpeg添加可执行权限
|
||||
os.chmod(FFMPEG_PATH, stat.S_IRWXU + stat.S_IRWXG + stat.S_IRWXO)
|
||||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
|
||||
|
||||
# 是否使用ONNX(DirectML/AMD/Intel)
|
||||
ONNX_PROVIDERS = []
|
||||
available_providers = ort.get_available_providers()
|
||||
for provider in available_providers:
|
||||
if provider in [
|
||||
"CPUExecutionProvider"
|
||||
]:
|
||||
continue
|
||||
if provider not in [
|
||||
"DmlExecutionProvider", # DirectML,适用于 Windows GPU
|
||||
"ROCMExecutionProvider", # AMD ROCm
|
||||
"MIGraphXExecutionProvider", # AMD MIGraphX
|
||||
"VitisAIExecutionProvider", # AMD VitisAI,适用于 RyzenAI & Windows, 实测和DirectML性能似乎差不多
|
||||
"OpenVINOExecutionProvider", # Intel GPU
|
||||
"MetalExecutionProvider", # Apple macOS
|
||||
"CoreMLExecutionProvider", # Apple macOS
|
||||
"CUDAExecutionProvider", # Nvidia GPU
|
||||
]:
|
||||
continue
|
||||
ONNX_PROVIDERS.append(provider)
|
||||
# ×××××××××××××××××××× [不要改] end ××××××××××××××××××××
|
||||
|
||||
|
||||
@unique
|
||||
class InpaintMode(Enum):
|
||||
"""
|
||||
图像重绘算法枚举
|
||||
MODE可选算法类型
|
||||
- InpaintMode.STTN_AUTO 智能擦除版
|
||||
- InpaintMode.STTN_DET 带字幕检测版, 无智能擦除
|
||||
- InpaintMode.LAMA 算法:对于动画类视频效果好,速度一般,不可以跳过字幕检测
|
||||
- InpaintMode.PROPAINTER 算法: 需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好
|
||||
"""
|
||||
STTN = 'sttn'
|
||||
LAMA = 'lama'
|
||||
PROPAINTER = 'propainter'
|
||||
# 【设置inpaint算法】
|
||||
inpaintMode = OptionsConfigItem("Main", "InpaintMode", InpaintMode.STTN_AUTO, OptionsValidator(InpaintMode), EnumSerializer(InpaintMode))
|
||||
|
||||
subtitleDetectMode = OptionsConfigItem("Main", "SubtitleDetectMode", SubtitleDetectMode.Accurate, OptionsValidator(SubtitleDetectMode), EnumSerializer(SubtitleDetectMode))
|
||||
|
||||
# 【设置像素点偏差】
|
||||
# 用于判断是不是非字幕区域(一般认为字幕文本框的长度是要大于宽度的,如果字幕框的高大于宽,且大于的幅度超过指定像素点大小,则认为是错误检测)
|
||||
subtitleYXAxisDifferencePixel = RangeConfigItem("Main", "SubtitleYXAxisDifferencePixel", 10, RangeValidator(0, 300))
|
||||
# 用于放大mask大小,防止自动检测的文本框过小,inpaint阶段出现文字边,有残留
|
||||
subtitleAreaDeviationPixel = RangeConfigItem("Main", "SubtitleAreaDeviationPixel", 10, RangeValidator(1, 300))
|
||||
# 用于判断两个文本框是否为同一行字幕,高度差距指定像素点以内认为是同一行
|
||||
subtitleAreaYAxisDifferencePixel = RangeConfigItem("Main", "SubtitleAreaYAxisDifferencePixel", 20, RangeValidator(0, 300))
|
||||
# 用于判断两个字幕文本的矩形框是否相似,如果X轴和Y轴偏差都在指定阈值内,则认为是同一个文本框
|
||||
subtitleAreaPixelToleranceYPixel = RangeConfigItem("Main", "SubtitleAreaPixelToleranceYPixel", 20, RangeValidator(0, 300))
|
||||
subtitleAreaPixelToleranceXPixel = RangeConfigItem("Main", "SubtitleAreaPixelToleranceXPixel", 20, RangeValidator(0, 300))
|
||||
subtitleTimelineBackwardFrameCount = RangeConfigItem("Main", "SubtitleTimelineBackwardFrameCount", 3, RangeValidator(0, 300))
|
||||
subtitleTimelineForwardFrameCount = RangeConfigItem("Main", "subtitleTimelineForwardFrameCount", 3, RangeValidator(0, 300))
|
||||
# 以下参数仅适用STTN算法时,才生效
|
||||
"""
|
||||
1. STTN_SKIP_DETECTION
|
||||
含义:是否使用跳过检测
|
||||
效果:设置为True跳过字幕检测,会省去很大时间,但是可能误伤无字幕的视频帧或者会导致去除的字幕漏了
|
||||
|
||||
# ×××××××××××××××××××× [可以改] start ××××××××××××××××××××
|
||||
# 是否使用h264编码,如果需要安卓手机分享生成的视频,请打开该选项
|
||||
USE_H264 = True
|
||||
2. STTN_NEIGHBOR_STRIDE
|
||||
含义:相邻帧数步长, 如果需要为第50帧填充缺失的区域,STTN_NEIGHBOR_STRIDE=5,那么算法会使用第45帧、第40帧等作为参照。
|
||||
效果:用于控制参考帧选择的密度,较大的步长意味着使用更少、更分散的参考帧,较小的步长意味着使用更多、更集中的参考帧。
|
||||
|
||||
# ×××××××××× 通用设置 start ××××××××××
|
||||
"""
|
||||
MODE可选算法类型
|
||||
- InpaintMode.STTN 算法:对于真人视频效果较好,速度快,可以跳过字幕检测
|
||||
- InpaintMode.LAMA 算法:对于动画类视频效果好,速度一般,不可以跳过字幕检测
|
||||
- InpaintMode.PROPAINTER 算法: 需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好
|
||||
"""
|
||||
# 【设置inpaint算法】
|
||||
MODE = InpaintMode.STTN
|
||||
# 【设置像素点偏差】
|
||||
# 用于判断是不是非字幕区域(一般认为字幕文本框的长度是要大于宽度的,如果字幕框的高大于宽,且大于的幅度超过指定像素点大小,则认为是错误检测)
|
||||
THRESHOLD_HEIGHT_WIDTH_DIFFERENCE = 10
|
||||
# 用于放大mask大小,防止自动检测的文本框过小,inpaint阶段出现文字边,有残留
|
||||
SUBTITLE_AREA_DEVIATION_PIXEL = 20
|
||||
# 用于判断两个文本框是否为同一行字幕,高度差距指定像素点以内认为是同一行
|
||||
THRESHOLD_HEIGHT_DIFFERENCE = 20
|
||||
# 用于判断两个字幕文本的矩形框是否相似,如果X轴和Y轴偏差都在指定阈值内,则认为是同一个文本框
|
||||
PIXEL_TOLERANCE_Y = 20 # 允许检测框纵向偏差的像素点数
|
||||
PIXEL_TOLERANCE_X = 20 # 允许检测框横向偏差的像素点数
|
||||
# ×××××××××× 通用设置 end ××××××××××
|
||||
3. STTN_REFERENCE_LENGTH
|
||||
含义:参考帧数量,STTN算法会查看每个待修复帧的前后若干帧来获得用于修复的上下文信息
|
||||
效果:调大会增加显存占用,处理效果变好,但是处理速度变慢
|
||||
|
||||
# ×××××××××× InpaintMode.STTN算法设置 start ××××××××××
|
||||
# 以下参数仅适用STTN算法时,才生效
|
||||
"""
|
||||
1. STTN_SKIP_DETECTION
|
||||
含义:是否使用跳过检测
|
||||
效果:设置为True跳过字幕检测,会省去很大时间,但是可能误伤无字幕的视频帧或者会导致去除的字幕漏了
|
||||
4. STTN_MAX_LOAD_NUM
|
||||
含义:STTN算法每次最多加载的视频帧数量
|
||||
效果:设置越大速度越慢,但效果越好
|
||||
注意:要保证STTN_MAX_LOAD_NUM大于STTN_NEIGHBOR_STRIDE和STTN_REFERENCE_LENGTH
|
||||
"""
|
||||
# 参考帧步长
|
||||
sttnNeighborStride = RangeConfigItem("Sttn", "NeighborStride", 5, RangeValidator(1, 100))
|
||||
# 参考帧数量
|
||||
sttnReferenceLength = RangeConfigItem("Sttn", "ReferenceLength", 10, RangeValidator(1, 100))
|
||||
# 设置STTN算法最大同时处理的帧数量
|
||||
sttnMaxLoadNum = RangeConfigItem("Sttn", "MaxLoadNum", 50, RangeValidator(1, 300))
|
||||
getSttnMaxLoadNum = lambda self: max(self.sttnMaxLoadNum.value, self.sttnNeighborStride.value * self.sttnReferenceLength.value)
|
||||
|
||||
# 以下参数仅适用PROPAINTER算法时,才生效
|
||||
# 【根据自己的GPU显存大小设置】最大同时处理的图片数量,设置越大处理效果越好,但是要求显存越高
|
||||
# 1280x720p视频设置80需要25G显存,设置50需要19G显存
|
||||
# 720x480p视频设置80需要8G显存,设置50需要7G显存
|
||||
propainterMaxLoadNum = RangeConfigItem("ProPainter", "MaxLoadNum", 70, RangeValidator(1, 300))
|
||||
|
||||
2. STTN_NEIGHBOR_STRIDE
|
||||
含义:相邻帧数步长, 如果需要为第50帧填充缺失的区域,STTN_NEIGHBOR_STRIDE=5,那么算法会使用第45帧、第40帧等作为参照。
|
||||
效果:用于控制参考帧选择的密度,较大的步长意味着使用更少、更分散的参考帧,较小的步长意味着使用更多、更集中的参考帧。
|
||||
# 是否使用硬件加速
|
||||
hardwareAcceleration = ConfigItem("Main", "HardwareAcceleration", HARDWARD_ACCELERATION_OPTION, BoolValidator())
|
||||
|
||||
# 启动时检查应用更新
|
||||
checkUpdateOnStartup = ConfigItem("Main", "CheckUpdateOnStartup", True, BoolValidator())
|
||||
|
||||
3. STTN_REFERENCE_LENGTH
|
||||
含义:参考帧数量,STTN算法会查看每个待修复帧的前后若干帧来获得用于修复的上下文信息
|
||||
效果:调大会增加显存占用,处理效果变好,但是处理速度变慢
|
||||
CONFIG_FILE = 'config/config.json'
|
||||
config = Config()
|
||||
qconfig.load(CONFIG_FILE, config)
|
||||
|
||||
4. STTN_MAX_LOAD_NUM
|
||||
含义:STTN算法每次最多加载的视频帧数量
|
||||
效果:设置越大速度越慢,但效果越好
|
||||
注意:要保证STTN_MAX_LOAD_NUM大于STTN_NEIGHBOR_STRIDE和STTN_REFERENCE_LENGTH
|
||||
"""
|
||||
STTN_SKIP_DETECTION = True
|
||||
# 参考帧步长
|
||||
STTN_NEIGHBOR_STRIDE = 5
|
||||
# 参考帧长度(数量)
|
||||
STTN_REFERENCE_LENGTH = 10
|
||||
# 设置STTN算法最大同时处理的帧数量
|
||||
STTN_MAX_LOAD_NUM = 50
|
||||
if STTN_MAX_LOAD_NUM < STTN_REFERENCE_LENGTH * STTN_NEIGHBOR_STRIDE:
|
||||
STTN_MAX_LOAD_NUM = STTN_REFERENCE_LENGTH * STTN_NEIGHBOR_STRIDE
|
||||
# ×××××××××× InpaintMode.STTN算法设置 end ××××××××××
|
||||
# 读取界面语言配置
|
||||
tr = configparser.ConfigParser()
|
||||
|
||||
# ×××××××××× InpaintMode.PROPAINTER算法设置 start ××××××××××
|
||||
# 【根据自己的GPU显存大小设置】最大同时处理的图片数量,设置越大处理效果越好,但是要求显存越高
|
||||
# 1280x720p视频设置80需要25G显存,设置50需要19G显存
|
||||
# 720x480p视频设置80需要8G显存,设置50需要7G显存
|
||||
PROPAINTER_MAX_LOAD_NUM = 70
|
||||
# ×××××××××× InpaintMode.PROPAINTER算法设置 end ××××××××××
|
||||
TRANSLATION_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'interface', f"{config.interface.value}.ini")
|
||||
tr.read(TRANSLATION_FILE, encoding='utf-8')
|
||||
|
||||
# ×××××××××× InpaintMode.LAMA算法设置 start ××××××××××
|
||||
# 是否开启极速模式,开启后不保证inpaint效果,仅仅对包含文本的区域文本进行去除
|
||||
LAMA_SUPER_FAST = False
|
||||
# ×××××××××× InpaintMode.LAMA算法设置 end ××××××××××
|
||||
# ×××××××××××××××××××× [可以改] end ××××××××××××××××××××
|
||||
# 项目的base目录
|
||||
BASE_DIR = str(Path(os.path.abspath(__file__)).parent)
|
||||
|
||||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
|
||||
Reference in New Issue
Block a user