diff --git a/.gitignore b/.gitignore index fd3441c..d065daf 100644 --- a/.gitignore +++ b/.gitignore @@ -369,7 +369,6 @@ test_*.mp4 test*_no_sub*.mp4 /test/coods/ /local_test/ -/backend/models/propainter/ProPainter.pth /backend/models/big-lama/big-lama.pt /test/debug/ /backend/tools/train/release_model/ diff --git a/README.md b/README.md index 96e8d65..07d1a46 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ options: Output video file path (optional) --subtitle-area-coords YMIN YMAX XMIN XMAX, -c YMIN YMAX XMIN XMAX Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas. - --inpaint-mode {sttn-auto,sttn-det,lama,propainter,opencv} + --inpaint-mode {sttn-auto,sttn-det,lama,opencv} Inpaint mode, default is sttn-auto ``` ## 演示 @@ -235,7 +235,6 @@ STTN_SKIP_DETECTION = True # 跳过字幕检测,跳过后可能会导致要去 > - InpaintMode.STTN 算法:对于真人视频效果较好,速度快,可以跳过字幕检测 > - InpaintMode.LAMA 算法:对于图片效果最好,对动画类视频效果好,速度一般,不可以跳过字幕检测 -> - InpaintMode.PROPAINTER 算法: 需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好 - 使用STTN算法 diff --git a/README_en.md b/README_en.md index fb8d1ef..ee5e36e 100755 --- a/README_en.md +++ b/README_en.md @@ -73,7 +73,7 @@ options: Output video file path (optional) --subtitle-area-coords YMIN YMAX XMIN XMAX, -c YMIN YMAX XMIN XMAX Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas. - --inpaint-mode {sttn-auto,sttn-det,lama,propainter,opencv} + --inpaint-mode {sttn-auto,sttn-det,lama,opencv} Inpaint mode, default is sttn-auto ``` ## Demonstration @@ -234,7 +234,6 @@ Modify the values in backend/config.py and try different removal algorithms. Her > - InpaintMode.STTN algorithm: Good for live-action videos and fast in speed, capable of skipping subtitle detection > - InpaintMode.LAMA algorithm: Best for images and effective for animated videos, moderate speed, unable to skip subtitle detection -> - InpaintMode.PROPAINTER algorithm: Consumes a significant amount of VRAM, slower in speed, works better for videos with very intense movement - Using the STTN algorithm diff --git a/backend/config.py b/backend/config.py index 6fe9331..091ab9e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -47,7 +47,6 @@ class Config(QConfig): - InpaintMode.STTN_AUTO 智能擦除版 - InpaintMode.STTN_DET 带字幕检测版, 无智能擦除 - InpaintMode.LAMA 算法:对于动画类视频效果好,速度一般,不可以跳过字幕检测 - - InpaintMode.PROPAINTER 算法: 需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好 """ # 【设置inpaint算法】 inpaintMode = OptionsConfigItem("Main", "InpaintMode", InpaintMode.STTN_AUTO, OptionsValidator(InpaintMode), EnumSerializer(InpaintMode)) @@ -92,12 +91,6 @@ class Config(QConfig): # 设置STTN算法最大同时处理的帧数量 sttnMaxLoadNum = RangeConfigItem("Sttn", "MaxLoadNum", 50, RangeValidator(1, 300)) getSttnMaxLoadNum = lambda self: max(self.sttnMaxLoadNum.value, self.sttnNeighborStride.value * self.sttnReferenceLength.value) - - # 以下参数仅适用PROPAINTER算法时,才生效 - # 【根据自己的GPU显存大小设置】最大同时处理的图片数量,设置越大处理效果越好,但是要求显存越高 - # 1280x720p视频设置80需要25G显存,设置50需要19G显存 - # 720x480p视频设置80需要8G显存,设置50需要7G显存 - propainterMaxLoadNum = RangeConfigItem("ProPainter", "MaxLoadNum", 70, RangeValidator(1, 300)) # 是否使用硬件加速 hardwareAcceleration = ConfigItem("Main", "HardwareAcceleration", HARDWARD_ACCELERATION_OPTION, BoolValidator()) diff --git a/backend/inpaint/propainter_inpaint.py b/backend/inpaint/propainter_inpaint.py deleted file mode 100644 index 710941d..0000000 --- a/backend/inpaint/propainter_inpaint.py +++ /dev/null @@ -1,447 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import gc -import cv2 -import numpy as np -import scipy.ndimage -from PIL import 
Image -from typing import List - -import torch -import torchvision - -from backend import config -from backend.inpaint.video.model.modules.flow_comp_raft import RAFT_bi -from backend.inpaint.video.model.recurrent_flow_completion import RecurrentFlowCompleteNet -from backend.inpaint.video.model.propainter import InpaintGenerator -from backend.inpaint.video.core.utils import to_tensors -from backend.inpaint.video.model.misc import get_device -from backend.tools.inpaint_tools import get_inpaint_area_by_mask - -import warnings - -warnings.filterwarnings("ignore") - -def binary_mask(mask, th=0.1): - mask[mask > th] = 1 - mask[mask <= th] = 0 - return mask - - -# read frame-wise masks -def read_mask(mpath, length, size, flow_mask_dilates=8, mask_dilates=5): - masks_img = [] - masks_dilated = [] - flow_masks = [] - # 如果传入的直接为numpy array - if isinstance(mpath, np.ndarray): - if mpath.ndim == 3 and mpath.shape[2] == 1: - mpath = mpath.squeeze(2) # 从 (H,W,1) 转为 (H,W) - elif mpath.ndim == 3 and mpath.shape[2] == 3: - # 如果是彩色图像,转为灰度 - mpath = cv2.cvtColor(mpath, cv2.COLOR_BGR2GRAY) - masks_img = [Image.fromarray(mpath)] - # input single img path - else: - if isinstance(mpath, str): - if mpath.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')): - masks_img = [Image.open(mpath)] - else: - mnames = sorted(os.listdir(mpath)) - for mp in mnames: - masks_img.append(Image.open(os.path.join(mpath, mp))) - - for mask_img in masks_img: - mask_img = np.array(mask_img.convert('L')) - - # Dilate 8 pixel so that all known pixel is trustworthy - if flow_mask_dilates > 0: - flow_mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=flow_mask_dilates).astype(np.uint8) - else: - flow_mask_img = binary_mask(mask_img).astype(np.uint8) - # Close the small holes inside the foreground objects - # flow_mask_img = cv2.morphologyEx(flow_mask_img, cv2.MORPH_CLOSE, np.ones((21, 21),np.uint8)).astype(bool) - # flow_mask_img = scipy.ndimage.binary_fill_holes(flow_mask_img).astype(np.uint8) - flow_masks.append(Image.fromarray(flow_mask_img * 255)) - - if mask_dilates > 0: - mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=mask_dilates).astype(np.uint8) - else: - mask_img = binary_mask(mask_img).astype(np.uint8) - masks_dilated.append(Image.fromarray(mask_img * 255)) - - if len(masks_img) == 1: - flow_masks = flow_masks * length - masks_dilated = masks_dilated * length - - return flow_masks, masks_dilated - - -def extrapolation(video_ori, scale): - """Prepares the data for video outpainting. - """ - nFrame = len(video_ori) - imgW, imgH = video_ori[0].size - - # Defines new FOV. - imgH_extr = int(scale[0] * imgH) - imgW_extr = int(scale[1] * imgW) - imgH_extr = imgH_extr - imgH_extr % 8 - imgW_extr = imgW_extr - imgW_extr % 8 - H_start = int((imgH_extr - imgH) / 2) - W_start = int((imgW_extr - imgW) / 2) - - # Extrapolates the FOV for video. - frames = [] - for v in video_ori: - frame = np.zeros((imgH_extr, imgW_extr, 3), dtype=np.uint8) - frame[H_start: H_start + imgH, W_start: W_start + imgW, :] = v - frames.append(Image.fromarray(frame)) - - # Generates the mask for missing region. 
- masks_dilated = [] - flow_masks = [] - - dilate_h = 4 if H_start > 10 else 0 - dilate_w = 4 if W_start > 10 else 0 - mask = np.ones(((imgH_extr, imgW_extr)), dtype=np.uint8) - - mask[H_start + dilate_h: H_start + imgH - dilate_h, - W_start + dilate_w: W_start + imgW - dilate_w] = 0 - flow_masks.append(Image.fromarray(mask * 255)) - - mask[H_start: H_start + imgH, W_start: W_start + imgW] = 0 - masks_dilated.append(Image.fromarray(mask * 255)) - - flow_masks = flow_masks * nFrame - masks_dilated = masks_dilated * nFrame - - return frames, flow_masks, masks_dilated, (imgW_extr, imgH_extr) - - -def get_ref_index(mid_neighbor_id, neighbor_ids, length, ref_stride=10, ref_num=-1): - ref_index = [] - if ref_num == -1: - for i in range(0, length, ref_stride): - if i not in neighbor_ids: - ref_index.append(i) - else: - start_idx = max(0, mid_neighbor_id - ref_stride * (ref_num // 2)) - end_idx = min(length, mid_neighbor_id + ref_stride * (ref_num // 2)) - for i in range(start_idx, end_idx, ref_stride): - if i not in neighbor_ids: - if len(ref_index) > ref_num: - break - ref_index.append(i) - return ref_index - - -class PropainterInpaint: - def __init__(self, device, model_dir, sub_video_length=80, use_fp16=True): - self.device = device - self.model_dir = model_dir - self.use_fp16 = use_fp16 - self.use_half = True if self.use_fp16 else False - if self.device == torch.device('cpu'): - self.use_half = False - # Length of sub-video for long video inference. - self.sub_video_length = sub_video_length - # Length of local neighboring frames.' - self.neighbor_length = 10 - # Mask dilation for video and flow masking - self.mask_dilation = 4 - # Stride of global reference frames - self.ref_stride = 10 - # Iterations for RAFT inference - self.raft_iter = 20 - # Stride of global reference frames - self.ref_stride = 10 - # 设置raft模型 - self.fix_raft = self.init_raft_model() - # 设置fix_flow模型 - self.fix_flow_complete = self.init_fix_flow_model() - # 设置inpaint模型 - self.model = self.init_inpaint_model() - - def init_raft_model(self): - # set up RAFT and flow competition model - return RAFT_bi(os.path.join(self.model_dir, 'raft-things.pth'), self.device) - - def init_fix_flow_model(self): - fix_flow_complete_model = RecurrentFlowCompleteNet( - os.path.join(self.model_dir, 'recurrent_flow_completion.pth')) - for p in fix_flow_complete_model.parameters(): - p.requires_grad = False - - if self.use_half: - fix_flow_complete_model = fix_flow_complete_model.half() - fix_flow_complete_model.to(self.device) - fix_flow_complete_model.eval() - return fix_flow_complete_model - - def init_inpaint_model(self): - # set up ProPainter model - model = InpaintGenerator(model_path=os.path.join(self.model_dir, 'ProPainter.pth')) - if self.use_half: - model = model.half() - model = model.to(self.device).eval() - return model - - def inpaint(self, frames, mask): - if isinstance(frames[0], np.ndarray): - frames = [Image.fromarray(cv2.cvtColor(f, cv2.COLOR_BGR2RGB)) for f in frames] - size = frames[0].size - frames_len = len(frames) - flow_masks, masks_dilated = read_mask(mask, frames_len, size, - flow_mask_dilates=self.mask_dilation, - mask_dilates=self.mask_dilation) - w, h = size - # for saving the masked frames or video - masked_frame_for_save = [] - for i in range(len(frames)): - mask_ = np.expand_dims(np.array(masks_dilated[i]), 2).repeat(3, axis=2) / 255. 
- img = np.array(frames[i]) - green = np.zeros([h, w, 3]) - green[:, :, 1] = 255 - alpha = 0.6 - # alpha = 1.0 - fuse_img = (1 - alpha) * img + alpha * green - fuse_img = mask_ * fuse_img + (1 - mask_) * img - masked_frame_for_save.append(fuse_img.astype(np.uint8)) - - frames_inp = [np.array(f).astype(np.uint8) for f in frames] - frames = to_tensors()(frames).unsqueeze(0) * 2 - 1 - flow_masks = to_tensors()(flow_masks).unsqueeze(0) - masks_dilated = to_tensors()(masks_dilated).unsqueeze(0) - frames, flow_masks, masks_dilated = frames.to(self.device), flow_masks.to(self.device), masks_dilated.to( - self.device) - video_length = frames.size(1) - with torch.no_grad(): - # ---- compute flow ---- - if frames.size(-1) <= 640: - short_clip_len = 12 - elif frames.size(-1) <= 720: - short_clip_len = 8 - elif frames.size(-1) <= 1280: - short_clip_len = 4 - else: - short_clip_len = 2 - - # use fp32 for RAFT - if frames.size(1) > short_clip_len: - gt_flows_f_list, gt_flows_b_list = [], [] - for f in range(0, video_length, short_clip_len): - end_f = min(video_length, f + short_clip_len) - if f == 0: - flows_f, flows_b = self.fix_raft(frames[:, f:end_f], iters=self.raft_iter) - else: - flows_f, flows_b = self.fix_raft(frames[:, f - 1:end_f], iters=self.raft_iter) - gt_flows_f_list.append(flows_f) - gt_flows_b_list.append(flows_b) - torch.cuda.empty_cache() - gt_flows_f = torch.cat(gt_flows_f_list, dim=1) - gt_flows_b = torch.cat(gt_flows_b_list, dim=1) - gt_flows_bi = (gt_flows_f, gt_flows_b) - else: - gt_flows_bi = self.fix_raft(frames, iters=self.raft_iter) - torch.cuda.empty_cache() - - if self.use_half: - frames, flow_masks, masks_dilated = frames.half(), flow_masks.half(), masks_dilated.half() - gt_flows_bi = (gt_flows_bi[0].half(), gt_flows_bi[1].half()) - - # ---- complete flow ---- - flow_length = gt_flows_bi[0].size(1) - if flow_length > self.sub_video_length: - pred_flows_f, pred_flows_b = [], [] - pad_len = 5 - for f in range(0, flow_length, self.sub_video_length): - s_f = max(0, f - pad_len) - e_f = min(flow_length, f + self.sub_video_length + pad_len) - pad_len_s = max(0, f) - s_f - pad_len_e = e_f - min(flow_length, f + self.sub_video_length) - pred_flows_bi_sub, _ = self.fix_flow_complete.forward_bidirect_flow( - (gt_flows_bi[0][:, s_f:e_f], gt_flows_bi[1][:, s_f:e_f]), - flow_masks[:, s_f:e_f + 1]) - pred_flows_bi_sub = self.fix_flow_complete.combine_flow( - (gt_flows_bi[0][:, s_f:e_f], gt_flows_bi[1][:, s_f:e_f]), - pred_flows_bi_sub, - flow_masks[:, s_f:e_f + 1]) - - pred_flows_f.append(pred_flows_bi_sub[0][:, pad_len_s:e_f - s_f - pad_len_e]) - pred_flows_b.append(pred_flows_bi_sub[1][:, pad_len_s:e_f - s_f - pad_len_e]) - torch.cuda.empty_cache() - - pred_flows_f = torch.cat(pred_flows_f, dim=1) - pred_flows_b = torch.cat(pred_flows_b, dim=1) - pred_flows_bi = (pred_flows_f, pred_flows_b) - else: - pred_flows_bi, _ = self.fix_flow_complete.forward_bidirect_flow(gt_flows_bi, flow_masks) - pred_flows_bi = self.fix_flow_complete.combine_flow(gt_flows_bi, pred_flows_bi, flow_masks) - torch.cuda.empty_cache() - - # ---- image propagation ---- - masked_frames = frames * (1 - masks_dilated) - # ensure a minimum of 100 frames for image propagation - subvideo_length_img_prop = min(100, self.sub_video_length) - if video_length > subvideo_length_img_prop: - updated_frames, updated_masks = [], [] - pad_len = 10 - for f in range(0, video_length, subvideo_length_img_prop): - s_f = max(0, f - pad_len) - e_f = min(video_length, f + subvideo_length_img_prop + pad_len) - pad_len_s = max(0, f) - s_f - 
pad_len_e = e_f - min(video_length, f + subvideo_length_img_prop) - - b, t, _, _, _ = masks_dilated[:, s_f:e_f].size() - pred_flows_bi_sub = (pred_flows_bi[0][:, s_f:e_f - 1], pred_flows_bi[1][:, s_f:e_f - 1]) - prop_imgs_sub, updated_local_masks_sub = self.model.img_propagation(masked_frames[:, s_f:e_f], - pred_flows_bi_sub, - masks_dilated[:, s_f:e_f], - 'nearest') - updated_frames_sub = frames[:, s_f:e_f] * (1 - masks_dilated[:, s_f:e_f]) + prop_imgs_sub.view(b, t, 3, h, w) * masks_dilated[:, s_f:e_f] - updated_masks_sub = updated_local_masks_sub.view(b, t, 1, h, w) - updated_frames.append(updated_frames_sub[:, pad_len_s:e_f - s_f - pad_len_e]) - updated_masks.append(updated_masks_sub[:, pad_len_s:e_f - s_f - pad_len_e]) - torch.cuda.empty_cache() - - updated_frames = torch.cat(updated_frames, dim=1) - updated_masks = torch.cat(updated_masks, dim=1) - else: - b, t, _, _, _ = masks_dilated.size() - prop_imgs, updated_local_masks = self.model.img_propagation(masked_frames, pred_flows_bi, masks_dilated, - 'nearest') - updated_frames = frames * (1 - masks_dilated) + prop_imgs.view(b, t, 3, h, w) * masks_dilated - updated_masks = updated_local_masks.view(b, t, 1, h, w) - torch.cuda.empty_cache() - - ori_frames = frames_inp - comp_frames = [None] * video_length - - neighbor_stride = self.neighbor_length // 2 - if video_length > self.sub_video_length: - ref_num = self.sub_video_length // self.ref_stride - else: - ref_num = -1 - - # ---- feature propagation + transformer ---- - for f in range(0, video_length, neighbor_stride): - neighbor_ids = [ - i for i in range(max(0, f - neighbor_stride), - min(video_length, f + neighbor_stride + 1)) - ] - ref_ids = get_ref_index(f, neighbor_ids, video_length, self.ref_stride, ref_num) - selected_imgs = updated_frames[:, neighbor_ids + ref_ids, :, :, :] - selected_masks = masks_dilated[:, neighbor_ids + ref_ids, :, :, :] - selected_update_masks = updated_masks[:, neighbor_ids + ref_ids, :, :, :] - selected_pred_flows_bi = ( - pred_flows_bi[0][:, neighbor_ids[:-1], :, :, :], pred_flows_bi[1][:, neighbor_ids[:-1], :, :, :]) - - with torch.no_grad(): - # 1.0 indicates mask - l_t = len(neighbor_ids) - pred_img = self.model(selected_imgs, selected_pred_flows_bi, selected_masks, selected_update_masks, l_t) - pred_img = pred_img.view(-1, 3, h, w) - pred_img = (pred_img + 1) / 2 - pred_img = pred_img.cpu().permute(0, 2, 3, 1).numpy() * 255 - binary_masks = masks_dilated[0, neighbor_ids, :, :, :].cpu().permute( - 0, 2, 3, 1).numpy().astype(np.uint8) - for i in range(len(neighbor_ids)): - idx = neighbor_ids[i] - img = np.array(pred_img[i]).astype(np.uint8) * binary_masks[i] \ - + ori_frames[idx] * (1 - binary_masks[i]) - if comp_frames[idx] is None: - comp_frames[idx] = img - else: - comp_frames[idx] = comp_frames[idx].astype(np.float32) * 0.5 + img.astype(np.float32) * 0.5 - comp_frames[idx] = comp_frames[idx].astype(np.uint8) - torch.cuda.empty_cache() - # save videos frame - comp_frames = [cv2.cvtColor(i, cv2.COLOR_RGB2BGR) for i in comp_frames] - return comp_frames - - def __call__(self, input_frames: List[np.ndarray], input_mask: np.ndarray): - """ - :param input_frames: 原视频帧 - :param input_mask: 字幕区域mask - """ - mask = input_mask[:, :, None] - H_ori, W_ori = mask.shape[:2] - H_ori = int(H_ori + 0.5) - W_ori = int(W_ori + 0.5) - # 确定去字幕的垂直高度部分 - split_h = int(W_ori * 3 / 16) - inpaint_area = get_inpaint_area_by_mask(W_ori, H_ori, split_h, mask, multiple=8) - # 初始化帧存储变量 - # 高分辨率帧存储列表 - frames_hr = [f.copy() for f in input_frames] - frames_scaled = {} # 存放缩放后帧的字典 
- masks_scaled = {} # 存放缩放后遮罩的字典 - comps = {} # 存放补全后帧的字典 - # 存储最终的视频帧 - inpainted_frames = [] - for k in range(len(inpaint_area)): - frames_scaled[k] = [] # 为每个去除部分初始化一个列表 - masks_scaled[k] = [] # 为每个去除部分初始化一个列表 - - # 读取并缩放帧 - for j in range(len(frames_hr)): - image = frames_hr[j] - # 对每个去除部分进行切割和缩放 - for k in range(len(inpaint_area)): - image_crop = image[inpaint_area[k][0]:inpaint_area[k][1], inpaint_area[k][2]:inpaint_area[k][3], :] # 切割 - mask_crop = mask[inpaint_area[k][0]:inpaint_area[k][1], inpaint_area[k][2]:inpaint_area[k][3], :] # 切割 - frames_scaled[k].append(image_crop) # 将缩放后的帧添加到对应列表 - masks_scaled[k].append(mask_crop) # 将缩放后的遮罩添加到对应列表 - - # 处理每一个去除部分 - for k in range(len(inpaint_area)): - # 调用inpaint函数进行处理 - comps[k] = self.inpaint(frames_scaled[k], masks_scaled[k][0]) - del frames_scaled[k], masks_scaled[k] - gc.collect() - - # 如果存在去除部分 - if inpaint_area: - for j in range(len(frames_hr)): - frame = frames_hr[j] # 取出原始帧 - # 对于模式中的每一个段落 - for k in range(len(inpaint_area)): - comp = comps[k][j] # 获取补全后的帧 - # 实现遮罩区域内的图像融合 - frame[inpaint_area[k][0]:inpaint_area[k][1], inpaint_area[k][2]:inpaint_area[k][3], :] = comp - # 将最终帧添加到列表 - inpainted_frames.append(frame) - # print(f'processing frame, {len(frames_hr) - j} left') - else: - inpainted_frames = frames_hr - return inpainted_frames - - -def read_frames(v_path): - video_cap = cv2.VideoCapture(v_path) - video_frames = [] - while True: - ret, frame = video_cap.read() - if not ret: - break - video_frames.append(frame) - video_frames = [Image.fromarray(f) for f in video_frames] - return video_frames - - -if __name__ == '__main__': - # PropainterInpaint - propainter_inpaint = PropainterInpaint(get_device(), ModelConfig().PROPAINTER_MODEL_DIR, sub_video_length=80) - frames = read_frames('/home/yao/Documents/Project/video-subtitle-remover/local_test/test1.mp4') - mask = cv2.imread('/home/yao/Documents/Project/video-subtitle-remover/local_test/test1_mask.png') - inpainted_frames = propainter_inpaint.inpaint(frames, mask) - save_root = '/home/yao/Documents/Project/video-subtitle-remover/local_test/' - video_out_path = os.path.join(save_root, 'inpaint_out.mp4') - print("size: ", inpainted_frames[0].shape) - video_writer = cv2.VideoWriter(video_out_path, cv2.VideoWriter_fourcc(*'mp4v'), 24, (640, 360)) - for comp_frame in inpainted_frames: - video_writer.write(comp_frame) - video_writer.release() - print(f'\nAll results are saved in {save_root}') - diff --git a/backend/inpaint/video/model/propainter.py b/backend/inpaint/video/model/propainter.py deleted file mode 100644 index a83ed3d..0000000 --- a/backend/inpaint/video/model/propainter.py +++ /dev/null @@ -1,539 +0,0 @@ -''' Towards An End-to-End Framework for Video Inpainting -''' - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchvision - -from einops import rearrange - -from backend.inpaint.video.model.modules.base_module import BaseNetwork -from backend.inpaint.video.model.modules.sparse_transformer import TemporalSparseTransformerBlock, SoftSplit, SoftComp -from backend.inpaint.video.model.modules.spectral_norm import spectral_norm as _spectral_norm -from backend.inpaint.video.model.modules.flow_loss_utils import flow_warp -from backend.inpaint.video.model.modules.deformconv import ModulatedDeformConv2d - -from .misc import constant_init - - -def length_sq(x): - return torch.sum(torch.square(x), dim=1, keepdim=True) - - -def fbConsistencyCheck(flow_fw, flow_bw, alpha1=0.01, alpha2=0.5): - flow_bw_warped = flow_warp(flow_bw, flow_fw.permute(0, 2, 3, 
1)) # wb(wf(x)) - flow_diff_fw = flow_fw + flow_bw_warped # wf + wb(wf(x)) - - mag_sq_fw = length_sq(flow_fw) + length_sq(flow_bw_warped) # |wf| + |wb(wf(x))| - occ_thresh_fw = alpha1 * mag_sq_fw + alpha2 - - # fb_valid_fw = (length_sq(flow_diff_fw) < occ_thresh_fw).float() - fb_valid_fw = (length_sq(flow_diff_fw) < occ_thresh_fw).to(flow_fw) - return fb_valid_fw - - -class DeformableAlignment(ModulatedDeformConv2d): - """Second-order deformable alignment module.""" - - def __init__(self, *args, **kwargs): - # self.max_residue_magnitude = kwargs.pop('max_residue_magnitude', 10) - self.max_residue_magnitude = kwargs.pop('max_residue_magnitude', 3) - - super(DeformableAlignment, self).__init__(*args, **kwargs) - - self.conv_offset = nn.Sequential( - nn.Conv2d(2 * self.out_channels + 2 + 1 + 2, self.out_channels, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.1, inplace=True), - nn.Conv2d(self.out_channels, self.out_channels, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.1, inplace=True), - nn.Conv2d(self.out_channels, self.out_channels, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.1, inplace=True), - nn.Conv2d(self.out_channels, 27 * self.deform_groups, 3, 1, 1), - ) - self.init_offset() - - def init_offset(self): - constant_init(self.conv_offset[-1], val=0, bias=0) - - def forward(self, x, cond_feat, flow): - out = self.conv_offset(cond_feat) - o1, o2, mask = torch.chunk(out, 3, dim=1) - - # offset - offset = self.max_residue_magnitude * torch.tanh(torch.cat((o1, o2), dim=1)) - offset = offset + flow.flip(1).repeat(1, offset.size(1) // 2, 1, 1) - - # mask - mask = torch.sigmoid(mask) - - return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, - self.stride, self.padding, - self.dilation, mask) - - -class BidirectionalPropagation(nn.Module): - def __init__(self, channel, learnable=True): - super(BidirectionalPropagation, self).__init__() - self.deform_align = nn.ModuleDict() - self.backbone = nn.ModuleDict() - self.channel = channel - self.prop_list = ['backward_1', 'forward_1'] - self.learnable = learnable - - if self.learnable: - for i, module in enumerate(self.prop_list): - self.deform_align[module] = DeformableAlignment( - channel, channel, 3, padding=1, deform_groups=16) - - self.backbone[module] = nn.Sequential( - nn.Conv2d(2 * channel + 2, channel, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(channel, channel, 3, 1, 1), - ) - - self.fuse = nn.Sequential( - nn.Conv2d(2 * channel + 2, channel, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(channel, channel, 3, 1, 1), - ) - - def binary_mask(self, mask, th=0.1): - mask[mask > th] = 1 - mask[mask <= th] = 0 - # return mask.float() - return mask.to(mask) - - def forward(self, x, flows_forward, flows_backward, mask, interpolation='bilinear'): - """ - x shape : [b, t, c, h, w] - return [b, t, c, h, w] - """ - - # For backward warping - # pred_flows_forward for backward feature propagation - # pred_flows_backward for forward feature propagation - b, t, c, h, w = x.shape - feats, masks = {}, {} - feats['input'] = [x[:, i, :, :, :] for i in range(0, t)] - masks['input'] = [mask[:, i, :, :, :] for i in range(0, t)] - - prop_list = ['backward_1', 'forward_1'] - cache_list = ['input'] + prop_list - - for p_i, module_name in enumerate(prop_list): - feats[module_name] = [] - masks[module_name] = [] - - if 'backward' in module_name: - frame_idx = range(0, t) - frame_idx = frame_idx[::-1] - flow_idx = frame_idx - flows_for_prop = flows_forward - flows_for_check = flows_backward - else: - 
frame_idx = range(0, t) - flow_idx = range(-1, t - 1) - flows_for_prop = flows_backward - flows_for_check = flows_forward - - for i, idx in enumerate(frame_idx): - feat_current = feats[cache_list[p_i]][idx] - mask_current = masks[cache_list[p_i]][idx] - - if i == 0: - feat_prop = feat_current - mask_prop = mask_current - else: - flow_prop = flows_for_prop[:, flow_idx[i], :, :, :] - flow_check = flows_for_check[:, flow_idx[i], :, :, :] - flow_vaild_mask = fbConsistencyCheck(flow_prop, flow_check) - feat_warped = flow_warp(feat_prop, flow_prop.permute(0, 2, 3, 1), interpolation) - - if self.learnable: - cond = torch.cat([feat_current, feat_warped, flow_prop, flow_vaild_mask, mask_current], dim=1) - feat_prop = self.deform_align[module_name](feat_prop, cond, flow_prop) - mask_prop = mask_current - else: - mask_prop_valid = flow_warp(mask_prop, flow_prop.permute(0, 2, 3, 1)) - mask_prop_valid = self.binary_mask(mask_prop_valid) - - union_vaild_mask = self.binary_mask(mask_current * flow_vaild_mask * (1 - mask_prop_valid)) - feat_prop = union_vaild_mask * feat_warped + (1 - union_vaild_mask) * feat_current - # update mask - mask_prop = self.binary_mask(mask_current * (1 - (flow_vaild_mask * (1 - mask_prop_valid)))) - - # refine - if self.learnable: - feat = torch.cat([feat_current, feat_prop, mask_current], dim=1) - feat_prop = feat_prop + self.backbone[module_name](feat) - # feat_prop = self.backbone[module_name](feat_prop) - - feats[module_name].append(feat_prop) - masks[module_name].append(mask_prop) - - # end for - if 'backward' in module_name: - feats[module_name] = feats[module_name][::-1] - masks[module_name] = masks[module_name][::-1] - - outputs_b = torch.stack(feats['backward_1'], dim=1).view(-1, c, h, w) - outputs_f = torch.stack(feats['forward_1'], dim=1).view(-1, c, h, w) - - if self.learnable: - mask_in = mask.view(-1, 2, h, w) - masks_b, masks_f = None, None - outputs = self.fuse(torch.cat([outputs_b, outputs_f, mask_in], dim=1)) + x.view(-1, c, h, w) - else: - masks_b = torch.stack(masks['backward_1'], dim=1) - masks_f = torch.stack(masks['forward_1'], dim=1) - outputs = outputs_f - - return outputs_b.view(b, -1, c, h, w), outputs_f.view(b, -1, c, h, w), \ - outputs.view(b, -1, c, h, w), masks_f - - -class Encoder(nn.Module): - def __init__(self): - super(Encoder, self).__init__() - self.group = [1, 2, 4, 8, 1] - self.layers = nn.ModuleList([ - nn.Conv2d(5, 64, kernel_size=3, stride=2, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1, groups=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(640, 512, kernel_size=3, stride=1, padding=1, groups=2), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(768, 384, kernel_size=3, stride=1, padding=1, groups=4), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(640, 256, kernel_size=3, stride=1, padding=1, groups=8), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(512, 128, kernel_size=3, stride=1, padding=1, groups=1), - nn.LeakyReLU(0.2, inplace=True) - ]) - - def forward(self, x): - bt, c, _, _ = x.size() - # h, w = h//4, w//4 - out = x - for i, layer in enumerate(self.layers): - if i == 8: - x0 = out - _, _, h, w = x0.size() - if i > 8 and i % 2 == 0: - g = self.group[(i - 8) // 2] - x = x0.view(bt, g, -1, h, w) - 
o = out.view(bt, g, -1, h, w) - out = torch.cat([x, o], 2).view(bt, -1, h, w) - out = layer(out) - return out - - -class deconv(nn.Module): - def __init__(self, - input_channel, - output_channel, - kernel_size=3, - padding=0): - super().__init__() - self.conv = nn.Conv2d(input_channel, - output_channel, - kernel_size=kernel_size, - stride=1, - padding=padding) - - def forward(self, x): - x = F.interpolate(x, - scale_factor=2, - mode='bilinear', - align_corners=True) - return self.conv(x) - - -class InpaintGenerator(BaseNetwork): - def __init__(self, init_weights=True, model_path=None): - super(InpaintGenerator, self).__init__() - channel = 128 - hidden = 512 - - # encoder - self.encoder = Encoder() - - # decoder - self.decoder = nn.Sequential( - deconv(channel, 128, kernel_size=3, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.2, inplace=True), - deconv(64, 64, kernel_size=3, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 3, kernel_size=3, stride=1, padding=1)) - - # soft split and soft composition - kernel_size = (7, 7) - padding = (3, 3) - stride = (3, 3) - t2t_params = { - 'kernel_size': kernel_size, - 'stride': stride, - 'padding': padding - } - self.ss = SoftSplit(channel, hidden, kernel_size, stride, padding) - self.sc = SoftComp(channel, hidden, kernel_size, stride, padding) - self.max_pool = nn.MaxPool2d(kernel_size, stride, padding) - - # feature propagation module - self.img_prop_module = BidirectionalPropagation(3, learnable=False) - self.feat_prop_module = BidirectionalPropagation(128, learnable=True) - - depths = 8 - num_heads = 4 - window_size = (5, 9) - pool_size = (4, 4) - self.transformers = TemporalSparseTransformerBlock(dim=hidden, - n_head=num_heads, - window_size=window_size, - pool_size=pool_size, - depths=depths, - t2t_params=t2t_params) - if init_weights: - self.init_weights() - - if model_path is not None: - print('Pretrained ProPainter has loaded...') - ckpt = torch.load(model_path, map_location='cpu') - self.load_state_dict(ckpt, strict=True) - - # print network parameter number - self.print_network() - - def img_propagation(self, masked_frames, completed_flows, masks, interpolation='nearest'): - _, _, prop_frames, updated_masks = self.img_prop_module(masked_frames, completed_flows[0], completed_flows[1], - masks, interpolation) - return prop_frames, updated_masks - - def forward(self, masked_frames, completed_flows, masks_in, masks_updated, num_local_frames, - interpolation='bilinear', t_dilation=2): - """ - Args: - masks_in: original mask - masks_updated: updated mask after image propagation - """ - - l_t = num_local_frames - b, t, _, ori_h, ori_w = masked_frames.size() - - # extracting features - enc_feat = self.encoder(torch.cat([masked_frames.view(b * t, 3, ori_h, ori_w), - masks_in.view(b * t, 1, ori_h, ori_w), - masks_updated.view(b * t, 1, ori_h, ori_w)], dim=1)) - _, c, h, w = enc_feat.size() - local_feat = enc_feat.view(b, t, c, h, w)[:, :l_t, ...] - ref_feat = enc_feat.view(b, t, c, h, w)[:, l_t:, ...] 
- fold_feat_size = (h, w) - - ds_flows_f = F.interpolate(completed_flows[0].view(-1, 2, ori_h, ori_w), scale_factor=1 / 4, mode='bilinear', - align_corners=False).view(b, l_t - 1, 2, h, w) / 4.0 - ds_flows_b = F.interpolate(completed_flows[1].view(-1, 2, ori_h, ori_w), scale_factor=1 / 4, mode='bilinear', - align_corners=False).view(b, l_t - 1, 2, h, w) / 4.0 - ds_mask_in = F.interpolate(masks_in.reshape(-1, 1, ori_h, ori_w), scale_factor=1 / 4, mode='nearest').view(b, t, - 1, h, - w) - ds_mask_in_local = ds_mask_in[:, :l_t] - ds_mask_updated_local = F.interpolate(masks_updated[:, :l_t].reshape(-1, 1, ori_h, ori_w), scale_factor=1 / 4, - mode='nearest').view(b, l_t, 1, h, w) - - if self.training: - mask_pool_l = self.max_pool(ds_mask_in.view(-1, 1, h, w)) - mask_pool_l = mask_pool_l.view(b, t, 1, mask_pool_l.size(-2), mask_pool_l.size(-1)) - else: - mask_pool_l = self.max_pool(ds_mask_in_local.view(-1, 1, h, w)) - mask_pool_l = mask_pool_l.view(b, l_t, 1, mask_pool_l.size(-2), mask_pool_l.size(-1)) - - prop_mask_in = torch.cat([ds_mask_in_local, ds_mask_updated_local], dim=2) - _, _, local_feat, _ = self.feat_prop_module(local_feat, ds_flows_f, ds_flows_b, prop_mask_in, interpolation) - enc_feat = torch.cat((local_feat, ref_feat), dim=1) - - trans_feat = self.ss(enc_feat.view(-1, c, h, w), b, fold_feat_size) - mask_pool_l = rearrange(mask_pool_l, 'b t c h w -> b t h w c').contiguous() - trans_feat = self.transformers(trans_feat, fold_feat_size, mask_pool_l, t_dilation=t_dilation) - trans_feat = self.sc(trans_feat, t, fold_feat_size) - trans_feat = trans_feat.view(b, t, -1, h, w) - - enc_feat = enc_feat + trans_feat - - if self.training: - output = self.decoder(enc_feat.view(-1, c, h, w)) - output = torch.tanh(output).view(b, t, 3, ori_h, ori_w) - else: - output = self.decoder(enc_feat[:, :l_t].view(-1, c, h, w)) - output = torch.tanh(output).view(b, l_t, 3, ori_h, ori_w) - - return output - - -# ###################################################################### -# Discriminator for Temporal Patch GAN -# ###################################################################### -class Discriminator(BaseNetwork): - def __init__(self, - in_channels=3, - use_sigmoid=False, - use_spectral_norm=True, - init_weights=True): - super(Discriminator, self).__init__() - self.use_sigmoid = use_sigmoid - nf = 32 - - self.conv = nn.Sequential( - spectral_norm( - nn.Conv3d(in_channels=in_channels, - out_channels=nf * 1, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=1, - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(64, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 1, - nf * 2, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(128, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 2, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), 
use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2))) - - if init_weights: - self.init_weights() - - def forward(self, xs): - # T, C, H, W = xs.shape (old) - # B, T, C, H, W (new) - xs_t = torch.transpose(xs, 1, 2) - feat = self.conv(xs_t) - if self.use_sigmoid: - feat = torch.sigmoid(feat) - out = torch.transpose(feat, 1, 2) # B, T, C, H, W - return out - - -class Discriminator_2D(BaseNetwork): - def __init__(self, - in_channels=3, - use_sigmoid=False, - use_spectral_norm=True, - init_weights=True): - super(Discriminator_2D, self).__init__() - self.use_sigmoid = use_sigmoid - nf = 32 - - self.conv = nn.Sequential( - spectral_norm( - nn.Conv3d(in_channels=in_channels, - out_channels=nf * 1, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(64, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 1, - nf * 2, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(128, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 2, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2))) - - if init_weights: - self.init_weights() - - def forward(self, xs): - # T, C, H, W = xs.shape (old) - # B, T, C, H, W (new) - xs_t = torch.transpose(xs, 1, 2) - feat = self.conv(xs_t) - if self.use_sigmoid: - feat = torch.sigmoid(feat) - out = torch.transpose(feat, 1, 2) # B, T, C, H, W - return out - - -def spectral_norm(module, mode=True): - if mode: - return _spectral_norm(module) - return module diff --git a/backend/interface/ch.ini b/backend/interface/ch.ini index ecd60cd..e8e3da2 100644 --- a/backend/interface/ch.ini +++ b/backend/interface/ch.ini @@ -11,7 +11,6 @@ BasicSetting = 基础设置 AdvancedSetting = 高级设置 SubtitleDetectionSetting = 字幕检测设置 SttnSetting = STTN设置 -ProPainterSetting = ProPainter设置 AboutSetting = 关于 HardwareAcceleration = 硬件加速 HardwareAccelerationDesc = 使用GPU或ONNX后端进行加速处理 @@ -36,8 +35,6 @@ SttnReferenceLength = 参考帧数量 SttnReferenceLengthDesc = 默认为10 SttnMaxLoadNum = 最大同时处理的帧数量 SttnMaxLoadNumDesc = 设置越大处理效果越好,但是要求显存越高,默认为50 -PropainterMaxLoadNum = 最大同时处理的帧数量 -PropainterMaxLoadNumDesc = 设置越大处理效果越好,但是要求显存越高,默认为70 CheckUpdateOnStartup = 在应用程序启动时检查更新 CheckUpdateOnStartupDesc = 新版本将更加稳定, 并拥有更多功能(建议启用此选项) UpdatesAvailableTitle = 有可用更新 @@ -67,7 +64,6 @@ SelectSubtitleArea = 请在视频预览中框选处理区域: {} InpaintModeDesc = STTN智能擦除, 对于真人视频效果较好,速度快, 智能擦除(最低4GB显存) STTN字幕检测 带字幕检测版, 无智能擦除(最低4GB显存) LAMA: 对于动画类视频效果好,速度一般(显存要求较低) - ProPainter: 
需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好(最低8GB显存) OpenCV: 极速模式, 不保证inpaint效果,仅仅对包含文本的区域文本进行去除(显存要求较低) SubtitleDetectMode = 字幕检测 ErrorDuringProcessing = 处理过程中发生错误: {} @@ -122,7 +118,6 @@ RequestError = 尝试访问 {} 失败, 原因: {} SttnAuto = STTN智能擦除 SttnDet = STTN字幕检测 LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/chinese_cht.ini b/backend/interface/chinese_cht.ini index b4a70cb..273cd73 100644 --- a/backend/interface/chinese_cht.ini +++ b/backend/interface/chinese_cht.ini @@ -11,7 +11,6 @@ BasicSetting = 基礎設定 AdvancedSetting = 進階設定 SubtitleDetectionSetting = 字幕檢測設定 SttnSetting = STTN設定 -ProPainterSetting = ProPainter設定 AboutSetting = 關於 HardwareAcceleration = 硬體加速 HardwareAccelerationDesc = 使用GPU或ONNX後端進行加速處理 @@ -36,8 +35,6 @@ SttnReferenceLength = 參考影格數量 SttnReferenceLengthDesc = 預設為10 SttnMaxLoadNum = 最大同時處理的影格數量 SttnMaxLoadNumDesc = 數值越大處理效果越好,但需更高顯示記憶體,預設為50 -PropainterMaxLoadNum = 最大同時處理的影格數量 -PropainterMaxLoadNumDesc = 數值越大處理效果越好,但需更高顯示記憶體,預設為70 CheckUpdateOnStartup = 在應用程式啟動時檢查更新 CheckUpdateOnStartupDesc = 新版本將更穩定並提供更多功能(建議啟用此選項) UpdatesAvailableTitle = 有可用更新 @@ -64,10 +61,9 @@ VideoPreview = 影片預覽 InterfaceLanguage = 介面語言 InpaintMode = 處理模型 SelectSubtitleArea = 請在影片預覽中框選處理區域: {} -InpaintModeDesc = STTN智能擦除,對於真人視頻效果較好,速度快,智能擦除(最低4GB顯存) - STTN字幕檢測 帶字幕檢測版,無智能擦除(最低4GB顯存) - LAMA:對於動畫類視頻效果好,速度一般(顯存要求較低) - ProPainter:需要消耗大量顯存,速度較慢,對運動非常劇烈的視頻效果較好(最低8GB顯存) +InpaintModeDesc = STTN智能擦除,對於真人視頻效果較好,速度快,智能擦除(最低4GB顯存) + STTN字幕檢測 帶字幕檢測版,無智能擦除(最低4GB顯存) + LAMA:對於動畫類視頻效果好,速度一般(顯存要求較低) OpenCV:極速模式,不保證inpaint效果,僅僅對包含文本的區域文本進行去除(顯存要求較低) SubtitleDetectMode = 字幕檢測模式 ErrorDuringProcessing = 處理過程中發生錯誤: {} @@ -118,11 +114,10 @@ TargetFileNotFound = 檔案尚未生成,請先等待任務完成 VersionInfo = 當前版本: {} 最新版本: {} RequestError = 嘗試存取 {} 失敗,原因: {} -[InpaintMode] -SttnAuto = STTN智慧擦除 -SttnDet = STTN字幕檢測 -LAMA = LAMA -ProPainter = ProPainter +[InpaintMode] +SttnAuto = STTN智慧擦除 +SttnDet = STTN字幕檢測 +LAMA = LAMA OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/en.ini b/backend/interface/en.ini index c5499f9..17db9eb 100644 --- a/backend/interface/en.ini +++ b/backend/interface/en.ini @@ -7,11 +7,10 @@ CopyrightTitle = About CopyrightDesc = © Copyright 2023, YaoFANGUK, Jason Eric (UI Design), Current Version: {} ProjectLinkTitle = Subtitle Remover ProjectLinkDesc = AI-based image/video hard subtitle removal and text watermark removal, generating output files with original resolution. No third-party API required, locally implemented. -BasicSetting = Basic Settings -AdvancedSetting = Advanced Settings -SubtitleDetectionSetting = Subtitle Detection Settings -SttnSetting = STTN Settings -ProPainterSetting = ProPainter Settings +BasicSetting = Basic Settings +AdvancedSetting = Advanced Settings +SubtitleDetectionSetting = Subtitle Detection Settings +SttnSetting = STTN Settings AboutSetting = About HardwareAcceleration = Hardware Acceleration HardwareAccelerationDesc = Accelerate processing using GPU or ONNX backend @@ -34,10 +33,8 @@ SttnNeighborStride = Reference Frame Stride SttnNeighborStrideDesc = Default: 5 SttnReferenceLength = Reference Frame Count SttnReferenceLengthDesc = Default: 10 -SttnMaxLoadNum = Max Concurrent Processing Frames -SttnMaxLoadNumDesc = Higher values improve quality but require more VRAM (default 50). -PropainterMaxLoadNum = Max Concurrent Processing Frames -PropainterMaxLoadNumDesc = Higher values improve quality but require more VRAM (default 70). 
+SttnMaxLoadNum = Max Concurrent Processing Frames +SttnMaxLoadNumDesc = Higher values improve quality but require more VRAM (default 50). CheckUpdateOnStartup = Check Updates on Startup CheckUpdateOnStartupDesc = New versions offer improved stability and features (recommended). UpdatesAvailableTitle = Update Available @@ -67,7 +64,6 @@ SelectSubtitleArea = Select processing area in video preview: {} InpaintModeDesc = STTN Smart Inpainting: Best for real-person videos, fast speed, smart inpainting (minimum 4GB VRAM) STTN Subtitle Detection: With subtitle detection, no smart inpainting (minimum 4GB VRAM) LAMA: Good for animation videos, moderate speed (low VRAM requirement) - ProPainter: Consumes a lot of VRAM, slower speed, best for videos with intense motion (minimum 8GB VRAM) OpenCV: Ultra-fast mode, inpainting effect not guaranteed, only removes text in detected regions (low VRAM requirement) SubtitleDetectMode = Subtitle Detection ErrorDuringProcessing = Error during processing: {} @@ -122,7 +118,6 @@ RequestError = Failed to access {}. Reason: {} SttnAuto = STTN Smart Erase SttnDet = STTN Detection LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/es.ini b/backend/interface/es.ini index 96a087e..75e69fa 100644 --- a/backend/interface/es.ini +++ b/backend/interface/es.ini @@ -11,7 +11,6 @@ BasicSetting = Configuración básica AdvancedSetting = Configuración avanzada SubtitleDetectionSetting = Detección de subtítulos SttnSetting = Configuración STTN -ProPainterSetting = Configuración ProPainter AboutSetting = Acerca de HardwareAcceleration = Aceleración hardware HardwareAccelerationDesc = Usar GPU o backend ONNX para acelerar el procesamiento @@ -36,8 +35,6 @@ SttnReferenceLength = Cantidad de referencias SttnReferenceLengthDesc = Valor predeterminado: 10 SttnMaxLoadNum = Máx. fotogramas simultáneos SttnMaxLoadNumDesc = Mayor valor mejora calidad pero requiere más VRAM (valor predeterminado 50). -PropainterMaxLoadNum = Máx. fotogramas simultáneos -PropainterMaxLoadNumDesc = Mayor valor mejora calidad pero requiere más VRAM (valor predeterminado 70). CheckUpdateOnStartup = Buscar actualizaciones al iniciar CheckUpdateOnStartupDesc = Versiones nuevas ofrecen mejor estabilidad y funciones (recomendado). UpdatesAvailableTitle = Actualización disponible @@ -67,7 +64,6 @@ SelectSubtitleArea = Selecciona área en vista previa: {} InpaintModeDesc = STTN Borrado inteligente: Mejor para videos de personas reales, velocidad rápida, borrado inteligente (mínimo 4GB de VRAM) STTN Detección de subtítulos: Con detección de subtítulos, sin borrado inteligente (mínimo 4GB de VRAM) LAMA: Bueno para videos animados, velocidad media (bajo requerimiento de VRAM) - ProPainter: Consume mucha VRAM, velocidad lenta, mejor para videos con mucho movimiento (mínimo 8GB de VRAM) OpenCV: Modo ultra rápido, el efecto de borrado no está garantizado, solo elimina texto en las áreas detectadas (bajo requerimiento de VRAM) SubtitleDetectMode = Detección de subtítulos ErrorDuringProcessing = Error durante el procesamiento: {} @@ -118,11 +114,10 @@ TargetFileNotFound = Archivo resultado no generado. Espera a completar. VersionInfo = Versión actual: {} Última versión: {} RequestError = Error accediendo {}. 
Razón: {} -[InpaintMode] -SttnAuto = STTN borrado inteligente -SttnDet = STTN detección -LAMA = LAMA -ProPainter = ProPainter +[InpaintMode] +SttnAuto = STTN borrado inteligente +SttnDet = STTN detección +LAMA = LAMA OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/japan.ini b/backend/interface/japan.ini index 15e57ea..15b129c 100644 --- a/backend/interface/japan.ini +++ b/backend/interface/japan.ini @@ -11,7 +11,6 @@ BasicSetting = 基本設定 AdvancedSetting = 高度設定 SubtitleDetectionSetting = 字幕検出設定 SttnSetting = STTN設定 -ProPainterSetting = ProPainter設定 AboutSetting = 情報 HardwareAcceleration = ハードウェアアクセラレーション HardwareAccelerationDesc = GPUまたはONNXバックエンドを使用した高速処理 @@ -36,8 +35,6 @@ SttnReferenceLength = 参照フレーム数 SttnReferenceLengthDesc = デフォルト: 10 SttnMaxLoadNum = 最大同時処理フレーム数 SttnMaxLoadNumDesc = 値が大きいほど高品質(VRAM要求増加、デフォルト50) -PropainterMaxLoadNum = 最大同時処理フレーム数 -PropainterMaxLoadNumDesc = 値が大きいほど高品質(VRAM要求増加、デフォルト70) CheckUpdateOnStartup = 起動時アップデート確認 CheckUpdateOnStartupDesc = 新バージョンは安定性/機能向上(推奨) UpdatesAvailableTitle = 利用可能なアップデート @@ -67,7 +64,6 @@ SelectSubtitleArea = プレビューで処理領域を選択: {} InpaintModeDesc = STTNスマート消去:実写動画に最適、高速、スマート消去(最低4GB VRAM) STTN字幕検出:字幕検出付き、スマート消去なし(最低4GB VRAM) LAMA:アニメ動画に最適、速度は普通(VRAM要件低め) - ProPainter:大量のVRAMを消費、速度は遅い、激しい動きの動画に最適(最低8GB VRAM) OpenCV:超高速モード、消去効果は保証されません、検出されたテキスト領域のみ削除(VRAM要件低め) SubtitleDetectMode = 字幕検出 ErrorDuringProcessing = 処理中にエラーが発生しました: {} @@ -122,7 +118,6 @@ RequestError = {} へのアクセス失敗。理由: {} SttnAuto = STTNインテリジェント消去 SttnDet = STTN字幕検出 LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/ko.ini b/backend/interface/ko.ini index 8469570..68e06db 100644 --- a/backend/interface/ko.ini +++ b/backend/interface/ko.ini @@ -11,7 +11,6 @@ BasicSetting = 기본 설정 AdvancedSetting = 고급 설정 SubtitleDetectionSetting = 자막 감지 설정 SttnSetting = STTN 설정 -ProPainterSetting = ProPainter 설정 AboutSetting = 정보 HardwareAcceleration = 하드웨어 가속 HardwareAccelerationDesc = GPU 또는 ONNX 백엔드 사용 가속 처리 @@ -36,8 +35,6 @@ SttnReferenceLength = 참조 프레임 수 SttnReferenceLengthDesc = 기본값: 10 SttnMaxLoadNum = 최대 동시 처리 프레임 SttnMaxLoadNumDesc = 값 클수록 품질 향상 (VRAM 요구 증가, 기본값 50) -PropainterMaxLoadNum = 최대 동시 처리 프레임 -PropainterMaxLoadNumDesc = 값 클수록 품질 향상 (VRAM 요구 증가, 기본값 70) CheckUpdateOnStartup = 시작시 업데이트 확인 CheckUpdateOnStartupDesc = 새 버전은 안정성/기능 개선 포함 (권장) UpdatesAvailableTitle = 업데이트 가능 @@ -67,7 +64,6 @@ SelectSubtitleArea = 미리보기에서 처리 영역 선택: {} InpaintModeDesc = STTN 스마트 지우기: 실제 인물 영상에 적합, 빠른 속도, 스마트 지우기(최소 4GB VRAM) STTN 자막 감지: 자막 감지 버전, 스마트 지우기 없음(최소 4GB VRAM) LAMA: 애니메이션 영상에 적합, 보통 속도(VRAM 요구량 낮음) - ProPainter: 많은 VRAM 소모, 느린 속도, 격렬한 움직임 영상에 적합(최소 8GB VRAM) OpenCV: 초고속 모드, 인페인트 효과 보장 안 됨, 텍스트 영역만 제거(VRAM 요구량 낮음) SubtitleDetectMode = 자막 감지 ErrorDuringProcessing = 처리 중 오류: {} @@ -122,7 +118,6 @@ RequestError = {} 접근 실패. 
이유: {} SttnAuto = STTN 지능형 제거 SttnDet = STTN 자막 감지 LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/vi.ini b/backend/interface/vi.ini index 015e2b4..a4b89d0 100644 --- a/backend/interface/vi.ini +++ b/backend/interface/vi.ini @@ -11,7 +11,6 @@ BasicSetting = Cài đặt cơ bản AdvancedSetting = Cài đặt nâng cao SubtitleDetectionSetting = Cài đặt phát hiện phụ đề SttnSetting = Cài đặt STTN -ProPainterSetting = Cài đặt ProPainter AboutSetting = Giới thiệu HardwareAcceleration = Tăng tốc phần cứng HardwareAccelerationDesc = Sử dụng GPU hoặc backend ONNX để tăng tốc xử lý @@ -36,8 +35,6 @@ SttnReferenceLength = Số khung tham chiếu SttnReferenceLengthDesc = Mặc định: 10 SttnMaxLoadNum = Số khung xử lý tối đa SttnMaxLoadNumDesc = Càng cao càng tốt (yêu cầu nhiều VRAM, mặc định 50) -PropainterMaxLoadNum = Số khung xử lý tối đa -PropainterMaxLoadNumDesc = Càng cao càng tốt (yêu cầu nhiều VRAM, mặc định 70) CheckUpdateOnStartup = Kiểm tra cập nhật khi khởi động CheckUpdateOnStartupDesc = Phiên bản mới ổn định hơn (khuyến nghị bật) UpdatesAvailableTitle = Có bản cập nhật @@ -67,7 +64,6 @@ SelectSubtitleArea = Chọn vùng xử lý trong preview: {} InpaintModeDesc = STTN Xóa thông minh: Phù hợp cho video người thật, tốc độ nhanh, xóa thông minh (tối thiểu 4GB VRAM) STTN Phát hiện phụ đề: Có phát hiện phụ đề, không xóa thông minh (tối thiểu 4GB VRAM) LAMA: Phù hợp cho video hoạt hình, tốc độ trung bình (yêu cầu VRAM thấp) - ProPainter: Tiêu tốn nhiều VRAM, tốc độ chậm, phù hợp cho video chuyển động mạnh (tối thiểu 8GB VRAM) OpenCV: Chế độ siêu nhanh, không đảm bảo hiệu quả xóa, chỉ xóa vùng chứa văn bản (yêu cầu VRAM thấp) SubtitleDetectMode = Chế độ phát hiện ErrorDuringProcessing = Lỗi khi xử lý: {} @@ -122,7 +118,6 @@ RequestError = Lỗi truy cập {}, lý do: {} SttnAuto = STTN xóa thông minh SttnDet = STTN phát hiện LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/main.py b/backend/main.py index 4efc96b..b067f1c 100644 --- a/backend/main.py +++ b/backend/main.py @@ -19,7 +19,6 @@ from backend.inpaint.sttn_auto_inpaint import STTNAutoInpaint from backend.inpaint.sttn_det_inpaint import STTNDetInpaint from backend.inpaint.lama_inpaint import LamaInpaint from backend.inpaint.opencv_inpaint import OpenCVInpaint -from backend.inpaint.propainter_inpaint import PropainterInpaint from backend.tools.inpaint_tools import create_mask, batch_generator, expand_frame_ranges from backend.tools.model_config import ModelConfig from backend.tools.ffmpeg_cli import FFmpegCLI @@ -67,7 +66,6 @@ class SubtitleRemover: except Exception: self.video_writer = cv2.VideoWriter(get_readable_path(self.video_temp_file.name), cv2.VideoWriter_fourcc(*'mp4v'), self.fps, self.size) self.video_out_path = os.path.abspath(os.path.join(os.path.dirname(self.video_path), f'{self.vd_name}_no_sub.mp4')) - self.propainter_inpaint = None self.ext = os.path.splitext(vd_path)[-1] if self.is_picture: pic_dir = os.path.join(os.path.dirname(self.video_path), 'no_sub') @@ -156,94 +154,6 @@ class SubtitleRemover: """ pass - def propainter_mode(self, tbar): - sub_detector = SubtitleDetect(self.video_path, self.sub_areas) - sub_list = sub_detector.find_subtitle_frame_no(sub_remover=self) - if len(sub_list) == 0: - raise Exception(tr['Main']['NoSubtitleDetected'].format(self.video_path)) - continuous_frame_no_list = sub_detector.find_continuous_ranges_with_same_mask(sub_list) - scene_div_points = sub_detector.get_scene_div_frame_no(self.video_path) - 
continuous_frame_no_list = sub_detector.split_range_by_scene(continuous_frame_no_list, - scene_div_points) - del sub_detector - gc.collect() - device = self.hardware_accelerator.device if self.hardware_accelerator.has_cuda() else torch.device("cpu") - propainter_inpaint = PropainterInpaint(device, self.model_config.PROPAINTER_MODEL_DIR, config.propainterMaxLoadNum.value) - self.append_output(tr['Main']['ProcessingStartRemovingSubtitles']) - index = 0 - # 使用帧预读取,I/O 与推理重叠 - reader = FramePrefetcher(self.video_cap) - while True: - ret, frame = reader.read() - if not ret: - break - index += 1 - # 如果当前帧没有水印/文本则直接写 - if index not in sub_list.keys(): - self.video_writer.write(frame) - # self.append_output(f'write frame: {index}') - self.update_progress(tbar, increment=1) - self.update_preview_with_comp(frame, frame) - continue - # 如果有水印,判断该帧是不是开头帧 - else: - # 如果是开头帧,则批推理到尾帧 - if self.is_current_frame_no_start(index, continuous_frame_no_list): - # self.append_output(f'No 1 Current index: {index}') - start_frame_no = index - # self.append_output(f'find start: {start_frame_no}') - # 找到结束帧 - end_frame_no = self.find_frame_no_end(index, continuous_frame_no_list) - # 判断当前帧号是不是字幕起始位置 - # 如果获取的结束帧号不为-1则说明 - if end_frame_no != -1: - # self.append_output(f'find end: {end_frame_no}') - # ************ 读取该区间所有帧 start ************ - temp_frames = list() - # 将头帧加入处理列表 - temp_frames.append(frame) - inner_index = 0 - # 一直读取到尾帧 - while index < end_frame_no: - ret, frame = reader.read() - if not ret: - break - index += 1 - temp_frames.append(frame) - # ************ 读取该区间所有帧 end ************ - if len(temp_frames) < 1: - # 没有待处理,直接跳过 - continue - elif len(temp_frames) == 1: - inner_index += 1 - single_mask = create_mask(self.mask_size, sub_list[index]) - inpainted_frame = self.lama_inpaint.inpaint(frame, single_mask) - self.video_writer.write(inpainted_frame) - # self.append_output(f'write frame: {start_frame_no + inner_index} with mask {sub_list[start_frame_no]}') - self.update_progress(tbar, increment=1) - continue - else: - # 将读取的视频帧分批处理 - # 1. 获取当前批次使用的mask - mask = create_mask(self.mask_size, sub_list[start_frame_no]) - for batch in batch_generator(temp_frames, config.propainterMaxLoadNum.value): - # 2. 
调用批推理 - if len(batch) == 1: - single_mask = create_mask(self.mask_size, sub_list[start_frame_no]) - inpainted_frame = self.lama_inpaint.inpaint(frame, single_mask) - self.video_writer.write(inpainted_frame) - # self.append_output(f'write frame: {start_frame_no + inner_index} with mask {sub_list[start_frame_no]}') - inner_index += 1 - self.update_progress(tbar, increment=1) - elif len(batch) > 1: - inpainted_frames = propainter_inpaint(batch, mask) - for i, inpainted_frame in enumerate(inpainted_frames): - self.video_writer.write(inpainted_frame) - # self.append_output(f'write frame: {start_frame_no + inner_index} with mask {sub_list[index]}') - inner_index += 1 - self.update_preview_with_comp(np.clip(batch[i]+mask[:,:,np.newaxis]*0.3,0,255).astype(np.uint8), inpainted_frame) - self.update_progress(tbar, increment=len(batch)) - def sttn_auto_mode(self, tbar): """ 使用sttn对选中区域进行重绘,不进行字幕检测 @@ -372,9 +282,7 @@ class SubtitleRemover: else: # 精准模式下,获取场景分割的帧号,进一步切割 self.log_model() - if config.inpaintMode.value == InpaintMode.PROPAINTER: - self.propainter_mode(tbar) - elif config.inpaintMode.value == InpaintMode.STTN_AUTO: + if config.inpaintMode.value == InpaintMode.STTN_AUTO: self.sttn_auto_mode(tbar) elif config.inpaintMode.value == InpaintMode.STTN_DET: self.video_inpaint(tbar, self.sttn_det_inpaint) diff --git a/backend/models/propainter/ProPainter_1.pth b/backend/models/propainter/ProPainter_1.pth deleted file mode 100644 index 0a85ad6..0000000 Binary files a/backend/models/propainter/ProPainter_1.pth and /dev/null differ diff --git a/backend/models/propainter/ProPainter_2.pth b/backend/models/propainter/ProPainter_2.pth deleted file mode 100644 index 948aebc..0000000 Binary files a/backend/models/propainter/ProPainter_2.pth and /dev/null differ diff --git a/backend/models/propainter/ProPainter_3.pth b/backend/models/propainter/ProPainter_3.pth deleted file mode 100644 index cc3586e..0000000 Binary files a/backend/models/propainter/ProPainter_3.pth and /dev/null differ diff --git a/backend/models/propainter/ProPainter_4.pth b/backend/models/propainter/ProPainter_4.pth deleted file mode 100644 index aff41a0..0000000 Binary files a/backend/models/propainter/ProPainter_4.pth and /dev/null differ diff --git a/backend/models/propainter/fs_manifest.csv b/backend/models/propainter/fs_manifest.csv deleted file mode 100644 index 3583bcc..0000000 --- a/backend/models/propainter/fs_manifest.csv +++ /dev/null @@ -1,5 +0,0 @@ -filename,filesize,encoding,header -ProPainter_1.pth,50000000,, -ProPainter_2.pth,50000000,, -ProPainter_3.pth,50000000,, -ProPainter_4.pth,7780510,, diff --git a/backend/models/propainter/raft-things.pth b/backend/models/propainter/raft-things.pth deleted file mode 100644 index dbe6f9f..0000000 Binary files a/backend/models/propainter/raft-things.pth and /dev/null differ diff --git a/backend/models/propainter/recurrent_flow_completion.pth b/backend/models/propainter/recurrent_flow_completion.pth deleted file mode 100644 index 28d11ea..0000000 Binary files a/backend/models/propainter/recurrent_flow_completion.pth and /dev/null differ diff --git a/backend/tools/constant.py b/backend/tools/constant.py index 40d57cd..3e63a3c 100644 --- a/backend/tools/constant.py +++ b/backend/tools/constant.py @@ -8,7 +8,6 @@ class InpaintMode(Enum): STTN_AUTO = "sttn-auto" STTN_DET = "sttn-det" LAMA = "lama" - PROPAINTER = "propainter" OPENCV = "opencv" @unique diff --git a/backend/tools/model_config.py b/backend/tools/model_config.py index 09e21a6..d5b9e71 100644 --- a/backend/tools/model_config.py 
+++ b/backend/tools/model_config.py @@ -13,7 +13,6 @@ class ModelConfig: self.LAMA_MODEL_DIR = os.path.join(BASE_DIR, 'models', 'big-lama') self.STTN_AUTO_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn-auto', 'infer_model.pth') self.STTN_DET_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn-det', 'sttn.pth') - self.PROPAINTER_MODEL_DIR = os.path.join(BASE_DIR,'models', 'propainter') if config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv5_MOBILE: self.DET_MODEL_DIR = os.path.join(BASE_DIR,'models', 'V5', 'ch_det_fast') elif config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv5_SERVER: @@ -23,4 +22,3 @@ class ModelConfig: self.DET_MODEL_NAME = _MODEL_NAME_MAP[config.subtitleDetectMode.value] merge_big_file_if_not_exists(self.LAMA_MODEL_DIR, 'bit-lama.pt') - merge_big_file_if_not_exists(self.PROPAINTER_MODEL_DIR, 'ProPainter.pth') diff --git a/ui/advanced_setting_interface.py b/ui/advanced_setting_interface.py index 1bad94c..5fc4481 100644 --- a/ui/advanced_setting_interface.py +++ b/ui/advanced_setting_interface.py @@ -56,9 +56,6 @@ class AdvancedSettingInterface(ScrollArea): self.sttn_group.addSettingCard(self.sttn_max_load_num) self.expandLayout.addWidget(self.sttn_group) - self.propainter_group.addSettingCard(self.propainter_max_load_num) - self.expandLayout.addWidget(self.propainter_group) - self.advanced_group.addSettingCard(self.save_directory) self.advanced_group.addSettingCard(self.check_update_on_startup) self.expandLayout.addWidget(self.advanced_group) @@ -77,8 +74,6 @@ class AdvancedSettingInterface(ScrollArea): self.subtitle_detection_group = SettingCardGroup(tr["Setting"]["SubtitleDetectionSetting"], self.scrollWidget) # STTN设置组 self.sttn_group = SettingCardGroup(tr["Setting"]["SttnSetting"], self.scrollWidget) - # Propainter设置组 - self.propainter_group = SettingCardGroup(tr["Setting"]["ProPainterSetting"], self.scrollWidget) # 高级设置组 self.advanced_group = SettingCardGroup(tr["Setting"]["AdvancedSetting"], self.scrollWidget) # 关于设置组 @@ -164,14 +159,6 @@ class AdvancedSettingInterface(ScrollArea): parent=self.sttn_group ) - self.propainter_max_load_num = RangeSettingCard( - configItem=config.propainterMaxLoadNum, - icon=FluentIcon.DICTIONARY, - title=tr["Setting"]["PropainterMaxLoadNum"], - content=tr["Setting"]["PropainterMaxLoadNumDesc"], - parent=self.propainter_group - ) - # 视频保存路径 self.save_directory = PushSettingCard( text=tr["Setting"]["ChooseDirectory"],
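
The net user-facing surface left by this patch, per the hunks in `backend/tools/constant.py`, `README.md`, and `README_en.md`: `--inpaint-mode` now accepts four values (`sttn-auto`, `sttn-det`, `lama`, `opencv`), still defaulting to `sttn-auto`. Below is a minimal, self-contained sketch of that surface; the enum members are copied from the diff, while the `argparse` wiring is assumed for illustration only and is not the repository's actual CLI code.

```python
import argparse
from enum import Enum, unique


@unique
class InpaintMode(Enum):
    # Post-patch members from backend/tools/constant.py (PROPAINTER removed).
    STTN_AUTO = "sttn-auto"
    STTN_DET = "sttn-det"
    LAMA = "lama"
    OPENCV = "opencv"


# Hypothetical argument wiring, mirroring the README usage text above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--inpaint-mode",
    type=InpaintMode,               # InpaintMode("lama") -> InpaintMode.LAMA
    choices=list(InpaintMode),
    default=InpaintMode.STTN_AUTO,  # "default is sttn-auto" per the README hunk
)

args = parser.parse_args(["--inpaint-mode", "lama"])
assert args.inpaint_mode is InpaintMode.LAMA
```

Removing the enum member outright, rather than only hiding the ProPainter settings card, keeps the CLI choices, the `OptionsValidator(InpaintMode)` in `backend/config.py`, and the mode dispatch in `backend/main.py` in agreement, since all three derive from the same `InpaintMode` enum.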