diff --git a/.gitignore b/.gitignore index fd3441c..d065daf 100644 --- a/.gitignore +++ b/.gitignore @@ -369,7 +369,6 @@ test_*.mp4 test*_no_sub*.mp4 /test/coods/ /local_test/ -/backend/models/propainter/ProPainter.pth /backend/models/big-lama/big-lama.pt /test/debug/ /backend/tools/train/release_model/ diff --git a/README.md b/README.md index 96e8d65..07d1a46 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ options: Output video file path (optional) --subtitle-area-coords YMIN YMAX XMIN XMAX, -c YMIN YMAX XMIN XMAX Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas. - --inpaint-mode {sttn-auto,sttn-det,lama,propainter,opencv} + --inpaint-mode {sttn-auto,sttn-det,lama,opencv} Inpaint mode, default is sttn-auto ``` ## 演示 @@ -235,7 +235,6 @@ STTN_SKIP_DETECTION = True # 跳过字幕检测,跳过后可能会导致要去 > - InpaintMode.STTN 算法:对于真人视频效果较好,速度快,可以跳过字幕检测 > - InpaintMode.LAMA 算法:对于图片效果最好,对动画类视频效果好,速度一般,不可以跳过字幕检测 -> - InpaintMode.PROPAINTER 算法: 需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好 - 使用STTN算法 diff --git a/README_en.md b/README_en.md index fb8d1ef..ee5e36e 100755 --- a/README_en.md +++ b/README_en.md @@ -73,7 +73,7 @@ options: Output video file path (optional) --subtitle-area-coords YMIN YMAX XMIN XMAX, -c YMIN YMAX XMIN XMAX Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas. - --inpaint-mode {sttn-auto,sttn-det,lama,propainter,opencv} + --inpaint-mode {sttn-auto,sttn-det,lama,opencv} Inpaint mode, default is sttn-auto ``` ## Demonstration @@ -234,7 +234,6 @@ Modify the values in backend/config.py and try different removal algorithms. Her > - InpaintMode.STTN algorithm: Good for live-action videos and fast in speed, capable of skipping subtitle detection > - InpaintMode.LAMA algorithm: Best for images and effective for animated videos, moderate speed, unable to skip subtitle detection -> - InpaintMode.PROPAINTER algorithm: Consumes a significant amount of VRAM, slower in speed, works better for videos with very intense movement - Using the STTN algorithm diff --git a/backend/config.py b/backend/config.py index 6fe9331..091ab9e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -47,7 +47,6 @@ class Config(QConfig): - InpaintMode.STTN_AUTO 智能擦除版 - InpaintMode.STTN_DET 带字幕检测版, 无智能擦除 - InpaintMode.LAMA 算法:对于动画类视频效果好,速度一般,不可以跳过字幕检测 - - InpaintMode.PROPAINTER 算法: 需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好 """ # 【设置inpaint算法】 inpaintMode = OptionsConfigItem("Main", "InpaintMode", InpaintMode.STTN_AUTO, OptionsValidator(InpaintMode), EnumSerializer(InpaintMode)) @@ -92,12 +91,6 @@ class Config(QConfig): # 设置STTN算法最大同时处理的帧数量 sttnMaxLoadNum = RangeConfigItem("Sttn", "MaxLoadNum", 50, RangeValidator(1, 300)) getSttnMaxLoadNum = lambda self: max(self.sttnMaxLoadNum.value, self.sttnNeighborStride.value * self.sttnReferenceLength.value) - - # 以下参数仅适用PROPAINTER算法时,才生效 - # 【根据自己的GPU显存大小设置】最大同时处理的图片数量,设置越大处理效果越好,但是要求显存越高 - # 1280x720p视频设置80需要25G显存,设置50需要19G显存 - # 720x480p视频设置80需要8G显存,设置50需要7G显存 - propainterMaxLoadNum = RangeConfigItem("ProPainter", "MaxLoadNum", 70, RangeValidator(1, 300)) # 是否使用硬件加速 hardwareAcceleration = ConfigItem("Main", "HardwareAcceleration", HARDWARD_ACCELERATION_OPTION, BoolValidator()) diff --git a/backend/inpaint/propainter_inpaint.py b/backend/inpaint/propainter_inpaint.py deleted file mode 100644 index 710941d..0000000 --- a/backend/inpaint/propainter_inpaint.py +++ /dev/null @@ -1,447 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import gc -import cv2 -import numpy as np -import scipy.ndimage -from PIL import 
Image -from typing import List - -import torch -import torchvision - -from backend import config -from backend.inpaint.video.model.modules.flow_comp_raft import RAFT_bi -from backend.inpaint.video.model.recurrent_flow_completion import RecurrentFlowCompleteNet -from backend.inpaint.video.model.propainter import InpaintGenerator -from backend.inpaint.video.core.utils import to_tensors -from backend.inpaint.video.model.misc import get_device -from backend.tools.inpaint_tools import get_inpaint_area_by_mask - -import warnings - -warnings.filterwarnings("ignore") - -def binary_mask(mask, th=0.1): - mask[mask > th] = 1 - mask[mask <= th] = 0 - return mask - - -# read frame-wise masks -def read_mask(mpath, length, size, flow_mask_dilates=8, mask_dilates=5): - masks_img = [] - masks_dilated = [] - flow_masks = [] - # 如果传入的直接为numpy array - if isinstance(mpath, np.ndarray): - if mpath.ndim == 3 and mpath.shape[2] == 1: - mpath = mpath.squeeze(2) # 从 (H,W,1) 转为 (H,W) - elif mpath.ndim == 3 and mpath.shape[2] == 3: - # 如果是彩色图像,转为灰度 - mpath = cv2.cvtColor(mpath, cv2.COLOR_BGR2GRAY) - masks_img = [Image.fromarray(mpath)] - # input single img path - else: - if isinstance(mpath, str): - if mpath.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')): - masks_img = [Image.open(mpath)] - else: - mnames = sorted(os.listdir(mpath)) - for mp in mnames: - masks_img.append(Image.open(os.path.join(mpath, mp))) - - for mask_img in masks_img: - mask_img = np.array(mask_img.convert('L')) - - # Dilate 8 pixel so that all known pixel is trustworthy - if flow_mask_dilates > 0: - flow_mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=flow_mask_dilates).astype(np.uint8) - else: - flow_mask_img = binary_mask(mask_img).astype(np.uint8) - # Close the small holes inside the foreground objects - # flow_mask_img = cv2.morphologyEx(flow_mask_img, cv2.MORPH_CLOSE, np.ones((21, 21),np.uint8)).astype(bool) - # flow_mask_img = scipy.ndimage.binary_fill_holes(flow_mask_img).astype(np.uint8) - flow_masks.append(Image.fromarray(flow_mask_img * 255)) - - if mask_dilates > 0: - mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=mask_dilates).astype(np.uint8) - else: - mask_img = binary_mask(mask_img).astype(np.uint8) - masks_dilated.append(Image.fromarray(mask_img * 255)) - - if len(masks_img) == 1: - flow_masks = flow_masks * length - masks_dilated = masks_dilated * length - - return flow_masks, masks_dilated - - -def extrapolation(video_ori, scale): - """Prepares the data for video outpainting. - """ - nFrame = len(video_ori) - imgW, imgH = video_ori[0].size - - # Defines new FOV. - imgH_extr = int(scale[0] * imgH) - imgW_extr = int(scale[1] * imgW) - imgH_extr = imgH_extr - imgH_extr % 8 - imgW_extr = imgW_extr - imgW_extr % 8 - H_start = int((imgH_extr - imgH) / 2) - W_start = int((imgW_extr - imgW) / 2) - - # Extrapolates the FOV for video. - frames = [] - for v in video_ori: - frame = np.zeros((imgH_extr, imgW_extr, 3), dtype=np.uint8) - frame[H_start: H_start + imgH, W_start: W_start + imgW, :] = v - frames.append(Image.fromarray(frame)) - - # Generates the mask for missing region. 
- masks_dilated = [] - flow_masks = [] - - dilate_h = 4 if H_start > 10 else 0 - dilate_w = 4 if W_start > 10 else 0 - mask = np.ones(((imgH_extr, imgW_extr)), dtype=np.uint8) - - mask[H_start + dilate_h: H_start + imgH - dilate_h, - W_start + dilate_w: W_start + imgW - dilate_w] = 0 - flow_masks.append(Image.fromarray(mask * 255)) - - mask[H_start: H_start + imgH, W_start: W_start + imgW] = 0 - masks_dilated.append(Image.fromarray(mask * 255)) - - flow_masks = flow_masks * nFrame - masks_dilated = masks_dilated * nFrame - - return frames, flow_masks, masks_dilated, (imgW_extr, imgH_extr) - - -def get_ref_index(mid_neighbor_id, neighbor_ids, length, ref_stride=10, ref_num=-1): - ref_index = [] - if ref_num == -1: - for i in range(0, length, ref_stride): - if i not in neighbor_ids: - ref_index.append(i) - else: - start_idx = max(0, mid_neighbor_id - ref_stride * (ref_num // 2)) - end_idx = min(length, mid_neighbor_id + ref_stride * (ref_num // 2)) - for i in range(start_idx, end_idx, ref_stride): - if i not in neighbor_ids: - if len(ref_index) > ref_num: - break - ref_index.append(i) - return ref_index - - -class PropainterInpaint: - def __init__(self, device, model_dir, sub_video_length=80, use_fp16=True): - self.device = device - self.model_dir = model_dir - self.use_fp16 = use_fp16 - self.use_half = True if self.use_fp16 else False - if self.device == torch.device('cpu'): - self.use_half = False - # Length of sub-video for long video inference. - self.sub_video_length = sub_video_length - # Length of local neighboring frames.' - self.neighbor_length = 10 - # Mask dilation for video and flow masking - self.mask_dilation = 4 - # Stride of global reference frames - self.ref_stride = 10 - # Iterations for RAFT inference - self.raft_iter = 20 - # Stride of global reference frames - self.ref_stride = 10 - # 设置raft模型 - self.fix_raft = self.init_raft_model() - # 设置fix_flow模型 - self.fix_flow_complete = self.init_fix_flow_model() - # 设置inpaint模型 - self.model = self.init_inpaint_model() - - def init_raft_model(self): - # set up RAFT and flow competition model - return RAFT_bi(os.path.join(self.model_dir, 'raft-things.pth'), self.device) - - def init_fix_flow_model(self): - fix_flow_complete_model = RecurrentFlowCompleteNet( - os.path.join(self.model_dir, 'recurrent_flow_completion.pth')) - for p in fix_flow_complete_model.parameters(): - p.requires_grad = False - - if self.use_half: - fix_flow_complete_model = fix_flow_complete_model.half() - fix_flow_complete_model.to(self.device) - fix_flow_complete_model.eval() - return fix_flow_complete_model - - def init_inpaint_model(self): - # set up ProPainter model - model = InpaintGenerator(model_path=os.path.join(self.model_dir, 'ProPainter.pth')) - if self.use_half: - model = model.half() - model = model.to(self.device).eval() - return model - - def inpaint(self, frames, mask): - if isinstance(frames[0], np.ndarray): - frames = [Image.fromarray(cv2.cvtColor(f, cv2.COLOR_BGR2RGB)) for f in frames] - size = frames[0].size - frames_len = len(frames) - flow_masks, masks_dilated = read_mask(mask, frames_len, size, - flow_mask_dilates=self.mask_dilation, - mask_dilates=self.mask_dilation) - w, h = size - # for saving the masked frames or video - masked_frame_for_save = [] - for i in range(len(frames)): - mask_ = np.expand_dims(np.array(masks_dilated[i]), 2).repeat(3, axis=2) / 255. 
- img = np.array(frames[i]) - green = np.zeros([h, w, 3]) - green[:, :, 1] = 255 - alpha = 0.6 - # alpha = 1.0 - fuse_img = (1 - alpha) * img + alpha * green - fuse_img = mask_ * fuse_img + (1 - mask_) * img - masked_frame_for_save.append(fuse_img.astype(np.uint8)) - - frames_inp = [np.array(f).astype(np.uint8) for f in frames] - frames = to_tensors()(frames).unsqueeze(0) * 2 - 1 - flow_masks = to_tensors()(flow_masks).unsqueeze(0) - masks_dilated = to_tensors()(masks_dilated).unsqueeze(0) - frames, flow_masks, masks_dilated = frames.to(self.device), flow_masks.to(self.device), masks_dilated.to( - self.device) - video_length = frames.size(1) - with torch.no_grad(): - # ---- compute flow ---- - if frames.size(-1) <= 640: - short_clip_len = 12 - elif frames.size(-1) <= 720: - short_clip_len = 8 - elif frames.size(-1) <= 1280: - short_clip_len = 4 - else: - short_clip_len = 2 - - # use fp32 for RAFT - if frames.size(1) > short_clip_len: - gt_flows_f_list, gt_flows_b_list = [], [] - for f in range(0, video_length, short_clip_len): - end_f = min(video_length, f + short_clip_len) - if f == 0: - flows_f, flows_b = self.fix_raft(frames[:, f:end_f], iters=self.raft_iter) - else: - flows_f, flows_b = self.fix_raft(frames[:, f - 1:end_f], iters=self.raft_iter) - gt_flows_f_list.append(flows_f) - gt_flows_b_list.append(flows_b) - torch.cuda.empty_cache() - gt_flows_f = torch.cat(gt_flows_f_list, dim=1) - gt_flows_b = torch.cat(gt_flows_b_list, dim=1) - gt_flows_bi = (gt_flows_f, gt_flows_b) - else: - gt_flows_bi = self.fix_raft(frames, iters=self.raft_iter) - torch.cuda.empty_cache() - - if self.use_half: - frames, flow_masks, masks_dilated = frames.half(), flow_masks.half(), masks_dilated.half() - gt_flows_bi = (gt_flows_bi[0].half(), gt_flows_bi[1].half()) - - # ---- complete flow ---- - flow_length = gt_flows_bi[0].size(1) - if flow_length > self.sub_video_length: - pred_flows_f, pred_flows_b = [], [] - pad_len = 5 - for f in range(0, flow_length, self.sub_video_length): - s_f = max(0, f - pad_len) - e_f = min(flow_length, f + self.sub_video_length + pad_len) - pad_len_s = max(0, f) - s_f - pad_len_e = e_f - min(flow_length, f + self.sub_video_length) - pred_flows_bi_sub, _ = self.fix_flow_complete.forward_bidirect_flow( - (gt_flows_bi[0][:, s_f:e_f], gt_flows_bi[1][:, s_f:e_f]), - flow_masks[:, s_f:e_f + 1]) - pred_flows_bi_sub = self.fix_flow_complete.combine_flow( - (gt_flows_bi[0][:, s_f:e_f], gt_flows_bi[1][:, s_f:e_f]), - pred_flows_bi_sub, - flow_masks[:, s_f:e_f + 1]) - - pred_flows_f.append(pred_flows_bi_sub[0][:, pad_len_s:e_f - s_f - pad_len_e]) - pred_flows_b.append(pred_flows_bi_sub[1][:, pad_len_s:e_f - s_f - pad_len_e]) - torch.cuda.empty_cache() - - pred_flows_f = torch.cat(pred_flows_f, dim=1) - pred_flows_b = torch.cat(pred_flows_b, dim=1) - pred_flows_bi = (pred_flows_f, pred_flows_b) - else: - pred_flows_bi, _ = self.fix_flow_complete.forward_bidirect_flow(gt_flows_bi, flow_masks) - pred_flows_bi = self.fix_flow_complete.combine_flow(gt_flows_bi, pred_flows_bi, flow_masks) - torch.cuda.empty_cache() - - # ---- image propagation ---- - masked_frames = frames * (1 - masks_dilated) - # ensure a minimum of 100 frames for image propagation - subvideo_length_img_prop = min(100, self.sub_video_length) - if video_length > subvideo_length_img_prop: - updated_frames, updated_masks = [], [] - pad_len = 10 - for f in range(0, video_length, subvideo_length_img_prop): - s_f = max(0, f - pad_len) - e_f = min(video_length, f + subvideo_length_img_prop + pad_len) - pad_len_s = max(0, f) - s_f - 
pad_len_e = e_f - min(video_length, f + subvideo_length_img_prop) - - b, t, _, _, _ = masks_dilated[:, s_f:e_f].size() - pred_flows_bi_sub = (pred_flows_bi[0][:, s_f:e_f - 1], pred_flows_bi[1][:, s_f:e_f - 1]) - prop_imgs_sub, updated_local_masks_sub = self.model.img_propagation(masked_frames[:, s_f:e_f], - pred_flows_bi_sub, - masks_dilated[:, s_f:e_f], - 'nearest') - updated_frames_sub = frames[:, s_f:e_f] * (1 - masks_dilated[:, s_f:e_f]) + prop_imgs_sub.view(b, t, 3, h, w) * masks_dilated[:, s_f:e_f] - updated_masks_sub = updated_local_masks_sub.view(b, t, 1, h, w) - updated_frames.append(updated_frames_sub[:, pad_len_s:e_f - s_f - pad_len_e]) - updated_masks.append(updated_masks_sub[:, pad_len_s:e_f - s_f - pad_len_e]) - torch.cuda.empty_cache() - - updated_frames = torch.cat(updated_frames, dim=1) - updated_masks = torch.cat(updated_masks, dim=1) - else: - b, t, _, _, _ = masks_dilated.size() - prop_imgs, updated_local_masks = self.model.img_propagation(masked_frames, pred_flows_bi, masks_dilated, - 'nearest') - updated_frames = frames * (1 - masks_dilated) + prop_imgs.view(b, t, 3, h, w) * masks_dilated - updated_masks = updated_local_masks.view(b, t, 1, h, w) - torch.cuda.empty_cache() - - ori_frames = frames_inp - comp_frames = [None] * video_length - - neighbor_stride = self.neighbor_length // 2 - if video_length > self.sub_video_length: - ref_num = self.sub_video_length // self.ref_stride - else: - ref_num = -1 - - # ---- feature propagation + transformer ---- - for f in range(0, video_length, neighbor_stride): - neighbor_ids = [ - i for i in range(max(0, f - neighbor_stride), - min(video_length, f + neighbor_stride + 1)) - ] - ref_ids = get_ref_index(f, neighbor_ids, video_length, self.ref_stride, ref_num) - selected_imgs = updated_frames[:, neighbor_ids + ref_ids, :, :, :] - selected_masks = masks_dilated[:, neighbor_ids + ref_ids, :, :, :] - selected_update_masks = updated_masks[:, neighbor_ids + ref_ids, :, :, :] - selected_pred_flows_bi = ( - pred_flows_bi[0][:, neighbor_ids[:-1], :, :, :], pred_flows_bi[1][:, neighbor_ids[:-1], :, :, :]) - - with torch.no_grad(): - # 1.0 indicates mask - l_t = len(neighbor_ids) - pred_img = self.model(selected_imgs, selected_pred_flows_bi, selected_masks, selected_update_masks, l_t) - pred_img = pred_img.view(-1, 3, h, w) - pred_img = (pred_img + 1) / 2 - pred_img = pred_img.cpu().permute(0, 2, 3, 1).numpy() * 255 - binary_masks = masks_dilated[0, neighbor_ids, :, :, :].cpu().permute( - 0, 2, 3, 1).numpy().astype(np.uint8) - for i in range(len(neighbor_ids)): - idx = neighbor_ids[i] - img = np.array(pred_img[i]).astype(np.uint8) * binary_masks[i] \ - + ori_frames[idx] * (1 - binary_masks[i]) - if comp_frames[idx] is None: - comp_frames[idx] = img - else: - comp_frames[idx] = comp_frames[idx].astype(np.float32) * 0.5 + img.astype(np.float32) * 0.5 - comp_frames[idx] = comp_frames[idx].astype(np.uint8) - torch.cuda.empty_cache() - # save videos frame - comp_frames = [cv2.cvtColor(i, cv2.COLOR_RGB2BGR) for i in comp_frames] - return comp_frames - - def __call__(self, input_frames: List[np.ndarray], input_mask: np.ndarray): - """ - :param input_frames: 原视频帧 - :param input_mask: 字幕区域mask - """ - mask = input_mask[:, :, None] - H_ori, W_ori = mask.shape[:2] - H_ori = int(H_ori + 0.5) - W_ori = int(W_ori + 0.5) - # 确定去字幕的垂直高度部分 - split_h = int(W_ori * 3 / 16) - inpaint_area = get_inpaint_area_by_mask(W_ori, H_ori, split_h, mask, multiple=8) - # 初始化帧存储变量 - # 高分辨率帧存储列表 - frames_hr = [f.copy() for f in input_frames] - frames_scaled = {} # 存放缩放后帧的字典 
- masks_scaled = {} # 存放缩放后遮罩的字典 - comps = {} # 存放补全后帧的字典 - # 存储最终的视频帧 - inpainted_frames = [] - for k in range(len(inpaint_area)): - frames_scaled[k] = [] # 为每个去除部分初始化一个列表 - masks_scaled[k] = [] # 为每个去除部分初始化一个列表 - - # 读取并缩放帧 - for j in range(len(frames_hr)): - image = frames_hr[j] - # 对每个去除部分进行切割和缩放 - for k in range(len(inpaint_area)): - image_crop = image[inpaint_area[k][0]:inpaint_area[k][1], inpaint_area[k][2]:inpaint_area[k][3], :] # 切割 - mask_crop = mask[inpaint_area[k][0]:inpaint_area[k][1], inpaint_area[k][2]:inpaint_area[k][3], :] # 切割 - frames_scaled[k].append(image_crop) # 将缩放后的帧添加到对应列表 - masks_scaled[k].append(mask_crop) # 将缩放后的遮罩添加到对应列表 - - # 处理每一个去除部分 - for k in range(len(inpaint_area)): - # 调用inpaint函数进行处理 - comps[k] = self.inpaint(frames_scaled[k], masks_scaled[k][0]) - del frames_scaled[k], masks_scaled[k] - gc.collect() - - # 如果存在去除部分 - if inpaint_area: - for j in range(len(frames_hr)): - frame = frames_hr[j] # 取出原始帧 - # 对于模式中的每一个段落 - for k in range(len(inpaint_area)): - comp = comps[k][j] # 获取补全后的帧 - # 实现遮罩区域内的图像融合 - frame[inpaint_area[k][0]:inpaint_area[k][1], inpaint_area[k][2]:inpaint_area[k][3], :] = comp - # 将最终帧添加到列表 - inpainted_frames.append(frame) - # print(f'processing frame, {len(frames_hr) - j} left') - else: - inpainted_frames = frames_hr - return inpainted_frames - - -def read_frames(v_path): - video_cap = cv2.VideoCapture(v_path) - video_frames = [] - while True: - ret, frame = video_cap.read() - if not ret: - break - video_frames.append(frame) - video_frames = [Image.fromarray(f) for f in video_frames] - return video_frames - - -if __name__ == '__main__': - # PropainterInpaint - propainter_inpaint = PropainterInpaint(get_device(), ModelConfig().PROPAINTER_MODEL_DIR, sub_video_length=80) - frames = read_frames('/home/yao/Documents/Project/video-subtitle-remover/local_test/test1.mp4') - mask = cv2.imread('/home/yao/Documents/Project/video-subtitle-remover/local_test/test1_mask.png') - inpainted_frames = propainter_inpaint.inpaint(frames, mask) - save_root = '/home/yao/Documents/Project/video-subtitle-remover/local_test/' - video_out_path = os.path.join(save_root, 'inpaint_out.mp4') - print("size: ", inpainted_frames[0].shape) - video_writer = cv2.VideoWriter(video_out_path, cv2.VideoWriter_fourcc(*'mp4v'), 24, (640, 360)) - for comp_frame in inpainted_frames: - video_writer.write(comp_frame) - video_writer.release() - print(f'\nAll results are saved in {save_root}') - diff --git a/backend/inpaint/video/model/propainter.py b/backend/inpaint/video/model/propainter.py deleted file mode 100644 index a83ed3d..0000000 --- a/backend/inpaint/video/model/propainter.py +++ /dev/null @@ -1,539 +0,0 @@ -''' Towards An End-to-End Framework for Video Inpainting -''' - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchvision - -from einops import rearrange - -from backend.inpaint.video.model.modules.base_module import BaseNetwork -from backend.inpaint.video.model.modules.sparse_transformer import TemporalSparseTransformerBlock, SoftSplit, SoftComp -from backend.inpaint.video.model.modules.spectral_norm import spectral_norm as _spectral_norm -from backend.inpaint.video.model.modules.flow_loss_utils import flow_warp -from backend.inpaint.video.model.modules.deformconv import ModulatedDeformConv2d - -from .misc import constant_init - - -def length_sq(x): - return torch.sum(torch.square(x), dim=1, keepdim=True) - - -def fbConsistencyCheck(flow_fw, flow_bw, alpha1=0.01, alpha2=0.5): - flow_bw_warped = flow_warp(flow_bw, flow_fw.permute(0, 2, 3, 
1)) # wb(wf(x)) - flow_diff_fw = flow_fw + flow_bw_warped # wf + wb(wf(x)) - - mag_sq_fw = length_sq(flow_fw) + length_sq(flow_bw_warped) # |wf| + |wb(wf(x))| - occ_thresh_fw = alpha1 * mag_sq_fw + alpha2 - - # fb_valid_fw = (length_sq(flow_diff_fw) < occ_thresh_fw).float() - fb_valid_fw = (length_sq(flow_diff_fw) < occ_thresh_fw).to(flow_fw) - return fb_valid_fw - - -class DeformableAlignment(ModulatedDeformConv2d): - """Second-order deformable alignment module.""" - - def __init__(self, *args, **kwargs): - # self.max_residue_magnitude = kwargs.pop('max_residue_magnitude', 10) - self.max_residue_magnitude = kwargs.pop('max_residue_magnitude', 3) - - super(DeformableAlignment, self).__init__(*args, **kwargs) - - self.conv_offset = nn.Sequential( - nn.Conv2d(2 * self.out_channels + 2 + 1 + 2, self.out_channels, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.1, inplace=True), - nn.Conv2d(self.out_channels, self.out_channels, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.1, inplace=True), - nn.Conv2d(self.out_channels, self.out_channels, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.1, inplace=True), - nn.Conv2d(self.out_channels, 27 * self.deform_groups, 3, 1, 1), - ) - self.init_offset() - - def init_offset(self): - constant_init(self.conv_offset[-1], val=0, bias=0) - - def forward(self, x, cond_feat, flow): - out = self.conv_offset(cond_feat) - o1, o2, mask = torch.chunk(out, 3, dim=1) - - # offset - offset = self.max_residue_magnitude * torch.tanh(torch.cat((o1, o2), dim=1)) - offset = offset + flow.flip(1).repeat(1, offset.size(1) // 2, 1, 1) - - # mask - mask = torch.sigmoid(mask) - - return torchvision.ops.deform_conv2d(x, offset, self.weight, self.bias, - self.stride, self.padding, - self.dilation, mask) - - -class BidirectionalPropagation(nn.Module): - def __init__(self, channel, learnable=True): - super(BidirectionalPropagation, self).__init__() - self.deform_align = nn.ModuleDict() - self.backbone = nn.ModuleDict() - self.channel = channel - self.prop_list = ['backward_1', 'forward_1'] - self.learnable = learnable - - if self.learnable: - for i, module in enumerate(self.prop_list): - self.deform_align[module] = DeformableAlignment( - channel, channel, 3, padding=1, deform_groups=16) - - self.backbone[module] = nn.Sequential( - nn.Conv2d(2 * channel + 2, channel, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(channel, channel, 3, 1, 1), - ) - - self.fuse = nn.Sequential( - nn.Conv2d(2 * channel + 2, channel, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(channel, channel, 3, 1, 1), - ) - - def binary_mask(self, mask, th=0.1): - mask[mask > th] = 1 - mask[mask <= th] = 0 - # return mask.float() - return mask.to(mask) - - def forward(self, x, flows_forward, flows_backward, mask, interpolation='bilinear'): - """ - x shape : [b, t, c, h, w] - return [b, t, c, h, w] - """ - - # For backward warping - # pred_flows_forward for backward feature propagation - # pred_flows_backward for forward feature propagation - b, t, c, h, w = x.shape - feats, masks = {}, {} - feats['input'] = [x[:, i, :, :, :] for i in range(0, t)] - masks['input'] = [mask[:, i, :, :, :] for i in range(0, t)] - - prop_list = ['backward_1', 'forward_1'] - cache_list = ['input'] + prop_list - - for p_i, module_name in enumerate(prop_list): - feats[module_name] = [] - masks[module_name] = [] - - if 'backward' in module_name: - frame_idx = range(0, t) - frame_idx = frame_idx[::-1] - flow_idx = frame_idx - flows_for_prop = flows_forward - flows_for_check = flows_backward - else: - 
frame_idx = range(0, t) - flow_idx = range(-1, t - 1) - flows_for_prop = flows_backward - flows_for_check = flows_forward - - for i, idx in enumerate(frame_idx): - feat_current = feats[cache_list[p_i]][idx] - mask_current = masks[cache_list[p_i]][idx] - - if i == 0: - feat_prop = feat_current - mask_prop = mask_current - else: - flow_prop = flows_for_prop[:, flow_idx[i], :, :, :] - flow_check = flows_for_check[:, flow_idx[i], :, :, :] - flow_vaild_mask = fbConsistencyCheck(flow_prop, flow_check) - feat_warped = flow_warp(feat_prop, flow_prop.permute(0, 2, 3, 1), interpolation) - - if self.learnable: - cond = torch.cat([feat_current, feat_warped, flow_prop, flow_vaild_mask, mask_current], dim=1) - feat_prop = self.deform_align[module_name](feat_prop, cond, flow_prop) - mask_prop = mask_current - else: - mask_prop_valid = flow_warp(mask_prop, flow_prop.permute(0, 2, 3, 1)) - mask_prop_valid = self.binary_mask(mask_prop_valid) - - union_vaild_mask = self.binary_mask(mask_current * flow_vaild_mask * (1 - mask_prop_valid)) - feat_prop = union_vaild_mask * feat_warped + (1 - union_vaild_mask) * feat_current - # update mask - mask_prop = self.binary_mask(mask_current * (1 - (flow_vaild_mask * (1 - mask_prop_valid)))) - - # refine - if self.learnable: - feat = torch.cat([feat_current, feat_prop, mask_current], dim=1) - feat_prop = feat_prop + self.backbone[module_name](feat) - # feat_prop = self.backbone[module_name](feat_prop) - - feats[module_name].append(feat_prop) - masks[module_name].append(mask_prop) - - # end for - if 'backward' in module_name: - feats[module_name] = feats[module_name][::-1] - masks[module_name] = masks[module_name][::-1] - - outputs_b = torch.stack(feats['backward_1'], dim=1).view(-1, c, h, w) - outputs_f = torch.stack(feats['forward_1'], dim=1).view(-1, c, h, w) - - if self.learnable: - mask_in = mask.view(-1, 2, h, w) - masks_b, masks_f = None, None - outputs = self.fuse(torch.cat([outputs_b, outputs_f, mask_in], dim=1)) + x.view(-1, c, h, w) - else: - masks_b = torch.stack(masks['backward_1'], dim=1) - masks_f = torch.stack(masks['forward_1'], dim=1) - outputs = outputs_f - - return outputs_b.view(b, -1, c, h, w), outputs_f.view(b, -1, c, h, w), \ - outputs.view(b, -1, c, h, w), masks_f - - -class Encoder(nn.Module): - def __init__(self): - super(Encoder, self).__init__() - self.group = [1, 2, 4, 8, 1] - self.layers = nn.ModuleList([ - nn.Conv2d(5, 64, kernel_size=3, stride=2, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1, groups=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(640, 512, kernel_size=3, stride=1, padding=1, groups=2), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(768, 384, kernel_size=3, stride=1, padding=1, groups=4), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(640, 256, kernel_size=3, stride=1, padding=1, groups=8), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(512, 128, kernel_size=3, stride=1, padding=1, groups=1), - nn.LeakyReLU(0.2, inplace=True) - ]) - - def forward(self, x): - bt, c, _, _ = x.size() - # h, w = h//4, w//4 - out = x - for i, layer in enumerate(self.layers): - if i == 8: - x0 = out - _, _, h, w = x0.size() - if i > 8 and i % 2 == 0: - g = self.group[(i - 8) // 2] - x = x0.view(bt, g, -1, h, w) - 
o = out.view(bt, g, -1, h, w) - out = torch.cat([x, o], 2).view(bt, -1, h, w) - out = layer(out) - return out - - -class deconv(nn.Module): - def __init__(self, - input_channel, - output_channel, - kernel_size=3, - padding=0): - super().__init__() - self.conv = nn.Conv2d(input_channel, - output_channel, - kernel_size=kernel_size, - stride=1, - padding=padding) - - def forward(self, x): - x = F.interpolate(x, - scale_factor=2, - mode='bilinear', - align_corners=True) - return self.conv(x) - - -class InpaintGenerator(BaseNetwork): - def __init__(self, init_weights=True, model_path=None): - super(InpaintGenerator, self).__init__() - channel = 128 - hidden = 512 - - # encoder - self.encoder = Encoder() - - # decoder - self.decoder = nn.Sequential( - deconv(channel, 128, kernel_size=3, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.2, inplace=True), - deconv(64, 64, kernel_size=3, padding=1), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 3, kernel_size=3, stride=1, padding=1)) - - # soft split and soft composition - kernel_size = (7, 7) - padding = (3, 3) - stride = (3, 3) - t2t_params = { - 'kernel_size': kernel_size, - 'stride': stride, - 'padding': padding - } - self.ss = SoftSplit(channel, hidden, kernel_size, stride, padding) - self.sc = SoftComp(channel, hidden, kernel_size, stride, padding) - self.max_pool = nn.MaxPool2d(kernel_size, stride, padding) - - # feature propagation module - self.img_prop_module = BidirectionalPropagation(3, learnable=False) - self.feat_prop_module = BidirectionalPropagation(128, learnable=True) - - depths = 8 - num_heads = 4 - window_size = (5, 9) - pool_size = (4, 4) - self.transformers = TemporalSparseTransformerBlock(dim=hidden, - n_head=num_heads, - window_size=window_size, - pool_size=pool_size, - depths=depths, - t2t_params=t2t_params) - if init_weights: - self.init_weights() - - if model_path is not None: - print('Pretrained ProPainter has loaded...') - ckpt = torch.load(model_path, map_location='cpu') - self.load_state_dict(ckpt, strict=True) - - # print network parameter number - self.print_network() - - def img_propagation(self, masked_frames, completed_flows, masks, interpolation='nearest'): - _, _, prop_frames, updated_masks = self.img_prop_module(masked_frames, completed_flows[0], completed_flows[1], - masks, interpolation) - return prop_frames, updated_masks - - def forward(self, masked_frames, completed_flows, masks_in, masks_updated, num_local_frames, - interpolation='bilinear', t_dilation=2): - """ - Args: - masks_in: original mask - masks_updated: updated mask after image propagation - """ - - l_t = num_local_frames - b, t, _, ori_h, ori_w = masked_frames.size() - - # extracting features - enc_feat = self.encoder(torch.cat([masked_frames.view(b * t, 3, ori_h, ori_w), - masks_in.view(b * t, 1, ori_h, ori_w), - masks_updated.view(b * t, 1, ori_h, ori_w)], dim=1)) - _, c, h, w = enc_feat.size() - local_feat = enc_feat.view(b, t, c, h, w)[:, :l_t, ...] - ref_feat = enc_feat.view(b, t, c, h, w)[:, l_t:, ...] 
- fold_feat_size = (h, w) - - ds_flows_f = F.interpolate(completed_flows[0].view(-1, 2, ori_h, ori_w), scale_factor=1 / 4, mode='bilinear', - align_corners=False).view(b, l_t - 1, 2, h, w) / 4.0 - ds_flows_b = F.interpolate(completed_flows[1].view(-1, 2, ori_h, ori_w), scale_factor=1 / 4, mode='bilinear', - align_corners=False).view(b, l_t - 1, 2, h, w) / 4.0 - ds_mask_in = F.interpolate(masks_in.reshape(-1, 1, ori_h, ori_w), scale_factor=1 / 4, mode='nearest').view(b, t, - 1, h, - w) - ds_mask_in_local = ds_mask_in[:, :l_t] - ds_mask_updated_local = F.interpolate(masks_updated[:, :l_t].reshape(-1, 1, ori_h, ori_w), scale_factor=1 / 4, - mode='nearest').view(b, l_t, 1, h, w) - - if self.training: - mask_pool_l = self.max_pool(ds_mask_in.view(-1, 1, h, w)) - mask_pool_l = mask_pool_l.view(b, t, 1, mask_pool_l.size(-2), mask_pool_l.size(-1)) - else: - mask_pool_l = self.max_pool(ds_mask_in_local.view(-1, 1, h, w)) - mask_pool_l = mask_pool_l.view(b, l_t, 1, mask_pool_l.size(-2), mask_pool_l.size(-1)) - - prop_mask_in = torch.cat([ds_mask_in_local, ds_mask_updated_local], dim=2) - _, _, local_feat, _ = self.feat_prop_module(local_feat, ds_flows_f, ds_flows_b, prop_mask_in, interpolation) - enc_feat = torch.cat((local_feat, ref_feat), dim=1) - - trans_feat = self.ss(enc_feat.view(-1, c, h, w), b, fold_feat_size) - mask_pool_l = rearrange(mask_pool_l, 'b t c h w -> b t h w c').contiguous() - trans_feat = self.transformers(trans_feat, fold_feat_size, mask_pool_l, t_dilation=t_dilation) - trans_feat = self.sc(trans_feat, t, fold_feat_size) - trans_feat = trans_feat.view(b, t, -1, h, w) - - enc_feat = enc_feat + trans_feat - - if self.training: - output = self.decoder(enc_feat.view(-1, c, h, w)) - output = torch.tanh(output).view(b, t, 3, ori_h, ori_w) - else: - output = self.decoder(enc_feat[:, :l_t].view(-1, c, h, w)) - output = torch.tanh(output).view(b, l_t, 3, ori_h, ori_w) - - return output - - -# ###################################################################### -# Discriminator for Temporal Patch GAN -# ###################################################################### -class Discriminator(BaseNetwork): - def __init__(self, - in_channels=3, - use_sigmoid=False, - use_spectral_norm=True, - init_weights=True): - super(Discriminator, self).__init__() - self.use_sigmoid = use_sigmoid - nf = 32 - - self.conv = nn.Sequential( - spectral_norm( - nn.Conv3d(in_channels=in_channels, - out_channels=nf * 1, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=1, - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(64, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 1, - nf * 2, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(128, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 2, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2), - bias=not use_spectral_norm), 
use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(3, 5, 5), - stride=(1, 2, 2), - padding=(1, 2, 2))) - - if init_weights: - self.init_weights() - - def forward(self, xs): - # T, C, H, W = xs.shape (old) - # B, T, C, H, W (new) - xs_t = torch.transpose(xs, 1, 2) - feat = self.conv(xs_t) - if self.use_sigmoid: - feat = torch.sigmoid(feat) - out = torch.transpose(feat, 1, 2) # B, T, C, H, W - return out - - -class Discriminator_2D(BaseNetwork): - def __init__(self, - in_channels=3, - use_sigmoid=False, - use_spectral_norm=True, - init_weights=True): - super(Discriminator_2D, self).__init__() - self.use_sigmoid = use_sigmoid - nf = 32 - - self.conv = nn.Sequential( - spectral_norm( - nn.Conv3d(in_channels=in_channels, - out_channels=nf * 1, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(64, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 1, - nf * 2, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(128, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 2, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - spectral_norm( - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2), - bias=not use_spectral_norm), use_spectral_norm), - # nn.InstanceNorm2d(256, track_running_stats=False), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv3d(nf * 4, - nf * 4, - kernel_size=(1, 5, 5), - stride=(1, 2, 2), - padding=(0, 2, 2))) - - if init_weights: - self.init_weights() - - def forward(self, xs): - # T, C, H, W = xs.shape (old) - # B, T, C, H, W (new) - xs_t = torch.transpose(xs, 1, 2) - feat = self.conv(xs_t) - if self.use_sigmoid: - feat = torch.sigmoid(feat) - out = torch.transpose(feat, 1, 2) # B, T, C, H, W - return out - - -def spectral_norm(module, mode=True): - if mode: - return _spectral_norm(module) - return module diff --git a/backend/interface/ch.ini b/backend/interface/ch.ini index ecd60cd..e8e3da2 100644 --- a/backend/interface/ch.ini +++ b/backend/interface/ch.ini @@ -11,7 +11,6 @@ BasicSetting = 基础设置 AdvancedSetting = 高级设置 SubtitleDetectionSetting = 字幕检测设置 SttnSetting = STTN设置 -ProPainterSetting = ProPainter设置 AboutSetting = 关于 HardwareAcceleration = 硬件加速 HardwareAccelerationDesc = 使用GPU或ONNX后端进行加速处理 @@ -36,8 +35,6 @@ SttnReferenceLength = 参考帧数量 SttnReferenceLengthDesc = 默认为10 SttnMaxLoadNum = 最大同时处理的帧数量 SttnMaxLoadNumDesc = 设置越大处理效果越好,但是要求显存越高,默认为50 -PropainterMaxLoadNum = 最大同时处理的帧数量 -PropainterMaxLoadNumDesc = 设置越大处理效果越好,但是要求显存越高,默认为70 CheckUpdateOnStartup = 在应用程序启动时检查更新 CheckUpdateOnStartupDesc = 新版本将更加稳定, 并拥有更多功能(建议启用此选项) UpdatesAvailableTitle = 有可用更新 @@ -67,7 +64,6 @@ SelectSubtitleArea = 请在视频预览中框选处理区域: {} InpaintModeDesc = STTN智能擦除, 对于真人视频效果较好,速度快, 智能擦除(最低4GB显存) STTN字幕检测 带字幕检测版, 无智能擦除(最低4GB显存) LAMA: 对于动画类视频效果好,速度一般(显存要求较低) - ProPainter: 
需要消耗大量显存,速度较慢,对运动非常剧烈的视频效果较好(最低8GB显存) OpenCV: 极速模式, 不保证inpaint效果,仅仅对包含文本的区域文本进行去除(显存要求较低) SubtitleDetectMode = 字幕检测 ErrorDuringProcessing = 处理过程中发生错误: {} @@ -122,7 +118,6 @@ RequestError = 尝试访问 {} 失败, 原因: {} SttnAuto = STTN智能擦除 SttnDet = STTN字幕检测 LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/chinese_cht.ini b/backend/interface/chinese_cht.ini index b4a70cb..273cd73 100644 --- a/backend/interface/chinese_cht.ini +++ b/backend/interface/chinese_cht.ini @@ -11,7 +11,6 @@ BasicSetting = 基礎設定 AdvancedSetting = 進階設定 SubtitleDetectionSetting = 字幕檢測設定 SttnSetting = STTN設定 -ProPainterSetting = ProPainter設定 AboutSetting = 關於 HardwareAcceleration = 硬體加速 HardwareAccelerationDesc = 使用GPU或ONNX後端進行加速處理 @@ -36,8 +35,6 @@ SttnReferenceLength = 參考影格數量 SttnReferenceLengthDesc = 預設為10 SttnMaxLoadNum = 最大同時處理的影格數量 SttnMaxLoadNumDesc = 數值越大處理效果越好,但需更高顯示記憶體,預設為50 -PropainterMaxLoadNum = 最大同時處理的影格數量 -PropainterMaxLoadNumDesc = 數值越大處理效果越好,但需更高顯示記憶體,預設為70 CheckUpdateOnStartup = 在應用程式啟動時檢查更新 CheckUpdateOnStartupDesc = 新版本將更穩定並提供更多功能(建議啟用此選項) UpdatesAvailableTitle = 有可用更新 @@ -64,10 +61,9 @@ VideoPreview = 影片預覽 InterfaceLanguage = 介面語言 InpaintMode = 處理模型 SelectSubtitleArea = 請在影片預覽中框選處理區域: {} -InpaintModeDesc = STTN智能擦除,對於真人視頻效果較好,速度快,智能擦除(最低4GB顯存) - STTN字幕檢測 帶字幕檢測版,無智能擦除(最低4GB顯存) - LAMA:對於動畫類視頻效果好,速度一般(顯存要求較低) - ProPainter:需要消耗大量顯存,速度較慢,對運動非常劇烈的視頻效果較好(最低8GB顯存) +InpaintModeDesc = STTN智能擦除,對於真人視頻效果較好,速度快,智能擦除(最低4GB顯存) + STTN字幕檢測 帶字幕檢測版,無智能擦除(最低4GB顯存) + LAMA:對於動畫類視頻效果好,速度一般(顯存要求較低) OpenCV:極速模式,不保證inpaint效果,僅僅對包含文本的區域文本進行去除(顯存要求較低) SubtitleDetectMode = 字幕檢測模式 ErrorDuringProcessing = 處理過程中發生錯誤: {} @@ -118,11 +114,10 @@ TargetFileNotFound = 檔案尚未生成,請先等待任務完成 VersionInfo = 當前版本: {} 最新版本: {} RequestError = 嘗試存取 {} 失敗,原因: {} -[InpaintMode] -SttnAuto = STTN智慧擦除 -SttnDet = STTN字幕檢測 -LAMA = LAMA -ProPainter = ProPainter +[InpaintMode] +SttnAuto = STTN智慧擦除 +SttnDet = STTN字幕檢測 +LAMA = LAMA OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/en.ini b/backend/interface/en.ini index c5499f9..17db9eb 100644 --- a/backend/interface/en.ini +++ b/backend/interface/en.ini @@ -7,11 +7,10 @@ CopyrightTitle = About CopyrightDesc = © Copyright 2023, YaoFANGUK, Jason Eric (UI Design), Current Version: {} ProjectLinkTitle = Subtitle Remover ProjectLinkDesc = AI-based image/video hard subtitle removal and text watermark removal, generating output files with original resolution. No third-party API required, locally implemented. -BasicSetting = Basic Settings -AdvancedSetting = Advanced Settings -SubtitleDetectionSetting = Subtitle Detection Settings -SttnSetting = STTN Settings -ProPainterSetting = ProPainter Settings +BasicSetting = Basic Settings +AdvancedSetting = Advanced Settings +SubtitleDetectionSetting = Subtitle Detection Settings +SttnSetting = STTN Settings AboutSetting = About HardwareAcceleration = Hardware Acceleration HardwareAccelerationDesc = Accelerate processing using GPU or ONNX backend @@ -34,10 +33,8 @@ SttnNeighborStride = Reference Frame Stride SttnNeighborStrideDesc = Default: 5 SttnReferenceLength = Reference Frame Count SttnReferenceLengthDesc = Default: 10 -SttnMaxLoadNum = Max Concurrent Processing Frames -SttnMaxLoadNumDesc = Higher values improve quality but require more VRAM (default 50). -PropainterMaxLoadNum = Max Concurrent Processing Frames -PropainterMaxLoadNumDesc = Higher values improve quality but require more VRAM (default 70). 
+SttnMaxLoadNum = Max Concurrent Processing Frames +SttnMaxLoadNumDesc = Higher values improve quality but require more VRAM (default 50). CheckUpdateOnStartup = Check Updates on Startup CheckUpdateOnStartupDesc = New versions offer improved stability and features (recommended). UpdatesAvailableTitle = Update Available @@ -67,7 +64,6 @@ SelectSubtitleArea = Select processing area in video preview: {} InpaintModeDesc = STTN Smart Inpainting: Best for real-person videos, fast speed, smart inpainting (minimum 4GB VRAM) STTN Subtitle Detection: With subtitle detection, no smart inpainting (minimum 4GB VRAM) LAMA: Good for animation videos, moderate speed (low VRAM requirement) - ProPainter: Consumes a lot of VRAM, slower speed, best for videos with intense motion (minimum 8GB VRAM) OpenCV: Ultra-fast mode, inpainting effect not guaranteed, only removes text in detected regions (low VRAM requirement) SubtitleDetectMode = Subtitle Detection ErrorDuringProcessing = Error during processing: {} @@ -122,7 +118,6 @@ RequestError = Failed to access {}. Reason: {} SttnAuto = STTN Smart Erase SttnDet = STTN Detection LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/es.ini b/backend/interface/es.ini index 96a087e..75e69fa 100644 --- a/backend/interface/es.ini +++ b/backend/interface/es.ini @@ -11,7 +11,6 @@ BasicSetting = Configuración básica AdvancedSetting = Configuración avanzada SubtitleDetectionSetting = Detección de subtítulos SttnSetting = Configuración STTN -ProPainterSetting = Configuración ProPainter AboutSetting = Acerca de HardwareAcceleration = Aceleración hardware HardwareAccelerationDesc = Usar GPU o backend ONNX para acelerar el procesamiento @@ -36,8 +35,6 @@ SttnReferenceLength = Cantidad de referencias SttnReferenceLengthDesc = Valor predeterminado: 10 SttnMaxLoadNum = Máx. fotogramas simultáneos SttnMaxLoadNumDesc = Mayor valor mejora calidad pero requiere más VRAM (valor predeterminado 50). -PropainterMaxLoadNum = Máx. fotogramas simultáneos -PropainterMaxLoadNumDesc = Mayor valor mejora calidad pero requiere más VRAM (valor predeterminado 70). CheckUpdateOnStartup = Buscar actualizaciones al iniciar CheckUpdateOnStartupDesc = Versiones nuevas ofrecen mejor estabilidad y funciones (recomendado). UpdatesAvailableTitle = Actualización disponible @@ -67,7 +64,6 @@ SelectSubtitleArea = Selecciona área en vista previa: {} InpaintModeDesc = STTN Borrado inteligente: Mejor para videos de personas reales, velocidad rápida, borrado inteligente (mínimo 4GB de VRAM) STTN Detección de subtítulos: Con detección de subtítulos, sin borrado inteligente (mínimo 4GB de VRAM) LAMA: Bueno para videos animados, velocidad media (bajo requerimiento de VRAM) - ProPainter: Consume mucha VRAM, velocidad lenta, mejor para videos con mucho movimiento (mínimo 8GB de VRAM) OpenCV: Modo ultra rápido, el efecto de borrado no está garantizado, solo elimina texto en las áreas detectadas (bajo requerimiento de VRAM) SubtitleDetectMode = Detección de subtítulos ErrorDuringProcessing = Error durante el procesamiento: {} @@ -118,11 +114,10 @@ TargetFileNotFound = Archivo resultado no generado. Espera a completar. VersionInfo = Versión actual: {} Última versión: {} RequestError = Error accediendo {}. 
Razón: {} -[InpaintMode] -SttnAuto = STTN borrado inteligente -SttnDet = STTN detección -LAMA = LAMA -ProPainter = ProPainter +[InpaintMode] +SttnAuto = STTN borrado inteligente +SttnDet = STTN detección +LAMA = LAMA OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/japan.ini b/backend/interface/japan.ini index 15e57ea..15b129c 100644 --- a/backend/interface/japan.ini +++ b/backend/interface/japan.ini @@ -11,7 +11,6 @@ BasicSetting = 基本設定 AdvancedSetting = 高度設定 SubtitleDetectionSetting = 字幕検出設定 SttnSetting = STTN設定 -ProPainterSetting = ProPainter設定 AboutSetting = 情報 HardwareAcceleration = ハードウェアアクセラレーション HardwareAccelerationDesc = GPUまたはONNXバックエンドを使用した高速処理 @@ -36,8 +35,6 @@ SttnReferenceLength = 参照フレーム数 SttnReferenceLengthDesc = デフォルト: 10 SttnMaxLoadNum = 最大同時処理フレーム数 SttnMaxLoadNumDesc = 値が大きいほど高品質(VRAM要求増加、デフォルト50) -PropainterMaxLoadNum = 最大同時処理フレーム数 -PropainterMaxLoadNumDesc = 値が大きいほど高品質(VRAM要求増加、デフォルト70) CheckUpdateOnStartup = 起動時アップデート確認 CheckUpdateOnStartupDesc = 新バージョンは安定性/機能向上(推奨) UpdatesAvailableTitle = 利用可能なアップデート @@ -67,7 +64,6 @@ SelectSubtitleArea = プレビューで処理領域を選択: {} InpaintModeDesc = STTNスマート消去:実写動画に最適、高速、スマート消去(最低4GB VRAM) STTN字幕検出:字幕検出付き、スマート消去なし(最低4GB VRAM) LAMA:アニメ動画に最適、速度は普通(VRAM要件低め) - ProPainter:大量のVRAMを消費、速度は遅い、激しい動きの動画に最適(最低8GB VRAM) OpenCV:超高速モード、消去効果は保証されません、検出されたテキスト領域のみ削除(VRAM要件低め) SubtitleDetectMode = 字幕検出 ErrorDuringProcessing = 処理中にエラーが発生しました: {} @@ -122,7 +118,6 @@ RequestError = {} へのアクセス失敗。理由: {} SttnAuto = STTNインテリジェント消去 SttnDet = STTN字幕検出 LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/ko.ini b/backend/interface/ko.ini index 8469570..68e06db 100644 --- a/backend/interface/ko.ini +++ b/backend/interface/ko.ini @@ -11,7 +11,6 @@ BasicSetting = 기본 설정 AdvancedSetting = 고급 설정 SubtitleDetectionSetting = 자막 감지 설정 SttnSetting = STTN 설정 -ProPainterSetting = ProPainter 설정 AboutSetting = 정보 HardwareAcceleration = 하드웨어 가속 HardwareAccelerationDesc = GPU 또는 ONNX 백엔드 사용 가속 처리 @@ -36,8 +35,6 @@ SttnReferenceLength = 참조 프레임 수 SttnReferenceLengthDesc = 기본값: 10 SttnMaxLoadNum = 최대 동시 처리 프레임 SttnMaxLoadNumDesc = 값 클수록 품질 향상 (VRAM 요구 증가, 기본값 50) -PropainterMaxLoadNum = 최대 동시 처리 프레임 -PropainterMaxLoadNumDesc = 값 클수록 품질 향상 (VRAM 요구 증가, 기본값 70) CheckUpdateOnStartup = 시작시 업데이트 확인 CheckUpdateOnStartupDesc = 새 버전은 안정성/기능 개선 포함 (권장) UpdatesAvailableTitle = 업데이트 가능 @@ -67,7 +64,6 @@ SelectSubtitleArea = 미리보기에서 처리 영역 선택: {} InpaintModeDesc = STTN 스마트 지우기: 실제 인물 영상에 적합, 빠른 속도, 스마트 지우기(최소 4GB VRAM) STTN 자막 감지: 자막 감지 버전, 스마트 지우기 없음(최소 4GB VRAM) LAMA: 애니메이션 영상에 적합, 보통 속도(VRAM 요구량 낮음) - ProPainter: 많은 VRAM 소모, 느린 속도, 격렬한 움직임 영상에 적합(최소 8GB VRAM) OpenCV: 초고속 모드, 인페인트 효과 보장 안 됨, 텍스트 영역만 제거(VRAM 요구량 낮음) SubtitleDetectMode = 자막 감지 ErrorDuringProcessing = 처리 중 오류: {} @@ -122,7 +118,6 @@ RequestError = {} 접근 실패. 
이유: {} SttnAuto = STTN 지능형 제거 SttnDet = STTN 자막 감지 LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/interface/vi.ini b/backend/interface/vi.ini index 015e2b4..a4b89d0 100644 --- a/backend/interface/vi.ini +++ b/backend/interface/vi.ini @@ -11,7 +11,6 @@ BasicSetting = Cài đặt cơ bản AdvancedSetting = Cài đặt nâng cao SubtitleDetectionSetting = Cài đặt phát hiện phụ đề SttnSetting = Cài đặt STTN -ProPainterSetting = Cài đặt ProPainter AboutSetting = Giới thiệu HardwareAcceleration = Tăng tốc phần cứng HardwareAccelerationDesc = Sử dụng GPU hoặc backend ONNX để tăng tốc xử lý @@ -36,8 +35,6 @@ SttnReferenceLength = Số khung tham chiếu SttnReferenceLengthDesc = Mặc định: 10 SttnMaxLoadNum = Số khung xử lý tối đa SttnMaxLoadNumDesc = Càng cao càng tốt (yêu cầu nhiều VRAM, mặc định 50) -PropainterMaxLoadNum = Số khung xử lý tối đa -PropainterMaxLoadNumDesc = Càng cao càng tốt (yêu cầu nhiều VRAM, mặc định 70) CheckUpdateOnStartup = Kiểm tra cập nhật khi khởi động CheckUpdateOnStartupDesc = Phiên bản mới ổn định hơn (khuyến nghị bật) UpdatesAvailableTitle = Có bản cập nhật @@ -67,7 +64,6 @@ SelectSubtitleArea = Chọn vùng xử lý trong preview: {} InpaintModeDesc = STTN Xóa thông minh: Phù hợp cho video người thật, tốc độ nhanh, xóa thông minh (tối thiểu 4GB VRAM) STTN Phát hiện phụ đề: Có phát hiện phụ đề, không xóa thông minh (tối thiểu 4GB VRAM) LAMA: Phù hợp cho video hoạt hình, tốc độ trung bình (yêu cầu VRAM thấp) - ProPainter: Tiêu tốn nhiều VRAM, tốc độ chậm, phù hợp cho video chuyển động mạnh (tối thiểu 8GB VRAM) OpenCV: Chế độ siêu nhanh, không đảm bảo hiệu quả xóa, chỉ xóa vùng chứa văn bản (yêu cầu VRAM thấp) SubtitleDetectMode = Chế độ phát hiện ErrorDuringProcessing = Lỗi khi xử lý: {} @@ -122,7 +118,6 @@ RequestError = Lỗi truy cập {}, lý do: {} SttnAuto = STTN xóa thông minh SttnDet = STTN phát hiện LAMA = LAMA -ProPainter = ProPainter OpenCV = OpenCV [SubtitleDetectMode] diff --git a/backend/main.py b/backend/main.py index 4efc96b..b067f1c 100644 --- a/backend/main.py +++ b/backend/main.py @@ -19,7 +19,6 @@ from backend.inpaint.sttn_auto_inpaint import STTNAutoInpaint from backend.inpaint.sttn_det_inpaint import STTNDetInpaint from backend.inpaint.lama_inpaint import LamaInpaint from backend.inpaint.opencv_inpaint import OpenCVInpaint -from backend.inpaint.propainter_inpaint import PropainterInpaint from backend.tools.inpaint_tools import create_mask, batch_generator, expand_frame_ranges from backend.tools.model_config import ModelConfig from backend.tools.ffmpeg_cli import FFmpegCLI @@ -67,7 +66,6 @@ class SubtitleRemover: except Exception: self.video_writer = cv2.VideoWriter(get_readable_path(self.video_temp_file.name), cv2.VideoWriter_fourcc(*'mp4v'), self.fps, self.size) self.video_out_path = os.path.abspath(os.path.join(os.path.dirname(self.video_path), f'{self.vd_name}_no_sub.mp4')) - self.propainter_inpaint = None self.ext = os.path.splitext(vd_path)[-1] if self.is_picture: pic_dir = os.path.join(os.path.dirname(self.video_path), 'no_sub') @@ -156,94 +154,6 @@ class SubtitleRemover: """ pass - def propainter_mode(self, tbar): - sub_detector = SubtitleDetect(self.video_path, self.sub_areas) - sub_list = sub_detector.find_subtitle_frame_no(sub_remover=self) - if len(sub_list) == 0: - raise Exception(tr['Main']['NoSubtitleDetected'].format(self.video_path)) - continuous_frame_no_list = sub_detector.find_continuous_ranges_with_same_mask(sub_list) - scene_div_points = sub_detector.get_scene_div_frame_no(self.video_path) - 
continuous_frame_no_list = sub_detector.split_range_by_scene(continuous_frame_no_list, - scene_div_points) - del sub_detector - gc.collect() - device = self.hardware_accelerator.device if self.hardware_accelerator.has_cuda() else torch.device("cpu") - propainter_inpaint = PropainterInpaint(device, self.model_config.PROPAINTER_MODEL_DIR, config.propainterMaxLoadNum.value) - self.append_output(tr['Main']['ProcessingStartRemovingSubtitles']) - index = 0 - # 使用帧预读取,I/O 与推理重叠 - reader = FramePrefetcher(self.video_cap) - while True: - ret, frame = reader.read() - if not ret: - break - index += 1 - # 如果当前帧没有水印/文本则直接写 - if index not in sub_list.keys(): - self.video_writer.write(frame) - # self.append_output(f'write frame: {index}') - self.update_progress(tbar, increment=1) - self.update_preview_with_comp(frame, frame) - continue - # 如果有水印,判断该帧是不是开头帧 - else: - # 如果是开头帧,则批推理到尾帧 - if self.is_current_frame_no_start(index, continuous_frame_no_list): - # self.append_output(f'No 1 Current index: {index}') - start_frame_no = index - # self.append_output(f'find start: {start_frame_no}') - # 找到结束帧 - end_frame_no = self.find_frame_no_end(index, continuous_frame_no_list) - # 判断当前帧号是不是字幕起始位置 - # 如果获取的结束帧号不为-1则说明 - if end_frame_no != -1: - # self.append_output(f'find end: {end_frame_no}') - # ************ 读取该区间所有帧 start ************ - temp_frames = list() - # 将头帧加入处理列表 - temp_frames.append(frame) - inner_index = 0 - # 一直读取到尾帧 - while index < end_frame_no: - ret, frame = reader.read() - if not ret: - break - index += 1 - temp_frames.append(frame) - # ************ 读取该区间所有帧 end ************ - if len(temp_frames) < 1: - # 没有待处理,直接跳过 - continue - elif len(temp_frames) == 1: - inner_index += 1 - single_mask = create_mask(self.mask_size, sub_list[index]) - inpainted_frame = self.lama_inpaint.inpaint(frame, single_mask) - self.video_writer.write(inpainted_frame) - # self.append_output(f'write frame: {start_frame_no + inner_index} with mask {sub_list[start_frame_no]}') - self.update_progress(tbar, increment=1) - continue - else: - # 将读取的视频帧分批处理 - # 1. 获取当前批次使用的mask - mask = create_mask(self.mask_size, sub_list[start_frame_no]) - for batch in batch_generator(temp_frames, config.propainterMaxLoadNum.value): - # 2. 
调用批推理 - if len(batch) == 1: - single_mask = create_mask(self.mask_size, sub_list[start_frame_no]) - inpainted_frame = self.lama_inpaint.inpaint(frame, single_mask) - self.video_writer.write(inpainted_frame) - # self.append_output(f'write frame: {start_frame_no + inner_index} with mask {sub_list[start_frame_no]}') - inner_index += 1 - self.update_progress(tbar, increment=1) - elif len(batch) > 1: - inpainted_frames = propainter_inpaint(batch, mask) - for i, inpainted_frame in enumerate(inpainted_frames): - self.video_writer.write(inpainted_frame) - # self.append_output(f'write frame: {start_frame_no + inner_index} with mask {sub_list[index]}') - inner_index += 1 - self.update_preview_with_comp(np.clip(batch[i]+mask[:,:,np.newaxis]*0.3,0,255).astype(np.uint8), inpainted_frame) - self.update_progress(tbar, increment=len(batch)) - def sttn_auto_mode(self, tbar): """ 使用sttn对选中区域进行重绘,不进行字幕检测 @@ -372,9 +282,7 @@ class SubtitleRemover: else: # 精准模式下,获取场景分割的帧号,进一步切割 self.log_model() - if config.inpaintMode.value == InpaintMode.PROPAINTER: - self.propainter_mode(tbar) - elif config.inpaintMode.value == InpaintMode.STTN_AUTO: + if config.inpaintMode.value == InpaintMode.STTN_AUTO: self.sttn_auto_mode(tbar) elif config.inpaintMode.value == InpaintMode.STTN_DET: self.video_inpaint(tbar, self.sttn_det_inpaint) diff --git a/backend/models/propainter/ProPainter_1.pth b/backend/models/propainter/ProPainter_1.pth deleted file mode 100644 index 0a85ad6..0000000 Binary files a/backend/models/propainter/ProPainter_1.pth and /dev/null differ diff --git a/backend/models/propainter/ProPainter_2.pth b/backend/models/propainter/ProPainter_2.pth deleted file mode 100644 index 948aebc..0000000 Binary files a/backend/models/propainter/ProPainter_2.pth and /dev/null differ diff --git a/backend/models/propainter/ProPainter_3.pth b/backend/models/propainter/ProPainter_3.pth deleted file mode 100644 index cc3586e..0000000 Binary files a/backend/models/propainter/ProPainter_3.pth and /dev/null differ diff --git a/backend/models/propainter/ProPainter_4.pth b/backend/models/propainter/ProPainter_4.pth deleted file mode 100644 index aff41a0..0000000 Binary files a/backend/models/propainter/ProPainter_4.pth and /dev/null differ diff --git a/backend/models/propainter/fs_manifest.csv b/backend/models/propainter/fs_manifest.csv deleted file mode 100644 index 3583bcc..0000000 --- a/backend/models/propainter/fs_manifest.csv +++ /dev/null @@ -1,5 +0,0 @@ -filename,filesize,encoding,header -ProPainter_1.pth,50000000,, -ProPainter_2.pth,50000000,, -ProPainter_3.pth,50000000,, -ProPainter_4.pth,7780510,, diff --git a/backend/models/propainter/raft-things.pth b/backend/models/propainter/raft-things.pth deleted file mode 100644 index dbe6f9f..0000000 Binary files a/backend/models/propainter/raft-things.pth and /dev/null differ diff --git a/backend/models/propainter/recurrent_flow_completion.pth b/backend/models/propainter/recurrent_flow_completion.pth deleted file mode 100644 index 28d11ea..0000000 Binary files a/backend/models/propainter/recurrent_flow_completion.pth and /dev/null differ diff --git a/backend/tools/constant.py b/backend/tools/constant.py index 40d57cd..3e63a3c 100644 --- a/backend/tools/constant.py +++ b/backend/tools/constant.py @@ -8,7 +8,6 @@ class InpaintMode(Enum): STTN_AUTO = "sttn-auto" STTN_DET = "sttn-det" LAMA = "lama" - PROPAINTER = "propainter" OPENCV = "opencv" @unique diff --git a/backend/tools/model_config.py b/backend/tools/model_config.py index 09e21a6..d5b9e71 100644 --- a/backend/tools/model_config.py 
+++ b/backend/tools/model_config.py @@ -13,7 +13,6 @@ class ModelConfig: self.LAMA_MODEL_DIR = os.path.join(BASE_DIR, 'models', 'big-lama') self.STTN_AUTO_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn-auto', 'infer_model.pth') self.STTN_DET_MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sttn-det', 'sttn.pth') - self.PROPAINTER_MODEL_DIR = os.path.join(BASE_DIR,'models', 'propainter') if config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv5_MOBILE: self.DET_MODEL_DIR = os.path.join(BASE_DIR,'models', 'V5', 'ch_det_fast') elif config.subtitleDetectMode.value == SubtitleDetectMode.PP_OCRv5_SERVER: @@ -23,4 +22,3 @@ class ModelConfig: self.DET_MODEL_NAME = _MODEL_NAME_MAP[config.subtitleDetectMode.value] merge_big_file_if_not_exists(self.LAMA_MODEL_DIR, 'bit-lama.pt') - merge_big_file_if_not_exists(self.PROPAINTER_MODEL_DIR, 'ProPainter.pth') diff --git a/ui/advanced_setting_interface.py b/ui/advanced_setting_interface.py index 1bad94c..5fc4481 100644 --- a/ui/advanced_setting_interface.py +++ b/ui/advanced_setting_interface.py @@ -56,9 +56,6 @@ class AdvancedSettingInterface(ScrollArea): self.sttn_group.addSettingCard(self.sttn_max_load_num) self.expandLayout.addWidget(self.sttn_group) - self.propainter_group.addSettingCard(self.propainter_max_load_num) - self.expandLayout.addWidget(self.propainter_group) - self.advanced_group.addSettingCard(self.save_directory) self.advanced_group.addSettingCard(self.check_update_on_startup) self.expandLayout.addWidget(self.advanced_group) @@ -77,8 +74,6 @@ class AdvancedSettingInterface(ScrollArea): self.subtitle_detection_group = SettingCardGroup(tr["Setting"]["SubtitleDetectionSetting"], self.scrollWidget) # STTN设置组 self.sttn_group = SettingCardGroup(tr["Setting"]["SttnSetting"], self.scrollWidget) - # Propainter设置组 - self.propainter_group = SettingCardGroup(tr["Setting"]["ProPainterSetting"], self.scrollWidget) # 高级设置组 self.advanced_group = SettingCardGroup(tr["Setting"]["AdvancedSetting"], self.scrollWidget) # 关于设置组 @@ -164,14 +159,6 @@ class AdvancedSettingInterface(ScrollArea): parent=self.sttn_group ) - self.propainter_max_load_num = RangeSettingCard( - configItem=config.propainterMaxLoadNum, - icon=FluentIcon.DICTIONARY, - title=tr["Setting"]["PropainterMaxLoadNum"], - content=tr["Setting"]["PropainterMaxLoadNumDesc"], - parent=self.propainter_group - ) - # 视频保存路径 self.save_directory = PushSettingCard( text=tr["Setting"]["ChooseDirectory"],
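
The net user-facing surface left by this patch, per the hunks in `backend/tools/constant.py`, `README.md`, and `README_en.md`: `--inpaint-mode` now accepts four values (`sttn-auto`, `sttn-det`, `lama`, `opencv`), still defaulting to `sttn-auto`. Below is a minimal, self-contained sketch of that surface; the enum members are copied from the diff, while the `argparse` wiring is assumed for illustration only and is not the repository's actual CLI code.

```python
import argparse
from enum import Enum, unique


@unique
class InpaintMode(Enum):
    # Post-patch members from backend/tools/constant.py (PROPAINTER removed).
    STTN_AUTO = "sttn-auto"
    STTN_DET = "sttn-det"
    LAMA = "lama"
    OPENCV = "opencv"


# Hypothetical argument wiring, mirroring the README usage text above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--inpaint-mode",
    type=InpaintMode,               # InpaintMode("lama") -> InpaintMode.LAMA
    choices=list(InpaintMode),
    default=InpaintMode.STTN_AUTO,  # "default is sttn-auto" per the README hunk
)

args = parser.parse_args(["--inpaint-mode", "lama"])
assert args.inpaint_mode is InpaintMode.LAMA
```

Removing the enum member outright, rather than only hiding the ProPainter settings card, keeps the CLI choices, the `OptionsValidator(InpaintMode)` in `backend/config.py`, and the mode dispatch in `backend/main.py` in agreement, since all three derive from the same `InpaintMode` enum.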