支持多字幕区域

2026-05-19 20:27:28 +08:00 · 2025-05-18 14:59:08 +08:00
parent f78e985e1c
commit c673b5ccd1
9 changed files with 471 additions and 284 deletions
--- a/backend/config.py
+++ b/backend/config.py
@@ -38,10 +38,9 @@ class Config(QConfig):
    windowW = ConfigItem("Window", "Width", 1200)
    windowH = ConfigItem("Window", "Height", 1200)

-    subtitleSelectionAreaX = ConfigItem("Main", "SubtitleSelectionAreaX", 0.15)
-    subtitleSelectionAreaY = ConfigItem("Main", "SubtitleSelectionAreaY", 0.88)
-    subtitleSelectionAreaW = ConfigItem("Main", "SubtitleSelectionAreaW", 0.70)
-    subtitleSelectionAreaH = ConfigItem("Main", "SubtitleSelectionAreaH", 0.11)
+    # 使用一个配置项存储所有选区
+    # 默认值为一个选区，格式为："x,y,w,h;x,y,w,h;..."，分号分隔不同选区
+    subtitleSelectionAreas = ConfigItem("Main", "SubtitleSelectionAreas", "0.15,0.88,0.70,0.11")

    """
    MODE可选算法类型
--- a/backend/inpaint/sttn_auto_inpaint.py
+++ b/backend/inpaint/sttn_auto_inpaint.py
@@ -1,13 +1,15 @@
+import os
 import copy
 import time
+import sys
+from typing import List

 import cv2
-import numpy as np
 import torch
+import numpy as np
+from tqdm import tqdm
 from torchvision import transforms
-from typing import List
-import sys
-import os
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 from backend.config import config
@@ -226,7 +228,7 @@ class STTNAutoInpaint:
            for i in range(rec_time):
                start_f = i * self.clip_gap  # 起始帧位置
                end_f = min((i + 1) * self.clip_gap, frame_info['len'])  # 结束帧位置
-                print('Processing:', start_f + 1, '-', end_f, ' / Total:', frame_info['len'])
+                tqdm.write(f'Processing: {start_f + 1} - {end_f} / Total: {frame_info['len']}')
                
                frames_hr = []  # 高分辨率帧列表
                frames = {}  # 帧字典，用于存储裁剪后的图像
--- a/backend/main.py
+++ b/backend/main.py
@@ -33,11 +33,11 @@ from tqdm import tqdm
 import numpy as np

 class SubtitleRemover:
-    def __init__(self, vd_path, sub_area=None, gui_mode=False):
+    def __init__(self, vd_path, sub_areas=[], gui_mode=False):
        # 线程锁
        self.lock = threading.RLock()
        # 用户指定的字幕区域位置
-        self.sub_area = sub_area
+        self.sub_areas = sub_areas
        # 是否为gui运行，gui运行需要显示预览
        self.gui_mode = gui_mode
        self.hardware_accelerator = HardwareAccelerator.instance()
@@ -67,6 +67,9 @@ class SubtitleRemover:
        self.video_out_path = os.path.abspath(os.path.join(os.path.dirname(self.video_path), f'{self.vd_name}_no_sub.mp4'))
        self.propainter_inpaint = None
        self.ext = os.path.splitext(vd_path)[-1]
+        if len(self.sub_areas) == 0:
+            self.append_output(tr['Main']['FullScreenProcessingNote'])
+            self.sub_areas.append((0, self.frame_height, 0, self.frame_width))
        if self.is_picture:
            pic_dir = os.path.join(os.path.dirname(self.video_path), 'no_sub')
            if not os.path.exists(pic_dir):
@@ -153,7 +156,7 @@ class SubtitleRemover:
        pass

    def propainter_mode(self, tbar):
-        sub_detector = SubtitleDetect(self.video_path, self.sub_area)
+        sub_detector = SubtitleDetect(self.video_path, self.sub_areas)
        sub_list = sub_detector.find_subtitle_frame_no(sub_remover=self)
        if len(sub_list) == 0:
            raise Exception(tr['Main']['NoSubtitleDetected'].format(self.video_path))
@@ -242,18 +245,16 @@ class SubtitleRemover:
        使用sttn对选中区域进行重绘，不进行字幕检测
        """
        self.append_output(tr['Main']['ProcessingStartRemovingSubtitles'])
-        if self.sub_area is not None:
-            ymin, ymax, xmin, xmax = self.sub_area
-        else:
-            self.append_output(tr['Main']['FullScreenProcessingNote'])
-            ymin, ymax, xmin, xmax = 0, self.frame_height, 0, self.frame_width
-        mask_area_coordinates = [(xmin, xmax, ymin, ymax)]
+        mask_area_coordinates = []
+        for sub_area in self.sub_areas:
+            ymin, ymax, xmin, xmax = sub_area
+            mask_area_coordinates.append((xmin, xmax, ymin, ymax))
        mask = create_mask(self.mask_size, mask_area_coordinates)
        sttn_video_inpaint = STTNAutoInpaint(self.hardware_accelerator.device, self.model_config.STTN_AUTO_MODEL_PATH, self.video_path)
        sttn_video_inpaint(input_mask=mask, input_sub_remover=self, tbar=tbar)

    def video_inpaint(self, tbar, model):
-        sub_detector = SubtitleDetect(self.video_path, self.sub_area)
+        sub_detector = SubtitleDetect(self.video_path, self.sub_areas)
        sub_list = sub_detector.find_subtitle_frame_no(sub_remover=self)
        if len(sub_list) == 0:
            raise Exception(tr['Main']['NoSubtitleDetected'].format(self.video_path))
@@ -342,7 +343,7 @@ class SubtitleRemover:
            if original_frame is None:
                self.append_output(tr['Main']['ReadImageFailed'].format(self.video_path))
                return
-            sub_detector = SubtitleDetect(self.video_path, self.sub_area)
+            sub_detector = SubtitleDetect(self.video_path, self.sub_areas)
            sub_list = sub_detector.detect_subtitle(original_frame)
            del sub_detector
            gc.collect()
@@ -457,11 +458,9 @@ if __name__ == '__main__':
    multiprocessing.set_start_method("spawn")
    from backend.tools.args_handler import parse_args
    args = parse_args()
-    sub_area = None if args.ymin is None or args.ymax is None or args.xmin is None or args.xmax is None else (
-        args.ymin, args.ymax, args.xmin, args.xmax)
    
-    print('Subtitle Area:', 'fullscreen' if sub_area is None else sub_area)
-    sr = SubtitleRemover(args.input, sub_area=sub_area)
+    print('Subtitle Area:', 'fullscreen' if len(args.subtitle_area_coords) <= 0 else args.subtitle_area_coords)
+    sr = SubtitleRemover(args.input, sub_areas=args.subtitle_area_coords)
    if not is_video_or_image(args.input):
        sr.append_output(f'Error: {video_path} is not supported not corrupted.')
        exit(-1)
--- a/backend/tools/args_handler.py
+++ b/backend/tools/args_handler.py
@@ -16,20 +16,8 @@ def parse_args():
        help="Output video file path (optional)"
    )
    parser.add_argument(
-        "--ymin", type=int, default=None,
-        help="Subtitle area ymin (optional)"
-    )
-    parser.add_argument(
-        "--ymax", type=int, default=None,
-        help="Subtitle area ymax (optional)"
-    )
-    parser.add_argument(
-        "--xmin", type=int, default=None,
-        help="Subtitle area xmin (optional)"
-    )
-    parser.add_argument(
-        "--xmax", type=int, default=None,
-        help="Subtitle area xmax (optional)"
+        "--subtitle-area-coords", "-c", action="append", nargs=4, type=int, metavar=("YMIN", "YMAX", "XMIN", "XMAX"),
+        help="Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas."
    )
    parser.add_argument(
        "--inpaint-mode", type=str, default="sttn-auto",
@@ -38,4 +26,6 @@ def parse_args():
    )
    args = parser.parse_args()
    args.inpaint_mode = InpaintMode[args.inpaint_mode.replace('-','_').upper()]
+    if args.subtitle_area_coords is None:
+        args.subtitle_area_coords = []
    return args
--- a/backend/tools/subtitle_detect.py
+++ b/backend/tools/subtitle_detect.py
@@ -17,9 +17,9 @@ class SubtitleDetect:
    文本框检测类，用于检测视频帧中是否存在文本框
    """

-    def __init__(self, video_path, sub_area=None):
+    def __init__(self, video_path, sub_areas=[]):
        self.video_path = video_path
-        self.sub_area = sub_area
+        self.sub_areas = sub_areas

    @cached_property
    def text_detector(self):
@@ -46,12 +46,13 @@ class SubtitleDetect:
        if coordinate_list:
            for coordinate in coordinate_list:
                xmin, xmax, ymin, ymax = coordinate
-                if self.sub_area is not None:
-                    s_ymin, s_ymax, s_xmin, s_xmax = self.sub_area
-                    if (s_xmin <= xmin and xmax <= s_xmax
-                            and s_ymin <= ymin
-                            and ymax <= s_ymax):
-                        temp_list.append((xmin, xmax, ymin, ymax))
+                if self.sub_areas is not None and len(self.sub_areas) > 0:
+                    for sub_area in self.sub_areas:
+                        s_ymin, s_ymax, s_xmin, s_xmax = sub_area
+                        if (s_xmin <= xmin and xmax <= s_xmax
+                                and s_ymin <= ymin
+                                and ymax <= s_ymax):
+                            temp_list.append((xmin, xmax, ymin, ymax))
                else:
                    temp_list.append((xmin, xmax, ymin, ymax))
        return temp_list