支持多字幕区域

This commit is contained in:
Jason
2025-05-18 14:59:08 +08:00
parent f78e985e1c
commit c673b5ccd1
9 changed files with 471 additions and 284 deletions

View File

@@ -38,10 +38,9 @@ class Config(QConfig):
windowW = ConfigItem("Window", "Width", 1200)
windowH = ConfigItem("Window", "Height", 1200)
subtitleSelectionAreaX = ConfigItem("Main", "SubtitleSelectionAreaX", 0.15)
subtitleSelectionAreaY = ConfigItem("Main", "SubtitleSelectionAreaY", 0.88)
subtitleSelectionAreaW = ConfigItem("Main", "SubtitleSelectionAreaW", 0.70)
subtitleSelectionAreaH = ConfigItem("Main", "SubtitleSelectionAreaH", 0.11)
# 使用一个配置项存储所有选区
# 默认值为一个选区,格式为:"x,y,w,h;x,y,w,h;...",分号分隔不同选区
subtitleSelectionAreas = ConfigItem("Main", "SubtitleSelectionAreas", "0.15,0.88,0.70,0.11")
"""
MODE可选算法类型

View File

@@ -1,13 +1,15 @@
import os
import copy
import time
import sys
from typing import List
import cv2
import numpy as np
import torch
import numpy as np
from tqdm import tqdm
from torchvision import transforms
from typing import List
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from backend.config import config
@@ -226,7 +228,7 @@ class STTNAutoInpaint:
for i in range(rec_time):
start_f = i * self.clip_gap # 起始帧位置
end_f = min((i + 1) * self.clip_gap, frame_info['len']) # 结束帧位置
print('Processing:', start_f + 1, '-', end_f, ' / Total:', frame_info['len'])
tqdm.write(f'Processing: {start_f + 1} - {end_f} / Total: {frame_info['len']}')
frames_hr = [] # 高分辨率帧列表
frames = {} # 帧字典,用于存储裁剪后的图像

View File

@@ -33,11 +33,11 @@ from tqdm import tqdm
import numpy as np
class SubtitleRemover:
def __init__(self, vd_path, sub_area=None, gui_mode=False):
def __init__(self, vd_path, sub_areas=[], gui_mode=False):
# 线程锁
self.lock = threading.RLock()
# 用户指定的字幕区域位置
self.sub_area = sub_area
self.sub_areas = sub_areas
# 是否为gui运行gui运行需要显示预览
self.gui_mode = gui_mode
self.hardware_accelerator = HardwareAccelerator.instance()
@@ -67,6 +67,9 @@ class SubtitleRemover:
self.video_out_path = os.path.abspath(os.path.join(os.path.dirname(self.video_path), f'{self.vd_name}_no_sub.mp4'))
self.propainter_inpaint = None
self.ext = os.path.splitext(vd_path)[-1]
if len(self.sub_areas) == 0:
self.append_output(tr['Main']['FullScreenProcessingNote'])
self.sub_areas.append((0, self.frame_height, 0, self.frame_width))
if self.is_picture:
pic_dir = os.path.join(os.path.dirname(self.video_path), 'no_sub')
if not os.path.exists(pic_dir):
@@ -153,7 +156,7 @@ class SubtitleRemover:
pass
def propainter_mode(self, tbar):
sub_detector = SubtitleDetect(self.video_path, self.sub_area)
sub_detector = SubtitleDetect(self.video_path, self.sub_areas)
sub_list = sub_detector.find_subtitle_frame_no(sub_remover=self)
if len(sub_list) == 0:
raise Exception(tr['Main']['NoSubtitleDetected'].format(self.video_path))
@@ -242,18 +245,16 @@ class SubtitleRemover:
使用sttn对选中区域进行重绘不进行字幕检测
"""
self.append_output(tr['Main']['ProcessingStartRemovingSubtitles'])
if self.sub_area is not None:
ymin, ymax, xmin, xmax = self.sub_area
else:
self.append_output(tr['Main']['FullScreenProcessingNote'])
ymin, ymax, xmin, xmax = 0, self.frame_height, 0, self.frame_width
mask_area_coordinates = [(xmin, xmax, ymin, ymax)]
mask_area_coordinates = []
for sub_area in self.sub_areas:
ymin, ymax, xmin, xmax = sub_area
mask_area_coordinates.append((xmin, xmax, ymin, ymax))
mask = create_mask(self.mask_size, mask_area_coordinates)
sttn_video_inpaint = STTNAutoInpaint(self.hardware_accelerator.device, self.model_config.STTN_AUTO_MODEL_PATH, self.video_path)
sttn_video_inpaint(input_mask=mask, input_sub_remover=self, tbar=tbar)
def video_inpaint(self, tbar, model):
sub_detector = SubtitleDetect(self.video_path, self.sub_area)
sub_detector = SubtitleDetect(self.video_path, self.sub_areas)
sub_list = sub_detector.find_subtitle_frame_no(sub_remover=self)
if len(sub_list) == 0:
raise Exception(tr['Main']['NoSubtitleDetected'].format(self.video_path))
@@ -342,7 +343,7 @@ class SubtitleRemover:
if original_frame is None:
self.append_output(tr['Main']['ReadImageFailed'].format(self.video_path))
return
sub_detector = SubtitleDetect(self.video_path, self.sub_area)
sub_detector = SubtitleDetect(self.video_path, self.sub_areas)
sub_list = sub_detector.detect_subtitle(original_frame)
del sub_detector
gc.collect()
@@ -457,11 +458,9 @@ if __name__ == '__main__':
multiprocessing.set_start_method("spawn")
from backend.tools.args_handler import parse_args
args = parse_args()
sub_area = None if args.ymin is None or args.ymax is None or args.xmin is None or args.xmax is None else (
args.ymin, args.ymax, args.xmin, args.xmax)
print('Subtitle Area:', 'fullscreen' if sub_area is None else sub_area)
sr = SubtitleRemover(args.input, sub_area=sub_area)
print('Subtitle Area:', 'fullscreen' if len(args.subtitle_area_coords) <= 0 else args.subtitle_area_coords)
sr = SubtitleRemover(args.input, sub_areas=args.subtitle_area_coords)
if not is_video_or_image(args.input):
sr.append_output(f'Error: {video_path} is not supported not corrupted.')
exit(-1)

View File

@@ -16,20 +16,8 @@ def parse_args():
help="Output video file path (optional)"
)
parser.add_argument(
"--ymin", type=int, default=None,
help="Subtitle area ymin (optional)"
)
parser.add_argument(
"--ymax", type=int, default=None,
help="Subtitle area ymax (optional)"
)
parser.add_argument(
"--xmin", type=int, default=None,
help="Subtitle area xmin (optional)"
)
parser.add_argument(
"--xmax", type=int, default=None,
help="Subtitle area xmax (optional)"
"--subtitle-area-coords", "-c", action="append", nargs=4, type=int, metavar=("YMIN", "YMAX", "XMIN", "XMAX"),
help="Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas."
)
parser.add_argument(
"--inpaint-mode", type=str, default="sttn-auto",
@@ -38,4 +26,6 @@ def parse_args():
)
args = parser.parse_args()
args.inpaint_mode = InpaintMode[args.inpaint_mode.replace('-','_').upper()]
if args.subtitle_area_coords is None:
args.subtitle_area_coords = []
return args

View File

@@ -17,9 +17,9 @@ class SubtitleDetect:
文本框检测类,用于检测视频帧中是否存在文本框
"""
def __init__(self, video_path, sub_area=None):
def __init__(self, video_path, sub_areas=[]):
self.video_path = video_path
self.sub_area = sub_area
self.sub_areas = sub_areas
@cached_property
def text_detector(self):
@@ -46,12 +46,13 @@ class SubtitleDetect:
if coordinate_list:
for coordinate in coordinate_list:
xmin, xmax, ymin, ymax = coordinate
if self.sub_area is not None:
s_ymin, s_ymax, s_xmin, s_xmax = self.sub_area
if (s_xmin <= xmin and xmax <= s_xmax
and s_ymin <= ymin
and ymax <= s_ymax):
temp_list.append((xmin, xmax, ymin, ymax))
if self.sub_areas is not None and len(self.sub_areas) > 0:
for sub_area in self.sub_areas:
s_ymin, s_ymax, s_xmin, s_xmax = sub_area
if (s_xmin <= xmin and xmax <= s_xmax
and s_ymin <= ymin
and ymax <= s_ymax):
temp_list.append((xmin, xmax, ymin, ymax))
else:
temp_list.append((xmin, xmax, ymin, ymax))
return temp_list