支持多字幕区域

This commit is contained in:
Jason
2025-05-18 14:59:08 +08:00
parent f78e985e1c
commit c673b5ccd1
9 changed files with 471 additions and 284 deletions

View File

@@ -16,20 +16,8 @@ def parse_args():
help="Output video file path (optional)"
)
parser.add_argument(
"--ymin", type=int, default=None,
help="Subtitle area ymin (optional)"
)
parser.add_argument(
"--ymax", type=int, default=None,
help="Subtitle area ymax (optional)"
)
parser.add_argument(
"--xmin", type=int, default=None,
help="Subtitle area xmin (optional)"
)
parser.add_argument(
"--xmax", type=int, default=None,
help="Subtitle area xmax (optional)"
"--subtitle-area-coords", "-c", action="append", nargs=4, type=int, metavar=("YMIN", "YMAX", "XMIN", "XMAX"),
help="Subtitle area coordinates (ymin ymax xmin xmax). Can be specified multiple times for multiple areas."
)
parser.add_argument(
"--inpaint-mode", type=str, default="sttn-auto",
@@ -38,4 +26,6 @@ def parse_args():
)
args = parser.parse_args()
args.inpaint_mode = InpaintMode[args.inpaint_mode.replace('-','_').upper()]
if args.subtitle_area_coords is None:
args.subtitle_area_coords = []
return args

View File

@@ -17,9 +17,9 @@ class SubtitleDetect:
文本框检测类,用于检测视频帧中是否存在文本框
"""
def __init__(self, video_path, sub_areas=None):
    """Store the video path and the subtitle areas used to filter detections.

    Args:
        video_path: Path of the video to analyse; stored as-is.
        sub_areas: Optional list of subtitle areas. Each entry is unpacked
            elsewhere in this class as (ymin, ymax, xmin, xmax). When
            omitted or None, an empty list is stored.
    """
    self.video_path = video_path
    # A literal [] default would be created once and shared by every
    # instance; use a None sentinel and build a fresh list per instance.
    self.sub_areas = [] if sub_areas is None else sub_areas
@cached_property
def text_detector(self):
@@ -46,12 +46,13 @@ class SubtitleDetect:
if coordinate_list:
for coordinate in coordinate_list:
xmin, xmax, ymin, ymax = coordinate
if self.sub_area is not None:
s_ymin, s_ymax, s_xmin, s_xmax = self.sub_area
if (s_xmin <= xmin and xmax <= s_xmax
and s_ymin <= ymin
and ymax <= s_ymax):
temp_list.append((xmin, xmax, ymin, ymax))
if self.sub_areas is not None and len(self.sub_areas) > 0:
for sub_area in self.sub_areas:
s_ymin, s_ymax, s_xmin, s_xmax = sub_area
if (s_xmin <= xmin and xmax <= s_xmax
and s_ymin <= ymin
and ymax <= s_ymax):
temp_list.append((xmin, xmax, ymin, ymax))
else:
temp_list.append((xmin, xmax, ymin, ymax))
return temp_list