From 93d822d0676aa8d14ab7d729e87872b9e73db427 Mon Sep 17 00:00:00 2001 From: flavioy Date: Wed, 8 Apr 2026 23:34:53 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DLAMA=E6=A8=A1=E5=BC=8F100%?= =?UTF-8?q?=E5=8D=A1=E6=AD=BB=EF=BC=9A=E5=B8=A7=E5=8C=BA=E9=97=B4=E6=89=A9?= =?UTF-8?q?=E5=B1=95=E8=B6=85=E5=87=BA=E8=A7=86=E9=A2=91=E6=80=BB=E5=B8=A7?= =?UTF-8?q?=E6=95=B0=E5=AF=BC=E8=87=B4FramePrefetcher=E6=AD=BB=E9=94=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 限制字幕区间end不超过frame_count,防止内循环消费哨兵后外层永久阻塞 - LAMA批量推理改为mini-batch(4帧),避免GPU OOM - 各inpaint模型空inpaint_area时返回原始帧 - FFmpeg子进程添加600s超时保护 Co-Authored-By: Claude Opus 4.6 --- backend/inpaint/lama_inpaint.py | 50 ++++++++++++++++----------- backend/inpaint/propainter_inpaint.py | 2 ++ backend/inpaint/sttn_auto_inpaint.py | 2 ++ backend/inpaint/sttn_det_inpaint.py | 2 ++ backend/main.py | 10 +++--- backend/tools/video_io.py | 6 +++- 6 files changed, 46 insertions(+), 26 deletions(-) diff --git a/backend/inpaint/lama_inpaint.py b/backend/inpaint/lama_inpaint.py index 10ad96a..7ed7251 100644 --- a/backend/inpaint/lama_inpaint.py +++ b/backend/inpaint/lama_inpaint.py @@ -28,35 +28,42 @@ class LamaInpaint: return cur_res def _inpaint_batch(self, images: List[np.ndarray], masks: List[np.ndarray]): - """批量推理:将多帧合并为一个 batch tensor 一次性送入 GPU""" + """批量推理:将多帧分小批次送入 GPU,避免单次推理过大导致卡死""" if len(images) == 1: return [self.inpaint(images[0], masks[0])] orig_height, orig_width = images[0].shape[:2] - batch_imgs = [] - batch_masks = [] - for img, msk in zip(images, masks): - batch_imgs.append(get_image(img)) - batch_masks.append(get_image(msk)) + # 分小批次推理,每批最多 4 帧 + mini_batch_size = 4 + results = [None] * len(images) + for start in range(0, len(images), mini_batch_size): + end = min(start + mini_batch_size, len(images)) + batch_imgs = [] + batch_masks = [] + for i in range(start, end): + batch_imgs.append(get_image(images[i])) + batch_masks.append(get_image(masks[i])) - # 堆叠为 (B, C, H, W) 并 pad 到 8 的倍数 - batch_imgs = np.stack(batch_imgs) - batch_masks = np.stack(batch_masks) + padded_imgs = np.stack([pad_img_to_modulo(img, 8) for img in batch_imgs]) + padded_masks = np.stack([pad_img_to_modulo(m, 8) for m in batch_masks]) - # 对每个样本做 pad - padded_imgs = np.stack([pad_img_to_modulo(img, 8) for img in batch_imgs]) - padded_masks = np.stack([pad_img_to_modulo(m, 8) for m in batch_masks]) + img_tensor = torch.from_numpy(padded_imgs).to(self.device) + mask_tensor = torch.from_numpy(padded_masks).to(self.device) + mask_tensor = (mask_tensor > 0) * 1 - img_tensor = torch.from_numpy(padded_imgs).to(self.device) - mask_tensor = torch.from_numpy(padded_masks).to(self.device) - mask_tensor = (mask_tensor > 0) * 1 + with torch.inference_mode(): + inpainted = self.model(img_tensor, mask_tensor) + batch_results = inpainted.permute(0, 2, 3, 1).detach().cpu().numpy() + batch_results = np.clip(batch_results * 255, 0, 255).astype('uint8') - with torch.inference_mode(): - inpainted = self.model(img_tensor, mask_tensor) - results = inpainted.permute(0, 2, 3, 1).detach().cpu().numpy() - results = np.clip(results * 255, 0, 255).astype('uint8') + for i in range(end - start): + results[start + i] = batch_results[i][:orig_height, :orig_width] - return [results[i][:orig_height, :orig_width] for i in range(len(images))] + del img_tensor, mask_tensor, padded_imgs, padded_masks + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return results def __call__(self, input_frames: List[np.ndarray], input_mask: np.ndarray): """ @@ -98,6 +105,9 @@ class LamaInpaint: for k in range(len(inpaint_area)): frame[inpaint_area[k][0]:inpaint_area[k][1], :, :] = comps[k][j] inpainted_frames.append(frame) + else: + # 无需处理的区域,返回原始帧 + inpainted_frames = frames_hr if torch.cuda.is_available(): torch.cuda.empty_cache() diff --git a/backend/inpaint/propainter_inpaint.py b/backend/inpaint/propainter_inpaint.py index 8fffc58..710941d 100644 --- a/backend/inpaint/propainter_inpaint.py +++ b/backend/inpaint/propainter_inpaint.py @@ -413,6 +413,8 @@ class PropainterInpaint: # 将最终帧添加到列表 inpainted_frames.append(frame) # print(f'processing frame, {len(frames_hr) - j} left') + else: + inpainted_frames = frames_hr return inpainted_frames diff --git a/backend/inpaint/sttn_auto_inpaint.py b/backend/inpaint/sttn_auto_inpaint.py index ccc14c5..d26f49a 100644 --- a/backend/inpaint/sttn_auto_inpaint.py +++ b/backend/inpaint/sttn_auto_inpaint.py @@ -92,6 +92,8 @@ class STTNInpaint: # 将最终帧添加到列表 inpainted_frames.append(frame) # print(f'processing frame, {len(frames_hr) - j} left') + else: + inpainted_frames = frames_hr return inpainted_frames @staticmethod diff --git a/backend/inpaint/sttn_det_inpaint.py b/backend/inpaint/sttn_det_inpaint.py index 65b8311..cac1d40 100644 --- a/backend/inpaint/sttn_det_inpaint.py +++ b/backend/inpaint/sttn_det_inpaint.py @@ -94,6 +94,8 @@ class STTNDetInpaint: # 将最终帧添加到列表 inpainted_frames.append(frame) # print(f'processing frame, {len(frames_hr) - j} left') + else: + inpainted_frames = frames_hr return inpainted_frames @staticmethod diff --git a/backend/main.py b/backend/main.py index e0560cf..e5620dd 100644 --- a/backend/main.py +++ b/backend/main.py @@ -271,9 +271,9 @@ class SubtitleRemover: del sub_detector gc.collect() start_end_map = dict() - for interval in continuous_frame_no_list: - start, end = interval - start_end_map[start] = end + for start, end in continuous_frame_no_list: + # 确保区间不超出视频总帧数,否则会导致 FramePrefetcher 哨兵被内循环消费后外层死锁 + start_end_map[start] = min(end, self.frame_count) current_frame_index = 0 self.append_output(tr['Main']['ProcessingStartRemovingSubtitles']) # 使用帧预读取,I/O 与推理重叠 @@ -423,7 +423,7 @@ class SubtitleRemover: "-vn", "-loglevel", "error", temp.name] use_shell = True if os.name == "nt" else False try: - subprocess.check_output(audio_extract_command, stdin=open(os.devnull), shell=use_shell) + subprocess.check_output(audio_extract_command, stdin=open(os.devnull), shell=use_shell, timeout=600) except Exception as e: traceback.print_exc() self.append_output(tr['Main']['FailToExtractAudio'].format(str(e))) @@ -437,7 +437,7 @@ class SubtitleRemover: "-acodec", "copy", "-loglevel", "error", self.video_out_path] try: - subprocess.check_output(audio_merge_command, stdin=open(os.devnull), shell=use_shell) + subprocess.check_output(audio_merge_command, stdin=open(os.devnull), shell=use_shell, timeout=600) except Exception as e: traceback.print_exc() self.append_output(tr['Main']['FailToMergeAudio'].format(str(e))) diff --git a/backend/tools/video_io.py b/backend/tools/video_io.py index d9df3ad..7930181 100644 --- a/backend/tools/video_io.py +++ b/backend/tools/video_io.py @@ -97,4 +97,8 @@ class FFmpegVideoWriter: self._process.stdin.close() except BrokenPipeError: pass - self._process.wait() + try: + self._process.wait(timeout=600) + except subprocess.TimeoutExpired: + self._process.terminate() + self._process.wait(timeout=5)