diff --git a/README.md b/README.md index abe12f7..1ad0d54 100755 --- a/README.md +++ b/README.md @@ -11,13 +11,14 @@ Video-subtitle-remover (vsr) 是一款基于AI技术,将视频中的硬字幕 - 支持自定义字幕位置,仅去除定义位置中的字幕(传入位置) - 支持全视频自动去除所有文本(不传入位置) -demo.jpg - +demo.png ## 演示 点击查看演示视频👇 +demo.jpg +

demo.gif

## 源码使用说明 @@ -133,6 +134,12 @@ conda activate videoEnv #### 4. 运行程序 +- 运行图形化界面 + +```shell +python gui.py +``` + - 运行命令行版本(CLI) ```shell diff --git a/backend/main.py b/backend/main.py index 64f8bee..6e346c7 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,7 +1,5 @@ -import shutil import subprocess import random -import config import os from pathlib import Path import threading @@ -9,6 +7,7 @@ import cv2 import sys sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import config import importlib import numpy as np import tempfile @@ -124,6 +123,11 @@ class SubtitleRemover: fluid.install_check.run_check() if torch.cuda.is_available(): print('使用GPU进行加速') + # 总处理进度 + self.progress_total = 0 + self.isFinished = False + # 预览帧 + self.preview_frame = None @staticmethod def get_coordinates(dt_box): @@ -156,17 +160,21 @@ class SubtitleRemover: ret, frame = self.video_cap.read() if not ret: break + original_frame = frame index += 1 if index in sub_list: masks = self.create_mask(frame, sub_list[index]) frame = self.inpaint_frame(frame, masks) + self.preview_frame = cv2.hconcat([original_frame, frame]) self.video_writer.write(frame) tbar.update(1) + self.progress_total = 100 * float(index)/float(self.frame_count) self.video_cap.release() self.video_writer.release() # 将原音频合并到新生成的视频文件中 self.merge_audio_to_video() print(f"视频生字幕去除成功,文件路径:{self.video_out_name}") + self.isFinished = True @staticmethod def inpaint( img, mask): diff --git a/design/demo.png b/design/demo.png new file mode 100644 index 0000000..dd006cf Binary files /dev/null and b/design/demo.png differ diff --git a/design/vsr.ico b/design/vsr.ico new file mode 100644 index 0000000..84a782b Binary files /dev/null and b/design/vsr.ico differ diff --git a/gui.py b/gui.py new file mode 100644 index 0000000..031359c --- /dev/null +++ b/gui.py @@ -0,0 +1,371 @@ +# -*- coding: utf-8 -*- +""" +@Author : Fang Yao +@Time : 
class SubtitleRemoverGUI:
    """PySimpleGUI front end for the video subtitle remover.

    The window lets the user open one or more videos, scrub through the first
    one, mark the subtitle rectangle with four sliders and start a background
    job that runs ``backend.main.SubtitleRemover`` on every selected file.
    The event loop polls the running remover for ``progress_total``,
    ``preview_frame`` and ``isFinished`` to refresh the progress bar and the
    preview image.
    """

    # Keys of every control that is locked while a removal batch is running.
    _LOCKABLE_KEYS = (
        '-Y-SLIDER-', '-X-SLIDER-', '-Y-SLIDER-H-', '-X-SLIDER-W-',
        '-RUN-', '-FILE-', '-FILE_BTN-',
    )

    def __init__(self):
        """Check the Paddle runtime, pick widget sizes and reset all state."""
        # Verify on start-up that the Paddle runtime is usable.
        from paddle import fluid
        fluid.install_check.run_check()
        self.font = 'Arial 10'
        self.theme = 'LightBrown12'
        sg.theme(self.theme)
        self.icon = os.path.join(os.path.dirname(__file__), 'design', 'vsr.ico')
        self.screen_width, self.screen_height = sg.Window.get_screen_size()
        self.subtitle_config_file = os.path.join(os.path.dirname(__file__), 'subtitle.ini')
        print(self.screen_width, self.screen_height)
        # Size of the video preview area (16:9).
        self.video_preview_width = 960
        self.video_preview_height = self.video_preview_width * 9 // 16
        # Default widget sizes.
        self.horizontal_slider_size = (120, 20)
        self.output_size = (100, 10)
        self.progressbar_size = (60, 20)
        # Shrink everything on screens narrower than 1920 pixels.
        if self.screen_width // 2 < 960:
            self.video_preview_width = 640
            self.video_preview_height = self.video_preview_width * 9 // 16
            self.horizontal_slider_size = (60, 20)
            self.output_size = (58, 10)
            self.progressbar_size = (28, 20)
        # Window layout and window object, created in run().
        self.layout = None
        self.window = None
        # Path of the previewed video and the queue of videos still to process.
        self.video_path = None
        self.video_paths = []
        # Capture used for the preview, plus the properties read from it.
        self.video_cap = None
        self.fps = None
        self.frame_count = None
        self.frame_width = None
        self.frame_height = None
        # Subtitle rectangle in pixels.
        self.xmin = None
        self.xmax = None
        self.ymin = None
        self.ymax = None
        # Remover currently being executed by the worker thread (or None).
        self.sr = None
        # Background thread that walks through self.video_paths (or None).
        self._worker = None

    def run(self):
        """Build the window and run the event loop until it is closed."""
        self._create_layout()
        self.window = sg.Window(title='Video Subtitle Remover', layout=self.layout,
                                icon=self.icon)
        try:
            while True:
                # The short timeout makes read() double as the polling tick.
                event, values = self.window.read(timeout=10)
                if event == sg.WIN_CLOSED:
                    break
                self._file_event_handler(event, values)
                self._slide_event_handler(event, values)
                self._run_event_handler(event, values)
                self._refresh_progress()
        finally:
            # Release the window's resources however the loop ended.
            self.window.close()

    def _set_controls_disabled(self, disabled):
        """Lock (True) or unlock (False) every control in _LOCKABLE_KEYS."""
        for key in self._LOCKABLE_KEYS:
            self.window[key].update(disabled=disabled)

    def _refresh_progress(self):
        """Mirror the running remover into the UI and unlock it when done."""
        worker = self._worker
        # Sample liveness BEFORE reading progress so that the final values
        # written by the worker are still drawn on the tick that unlocks.
        worker_exited = worker is not None and not worker.is_alive()
        remover = self.sr
        if remover is not None:
            self.window['-PROG-'].update(remover.progress_total)
            preview = remover.preview_frame
            if preview is not None:
                self.window['-DISPLAY-'].update(
                    data=cv2.imencode('.png', self._img_resize(preview))[1].tobytes())
        # Unlock only when the whole batch is over (worker thread gone), not
        # when a single video finishes while others are still queued or
        # running.  A remover set without a worker falls back to isFinished.
        finished_without_worker = (worker is None and remover is not None
                                   and remover.isFinished)
        if worker_exited or finished_without_worker:
            self._set_controls_disabled(False)
            self.sr = None
            self._worker = None

    def _create_layout(self):
        """Create the window layout and clear a stale ``output`` directory."""
        garbage = os.path.join(os.path.dirname(__file__), 'output')
        if os.path.exists(garbage):
            import shutil
            shutil.rmtree(garbage, True)

        def area_slider(key):
            # The four subtitle-area sliders differ only by key.
            return sg.Slider(range=(0, 0), orientation='v', size=(10, 20),
                             disable_number_display=True,
                             enable_events=True, font=self.font,
                             pad=((10, 10), (20, 20)),
                             default_value=0, key=key)

        self.layout = [
            # Video preview.
            [sg.Image(size=(self.video_preview_width, self.video_preview_height), background_color='black',
                      key='-DISPLAY-')],
            # Open button + seek slider.
            [sg.Input(key='-FILE-', visible=False, enable_events=True),
             sg.FilesBrowse(button_text='Open',
                            file_types=(('All Files', '*.*'), ('mp4', '*.mp4'),
                                        ('flv', '*.flv'),
                                        ('wmv', '*.wmv'),
                                        ('avi', '*.avi')),
                            key='-FILE_BTN-', size=(10, 1), font=self.font),
             sg.Slider(size=self.horizontal_slider_size, range=(1, 1), key='-SLIDER-', orientation='h',
                       enable_events=True, font=self.font,
                       disable_number_display=True),
             ],
            # Log output + subtitle-area sliders.
            [sg.Output(size=self.output_size, font=self.font),
             sg.Frame(title='Vertical', font=self.font, key='-FRAME1-',
                      layout=[[area_slider('-Y-SLIDER-'), area_slider('-Y-SLIDER-H-')]],
                      pad=((15, 5), (0, 0))),
             sg.Frame(title='Horizontal', font=self.font, key='-FRAME2-',
                      layout=[[area_slider('-X-SLIDER-'), area_slider('-X-SLIDER-W-')]],
                      pad=((15, 5), (0, 0)))
             ],
            # Run button + progress bar.
            [sg.Button(button_text='Run', key='-RUN-',
                       font=self.font, size=(20, 1)),
             sg.ProgressBar(100, orientation='h', size=self.progressbar_size, key='-PROG-', auto_size_text=True)
             ],
        ]

    def _file_event_handler(self, event, values):
        """Handle the Open button.

        Opens the first selected video, reads its properties, initialises the
        seek slider and the subtitle-area sliders from the saved config and
        shows the first frame with the subtitle rectangle drawn on it.
        """
        if event != '-FILE-':
            return
        # Ignore empty entries so a blank value never reaches the worker.
        selected = [path for path in values['-FILE-'].split(';') if path]
        if not selected:
            return
        self.video_paths = selected
        self.video_path = selected[0]
        # Release the capture of a previously opened video before replacing it.
        if self.video_cap is not None:
            self.video_cap.release()
        self.video_cap = cv2.VideoCapture(self.video_path)
        ret, frame = (self.video_cap.read() if self.video_cap.isOpened()
                      else (False, None))
        if not ret:
            # Unreadable file: drop the capture so Run asks for a video again.
            self.video_cap.release()
            self.video_cap = None
            return
        for video in self.video_paths:
            print(f"Open Video Success':{video}")
        # OpenCV reports every property as float; counts and sizes are ints.
        self.frame_count = int(self.video_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frame_height = int(self.video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.frame_width = int(self.video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.fps = self.video_cap.get(cv2.CAP_PROP_FPS)
        # Seek slider covers frames 1..frame_count.
        self.window['-SLIDER-'].update(range=(1, self.frame_count))
        self.window['-SLIDER-'].update(1)
        # Saved subtitle area is stored as fractions of the frame size.
        y_p, h_p, x_p, w_p = self.parse_subtitle_config()
        y = self.frame_height * y_p
        h = self.frame_height * h_p
        x = self.frame_width * x_p
        w = self.frame_width * w_p
        # Range first, then value: a value outside the old range would be clipped.
        self.window['-Y-SLIDER-'].update(range=(0, self.frame_height), disabled=False)
        self.window['-Y-SLIDER-'].update(y)
        self.window['-X-SLIDER-'].update(range=(0, self.frame_width), disabled=False)
        self.window['-X-SLIDER-'].update(x)
        self.window['-Y-SLIDER-H-'].update(range=(0, self.frame_height - y))
        self.window['-Y-SLIDER-H-'].update(h)
        self.window['-X-SLIDER-W-'].update(range=(0, self.frame_width - x))
        self.window['-X-SLIDER-W-'].update(w)
        self._update_preview(frame, (y, h, x, w))

    def _run_event_handler(self, event, values):
        """Handle the Run button.

        Locks the controls, stores the subtitle rectangle (pixels on the
        instance, fractions in the config file) and starts a daemon thread
        that processes every queued video in selection order.
        """
        if event != '-RUN-':
            return
        if self.video_cap is None:
            print('Please Open Video First')
            return
        self._set_controls_disabled(True)
        # Rectangle from the sliders, clamped to the frame.
        self.xmin = int(values['-X-SLIDER-'])
        self.xmax = int(values['-X-SLIDER-'] + values['-X-SLIDER-W-'])
        self.ymin = int(values['-Y-SLIDER-'])
        self.ymax = int(values['-Y-SLIDER-'] + values['-Y-SLIDER-H-'])
        self.ymax = int(min(self.ymax, self.frame_height))
        self.xmax = int(min(self.xmax, self.frame_width))
        print(f"{'SubtitleArea'}:({self.ymin},{self.ymax},{self.xmin},{self.xmax})")
        subtitle_area = (self.ymin, self.ymax, self.xmin, self.xmax)
        y_p = self.ymin / self.frame_height
        h_p = (self.ymax - self.ymin) / self.frame_height
        x_p = self.xmin / self.frame_width
        w_p = (self.xmax - self.xmin) / self.frame_width
        self.set_subtitle_config(y_p, h_p, x_p, w_p)

        def task():
            while self.video_paths:
                video_path = self.video_paths.pop(0)
                # Keep a local reference: the GUI thread may rebind self.sr.
                remover = backend.main.SubtitleRemover(video_path, subtitle_area)
                self.sr = remover
                remover.run()

        self._worker = Thread(target=task, daemon=True)
        self._worker.start()
        # The preview capture is no longer needed once processing starts.
        self.video_cap.release()
        self.video_cap = None

    def _slide_event_handler(self, event, values):
        """Handle the seek slider and the four subtitle-area sliders.

        Shows the selected frame with the subtitle rectangle and keeps the
        height/width slider ranges inside the frame.
        """
        slider_events = ('-SLIDER-', '-Y-SLIDER-', '-Y-SLIDER-H-', '-X-SLIDER-', '-X-SLIDER-W-')
        if event not in slider_events:
            return
        if self.video_cap is None or not self.video_cap.isOpened():
            return
        # The slider is 1-based while CAP_PROP_POS_FRAMES is a 0-based index.
        frame_no = max(int(values['-SLIDER-']) - 1, 0)
        self.video_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
        ret, frame = self.video_cap.read()
        if not ret:
            return
        self.window['-Y-SLIDER-H-'].update(range=(0, self.frame_height - values['-Y-SLIDER-']))
        self.window['-X-SLIDER-W-'].update(range=(0, self.frame_width - values['-X-SLIDER-']))
        y = int(values['-Y-SLIDER-'])
        h = int(values['-Y-SLIDER-H-'])
        x = int(values['-X-SLIDER-'])
        w = int(values['-X-SLIDER-W-'])
        self._update_preview(frame, (y, h, x, w))

    def _update_preview(self, frame, y_h_x_w):
        """Draw the ``(y, h, x, w)`` rectangle on *frame* and display it."""
        y, h, x, w = y_h_x_w
        # cv2.rectangle draws on *frame* in place and returns it.
        draw = cv2.rectangle(img=frame, pt1=(int(x), int(y)), pt2=(int(x) + int(w), int(y) + int(h)),
                             color=(0, 255, 0), thickness=3)
        resized_frame = self._img_resize(draw)
        self.window['-DISPLAY-'].update(data=cv2.imencode('.png', resized_frame)[1].tobytes())

    def _img_resize(self, image):
        """Return *image* scaled to the preview size.

        Images that are taller than wide are first padded with black bars on
        the left and right until they are square.
        """
        height, width = image.shape[0], image.shape[1]
        left = right = 0
        if width < height:
            missing = height - width
            left = missing // 2
            right = missing - left
        padded = cv2.copyMakeBorder(image, 0, 0, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        return cv2.resize(padded, (self.video_preview_width, self.video_preview_height))

    def set_subtitle_config(self, y, h, x, w):
        """Write the subtitle area to the ``[AREA]`` section of the config file."""
        with open(self.subtitle_config_file, mode='w', encoding='utf-8') as f:
            f.write('[AREA]\n')
            f.write(f'Y = {y}\n')
            f.write(f'H = {h}\n')
            f.write(f'X = {x}\n')
            f.write(f'W = {w}\n')

    def parse_subtitle_config(self):
        """Return the saved subtitle area as ``(y, h, x, w)``.

        When the config file is missing, malformed, or lacks a value, it is
        rewritten with the defaults ``(.78, .21, .05, .9)`` and those defaults
        are returned.
        """
        defaults = (.78, .21, .05, .9)
        if os.path.exists(self.subtitle_config_file):
            try:
                parser = configparser.ConfigParser()
                parser.read(self.subtitle_config_file, encoding='utf-8')
                area = parser['AREA']
                return float(area['Y']), float(area['H']), float(area['X']), float(area['W'])
            except (configparser.Error, KeyError, ValueError):
                # Malformed file, missing section/option or non-numeric value:
                # fall through and restore the defaults.
                pass
        self.set_subtitle_config(*defaults)
        return defaults
os.system('pause') + else: + input() diff --git a/requirements.txt b/requirements.txt index 43a0abe..70358fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ lmdb==1.4.1 PyYAML==6.0.1 omegaconf==2.1.2 tqdm==4.66.1 +PySimpleGUI==4.55.1 easydict==1.9 scikit-learn==0.24.2 pandas==2.0.3 diff --git a/test/test.mp4 b/test/test.mp4 new file mode 100644 index 0000000..8db9b6b Binary files /dev/null and b/test/test.mp4 differ