diff --git a/README.md b/README.md
index abe12f7..1ad0d54 100755
--- a/README.md
+++ b/README.md
@@ -11,13 +11,14 @@ Video-subtitle-remover (vsr) 是一款基于AI技术,将视频中的硬字幕
- 支持自定义字幕位置,仅去除定义位置中的字幕(传入位置)
- 支持全视频自动去除所有文本(不传入位置)
-
-
+
## 演示
点击查看演示视频👇
+
+

## 源码使用说明
@@ -133,6 +134,12 @@ conda activate videoEnv
#### 4. 运行程序
+- 运行图形化界面
+
+```shell
+python gui.py
+```
+
- 运行命令行版本(CLI)
```shell
diff --git a/backend/main.py b/backend/main.py
index 64f8bee..6e346c7 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,7 +1,5 @@
-import shutil
import subprocess
import random
-import config
import os
from pathlib import Path
import threading
@@ -9,6 +7,7 @@ import cv2
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import config
import importlib
import numpy as np
import tempfile
@@ -124,6 +123,11 @@ class SubtitleRemover:
fluid.install_check.run_check()
if torch.cuda.is_available():
print('使用GPU进行加速')
+ # 总处理进度
+ self.progress_total = 0
+ self.isFinished = False
+ # 预览帧
+ self.preview_frame = None
@staticmethod
def get_coordinates(dt_box):
@@ -156,17 +160,21 @@ class SubtitleRemover:
ret, frame = self.video_cap.read()
if not ret:
break
+ original_frame = frame
index += 1
if index in sub_list:
masks = self.create_mask(frame, sub_list[index])
frame = self.inpaint_frame(frame, masks)
+ self.preview_frame = cv2.hconcat([original_frame, frame])
self.video_writer.write(frame)
tbar.update(1)
+ self.progress_total = 100 * float(index)/float(self.frame_count)
self.video_cap.release()
self.video_writer.release()
# 将原音频合并到新生成的视频文件中
self.merge_audio_to_video()
print(f"视频生字幕去除成功,文件路径:{self.video_out_name}")
+ self.isFinished = True
@staticmethod
def inpaint( img, mask):
diff --git a/design/demo.png b/design/demo.png
new file mode 100644
index 0000000..dd006cf
Binary files /dev/null and b/design/demo.png differ
diff --git a/design/vsr.ico b/design/vsr.ico
new file mode 100644
index 0000000..84a782b
Binary files /dev/null and b/design/vsr.ico differ
diff --git a/gui.py b/gui.py
new file mode 100644
index 0000000..031359c
--- /dev/null
+++ b/gui.py
@@ -0,0 +1,371 @@
+# -*- coding: utf-8 -*-
+"""
+@Author : Fang Yao
+@Time : 2021/4/1 6:07 下午
+@FileName: gui.py
+@desc: 字幕去除器图形化界面
+"""
+import backend.main
+import os
+import configparser
+import PySimpleGUI as sg
+import cv2
+from threading import Thread
+import multiprocessing
+
+
+class SubtitleRemoverGUI:
+
+ def __init__(self):
+ # 初次运行检查运行环境是否正常
+ from paddle import fluid
+ fluid.install_check.run_check()
+ self.font = 'Arial 10'
+ self.theme = 'LightBrown12'
+ sg.theme(self.theme)
+ self.icon = os.path.join(os.path.dirname(__file__), 'design', 'vsr.ico')
+ self.screen_width, self.screen_height = sg.Window.get_screen_size()
+ self.subtitle_config_file = os.path.join(os.path.dirname(__file__), 'subtitle.ini')
+ print(self.screen_width, self.screen_height)
+ # 设置视频预览区域大小
+ self.video_preview_width = 960
+ self.video_preview_height = self.video_preview_width * 9 // 16
+ # 默认组件大小
+ self.horizontal_slider_size = (120, 20)
+ self.output_size = (100, 10)
+ self.progressbar_size = (60, 20)
+ # 分辨率低于1080
+ if self.screen_width // 2 < 960:
+ self.video_preview_width = 640
+ self.video_preview_height = self.video_preview_width * 9 // 16
+ self.horizontal_slider_size = (60, 20)
+ self.output_size = (58, 10)
+ self.progressbar_size = (28, 20)
+ # 字幕提取器布局
+ self.layout = None
+ # 字幕提取其窗口
+ self.window = None
+ # 视频路径
+ self.video_path = None
+ # 视频cap
+ self.video_cap = None
+ # 视频的帧率
+ self.fps = None
+ # 视频的帧数
+ self.frame_count = None
+ # 视频的宽
+ self.frame_width = None
+ # 视频的高
+ self.frame_height = None
+ # 设置字幕区域高宽
+ self.xmin = None
+ self.xmax = None
+ self.ymin = None
+ self.ymax = None
+ # 字幕提取器
+ self.sr = None
+
+ def run(self):
+ """
+ Build the layout, open the window and run the event loop until the window is closed.
+
+ Each pass reads one event (10 ms timeout), hands it to the file, slider and run
+ handlers, then copies the progress value and preview frame of the active
+ remover (self.sr) into the window.
+ """
+ # Build the layout
+ self._create_layout()
+ # Create the window
+ self.window = sg.Window(title='Video Subtitle Remover', layout=self.layout,
+ icon=self.icon)
+ while True:
+ # Read the next event
+ event, values = self.window.read(timeout=10)
+ # Handle the "open file" event
+ self._file_event_handler(event, values)
+ # Handle slider events
+ self._slide_event_handler(event, values)
+ # Handle the "run" event
+ self._run_event_handler(event, values)
+ # Leave the loop when the window is closed
+ if event == sg.WIN_CLOSED:
+ break
+ # Update the progress bar; self.sr is assigned by the worker thread started in _run_event_handler
+ if self.sr is not None:
+ self.window['-PROG-'].update(self.sr.progress_total)
+ if self.sr.preview_frame is not None:
+ self.window['-DISPLAY-'].update(data=cv2.imencode('.png', self._img_resize(self.sr.preview_frame))[1].tobytes())
+ if self.sr.isFinished:
+ # 1) Re-enable the subtitle-area sliders
+ self.window['-Y-SLIDER-'].update(disabled=False)
+ self.window['-X-SLIDER-'].update(disabled=False)
+ self.window['-Y-SLIDER-H-'].update(disabled=False)
+ self.window['-X-SLIDER-W-'].update(disabled=False)
+ # 2) Re-enable the Run button, the file input and the Open button
+ self.window['-RUN-'].update(disabled=False)
+ self.window['-FILE-'].update(disabled=False)
+ self.window['-FILE_BTN-'].update(disabled=False)
+ # NOTE(review): the worker thread may already have stored the next remover in self.sr;
+ # this reset could then discard it - confirm.
+ self.sr = None
+ # NOTE(review): leading whitespace is collapsed in this view, so it cannot be told from here
+ # whether this check sits inside the isFinished branch - confirm.
+ if len(self.video_paths) >= 1:
+ # 1) Disable the subtitle-area sliders while videos remain in the queue
+ self.window['-Y-SLIDER-'].update(disabled=True)
+ self.window['-X-SLIDER-'].update(disabled=True)
+ self.window['-Y-SLIDER-H-'].update(disabled=True)
+ self.window['-X-SLIDER-W-'].update(disabled=True)
+ # 2) Disable the Run button, the file input and the Open button
+ self.window['-RUN-'].update(disabled=True)
+ self.window['-FILE-'].update(disabled=True)
+ self.window['-FILE_BTN-'].update(disabled=True)
+
+ def _create_layout(self):
+ """
+ Build the window layout and store it in self.layout.
+
+ Before building, removes an 'output' directory located next to this file
+ if one exists; errors during that removal are ignored.
+ """
+ garbage = os.path.join(os.path.dirname(__file__), 'output')
+ if os.path.exists(garbage):
+ import shutil
+ shutil.rmtree(garbage, True)
+ self.layout = [
+ # Video preview image
+ [sg.Image(size=(self.video_preview_width, self.video_preview_height), background_color='black',
+ key='-DISPLAY-')],
+ # Open button + seek slider; the hidden Input raises the '-FILE-' event
+ [sg.Input(key='-FILE-', visible=False, enable_events=True),
+ sg.FilesBrowse(button_text='Open', file_types=((
+ 'All Files', '*.*'), ('mp4', '*.mp4'),
+ ('flv', '*.flv'),
+ ('wmv', '*.wmv'),
+ ('avi', '*.avi')),
+ key='-FILE_BTN-', size=(10, 1), font=self.font),
+ sg.Slider(size=self.horizontal_slider_size, range=(1, 1), key='-SLIDER-', orientation='h',
+ enable_events=True, font=self.font,
+ disable_number_display=True),
+ ],
+ # Output area, followed by the two frames holding the subtitle-area sliders
+ [sg.Output(size=self.output_size, font=self.font),
+ sg.Frame(title='Vertical', font=self.font, key='-FRAME1-',
+ layout=[[
+ sg.Slider(range=(0, 0), orientation='v', size=(10, 20),
+ disable_number_display=True,
+ enable_events=True, font=self.font,
+ pad=((10, 10), (20, 20)),
+ default_value=0, key='-Y-SLIDER-'),
+ sg.Slider(range=(0, 0), orientation='v', size=(10, 20),
+ disable_number_display=True,
+ enable_events=True, font=self.font,
+ pad=((10, 10), (20, 20)),
+ default_value=0, key='-Y-SLIDER-H-'),
+ ]], pad=((15, 5), (0, 0))),
+ # NOTE(review): the sliders in the 'Horizontal' frame also use orientation='v' - confirm intended
+ sg.Frame(title='Horizontal', font=self.font, key='-FRAME2-',
+ layout=[[
+ sg.Slider(range=(0, 0), orientation='v', size=(10, 20),
+ disable_number_display=True,
+ pad=((10, 10), (20, 20)),
+ enable_events=True, font=self.font,
+ default_value=0, key='-X-SLIDER-'),
+ sg.Slider(range=(0, 0), orientation='v', size=(10, 20),
+ disable_number_display=True,
+ pad=((10, 10), (20, 20)),
+ enable_events=True, font=self.font,
+ default_value=0, key='-X-SLIDER-W-'),
+ ]], pad=((15, 5), (0, 0)))
+ ],
+
+ # Run button + progress bar (maximum value 100)
+ [sg.Button(button_text='Run', key='-RUN-',
+ font=self.font, size=(20, 1)),
+ sg.ProgressBar(100, orientation='h', size=self.progressbar_size, key='-PROG-', auto_size_text=True)
+ ],
+ ]
+
+ def _file_event_handler(self, event, values):
+ """
+ 当点击打开按钮时:
+ 1)打开视频文件,将画布显示视频帧
+ 2)获取视频信息,初始化进度条滑块范围
+ """
+ if event == '-FILE-':
+ self.video_paths = values['-FILE-'].split(';')
+ self.video_path = self.video_paths[0]
+ if self.video_path != '':
+ self.video_cap = cv2.VideoCapture(self.video_path)
+ if self.video_cap is None:
+ return
+ if self.video_cap.isOpened():
+ ret, frame = self.video_cap.read()
+ if ret:
+ for video in self.video_paths:
+ print(f"Open Video Success':{video}")
+ # 获取视频的帧数
+ self.frame_count = self.video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
+ # 获取视频的高度
+ self.frame_height = self.video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+ # 获取视频的宽度
+ self.frame_width = self.video_cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+ # 获取视频的帧率
+ self.fps = self.video_cap.get(cv2.CAP_PROP_FPS)
+ # 调整视频帧大小,使播放器能够显示
+ resized_frame = self._img_resize(frame)
+ # resized_frame = cv2.resize(src=frame, dsize=(self.video_preview_width, self.video_preview_height))
+ # 显示视频帧
+ self.window['-DISPLAY-'].update(data=cv2.imencode('.png', resized_frame)[1].tobytes())
+ # 更新视频进度条滑块range
+ self.window['-SLIDER-'].update(range=(1, self.frame_count))
+ self.window['-SLIDER-'].update(1)
+ # 预设字幕区域位置
+ y_p, h_p, x_p, w_p = self.parse_subtitle_config()
+ y = self.frame_height * y_p
+ h = self.frame_height * h_p
+ x = self.frame_width * x_p
+ w = self.frame_width * w_p
+ # 更新视频字幕位置滑块range
+ # 更新Y-SLIDER范围
+ self.window['-Y-SLIDER-'].update(range=(0, self.frame_height), disabled=False)
+ # 更新Y-SLIDER默认值
+ self.window['-Y-SLIDER-'].update(y)
+ # 更新X-SLIDER范围
+ self.window['-X-SLIDER-'].update(range=(0, self.frame_width), disabled=False)
+ # 更新X-SLIDER默认值
+ self.window['-X-SLIDER-'].update(x)
+ # 更新Y-SLIDER-H范围
+ self.window['-Y-SLIDER-H-'].update(range=(0, self.frame_height - y))
+ # 更新Y-SLIDER-H默认值
+ self.window['-Y-SLIDER-H-'].update(h)
+ # 更新X-SLIDER-W范围
+ self.window['-X-SLIDER-W-'].update(range=(0, self.frame_width - x))
+ # 更新X-SLIDER-W默认值
+ self.window['-X-SLIDER-W-'].update(w)
+ self._update_preview(frame, (y, h, x, w))
+
+ def _run_event_handler(self, event, values):
+ """
+ 当点击运行按钮时:
+ 1) 禁止修改字幕滑块区域
+ 2) 禁止再次点击【运行】和【打开】按钮
+ 3) 设定字幕区域位置
+ """
+ if event == '-RUN-':
+ if self.video_cap is None:
+ print('Please Open Video First')
+ else:
+ # 1) 禁止修改字幕滑块区域
+ self.window['-Y-SLIDER-'].update(disabled=True)
+ self.window['-X-SLIDER-'].update(disabled=True)
+ self.window['-Y-SLIDER-H-'].update(disabled=True)
+ self.window['-X-SLIDER-W-'].update(disabled=True)
+ # 2) 禁止再次点击【运行】、【打开】和【识别语言】按钮
+ self.window['-RUN-'].update(disabled=True)
+ self.window['-FILE-'].update(disabled=True)
+ self.window['-FILE_BTN-'].update(disabled=True)
+ # 3) 设定字幕区域位置
+ self.xmin = int(values['-X-SLIDER-'])
+ self.xmax = int(values['-X-SLIDER-'] + values['-X-SLIDER-W-'])
+ self.ymin = int(values['-Y-SLIDER-'])
+ self.ymax = int(values['-Y-SLIDER-'] + values['-Y-SLIDER-H-'])
+ if self.ymax > self.frame_height:
+ self.ymax = self.frame_height
+ if self.xmax > self.frame_width:
+ self.xmax = self.frame_width
+ print(f"{'SubtitleArea'}:({self.ymin},{self.ymax},{self.xmin},{self.xmax})")
+ subtitle_area = (self.ymin, self.ymax, self.xmin, self.xmax)
+ y_p = self.ymin / self.frame_height
+ h_p = (self.ymax - self.ymin) / self.frame_height
+ x_p = self.xmin / self.frame_width
+ w_p = (self.xmax - self.xmin) / self.frame_width
+ self.set_subtitle_config(y_p, h_p, x_p, w_p)
+
+ def task():
+ while self.video_paths:
+ video_path = self.video_paths.pop()
+ self.sr = backend.main.SubtitleRemover(video_path, subtitle_area)
+ self.sr.run()
+ Thread(target=task, daemon=True).start()
+ self.video_cap.release()
+ self.video_cap = None
+
+ def _slide_event_handler(self, event, values):
+ """
+ 当滑动视频进度条/滑动字幕选择区域滑块时:
+ 1) 判断视频是否存在,如果存在则显示对应的视频帧
+ 2) 绘制rectangle
+ """
+ if event == '-SLIDER-' or event == '-Y-SLIDER-' or event == '-Y-SLIDER-H-' or event == '-X-SLIDER-' or event \
+ == '-X-SLIDER-W-':
+ if self.video_cap is not None and self.video_cap.isOpened():
+ frame_no = int(values['-SLIDER-'])
+ self.video_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
+ ret, frame = self.video_cap.read()
+ if ret:
+ self.window['-Y-SLIDER-H-'].update(range=(0, self.frame_height-values['-Y-SLIDER-']))
+ self.window['-X-SLIDER-W-'].update(range=(0, self.frame_width-values['-X-SLIDER-']))
+ # 画字幕框
+ y = int(values['-Y-SLIDER-'])
+ h = int(values['-Y-SLIDER-H-'])
+ x = int(values['-X-SLIDER-'])
+ w = int(values['-X-SLIDER-W-'])
+ self._update_preview(frame, (y, h, x, w))
+
+ def _update_preview(self, frame, y_h_x_w):
+ y, h, x, w = y_h_x_w
+ # 画字幕框
+ draw = cv2.rectangle(img=frame, pt1=(int(x), int(y)), pt2=(int(x) + int(w), int(y) + int(h)),
+ color=(0, 255, 0), thickness=3)
+ # 调整视频帧大小,使播放器能够显示
+ resized_frame = self._img_resize(draw)
+ # 显示视频帧
+ self.window['-DISPLAY-'].update(data=cv2.imencode('.png', resized_frame)[1].tobytes())
+
+ def _img_resize(self, image):
+ top, bottom, left, right = (0, 0, 0, 0)
+ height, width = image.shape[0], image.shape[1]
+ # 对长短不想等的图片,找到最长的一边
+ longest_edge = height
+ # 计算短边需要增加多少像素宽度使其与长边等长
+ if width < longest_edge:
+ dw = longest_edge - width
+ left = dw // 2
+ right = dw - left
+ else:
+ pass
+ # 给图像增加边界
+ constant = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
+ return cv2.resize(constant, (self.video_preview_width, self.video_preview_height))
+
+ def set_subtitle_config(self, y, h, x, w):
+ # 写入配置文件
+ with open(self.subtitle_config_file, mode='w', encoding='utf-8') as f:
+ f.write('[AREA]\n')
+ f.write(f'Y = {y}\n')
+ f.write(f'H = {h}\n')
+ f.write(f'X = {x}\n')
+ f.write(f'W = {w}\n')
+
+ def parse_subtitle_config(self):
+ y_p, h_p, x_p, w_p = .78, .21, .05, .9
+ # 如果配置文件不存在,则写入配置文件
+ if not os.path.exists(self.subtitle_config_file):
+ self.set_subtitle_config(y_p, h_p, x_p, w_p)
+ return y_p, h_p, x_p, w_p
+ else:
+ try:
+ config = configparser.ConfigParser()
+ config.read(self.subtitle_config_file, encoding='utf-8')
+ conf_y_p, conf_h_p, conf_x_p, conf_w_p = float(config['AREA']['Y']), float(config['AREA']['H']), float(config['AREA']['X']), float(config['AREA']['W'])
+ return conf_y_p, conf_h_p, conf_x_p, conf_w_p
+ except Exception:
+ self.set_subtitle_config(y_p, h_p, x_p, w_p)
+ return y_p, h_p, x_p, w_p
+
+
+if __name__ == '__main__':
+ try:
+ multiprocessing.set_start_method("spawn")
+ # 运行图形化界面
+ subtitleRemoverGUI = SubtitleRemoverGUI()
+ subtitleRemoverGUI.run()
+ except Exception as e:
+ print(f'[{type(e)}] {e}')
+ import traceback
+ traceback.print_exc()
+ msg = traceback.format_exc()
+ err_log_path = os.path.join(os.path.expanduser('~'), 'VSR-Error-Message.log')
+ with open(err_log_path, 'w', encoding='utf-8') as f:
+ f.writelines(msg)
+ import platform
+ if platform.system() == 'Windows':
+ os.system('pause')
+ else:
+ input()
diff --git a/requirements.txt b/requirements.txt
index 43a0abe..70358fc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ lmdb==1.4.1
PyYAML==6.0.1
omegaconf==2.1.2
tqdm==4.66.1
+PySimpleGUI==4.55.1
easydict==1.9
scikit-learn==0.24.2
pandas==2.0.3
diff --git a/test/test.mp4 b/test/test.mp4
new file mode 100644
index 0000000..8db9b6b
Binary files /dev/null and b/test/test.mp4 differ