mirror of
https://github.com/YaoFANGUK/video-subtitle-remover.git
synced 2026-02-22 09:44:41 +08:00
init
This commit is contained in:
234
backend/ppocr/data/imaug/random_crop_data.py
Normal file
234
backend/ppocr/data/imaug/random_crop_data.py
Normal file
@@ -0,0 +1,234 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This code is adapted from:
|
||||
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
import random
|
||||
|
||||
|
||||
def is_poly_in_rect(poly, x, y, w, h):
    """Tell whether every vertex of ``poly`` lies inside a rectangle.

    Args:
        poly: sequence of points; column 0 is read as x, column 1 as y.
        x, y: top-left corner of the rectangle.
        w, h: width and height of the rectangle.

    Returns:
        bool: True when the polygon's x-extent fits in ``[x, x + w]`` and
        its y-extent fits in ``[y, y + h]`` (borders included).
    """
    pts = np.asarray(poly)

    xs = pts[:, 0]
    if xs.min() < x or xs.max() > x + w:
        return False

    ys = pts[:, 1]
    # Same comparison as for x; negated so the result is a plain bool.
    return not (ys.min() < y or ys.max() > y + h)
|
||||
|
||||
|
||||
def is_poly_outside_rect(poly, x, y, w, h):
    """Tell whether the extent of ``poly`` is disjoint from a rectangle.

    Only the polygon's axis-aligned extent is compared, one axis at a time.

    Args:
        poly: sequence of points; column 0 is read as x, column 1 as y.
        x, y: top-left corner of the rectangle.
        w, h: width and height of the rectangle.

    Returns:
        bool: True when the polygon lies strictly left of, right of, above
        or below the rectangle; False otherwise (touching counts as not
        outside).
    """
    pts = np.asarray(poly)

    xs = pts[:, 0]
    if xs.max() < x or xs.min() > x + w:
        return True

    ys = pts[:, 1]
    return bool(ys.max() < y or ys.min() > y + h)
|
||||
|
||||
|
||||
def split_regions(axis):
    """Split an ascending index array into runs of consecutive values.

    A new run starts wherever an element is not exactly one greater than
    its predecessor. Every run is returned, including the last one; the
    previous loop-based version only emitted a run when it met the *next*
    gap, so the trailing run was silently dropped.

    Args:
        axis: 1-D array of indices in ascending order.

    Returns:
        list of 1-D arrays, one per run, in order. Empty list for an empty
        input.
    """
    axis = np.asarray(axis)
    if axis.shape[0] == 0:
        return []
    # Positions where a new run begins (the element after each gap).
    run_starts = np.where(np.diff(axis) != 1)[0] + 1
    return np.split(axis, run_starts)
|
||||
|
||||
|
||||
def random_select(axis, max_size):
    """Draw two values from ``axis`` and return them as an ordered pair.

    The draw uses ``np.random.choice`` (with replacement), so both values
    may be equal.

    Args:
        axis: 1-D array of candidate coordinates.
        max_size: size of the dimension; results are clipped to
            ``[0, max_size - 1]``.

    Returns:
        tuple ``(low, high)`` with ``low <= high``.
    """
    picks = np.random.choice(axis, size=2)
    upper = max_size - 1
    low = np.clip(picks.min(), 0, upper)
    high = np.clip(picks.max(), 0, upper)
    return low, high
|
||||
|
||||
|
||||
def region_wise_random_select(regions, max_size):
    """Pick two regions at random and one coordinate from each.

    Regions are drawn with replacement, so both coordinates may come from
    the same region.

    Args:
        regions: non-empty list of 1-D arrays of candidate coordinates.
        max_size: unused; kept so the signature matches ``random_select``.

    Returns:
        tuple ``(low, high)`` of Python ints with ``low <= high``.
    """
    picked = []
    for region_idx in np.random.choice(len(regions), 2):
        # Draw a scalar directly: int() on a 1-element array (the former
        # ``size=1`` form) is deprecated since NumPy 1.25.
        picked.append(int(np.random.choice(regions[region_idx])))
    return min(picked), max(picked)
|
||||
|
||||
|
||||
def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    """Choose a random crop window whose borders do not cut through text.

    Columns and rows covered by the extent of any polygon are marked as
    occupied; crop borders are then sampled only from unoccupied columns
    and rows. A candidate is accepted when it is large enough and at least
    one polygon is not outside it.

    Args:
        im: image array with three dimensions (rows, columns, channels).
        text_polys: iterable of polygons, each an (N, 2) array-like of
            (x, y) points.
        min_crop_side_ratio: minimum crop width / height as a fraction of
            the image width / height.
        max_tries: number of random candidates to try.

    Returns:
        tuple ``(x, y, w, h)`` of the crop window. Falls back to the whole
        image ``(0, 0, w, h)`` when no free row or column exists or no
        candidate is accepted within ``max_tries``.
    """
    h, w, _ = im.shape
    h_array = np.zeros(h, dtype=np.int32)
    w_array = np.zeros(w, dtype=np.int32)
    for points in text_polys:
        points = np.round(points, decimals=0).astype(np.int32)
        # Clamp the extent to the image. A negative bound would otherwise
        # be read by the slice as a from-the-end index and mark the wrong
        # columns/rows (or none) for polygons that overhang the border.
        minx, maxx = np.clip((points[:, 0].min(), points[:, 0].max()), 0, w)
        miny, maxy = np.clip((points[:, 1].min(), points[:, 1].max()), 0, h)
        w_array[minx:maxx] = 1
        h_array[miny:maxy] = 1

    # Only rows / columns free of text may carry a crop border.
    h_axis = np.where(h_array == 0)[0]
    w_axis = np.where(w_array == 0)[0]

    if len(h_axis) == 0 or len(w_axis) == 0:
        return 0, 0, w, h

    h_regions = split_regions(h_axis)
    w_regions = split_regions(w_axis)

    for _ in range(max_tries):
        if len(w_regions) > 1:
            xmin, xmax = region_wise_random_select(w_regions, w)
        else:
            xmin, xmax = random_select(w_axis, w)
        if len(h_regions) > 1:
            ymin, ymax = region_wise_random_select(h_regions, h)
        else:
            ymin, ymax = random_select(h_axis, h)

        crop_w = xmax - xmin
        crop_h = ymax - ymin
        if crop_w < min_crop_side_ratio * w or crop_h < min_crop_side_ratio * h:
            # Candidate is too small.
            continue

        # Accept as soon as one polygon is not outside the window.
        if any(not is_poly_outside_rect(poly, xmin, ymin, crop_w, crop_h)
               for poly in text_polys):
            return xmin, ymin, crop_w, crop_h

    return 0, 0, w, h
|
||||
|
||||
|
||||
class EastRandomCropData(object):
    """Randomly crop an image around text and resize it to a fixed size.

    Operates on a dict with the keys ``'image'``, ``'polys'``,
    ``'ignore_tags'`` and ``'texts'`` and rewrites all four in place.

    Args:
        size: output size as ``(width, height)``.
        max_tries: number of crop candidates passed to ``crop_area``.
        min_crop_side_ratio: minimum crop side as a fraction of the image
            side, passed to ``crop_area``.
        keep_ratio: when True the crop is scaled uniformly and placed in the
            top-left corner of a zero-filled canvas; when False it is
            stretched to exactly ``size``.
        **kwargs: accepted and ignored.
    """

    def __init__(self,
                 size=(640, 640),
                 max_tries=10,
                 min_crop_side_ratio=0.1,
                 keep_ratio=True,
                 **kwargs):
        self.size = size
        self.max_tries = max_tries
        self.min_crop_side_ratio = min_crop_side_ratio
        self.keep_ratio = keep_ratio

    @staticmethod
    def _resize_plan(crop_w, crop_h, size, keep_ratio):
        """Return ``(scale_x, scale_y, out_w, out_h)`` for a crop window.

        ``out_w`` / ``out_h`` is the extent the resized crop occupies in the
        output image; the scales map crop coordinates onto that extent.
        """
        scale_w = size[0] / crop_w
        scale_h = size[1] / crop_h
        if keep_ratio:
            scale = min(scale_w, scale_h)
            return scale, scale, int(crop_w * scale), int(crop_h * scale)
        # Stretching: each axis has its own factor and fills the output.
        return scale_w, scale_h, size[0], size[1]

    def __call__(self, data):
        """Crop ``data['image']`` and remap the annotations accordingly.

        Polygons that end up outside the resized crop are dropped together
        with their text and ignore tag.

        Returns:
            The same ``data`` dict, updated.
        """
        img = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']
        texts = data['texts']
        all_care_polys = [
            poly for poly, tag in zip(text_polys, ignore_tags) if not tag
        ]
        # Compute the crop window from the polygons that are not ignored.
        crop_x, crop_y, crop_w, crop_h = crop_area(
            img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
        scale_x, scale_y, w, h = self._resize_plan(
            crop_w, crop_h, self.size, self.keep_ratio)

        cropped = img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
        if self.keep_ratio:
            # Uniform scale, zero-padded on the right / bottom.
            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
                              img.dtype)
            padimg[:h, :w] = cv2.resize(cropped, (w, h))
            img = padimg
        else:
            img = cv2.resize(cropped, tuple(self.size))

        # Map the polygons with the same transform that was applied to the
        # pixels. Previously the uniform keep-ratio scale was also used when
        # the image had been stretched, which misaligned the labels.
        text_polys_crop = []
        ignore_tags_crop = []
        texts_crop = []
        for poly, text, tag in zip(text_polys, texts, ignore_tags):
            poly = ((poly - (crop_x, crop_y)) * (scale_x, scale_y)).tolist()
            if not is_poly_outside_rect(poly, 0, 0, w, h):
                text_polys_crop.append(poly)
                ignore_tags_crop.append(tag)
                texts_crop.append(text)
        data['image'] = img
        data['polys'] = np.array(text_polys_crop)
        data['ignore_tags'] = ignore_tags_crop
        data['texts'] = texts_crop
        return data
|
||||
|
||||
|
||||
class RandomCropImgMask(object):
    """Cut the same random window out of several arrays of a sample.

    Args:
        size: window size as ``(height, width)``.
        main_key: key of the mask in the data dict that steers where the
            window is placed.
        crop_keys: keys of the entries to be cropped.
        p: threshold for the random draw; the mask-guided placement is used
            only when the mask has positive values and the draw exceeds
            ``p``.
        **kwargs: accepted and ignored.
    """

    def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs):
        self.size = size
        self.main_key = main_key
        self.crop_keys = crop_keys
        self.p = p

    def _choose_origin(self, mask, h, w, th, tw):
        """Return the ``(row, col)`` of the window's top-left corner."""
        if np.max(mask) > 0 and random.random() > self.p:
            # Mask-guided placement: derive the sampling range from the
            # extent of the positive mask entries.
            positive = np.where(mask > 0)
            tl = np.maximum(np.min(positive, axis=1) - (th, tw), 0)
            br = np.maximum(np.max(positive, axis=1) - (th, tw), 0)

            br[0] = min(br[0], h - th)
            br[1] = min(br[1], w - tw)

            row = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            col = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
            return row, col

        # Unconstrained placement anywhere the window fits.
        row = random.randint(0, h - th) if h - th > 0 else 0
        col = random.randint(0, w - tw) if w - tw > 0 else 0
        return row, col

    @staticmethod
    def _cut(arr, row, col, th, tw):
        """Slice the window out of ``arr``.

        For 3-D arrays the smallest dimension is left whole: if it is axis
        0 the window applies to axes 1 and 2, if it is axis 2 to axes 0 and
        1, and if it is axis 1 the array is returned unchanged. Arrays of
        any other rank are windowed on their first two axes.
        """
        if len(arr.shape) != 3:
            return arr[row:row + th, col:col + tw]
        smallest = np.argmin(arr.shape)
        if smallest == 0:
            return arr[:, row:row + th, col:col + tw]
        if smallest == 2:
            return arr[row:row + th, col:col + tw, :]
        return arr

    def __call__(self, data):
        """Crop every entry listed in ``crop_keys`` and return ``data``.

        The sample is returned untouched when ``data['image']`` already has
        the target height and width.
        """
        image = data['image']

        h, w = image.shape[0:2]
        th, tw = self.size
        if w == tw and h == th:
            return data

        row, col = self._choose_origin(data[self.main_key], h, w, th, tw)

        for key in data:
            if key in self.crop_keys:
                data[key] = self._cut(data[key], row, col, th, tw)
        return data
|
||||
Reference in New Issue
Block a user