mirror of
https://github.com/YaoFANGUK/video-subtitle-remover.git
synced 2026-03-12 06:37:34 +08:00
init
This commit is contained in:
71
backend/ppocr/losses/__init__.py
Executable file
71
backend/ppocr/losses/__init__.py
Executable file
@@ -0,0 +1,71 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import copy
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
# basic_loss
|
||||
from .basic_loss import LossFromOutput
|
||||
|
||||
# det loss
|
||||
from .det_db_loss import DBLoss
|
||||
from .det_east_loss import EASTLoss
|
||||
from .det_sast_loss import SASTLoss
|
||||
from .det_pse_loss import PSELoss
|
||||
from .det_fce_loss import FCELoss
|
||||
|
||||
# rec loss
|
||||
from .rec_ctc_loss import CTCLoss
|
||||
from .rec_att_loss import AttentionLoss
|
||||
from .rec_srn_loss import SRNLoss
|
||||
from .rec_nrtr_loss import NRTRLoss
|
||||
from .rec_sar_loss import SARLoss
|
||||
from .rec_aster_loss import AsterLoss
|
||||
from .rec_pren_loss import PRENLoss
|
||||
from .rec_multi_loss import MultiLoss
|
||||
|
||||
# cls loss
|
||||
from .cls_loss import ClsLoss
|
||||
|
||||
# e2e loss
|
||||
from .e2e_pg_loss import PGLoss
|
||||
from .kie_sdmgr_loss import SDMGRLoss
|
||||
|
||||
# basic loss function
|
||||
from .basic_loss import DistanceLoss
|
||||
|
||||
# combined loss function
|
||||
from .combined_loss import CombinedLoss
|
||||
|
||||
# table loss
|
||||
from .table_att_loss import TableAttentionLoss
|
||||
|
||||
# vqa token loss
|
||||
from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss
|
||||
|
||||
|
||||
def build_loss(config):
|
||||
support_dict = [
|
||||
'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss',
|
||||
'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss',
|
||||
'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
|
||||
'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss'
|
||||
]
|
||||
config = copy.deepcopy(config)
|
||||
module_name = config.pop('name')
|
||||
assert module_name in support_dict, Exception('loss only support {}'.format(
|
||||
support_dict))
|
||||
module_class = eval(module_name)(**config)
|
||||
return module_class
|
||||
52
backend/ppocr/losses/ace_loss.py
Normal file
52
backend/ppocr/losses/ace_loss.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This code is refer from: https://github.com/viig99/LS-ACELoss
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
|
||||
class ACELoss(nn.Layer):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__()
|
||||
self.loss_func = nn.CrossEntropyLoss(
|
||||
weight=None,
|
||||
ignore_index=0,
|
||||
reduction='none',
|
||||
soft_label=True,
|
||||
axis=-1)
|
||||
|
||||
def __call__(self, predicts, batch):
|
||||
if isinstance(predicts, (list, tuple)):
|
||||
predicts = predicts[-1]
|
||||
|
||||
B, N = predicts.shape[:2]
|
||||
div = paddle.to_tensor([N]).astype('float32')
|
||||
|
||||
predicts = nn.functional.softmax(predicts, axis=-1)
|
||||
aggregation_preds = paddle.sum(predicts, axis=1)
|
||||
aggregation_preds = paddle.divide(aggregation_preds, div)
|
||||
|
||||
length = batch[2].astype("float32")
|
||||
batch = batch[3].astype("float32")
|
||||
batch[:, 0] = paddle.subtract(div, length)
|
||||
batch = paddle.divide(batch, div)
|
||||
|
||||
loss = self.loss_func(aggregation_preds, batch)
|
||||
return {"loss_ace": loss}
|
||||
155
backend/ppocr/losses/basic_loss.py
Normal file
155
backend/ppocr/losses/basic_loss.py
Normal file
@@ -0,0 +1,155 @@
|
||||
#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from paddle.nn import L1Loss
|
||||
from paddle.nn import MSELoss as L2Loss
|
||||
from paddle.nn import SmoothL1Loss
|
||||
|
||||
|
||||
class CELoss(nn.Layer):
|
||||
def __init__(self, epsilon=None):
|
||||
super().__init__()
|
||||
if epsilon is not None and (epsilon <= 0 or epsilon >= 1):
|
||||
epsilon = None
|
||||
self.epsilon = epsilon
|
||||
|
||||
def _labelsmoothing(self, target, class_num):
|
||||
if target.shape[-1] != class_num:
|
||||
one_hot_target = F.one_hot(target, class_num)
|
||||
else:
|
||||
one_hot_target = target
|
||||
soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon)
|
||||
soft_target = paddle.reshape(soft_target, shape=[-1, class_num])
|
||||
return soft_target
|
||||
|
||||
def forward(self, x, label):
|
||||
loss_dict = {}
|
||||
if self.epsilon is not None:
|
||||
class_num = x.shape[-1]
|
||||
label = self._labelsmoothing(label, class_num)
|
||||
x = -F.log_softmax(x, axis=-1)
|
||||
loss = paddle.sum(x * label, axis=-1)
|
||||
else:
|
||||
if label.shape[-1] == x.shape[-1]:
|
||||
label = F.softmax(label, axis=-1)
|
||||
soft_label = True
|
||||
else:
|
||||
soft_label = False
|
||||
loss = F.cross_entropy(x, label=label, soft_label=soft_label)
|
||||
return loss
|
||||
|
||||
|
||||
class KLJSLoss(object):
|
||||
def __init__(self, mode='kl'):
|
||||
assert mode in ['kl', 'js', 'KL', 'JS'
|
||||
], "mode can only be one of ['kl', 'js', 'KL', 'JS']"
|
||||
self.mode = mode
|
||||
|
||||
def __call__(self, p1, p2, reduction="mean"):
|
||||
|
||||
loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5))
|
||||
|
||||
if self.mode.lower() == "js":
|
||||
loss += paddle.multiply(
|
||||
p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5))
|
||||
loss *= 0.5
|
||||
if reduction == "mean":
|
||||
loss = paddle.mean(loss, axis=[1, 2])
|
||||
elif reduction == "none" or reduction is None:
|
||||
return loss
|
||||
else:
|
||||
loss = paddle.sum(loss, axis=[1, 2])
|
||||
|
||||
return loss
|
||||
|
||||
|
||||
class DMLLoss(nn.Layer):
|
||||
"""
|
||||
DMLLoss
|
||||
"""
|
||||
|
||||
def __init__(self, act=None, use_log=False):
|
||||
super().__init__()
|
||||
if act is not None:
|
||||
assert act in ["softmax", "sigmoid"]
|
||||
if act == "softmax":
|
||||
self.act = nn.Softmax(axis=-1)
|
||||
elif act == "sigmoid":
|
||||
self.act = nn.Sigmoid()
|
||||
else:
|
||||
self.act = None
|
||||
|
||||
self.use_log = use_log
|
||||
self.jskl_loss = KLJSLoss(mode="js")
|
||||
|
||||
def _kldiv(self, x, target):
|
||||
eps = 1.0e-10
|
||||
loss = target * (paddle.log(target + eps) - x)
|
||||
# batch mean loss
|
||||
loss = paddle.sum(loss) / loss.shape[0]
|
||||
return loss
|
||||
|
||||
def forward(self, out1, out2):
|
||||
if self.act is not None:
|
||||
out1 = self.act(out1) + 1e-10
|
||||
out2 = self.act(out2) + 1e-10
|
||||
if self.use_log:
|
||||
# for recognition distillation, log is needed for feature map
|
||||
log_out1 = paddle.log(out1)
|
||||
log_out2 = paddle.log(out2)
|
||||
loss = (
|
||||
self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
|
||||
else:
|
||||
# for detection distillation log is not needed
|
||||
loss = self.jskl_loss(out1, out2)
|
||||
return loss
|
||||
|
||||
|
||||
class DistanceLoss(nn.Layer):
|
||||
"""
|
||||
DistanceLoss:
|
||||
mode: loss mode
|
||||
"""
|
||||
|
||||
def __init__(self, mode="l2", **kargs):
|
||||
super().__init__()
|
||||
assert mode in ["l1", "l2", "smooth_l1"]
|
||||
if mode == "l1":
|
||||
self.loss_func = nn.L1Loss(**kargs)
|
||||
elif mode == "l2":
|
||||
self.loss_func = nn.MSELoss(**kargs)
|
||||
elif mode == "smooth_l1":
|
||||
self.loss_func = nn.SmoothL1Loss(**kargs)
|
||||
|
||||
def forward(self, x, y):
|
||||
return self.loss_func(x, y)
|
||||
|
||||
|
||||
class LossFromOutput(nn.Layer):
|
||||
def __init__(self, key='loss', reduction='none'):
|
||||
super().__init__()
|
||||
self.key = key
|
||||
self.reduction = reduction
|
||||
|
||||
def forward(self, predicts, batch):
|
||||
loss = predicts[self.key]
|
||||
if self.reduction == 'mean':
|
||||
loss = paddle.mean(loss)
|
||||
elif self.reduction == 'sum':
|
||||
loss = paddle.sum(loss)
|
||||
return {'loss': loss}
|
||||
88
backend/ppocr/losses/center_loss.py
Normal file
88
backend/ppocr/losses/center_loss.py
Normal file
@@ -0,0 +1,88 @@
|
||||
#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
|
||||
# This code is refer from: https://github.com/KaiyangZhou/pytorch-center-loss
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import pickle
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
|
||||
class CenterLoss(nn.Layer):
|
||||
"""
|
||||
Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
|
||||
"""
|
||||
|
||||
def __init__(self, num_classes=6625, feat_dim=96, center_file_path=None):
|
||||
super().__init__()
|
||||
self.num_classes = num_classes
|
||||
self.feat_dim = feat_dim
|
||||
self.centers = paddle.randn(
|
||||
shape=[self.num_classes, self.feat_dim]).astype("float64")
|
||||
|
||||
if center_file_path is not None:
|
||||
assert os.path.exists(
|
||||
center_file_path
|
||||
), f"center path({center_file_path}) must exist when it is not None."
|
||||
with open(center_file_path, 'rb') as f:
|
||||
char_dict = pickle.load(f)
|
||||
for key in char_dict.keys():
|
||||
self.centers[key] = paddle.to_tensor(char_dict[key])
|
||||
|
||||
def __call__(self, predicts, batch):
|
||||
assert isinstance(predicts, (list, tuple))
|
||||
features, predicts = predicts
|
||||
|
||||
feats_reshape = paddle.reshape(
|
||||
features, [-1, features.shape[-1]]).astype("float64")
|
||||
label = paddle.argmax(predicts, axis=2)
|
||||
label = paddle.reshape(label, [label.shape[0] * label.shape[1]])
|
||||
|
||||
batch_size = feats_reshape.shape[0]
|
||||
|
||||
#calc l2 distance between feats and centers
|
||||
square_feat = paddle.sum(paddle.square(feats_reshape),
|
||||
axis=1,
|
||||
keepdim=True)
|
||||
square_feat = paddle.expand(square_feat, [batch_size, self.num_classes])
|
||||
|
||||
square_center = paddle.sum(paddle.square(self.centers),
|
||||
axis=1,
|
||||
keepdim=True)
|
||||
square_center = paddle.expand(
|
||||
square_center, [self.num_classes, batch_size]).astype("float64")
|
||||
square_center = paddle.transpose(square_center, [1, 0])
|
||||
|
||||
distmat = paddle.add(square_feat, square_center)
|
||||
feat_dot_center = paddle.matmul(feats_reshape,
|
||||
paddle.transpose(self.centers, [1, 0]))
|
||||
distmat = distmat - 2.0 * feat_dot_center
|
||||
|
||||
#generate the mask
|
||||
classes = paddle.arange(self.num_classes).astype("int64")
|
||||
label = paddle.expand(
|
||||
paddle.unsqueeze(label, 1), (batch_size, self.num_classes))
|
||||
mask = paddle.equal(
|
||||
paddle.expand(classes, [batch_size, self.num_classes]),
|
||||
label).astype("float64")
|
||||
dist = paddle.multiply(distmat, mask)
|
||||
|
||||
loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
|
||||
return {'loss_center': loss}
|
||||
30
backend/ppocr/losses/cls_loss.py
Executable file
30
backend/ppocr/losses/cls_loss.py
Executable file
@@ -0,0 +1,30 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class ClsLoss(nn.Layer):
|
||||
def __init__(self, **kwargs):
|
||||
super(ClsLoss, self).__init__()
|
||||
self.loss_func = nn.CrossEntropyLoss(reduction='mean')
|
||||
|
||||
def forward(self, predicts, batch):
|
||||
label = batch[1].astype("int64")
|
||||
loss = self.loss_func(input=predicts, label=label)
|
||||
return {'loss': loss}
|
||||
69
backend/ppocr/losses/combined_loss.py
Normal file
69
backend/ppocr/losses/combined_loss.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from .rec_ctc_loss import CTCLoss
|
||||
from .center_loss import CenterLoss
|
||||
from .ace_loss import ACELoss
|
||||
from .rec_sar_loss import SARLoss
|
||||
|
||||
from .distillation_loss import DistillationCTCLoss
|
||||
from .distillation_loss import DistillationSARLoss
|
||||
from .distillation_loss import DistillationDMLLoss
|
||||
from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss
|
||||
|
||||
|
||||
class CombinedLoss(nn.Layer):
|
||||
"""
|
||||
CombinedLoss:
|
||||
a combionation of loss function
|
||||
"""
|
||||
|
||||
def __init__(self, loss_config_list=None):
|
||||
super().__init__()
|
||||
self.loss_func = []
|
||||
self.loss_weight = []
|
||||
assert isinstance(loss_config_list, list), (
|
||||
'operator config should be a list')
|
||||
for config in loss_config_list:
|
||||
assert isinstance(config,
|
||||
dict) and len(config) == 1, "yaml format error"
|
||||
name = list(config)[0]
|
||||
param = config[name]
|
||||
assert "weight" in param, "weight must be in param, but param just contains {}".format(
|
||||
param.keys())
|
||||
self.loss_weight.append(param.pop("weight"))
|
||||
self.loss_func.append(eval(name)(**param))
|
||||
|
||||
def forward(self, input, batch, **kargs):
|
||||
loss_dict = {}
|
||||
loss_all = 0.
|
||||
for idx, loss_func in enumerate(self.loss_func):
|
||||
loss = loss_func(input, batch, **kargs)
|
||||
if isinstance(loss, paddle.Tensor):
|
||||
loss = {"loss_{}_{}".format(str(loss), idx): loss}
|
||||
|
||||
weight = self.loss_weight[idx]
|
||||
|
||||
loss = {key: loss[key] * weight for key in loss}
|
||||
|
||||
if "loss" in loss:
|
||||
loss_all += loss["loss"]
|
||||
else:
|
||||
loss_all += paddle.add_n(list(loss.values()))
|
||||
loss_dict.update(loss)
|
||||
loss_dict["loss"] = loss_all
|
||||
return loss_dict
|
||||
153
backend/ppocr/losses/det_basic_loss.py
Normal file
153
backend/ppocr/losses/det_basic_loss.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This code is refer from:
|
||||
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
|
||||
class BalanceLoss(nn.Layer):
|
||||
def __init__(self,
|
||||
balance_loss=True,
|
||||
main_loss_type='DiceLoss',
|
||||
negative_ratio=3,
|
||||
return_origin=False,
|
||||
eps=1e-6,
|
||||
**kwargs):
|
||||
"""
|
||||
The BalanceLoss for Differentiable Binarization text detection
|
||||
args:
|
||||
balance_loss (bool): whether balance loss or not, default is True
|
||||
main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
|
||||
'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
|
||||
negative_ratio (int|float): float, default is 3.
|
||||
return_origin (bool): whether return unbalanced loss or not, default is False.
|
||||
eps (float): default is 1e-6.
|
||||
"""
|
||||
super(BalanceLoss, self).__init__()
|
||||
self.balance_loss = balance_loss
|
||||
self.main_loss_type = main_loss_type
|
||||
self.negative_ratio = negative_ratio
|
||||
self.return_origin = return_origin
|
||||
self.eps = eps
|
||||
|
||||
if self.main_loss_type == "CrossEntropy":
|
||||
self.loss = nn.CrossEntropyLoss()
|
||||
elif self.main_loss_type == "Euclidean":
|
||||
self.loss = nn.MSELoss()
|
||||
elif self.main_loss_type == "DiceLoss":
|
||||
self.loss = DiceLoss(self.eps)
|
||||
elif self.main_loss_type == "BCELoss":
|
||||
self.loss = BCELoss(reduction='none')
|
||||
elif self.main_loss_type == "MaskL1Loss":
|
||||
self.loss = MaskL1Loss(self.eps)
|
||||
else:
|
||||
loss_type = [
|
||||
'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
|
||||
]
|
||||
raise Exception(
|
||||
"main_loss_type in BalanceLoss() can only be one of {}".format(
|
||||
loss_type))
|
||||
|
||||
def forward(self, pred, gt, mask=None):
|
||||
"""
|
||||
The BalanceLoss for Differentiable Binarization text detection
|
||||
args:
|
||||
pred (variable): predicted feature maps.
|
||||
gt (variable): ground truth feature maps.
|
||||
mask (variable): masked maps.
|
||||
return: (variable) balanced loss
|
||||
"""
|
||||
positive = gt * mask
|
||||
negative = (1 - gt) * mask
|
||||
|
||||
positive_count = int(positive.sum())
|
||||
negative_count = int(
|
||||
min(negative.sum(), positive_count * self.negative_ratio))
|
||||
loss = self.loss(pred, gt, mask=mask)
|
||||
|
||||
if not self.balance_loss:
|
||||
return loss
|
||||
|
||||
positive_loss = positive * loss
|
||||
negative_loss = negative * loss
|
||||
negative_loss = paddle.reshape(negative_loss, shape=[-1])
|
||||
if negative_count > 0:
|
||||
sort_loss = negative_loss.sort(descending=True)
|
||||
negative_loss = sort_loss[:negative_count]
|
||||
# negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int)
|
||||
balance_loss = (positive_loss.sum() + negative_loss.sum()) / (
|
||||
positive_count + negative_count + self.eps)
|
||||
else:
|
||||
balance_loss = positive_loss.sum() / (positive_count + self.eps)
|
||||
if self.return_origin:
|
||||
return balance_loss, loss
|
||||
|
||||
return balance_loss
|
||||
|
||||
|
||||
class DiceLoss(nn.Layer):
|
||||
def __init__(self, eps=1e-6):
|
||||
super(DiceLoss, self).__init__()
|
||||
self.eps = eps
|
||||
|
||||
def forward(self, pred, gt, mask, weights=None):
|
||||
"""
|
||||
DiceLoss function.
|
||||
"""
|
||||
|
||||
assert pred.shape == gt.shape
|
||||
assert pred.shape == mask.shape
|
||||
if weights is not None:
|
||||
assert weights.shape == mask.shape
|
||||
mask = weights * mask
|
||||
intersection = paddle.sum(pred * gt * mask)
|
||||
|
||||
union = paddle.sum(pred * mask) + paddle.sum(gt * mask) + self.eps
|
||||
loss = 1 - 2.0 * intersection / union
|
||||
assert loss <= 1
|
||||
return loss
|
||||
|
||||
|
||||
class MaskL1Loss(nn.Layer):
|
||||
def __init__(self, eps=1e-6):
|
||||
super(MaskL1Loss, self).__init__()
|
||||
self.eps = eps
|
||||
|
||||
def forward(self, pred, gt, mask):
|
||||
"""
|
||||
Mask L1 Loss
|
||||
"""
|
||||
loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
|
||||
loss = paddle.mean(loss)
|
||||
return loss
|
||||
|
||||
|
||||
class BCELoss(nn.Layer):
|
||||
def __init__(self, reduction='mean'):
|
||||
super(BCELoss, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
def forward(self, input, label, mask=None, weight=None, name=None):
|
||||
loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
|
||||
return loss
|
||||
76
backend/ppocr/losses/det_db_loss.py
Executable file
76
backend/ppocr/losses/det_db_loss.py
Executable file
@@ -0,0 +1,76 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This code is refer from:
|
||||
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from paddle import nn
|
||||
|
||||
from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
|
||||
|
||||
|
||||
class DBLoss(nn.Layer):
|
||||
"""
|
||||
Differentiable Binarization (DB) Loss Function
|
||||
args:
|
||||
param (dict): the super paramter for DB Loss
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
balance_loss=True,
|
||||
main_loss_type='DiceLoss',
|
||||
alpha=5,
|
||||
beta=10,
|
||||
ohem_ratio=3,
|
||||
eps=1e-6,
|
||||
**kwargs):
|
||||
super(DBLoss, self).__init__()
|
||||
self.alpha = alpha
|
||||
self.beta = beta
|
||||
self.dice_loss = DiceLoss(eps=eps)
|
||||
self.l1_loss = MaskL1Loss(eps=eps)
|
||||
self.bce_loss = BalanceLoss(
|
||||
balance_loss=balance_loss,
|
||||
main_loss_type=main_loss_type,
|
||||
negative_ratio=ohem_ratio)
|
||||
|
||||
def forward(self, predicts, labels):
|
||||
predict_maps = predicts['maps']
|
||||
label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[
|
||||
1:]
|
||||
shrink_maps = predict_maps[:, 0, :, :]
|
||||
threshold_maps = predict_maps[:, 1, :, :]
|
||||
binary_maps = predict_maps[:, 2, :, :]
|
||||
|
||||
loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map,
|
||||
label_shrink_mask)
|
||||
loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map,
|
||||
label_threshold_mask)
|
||||
loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map,
|
||||
label_shrink_mask)
|
||||
loss_shrink_maps = self.alpha * loss_shrink_maps
|
||||
loss_threshold_maps = self.beta * loss_threshold_maps
|
||||
|
||||
loss_all = loss_shrink_maps + loss_threshold_maps \
|
||||
+ loss_binary_maps
|
||||
losses = {'loss': loss_all, \
|
||||
"loss_shrink_maps": loss_shrink_maps, \
|
||||
"loss_threshold_maps": loss_threshold_maps, \
|
||||
"loss_binary_maps": loss_binary_maps}
|
||||
return losses
|
||||
63
backend/ppocr/losses/det_east_loss.py
Normal file
63
backend/ppocr/losses/det_east_loss.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
from .det_basic_loss import DiceLoss
|
||||
|
||||
|
||||
class EASTLoss(nn.Layer):
|
||||
"""
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
eps=1e-6,
|
||||
**kwargs):
|
||||
super(EASTLoss, self).__init__()
|
||||
self.dice_loss = DiceLoss(eps=eps)
|
||||
|
||||
def forward(self, predicts, labels):
|
||||
l_score, l_geo, l_mask = labels[1:]
|
||||
f_score = predicts['f_score']
|
||||
f_geo = predicts['f_geo']
|
||||
|
||||
dice_loss = self.dice_loss(f_score, l_score, l_mask)
|
||||
|
||||
#smoooth_l1_loss
|
||||
channels = 8
|
||||
l_geo_split = paddle.split(
|
||||
l_geo, num_or_sections=channels + 1, axis=1)
|
||||
f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1)
|
||||
smooth_l1 = 0
|
||||
for i in range(0, channels):
|
||||
geo_diff = l_geo_split[i] - f_geo_split[i]
|
||||
abs_geo_diff = paddle.abs(geo_diff)
|
||||
smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score)
|
||||
smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32')
|
||||
in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
|
||||
(abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
|
||||
out_loss = l_geo_split[-1] / channels * in_loss * l_score
|
||||
smooth_l1 += out_loss
|
||||
smooth_l1_loss = paddle.mean(smooth_l1 * l_score)
|
||||
|
||||
dice_loss = dice_loss * 0.01
|
||||
total_loss = dice_loss + smooth_l1_loss
|
||||
losses = {"loss":total_loss, \
|
||||
"dice_loss":dice_loss,\
|
||||
"smooth_l1_loss":smooth_l1_loss}
|
||||
return losses
|
||||
227
backend/ppocr/losses/det_fce_loss.py
Normal file
227
backend/ppocr/losses/det_fce_loss.py
Normal file
@@ -0,0 +1,227 @@
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This code is refer from:
|
||||
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/fce_loss.py
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from paddle import nn
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
from functools import partial
|
||||
|
||||
|
||||
def multi_apply(func, *args, **kwargs):
|
||||
pfunc = partial(func, **kwargs) if kwargs else func
|
||||
map_results = map(pfunc, *args)
|
||||
return tuple(map(list, zip(*map_results)))
|
||||
|
||||
|
||||
class FCELoss(nn.Layer):
|
||||
"""The class for implementing FCENet loss
|
||||
FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped
|
||||
Text Detection
|
||||
|
||||
[https://arxiv.org/abs/2104.10442]
|
||||
|
||||
Args:
|
||||
fourier_degree (int) : The maximum Fourier transform degree k.
|
||||
num_sample (int) : The sampling points number of regression
|
||||
loss. If it is too small, fcenet tends to be overfitting.
|
||||
ohem_ratio (float): the negative/positive ratio in OHEM.
|
||||
"""
|
||||
|
||||
def __init__(self, fourier_degree, num_sample, ohem_ratio=3.):
|
||||
super().__init__()
|
||||
self.fourier_degree = fourier_degree
|
||||
self.num_sample = num_sample
|
||||
self.ohem_ratio = ohem_ratio
|
||||
|
||||
def forward(self, preds, labels):
|
||||
assert isinstance(preds, dict)
|
||||
preds = preds['levels']
|
||||
|
||||
p3_maps, p4_maps, p5_maps = labels[1:]
|
||||
assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\
|
||||
'fourier degree not equal in FCEhead and FCEtarget'
|
||||
|
||||
# to tensor
|
||||
gts = [p3_maps, p4_maps, p5_maps]
|
||||
for idx, maps in enumerate(gts):
|
||||
gts[idx] = paddle.to_tensor(np.stack(maps))
|
||||
|
||||
losses = multi_apply(self.forward_single, preds, gts)
|
||||
|
||||
loss_tr = paddle.to_tensor(0.).astype('float32')
|
||||
loss_tcl = paddle.to_tensor(0.).astype('float32')
|
||||
loss_reg_x = paddle.to_tensor(0.).astype('float32')
|
||||
loss_reg_y = paddle.to_tensor(0.).astype('float32')
|
||||
loss_all = paddle.to_tensor(0.).astype('float32')
|
||||
|
||||
for idx, loss in enumerate(losses):
|
||||
loss_all += sum(loss)
|
||||
if idx == 0:
|
||||
loss_tr += sum(loss)
|
||||
elif idx == 1:
|
||||
loss_tcl += sum(loss)
|
||||
elif idx == 2:
|
||||
loss_reg_x += sum(loss)
|
||||
else:
|
||||
loss_reg_y += sum(loss)
|
||||
|
||||
results = dict(
|
||||
loss=loss_all,
|
||||
loss_text=loss_tr,
|
||||
loss_center=loss_tcl,
|
||||
loss_reg_x=loss_reg_x,
|
||||
loss_reg_y=loss_reg_y, )
|
||||
return results
|
||||
|
||||
    def forward_single(self, pred, gt):
        """Compute the four FCE loss terms for one FPN level.

        Args:
            pred: (cls_pred, reg_pred) NCHW tensors; cls carries 2 text-region
                + 2 text-center-line channels, reg carries 2*(2k+1)
                Fourier-coefficient channels.
            gt: ground-truth map whose channels are
                [tr_mask, tcl_mask, train_mask, x coeffs (2k+1), y coeffs (2k+1)].

        Returns:
            tuple: (loss_tr, loss_tcl, loss_reg_x, loss_reg_y).
        """
        # NCHW -> NHWC so per-pixel channel vectors can be flattened below.
        cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1))
        reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1))
        gt = paddle.transpose(gt, (0, 2, 3, 1))

        k = 2 * self.fourier_degree + 1
        tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2))
        tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2))
        x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k))
        y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k))

        tr_mask = gt[:, :, :, :1].reshape([-1])
        tcl_mask = gt[:, :, :, 1:2].reshape([-1])
        train_mask = gt[:, :, :, 2:3].reshape([-1])
        x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k))
        y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k))

        # Pixels inside a text region that are not excluded from training.
        tr_train_mask = (train_mask * tr_mask).astype('bool')
        # Mask duplicated along a new axis to select both channels of the
        # 2-class prediction.
        tr_train_mask2 = paddle.concat(
            [tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1)
        # tr loss
        loss_tr = self.ohem(tr_pred, tr_mask, train_mask)
        # tcl loss
        loss_tcl = paddle.to_tensor(0.).astype('float32')
        tr_neg_mask = tr_train_mask.logical_not()
        tr_neg_mask2 = paddle.concat(
            [tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1)
        if tr_train_mask.sum().item() > 0:
            loss_tcl_pos = F.cross_entropy(
                tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]),
                tcl_mask.masked_select(tr_train_mask).astype('int64'))
            loss_tcl_neg = F.cross_entropy(
                tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]),
                tcl_mask.masked_select(tr_neg_mask).astype('int64'))
            # Negative pixels are down-weighted by 0.5.
            loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg

        # regression loss
        loss_reg_x = paddle.to_tensor(0.).astype('float32')
        loss_reg_y = paddle.to_tensor(0.).astype('float32')
        if tr_train_mask.sum().item() > 0:
            # Per-pixel weight: mean of the tr and tcl mask values at the
            # selected pixels.
            weight = (tr_mask.masked_select(tr_train_mask.astype('bool'))
                      .astype('float32') + tcl_mask.masked_select(
                          tr_train_mask.astype('bool')).astype('float32')) / 2
            weight = weight.reshape([-1, 1])

            # Compare polygons reconstructed from GT vs predicted coefficients.
            ft_x, ft_y = self.fourier2poly(x_map, y_map)
            ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred)

            dim = ft_x.shape[1]

            # Mask replicated to the number of polygon sample points.
            tr_train_mask3 = paddle.concat(
                [tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1)

            loss_reg_x = paddle.mean(weight * F.smooth_l1_loss(
                ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
                ft_x.masked_select(tr_train_mask3).reshape([-1, dim]),
                reduction='none'))
            loss_reg_y = paddle.mean(weight * F.smooth_l1_loss(
                ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
                ft_y.masked_select(tr_train_mask3).reshape([-1, dim]),
                reduction='none'))

        return loss_tr, loss_tcl, loss_reg_x, loss_reg_y
||||
|
||||
    def ohem(self, predict, target, train_mask):
        """Online hard example mining for the 2-class text-region prediction.

        Every positive pixel contributes; only the hardest (highest-loss)
        negatives are kept, at most ``ohem_ratio * n_pos`` of them
        (100 when there are no positives).
        """
        pos = (target * train_mask).astype('bool')
        neg = ((1 - target) * train_mask).astype('bool')

        # Masks duplicated to select both channels of the 2-class prediction.
        pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1)
        neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1)

        n_pos = pos.astype('float32').sum()

        if n_pos.item() > 0:
            loss_pos = F.cross_entropy(
                predict.masked_select(pos2).reshape([-1, 2]),
                target.masked_select(pos).astype('int64'),
                reduction='sum')
            # Per-pixel negative losses; pruned to the hardest ones below.
            loss_neg = F.cross_entropy(
                predict.masked_select(neg2).reshape([-1, 2]),
                target.masked_select(neg).astype('int64'),
                reduction='none')
            n_neg = min(
                int(neg.astype('float32').sum().item()),
                int(self.ohem_ratio * n_pos.astype('float32')))
        else:
            # No positives: fixed budget of 100 hardest negatives.
            loss_pos = paddle.to_tensor(0.)
            loss_neg = F.cross_entropy(
                predict.masked_select(neg2).reshape([-1, 2]),
                target.masked_select(neg).astype('int64'),
                reduction='none')
            n_neg = 100
        # Keep only the n_neg largest negative losses (hard negatives).
        if len(loss_neg) > n_neg:
            loss_neg, _ = paddle.topk(loss_neg, n_neg)

        return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32')
||||
|
||||
def fourier2poly(self, real_maps, imag_maps):
|
||||
"""Transform Fourier coefficient maps to polygon maps.
|
||||
|
||||
Args:
|
||||
real_maps (tensor): A map composed of the real parts of the
|
||||
Fourier coefficients, whose shape is (-1, 2k+1)
|
||||
imag_maps (tensor):A map composed of the imag parts of the
|
||||
Fourier coefficients, whose shape is (-1, 2k+1)
|
||||
|
||||
Returns
|
||||
x_maps (tensor): A map composed of the x value of the polygon
|
||||
represented by n sample points (xn, yn), whose shape is (-1, n)
|
||||
y_maps (tensor): A map composed of the y value of the polygon
|
||||
represented by n sample points (xn, yn), whose shape is (-1, n)
|
||||
"""
|
||||
|
||||
k_vect = paddle.arange(
|
||||
-self.fourier_degree, self.fourier_degree + 1,
|
||||
dtype='float32').reshape([-1, 1])
|
||||
i_vect = paddle.arange(
|
||||
0, self.num_sample, dtype='float32').reshape([1, -1])
|
||||
|
||||
transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect,
|
||||
i_vect)
|
||||
|
||||
x1 = paddle.einsum('ak, kn-> an', real_maps,
|
||||
paddle.cos(transform_matrix))
|
||||
x2 = paddle.einsum('ak, kn-> an', imag_maps,
|
||||
paddle.sin(transform_matrix))
|
||||
y1 = paddle.einsum('ak, kn-> an', real_maps,
|
||||
paddle.sin(transform_matrix))
|
||||
y2 = paddle.einsum('ak, kn-> an', imag_maps,
|
||||
paddle.cos(transform_matrix))
|
||||
|
||||
x_maps = x1 - x2
|
||||
y_maps = y1 + y2
|
||||
|
||||
return x_maps, y_maps
|
||||
149
backend/ppocr/losses/det_pse_loss.py
Normal file
149
backend/ppocr/losses/det_pse_loss.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This code is refer from:
|
||||
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
|
||||
"""
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
from paddle.nn import functional as F
|
||||
import numpy as np
|
||||
from ppocr.utils.iou import iou
|
||||
|
||||
|
||||
class PSELoss(nn.Layer):
    """Implement PSE (Progressive Scale Expansion / PSENet) loss.

    The loss is a weighted sum of a dice loss on the full text map
    (over OHEM-selected pixels) and dice losses on the shrunk kernel maps.

    Args:
        alpha (float): weight of the text loss; the kernel loss gets (1 - alpha).
        ohem_ratio (int): negative/positive pixel ratio used by OHEM.
        kernel_sample_mask (str): 'gt' or 'pred' — which mask selects the
            pixels used by the kernel losses.
        reduction (str): 'sum', 'mean' or 'none' over the batch.
        eps (float): numerical stabilizer for the dice loss.
    """

    def __init__(self,
                 alpha,
                 ohem_ratio=3,
                 kernel_sample_mask='pred',
                 reduction='sum',
                 eps=1e-6,
                 **kwargs):
        super(PSELoss, self).__init__()
        assert reduction in ['sum', 'mean', 'none']
        self.alpha = alpha
        self.ohem_ratio = ohem_ratio
        self.kernel_sample_mask = kernel_sample_mask
        self.reduction = reduction
        self.eps = eps

    def forward(self, outputs, labels):
        predicts = outputs['maps']
        # Predictions are at 1/4 resolution; upsample back to label size.
        predicts = F.interpolate(predicts, scale_factor=4)

        texts = predicts[:, 0, :, :]
        kernels = predicts[:, 1:, :, :]
        gt_texts, gt_kernels, training_masks = labels[1:]

        # text loss
        # BUGFIX: forward now passes self.ohem_ratio; previously ohem_batch
        # silently fell back to its default of 3, ignoring the configured value.
        selected_masks = self.ohem_batch(texts, gt_texts, training_masks,
                                         self.ohem_ratio)

        loss_text = self.dice_loss(texts, gt_texts, selected_masks)
        iou_text = iou((texts > 0).astype('int64'),
                       gt_texts,
                       training_masks,
                       reduce=False)
        losses = dict(loss_text=loss_text, iou_text=iou_text)

        # kernel loss: one dice loss per shrink level, averaged
        loss_kernels = []
        if self.kernel_sample_mask == 'gt':
            selected_masks = gt_texts * training_masks
        elif self.kernel_sample_mask == 'pred':
            selected_masks = (
                F.sigmoid(texts) > 0.5).astype('float32') * training_masks

        for i in range(kernels.shape[1]):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = self.dice_loss(kernel_i, gt_kernel_i,
                                           selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1)
        iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'),
                         gt_kernels[:, -1, :, :],
                         training_masks * gt_texts,
                         reduce=False)
        losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel))

        # weighted total
        loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels
        losses['loss'] = loss
        if self.reduction == 'sum':
            losses = {x: paddle.sum(v) for x, v in losses.items()}
        elif self.reduction == 'mean':
            losses = {x: paddle.mean(v) for x, v in losses.items()}
        return losses

    def dice_loss(self, input, target, mask):
        """Per-sample dice loss (1 - dice coefficient) on sigmoid(input)."""
        input = F.sigmoid(input)

        input = input.reshape([input.shape[0], -1])
        target = target.reshape([target.shape[0], -1])
        mask = mask.reshape([mask.shape[0], -1])

        input = input * mask
        target = target * mask

        a = paddle.sum(input * target, 1)
        b = paddle.sum(input * input, 1) + self.eps
        c = paddle.sum(target * target, 1) + self.eps
        d = (2 * a) / (b + c)
        return 1 - d

    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
        """Select all positives plus the hardest negatives for one sample.

        Returns a [1, H, W] float32 selection mask.
        """
        # Positives = GT text pixels not excluded by the training mask.
        pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int(
            paddle.sum(
                paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5))
                .astype('float32')))

        if pos_num == 0:
            selected_mask = training_mask
            selected_mask = selected_mask.reshape(
                [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                    'float32')
            return selected_mask

        neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
        neg_num = int(min(pos_num * ohem_ratio, neg_num))

        if neg_num == 0:
            selected_mask = training_mask
            selected_mask = selected_mask.reshape(
                [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                    'float32')
            return selected_mask

        # Keep the neg_num highest-scoring (hardest) negative pixels.
        neg_score = paddle.masked_select(score, gt_text <= 0.5)
        neg_score_sorted = paddle.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = paddle.logical_and(
            paddle.logical_or((score >= threshold), (gt_text > 0.5)),
            (training_mask > 0.5))
        selected_mask = selected_mask.reshape(
            [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                'float32')
        return selected_mask

    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
        """Apply ohem_single to every sample; returns a [N, H, W] mask."""
        selected_masks = []
        for i in range(scores.shape[0]):
            selected_masks.append(
                self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
                                 training_masks[i, :, :], ohem_ratio))

        selected_masks = paddle.concat(selected_masks, 0).astype('float32')
        return selected_masks
||||
121
backend/ppocr/losses/det_sast_loss.py
Normal file
121
backend/ppocr/losses/det_sast_loss.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
from .det_basic_loss import DiceLoss
|
||||
import numpy as np
|
||||
|
||||
|
||||
class SASTLoss(nn.Layer):
    """SAST detection loss: a dice-style score loss plus norm-weighted
    smooth-L1 losses for the border, TVO and TCO regression maps.
    """

    def __init__(self, eps=1e-6, **kwargs):
        super(SASTLoss, self).__init__()
        # NOTE(review): kept for interface/state compatibility; forward
        # computes its dice-style score loss inline and never calls this.
        self.dice_loss = DiceLoss(eps=eps)

    def _weighted_smooth_l1(self, f_split, l_geo, l_score, l_mask, channels):
        """Norm-weighted smooth-L1 loss shared by the border/tvo/tco branches.

        ``l_geo`` carries ``channels`` regression channels plus one norm
        channel; norm, score and mask maps are expanded to ``channels``
        channels before weighting.
        """
        l_split, l_norm = paddle.split(
            l_geo, num_or_sections=[channels, 1], axis=1)
        ex_shape = l_norm.shape * np.array([1, channels, 1, 1])
        l_norm_split = paddle.expand(x=l_norm, shape=ex_shape)
        score = paddle.expand(x=l_score, shape=ex_shape)
        mask = paddle.expand(x=l_mask, shape=ex_shape)

        diff = l_split - f_split
        abs_diff = paddle.abs(diff)
        # smooth-L1: quadratic below 1.0, linear above
        sign = paddle.cast(abs_diff < 1.0, dtype='float32')
        sign.stop_gradient = True
        in_loss = 0.5 * abs_diff * abs_diff * sign + \
                  (abs_diff - 0.5) * (1.0 - sign)
        out_loss = l_norm_split * in_loss
        return paddle.sum(out_loss * score * mask) / \
               (paddle.sum(score * mask) + 1e-5)

    def forward(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]

        # score loss (dice-style)
        intersection = paddle.sum(f_score * l_score * l_mask)
        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # border / tvo / tco share the same weighted smooth-L1 computation;
        # only the number of regression channels differs (4 / 8 / 2).
        border_loss = self._weighted_smooth_l1(f_border, l_border, l_score,
                                               l_mask, 4)
        tvo_loss = self._weighted_smooth_l1(f_tvo, l_tvo, l_score, l_mask, 8)
        tco_loss = self._weighted_smooth_l1(f_tco, l_tco, l_score, l_mask, 2)

        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
                     tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'loss':total_loss, "score_loss":score_loss,\
            "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
        return losses
||||
324
backend/ppocr/losses/distillation_loss.py
Normal file
324
backend/ppocr/losses/distillation_loss.py
Normal file
@@ -0,0 +1,324 @@
|
||||
#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
from .rec_ctc_loss import CTCLoss
|
||||
from .rec_sar_loss import SARLoss
|
||||
from .basic_loss import DMLLoss
|
||||
from .basic_loss import DistanceLoss
|
||||
from .det_db_loss import DBLoss
|
||||
from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
|
||||
|
||||
|
||||
def _sum_loss(loss_dict):
|
||||
if "loss" in loss_dict.keys():
|
||||
return loss_dict
|
||||
else:
|
||||
loss_dict["loss"] = 0.
|
||||
for k, value in loss_dict.items():
|
||||
if k == "loss":
|
||||
continue
|
||||
else:
|
||||
loss_dict["loss"] += value
|
||||
return loss_dict
|
||||
|
||||
|
||||
class DistillationDMLLoss(DMLLoss):
    """DML distillation loss between pairs of sub-models.

    Each (student, teacher) pair named in ``model_name_pairs`` is compared
    with the parent DMLLoss. When ``maps_name`` is set, only the named DB
    output channels are compared instead of the whole outputs.
    """

    def __init__(self,
                 model_name_pairs=[],
                 act=None,
                 use_log=False,
                 key=None,
                 multi_head=False,
                 dis_head='ctc',
                 maps_name=None,
                 name="dml"):
        super().__init__(act=act, use_log=use_log)
        assert isinstance(model_name_pairs, list)
        # Sub-dict key to select from each model's output dict (optional).
        self.key = key
        # multi_head: outputs are dicts of heads; dis_head picks which to use.
        self.multi_head = multi_head
        self.dis_head = dis_head
        self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
        self.name = name
        self.maps_name = self._check_maps_name(maps_name)

    def _check_model_name_pairs(self, model_name_pairs):
        """Normalize input to a list of [name1, name2] string pairs."""
        if not isinstance(model_name_pairs, list):
            return []
        elif isinstance(model_name_pairs[0], list) and isinstance(
                model_name_pairs[0][0], str):
            return model_name_pairs
        else:
            return [model_name_pairs]

    def _check_maps_name(self, maps_name):
        """Normalize maps_name to a list of map-name strings (or None)."""
        if maps_name is None:
            return None
        elif type(maps_name) == str:
            return [maps_name]
        elif type(maps_name) == list:
            # BUGFIX: previously returned [maps_name], nesting the list so
            # _slice_out iterated a list-of-list and matched no channel names.
            return maps_name
        else:
            return None

    def _slice_out(self, outs):
        """Pick the requested DB channels from the stacked output maps."""
        new_outs = {}
        for k in self.maps_name:
            # NOTE: "thrink_maps" (sic) is the key spelling used elsewhere;
            # it must be kept as-is.
            if k == "thrink_maps":
                new_outs[k] = outs[:, 0, :, :]
            elif k == "threshold_maps":
                new_outs[k] = outs[:, 1, :, :]
            elif k == "binary_maps":
                new_outs[k] = outs[:, 2, :, :]
            else:
                continue
        return new_outs

    def forward(self, predicts, batch):
        loss_dict = dict()
        for idx, pair in enumerate(self.model_name_pairs):
            out1 = predicts[pair[0]]
            out2 = predicts[pair[1]]
            if self.key is not None:
                out1 = out1[self.key]
                out2 = out2[self.key]

            if self.maps_name is None:
                if self.multi_head:
                    loss = super().forward(out1[self.dis_head],
                                           out2[self.dis_head])
                else:
                    loss = super().forward(out1, out2)
                if isinstance(loss, dict):
                    for key in loss:
                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1],
                                                       idx)] = loss[key]
                else:
                    loss_dict["{}_{}".format(self.name, idx)] = loss
            else:
                # Compare only the selected channel maps.
                outs1 = self._slice_out(out1)
                outs2 = self._slice_out(out2)
                for _c, k in enumerate(outs1.keys()):
                    loss = super().forward(outs1[k], outs2[k])
                    if isinstance(loss, dict):
                        for key in loss:
                            loss_dict["{}_{}_{}_{}_{}".format(key, pair[
                                0], pair[1], self.maps_name, idx)] = loss[key]
                    else:
                        loss_dict["{}_{}_{}".format(self.name, self.maps_name[
                            _c], idx)] = loss

        loss_dict = _sum_loss(loss_dict)

        return loss_dict
||||
|
||||
|
||||
class DistillationCTCLoss(CTCLoss):
    """CTC loss applied to each named sub-model of a distillation model."""

    def __init__(self,
                 model_name_list=[],
                 key=None,
                 multi_head=False,
                 name="loss_ctc"):
        super().__init__()
        self.model_name_list = model_name_list
        self.key = key
        self.name = name
        self.multi_head = multi_head

    def forward(self, predicts, batch):
        loss_dict = dict()
        for idx, model_name in enumerate(self.model_name_list):
            out = predicts[model_name]
            if self.key is not None:
                out = out[self.key]
            if self.multi_head:
                assert 'ctc' in out, 'multi head has multi out'
                # batch[2] is skipped here — presumably the other head's
                # label; confirm against the dataloader.
                loss = super().forward(out['ctc'], batch[:2] + batch[3:])
            else:
                loss = super().forward(out, batch)
            if not isinstance(loss, dict):
                loss_dict["{}_{}".format(self.name, model_name)] = loss
            else:
                # NOTE(review): the same dict key is written for every entry
                # of `loss`, so only the last value survives when the parent
                # returns several entries — looks unintentional; confirm.
                for key in loss:
                    loss_dict["{}_{}_{}".format(self.name, model_name,
                                                idx)] = loss[key]
        return loss_dict
||||
|
||||
|
||||
class DistillationSARLoss(SARLoss):
    """SAR loss applied to each named sub-model of a distillation model."""

    def __init__(self,
                 model_name_list=[],
                 key=None,
                 multi_head=False,
                 name="loss_sar",
                 **kwargs):
        ignore_index = kwargs.get('ignore_index', 92)
        super().__init__(ignore_index=ignore_index)
        self.model_name_list = model_name_list
        self.key = key
        self.name = name
        self.multi_head = multi_head

    def forward(self, predicts, batch):
        loss_dict = dict()
        for idx, model_name in enumerate(self.model_name_list):
            out = predicts[model_name]
            if self.key is not None:
                out = out[self.key]
            if self.multi_head:
                assert 'sar' in out, 'multi head has multi out'
                # batch[1] is skipped here — presumably the other head's
                # label; confirm against the dataloader.
                loss = super().forward(out['sar'], batch[:1] + batch[2:])
            else:
                loss = super().forward(out, batch)
            if not isinstance(loss, dict):
                loss_dict["{}_{}".format(self.name, model_name)] = loss
            else:
                # NOTE(review): the same dict key is reused for every entry of
                # `loss`, so only the last value survives — confirm intended.
                for key in loss:
                    loss_dict["{}_{}_{}".format(self.name, model_name,
                                                idx)] = loss[key]
        return loss_dict
||||
|
||||
|
||||
class DistillationDBLoss(DBLoss):
    """DB loss applied independently to each named sub-model of a
    distillation model; per-term losses are summed by _sum_loss.
    """

    def __init__(self,
                 model_name_list=[],
                 balance_loss=True,
                 main_loss_type='DiceLoss',
                 alpha=5,
                 beta=10,
                 ohem_ratio=3,
                 eps=1e-6,
                 name="db",
                 **kwargs):
        # NOTE(review): balance_loss, main_loss_type, alpha, beta, ohem_ratio
        # and eps are accepted but never forwarded to DBLoss.__init__, so the
        # parent's defaults are always used — confirm this is intended.
        super().__init__()
        self.model_name_list = model_name_list
        self.name = name
        # key is fixed to None: sub-model outputs are used as-is.
        self.key = None

    def forward(self, predicts, batch):
        loss_dict = {}
        for idx, model_name in enumerate(self.model_name_list):
            out = predicts[model_name]
            if self.key is not None:
                out = out[self.key]
            loss = super().forward(out, batch)

            if isinstance(loss, dict):
                for key in loss.keys():
                    if key == "loss":
                        # Drop the parent's aggregate; recomputed below.
                        continue
                    name = "{}_{}_{}".format(self.name, model_name, key)
                    loss_dict[name] = loss[key]
            else:
                loss_dict["{}_{}".format(self.name, model_name)] = loss

        # Add the summed "loss" entry across all sub-models.
        loss_dict = _sum_loss(loss_dict)
        return loss_dict
||||
|
||||
|
||||
class DistillationDilaDBLoss(DBLoss):
    """DB distillation loss where the teacher's binarized shrink map is
    dilated and used as the target for the student's shrink/binary maps.
    """

    def __init__(self,
                 model_name_pairs=[],
                 key=None,
                 balance_loss=True,
                 main_loss_type='DiceLoss',
                 alpha=5,
                 beta=10,
                 ohem_ratio=3,
                 eps=1e-6,
                 name="dila_dbloss"):
        # NOTE(review): balance_loss/main_loss_type/alpha/... are accepted but
        # not forwarded to DBLoss.__init__ — confirm the parent's defaults
        # are intended here.
        super().__init__()
        self.model_name_pairs = model_name_pairs
        self.name = name
        self.key = key

    def forward(self, predicts, batch):
        loss_dict = dict()
        for idx, pair in enumerate(self.model_name_pairs):
            stu_outs = predicts[pair[0]]
            tch_outs = predicts[pair[1]]
            # BUGFIX: stu_preds/tch_preds were only bound when self.key was
            # set, raising NameError for key=None; fall back to the raw outputs.
            if self.key is not None:
                stu_preds = stu_outs[self.key]
                tch_preds = tch_outs[self.key]
            else:
                stu_preds = stu_outs
                tch_preds = tch_outs

            stu_shrink_maps = stu_preds[:, 0, :, :]
            stu_binary_maps = stu_preds[:, 2, :, :]

            # dilation to teacher prediction
            dilation_w = np.array([[1, 1], [1, 1]])
            th_shrink_maps = tch_preds[:, 0, :, :]
            th_shrink_maps = th_shrink_maps.numpy() > 0.3  # thresh = 0.3
            dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32)
            for i in range(th_shrink_maps.shape[0]):
                dilate_maps[i] = cv2.dilate(
                    th_shrink_maps[i, :, :].astype(np.uint8), dilation_w)
            th_shrink_maps = paddle.to_tensor(dilate_maps)

            label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[
                1:]

            # calculate the shrink map loss
            bce_loss = self.alpha * self.bce_loss(
                stu_shrink_maps, th_shrink_maps, label_shrink_mask)
            loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps,
                                              label_shrink_mask)

            k = "{}_{}_{}".format(self.name, pair[0], pair[1])
            loss_dict[k] = bce_loss + loss_binary_maps

        loss_dict = _sum_loss(loss_dict)
        return loss_dict
||||
|
||||
|
||||
class DistillationDistanceLoss(DistanceLoss):
    """Distance distillation loss between paired sub-model outputs."""

    def __init__(self,
                 mode="l2",
                 model_name_pairs=[],
                 key=None,
                 name="loss_distance",
                 **kargs):
        super().__init__(mode=mode, **kargs)
        assert isinstance(model_name_pairs, list)
        self.key = key
        self.model_name_pairs = model_name_pairs
        # The "_l2" suffix is appended regardless of the configured mode.
        self.name = name + "_l2"

    def forward(self, predicts, batch):
        loss_dict = dict()
        for idx, pair in enumerate(self.model_name_pairs):
            src = predicts[pair[0]]
            dst = predicts[pair[1]]
            if self.key is not None:
                src = src[self.key]
                dst = dst[self.key]
            loss = super().forward(src, dst)
            if not isinstance(loss, dict):
                loss_dict["{}_{}_{}_{}".format(self.name, pair[0], pair[1],
                                               idx)] = loss
            else:
                for key in loss:
                    loss_dict["{}_{}_{}".format(self.name, key, idx)] = loss[
                        key]
        return loss_dict
||||
140
backend/ppocr/losses/e2e_pg_loss.py
Normal file
140
backend/ppocr/losses/e2e_pg_loss.py
Normal file
@@ -0,0 +1,140 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from paddle import nn
|
||||
import paddle
|
||||
|
||||
from .det_basic_loss import DiceLoss
|
||||
from ppocr.utils.e2e_utils.extract_batchsize import pre_process
|
||||
|
||||
|
||||
class PGLoss(nn.Layer):
|
||||
def __init__(self,
|
||||
tcl_bs,
|
||||
max_text_length,
|
||||
max_text_nums,
|
||||
pad_num,
|
||||
eps=1e-6,
|
||||
**kwargs):
|
||||
super(PGLoss, self).__init__()
|
||||
self.tcl_bs = tcl_bs
|
||||
self.max_text_nums = max_text_nums
|
||||
self.max_text_length = max_text_length
|
||||
self.pad_num = pad_num
|
||||
self.dice_loss = DiceLoss(eps=eps)
|
||||
|
||||
def border_loss(self, f_border, l_border, l_score, l_mask):
|
||||
l_border_split, l_border_norm = paddle.tensor.split(
|
||||
l_border, num_or_sections=[4, 1], axis=1)
|
||||
f_border_split = f_border
|
||||
b, c, h, w = l_border_norm.shape
|
||||
l_border_norm_split = paddle.expand(
|
||||
x=l_border_norm, shape=[b, 4 * c, h, w])
|
||||
b, c, h, w = l_score.shape
|
||||
l_border_score = paddle.expand(x=l_score, shape=[b, 4 * c, h, w])
|
||||
b, c, h, w = l_mask.shape
|
||||
l_border_mask = paddle.expand(x=l_mask, shape=[b, 4 * c, h, w])
|
||||
border_diff = l_border_split - f_border_split
|
||||
abs_border_diff = paddle.abs(border_diff)
|
||||
border_sign = abs_border_diff < 1.0
|
||||
border_sign = paddle.cast(border_sign, dtype='float32')
|
||||
border_sign.stop_gradient = True
|
||||
border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
|
||||
(abs_border_diff - 0.5) * (1.0 - border_sign)
|
||||
border_out_loss = l_border_norm_split * border_in_loss
|
||||
border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
|
||||
(paddle.sum(l_border_score * l_border_mask) + 1e-5)
|
||||
return border_loss
|
||||
|
||||
def direction_loss(self, f_direction, l_direction, l_score, l_mask):
|
||||
l_direction_split, l_direction_norm = paddle.tensor.split(
|
||||
l_direction, num_or_sections=[2, 1], axis=1)
|
||||
f_direction_split = f_direction
|
||||
b, c, h, w = l_direction_norm.shape
|
||||
l_direction_norm_split = paddle.expand(
|
||||
x=l_direction_norm, shape=[b, 2 * c, h, w])
|
||||
b, c, h, w = l_score.shape
|
||||
l_direction_score = paddle.expand(x=l_score, shape=[b, 2 * c, h, w])
|
||||
b, c, h, w = l_mask.shape
|
||||
l_direction_mask = paddle.expand(x=l_mask, shape=[b, 2 * c, h, w])
|
||||
direction_diff = l_direction_split - f_direction_split
|
||||
abs_direction_diff = paddle.abs(direction_diff)
|
||||
direction_sign = abs_direction_diff < 1.0
|
||||
direction_sign = paddle.cast(direction_sign, dtype='float32')
|
||||
direction_sign.stop_gradient = True
|
||||
direction_in_loss = 0.5 * abs_direction_diff * abs_direction_diff * direction_sign + \
|
||||
(abs_direction_diff - 0.5) * (1.0 - direction_sign)
|
||||
direction_out_loss = l_direction_norm_split * direction_in_loss
|
||||
direction_loss = paddle.sum(direction_out_loss * l_direction_score * l_direction_mask) / \
|
||||
(paddle.sum(l_direction_score * l_direction_mask) + 1e-5)
|
||||
return direction_loss
|
||||
|
||||
def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
|
||||
f_char = paddle.transpose(f_char, [0, 2, 3, 1])
|
||||
tcl_pos = paddle.reshape(tcl_pos, [-1, 3])
|
||||
tcl_pos = paddle.cast(tcl_pos, dtype=int)
|
||||
f_tcl_char = paddle.gather_nd(f_char, tcl_pos)
|
||||
f_tcl_char = paddle.reshape(f_tcl_char,
|
||||
[-1, 64, 37]) # len(Lexicon_Table)+1
|
||||
f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2)
|
||||
f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0
|
||||
b, c, l = tcl_mask.shape
|
||||
tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
|
||||
tcl_mask_fg.stop_gradient = True
|
||||
f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * (
|
||||
-20.0)
|
||||
f_tcl_char_mask = paddle.concat([f_tcl_char_fg, f_tcl_char_bg], axis=2)
|
||||
f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2))
|
||||
N, B, _ = f_tcl_char_ld.shape
|
||||
input_lengths = paddle.to_tensor([N] * B, dtype='int64')
|
||||
cost = paddle.nn.functional.ctc_loss(
|
||||
log_probs=f_tcl_char_ld,
|
||||
labels=tcl_label,
|
||||
input_lengths=input_lengths,
|
||||
label_lengths=label_t,
|
||||
blank=self.pad_num,
|
||||
reduction='none')
|
||||
cost = cost.mean()
|
||||
return cost
|
||||
|
||||
def forward(self, predicts, labels):
    """Total PGNet loss: dice (score) + smooth-L1 (border, direction) + CTC."""
    (images, tcl_maps, tcl_label_maps, border_maps, direction_maps,
     training_masks, label_list, pos_list, pos_mask) = labels
    # Pack and pad the per-image recognition labels for the whole batch.
    pos_list, pos_mask, label_list, label_t = pre_process(
        label_list, pos_list, pos_mask, self.max_text_length,
        self.max_text_nums, self.pad_num, self.tcl_bs)

    f_score = predicts['f_score']
    f_border = predicts['f_border']
    f_direction = predicts['f_direction']
    f_char = predicts['f_char']

    score_loss = self.dice_loss(f_score, tcl_maps, training_masks)
    border_loss = self.border_loss(f_border, border_maps, tcl_maps,
                                   training_masks)
    direction_loss = self.direction_loss(f_direction, direction_maps,
                                         tcl_maps, training_masks)
    ctc_loss = self.ctcloss(f_char, pos_list, pos_mask, label_list, label_t)
    # The recognition (CTC) term is weighted 5x against the detection terms.
    loss_all = score_loss + border_loss + direction_loss + 5 * ctc_loss

    return {
        'loss': loss_all,
        "score_loss": score_loss,
        "border_loss": border_loss,
        "direction_loss": direction_loss,
        "ctc_loss": ctc_loss,
    }
|
||||
115
backend/ppocr/losses/kie_sdmgr_loss.py
Normal file
115
backend/ppocr/losses/kie_sdmgr_loss.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/losses/sdmgr_loss.py
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from paddle import nn
|
||||
import paddle
|
||||
|
||||
|
||||
class SDMGRLoss(nn.Layer):
    """SDMGR key-information-extraction loss.

    Weighted sum of a node-classification and an edge-classification
    cross-entropy, plus top-1 accuracies reported for monitoring.
    Reference: mmocr models/kie/losses/sdmgr_loss.py.
    """

    def __init__(self, node_weight=1.0, edge_weight=1.0, ignore=0):
        super().__init__()
        self.loss_node = nn.CrossEntropyLoss(ignore_index=ignore)
        # Edge labels use -1 as the padding/ignore value.
        self.loss_edge = nn.CrossEntropyLoss(ignore_index=-1)
        self.node_weight = node_weight
        self.edge_weight = edge_weight
        self.ignore = ignore

    def pre_process(self, gts, tag):
        """Crop each padded gt matrix to its true node count.

        ``tag[i]`` is ``(num_nodes, recoder_len)``; column 0 of the cropped
        matrix holds the node label, columns 1..num the edge labels.
        """
        gts, tag = gts.numpy(), tag.numpy().tolist()
        temp_gts = []
        batch = len(tag)
        for i in range(batch):
            num, recoder_len = tag[i][0], tag[i][1]  # recoder_len unused here
            temp_gts.append(
                paddle.to_tensor(
                    gts[i, :num, :num + 1], dtype='int64'))
        return temp_gts

    def accuracy(self, pred, target, topk=1, thresh=None):
        """Calculate accuracy according to the prediction and target.

        Args:
            pred (paddle.Tensor): The model prediction, shape (N, num_class)
            target (paddle.Tensor): The target of each prediction, shape (N, )
            topk (int | tuple[int], optional): If the predictions in ``topk``
                matches the target, the predictions will be regarded as
                correct ones. Defaults to 1.
            thresh (float, optional): currently unused; kept for signature
                compatibility with the mmocr reference implementation.

        Returns:
            float | tuple[float]: If ``topk`` is a single integer, a single
            accuracy value; otherwise one value per ``topk`` entry.
        """
        assert isinstance(topk, (int, tuple))
        if isinstance(topk, int):
            topk = (topk, )
            return_single = True
        else:
            return_single = False

        maxk = max(topk)
        if pred.shape[0] == 0:
            # Bug fix: Paddle tensors have no torch-style ``new_tensor``;
            # the original ``pred.new_tensor(0.)`` raised AttributeError.
            accu = [paddle.zeros([1], dtype='float32') for _ in range(len(topk))]
            return accu[0] if return_single else accu
        pred_value, pred_label = paddle.topk(pred, maxk, axis=1)
        pred_label = pred_label.transpose(
            [1, 0])  # transpose to shape (maxk, N)
        correct = paddle.equal(pred_label,
                               (target.reshape([1, -1]).expand_as(pred_label)))
        res = []
        for k in topk:
            correct_k = paddle.sum(correct[:k].reshape([-1]).astype('float32'),
                                   axis=0,
                                   keepdim=True)
            res.append(
                paddle.multiply(correct_k,
                                paddle.to_tensor(100.0 / pred.shape[0])))
        return res[0] if return_single else res

    def forward(self, pred, batch):
        node_preds, edge_preds = pred
        gts, tag = batch[4], batch[5]
        gts = self.pre_process(gts, tag)
        node_gts, edge_gts = [], []
        for gt in gts:
            node_gts.append(gt[:, 0])
            edge_gts.append(gt[:, 1:].reshape([-1]))
        node_gts = paddle.concat(node_gts)
        edge_gts = paddle.concat(edge_gts)

        # Accuracies are computed only over non-ignored labels.
        node_valids = paddle.nonzero(node_gts != self.ignore).reshape([-1])
        edge_valids = paddle.nonzero(edge_gts != -1).reshape([-1])
        loss_node = self.loss_node(node_preds, node_gts)
        loss_edge = self.loss_edge(edge_preds, edge_gts)
        loss = self.node_weight * loss_node + self.edge_weight * loss_edge
        return dict(
            loss=loss,
            loss_node=loss_node,
            loss_edge=loss_edge,
            acc_node=self.accuracy(
                paddle.gather(node_preds, node_valids),
                paddle.gather(node_gts, node_valids)),
            acc_edge=self.accuracy(
                paddle.gather(edge_preds, edge_valids),
                paddle.gather(edge_gts, edge_valids)))
|
||||
99
backend/ppocr/losses/rec_aster_loss.py
Normal file
99
backend/ppocr/losses/rec_aster_loss.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class CosineEmbeddingLoss(nn.Layer):
    """Cosine embedding loss.

    For target == 1 the loss is ``1 - cos(x1, x2)``; otherwise it is
    ``max(0, cos(x1, x2) - margin)``; the mean over the batch is returned.
    """

    def __init__(self, margin=0.):
        super(CosineEmbeddingLoss, self).__init__()
        self.margin = margin
        self.epsilon = 1e-12  # guards against division by a zero norm

    def forward(self, x1, x2, target):
        # Migrated off the removed paddle.fluid API:
        # fluid.layers.reduce_sum/reduce_mean -> paddle.sum/paddle.mean.
        similarity = paddle.sum(
            x1 * x2, axis=-1) / (paddle.norm(
                x1, axis=-1) * paddle.norm(
                    x2, axis=-1) + self.epsilon)
        one_list = paddle.full_like(target, fill_value=1)
        out = paddle.mean(
            paddle.where(
                paddle.equal(target, one_list), 1. - similarity,
                paddle.maximum(
                    paddle.zeros_like(similarity), similarity - self.margin)))

        return out
|
||||
|
||||
|
||||
class AsterLoss(nn.Layer):
    """ASTER loss: masked sequence NLL plus a semantic (cosine-embedding)
    term weighted by 0.1."""

    def __init__(self,
                 weight=None,
                 size_average=True,
                 ignore_index=-100,
                 sequence_normalize=False,
                 sample_normalize=True,
                 **kwargs):
        super(AsterLoss, self).__init__()
        self.weight = weight
        self.size_average = size_average
        self.ignore_index = ignore_index
        self.sequence_normalize = sequence_normalize
        self.sample_normalize = sample_normalize
        self.loss_sem = CosineEmbeddingLoss()
        self.is_cosin_loss = True
        self.loss_func_rec = nn.CrossEntropyLoss(weight=None, reduction='none')

    def forward(self, predicts, batch):
        targets = batch[1].astype("int64")
        label_lengths = batch[2].astype('int64')
        sem_target = batch[3].astype('float32')
        embedding_vectors = predicts['embedding_vectors']
        rec_pred = predicts['rec_pred']

        # Semantic loss: cosine distance between the predicted embedding
        # and the ground-truth semantic vector.
        if self.is_cosin_loss:
            label_target = paddle.ones([embedding_vectors.shape[0]])
            sem_loss = paddle.sum(
                self.loss_sem(embedding_vectors, sem_target, label_target))
        else:
            sem_loss = paddle.sum(self.loss_sem(embedding_vectors, sem_target))

        # Recognition loss: NLL of the target token at each valid position.
        batch_size, def_max_length = targets.shape[0], targets.shape[1]

        # Build a 0/1 mask marking the valid label positions per sample.
        mask = paddle.zeros([batch_size, def_max_length])
        for i in range(batch_size):
            mask[i, :label_lengths[i]] = 1
        mask = paddle.cast(mask, "float32")
        max_length = max(label_lengths)
        assert max_length == rec_pred.shape[1]
        targets = targets[:, :max_length]
        mask = mask[:, :max_length]
        rec_pred = paddle.reshape(rec_pred, [-1, rec_pred.shape[2]])
        input = nn.functional.log_softmax(rec_pred, axis=1)
        targets = paddle.reshape(targets, [-1, 1])
        mask = paddle.reshape(mask, [-1, 1])
        # Pick the log-probability of the target class, zeroing padding.
        output = -paddle.index_sample(input, index=targets) * mask
        output = paddle.sum(output)
        if self.sequence_normalize:
            output = output / paddle.sum(mask)
        if self.sample_normalize:
            output = output / batch_size

        loss = output + sem_loss * 0.1
        return {'loss': loss}
|
||||
39
backend/ppocr/losses/rec_att_loss.py
Normal file
39
backend/ppocr/losses/rec_att_loss.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class AttentionLoss(nn.Layer):
    """Summed per-step cross-entropy for an attention decoder."""

    def __init__(self, **kwargs):
        super(AttentionLoss, self).__init__()
        self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')

    def forward(self, predicts, batch):
        targets = batch[1].astype("int64")
        label_lengths = batch[2].astype('int64')
        batch_size = predicts.shape[0]
        num_steps = predicts.shape[1]
        num_classes = predicts.shape[2]
        assert len(targets.shape) == len(list(predicts.shape)) - 1, \
            "The target's shape and inputs's shape is [N, d] and [N, num_steps]"

        # Flatten (N, T, C) -> (N*T, C) and (N, T) -> (N*T,) for the loss.
        flat_logits = paddle.reshape(predicts, [-1, predicts.shape[-1]])
        flat_targets = paddle.reshape(targets, [-1])

        return {'loss': paddle.sum(self.loss_func(flat_logits, flat_targets))}
|
||||
45
backend/ppocr/losses/rec_ctc_loss.py
Executable file
45
backend/ppocr/losses/rec_ctc_loss.py
Executable file
@@ -0,0 +1,45 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class CTCLoss(nn.Layer):
    """CTC loss (blank index 0) with optional focal re-weighting."""

    def __init__(self, use_focal_loss=False, **kwargs):
        super(CTCLoss, self).__init__()
        self.loss_func = nn.CTCLoss(blank=0, reduction='none')
        self.use_focal_loss = use_focal_loss

    def forward(self, predicts, batch):
        if isinstance(predicts, (list, tuple)):
            predicts = predicts[-1]
        # (B, T, C) -> (T, B, C): nn.CTCLoss expects time-major logits.
        predicts = predicts.transpose((1, 0, 2))
        N, B, _ = predicts.shape
        preds_lengths = paddle.to_tensor(
            [N] * B, dtype='int64', place=paddle.CPUPlace())
        labels = batch[1].astype("int32")
        label_lengths = batch[2].astype('int64')
        loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
        if self.use_focal_loss:
            # Down-weight easy samples: loss * (1 - exp(-loss))^2.
            weight = paddle.subtract(paddle.to_tensor([1.0]),
                                     paddle.exp(-loss))
            loss = paddle.multiply(loss, paddle.square(weight))
        return {'loss': loss.mean()}
|
||||
70
backend/ppocr/losses/rec_enhanced_ctc_loss.py
Normal file
70
backend/ppocr/losses/rec_enhanced_ctc_loss.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
from .ace_loss import ACELoss
|
||||
from .center_loss import CenterLoss
|
||||
from .rec_ctc_loss import CTCLoss
|
||||
|
||||
|
||||
class EnhancedCTCLoss(nn.Layer):
    """CTC loss optionally augmented with center loss and ACE loss terms."""

    def __init__(self,
                 use_focal_loss=False,
                 use_ace_loss=False,
                 ace_loss_weight=0.1,
                 use_center_loss=False,
                 center_loss_weight=0.05,
                 num_classes=6625,
                 feat_dim=96,
                 init_center=False,
                 center_file_path=None,
                 **kwargs):
        super(EnhancedCTCLoss, self).__init__()
        self.ctc_loss_func = CTCLoss(use_focal_loss=use_focal_loss)

        # Auxiliary terms are only instantiated when enabled.
        self.use_ace_loss = False
        if use_ace_loss:
            self.use_ace_loss = use_ace_loss
            self.ace_loss_func = ACELoss()
            self.ace_loss_weight = ace_loss_weight

        self.use_center_loss = False
        if use_center_loss:
            self.use_center_loss = use_center_loss
            self.center_loss_func = CenterLoss(
                num_classes=num_classes,
                feat_dim=feat_dim,
                init_center=init_center,
                center_file_path=center_file_path)
            self.center_loss_weight = center_loss_weight

    def __call__(self, predicts, batch):
        # Base CTC term.
        total = self.ctc_loss_func(predicts, batch)["loss"]

        if self.use_center_loss:
            total = total + self.center_loss_func(
                predicts, batch)["loss_center"] * self.center_loss_weight

        if self.use_ace_loss:
            total = total + self.ace_loss_func(
                predicts, batch)["loss_ace"] * self.ace_loss_weight

        return {'enhanced_ctc_loss': total}
|
||||
58
backend/ppocr/losses/rec_multi_loss.py
Normal file
58
backend/ppocr/losses/rec_multi_loss.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
from .rec_ctc_loss import CTCLoss
|
||||
from .rec_sar_loss import SARLoss
|
||||
|
||||
|
||||
class MultiLoss(nn.Layer):
    """Weighted sum of a CTC-branch loss and a SAR-branch loss.

    ``loss_config_list`` names the component losses; ``weight_1`` scales
    the CTC term and ``weight_2`` the SAR term.
    """

    # Explicit registry instead of eval() on config-supplied strings:
    # eval is unsafe on untrusted config and forward() only supports these.
    SUPPORTED_LOSSES = {'CTCLoss': CTCLoss, 'SARLoss': SARLoss}

    def __init__(self, **kwargs):
        super().__init__()
        self.loss_funcs = {}
        self.loss_list = kwargs.pop('loss_config_list')
        self.weight_1 = kwargs.get('weight_1', 1.0)
        self.weight_2 = kwargs.get('weight_2', 1.0)
        self.gtc_loss = kwargs.get('gtc_loss', 'sar')
        for loss_info in self.loss_list:
            for name, param in loss_info.items():
                if param is not None:
                    kwargs.update(param)
                loss_class = self.SUPPORTED_LOSSES.get(name)
                if loss_class is None:
                    raise NotImplementedError(
                        '{} is not supported in MultiLoss yet'.format(name))
                self.loss_funcs[name] = loss_class(**kwargs)

    def forward(self, predicts, batch):
        self.total_loss = {}
        total_loss = 0.0
        # batch [image, label_ctc, label_sar, length, valid_ratio]
        for name, loss_func in self.loss_funcs.items():
            if name == 'CTCLoss':
                # CTC branch uses label_ctc; drop label_sar from the batch.
                loss = loss_func(predicts['ctc'],
                                 batch[:2] + batch[3:])['loss'] * self.weight_1
            elif name == 'SARLoss':
                # SAR branch uses label_sar; drop label_ctc from the batch.
                loss = loss_func(predicts['sar'],
                                 batch[:1] + batch[2:])['loss'] * self.weight_2
            else:
                raise NotImplementedError(
                    '{} is not supported in MultiLoss yet'.format(name))
            self.total_loss[name] = loss
            total_loss += loss
        self.total_loss['loss'] = total_loss
        return self.total_loss
|
||||
30
backend/ppocr/losses/rec_nrtr_loss.py
Normal file
30
backend/ppocr/losses/rec_nrtr_loss.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import paddle
|
||||
from paddle import nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
|
||||
class NRTRLoss(nn.Layer):
    """Cross-entropy over NRTR decoder outputs with optional label
    smoothing (eps = 0.1); class index 0 is padding."""

    def __init__(self, smoothing=True, **kwargs):
        super(NRTRLoss, self).__init__()
        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
        self.smoothing = smoothing

    def forward(self, pred, batch):
        pred = pred.reshape([-1, pred.shape[2]])
        max_len = batch[2].max()
        # Skip the start token; keep up to max_len target tokens plus one.
        tgt = batch[1][:, 1:2 + max_len].reshape([-1])
        if not self.smoothing:
            loss = self.loss_func(pred, tgt)
        else:
            eps = 0.1
            n_class = pred.shape[1]
            # Smooth the one-hot targets: (1-eps) on the true class,
            # eps / (n_class - 1) spread over the rest.
            one_hot = F.one_hot(tgt, pred.shape[1])
            one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
            log_prb = F.log_softmax(pred, axis=1)
            # Exclude padding positions (label == 0) from the mean.
            non_pad_mask = paddle.not_equal(
                tgt, paddle.zeros(
                    tgt.shape, dtype=tgt.dtype))
            loss = -(one_hot * log_prb).sum(axis=1)
            loss = loss.masked_select(non_pad_mask).mean()
        return {'loss': loss}
|
||||
30
backend/ppocr/losses/rec_pren_loss.py
Normal file
30
backend/ppocr/losses/rec_pren_loss.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class PRENLoss(nn.Layer):
    """Mean cross-entropy for PREN recognition; class index 0 is padding."""

    def __init__(self, **kwargs):
        super(PRENLoss, self).__init__()
        # note: 0 is padding idx
        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)

    def forward(self, predicts, batch):
        return {'loss': self.loss_func(predicts, batch[1].astype('int64'))}
|
||||
29
backend/ppocr/losses/rec_sar_loss.py
Normal file
29
backend/ppocr/losses/rec_sar_loss.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class SARLoss(nn.Layer):
    """Mean cross-entropy for the SAR decoder; the padding class is set
    via ``ignore_index`` (default 92)."""

    def __init__(self, **kwargs):
        super(SARLoss, self).__init__()
        ignore_index = kwargs.get('ignore_index', 92)  # 6626
        self.loss_func = paddle.nn.loss.CrossEntropyLoss(
            reduction="mean", ignore_index=ignore_index)

    def forward(self, predicts, batch):
        # Drop the last time step of the logits and the first token of the
        # label so predictions and targets share the same sequence length.
        predict = predicts[:, :-1, :]
        label = batch[1].astype("int64")[:, 1:]
        batch_size = predict.shape[0]
        num_steps = predict.shape[1]
        num_classes = predict.shape[2]
        assert len(label.shape) == len(list(predict.shape)) - 1, \
            "The target's shape and inputs's shape is [N, d] and [N, num_steps]"

        flat_inputs = paddle.reshape(predict, [-1, num_classes])
        flat_targets = paddle.reshape(label, [-1])
        return {'loss': self.loss_func(flat_inputs, flat_targets)}
|
||||
47
backend/ppocr/losses/rec_srn_loss.py
Normal file
47
backend/ppocr/losses/rec_srn_loss.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class SRNLoss(nn.Layer):
    """SRN loss: summed cross-entropy over the visual (vsfd), word and
    GSRM heads, combined as 3*word + vsfd + 0.15*gsrm."""

    def __init__(self, **kwargs):
        super(SRNLoss, self).__init__()
        self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="sum")

    def forward(self, predicts, batch):
        predict = predicts['predict']
        word_predict = predicts['word_out']
        gsrm_predict = predicts['gsrm_out']
        label = batch[1]

        # All three heads share the same flattened int64 label column.
        casted_label = paddle.cast(x=label, dtype='int64')
        casted_label = paddle.reshape(x=casted_label, shape=[-1, 1])

        cost_word = self.loss_func(word_predict, label=casted_label)
        cost_gsrm = self.loss_func(gsrm_predict, label=casted_label)
        cost_vsfd = self.loss_func(predict, label=casted_label)

        # Collapse each cost into a one-element tensor before weighting.
        cost_word = paddle.reshape(x=paddle.sum(cost_word), shape=[1])
        cost_gsrm = paddle.reshape(x=paddle.sum(cost_gsrm), shape=[1])
        cost_vsfd = paddle.reshape(x=paddle.sum(cost_vsfd), shape=[1])

        sum_cost = cost_word * 3.0 + cost_vsfd + cost_gsrm * 0.15

        return {'loss': sum_cost, 'word_loss': cost_word, 'img_loss': cost_vsfd}
|
||||
109
backend/ppocr/losses/table_att_loss.py
Normal file
109
backend/ppocr/losses/table_att_loss.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
from paddle.nn import functional as F
|
||||
from paddle import fluid
|
||||
|
||||
class TableAttentionLoss(nn.Layer):
    """Table recognition loss: cross-entropy over structure tokens plus
    MSE (and optional GIoU) regression over predicted cell boxes."""

    def __init__(self, structure_weight, loc_weight, use_giou=False,
                 giou_weight=1.0, **kwargs):
        super(TableAttentionLoss, self).__init__()
        self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')
        self.structure_weight = structure_weight
        self.loc_weight = loc_weight
        self.use_giou = use_giou
        self.giou_weight = giou_weight

    def giou_loss(self, preds, bbox, eps=1e-7, reduction='mean'):
        '''
        :param preds:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,]
        :param bbox:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,]
        :return: loss
        '''
        # Migrated off the removed paddle.fluid API:
        # elementwise_max/min -> paddle.maximum/minimum, clip -> paddle.clip.
        ix1 = paddle.maximum(preds[:, 0], bbox[:, 0])
        iy1 = paddle.maximum(preds[:, 1], bbox[:, 1])
        ix2 = paddle.minimum(preds[:, 2], bbox[:, 2])
        iy2 = paddle.minimum(preds[:, 3], bbox[:, 3])

        iw = paddle.clip(ix2 - ix1 + 1e-3, 0., 1e10)
        ih = paddle.clip(iy2 - iy1 + 1e-3, 0., 1e10)

        # overlap
        inters = iw * ih

        # union
        uni = (preds[:, 2] - preds[:, 0] + 1e-3) * (preds[:, 3] - preds[:, 1] + 1e-3
              ) + (bbox[:, 2] - bbox[:, 0] + 1e-3) * (
                  bbox[:, 3] - bbox[:, 1] + 1e-3) - inters + eps

        # ious
        ious = inters / uni

        # smallest enclosing box
        ex1 = paddle.minimum(preds[:, 0], bbox[:, 0])
        ey1 = paddle.minimum(preds[:, 1], bbox[:, 1])
        ex2 = paddle.maximum(preds[:, 2], bbox[:, 2])
        ey2 = paddle.maximum(preds[:, 3], bbox[:, 3])
        ew = paddle.clip(ex2 - ex1 + 1e-3, 0., 1e10)
        eh = paddle.clip(ey2 - ey1 + 1e-3, 0., 1e10)

        # enclose erea
        enclose = ew * eh + eps
        giou = ious - (enclose - uni) / enclose

        loss = 1 - giou

        if reduction == 'mean':
            loss = paddle.mean(loss)
        elif reduction == 'sum':
            loss = paddle.sum(loss)
        else:
            raise NotImplementedError
        return loss

    def forward(self, predicts, batch):
        structure_probs = predicts['structure_probs']
        structure_targets = batch[1].astype("int64")
        structure_targets = structure_targets[:, 1:]  # drop the start token
        if len(batch) == 6:
            structure_mask = batch[5].astype("int64")
            structure_mask = structure_mask[:, 1:]
            structure_mask = paddle.reshape(structure_mask, [-1])
        structure_probs = paddle.reshape(structure_probs,
                                         [-1, structure_probs.shape[-1]])
        structure_targets = paddle.reshape(structure_targets, [-1])
        structure_loss = self.loss_func(structure_probs, structure_targets)

        if len(batch) == 6:
            structure_loss = structure_loss * structure_mask

        # structure_loss = paddle.sum(structure_loss) * self.structure_weight
        structure_loss = paddle.mean(structure_loss) * self.structure_weight

        loc_preds = predicts['loc_preds']
        loc_targets = batch[2].astype("float32")
        loc_targets_mask = batch[4].astype("float32")
        loc_targets = loc_targets[:, 1:, :]
        loc_targets_mask = loc_targets_mask[:, 1:, :]
        loc_loss = F.mse_loss(loc_preds * loc_targets_mask,
                              loc_targets) * self.loc_weight
        if self.use_giou:
            loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask,
                                           loc_targets) * self.giou_weight
            total_loss = structure_loss + loc_loss + loc_loss_giou
            return {'loss': total_loss,
                    "structure_loss": structure_loss,
                    "loc_loss": loc_loss,
                    "loc_loss_giou": loc_loss_giou}
        else:
            total_loss = structure_loss + loc_loss
            return {'loss': total_loss,
                    "structure_loss": structure_loss,
                    "loc_loss": loc_loss}
|
||||
42
backend/ppocr/losses/vqa_token_layoutlm_loss.py
Executable file
42
backend/ppocr/losses/vqa_token_layoutlm_loss.py
Executable file
@@ -0,0 +1,42 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from paddle import nn
|
||||
|
||||
|
||||
class VQASerTokenLayoutLMLoss(nn.Layer):
    """Token-classification cross-entropy for LayoutLM SER, restricted to
    positions where the attention mask is 1."""

    def __init__(self, num_classes):
        super().__init__()
        self.loss_class = nn.CrossEntropyLoss()
        self.num_classes = num_classes
        # Mirror the underlying loss's ignore index for callers to inspect.
        self.ignore_index = self.loss_class.ignore_index

    def forward(self, predicts, batch):
        labels = batch[1]
        attention_mask = batch[4]
        flat_logits = predicts.reshape([-1, self.num_classes])
        flat_labels = labels.reshape([-1, ])
        if attention_mask is None:
            loss = self.loss_class(flat_logits, flat_labels)
        else:
            # Only real tokens (mask == 1) contribute to the loss.
            keep = attention_mask.reshape([-1, ]) == 1
            loss = self.loss_class(flat_logits[keep], flat_labels[keep])
        return {'loss': loss}
|
||||
Reference in New Issue
Block a user