mirror of https://github.com/YaoFANGUK/video-subtitle-remover.git

Commit: init
backend/ppocr/modeling/necks/__init__.py (new file, 37 lines)
@@ -0,0 +1,37 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_neck']


def build_neck(config):
    from .db_fpn import DBFPN, RSEFPN, LKPAN
    from .east_fpn import EASTFPN
    from .sast_fpn import SASTFPN
    from .rnn import SequenceEncoder
    from .pg_fpn import PGFPN
    from .table_fpn import TableFPN
    from .fpn import FPN
    from .fce_fpn import FCEFPN
    from .pren_fpn import PRENFPN
    support_dict = [
        'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN',
        'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN'
    ]

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(
        support_dict))
    module_class = eval(module_name)(**config)
    return module_class
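A minimal usage sketch of build_neck (assumptions: the backend directory is on sys.path so the repo's own `ppocr.*` imports resolve, the full necks package from this commit is present, and the channel numbers are illustrative; in the real pipeline the config dict comes from the model YAML):

    from ppocr.modeling.necks import build_neck

    config = {'name': 'DBFPN', 'in_channels': [16, 24, 56, 480], 'out_channels': 256}
    neck = build_neck(config)  # note: build_neck pops 'name' from the dict in place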
backend/ppocr/modeling/necks/db_fpn.py (new file, 358 lines)
@@ -0,0 +1,358 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
import os
import sys

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))

from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule


class DSConv(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding,
                 stride=1,
                 groups=None,
                 if_act=True,
                 act="relu",
                 **kwargs):
        super(DSConv, self).__init__()
        if groups is None:
            groups = in_channels
        self.if_act = if_act
        self.act = act
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias_attr=False)

        self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)

        self.conv2 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=int(in_channels * 4),
            kernel_size=1,
            stride=1,
            bias_attr=False)

        self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)

        self.conv3 = nn.Conv2D(
            in_channels=int(in_channels * 4),
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            bias_attr=False)
        self._c = [in_channels, out_channels]
        if in_channels != out_channels:
            self.conv_end = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                bias_attr=False)

    def forward(self, inputs):

        x = self.conv1(inputs)
        x = self.bn1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                print("The activation function({}) is selected incorrectly.".
                      format(self.act))
                exit()

        x = self.conv3(x)
        if self._c[0] != self._c[1]:
            x = x + self.conv_end(inputs)
        return x


class DBFPN(nn.Layer):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(DBFPN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.in2_conv = nn.Conv2D(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)
        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse


class RSELayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
        super(RSELayer, self).__init__()
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.out_channels = out_channels
        self.in_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=self.out_channels,
            kernel_size=kernel_size,
            padding=int(kernel_size // 2),
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.se_block = SEModule(self.out_channels)
        self.shortcut = shortcut

    def forward(self, ins):
        x = self.in_conv(ins)
        if self.shortcut:
            out = x + self.se_block(x)
        else:
            out = self.se_block(x)
        return out


class RSEFPN(nn.Layer):
    def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
        super(RSEFPN, self).__init__()
        self.out_channels = out_channels
        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()

        for i in range(len(in_channels)):
            self.ins_conv.append(
                RSELayer(
                    in_channels[i],
                    out_channels,
                    kernel_size=1,
                    shortcut=shortcut))
            self.inp_conv.append(
                RSELayer(
                    out_channels,
                    out_channels // 4,
                    kernel_size=3,
                    shortcut=shortcut))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.inp_conv[3](in5)
        p4 = self.inp_conv[2](out4)
        p3 = self.inp_conv[1](out3)
        p2 = self.inp_conv[0](out2)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse


class LKPAN(nn.Layer):
    def __init__(self, in_channels, out_channels, mode='large', **kwargs):
        super(LKPAN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()
        # pan head
        self.pan_head_conv = nn.LayerList()
        self.pan_lat_conv = nn.LayerList()

        if mode.lower() == 'lite':
            p_layer = DSConv
        elif mode.lower() == 'large':
            p_layer = nn.Conv2D
        else:
            raise ValueError(
                "mode can only be one of ['lite', 'large'], but received {}".
                format(mode))

        for i in range(len(in_channels)):
            self.ins_conv.append(
                nn.Conv2D(
                    in_channels=in_channels[i],
                    out_channels=self.out_channels,
                    kernel_size=1,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            self.inp_conv.append(
                p_layer(
                    in_channels=self.out_channels,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            if i > 0:
                self.pan_head_conv.append(
                    nn.Conv2D(
                        in_channels=self.out_channels // 4,
                        out_channels=self.out_channels // 4,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        weight_attr=ParamAttr(initializer=weight_attr),
                        bias_attr=False))
            self.pan_lat_conv.append(
                p_layer(
                    in_channels=self.out_channels // 4,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        f5 = self.inp_conv[3](in5)
        f4 = self.inp_conv[2](out4)
        f3 = self.inp_conv[1](out3)
        f2 = self.inp_conv[0](out2)

        pan3 = f3 + self.pan_head_conv[0](f2)
        pan4 = f4 + self.pan_head_conv[1](pan3)
        pan5 = f5 + self.pan_head_conv[2](pan4)

        p2 = self.pan_lat_conv[0](f2)
        p3 = self.pan_lat_conv[1](pan3)
        p4 = self.pan_lat_conv[2](pan4)
        p5 = self.pan_lat_conv[3](pan5)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse
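A forward-shape sketch for DBFPN (a hypothetical check: it assumes paddle is installed and the backend directory is on sys.path, matching the file's own imports; the channel numbers mimic a MobileNetV3 backbone and are illustrative):

    import paddle
    from ppocr.modeling.necks.db_fpn import DBFPN

    fpn = DBFPN(in_channels=[16, 24, 56, 480], out_channels=256)
    # c2..c5 stand in for backbone features at strides 4, 8, 16, 32 of a 640x640 image
    feats = [paddle.rand([1, 16, 160, 160]), paddle.rand([1, 24, 80, 80]),
             paddle.rand([1, 56, 40, 40]), paddle.rand([1, 480, 20, 20])]
    fuse = fpn(feats)
    print(fuse.shape)  # [1, 256, 160, 160]: four 64-channel branches fused at 1/4 scale

RSEFPN and LKPAN accept the same inputs and produce the same fused shape; they differ in how each level is transformed (SE attention blocks vs. large-kernel convs with an extra bottom-up PAN pass).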
backend/ppocr/modeling/necks/east_fpn.py (new file, 188 lines)
@@ -0,0 +1,188 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class EASTFPN(nn.Layer):
    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTFPN, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            self.out_channels = 128
        else:
            self.out_channels = 64
        self.in_channels = in_channels[::-1]
        self.h1_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[1],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_1")
        self.h2_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[2],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_2")
        self.h3_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[3],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_3")
        self.g0_deconv = DeConvBNLayer(
            in_channels=self.in_channels[0],
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_0")
        self.g1_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_1")
        self.g2_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_2")
        self.g3_conv = ConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_3")

    def forward(self, x):
        f = x[::-1]

        h = f[0]
        g = self.g0_deconv(h)
        h = paddle.concat([g, f[1]], axis=1)
        h = self.h1_conv(h)
        g = self.g1_deconv(h)
        h = paddle.concat([g, f[2]], axis=1)
        h = self.h2_conv(h)
        g = self.g2_deconv(h)
        h = paddle.concat([g, f[3]], axis=1)
        h = self.h3_conv(h)
        g = self.g3_conv(h)

        return g
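A shape sketch for EASTFPN (hypothetical channels; note that the four inputs are ordered shallow to deep and forward reverses them, decoding U-Net style back to the shallowest scale):

    import paddle
    from ppocr.modeling.necks.east_fpn import EASTFPN

    neck = EASTFPN(in_channels=[64, 128, 256, 512], model_name="large")
    feats = [paddle.rand([1, 64, 160, 160]), paddle.rand([1, 128, 80, 80]),
             paddle.rand([1, 256, 40, 40]), paddle.rand([1, 512, 20, 20])]
    out = neck(feats)
    print(out.shape)  # [1, 128, 160, 160]: decoded back to the shallowest input scale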
backend/ppocr/modeling/necks/fce_fpn.py (new file, 280 lines)
@@ -0,0 +1,280 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
"""

import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay

__all__ = ['FCEFPN']


class ConvNormLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 norm_groups=32,
                 lr_scale=1.,
                 freeze_norm=False,
                 initializer=Normal(
                     mean=0., std=0.01)):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        bias_attr = False

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=initializer, learning_rate=1.),
            bias_attr=bias_attr)

        norm_lr = 0. if freeze_norm else 1.
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        if norm_type == 'bn':
            self.norm = nn.BatchNorm2D(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        return out


class FCEFPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
    Args:
        in_channels (list[int]): input channels of each level which can be
            derived from the output shape of backbone by from_config
        out_channels (int): output channels of each level
        spatial_scales (list[float]): the spatial scales between input feature
            maps and original input image which can be derived from the output
            shape of backbone by from_config
        has_extra_convs (bool): whether to add extra conv to the last level.
            default False
        extra_stage (int): the number of extra stages added to the last level.
            default 1
        use_c5 (bool): Whether to use c5 as the input of extra stage,
            otherwise p5 is used. default True
        norm_type (string|None): The normalization type in FPN module. If
            norm_type is None, norm will not be used after conv and if
            norm_type is string, bn, gn, sync_bn are available. default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze normalization layer.
            default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default True

    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(FCEFPN, self).__init__()
        self.out_channels = out_channels
        for s in range(extra_stage):
            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm

        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channels * 3 * 3

        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        st_stage = 4 - len(in_channels)
        ed_stage = st_stage + len(in_channels) - 1
        for i in range(st_stage, ed_stage + 1):
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i - st_stage]
            if self.norm_type is not None:
                lateral = self.add_sublayer(
                    lateral_name,
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=out_channels,
                        filter_size=1,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=in_c)))
            else:
                lateral = self.add_sublayer(
                    lateral_name,
                    nn.Conv2D(
                        in_channels=in_c,
                        out_channels=out_channels,
                        kernel_size=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=in_c))))
            self.lateral_convs.append(lateral)

        for i in range(st_stage, ed_stage + 1):
            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            if self.norm_type is not None:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    ConvNormLayer(
                        ch_in=out_channels,
                        ch_out=out_channels,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=fan)))
            else:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    nn.Conv2D(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=fan))))
            self.fpn_convs.append(fpn_conv)

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                if i == 0 and self.use_c5:
                    in_c = in_channels[-1]
                else:
                    in_c = out_channels
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                if self.norm_type is not None:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=out_channels,
                            filter_size=3,
                            stride=2,
                            norm_type=self.norm_type,
                            norm_decay=self.norm_decay,
                            freeze_norm=self.freeze_norm,
                            initializer=XavierUniform(fan_out=fan)))
                else:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        nn.Conv2D(
                            in_channels=in_c,
                            out_channels=out_channels,
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=XavierUniform(fan_out=fan))))
                self.fpn_convs.append(extra_fpn_conv)

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {
            'in_channels': [i.channels for i in input_shape],
            'spatial_scales': [1.0 / i.stride for i in input_shape],
        }

    def forward(self, body_feats):
        laterals = []
        num_levels = len(body_feats)

        for i in range(num_levels):
            laterals.append(self.lateral_convs[i](body_feats[i]))

        for i in range(1, num_levels):
            lvl = num_levels - i
            upsample = F.interpolate(
                laterals[lvl],
                scale_factor=2.,
                mode='nearest', )
            laterals[lvl - 1] += upsample

        fpn_output = []
        for lvl in range(num_levels):
            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))

        if self.extra_stage > 0:
            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
            if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has no extra convs'
                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
            else:
                if self.use_c5:
                    extra_source = body_feats[-1]
                else:
                    extra_source = fpn_output[-1]
                fpn_output.append(self.fpn_convs[num_levels](extra_source))

                for i in range(1, self.extra_stage):
                    if self.relu_before_extra_convs:
                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
                            fpn_output[-1])))
                    else:
                        fpn_output.append(self.fpn_convs[num_levels + i](
                            fpn_output[-1]))
        return fpn_output
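A shape sketch for FCEFPN with ResNet50-style channels (illustrative values; unlike the other necks here, the output is a list of pyramid levels rather than one fused map):

    import paddle
    from ppocr.modeling.necks.fce_fpn import FCEFPN

    neck = FCEFPN(in_channels=[256, 512, 1024, 2048], out_channels=256)
    feats = [paddle.rand([1, 256, 160, 160]), paddle.rand([1, 512, 80, 80]),
             paddle.rand([1, 1024, 40, 40]), paddle.rand([1, 2048, 20, 20])]
    outs = neck(feats)
    print([tuple(o.shape) for o in outs])
    # p2..p5 keep their input resolutions with 256 channels each, plus one extra
    # max-pooled 10x10 level because extra_stage defaults to 1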
backend/ppocr/modeling/necks/fpn.py (new file, 138 lines)
@@ -0,0 +1,138 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/whai362/PSENet/blob/python3/models/neck/fpn.py
"""

import paddle.nn as nn
import paddle
import math
import paddle.nn.functional as F


class Conv_BN_ReLU(nn.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size=1,
                 stride=1,
                 padding=0):
        super(Conv_BN_ReLU, self).__init__()
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias_attr=False)
        self.bn = nn.BatchNorm2D(out_planes, momentum=0.1)
        self.relu = nn.ReLU()

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Normal(
                        0, math.sqrt(2. / n)))
            elif isinstance(m, nn.BatchNorm2D):
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(1.0))
                m.bias = paddle.create_parameter(
                    shape=m.bias.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(0.0))

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))


class FPN(nn.Layer):
    def __init__(self, in_channels, out_channels):
        super(FPN, self).__init__()

        # Top layer
        self.toplayer_ = Conv_BN_ReLU(
            in_channels[3], out_channels, kernel_size=1, stride=1, padding=0)
        # Lateral layers
        self.latlayer1_ = Conv_BN_ReLU(
            in_channels[2], out_channels, kernel_size=1, stride=1, padding=0)

        self.latlayer2_ = Conv_BN_ReLU(
            in_channels[1], out_channels, kernel_size=1, stride=1, padding=0)

        self.latlayer3_ = Conv_BN_ReLU(
            in_channels[0], out_channels, kernel_size=1, stride=1, padding=0)

        # Smooth layers
        self.smooth1_ = Conv_BN_ReLU(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1)

        self.smooth2_ = Conv_BN_ReLU(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1)

        self.smooth3_ = Conv_BN_ReLU(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1)

        self.out_channels = out_channels * 4
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Normal(
                        0, math.sqrt(2. / n)))
            elif isinstance(m, nn.BatchNorm2D):
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(1.0))
                m.bias = paddle.create_parameter(
                    shape=m.bias.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(0.0))

    def _upsample(self, x, scale=1):
        return F.upsample(x, scale_factor=scale, mode='bilinear')

    def _upsample_add(self, x, y, scale=1):
        return F.upsample(x, scale_factor=scale, mode='bilinear') + y

    def forward(self, x):
        f2, f3, f4, f5 = x
        p5 = self.toplayer_(f5)

        f4 = self.latlayer1_(f4)
        p4 = self._upsample_add(p5, f4, 2)
        p4 = self.smooth1_(p4)

        f3 = self.latlayer2_(f3)
        p3 = self._upsample_add(p4, f3, 2)
        p3 = self.smooth2_(p3)

        f2 = self.latlayer3_(f2)
        p2 = self._upsample_add(p3, f2, 2)
        p2 = self.smooth3_(p2)

        p3 = self._upsample(p3, 2)
        p4 = self._upsample(p4, 4)
        p5 = self._upsample(p5, 8)

        fuse = paddle.concat([p2, p3, p4, p5], axis=1)
        return fuse
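A shape sketch for this PSENet-style FPN (illustrative channels; note that the instance's out_channels attribute becomes four times the constructor argument because the four levels are concatenated):

    import paddle
    from ppocr.modeling.necks.fpn import FPN

    neck = FPN(in_channels=[64, 128, 256, 512], out_channels=128)
    feats = [paddle.rand([1, 64, 160, 160]), paddle.rand([1, 128, 80, 80]),
             paddle.rand([1, 256, 40, 40]), paddle.rand([1, 512, 20, 20])]
    fuse = neck(feats)
    print(fuse.shape)  # [1, 512, 160, 160]: out_channels * 4 at 1/4 scale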
backend/ppocr/modeling/necks/pg_fpn.py (new file, 314 lines)
@@ -0,0 +1,314 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance',
            use_global_stats=False)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=4,
                 stride=2,
                 padding=1,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()

        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance",
            use_global_stats=False)

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class PGFPN(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super(PGFPN, self).__init__()
        num_inputs = [2048, 2048, 1024, 512, 256]
        num_outputs = [256, 256, 192, 192, 128]
        self.out_channels = 128
        self.conv_bn_layer_1 = ConvBNLayer(
            in_channels=3,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act=None,
            name='FPN_d1')
        self.conv_bn_layer_2 = ConvBNLayer(
            in_channels=64,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act=None,
            name='FPN_d2')
        self.conv_bn_layer_3 = ConvBNLayer(
            in_channels=256,
            out_channels=128,
            kernel_size=3,
            stride=1,
            act=None,
            name='FPN_d3')
        self.conv_bn_layer_4 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=2,
            act=None,
            name='FPN_d4')
        self.conv_bn_layer_5 = ConvBNLayer(
            in_channels=64,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name='FPN_d5')
        self.conv_bn_layer_6 = ConvBNLayer(
            in_channels=64,
            out_channels=128,
            kernel_size=3,
            stride=2,
            act=None,
            name='FPN_d6')
        self.conv_bn_layer_7 = ConvBNLayer(
            in_channels=128,
            out_channels=128,
            kernel_size=3,
            stride=1,
            act='relu',
            name='FPN_d7')
        self.conv_bn_layer_8 = ConvBNLayer(
            in_channels=128,
            out_channels=128,
            kernel_size=1,
            stride=1,
            act=None,
            name='FPN_d8')

        self.conv_h0 = ConvBNLayer(
            in_channels=num_inputs[0],
            out_channels=num_outputs[0],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(0))
        self.conv_h1 = ConvBNLayer(
            in_channels=num_inputs[1],
            out_channels=num_outputs[1],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(1))
        self.conv_h2 = ConvBNLayer(
            in_channels=num_inputs[2],
            out_channels=num_outputs[2],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(2))
        self.conv_h3 = ConvBNLayer(
            in_channels=num_inputs[3],
            out_channels=num_outputs[3],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(3))
        self.conv_h4 = ConvBNLayer(
            in_channels=num_inputs[4],
            out_channels=num_outputs[4],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(4))

        self.dconv0 = DeConvBNLayer(
            in_channels=num_outputs[0],
            out_channels=num_outputs[0 + 1],
            name="dconv_{}".format(0))
        self.dconv1 = DeConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[1 + 1],
            act=None,
            name="dconv_{}".format(1))
        self.dconv2 = DeConvBNLayer(
            in_channels=num_outputs[2],
            out_channels=num_outputs[2 + 1],
            act=None,
            name="dconv_{}".format(2))
        self.dconv3 = DeConvBNLayer(
            in_channels=num_outputs[3],
            out_channels=num_outputs[3 + 1],
            act=None,
            name="dconv_{}".format(3))
        self.conv_g1 = ConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[1],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(1))
        self.conv_g2 = ConvBNLayer(
            in_channels=num_outputs[2],
            out_channels=num_outputs[2],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(2))
        self.conv_g3 = ConvBNLayer(
            in_channels=num_outputs[3],
            out_channels=num_outputs[3],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(3))
        self.conv_g4 = ConvBNLayer(
            in_channels=num_outputs[4],
            out_channels=num_outputs[4],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(4))
        self.convf = ConvBNLayer(
            in_channels=num_outputs[4],
            out_channels=num_outputs[4],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_f{}".format(4))

    def forward(self, x):
        c0, c1, c2, c3, c4, c5, c6 = x
        # FPN_Down_Fusion
        f = [c0, c1, c2]
        g = [None, None, None]
        h = [None, None, None]
        h[0] = self.conv_bn_layer_1(f[0])
        h[1] = self.conv_bn_layer_2(f[1])
        h[2] = self.conv_bn_layer_3(f[2])

        g[0] = self.conv_bn_layer_4(h[0])
        g[1] = paddle.add(g[0], h[1])
        g[1] = F.relu(g[1])
        g[1] = self.conv_bn_layer_5(g[1])
        g[1] = self.conv_bn_layer_6(g[1])

        g[2] = paddle.add(g[1], h[2])
        g[2] = F.relu(g[2])
        g[2] = self.conv_bn_layer_7(g[2])
        f_down = self.conv_bn_layer_8(g[2])

        # FPN UP Fusion
        f1 = [c6, c5, c4, c3, c2]
        g = [None, None, None, None, None]
        h = [None, None, None, None, None]
        h[0] = self.conv_h0(f1[0])
        h[1] = self.conv_h1(f1[1])
        h[2] = self.conv_h2(f1[2])
        h[3] = self.conv_h3(f1[3])
        h[4] = self.conv_h4(f1[4])

        g[0] = self.dconv0(h[0])
        g[1] = paddle.add(g[0], h[1])
        g[1] = F.relu(g[1])
        g[1] = self.conv_g1(g[1])
        g[1] = self.dconv1(g[1])

        g[2] = paddle.add(g[1], h[2])
        g[2] = F.relu(g[2])
        g[2] = self.conv_g2(g[2])
        g[2] = self.dconv2(g[2])

        g[3] = paddle.add(g[2], h[3])
        g[3] = F.relu(g[3])
        g[3] = self.conv_g3(g[3])
        g[3] = self.dconv3(g[3])

        g[4] = paddle.add(x=g[3], y=h[4])
        g[4] = F.relu(g[4])
        g[4] = self.conv_g4(g[4])
        f_up = self.convf(g[4])
        f_common = paddle.add(f_down, f_up)
        f_common = F.relu(f_common)
        return f_common
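PGFPN hard-codes its channel plan (num_inputs, num_outputs and the first three down-fusion convs), so the seven inputs must follow it: c0 has 3 channels at image resolution and each later level halves the spatial size. A forward-shape sketch under that reading (the spatial sizes are one consistent choice for a 512x512 input; this layout is inferred from the layer widths, not documented in the source):

    import paddle
    from ppocr.modeling.necks.pg_fpn import PGFPN

    neck = PGFPN(in_channels=None)  # in_channels is accepted but never used
    chs = [3, 64, 256, 512, 1024, 2048, 2048]  # c0..c6 channel widths
    sizes = [512, 256, 128, 64, 32, 16, 8]     # strides 1, 2, 4, 8, 16, 32, 64
    feats = [paddle.rand([1, c, s, s]) for c, s in zip(chs, sizes)]
    out = neck(feats)
    print(out.shape)  # [1, 128, 128, 128]: down- and up-fusion branches summed at 1/4 scale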
backend/ppocr/modeling/necks/pren_fpn.py (new file, 163 lines)
@@ -0,0 +1,163 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code is adapted from:
https://github.com/RuijieJ/pren/blob/main/Nets/Aggregation.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F


class PoolAggregate(nn.Layer):
    def __init__(self, n_r, d_in, d_middle=None, d_out=None):
        super(PoolAggregate, self).__init__()
        if not d_middle:
            d_middle = d_in
        if not d_out:
            d_out = d_in

        self.d_in = d_in
        self.d_middle = d_middle
        self.d_out = d_out
        self.act = nn.Swish()

        self.n_r = n_r
        self.aggs = self._build_aggs()

    def _build_aggs(self):
        aggs = []
        for i in range(self.n_r):
            aggs.append(
                self.add_sublayer(
                    '{}'.format(i),
                    nn.Sequential(
                        ('conv1', nn.Conv2D(
                            self.d_in, self.d_middle, 3, 2, 1, bias_attr=False)
                         ), ('bn1', nn.BatchNorm(self.d_middle)),
                        ('act', self.act), ('conv2', nn.Conv2D(
                            self.d_middle, self.d_out, 3, 2, 1, bias_attr=False
                        )), ('bn2', nn.BatchNorm(self.d_out)))))
        return aggs

    def forward(self, x):
        b = x.shape[0]
        outs = []
        for agg in self.aggs:
            y = agg(x)
            p = F.adaptive_avg_pool2d(y, 1)
            outs.append(p.reshape((b, 1, self.d_out)))
        out = paddle.concat(outs, 1)
        return out


class WeightAggregate(nn.Layer):
    def __init__(self, n_r, d_in, d_middle=None, d_out=None):
        super(WeightAggregate, self).__init__()
        if not d_middle:
            d_middle = d_in
        if not d_out:
            d_out = d_in

        self.n_r = n_r
        self.d_out = d_out
        self.act = nn.Swish()

        self.conv_n = nn.Sequential(
            ('conv1', nn.Conv2D(
                d_in, d_in, 3, 1, 1,
                bias_attr=False)), ('bn1', nn.BatchNorm(d_in)),
            ('act1', self.act), ('conv2', nn.Conv2D(
                d_in, n_r, 1, bias_attr=False)), ('bn2', nn.BatchNorm(n_r)),
            ('act2', nn.Sigmoid()))
        self.conv_d = nn.Sequential(
            ('conv1', nn.Conv2D(
                d_in, d_middle, 3, 1, 1,
                bias_attr=False)), ('bn1', nn.BatchNorm(d_middle)),
            ('act1', self.act), ('conv2', nn.Conv2D(
                d_middle, d_out, 1,
                bias_attr=False)), ('bn2', nn.BatchNorm(d_out)))

    def forward(self, x):
        b, _, h, w = x.shape

        hmaps = self.conv_n(x)
        fmaps = self.conv_d(x)
        r = paddle.bmm(
            hmaps.reshape((b, self.n_r, h * w)),
            fmaps.reshape((b, self.d_out, h * w)).transpose((0, 2, 1)))
        return r


class GCN(nn.Layer):
    def __init__(self, d_in, n_in, d_out=None, n_out=None, dropout=0.1):
        super(GCN, self).__init__()
        if not d_out:
            d_out = d_in
        if not n_out:
            n_out = d_in

        self.conv_n = nn.Conv1D(n_in, n_out, 1)
        self.linear = nn.Linear(d_in, d_out)
        self.dropout = nn.Dropout(dropout)
        self.act = nn.Swish()

    def forward(self, x):
        x = self.conv_n(x)
        x = self.dropout(self.linear(x))
        return self.act(x)


class PRENFPN(nn.Layer):
    def __init__(self, in_channels, n_r, d_model, max_len, dropout):
        super(PRENFPN, self).__init__()
        assert len(in_channels) == 3, "in_channels' length must be 3."
        c1, c2, c3 = in_channels  # the depths are from big to small
        # build fpn
        assert d_model % 3 == 0, "{} can't be divided by 3.".format(d_model)
        self.agg_p1 = PoolAggregate(n_r, c1, d_out=d_model // 3)
        self.agg_p2 = PoolAggregate(n_r, c2, d_out=d_model // 3)
        self.agg_p3 = PoolAggregate(n_r, c3, d_out=d_model // 3)

        self.agg_w1 = WeightAggregate(n_r, c1, 4 * c1, d_model // 3)
        self.agg_w2 = WeightAggregate(n_r, c2, 4 * c2, d_model // 3)
        self.agg_w3 = WeightAggregate(n_r, c3, 4 * c3, d_model // 3)

        self.gcn_pool = GCN(d_model, n_r, d_model, max_len, dropout)
        self.gcn_weight = GCN(d_model, n_r, d_model, max_len, dropout)

        self.out_channels = d_model

    def forward(self, inputs):
        f3, f5, f7 = inputs

        rp1 = self.agg_p1(f3)
        rp2 = self.agg_p2(f5)
        rp3 = self.agg_p3(f7)
        rp = paddle.concat([rp1, rp2, rp3], 2)  # [b,nr,d]

        rw1 = self.agg_w1(f3)
        rw2 = self.agg_w2(f5)
        rw3 = self.agg_w3(f7)
        rw = paddle.concat([rw1, rw2, rw3], 2)  # [b,nr,d]

        y1 = self.gcn_pool(rp)
        y2 = self.gcn_weight(rw)
        y = 0.5 * (y1 + y2)
        return y  # [b,max_len,d]
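A shape sketch for PRENFPN (hypothetical values: the channel list roughly matches an EfficientNet-B3 backbone, and the spatial sizes are chosen so each map survives the two stride-2 convs inside PoolAggregate):

    import paddle
    from ppocr.modeling.necks.pren_fpn import PRENFPN

    neck = PRENFPN(in_channels=[48, 136, 384], n_r=5, d_model=384,
                   max_len=25, dropout=0.1)
    f3 = paddle.rand([2, 48, 16, 64])
    f5 = paddle.rand([2, 136, 8, 32])
    f7 = paddle.rand([2, 384, 4, 16])
    y = neck([f3, f5, f7])
    print(y.shape)  # [2, 25, 384]: one d_model-dim vector per decoded position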
backend/ppocr/modeling/necks/rnn.py (new file, 191 lines)
@@ -0,0 +1,191 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn

from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
from ppocr.modeling.backbones.rec_svtrnet import Block, ConvBNLayer, trunc_normal_, zeros_, ones_


class Im2Seq(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == 1
        x = x.squeeze(axis=2)
        x = x.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        return x


class EncoderWithRNN(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN, self).__init__()
        self.out_channels = hidden_size * 2
        self.lstm = nn.LSTM(
            in_channels, hidden_size, direction='bidirectional', num_layers=2)

    def forward(self, x):
        x, _ = self.lstm(x)
        return x


class EncoderWithFC(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        weight_attr, bias_attr = get_para_bias_attr(
            l2_decay=0.00001, k=in_channels)
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name='reduce_encoder_fea')

    def forward(self, x):
        x = self.fc(x)
        return x


class EncoderWithSVTR(nn.Layer):
    def __init__(
            self,
            in_channels,
            dims=64,  # XS
            depth=2,
            hidden_dims=120,
            use_guide=False,
            num_heads=8,
            qkv_bias=True,
            mlp_ratio=2.0,
            drop_rate=0.1,
            attn_drop_rate=0.1,
            drop_path=0.,
            qk_scale=None):
        super(EncoderWithSVTR, self).__init__()
        self.depth = depth
        self.use_guide = use_guide
        self.conv1 = ConvBNLayer(
            in_channels, in_channels // 8, padding=1, act=nn.Swish)
        self.conv2 = ConvBNLayer(
            in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish)

        self.svtr_block = nn.LayerList([
            Block(
                dim=hidden_dims,
                num_heads=num_heads,
                mixer='Global',
                HW=None,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=nn.Swish,
                attn_drop=attn_drop_rate,
                drop_path=drop_path,
                norm_layer='nn.LayerNorm',
                epsilon=1e-05,
                prenorm=False) for i in range(depth)
        ])
        self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6)
        self.conv3 = ConvBNLayer(
            hidden_dims, in_channels, kernel_size=1, act=nn.Swish)
        # last conv-nxn, the input is concat of input tensor and conv3 output tensor
        self.conv4 = ConvBNLayer(
            2 * in_channels, in_channels // 8, padding=1, act=nn.Swish)

        self.conv1x1 = ConvBNLayer(
            in_channels // 8, dims, kernel_size=1, act=nn.Swish)
        self.out_channels = dims
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward(self, x):
        # for use guide
        if self.use_guide:
            z = x.clone()
            z.stop_gradient = True
        else:
            z = x
        # for short cut
        h = z
        # reduce dim
        z = self.conv1(z)
        z = self.conv2(z)
        # SVTR global block
        B, C, H, W = z.shape
        z = z.flatten(2).transpose([0, 2, 1])
        for blk in self.svtr_block:
            z = blk(z)
        z = self.norm(z)
        # last stage
        z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2])
        z = self.conv3(z)
        z = paddle.concat((h, z), axis=1)
        z = self.conv1x1(self.conv4(z))
        return z


class SequenceEncoder(nn.Layer):
    def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = Im2Seq(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        self.encoder_type = encoder_type
        if encoder_type == 'reshape':
            self.only_reshape = True
        else:
            support_encoder_dict = {
                'reshape': Im2Seq,
                'fc': EncoderWithFC,
                'rnn': EncoderWithRNN,
                'svtr': EncoderWithSVTR
            }
            assert encoder_type in support_encoder_dict, '{} must in {}'.format(
                encoder_type, support_encoder_dict.keys())
            if encoder_type == "svtr":
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, **kwargs)
            else:
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, hidden_size)
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        if self.encoder_type != 'svtr':
            x = self.encoder_reshape(x)
            if not self.only_reshape:
                x = self.encoder(x)
            return x
        else:
            x = self.encoder(x)
            x = self.encoder_reshape(x)
            return x
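A sketch of the CRNN-style sequence stage (illustrative sizes; the recognition backbone is assumed to have pooled the feature height to 1, which Im2Seq asserts):

    import paddle
    from ppocr.modeling.necks.rnn import SequenceEncoder

    x = paddle.rand([2, 288, 1, 80])  # (batch, channels, height=1, width)
    encoder = SequenceEncoder(in_channels=288, encoder_type='rnn', hidden_size=48)
    seq = encoder(x)
    print(seq.shape)  # [2, 80, 96]: width becomes the time axis; 2*hidden_size from the BiLSTM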
backend/ppocr/modeling/necks/sast_fpn.py (new file, 284 lines, listing truncated below)
@@ -0,0 +1,284 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x

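A quick check of the padding arithmetic (illustrative; the layer name and sizes are arbitrary): padding=(kernel_size - 1) // 2 preserves the spatial size for odd kernels at stride 1 and halves it at stride 2.

import paddle
conv = ConvBNLayer(16, 32, kernel_size=3, stride=1, act='relu', name='demo')
print(conv(paddle.rand([1, 16, 40, 40])).shape)   # [1, 32, 40, 40]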
class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x

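Worth noting why FPN_Up_Fusion below pairs kernel_size=4 with stride=2: with padding=(4 - 1) // 2 = 1, the transposed convolution output is (in - 1) * 2 - 2 + 4 = 2 * in, an exact doubling of the spatial size. A sketch (names and sizes are illustrative):

import paddle
up = DeConvBNLayer(64, 32, kernel_size=4, stride=2, act='relu', name='demo')
print(up(paddle.rand([1, 64, 16, 16])).shape)   # [1, 32, 32, 32]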
class FPN_Up_Fusion(nn.Layer):
    def __init__(self, in_channels):
        super(FPN_Up_Fusion, self).__init__()
        in_channels = in_channels[::-1]
        out_channels = [256, 256, 192, 192, 128]

        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2')
        self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3')
        self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4')

        self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0')

        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'),
            DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2')
        )
        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'),
            DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2')
        )
        self.g3_conv = nn.Sequential(
            ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'),
            DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2')
        )

        self.g4_conv = nn.Sequential(
            ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'),
            ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2')
        )

    def _add_relu(self, x1, x2):
        x = paddle.add(x=x1, y=x2)
        x = F.relu(x)
        return x

    def forward(self, x):
        f = x[2:][::-1]
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        h3 = self.h3_conv(f[3])
        h4 = self.h4_conv(f[4])

        g0 = self.g0_conv(h0)
        g1 = self._add_relu(g0, h1)
        g1 = self.g1_conv(g1)
        g2 = self.g2_conv(self._add_relu(g1, h2))
        g3 = self.g3_conv(self._add_relu(g2, h3))
        g4 = self.g4_conv(self._add_relu(g3, h4))

        return g4

class FPN_Down_Fusion(nn.Layer):
    def __init__(self, in_channels):
        super(FPN_Down_Fusion, self).__init__()
        out_channels = [32, 64, 128]

        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2')

        self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0')

        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'),
            ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2')
        )

        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'),
            ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2')
        )

    def forward(self, x):
        f = x[:3]
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        g0 = self.g0_conv(h0)
        g1 = paddle.add(x=g0, y=h1)
        g1 = F.relu(g1)
        g1 = self.g1_conv(g1)
        g2 = paddle.add(x=g1, y=h2)
        g2 = F.relu(g2)
        g2 = self.g2_conv(g2)
        return g2

class Cross_Attention(nn.Layer):
    def __init__(self, in_channels):
        super(Cross_Attention, self).__init__()
        self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
        self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
        self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')

        self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
        self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')

        self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
        self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')

        self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')

    def _cal_fweight(self, f, shape):
        f_theta, f_phi, f_g = f
        # flatten
        f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
        f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128])
        f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
        f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128])
        f_g = paddle.transpose(f_g, [0, 2, 3, 1])
        f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128])
        # correlation
        f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
        # scale
        f_attn = f_attn / (128**0.5)
        f_attn = F.softmax(f_attn)
        # weighted sum
        f_weight = paddle.matmul(f_attn, f_g)
        f_weight = paddle.reshape(
            f_weight, [shape[0], shape[1], shape[2], 128])
        return f_weight

    def forward(self, f_common):
        f_shape = paddle.shape(f_common)

        f_theta = self.theta_conv(f_common)
        f_phi = self.phi_conv(f_common)
        f_g = self.g_conv(f_common)

        ######## horizon ########
        fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
                                      [f_shape[0], f_shape[2], f_shape[3]])
        fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = self.fh_weight_conv(fh_weight)
        # short cut
        fh_sc = self.fh_sc_conv(f_common)
        f_h = F.relu(fh_weight + fh_sc)

        ######## vertical ########
        fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
        fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
        fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
                                      [f_shape[0], f_shape[3], f_shape[2]])
        fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = self.fv_weight_conv(fv_weight)
        # short cut
        fv_sc = self.fv_sc_conv(f_common)
        f_v = F.relu(fv_weight + fv_sc)

        ######## merge ########
        f_attn = paddle.concat([f_h, f_v], axis=1)
        f_attn = self.f_attn_conv(f_attn)
        return f_attn

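The core of _cal_fweight is a scaled dot-product attention applied independently to every row (horizontal pass) or column (vertical pass). A toy equivalent with assumed sizes, where each of the N*H rows attends over its W positions with 128-d features:

import paddle
import paddle.nn.functional as F
q = paddle.rand([4, 10, 128])   # (N*H, W, 128)
k = paddle.rand([4, 10, 128])
v = paddle.rand([4, 10, 128])
attn = F.softmax(paddle.matmul(q, k.transpose([0, 2, 1])) / 128**0.5)
print(paddle.matmul(attn, v).shape)   # [4, 10, 128]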
class SASTFPN(nn.Layer):
    def __init__(self, in_channels, with_cab=False, **kwargs):
        super(SASTFPN, self).__init__()
        self.in_channels = in_channels
        self.with_cab = with_cab
        self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
        self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
        self.out_channels = 128
        self.cross_attention = Cross_Attention(self.out_channels)

    def forward(self, x):
        # down fpn
        f_down = self.FPN_Down_Fusion(x)

        # up fpn
        f_up = self.FPN_Up_Fusion(x)

        # fusion
        f_common = paddle.add(x=f_down, y=f_up)
        f_common = F.relu(f_common)

        if self.with_cab:
            # enhance f_common with the cross-attention block (CAB)
            f_common = self.cross_attention(f_common)

        return f_common
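A shape walk-through (the seven-level channel list is an assumption about the SAST backbone; what matters to the wiring is that the maps arrive at strides 1, 2, 4, ..., 64). FPN_Down_Fusion consumes the three shallowest maps and downsamples twice; FPN_Up_Fusion consumes the five deepest and upsamples four times; both land at 1/4 scale with 128 channels, so they can be added:

import paddle
in_channels = [3, 64, 128, 256, 512, 512, 512]   # assumed backbone channels
fpn = SASTFPN(in_channels, with_cab=False)
feats = [paddle.rand([1, c, 256 // 2**i, 256 // 2**i])
         for i, c in enumerate(in_channels)]
print(fpn(feats).shape)   # [1, 128, 64, 64]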
110
backend/ppocr/modeling/necks/table_fpn.py
Normal file
@@ -0,0 +1,110 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr

class TableFPN(nn.Layer):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(TableFPN, self).__init__()
        self.out_channels = 512
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.in2_conv = nn.Conv2D(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            stride=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.fuse_conv = nn.Conv2D(
            in_channels=self.out_channels * 4,
            out_channels=512,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.upsample(
            in5, size=in4.shape[2:4], mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, size=in3.shape[2:4], mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, size=in2.shape[2:4], mode="nearest", align_mode=1)  # 1/4

        p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1)
        p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1)
        p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1)
        fuse = paddle.concat([in5, p4, p3, p2], axis=1)
        fuse_conv = self.fuse_conv(fuse) * 0.005
        return [c5 + fuse_conv]
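As written, the p5_conv through p2_conv layers are constructed but never called in forward; the lateral outputs are resized to the c5 scale, fused, scaled by 0.005, and added back onto c5 as a residual, which means in_channels[3] must equal 512 for the addition to be well-formed. A usage sketch with an assumed channel list:

import paddle
fpn = TableFPN(in_channels=[64, 128, 256, 512], out_channels=512)
c2 = paddle.rand([1, 64, 80, 80])
c3 = paddle.rand([1, 128, 40, 40])
c4 = paddle.rand([1, 256, 20, 20])
c5 = paddle.rand([1, 512, 10, 10])
out, = fpn([c2, c3, c4, c5])
print(out.shape)   # [1, 512, 10, 10]: everything is resampled to the c5 scale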