mirror of https://github.com/YaoFANGUK/video-subtitle-remover.git

Commit: init
backend/ppocr/modeling/necks/__init__.py (new file, 37 lines)
@@ -0,0 +1,37 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_neck']


def build_neck(config):
    from .db_fpn import DBFPN, RSEFPN, LKPAN
    from .east_fpn import EASTFPN
    from .sast_fpn import SASTFPN
    from .rnn import SequenceEncoder
    from .pg_fpn import PGFPN
    from .table_fpn import TableFPN
    from .fpn import FPN
    from .fce_fpn import FCEFPN
    from .pren_fpn import PRENFPN
    support_dict = [
        'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN',
        'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN'
    ]

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(
        support_dict))
    module_class = eval(module_name)(**config)
    return module_class
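A minimal usage sketch of build_neck (assumptions: the backend directory is on sys.path so the repo's own `ppocr.*` imports resolve, the full necks package from this commit is present, and the channel numbers are illustrative; in the real pipeline the config dict comes from the model YAML):

    from ppocr.modeling.necks import build_neck

    config = {'name': 'DBFPN', 'in_channels': [16, 24, 56, 480], 'out_channels': 256}
    neck = build_neck(config)  # note: build_neck pops 'name' from the dict in place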
backend/ppocr/modeling/necks/db_fpn.py (new file, 358 lines)
@@ -0,0 +1,358 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
import os
import sys

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))

from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule


class DSConv(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding,
                 stride=1,
                 groups=None,
                 if_act=True,
                 act="relu",
                 **kwargs):
        super(DSConv, self).__init__()
        if groups is None:
            groups = in_channels
        self.if_act = if_act
        self.act = act
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias_attr=False)

        self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)

        self.conv2 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=int(in_channels * 4),
            kernel_size=1,
            stride=1,
            bias_attr=False)

        self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)

        self.conv3 = nn.Conv2D(
            in_channels=int(in_channels * 4),
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            bias_attr=False)
        self._c = [in_channels, out_channels]
        if in_channels != out_channels:
            self.conv_end = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                bias_attr=False)

    def forward(self, inputs):

        x = self.conv1(inputs)
        x = self.bn1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                print("The activation function({}) is selected incorrectly.".
                      format(self.act))
                exit()

        x = self.conv3(x)
        if self._c[0] != self._c[1]:
            x = x + self.conv_end(inputs)
        return x


class DBFPN(nn.Layer):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(DBFPN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.in2_conv = nn.Conv2D(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)
        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse


class RSELayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
        super(RSELayer, self).__init__()
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.out_channels = out_channels
        self.in_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=self.out_channels,
            kernel_size=kernel_size,
            padding=int(kernel_size // 2),
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.se_block = SEModule(self.out_channels)
        self.shortcut = shortcut

    def forward(self, ins):
        x = self.in_conv(ins)
        if self.shortcut:
            out = x + self.se_block(x)
        else:
            out = self.se_block(x)
        return out


class RSEFPN(nn.Layer):
    def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
        super(RSEFPN, self).__init__()
        self.out_channels = out_channels
        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()

        for i in range(len(in_channels)):
            self.ins_conv.append(
                RSELayer(
                    in_channels[i],
                    out_channels,
                    kernel_size=1,
                    shortcut=shortcut))
            self.inp_conv.append(
                RSELayer(
                    out_channels,
                    out_channels // 4,
                    kernel_size=3,
                    shortcut=shortcut))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.inp_conv[3](in5)
        p4 = self.inp_conv[2](out4)
        p3 = self.inp_conv[1](out3)
        p2 = self.inp_conv[0](out2)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse


class LKPAN(nn.Layer):
    def __init__(self, in_channels, out_channels, mode='large', **kwargs):
        super(LKPAN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()
        # pan head
        self.pan_head_conv = nn.LayerList()
        self.pan_lat_conv = nn.LayerList()

        if mode.lower() == 'lite':
            p_layer = DSConv
        elif mode.lower() == 'large':
            p_layer = nn.Conv2D
        else:
            raise ValueError(
                "mode can only be one of ['lite', 'large'], but received {}".
                format(mode))

        for i in range(len(in_channels)):
            self.ins_conv.append(
                nn.Conv2D(
                    in_channels=in_channels[i],
                    out_channels=self.out_channels,
                    kernel_size=1,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            self.inp_conv.append(
                p_layer(
                    in_channels=self.out_channels,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            if i > 0:
                self.pan_head_conv.append(
                    nn.Conv2D(
                        in_channels=self.out_channels // 4,
                        out_channels=self.out_channels // 4,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        weight_attr=ParamAttr(initializer=weight_attr),
                        bias_attr=False))
            self.pan_lat_conv.append(
                p_layer(
                    in_channels=self.out_channels // 4,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        f5 = self.inp_conv[3](in5)
        f4 = self.inp_conv[2](out4)
        f3 = self.inp_conv[1](out3)
        f2 = self.inp_conv[0](out2)

        pan3 = f3 + self.pan_head_conv[0](f2)
        pan4 = f4 + self.pan_head_conv[1](pan3)
        pan5 = f5 + self.pan_head_conv[2](pan4)

        p2 = self.pan_lat_conv[0](f2)
        p3 = self.pan_lat_conv[1](pan3)
        p4 = self.pan_lat_conv[2](pan4)
        p5 = self.pan_lat_conv[3](pan5)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse
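A forward-shape sketch for DBFPN (a hypothetical check: it assumes paddle is installed and the backend directory is on sys.path, matching the file's own imports; the channel numbers mimic a MobileNetV3 backbone and are illustrative):

    import paddle
    from ppocr.modeling.necks.db_fpn import DBFPN

    fpn = DBFPN(in_channels=[16, 24, 56, 480], out_channels=256)
    # c2..c5 stand in for backbone features at strides 4, 8, 16, 32 of a 640x640 image
    feats = [paddle.rand([1, 16, 160, 160]), paddle.rand([1, 24, 80, 80]),
             paddle.rand([1, 56, 40, 40]), paddle.rand([1, 480, 20, 20])]
    fuse = fpn(feats)
    print(fuse.shape)  # [1, 256, 160, 160]: four 64-channel branches fused at 1/4 scale

RSEFPN and LKPAN accept the same inputs and produce the same fused shape; they differ in how each level is transformed (SE attention blocks vs. large-kernel convs with an extra bottom-up PAN pass).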
backend/ppocr/modeling/necks/east_fpn.py (new file, 188 lines)
@@ -0,0 +1,188 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class EASTFPN(nn.Layer):
    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTFPN, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            self.out_channels = 128
        else:
            self.out_channels = 64
        self.in_channels = in_channels[::-1]
        self.h1_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[1],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_1")
        self.h2_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[2],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_2")
        self.h3_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[3],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_3")
        self.g0_deconv = DeConvBNLayer(
            in_channels=self.in_channels[0],
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_0")
        self.g1_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_1")
        self.g2_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_2")
        self.g3_conv = ConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_3")

    def forward(self, x):
        f = x[::-1]

        h = f[0]
        g = self.g0_deconv(h)
        h = paddle.concat([g, f[1]], axis=1)
        h = self.h1_conv(h)
        g = self.g1_deconv(h)
        h = paddle.concat([g, f[2]], axis=1)
        h = self.h2_conv(h)
        g = self.g2_deconv(h)
        h = paddle.concat([g, f[3]], axis=1)
        h = self.h3_conv(h)
        g = self.g3_conv(h)

        return g
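A shape sketch for EASTFPN (hypothetical channels; note that the four inputs are ordered shallow to deep and forward reverses them, decoding U-Net style back to the shallowest scale):

    import paddle
    from ppocr.modeling.necks.east_fpn import EASTFPN

    neck = EASTFPN(in_channels=[64, 128, 256, 512], model_name="large")
    feats = [paddle.rand([1, 64, 160, 160]), paddle.rand([1, 128, 80, 80]),
             paddle.rand([1, 256, 40, 40]), paddle.rand([1, 512, 20, 20])]
    out = neck(feats)
    print(out.shape)  # [1, 128, 160, 160]: decoded back to the shallowest input scale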
backend/ppocr/modeling/necks/fce_fpn.py (new file, 280 lines)
@@ -0,0 +1,280 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
"""

import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay

__all__ = ['FCEFPN']


class ConvNormLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 norm_groups=32,
                 lr_scale=1.,
                 freeze_norm=False,
                 initializer=Normal(
                     mean=0., std=0.01)):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        bias_attr = False

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=initializer, learning_rate=1.),
            bias_attr=bias_attr)

        norm_lr = 0. if freeze_norm else 1.
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        if norm_type == 'bn':
            self.norm = nn.BatchNorm2D(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        return out


class FCEFPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
    Args:
        in_channels (list[int]): input channels of each level which can be
            derived from the output shape of backbone by from_config
        out_channels (int): output channels of each level
        spatial_scales (list[float]): the spatial scales between input feature
            maps and original input image which can be derived from the output
            shape of backbone by from_config
        has_extra_convs (bool): whether to add extra conv to the last level.
            default False
        extra_stage (int): the number of extra stages added to the last level.
            default 1
        use_c5 (bool): Whether to use c5 as the input of extra stage,
            otherwise p5 is used. default True
        norm_type (string|None): The normalization type in FPN module. If
            norm_type is None, norm will not be used after conv and if
            norm_type is string, bn, gn, sync_bn are available. default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze normalization layer.
            default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default True

    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(FCEFPN, self).__init__()
        self.out_channels = out_channels
        for s in range(extra_stage):
            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm

        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channels * 3 * 3

        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        st_stage = 4 - len(in_channels)
        ed_stage = st_stage + len(in_channels) - 1
        for i in range(st_stage, ed_stage + 1):
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i - st_stage]
            if self.norm_type is not None:
                lateral = self.add_sublayer(
                    lateral_name,
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=out_channels,
                        filter_size=1,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=in_c)))
            else:
                lateral = self.add_sublayer(
                    lateral_name,
                    nn.Conv2D(
                        in_channels=in_c,
                        out_channels=out_channels,
                        kernel_size=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=in_c))))
            self.lateral_convs.append(lateral)

        for i in range(st_stage, ed_stage + 1):
            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            if self.norm_type is not None:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    ConvNormLayer(
                        ch_in=out_channels,
                        ch_out=out_channels,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=fan)))
            else:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    nn.Conv2D(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=fan))))
            self.fpn_convs.append(fpn_conv)

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                if i == 0 and self.use_c5:
                    in_c = in_channels[-1]
                else:
                    in_c = out_channels
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                if self.norm_type is not None:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=out_channels,
                            filter_size=3,
                            stride=2,
                            norm_type=self.norm_type,
                            norm_decay=self.norm_decay,
                            freeze_norm=self.freeze_norm,
                            initializer=XavierUniform(fan_out=fan)))
                else:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        nn.Conv2D(
                            in_channels=in_c,
                            out_channels=out_channels,
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=XavierUniform(fan_out=fan))))
                self.fpn_convs.append(extra_fpn_conv)

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {
            'in_channels': [i.channels for i in input_shape],
            'spatial_scales': [1.0 / i.stride for i in input_shape],
        }

    def forward(self, body_feats):
        laterals = []
        num_levels = len(body_feats)

        for i in range(num_levels):
            laterals.append(self.lateral_convs[i](body_feats[i]))

        for i in range(1, num_levels):
            lvl = num_levels - i
            upsample = F.interpolate(
                laterals[lvl],
                scale_factor=2.,
                mode='nearest', )
            laterals[lvl - 1] += upsample

        fpn_output = []
        for lvl in range(num_levels):
            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))

        if self.extra_stage > 0:
            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
            if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has no extra convs'
                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
            else:
                if self.use_c5:
                    extra_source = body_feats[-1]
                else:
                    extra_source = fpn_output[-1]
                fpn_output.append(self.fpn_convs[num_levels](extra_source))

                for i in range(1, self.extra_stage):
                    if self.relu_before_extra_convs:
                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
                            fpn_output[-1])))
                    else:
                        fpn_output.append(self.fpn_convs[num_levels + i](
                            fpn_output[-1]))
        return fpn_output
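A shape sketch for FCEFPN with ResNet50-style channels (illustrative values; unlike the other necks here, the output is a list of pyramid levels rather than one fused map):

    import paddle
    from ppocr.modeling.necks.fce_fpn import FCEFPN

    neck = FCEFPN(in_channels=[256, 512, 1024, 2048], out_channels=256)
    feats = [paddle.rand([1, 256, 160, 160]), paddle.rand([1, 512, 80, 80]),
             paddle.rand([1, 1024, 40, 40]), paddle.rand([1, 2048, 20, 20])]
    outs = neck(feats)
    print([tuple(o.shape) for o in outs])
    # p2..p5 keep their input resolutions with 256 channels each, plus one extra
    # max-pooled 10x10 level because extra_stage defaults to 1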
backend/ppocr/modeling/necks/fpn.py (new file, 138 lines)
@@ -0,0 +1,138 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/whai362/PSENet/blob/python3/models/neck/fpn.py
"""

import paddle.nn as nn
import paddle
import math
import paddle.nn.functional as F


class Conv_BN_ReLU(nn.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size=1,
                 stride=1,
                 padding=0):
        super(Conv_BN_ReLU, self).__init__()
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias_attr=False)
        self.bn = nn.BatchNorm2D(out_planes, momentum=0.1)
        self.relu = nn.ReLU()

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Normal(
                        0, math.sqrt(2. / n)))
            elif isinstance(m, nn.BatchNorm2D):
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(1.0))
                m.bias = paddle.create_parameter(
                    shape=m.bias.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(0.0))

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))


class FPN(nn.Layer):
    def __init__(self, in_channels, out_channels):
        super(FPN, self).__init__()

        # Top layer
        self.toplayer_ = Conv_BN_ReLU(
            in_channels[3], out_channels, kernel_size=1, stride=1, padding=0)
        # Lateral layers
        self.latlayer1_ = Conv_BN_ReLU(
            in_channels[2], out_channels, kernel_size=1, stride=1, padding=0)

        self.latlayer2_ = Conv_BN_ReLU(
            in_channels[1], out_channels, kernel_size=1, stride=1, padding=0)

        self.latlayer3_ = Conv_BN_ReLU(
            in_channels[0], out_channels, kernel_size=1, stride=1, padding=0)

        # Smooth layers
        self.smooth1_ = Conv_BN_ReLU(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1)

        self.smooth2_ = Conv_BN_ReLU(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1)

        self.smooth3_ = Conv_BN_ReLU(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1)

        self.out_channels = out_channels * 4
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Normal(
                        0, math.sqrt(2. / n)))
            elif isinstance(m, nn.BatchNorm2D):
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(1.0))
                m.bias = paddle.create_parameter(
                    shape=m.bias.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(0.0))

    def _upsample(self, x, scale=1):
        return F.upsample(x, scale_factor=scale, mode='bilinear')

    def _upsample_add(self, x, y, scale=1):
        return F.upsample(x, scale_factor=scale, mode='bilinear') + y

    def forward(self, x):
        f2, f3, f4, f5 = x
        p5 = self.toplayer_(f5)

        f4 = self.latlayer1_(f4)
        p4 = self._upsample_add(p5, f4, 2)
        p4 = self.smooth1_(p4)

        f3 = self.latlayer2_(f3)
        p3 = self._upsample_add(p4, f3, 2)
        p3 = self.smooth2_(p3)

        f2 = self.latlayer3_(f2)
        p2 = self._upsample_add(p3, f2, 2)
        p2 = self.smooth3_(p2)

        p3 = self._upsample(p3, 2)
        p4 = self._upsample(p4, 4)
        p5 = self._upsample(p5, 8)

        fuse = paddle.concat([p2, p3, p4, p5], axis=1)
        return fuse
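A shape sketch for this PSENet-style FPN (illustrative channels; note that the instance's out_channels attribute becomes four times the constructor argument because the four levels are concatenated):

    import paddle
    from ppocr.modeling.necks.fpn import FPN

    neck = FPN(in_channels=[64, 128, 256, 512], out_channels=128)
    feats = [paddle.rand([1, 64, 160, 160]), paddle.rand([1, 128, 80, 80]),
             paddle.rand([1, 256, 40, 40]), paddle.rand([1, 512, 20, 20])]
    fuse = neck(feats)
    print(fuse.shape)  # [1, 512, 160, 160]: out_channels * 4 at 1/4 scale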
backend/ppocr/modeling/necks/pg_fpn.py (new file, 314 lines)
@@ -0,0 +1,314 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance',
            use_global_stats=False)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=4,
                 stride=2,
                 padding=1,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()

        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance",
            use_global_stats=False)

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class PGFPN(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super(PGFPN, self).__init__()
        num_inputs = [2048, 2048, 1024, 512, 256]
        num_outputs = [256, 256, 192, 192, 128]
        self.out_channels = 128
        self.conv_bn_layer_1 = ConvBNLayer(
            in_channels=3,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act=None,
            name='FPN_d1')
        self.conv_bn_layer_2 = ConvBNLayer(
            in_channels=64,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act=None,
            name='FPN_d2')
        self.conv_bn_layer_3 = ConvBNLayer(
            in_channels=256,
            out_channels=128,
            kernel_size=3,
            stride=1,
            act=None,
            name='FPN_d3')
        self.conv_bn_layer_4 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=2,
            act=None,
            name='FPN_d4')
        self.conv_bn_layer_5 = ConvBNLayer(
            in_channels=64,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name='FPN_d5')
        self.conv_bn_layer_6 = ConvBNLayer(
            in_channels=64,
            out_channels=128,
            kernel_size=3,
            stride=2,
            act=None,
            name='FPN_d6')
        self.conv_bn_layer_7 = ConvBNLayer(
            in_channels=128,
            out_channels=128,
            kernel_size=3,
            stride=1,
            act='relu',
            name='FPN_d7')
        self.conv_bn_layer_8 = ConvBNLayer(
            in_channels=128,
            out_channels=128,
            kernel_size=1,
            stride=1,
            act=None,
            name='FPN_d8')

        self.conv_h0 = ConvBNLayer(
            in_channels=num_inputs[0],
            out_channels=num_outputs[0],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(0))
        self.conv_h1 = ConvBNLayer(
            in_channels=num_inputs[1],
            out_channels=num_outputs[1],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(1))
        self.conv_h2 = ConvBNLayer(
            in_channels=num_inputs[2],
            out_channels=num_outputs[2],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(2))
        self.conv_h3 = ConvBNLayer(
            in_channels=num_inputs[3],
            out_channels=num_outputs[3],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(3))
        self.conv_h4 = ConvBNLayer(
            in_channels=num_inputs[4],
            out_channels=num_outputs[4],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_h{}".format(4))

        self.dconv0 = DeConvBNLayer(
            in_channels=num_outputs[0],
            out_channels=num_outputs[0 + 1],
            name="dconv_{}".format(0))
        self.dconv1 = DeConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[1 + 1],
            act=None,
            name="dconv_{}".format(1))
        self.dconv2 = DeConvBNLayer(
            in_channels=num_outputs[2],
            out_channels=num_outputs[2 + 1],
            act=None,
            name="dconv_{}".format(2))
        self.dconv3 = DeConvBNLayer(
            in_channels=num_outputs[3],
            out_channels=num_outputs[3 + 1],
            act=None,
            name="dconv_{}".format(3))
        self.conv_g1 = ConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[1],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(1))
        self.conv_g2 = ConvBNLayer(
            in_channels=num_outputs[2],
            out_channels=num_outputs[2],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(2))
        self.conv_g3 = ConvBNLayer(
            in_channels=num_outputs[3],
            out_channels=num_outputs[3],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(3))
        self.conv_g4 = ConvBNLayer(
            in_channels=num_outputs[4],
            out_channels=num_outputs[4],
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv_g{}".format(4))
        self.convf = ConvBNLayer(
            in_channels=num_outputs[4],
            out_channels=num_outputs[4],
            kernel_size=1,
            stride=1,
            act=None,
            name="conv_f{}".format(4))

    def forward(self, x):
        c0, c1, c2, c3, c4, c5, c6 = x
        # FPN_Down_Fusion
        f = [c0, c1, c2]
        g = [None, None, None]
        h = [None, None, None]
        h[0] = self.conv_bn_layer_1(f[0])
        h[1] = self.conv_bn_layer_2(f[1])
        h[2] = self.conv_bn_layer_3(f[2])

        g[0] = self.conv_bn_layer_4(h[0])
        g[1] = paddle.add(g[0], h[1])
        g[1] = F.relu(g[1])
        g[1] = self.conv_bn_layer_5(g[1])
        g[1] = self.conv_bn_layer_6(g[1])

        g[2] = paddle.add(g[1], h[2])
        g[2] = F.relu(g[2])
        g[2] = self.conv_bn_layer_7(g[2])
        f_down = self.conv_bn_layer_8(g[2])

        # FPN UP Fusion
        f1 = [c6, c5, c4, c3, c2]
        g = [None, None, None, None, None]
        h = [None, None, None, None, None]
        h[0] = self.conv_h0(f1[0])
        h[1] = self.conv_h1(f1[1])
        h[2] = self.conv_h2(f1[2])
        h[3] = self.conv_h3(f1[3])
        h[4] = self.conv_h4(f1[4])

        g[0] = self.dconv0(h[0])
        g[1] = paddle.add(g[0], h[1])
        g[1] = F.relu(g[1])
        g[1] = self.conv_g1(g[1])
        g[1] = self.dconv1(g[1])

        g[2] = paddle.add(g[1], h[2])
        g[2] = F.relu(g[2])
        g[2] = self.conv_g2(g[2])
        g[2] = self.dconv2(g[2])

        g[3] = paddle.add(g[2], h[3])
        g[3] = F.relu(g[3])
        g[3] = self.conv_g3(g[3])
        g[3] = self.dconv3(g[3])

        g[4] = paddle.add(x=g[3], y=h[4])
        g[4] = F.relu(g[4])
        g[4] = self.conv_g4(g[4])
        f_up = self.convf(g[4])
        f_common = paddle.add(f_down, f_up)
        f_common = F.relu(f_common)
        return f_common
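PGFPN hard-codes its channel plan (num_inputs, num_outputs and the first three down-fusion convs), so the seven inputs must follow it: c0 has 3 channels at image resolution and each later level halves the spatial size. A forward-shape sketch under that reading (the spatial sizes are one consistent choice for a 512x512 input; this layout is inferred from the layer widths, not documented in the source):

    import paddle
    from ppocr.modeling.necks.pg_fpn import PGFPN

    neck = PGFPN(in_channels=None)  # in_channels is accepted but never used
    chs = [3, 64, 256, 512, 1024, 2048, 2048]  # c0..c6 channel widths
    sizes = [512, 256, 128, 64, 32, 16, 8]     # strides 1, 2, 4, 8, 16, 32, 64
    feats = [paddle.rand([1, c, s, s]) for c, s in zip(chs, sizes)]
    out = neck(feats)
    print(out.shape)  # [1, 128, 128, 128]: down- and up-fusion branches summed at 1/4 scale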
backend/ppocr/modeling/necks/pren_fpn.py (new file, 163 lines)
@@ -0,0 +1,163 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code is adapted from:
https://github.com/RuijieJ/pren/blob/main/Nets/Aggregation.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F


class PoolAggregate(nn.Layer):
    def __init__(self, n_r, d_in, d_middle=None, d_out=None):
        super(PoolAggregate, self).__init__()
        if not d_middle:
            d_middle = d_in
        if not d_out:
            d_out = d_in

        self.d_in = d_in
        self.d_middle = d_middle
        self.d_out = d_out
        self.act = nn.Swish()

        self.n_r = n_r
        self.aggs = self._build_aggs()

    def _build_aggs(self):
        aggs = []
        for i in range(self.n_r):
            aggs.append(
                self.add_sublayer(
                    '{}'.format(i),
                    nn.Sequential(
                        ('conv1', nn.Conv2D(
                            self.d_in, self.d_middle, 3, 2, 1, bias_attr=False)
                         ), ('bn1', nn.BatchNorm(self.d_middle)),
                        ('act', self.act), ('conv2', nn.Conv2D(
                            self.d_middle, self.d_out, 3, 2, 1, bias_attr=False
                        )), ('bn2', nn.BatchNorm(self.d_out)))))
        return aggs

    def forward(self, x):
        b = x.shape[0]
        outs = []
        for agg in self.aggs:
            y = agg(x)
            p = F.adaptive_avg_pool2d(y, 1)
            outs.append(p.reshape((b, 1, self.d_out)))
        out = paddle.concat(outs, 1)
        return out


class WeightAggregate(nn.Layer):
    def __init__(self, n_r, d_in, d_middle=None, d_out=None):
        super(WeightAggregate, self).__init__()
        if not d_middle:
            d_middle = d_in
        if not d_out:
            d_out = d_in

        self.n_r = n_r
        self.d_out = d_out
        self.act = nn.Swish()

        self.conv_n = nn.Sequential(
            ('conv1', nn.Conv2D(
                d_in, d_in, 3, 1, 1,
                bias_attr=False)), ('bn1', nn.BatchNorm(d_in)),
            ('act1', self.act), ('conv2', nn.Conv2D(
                d_in, n_r, 1, bias_attr=False)), ('bn2', nn.BatchNorm(n_r)),
            ('act2', nn.Sigmoid()))
        self.conv_d = nn.Sequential(
            ('conv1', nn.Conv2D(
                d_in, d_middle, 3, 1, 1,
                bias_attr=False)), ('bn1', nn.BatchNorm(d_middle)),
            ('act1', self.act), ('conv2', nn.Conv2D(
                d_middle, d_out, 1,
                bias_attr=False)), ('bn2', nn.BatchNorm(d_out)))

    def forward(self, x):
        b, _, h, w = x.shape

        hmaps = self.conv_n(x)
        fmaps = self.conv_d(x)
        r = paddle.bmm(
            hmaps.reshape((b, self.n_r, h * w)),
            fmaps.reshape((b, self.d_out, h * w)).transpose((0, 2, 1)))
        return r


class GCN(nn.Layer):
    def __init__(self, d_in, n_in, d_out=None, n_out=None, dropout=0.1):
        super(GCN, self).__init__()
        if not d_out:
            d_out = d_in
        if not n_out:
            n_out = d_in

        self.conv_n = nn.Conv1D(n_in, n_out, 1)
        self.linear = nn.Linear(d_in, d_out)
        self.dropout = nn.Dropout(dropout)
        self.act = nn.Swish()

    def forward(self, x):
        x = self.conv_n(x)
        x = self.dropout(self.linear(x))
        return self.act(x)


class PRENFPN(nn.Layer):
    def __init__(self, in_channels, n_r, d_model, max_len, dropout):
        super(PRENFPN, self).__init__()
        assert len(in_channels) == 3, "in_channels' length must be 3."
        c1, c2, c3 = in_channels  # the depths are from big to small
        # build fpn
        assert d_model % 3 == 0, "{} can't be divided by 3.".format(d_model)
        self.agg_p1 = PoolAggregate(n_r, c1, d_out=d_model // 3)
        self.agg_p2 = PoolAggregate(n_r, c2, d_out=d_model // 3)
        self.agg_p3 = PoolAggregate(n_r, c3, d_out=d_model // 3)

        self.agg_w1 = WeightAggregate(n_r, c1, 4 * c1, d_model // 3)
        self.agg_w2 = WeightAggregate(n_r, c2, 4 * c2, d_model // 3)
        self.agg_w3 = WeightAggregate(n_r, c3, 4 * c3, d_model // 3)

        self.gcn_pool = GCN(d_model, n_r, d_model, max_len, dropout)
        self.gcn_weight = GCN(d_model, n_r, d_model, max_len, dropout)

        self.out_channels = d_model

    def forward(self, inputs):
        f3, f5, f7 = inputs

        rp1 = self.agg_p1(f3)
        rp2 = self.agg_p2(f5)
        rp3 = self.agg_p3(f7)
        rp = paddle.concat([rp1, rp2, rp3], 2)  # [b,nr,d]

        rw1 = self.agg_w1(f3)
        rw2 = self.agg_w2(f5)
        rw3 = self.agg_w3(f7)
        rw = paddle.concat([rw1, rw2, rw3], 2)  # [b,nr,d]

        y1 = self.gcn_pool(rp)
        y2 = self.gcn_weight(rw)
        y = 0.5 * (y1 + y2)
        return y  # [b,max_len,d]
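A shape sketch for PRENFPN (hypothetical values: the channel list roughly matches an EfficientNet-B3 backbone, and the spatial sizes are chosen so each map survives the two stride-2 convs inside PoolAggregate):

    import paddle
    from ppocr.modeling.necks.pren_fpn import PRENFPN

    neck = PRENFPN(in_channels=[48, 136, 384], n_r=5, d_model=384,
                   max_len=25, dropout=0.1)
    f3 = paddle.rand([2, 48, 16, 64])
    f5 = paddle.rand([2, 136, 8, 32])
    f7 = paddle.rand([2, 384, 4, 16])
    y = neck([f3, f5, f7])
    print(y.shape)  # [2, 25, 384]: one d_model-dim vector per decoded position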
backend/ppocr/modeling/necks/rnn.py (new file, 191 lines)
@@ -0,0 +1,191 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn

from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
from ppocr.modeling.backbones.rec_svtrnet import Block, ConvBNLayer, trunc_normal_, zeros_, ones_


class Im2Seq(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == 1
        x = x.squeeze(axis=2)
        x = x.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        return x


class EncoderWithRNN(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN, self).__init__()
        self.out_channels = hidden_size * 2
        self.lstm = nn.LSTM(
            in_channels, hidden_size, direction='bidirectional', num_layers=2)

    def forward(self, x):
        x, _ = self.lstm(x)
        return x


class EncoderWithFC(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        weight_attr, bias_attr = get_para_bias_attr(
            l2_decay=0.00001, k=in_channels)
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name='reduce_encoder_fea')

    def forward(self, x):
        x = self.fc(x)
        return x


class EncoderWithSVTR(nn.Layer):
    def __init__(
            self,
            in_channels,
            dims=64,  # XS
            depth=2,
            hidden_dims=120,
            use_guide=False,
            num_heads=8,
            qkv_bias=True,
            mlp_ratio=2.0,
            drop_rate=0.1,
            attn_drop_rate=0.1,
            drop_path=0.,
            qk_scale=None):
        super(EncoderWithSVTR, self).__init__()
        self.depth = depth
        self.use_guide = use_guide
        self.conv1 = ConvBNLayer(
            in_channels, in_channels // 8, padding=1, act=nn.Swish)
        self.conv2 = ConvBNLayer(
            in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish)

        self.svtr_block = nn.LayerList([
            Block(
                dim=hidden_dims,
                num_heads=num_heads,
                mixer='Global',
                HW=None,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=nn.Swish,
                attn_drop=attn_drop_rate,
                drop_path=drop_path,
                norm_layer='nn.LayerNorm',
                epsilon=1e-05,
                prenorm=False) for i in range(depth)
        ])
        self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6)
        self.conv3 = ConvBNLayer(
            hidden_dims, in_channels, kernel_size=1, act=nn.Swish)
        # last conv-nxn, the input is concat of input tensor and conv3 output tensor
        self.conv4 = ConvBNLayer(
            2 * in_channels, in_channels // 8, padding=1, act=nn.Swish)

        self.conv1x1 = ConvBNLayer(
            in_channels // 8, dims, kernel_size=1, act=nn.Swish)
        self.out_channels = dims
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward(self, x):
        # for use guide
        if self.use_guide:
            z = x.clone()
            z.stop_gradient = True
        else:
            z = x
        # for short cut
        h = z
        # reduce dim
        z = self.conv1(z)
        z = self.conv2(z)
        # SVTR global block
        B, C, H, W = z.shape
        z = z.flatten(2).transpose([0, 2, 1])
        for blk in self.svtr_block:
            z = blk(z)
        z = self.norm(z)
        # last stage
        z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2])
        z = self.conv3(z)
        z = paddle.concat((h, z), axis=1)
        z = self.conv1x1(self.conv4(z))
        return z


class SequenceEncoder(nn.Layer):
    def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = Im2Seq(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        self.encoder_type = encoder_type
        if encoder_type == 'reshape':
            self.only_reshape = True
        else:
            support_encoder_dict = {
                'reshape': Im2Seq,
                'fc': EncoderWithFC,
                'rnn': EncoderWithRNN,
                'svtr': EncoderWithSVTR
            }
            assert encoder_type in support_encoder_dict, '{} must in {}'.format(
                encoder_type, support_encoder_dict.keys())
            if encoder_type == "svtr":
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, **kwargs)
            else:
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, hidden_size)
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        if self.encoder_type != 'svtr':
            x = self.encoder_reshape(x)
            if not self.only_reshape:
                x = self.encoder(x)
            return x
        else:
            x = self.encoder(x)
            x = self.encoder_reshape(x)
            return x
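A sketch of the CRNN-style sequence stage (illustrative sizes; the recognition backbone is assumed to have pooled the feature height to 1, which Im2Seq asserts):

    import paddle
    from ppocr.modeling.necks.rnn import SequenceEncoder

    x = paddle.rand([2, 288, 1, 80])  # (batch, channels, height=1, width)
    encoder = SequenceEncoder(in_channels=288, encoder_type='rnn', hidden_size=48)
    seq = encoder(x)
    print(seq.shape)  # [2, 80, 96]: width becomes the time axis; 2*hidden_size from the BiLSTM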
backend/ppocr/modeling/necks/sast_fpn.py (new file, 284 lines, listing truncated below)
@@ -0,0 +1,284 @@
|
||||
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x

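A quick check of the padding arithmetic (illustrative; the layer name and sizes are arbitrary): padding=(kernel_size - 1) // 2 preserves the spatial size for odd kernels at stride 1 and halves it at stride 2.

import paddle
conv = ConvBNLayer(16, 32, kernel_size=3, stride=1, act='relu', name='demo')
print(conv(paddle.rand([1, 16, 40, 40])).shape)   # [1, 32, 40, 40]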
class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x

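Worth noting why FPN_Up_Fusion below pairs kernel_size=4 with stride=2: with padding=(4 - 1) // 2 = 1, the transposed convolution output is (in - 1) * 2 - 2 + 4 = 2 * in, an exact doubling of the spatial size. A sketch (names and sizes are illustrative):

import paddle
up = DeConvBNLayer(64, 32, kernel_size=4, stride=2, act='relu', name='demo')
print(up(paddle.rand([1, 64, 16, 16])).shape)   # [1, 32, 32, 32]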
class FPN_Up_Fusion(nn.Layer):
    def __init__(self, in_channels):
        super(FPN_Up_Fusion, self).__init__()
        in_channels = in_channels[::-1]
        out_channels = [256, 256, 192, 192, 128]

        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2')
        self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3')
        self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4')

        self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0')

        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'),
            DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2')
        )
        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'),
            DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2')
        )
        self.g3_conv = nn.Sequential(
            ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'),
            DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2')
        )

        self.g4_conv = nn.Sequential(
            ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'),
            ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2')
        )

    def _add_relu(self, x1, x2):
        x = paddle.add(x=x1, y=x2)
        x = F.relu(x)
        return x

    def forward(self, x):
        f = x[2:][::-1]
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        h3 = self.h3_conv(f[3])
        h4 = self.h4_conv(f[4])

        g0 = self.g0_conv(h0)
        g1 = self._add_relu(g0, h1)
        g1 = self.g1_conv(g1)
        g2 = self.g2_conv(self._add_relu(g1, h2))
        g3 = self.g3_conv(self._add_relu(g2, h3))
        g4 = self.g4_conv(self._add_relu(g3, h4))

        return g4

class FPN_Down_Fusion(nn.Layer):
    def __init__(self, in_channels):
        super(FPN_Down_Fusion, self).__init__()
        out_channels = [32, 64, 128]

        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2')

        self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0')

        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'),
            ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2')
        )

        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'),
            ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2')
        )

    def forward(self, x):
        f = x[:3]
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        g0 = self.g0_conv(h0)
        g1 = paddle.add(x=g0, y=h1)
        g1 = F.relu(g1)
        g1 = self.g1_conv(g1)
        g2 = paddle.add(x=g1, y=h2)
        g2 = F.relu(g2)
        g2 = self.g2_conv(g2)
        return g2

class Cross_Attention(nn.Layer):
    def __init__(self, in_channels):
        super(Cross_Attention, self).__init__()
        self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
        self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
        self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')

        self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
        self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')

        self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
        self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')

        self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')

    def _cal_fweight(self, f, shape):
        f_theta, f_phi, f_g = f
        # flatten
        f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
        f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128])
        f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
        f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128])
        f_g = paddle.transpose(f_g, [0, 2, 3, 1])
        f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128])
        # correlation
        f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
        # scale
        f_attn = f_attn / (128**0.5)
        f_attn = F.softmax(f_attn)
        # weighted sum
        f_weight = paddle.matmul(f_attn, f_g)
        f_weight = paddle.reshape(
            f_weight, [shape[0], shape[1], shape[2], 128])
        return f_weight

    def forward(self, f_common):
        f_shape = paddle.shape(f_common)

        f_theta = self.theta_conv(f_common)
        f_phi = self.phi_conv(f_common)
        f_g = self.g_conv(f_common)

        ######## horizon ########
        fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
                                      [f_shape[0], f_shape[2], f_shape[3]])
        fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = self.fh_weight_conv(fh_weight)
        # short cut
        fh_sc = self.fh_sc_conv(f_common)
        f_h = F.relu(fh_weight + fh_sc)

        ######## vertical ########
        fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
        fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
        fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
                                      [f_shape[0], f_shape[3], f_shape[2]])
        fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = self.fv_weight_conv(fv_weight)
        # short cut
        fv_sc = self.fv_sc_conv(f_common)
        f_v = F.relu(fv_weight + fv_sc)

        ######## merge ########
        f_attn = paddle.concat([f_h, f_v], axis=1)
        f_attn = self.f_attn_conv(f_attn)
        return f_attn

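The core of _cal_fweight is a scaled dot-product attention applied independently to every row (horizontal pass) or column (vertical pass). A toy equivalent with assumed sizes, where each of the N*H rows attends over its W positions with 128-d features:

import paddle
import paddle.nn.functional as F
q = paddle.rand([4, 10, 128])   # (N*H, W, 128)
k = paddle.rand([4, 10, 128])
v = paddle.rand([4, 10, 128])
attn = F.softmax(paddle.matmul(q, k.transpose([0, 2, 1])) / 128**0.5)
print(paddle.matmul(attn, v).shape)   # [4, 10, 128]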
class SASTFPN(nn.Layer):
    def __init__(self, in_channels, with_cab=False, **kwargs):
        super(SASTFPN, self).__init__()
        self.in_channels = in_channels
        self.with_cab = with_cab
        self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
        self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
        self.out_channels = 128
        self.cross_attention = Cross_Attention(self.out_channels)

    def forward(self, x):
        # down fpn
        f_down = self.FPN_Down_Fusion(x)

        # up fpn
        f_up = self.FPN_Up_Fusion(x)

        # fusion
        f_common = paddle.add(x=f_down, y=f_up)
        f_common = F.relu(f_common)

        if self.with_cab:
            # enhance f_common with the cross-attention block (CAB)
            f_common = self.cross_attention(f_common)

        return f_common
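A shape walk-through (the seven-level channel list is an assumption about the SAST backbone; what matters to the wiring is that the maps arrive at strides 1, 2, 4, ..., 64). FPN_Down_Fusion consumes the three shallowest maps and downsamples twice; FPN_Up_Fusion consumes the five deepest and upsamples four times; both land at 1/4 scale with 128 channels, so they can be added:

import paddle
in_channels = [3, 64, 128, 256, 512, 512, 512]   # assumed backbone channels
fpn = SASTFPN(in_channels, with_cab=False)
feats = [paddle.rand([1, c, 256 // 2**i, 256 // 2**i])
         for i, c in enumerate(in_channels)]
print(fpn(feats).shape)   # [1, 128, 64, 64]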
110
backend/ppocr/modeling/necks/table_fpn.py
Normal file
@@ -0,0 +1,110 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr

class TableFPN(nn.Layer):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(TableFPN, self).__init__()
        self.out_channels = 512
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.in2_conv = nn.Conv2D(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            stride=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.fuse_conv = nn.Conv2D(
            in_channels=self.out_channels * 4,
            out_channels=512,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.upsample(
            in5, size=in4.shape[2:4], mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, size=in3.shape[2:4], mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, size=in2.shape[2:4], mode="nearest", align_mode=1)  # 1/4

        p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1)
        p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1)
        p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1)
        fuse = paddle.concat([in5, p4, p3, p2], axis=1)
        fuse_conv = self.fuse_conv(fuse) * 0.005
        return [c5 + fuse_conv]
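As written, the p5_conv through p2_conv layers are constructed but never called in forward; the lateral outputs are resized to the c5 scale, fused, scaled by 0.005, and added back onto c5 as a residual, which means in_channels[3] must equal 512 for the addition to be well-formed. A usage sketch with an assumed channel list:

import paddle
fpn = TableFPN(in_channels=[64, 128, 256, 512], out_channels=512)
c2 = paddle.rand([1, 64, 80, 80])
c3 = paddle.rand([1, 128, 40, 40])
c4 = paddle.rand([1, 256, 20, 20])
c5 = paddle.rand([1, 512, 10, 10])
out, = fpn([c2, c3, c4, c5])
print(out.shape)   # [1, 512, 10, 10]: everything is resampled to the c5 scale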