init

2026-02-16 05:01:06 +08:00 · 2023-10-25 16:38:16 +08:00
commit 2b9360c299
602 changed files with 152490 additions and 0 deletions
--- a/backend/ppocr/optimizer/init.py
+++ b/backend/ppocr/optimizer/init.py
@@ -0,0 +1,62 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import copy
+import paddle
+
+__all__ = ['build_optimizer']
+
+
+def build_lr_scheduler(lr_config, epochs, step_each_epoch):
+    from . import learning_rate
+    lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
+    lr_name = lr_config.pop('name', 'Const')
+    lr = getattr(learning_rate, lr_name)(**lr_config)()
+    return lr
+
+
+def build_optimizer(config, epochs, step_each_epoch, model):
+    from . import regularizer, optimizer
+    config = copy.deepcopy(config)
+    # step1 build lr
+    lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch)
+
+    # step2 build regularization
+    if 'regularizer' in config and config['regularizer'] is not None:
+        reg_config = config.pop('regularizer')
+        reg_name = reg_config.pop('name')
+        if not hasattr(regularizer, reg_name):
+            reg_name += 'Decay'
+        reg = getattr(regularizer, reg_name)(**reg_config)()
+    elif 'weight_decay' in config:
+        reg = config.pop('weight_decay')
+    else:
+        reg = None
+
+    # step3 build optimizer
+    optim_name = config.pop('name')
+    if 'clip_norm' in config:
+        clip_norm = config.pop('clip_norm')
+        grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
+    else:
+        grad_clip = None
+    optim = getattr(optimizer, optim_name)(learning_rate=lr,
+                                           weight_decay=reg,
+                                           grad_clip=grad_clip,
+                                           **config)
+    return optim(model), lr
--- a/backend/ppocr/optimizer/learning_rate.py
+++ b/backend/ppocr/optimizer/learning_rate.py
@@ -0,0 +1,310 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle.optimizer import lr
+from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay
+
+
+class Linear(object):
+    """
+    Linear learning rate decay
+    Args:
+        lr (float): The initial learning rate. It is a python float number.
+        epochs(int): The decay step size. It determines the decay cycle.
+        end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
+        power(float, optional): Power of polynomial. Default: 1.0.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 epochs,
+                 step_each_epoch,
+                 end_lr=0.0,
+                 power=1.0,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Linear, self).__init__()
+        self.learning_rate = learning_rate
+        self.epochs = epochs * step_each_epoch
+        self.end_lr = end_lr
+        self.power = power
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = lr.PolynomialDecay(
+            learning_rate=self.learning_rate,
+            decay_steps=self.epochs,
+            end_lr=self.end_lr,
+            power=self.power,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Cosine(object):
+    """
+    Cosine learning rate decay
+    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
+    Args:
+        lr(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_each_epoch,
+                 epochs,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Cosine, self).__init__()
+        self.learning_rate = learning_rate
+        self.T_max = step_each_epoch * epochs
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = lr.CosineAnnealingDecay(
+            learning_rate=self.learning_rate,
+            T_max=self.T_max,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Step(object):
+    """
+    Piecewise learning rate decay
+    Args:
+        step_each_epoch(int): steps each epoch
+        learning_rate (float): The initial learning rate. It is a python float number.
+        step_size (int): the interval to update.
+        gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
+            It should be less than 1.0. Default: 0.1.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_size,
+                 step_each_epoch,
+                 gamma,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Step, self).__init__()
+        self.step_size = step_each_epoch * step_size
+        self.learning_rate = learning_rate
+        self.gamma = gamma
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = lr.StepDecay(
+            learning_rate=self.learning_rate,
+            step_size=self.step_size,
+            gamma=self.gamma,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Piecewise(object):
+    """
+    Piecewise learning rate decay
+    Args:
+        boundaries(list): A list of steps numbers. The type of element in the list is python int.
+        values(list): A list of learning rate values that will be picked during different epoch boundaries.
+            The type of element in the list is python float.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 step_each_epoch,
+                 decay_epochs,
+                 values,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Piecewise, self).__init__()
+        self.boundaries = [step_each_epoch * e for e in decay_epochs]
+        self.values = values
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = lr.PiecewiseDecay(
+            boundaries=self.boundaries,
+            values=self.values,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.values[0],
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class CyclicalCosine(object):
+    """
+    Cyclical cosine learning rate decay
+    Args:
+        learning_rate(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        cycle(int): period of the cosine learning rate
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_each_epoch,
+                 epochs,
+                 cycle,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(CyclicalCosine, self).__init__()
+        self.learning_rate = learning_rate
+        self.T_max = step_each_epoch * epochs
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+        self.cycle = round(cycle * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = CyclicalCosineDecay(
+            learning_rate=self.learning_rate,
+            T_max=self.T_max,
+            cycle=self.cycle,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class OneCycle(object):
+    """
+    One Cycle learning rate decay
+    Args:
+        max_lr(float): Upper learning rate boundaries
+        epochs(int): total training epochs
+        step_each_epoch(int): steps each epoch
+        anneal_strategy(str): {‘cos’, ‘linear’} Specifies the annealing strategy: “cos” for cosine annealing, “linear” for linear annealing. 
+            Default: ‘cos’
+        three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to ‘final_div_factor’ 
+            instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by ‘pct_start’).
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 max_lr,
+                 epochs,
+                 step_each_epoch,
+                 anneal_strategy='cos',
+                 three_phase=False,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(OneCycle, self).__init__()
+        self.max_lr = max_lr
+        self.epochs = epochs
+        self.steps_per_epoch = step_each_epoch
+        self.anneal_strategy = anneal_strategy
+        self.three_phase = three_phase
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = OneCycleDecay(
+            max_lr=self.max_lr,
+            epochs=self.epochs,
+            steps_per_epoch=self.steps_per_epoch,
+            anneal_strategy=self.anneal_strategy,
+            three_phase=self.three_phase,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.max_lr,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Const(object):
+    """
+    Const learning rate decay
+    Args:
+        learning_rate(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_each_epoch,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Const, self).__init__()
+        self.learning_rate = learning_rate
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = self.learning_rate
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
--- a/backend/ppocr/optimizer/lr_scheduler.py
+++ b/backend/ppocr/optimizer/lr_scheduler.py
@@ -0,0 +1,162 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from paddle.optimizer.lr import LRScheduler
+
+
+class CyclicalCosineDecay(LRScheduler):
+    def __init__(self,
+                 learning_rate,
+                 T_max,
+                 cycle=1,
+                 last_epoch=-1,
+                 eta_min=0.0,
+                 verbose=False):
+        """
+        Cyclical cosine learning rate decay
+        A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf
+        Args:
+            learning rate(float): learning rate
+            T_max(int): maximum epoch num
+            cycle(int): period of the cosine decay
+            last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+            eta_min(float): minimum learning rate during training
+            verbose(bool): whether to print learning rate for each epoch
+        """
+        super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch,
+                                                  verbose)
+        self.cycle = cycle
+        self.eta_min = eta_min
+
+    def get_lr(self):
+        if self.last_epoch == 0:
+            return self.base_lr
+        reletive_epoch = self.last_epoch % self.cycle
+        lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * \
+                (1 + math.cos(math.pi * reletive_epoch / self.cycle))
+        return lr
+
+
+class OneCycleDecay(LRScheduler):
+    """
+    One Cycle learning rate decay
+    A learning rate which can be referred in https://arxiv.org/abs/1708.07120
+    Code refered in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
+    """
+
+    def __init__(self,
+                 max_lr,
+                 epochs=None,
+                 steps_per_epoch=None,
+                 pct_start=0.3,
+                 anneal_strategy='cos',
+                 div_factor=25.,
+                 final_div_factor=1e4,
+                 three_phase=False,
+                 last_epoch=-1,
+                 verbose=False):
+
+        # Validate total_steps
+        if epochs <= 0 or not isinstance(epochs, int):
+            raise ValueError(
+                "Expected positive integer epochs, but got {}".format(epochs))
+        if steps_per_epoch <= 0 or not isinstance(steps_per_epoch, int):
+            raise ValueError(
+                "Expected positive integer steps_per_epoch, but got {}".format(
+                    steps_per_epoch))
+        self.total_steps = epochs * steps_per_epoch
+
+        self.max_lr = max_lr
+        self.initial_lr = self.max_lr / div_factor
+        self.min_lr = self.initial_lr / final_div_factor
+
+        if three_phase:
+            self._schedule_phases = [
+                {
+                    'end_step': float(pct_start * self.total_steps) - 1,
+                    'start_lr': self.initial_lr,
+                    'end_lr': self.max_lr,
+                },
+                {
+                    'end_step': float(2 * pct_start * self.total_steps) - 2,
+                    'start_lr': self.max_lr,
+                    'end_lr': self.initial_lr,
+                },
+                {
+                    'end_step': self.total_steps - 1,
+                    'start_lr': self.initial_lr,
+                    'end_lr': self.min_lr,
+                },
+            ]
+        else:
+            self._schedule_phases = [
+                {
+                    'end_step': float(pct_start * self.total_steps) - 1,
+                    'start_lr': self.initial_lr,
+                    'end_lr': self.max_lr,
+                },
+                {
+                    'end_step': self.total_steps - 1,
+                    'start_lr': self.max_lr,
+                    'end_lr': self.min_lr,
+                },
+            ]
+
+        # Validate pct_start
+        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
+            raise ValueError(
+                "Expected float between 0 and 1 pct_start, but got {}".format(
+                    pct_start))
+
+        # Validate anneal_strategy
+        if anneal_strategy not in ['cos', 'linear']:
+            raise ValueError(
+                "anneal_strategy must by one of 'cos' or 'linear', instead got {}".
+                format(anneal_strategy))
+        elif anneal_strategy == 'cos':
+            self.anneal_func = self._annealing_cos
+        elif anneal_strategy == 'linear':
+            self.anneal_func = self._annealing_linear
+
+        super(OneCycleDecay, self).__init__(max_lr, last_epoch, verbose)
+
+    def _annealing_cos(self, start, end, pct):
+        "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
+        cos_out = math.cos(math.pi * pct) + 1
+        return end + (start - end) / 2.0 * cos_out
+
+    def _annealing_linear(self, start, end, pct):
+        "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0."
+        return (end - start) * pct + start
+
+    def get_lr(self):
+        computed_lr = 0.0
+        step_num = self.last_epoch
+
+        if step_num > self.total_steps:
+            raise ValueError(
+                "Tried to step {} times. The specified number of total steps is {}"
+                .format(step_num + 1, self.total_steps))
+        start_step = 0
+        for i, phase in enumerate(self._schedule_phases):
+            end_step = phase['end_step']
+            if step_num <= end_step or i == len(self._schedule_phases) - 1:
+                pct = (step_num - start_step) / (end_step - start_step)
+                computed_lr = self.anneal_func(phase['start_lr'],
+                                               phase['end_lr'], pct)
+                break
+            start_step = phase['end_step']
+
+        return computed_lr
--- a/backend/ppocr/optimizer/optimizer.py
+++ b/backend/ppocr/optimizer/optimizer.py
@@ -0,0 +1,234 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle import optimizer as optim
+
+
+class Momentum(object):
+    """
+    Simple Momentum optimizer with velocity state.
+    Args:
+        learning_rate (float|Variable) - The learning rate used to update parameters.
+            Can be a float value or a Variable with one float value as data element.
+        momentum (float) - Momentum factor.
+        regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 weight_decay=None,
+                 grad_clip=None,
+                 **args):
+        super(Momentum, self).__init__()
+        self.learning_rate = learning_rate
+        self.momentum = momentum
+        self.weight_decay = weight_decay
+        self.grad_clip = grad_clip
+
+    def __call__(self, model):
+        train_params = [
+            param for param in model.parameters() if param.trainable is True
+        ]
+        opt = optim.Momentum(
+            learning_rate=self.learning_rate,
+            momentum=self.momentum,
+            weight_decay=self.weight_decay,
+            grad_clip=self.grad_clip,
+            parameters=train_params)
+        return opt
+
+
+class Adam(object):
+    def __init__(self,
+                 learning_rate=0.001,
+                 beta1=0.9,
+                 beta2=0.999,
+                 epsilon=1e-08,
+                 parameter_list=None,
+                 weight_decay=None,
+                 grad_clip=None,
+                 name=None,
+                 lazy_mode=False,
+                 **kwargs):
+        self.learning_rate = learning_rate
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.parameter_list = parameter_list
+        self.learning_rate = learning_rate
+        self.weight_decay = weight_decay
+        self.grad_clip = grad_clip
+        self.name = name
+        self.lazy_mode = lazy_mode
+
+    def __call__(self, model):
+        train_params = [
+            param for param in model.parameters() if param.trainable is True
+        ]
+        opt = optim.Adam(
+            learning_rate=self.learning_rate,
+            beta1=self.beta1,
+            beta2=self.beta2,
+            epsilon=self.epsilon,
+            weight_decay=self.weight_decay,
+            grad_clip=self.grad_clip,
+            name=self.name,
+            lazy_mode=self.lazy_mode,
+            parameters=train_params)
+        return opt
+
+
+class RMSProp(object):
+    """
+    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
+    Args:
+        learning_rate (float|Variable) - The learning rate used to update parameters.
+            Can be a float value or a Variable with one float value as data element.
+        momentum (float) - Momentum factor.
+        rho (float) - rho value in equation.
+        epsilon (float) - avoid division by zero, default is 1e-6.
+        regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 momentum=0.0,
+                 rho=0.95,
+                 epsilon=1e-6,
+                 weight_decay=None,
+                 grad_clip=None,
+                 **args):
+        super(RMSProp, self).__init__()
+        self.learning_rate = learning_rate
+        self.momentum = momentum
+        self.rho = rho
+        self.epsilon = epsilon
+        self.weight_decay = weight_decay
+        self.grad_clip = grad_clip
+
+    def __call__(self, model):
+        train_params = [
+            param for param in model.parameters() if param.trainable is True
+        ]
+        opt = optim.RMSProp(
+            learning_rate=self.learning_rate,
+            momentum=self.momentum,
+            rho=self.rho,
+            epsilon=self.epsilon,
+            weight_decay=self.weight_decay,
+            grad_clip=self.grad_clip,
+            parameters=train_params)
+        return opt
+
+
+class Adadelta(object):
+    def __init__(self,
+                 learning_rate=0.001,
+                 epsilon=1e-08,
+                 rho=0.95,
+                 parameter_list=None,
+                 weight_decay=None,
+                 grad_clip=None,
+                 name=None,
+                 **kwargs):
+        self.learning_rate = learning_rate
+        self.epsilon = epsilon
+        self.rho = rho
+        self.parameter_list = parameter_list
+        self.learning_rate = learning_rate
+        self.weight_decay = weight_decay
+        self.grad_clip = grad_clip
+        self.name = name
+
+    def __call__(self, model):
+        train_params = [
+            param for param in model.parameters() if param.trainable is True
+        ]
+        opt = optim.Adadelta(
+            learning_rate=self.learning_rate,
+            epsilon=self.epsilon,
+            rho=self.rho,
+            weight_decay=self.weight_decay,
+            grad_clip=self.grad_clip,
+            name=self.name,
+            parameters=train_params)
+        return opt
+
+
+class AdamW(object):
+    def __init__(self,
+                 learning_rate=0.001,
+                 beta1=0.9,
+                 beta2=0.999,
+                 epsilon=1e-8,
+                 weight_decay=0.01,
+                 multi_precision=False,
+                 grad_clip=None,
+                 no_weight_decay_name=None,
+                 one_dim_param_no_weight_decay=False,
+                 name=None,
+                 lazy_mode=False,
+                 **args):
+        super().__init__()
+        self.learning_rate = learning_rate
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.grad_clip = grad_clip
+        self.weight_decay = 0.01 if weight_decay is None else weight_decay
+        self.grad_clip = grad_clip
+        self.name = name
+        self.lazy_mode = lazy_mode
+        self.multi_precision = multi_precision
+        self.no_weight_decay_name_list = no_weight_decay_name.split(
+        ) if no_weight_decay_name else []
+        self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay
+
+    def __call__(self, model):
+        parameters = [
+            param for param in model.parameters() if param.trainable is True
+        ]
+
+        self.no_weight_decay_param_name_list = [
+            p.name for n, p in model.named_parameters()
+            if any(nd in n for nd in self.no_weight_decay_name_list)
+        ]
+
+        if self.one_dim_param_no_weight_decay:
+            self.no_weight_decay_param_name_list += [
+                p.name for n, p in model.named_parameters() if len(p.shape) == 1
+            ]
+
+        opt = optim.AdamW(
+            learning_rate=self.learning_rate,
+            beta1=self.beta1,
+            beta2=self.beta2,
+            epsilon=self.epsilon,
+            parameters=parameters,
+            weight_decay=self.weight_decay,
+            multi_precision=self.multi_precision,
+            grad_clip=self.grad_clip,
+            name=self.name,
+            lazy_mode=self.lazy_mode,
+            apply_decay_param_fun=self._apply_decay_param_fun)
+        return opt
+
+    def _apply_decay_param_fun(self, name):
+        return name not in self.no_weight_decay_param_name_list
--- a/backend/ppocr/optimizer/regularizer.py
+++ b/backend/ppocr/optimizer/regularizer.py
@@ -0,0 +1,51 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import paddle
+
+
+class L1Decay(object):
+    """
+    L1 Weight Decay Regularization, which encourages the weights to be sparse.
+    Args:
+        factor(float): regularization coeff. Default:0.0.
+    """
+
+    def __init__(self, factor=0.0):
+        super(L1Decay, self).__init__()
+        self.coeff = factor
+
+    def __call__(self):
+        reg = paddle.regularizer.L1Decay(self.coeff)
+        return reg
+
+
+class L2Decay(object):
+    """
+    L2 Weight Decay Regularization, which helps to prevent the model over-fitting.
+    Args:
+        factor(float): regularization coeff. Default:0.0.
+    """
+
+    def __init__(self, factor=0.0):
+        super(L2Decay, self).__init__()
+        self.coeff = float(factor)
+
+    def __call__(self):
+        return self.coeff