mirror of https://github.com/YaoFANGUK/video-subtitle-remover.git
synced 2026-02-16 05:01:06 +08:00
init
62
backend/ppocr/optimizer/__init__.py
Normal file
@@ -0,0 +1,62 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
import paddle

__all__ = ['build_optimizer']


def build_lr_scheduler(lr_config, epochs, step_each_epoch):
    from . import learning_rate
    lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
    lr_name = lr_config.pop('name', 'Const')
    lr = getattr(learning_rate, lr_name)(**lr_config)()
    return lr


def build_optimizer(config, epochs, step_each_epoch, model):
    from . import regularizer, optimizer
    config = copy.deepcopy(config)
    # step1 build lr
    lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch)

    # step2 build regularization
    if 'regularizer' in config and config['regularizer'] is not None:
        reg_config = config.pop('regularizer')
        reg_name = reg_config.pop('name')
        if not hasattr(regularizer, reg_name):
            reg_name += 'Decay'
        reg = getattr(regularizer, reg_name)(**reg_config)()
    elif 'weight_decay' in config:
        reg = config.pop('weight_decay')
    else:
        reg = None

    # step3 build optimizer
    optim_name = config.pop('name')
    if 'clip_norm' in config:
        clip_norm = config.pop('clip_norm')
        grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
    else:
        grad_clip = None
    optim = getattr(optimizer, optim_name)(learning_rate=lr,
                                           weight_decay=reg,
                                           grad_clip=grad_clip,
                                           **config)
    return optim(model), lr
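Below is a minimal usage sketch of the factory above. It is not part of the committed file; it assumes the package is importable as backend.ppocr.optimizer and uses a PaddleOCR-style config dict with illustrative names and values.

# Illustrative only: builds an Adam optimizer with cosine decay plus warmup
# through build_optimizer; the config keys mirror PaddleOCR's YAML 'Optimizer'
# section, and the Linear layer stands in for a real detection/recognition model.
import paddle
from backend.ppocr.optimizer import build_optimizer

model = paddle.nn.Linear(32, 10)
optim_config = {
    'name': 'Adam',
    'beta1': 0.9,
    'beta2': 0.999,
    'lr': {'name': 'Cosine', 'learning_rate': 0.001, 'warmup_epoch': 2},
    'regularizer': {'name': 'L2', 'factor': 1e-5},
}
optimizer, lr_scheduler = build_optimizer(
    optim_config, epochs=100, step_each_epoch=500, model=model)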
310
backend/ppocr/optimizer/learning_rate.py
Normal file
@@ -0,0 +1,310 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from paddle.optimizer import lr
from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay


class Linear(object):
    """
    Linear learning rate decay
    Args:
        lr (float): The initial learning rate. It is a python float number.
        epochs(int): The decay step size. It determines the decay cycle.
        end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
        power(float, optional): Power of polynomial. Default: 1.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Linear, self).__init__()
        self.learning_rate = learning_rate
        self.epochs = epochs * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.epochs,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Cosine(object):
    """
    Cosine learning rate decay
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Cosine, self).__init__()
        self.learning_rate = learning_rate
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.CosineAnnealingDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Step(object):
    """
    Step learning rate decay
    Args:
        step_each_epoch(int): steps each epoch
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): the interval to update.
        gamma (float, optional): The ratio by which the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0. Default: 0.1.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_size,
                 step_each_epoch,
                 gamma,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Step, self).__init__()
        self.step_size = step_each_epoch * step_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.StepDecay(
            learning_rate=self.learning_rate,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Piecewise(object):
    """
    Piecewise learning rate decay
    Args:
        boundaries(list): A list of step numbers. The type of element in the list is python int.
        values(list): A list of learning rate values that will be picked during different epoch boundaries.
            The type of element in the list is python float.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Piecewise, self).__init__()
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.PiecewiseDecay(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate


class CyclicalCosine(object):
    """
    Cyclical cosine learning rate decay
    Args:
        learning_rate(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        cycle(int): period of the cosine learning rate
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 cycle,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(CyclicalCosine, self).__init__()
        self.learning_rate = learning_rate
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
        self.cycle = round(cycle * step_each_epoch)

    def __call__(self):
        learning_rate = CyclicalCosineDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            cycle=self.cycle,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class OneCycle(object):
    """
    One Cycle learning rate decay
    Args:
        max_lr(float): Upper learning rate boundary
        epochs(int): total training epochs
        step_each_epoch(int): steps each epoch
        anneal_strategy(str): {'cos', 'linear'} Specifies the annealing strategy: 'cos' for cosine annealing, 'linear' for linear annealing.
            Default: 'cos'
        three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to 'final_div_factor'
            instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by 'pct_start').
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 max_lr,
                 epochs,
                 step_each_epoch,
                 anneal_strategy='cos',
                 three_phase=False,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(OneCycle, self).__init__()
        self.max_lr = max_lr
        self.epochs = epochs
        self.steps_per_epoch = step_each_epoch
        self.anneal_strategy = anneal_strategy
        self.three_phase = three_phase
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = OneCycleDecay(
            max_lr=self.max_lr,
            epochs=self.epochs,
            steps_per_epoch=self.steps_per_epoch,
            anneal_strategy=self.anneal_strategy,
            three_phase=self.three_phase,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.max_lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Const(object):
    """
    Const learning rate decay
    Args:
        learning_rate(float): initial learning rate
        step_each_epoch(int): steps each epoch
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Const, self).__init__()
        self.learning_rate = learning_rate
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = self.learning_rate
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
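A quick sketch of exercising one of these wrapper classes directly (illustrative, not part of the committed file; import path and numbers are assumptions):

# Illustrative only: the Cosine wrapper returns a paddle LRScheduler
# (wrapped in LinearWarmup when warmup_epoch > 0) that is stepped per iteration.
from backend.ppocr.optimizer.learning_rate import Cosine

scheduler = Cosine(learning_rate=0.001, step_each_epoch=500, epochs=100,
                   warmup_epoch=5)()
for _ in range(10):
    scheduler.step()          # advance one training iteration
print(scheduler.get_lr())     # learning rate after 10 warmup steps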
162
backend/ppocr/optimizer/lr_scheduler.py
Normal file
@@ -0,0 +1,162 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from paddle.optimizer.lr import LRScheduler


class CyclicalCosineDecay(LRScheduler):
    def __init__(self,
                 learning_rate,
                 T_max,
                 cycle=1,
                 last_epoch=-1,
                 eta_min=0.0,
                 verbose=False):
        """
        Cyclical cosine learning rate decay
        A learning rate which can be referred to in https://arxiv.org/pdf/2012.12645.pdf
        Args:
            learning_rate(float): learning rate
            T_max(int): maximum epoch num
            cycle(int): period of the cosine decay
            last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
            eta_min(float): minimum learning rate during training
            verbose(bool): whether to print learning rate for each epoch
        """
        super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch,
                                                  verbose)
        self.cycle = cycle
        self.eta_min = eta_min

    def get_lr(self):
        if self.last_epoch == 0:
            return self.base_lr
        relative_epoch = self.last_epoch % self.cycle
        lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * \
            (1 + math.cos(math.pi * relative_epoch / self.cycle))
        return lr


class OneCycleDecay(LRScheduler):
    """
    One Cycle learning rate decay
    A learning rate which can be referred to in https://arxiv.org/abs/1708.07120
    Code referred to in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    """

    def __init__(self,
                 max_lr,
                 epochs=None,
                 steps_per_epoch=None,
                 pct_start=0.3,
                 anneal_strategy='cos',
                 div_factor=25.,
                 final_div_factor=1e4,
                 three_phase=False,
                 last_epoch=-1,
                 verbose=False):

        # Validate total_steps
        if epochs <= 0 or not isinstance(epochs, int):
            raise ValueError(
                "Expected positive integer epochs, but got {}".format(epochs))
        if steps_per_epoch <= 0 or not isinstance(steps_per_epoch, int):
            raise ValueError(
                "Expected positive integer steps_per_epoch, but got {}".format(
                    steps_per_epoch))
        self.total_steps = epochs * steps_per_epoch

        self.max_lr = max_lr
        self.initial_lr = self.max_lr / div_factor
        self.min_lr = self.initial_lr / final_div_factor

        if three_phase:
            self._schedule_phases = [
                {
                    'end_step': float(pct_start * self.total_steps) - 1,
                    'start_lr': self.initial_lr,
                    'end_lr': self.max_lr,
                },
                {
                    'end_step': float(2 * pct_start * self.total_steps) - 2,
                    'start_lr': self.max_lr,
                    'end_lr': self.initial_lr,
                },
                {
                    'end_step': self.total_steps - 1,
                    'start_lr': self.initial_lr,
                    'end_lr': self.min_lr,
                },
            ]
        else:
            self._schedule_phases = [
                {
                    'end_step': float(pct_start * self.total_steps) - 1,
                    'start_lr': self.initial_lr,
                    'end_lr': self.max_lr,
                },
                {
                    'end_step': self.total_steps - 1,
                    'start_lr': self.max_lr,
                    'end_lr': self.min_lr,
                },
            ]

        # Validate pct_start
        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
            raise ValueError(
                "Expected float between 0 and 1 pct_start, but got {}".format(
                    pct_start))

        # Validate anneal_strategy
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError(
                "anneal_strategy must be one of 'cos' or 'linear', instead got {}".
                format(anneal_strategy))
        elif anneal_strategy == 'cos':
            self.anneal_func = self._annealing_cos
        elif anneal_strategy == 'linear':
            self.anneal_func = self._annealing_linear

        super(OneCycleDecay, self).__init__(max_lr, last_epoch, verbose)

    def _annealing_cos(self, start, end, pct):
        "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
        cos_out = math.cos(math.pi * pct) + 1
        return end + (start - end) / 2.0 * cos_out

    def _annealing_linear(self, start, end, pct):
        "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0."
        return (end - start) * pct + start

    def get_lr(self):
        computed_lr = 0.0
        step_num = self.last_epoch

        if step_num > self.total_steps:
            raise ValueError(
                "Tried to step {} times. The specified number of total steps is {}"
                .format(step_num + 1, self.total_steps))
        start_step = 0
        for i, phase in enumerate(self._schedule_phases):
            end_step = phase['end_step']
            if step_num <= end_step or i == len(self._schedule_phases) - 1:
                pct = (step_num - start_step) / (end_step - start_step)
                computed_lr = self.anneal_func(phase['start_lr'],
                                               phase['end_lr'], pct)
                break
            start_step = phase['end_step']

        return computed_lr
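A small sketch (illustrative, not part of the committed file; import path and numbers are assumptions) of stepping OneCycleDecay and observing the ramp-up/anneal shape:

# Illustrative only: 2 epochs x 5 steps; the lr climbs toward max_lr over the
# first ~30% of steps (default pct_start=0.3), then anneals toward min_lr.
from backend.ppocr.optimizer.lr_scheduler import OneCycleDecay

sched = OneCycleDecay(max_lr=0.01, epochs=2, steps_per_epoch=5)
values = []
for _ in range(10):
    values.append(sched.get_lr())
    sched.step()
print(values)  # starts near max_lr/div_factor, peaks at 0.01, then decays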
234
backend/ppocr/optimizer/optimizer.py
Normal file
@@ -0,0 +1,234 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from paddle import optimizer as optim


class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 weight_decay=None,
                 grad_clip=None,
                 **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=train_params)
        return opt


class Adam(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 lazy_mode=False,
                 **kwargs):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=train_params)
        return opt


class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        rho (float) - rho value in equation.
        epsilon (float) - avoid division by zero, default is 1e-6.
        regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(self,
                 learning_rate,
                 momentum=0.0,
                 rho=0.95,
                 epsilon=1e-6,
                 weight_decay=None,
                 grad_clip=None,
                 **args):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=train_params)
        return opt


class Adadelta(object):
    def __init__(self,
                 learning_rate=0.001,
                 epsilon=1e-08,
                 rho=0.95,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 **kwargs):
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.Adadelta(
            learning_rate=self.learning_rate,
            epsilon=self.epsilon,
            rho=self.rho,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            parameters=train_params)
        return opt


class AdamW(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-8,
                 weight_decay=0.01,
                 multi_precision=False,
                 grad_clip=None,
                 no_weight_decay_name=None,
                 one_dim_param_no_weight_decay=False,
                 name=None,
                 lazy_mode=False,
                 **args):
        super().__init__()
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.weight_decay = 0.01 if weight_decay is None else weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode
        self.multi_precision = multi_precision
        self.no_weight_decay_name_list = no_weight_decay_name.split(
        ) if no_weight_decay_name else []
        self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay

    def __call__(self, model):
        parameters = [
            param for param in model.parameters() if param.trainable is True
        ]

        self.no_weight_decay_param_name_list = [
            p.name for n, p in model.named_parameters()
            if any(nd in n for nd in self.no_weight_decay_name_list)
        ]

        if self.one_dim_param_no_weight_decay:
            self.no_weight_decay_param_name_list += [
                p.name for n, p in model.named_parameters() if len(p.shape) == 1
            ]

        opt = optim.AdamW(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            parameters=parameters,
            weight_decay=self.weight_decay,
            multi_precision=self.multi_precision,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            apply_decay_param_fun=self._apply_decay_param_fun)
        return opt

    def _apply_decay_param_fun(self, name):
        return name not in self.no_weight_decay_param_name_list
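A brief sketch (illustrative, not part of the committed file; import path and model are assumptions) of the AdamW wrapper's selective weight decay, which exempts parameters via apply_decay_param_fun:

# Illustrative only: with one_dim_param_no_weight_decay=True, 1-D parameters
# (biases, norm scales) are collected into no_weight_decay_param_name_list
# and excluded from weight decay by _apply_decay_param_fun.
import paddle
from backend.ppocr.optimizer.optimizer import AdamW

model = paddle.nn.Sequential(paddle.nn.Linear(8, 8), paddle.nn.LayerNorm(8))
builder = AdamW(learning_rate=0.001, weight_decay=0.05,
                one_dim_param_no_weight_decay=True)
opt = builder(model)  # a paddle.optimizer.AdamW instance
print(builder.no_weight_decay_param_name_list)  # bias / LayerNorm parameter names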
51
backend/ppocr/optimizer/regularizer.py
Normal file
@@ -0,0 +1,51 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import paddle


class L1Decay(object):
    """
    L1 Weight Decay Regularization, which encourages the weights to be sparse.
    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L1Decay, self).__init__()
        self.coeff = factor

    def __call__(self):
        reg = paddle.regularizer.L1Decay(self.coeff)
        return reg


class L2Decay(object):
    """
    L2 Weight Decay Regularization, which helps to prevent the model over-fitting.
    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L2Decay, self).__init__()
        self.coeff = float(factor)

    def __call__(self):
        return self.coeff
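Worth noting (an observation, not part of the committed file): L1Decay returns a paddle regularizer object, while L2Decay returns a bare float, and both are valid values for the weight_decay argument of the optimizer wrappers above. A minimal sketch, assuming the import path:

# Illustrative only: both return values can be passed as weight_decay.
from backend.ppocr.optimizer.regularizer import L1Decay, L2Decay

print(type(L1Decay(factor=1e-5)()))  # a paddle L1Decay regularizer object
print(L2Decay(factor=1e-5)())        # 1e-05, a plain float coefficient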