
    ёi\                      S SK Jr  S SKrS SKrS SKJrJrJrJrJ	r	  S SK
r
S SKJr  S SKJr  S SKrS SKJr  S SKJr  S SKJr  S SKJrJrJr  S S	KJr  \(       a  S S
KJr  / SQr " S S\	5      r " S S5      r " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r# " S S\5      r$ " S S\5      r% " S S\5      r& " S S\5      r' " S  S!\5      r( " S" S#\5      r) " S$ S%\5      r* " S& S'\5      r+ " S( S)\5      r, " S* S+\5      r- " S, S-\5      r. " S. S/\5      r/ " S0 S1\5      r0S<S2 jr1S=S3 jr2S>S4 jr3S?S5 jr4S?S6 jr5S?S7 jr6 S@S8 jr7S9 r8S: r9S; r:g)A    )annotationsN)TYPE_CHECKINGAnyCallableLiteral	TypedDict)NotRequired)Tensor)core)
check_type)Variabledefault_main_programin_dygraph_mode)LayerHelper)Sequence)LRScheduler	NoamDecayPiecewiseDecayNaturalExpDecayInverseTimeDecayPolynomialDecayLinearWarmupExponentialDecayMultiStepDecay	StepDecayLambdaDecayReduceOnPlateauCosineAnnealingDecayMultiplicativeDecay
OneCycleLRCyclicLRLinearLRCosineAnnealingWarmRestartsc                  R    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   Srg)_LRStateDict=   int
last_epochfloatlast_lrzNotRequired[_LRStateDict]LinearWarmup_LRzNotRequired[int]cooldown_counterbestnum_bad_epochs N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r/       S/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/optimizer/lr.pyr%   r%   =   s$    ON..&&
$$r6   r%   c                      \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'      S       SS
 jjrSS jrSSS jjrSS jr	SS jr
SS jr\rSS jrSrg)r   H   a  

LRScheduler Base class. Define the common interface of a learning rate scheduler.

There are currently 17 strategies implemented in paddle based on this base class, which are:

- ``NoamDecay``: Related algorithms are derived from `*Attention Is All You Need* <http://blog.inkypy.com>`_ . Please refer to :ref:`api_paddle_optimizer_lr_NoamDecay`.
- ``ExponentialDecay``: The next learning rate is obtained by multiplying the current learning rate by a given decay rate. Please refer to :ref:`api_paddle_optimizer_lr_ExponentialDecay`.
- ``NaturalExpDecay``: Each time the current learning rate is multiplied by the natural index of the given decay rate to obtain the next learning rate. Please refer to :ref:`api_paddle_optimizer_lr_NaturalExpDecay`.
- ``InverseTimeDecay``: The resulting learning rate is inversely proportional to the current number of decays. Please refer to :ref:`api_paddle_optimizer_lr_InverseTimeDecay`.
- ``PolynomialDecay``: The resulting learning rate is the interpolation of the score points between the initial learning rate and the given final learning determined by polynomial computation weights. Please refer to :ref:`api_paddle_optimizer_lr_PolynomialDecay`.
- ``PiecewiseDecay``: Segments decay in a step-like fashion by a given number of steps, and each segment has the same learning rate. Please refer to :ref:`api_paddle_optimizer_lr_PiecewiseDecay`.
- ``CosineAnnealingDecay``: The learning rate varies periodically with the number of steps as a cosine function. Please refer to :ref:`api_paddle_optimizer_lr_CosineAnnealingDecay`.
- ``LinearWarmup``: The learning rate increases linearly with the number of steps to the specified learning rate. Please refer to :ref:`api_paddle_optimizer_lr_LinearWarmup`.
- ``StepDecay``: The learning rate decays every fixed interval number of steps, and the number of step intervals needs to be specified. Please refer to :ref:`api_paddle_optimizer_lr_StepDecay`.
- ``MultiStepDecay``: The learning rate decays at a specific number of steps, and the node location at which the decay occurs needs to be specified. Please refer to :ref:`api_paddle_optimizer_lr_MultiStepDecay`.
- ``LambdaDecay``: The learning rate decays according to a custom lambda function. Please refer to :ref:`api_paddle_optimizer_lr_LambdaDecay`.
- ``ReduceOnPlateau``: The learning rate is adaptively adjusted according to the current metric (typically loss), and the learning rate is attenuated when the loss becomes stable. Please refer to :ref:`api_paddle_optimizer_lr_ReduceOnPlateau`.
- ``MultiplicativeDecay``: The resulting learning rate is obtained by multiplying the current learning rate each time by a lambda function. Please refer to :ref:`api_paddle_optimizer_lr_MultiplicativeDecay`.
- ``OneCycleLR``: The learning rate goes up to the maximum and then down to the minimum. Please refer to :ref:`api_paddle_optimizer_lr_OneCycleLR`.
- ``CyclicLR``: Think of the process of learning rate change as a cycle, with the learning rate changing between the minimum and maximum learning rates according to a fixed frequency. Please refer to :ref:`api_paddle_optimizer_lr_CyclicLR`.
- ``LinearLR``: The learning rate increases linearly with the number of steps to the specified learning rate. Please refer to :ref:`api_paddle_optimizer_lr_LinearLR`.
- ``CosineAnnealingWarmRestarts``: The learning rate varies periodically with the number of steps as a cosine function. Please refer to :ref:`api_paddle_optimizer_lr_CosineAnnealingWarmRestarts`.

User can import it by ``from paddle.optimizer.lr import LRScheduler`` ,

then overload it for your subclass and have a custom implementation of ``get_lr()`` .

Otherwise, an ``NotImplementedError`` exception will be thrown.

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    instance to schedule learning rate.

Examples:
    Here is an example of a simple ``StepDecay`` implementation.

    .. code-block:: python

        >>> import paddle
        >>> from paddle.optimizer.lr import LRScheduler

        >>> class StepDecay(LRScheduler):
        ...     def __init__(self,
        ...                 learning_rate,
        ...                 step_size,
        ...                 gamma=0.1,
        ...                 last_epoch=-1,
        ...                 verbose=False):
        ...         if not isinstance(step_size, int):
        ...             raise TypeError(
        ...                 "The type of 'step_size' must be 'int', but received %s." %
        ...                 type(step_size))
        ...         if gamma >= 1.0:
        ...             raise ValueError('gamma should be < 1.0.')
        ...
        ...         self.step_size = step_size
        ...         self.gamma = gamma
        ...         super().__init__(learning_rate, last_epoch, verbose)
        ...
        ...     def get_lr(self):
        ...         i = self.last_epoch // self.step_size
        ...         return self.base_lr * (self.gamma**i)
        ...
r)   base_lrr*   r'   r(   boolverbosec                   [        U[        [        45      (       d  [        S[	        U5       35      eUS:  a  [        SU 35      e[        U5      U l        [        U5      U l        X l        X0l	        S U l
        U R                  5         g )Nz6The type of learning rate must be float, but received r   zInvalid learning rate: )
isinstancer)   r'   	TypeErrortype
ValueErrorr:   r*   r(   r<   	_var_namestep)selflearning_rater(   r<   s       r7   __init__LRScheduler.__init__   s     -%66HmI\H]^  16}oFGG]+]+$		r6   c                    U R                   $ )z8
Return latest computed learning rate on current epoch.
)r*   rD   s    r7   __call__LRScheduler.__call__   s     ||r6   Nc           	        Uc+  U =R                   S-  sl         U R                  5       U l        OBXl         [        U S5      (       a  U R	                  5       U l        OU R                  5       U l        U R
                  (       a>  [        SU R                    SU R                  R                   SU R                   S35        gg)a  

``step`` should be called after ``optimizer.step`` . It will update the learning rate in optimizer according to current ``epoch`` .
The new learning rate will take effect on next ``optimizer.step`` .

Args:
    epoch (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1.

Returns:
    None
Examples:
    .. code-block:: python

        >>> import paddle
        >>> value = paddle.arange(26, dtype='float32')
        >>> a = paddle.reshape(value, [2, 13])
        >>> linear = paddle.nn.Linear(13, 5)
        >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
        ...                             parameters = linear.parameters())
        >>> out = linear(a)
        >>> out.backward()
        >>> adadelta.step()
        >>> adadelta.clear_grad()

    .. code-block:: python

        >>> import paddle
        >>> value = paddle.arange(26, dtype='float32')
        >>> a = paddle.reshape(value, [2, 13])
        >>> linear = paddle.nn.Linear(13, 5)
        >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
        ...                             parameters = linear.parameters())
        >>> out = linear(a)
        >>> out.backward()
        >>> adadelta.step()
        >>> adadelta.clear_grad()
N   _get_closed_form_lrEpoch :  set learning rate to .)	r(   get_lrr*   hasattrrN   r<   print	__class__r0   )rD   epochs     r7   rC   LRScheduler.step   s    L =OOq O;;=DL#Ot233#779#{{}<<)DNN,C,C+DDZ[_[g[gZhhij r6   c                   U R                  5         0 nU R                   H^  nX R                  ;  a  M  U R                  U   n[        U[        5      (       a"  UR
                  S:X  d   S5       e[        U5      nX1U'   M`     U$ )z`

Returns the state of the scheduler as a :class:`dict`.

It is a subset of ``self.__dict__`` .
rM   z'numel of Tensor in state_dict must be 1)
state_keyskeys__dict__r>   r
   sizer)   )rD   
state_dictkeyvalues       r7   r^   LRScheduler.state_dict   s}     	
99C--'MM#&E%((zzQ = e#sO  r6   c                    SS/U l         g)a  

For those subclass who overload ``LRScheduler`` (Base Class). Acquiescently, "last_epoch, last_lr" will be saved by ``self.keys = ['last_epoch', 'last_lr']`` .

``last_epoch`` is the current epoch num, and ``last_lr`` is the current learning rate.

If you want to change the default behavior, you should have a custom implementation of ``_state_keys()`` to redefine ``self.keys`` .

r(   r*   Nr[   rI   s    r7   rZ   LRScheduler.state_keys   s     "9-	r6   c                   U R                  5         U R                   H(  nX!;   a  X   U R                  U'   M  [        SU S35      e   [	        U5      [	        U R                  5      :  a  [
        R                  " S5        gg)z

Loads the schedulers state.
zGPlease check whether state_dict is correct for optimizer. Can't find [ z ] in state_dictzThere are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dictN)rZ   r[   r\   RuntimeErrorlenwarningswarn)rD   r^   r_   s      r7   set_state_dictLRScheduler.set_state_dict  s{    
 	99C %/_c""]^a]bbrs 	  z?S^+MM [ ,r6   c                    [         e)z

For those subclass who overload ``LRScheduler`` (Base Class), User should have a custom implementation of ``get_lr()`` .

Otherwise, an ``NotImplementedError`` exception will be thrown.
)NotImplementedErrorrI   s    r7   rS   LRScheduler.get_lr  s
     "!r6   )rB   r:   r[   r(   r*   r<   皙?F)rE   r)   r(   r'   r<   r;   returnNonerr   r)   NrW   
int | Nonerr   rs   rr   r%   rr   rs   r^   r%   rr   rs   )r0   r1   r2   r3   __doc__r4   rF   rJ   rC   r^   rZ   rj   set_dictrS   r5   r/   r6   r7   r   r   H   s{    DL NNOM  #	  	
 
(3j0
.& H"r6   r   c                  p   ^  \ rS rSr% SrS\S'   S\S'      S	           S
U 4S jjjrSS jrSrU =r	$ )r   i'  a  

Applies Noam Decay to the initial learning rate.

The algorithm can be described as following.

.. math::

    new\_learning\_rate = learning\_rate * d_{model}^{-0.5} * min(epoch^{-0.5}, epoch * warmup\_steps^{-1.5})

Please reference `attention is all you need <https://arxiv.org/pdf/1706.03762.pdf>`_


Args:
    d_model(int): The dimensionality of input and output feature vector of model. It is a python int number.
    warmup_steps(int): The number of warmup steps. A super parameter. It is a python int number
    learning_rate (float): The initial learning rate. It is a python float number. Default: 1.0.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``NoamDecay`` instance to schedule learning rate.

Examples:
    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.NoamDecay(d_model=100, warmup_steps=100, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.NoamDecay(d_model=100, warmup_steps=100, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r'   d_modelwarmup_stepsc                `   > US::  a  [        S5      eXl        X l        [        TU ]  X4U5        g )Nr   zd_model should be grater than 0)rA   r~   r   superrF   )rD   r~   r   rE   r(   r<   rV   s         r7   rF   NoamDecay.__init__z  s2     a<>??(G<r6   c                    U R                   S:X  a  SnOU R                   S-  nU R                  S-  U R                   -  nU R                  U R                  S-  -  [	        X5      -  $ )Nr   rM               )r(   r   r:   r~   min)rD   abs      r7   rS   NoamDecay.get_lr  s[    ??aA%At#doo5||t||T12SY>>r6   )r~   r   )      ?rq   F)r~   r'   r   r'   rE   r)   r(   r'   r<   r;   rr   rs   rt   
r0   r1   r2   r3   r{   r4   rF   rS   r5   __classcell__rV   s   @r7   r   r   '  sp    M^ L  #== = 	=
 = = 
= =? ?r6   r   c                  j   ^  \ rS rSr% SrS\S'   S\S'     S
         SU 4S jjjrSS jrS	rU =r	$ )r   i  ag  

Piecewise learning rate scheduler.

The algorithm can be described as the code below:

.. code-block:: text

    boundaries = [100, 200]
    values = [1.0, 0.5, 0.1]
    if epoch < 100:
        learning_rate = 1.0
    elif 100 <= global_step < 200:
        learning_rate = 0.5
    else:
        learning_rate = 0.1

Args:
    boundaries(list|tuple): A list/tuple of steps numbers. The type of element in the list is python int.
    values(list|tuple): A list/tuple of learning rate values that will be picked during different epoch boundaries.
        The type of element in the list is python float. The ``values`` have one more element than ``boundaries``.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``PiecewiseDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
Sequence[int]
boundariesSequence[float]valuesc                   > [        U5      S:X  a  [        S5      e[        U5      [        U5      ::  a'  [        S[        U5       S[        U5      S-    S35      eXl        X l        [        TU ]  X4S9  g )Nr   zThe boundaries cannot be empty.zLThe values have one more element than boundaries, but received len(values) [z] < len(boundaries) + 1 [rM   z].)r(   r<   )rg   rA   r   r   r   rF   )rD   r   r   r(   r<   rV   s        r7   rF   PiecewiseDecay.__init__  s     z?a>??v;#j/)^_bci_j^k  lE  FI  JT  FU  XY  FY  EZ  Z\  ]  %J@r6   c                    [        [        U R                  5      5       H1  nU R                  U R                  U   :  d  M"  U R                  U   s  $    U R                  [        U R                  5      S-
     $ NrM   )rangerg   r   r(   r   rD   is     r7   rS   PiecewiseDecay.get_lr  s\    s4??+,A!33{{1~% - {{3t{{+a/00r6   )r   r   rq   F)
r   r   r   r   r(   r'   r<   r;   rr   rs   rt   r   r   s   @r7   r   r     sk    Rh  A!A  A 	A
 A 
A A&1 1r6   r   c                  `   ^  \ rS rSr% SrS\S'     S         S	U 4S jjjrS
S jrSrU =r	$ )r   i  a  

Applies natural exponential decay to the initial learning rate.

The algorithm can be described as following:

.. math::

    new\_learning\_rate = learning\_rate * e^{- gamma * epoch}

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    gamma (float): A Ratio to update the learning rate, should greater than 0.0 to make learning rate decay. Default: 0.1.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``NaturalExpDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_grad()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r)   gammac                L   > US:  d   S5       eX l         [        TU ]	  XU5        g )N        zH 'gamma' must be a positive number so that the learning rate will decay.r   r   rF   rD   rE   r   r(   r<   rV   s        r7   rF   NaturalExpDecay.__init__Q  s1     s{ 	
V	
{ 
G<r6   c                |    U R                   [        R                  " SU R                  -  U R                  -  5      -  $ )Nrq   )r:   mathexpr   r(   rI   s    r7   rS   NaturalExpDecay.get_lr^  s+    ||dhhrDJJ'HIIIr6   r   r   
rE   r)   r   r)   r(   r'   r<   r;   rr   rs   rt   r   r   s   @r7   r   r     s_    HT L == = 	=
 = 
= =J Jr6   r   c                  `   ^  \ rS rSr% SrS\S'     S         S	U 4S jjjrS
S jrSrU =r	$ )r   ib  aS  

Applies inverse time decay to the initial learning rate.

The algorithm can be described as following:

.. math::

    new\_learning\_rate = \frac{learning\_rate}{1 + gamma * epoch}

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    gamma (float): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
        It should be less than 1.0. Default: 0.1.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``InverseTimeDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_grad()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r)   r   c                2   > X l         [        TU ]	  XU5        g ru   r   r   s        r7   rF   InverseTimeDecay.__init__  s     
G<r6   c                T    U R                   SU R                  U R                  -  -   -  $ r   r:   r   r(   rI   s    r7   rS   InverseTimeDecay.get_lr  s#    ||q4::#??@@r6   r   r   r   rt   r   r   s   @r7   r   r   b  s_    KZ L == = 	=
 = 
= =A Ar6   r   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'        S             SU 4S
 jjjrSS jrSrU =r	$ )r   i  a  

Applies polynomial decay to the initial learning rate.

The algorithm can be described as following.

If cycle is set to True, then:

.. math::

    decay\_steps & = decay\_steps * math.ceil(\frac{epoch}{decay\_steps})

    new\_learning\_rate & = (learning\_rate-end\_lr)*(1-\frac{epoch}{decay\_steps})^{power}+end\_lr

If cycle is set to False, then:

.. math::

    epoch & = min(epoch, decay\_steps)

    new\_learning\_rate & = (learning\_rate-end\_lr)*(1-\frac{epoch}{decay\_steps})^{power}+end\_lr


Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    decay_steps(int): The decay step size. It determines the decay cycle. It must be a positive integer.
    end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
    power(float, optional): Power of polynomial, should greater than 0.0 to get learning rate decay. Default: 1.0.
    cycle(bool, optional): Whether the learning rate rises again. If True, then the learning rate will rise when it decrease
        to ``end_lr`` .  If False, the learning rate is monotone decreasing. Default: False.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``PolynomialDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_grad()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r'   decay_stepsr)   end_lrpowerr;   cyclec                   > US:  a  [        U[        5      (       d   S5       eX l        X0l        US:  d   S5       eX@l        XPl        [        TU ]  XU5        g )Nr   z* 'decay_steps' must be a positive integer.r   zG 'power' must be greater than 0.0 so that the learning rate will decay.)r>   r'   r   r   r   r   r   rF   )	rD   rE   r   r   r   r   r(   r<   rV   s	           r7   rF   PolynomialDecay.__init__#  sf     Q:k3#?#? 	
8	
? 's{ 	
U	
{ 

G<r6   c                   U R                   nU R                  nU R                  (       aa  [        R                  " [        U R                   5      [        U R                  5      -  5      nU R                   S:X  a  SnU R                  U-  nO [        U R                   U R                  5      nU R                  U R                  -
  S[        U5      [        U5      -  -
  U R                  -  -  U R                  -   $ )Nr   rM   )
r(   r   r   r   ceilr)   r   r:   r   r   )rD   tmp_epoch_numtmp_decay_stepsdiv_ress       r7   rS   PolynomialDecay.get_lr9  s    **::iidoo&t/?/?)@@G !#"..8O1A1ABMt{{*}%o(>>>4::M
KK 	r6   )r   r   r   r   )-C6?r   Frq   F)rE   r)   r   r'   r   r)   r   r)   r   r;   r(   r'   r<   r;   rt   r   r   s   @r7   r   r     s    [z MLK == = 	=
 = = = = =, r6   r   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'     S           SU 4S
 jjjrSU 4S jjrSU 4S jjrSS jr	Sr
U =r$ )r   iL  aN  

Linear learning rate warm up strategy. Update the learning rate preliminarily before the normal learning rate scheduler.
For more information, please refer to `Bag of Tricks for Image Classification with Convolutional Neural Networks <https://arxiv.org/abs/1812.01187>`_

When epoch < warmup_steps, learning rate is updated as:

.. math::

        lr = start\_lr + (end\_lr - start\_lr) * \frac{epoch}{warmup\_steps}

where start_lr is the initial learning rate, and end_lr is the final learning rate;

When epoch >= warmup_steps, learning rate is updated as:

.. math::

        lr = learning_rate

where ``learning_rate`` is float or any subclass of ``LRScheduler`` .

Args:
    learning_rate (float|LRScheduler): The learning rate after warm-up. It is a python float number or any subclass of ``LRScheduler`` .
    warmup_steps (int): total steps of warm up. It must be a positive integer.
    start_lr (float): Initial learning rate of warm up.
    end_lr (float): Final learning rate of warm up.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``LinearWarmup`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.LinearWarmup(
        ...     learning_rate=0.5,
        ...     warmup_steps=20,
        ...     start_lr=0,
        ...     end_lr=0.5,
        ...     verbose=True,
        ... )
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.LinearWarmup(
        ...         learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True
        ...     )
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
float | LRSchedulerrE   r'   r   r)   start_lrr   c                  > [        U[        [        [        45      nU(       d  [	        SU 35      eXl        US:  a  [        U[        5      (       d   S5       eX l        X0l        X@l        XC:  d   SU SU 35       e[        TU ])  X5U5        g )NzUthe type of learning_rate should be [int, float or LRScheduler], the current type is r   z+ 'warmup_steps' must be a positive integer.zend_lr z must be greater than start_lr )r>   r)   r'   r   r?   rE   r   r   r   r   rF   )	rD   rE   r   r   r   r(   r<   
type_checkrV   s	           r7   rF   LinearWarmup.__init__  s      sK/HI
ghugvw  +aJ|S$A$A 	
9	
A )   	
fX<XJG	
  	w7r6   c                   > [         TU ]  5       n[        U R                  [        5      (       a  U R                  R                  5       US'   U$ )zl
Returns the state of the LinearWarmup scheduler as a :class:`dict`.

It is a subset of ``self.__dict__`` .
r+   )r   r^   r>   rE   r   rD   r^   rV   s     r7   r^   LinearWarmup.state_dict  sE     W')
d((+66,0,>,>,I,I,KJ()r6   c                   > [         TU ]  U5        [        U R                  [        5      (       a  U R                  R                  US   5        gg)z.
Loads state_dict for LinearWarmup scheduler.
r+   N)r   rj   r>   rE   r   r   s     r7   rj   LinearWarmup.set_state_dict  sD     	z*d((+66--j9J.KL 7r6   c                   U R                   U R                  :  aR  U R                  U R                  -
  [	        U R                   5      -  [	        U R                  5      -  U R                  -   $ [        U R                  [        5      (       aB  U R                  R                  U R                   U R                  -
  5        U R                  5       $ U R                  $ ru   )	r(   r   r   r   r)   r>   rE   r   rC   rI   s    r7   rS   LinearWarmup.get_lr  s    ??T...KK$--/54 d''()+/==9 9 $,,k::""''$:K:K(KL))++%%%r6   )r   rE   r   r   r   )rE   r   r   r'   r   r)   r   r)   r(   r'   r<   r;   rx   rz   rt   )r0   r1   r2   r3   r{   r4   rF   r^   rj   rS   r5   r   r   s   @r7   r   r   L  s    _B '&OM 8*8 8 	8
 8 8 8 84	M
& 
&r6   r   c                  `   ^  \ rS rSr% SrS\S'     S         S	U 4S jjjrS
S jrSrU =r	$ )r   i  a5  

Update learning rate by `gamma` each epoch.

The algorithm can be described as following.

.. math::

    new\_learning\_rate = last\_learning\_rate * gamma

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    gamma (float): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
        It should be in interval (0.0, 1.0).
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``ExponentialDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_grad()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r)   r   c                X   > US:  a  US:  d   S5       eX l         [        TU ]	  XU5        g )Nr   r   zM 'gamma' must be in interval (0.0, 1.0) so that the learning rate will decay.r   r   s        r7   rF   ExponentialDecay.__init__=  s8     s{us{ 	
[	
* 
G<r6   c                N    U R                   U R                  U R                  -  -  $ ru   r   rI   s    r7   rS   ExponentialDecay.get_lrJ  s    ||tzz4??:;;r6   r   r   r   rt   r   r   s   @r7   r   r     s]    KZ L == = 	=
 = 
= =< <r6   r   c                  l   ^  \ rS rSr% SrS\S'   S\S'      S
         SU 4S jjjrSS jrS	rU =r	$ )r   iN  a  
Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones.

The algorithm can be described as the code below.

.. code-block:: text

    learning_rate = 0.5
    milestones = [30, 50]
    gamma = 0.1
    if epoch < 30:
        learning_rate = 0.5
    elif epoch < 50:
        learning_rate = 0.05
    else:
        learning_rate = 0.005

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    milestones (tuple|list): List or tuple of each boundaries. Must be increasing.
    gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
        It should be less than 1.0. Default: 0.1.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .


Returns:
    ``MultiStepDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r   
milestonesr)   r   c                >  >^ [        T[        [        45      (       d  [        S[	        T5       S35      e[        U4S j[        [        T5      S-
  5       5       5      (       d  [        S5      eUS:  a  [        S5      eTU l	        X0l
        [        TU ]1  XU5        g )NzQThe type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received rR   c              3  @   >#    U  H  nTU   TUS -      :  v   M     g7f)rM   Nr/   ).0r   r   s     r7   	<genexpr>*MultiStepDecay.__init__.<locals>.<genexpr>  s*      
/ qMJq1u--/s   rM   z.The elements of milestones must be incrementedr   gamma should be < 1.0.)r>   tuplelistr?   r@   allr   rg   rA   r   r   r   rF   )rD   rE   r   r   r(   r<   rV   s     `   r7   rF   MultiStepDecay.__init__  s     *udm44cdhisdtcuuvw   
3z?Q./
 
 
 MNNC<566$
G<r6   c                    [        [        U R                  5      5       H>  nU R                  U R                  U   :  d  M"  U R                  U R
                  U-  -  s  $    U R                  U R
                  [        U R                  5      -  -  $ ru   )r   rg   r   r(   r:   r   r   s     r7   rS   MultiStepDecay.get_lr  si    s4??+,A!33||tzz1}55 - ||tzzS-AABBr6   )r   r   ro   )
rE   r)   r   r   r   r)   r(   r'   r<   r;   rt   r   r   s   @r7   r   r   N  sh    Tl L == "= 	=
 = = =2C Cr6   r   c                  p   ^  \ rS rSr% SrS\S'   S\S'      S
           SU 4S jjjrSS jrS	rU =r	$ )r   i  al  
Update the learning rate of ``optimizer`` by ``gamma`` every ``step_size`` number of epoch.

The algorithm can be described as the code below.

.. code-block:: text

    learning_rate = 0.5
    step_size = 30
    gamma = 0.1

    learning_rate = 0.5     if epoch < 30
    learning_rate = 0.05    if 30 <= epoch < 60
    learning_rate = 0.005   if 60 <= epoch < 90
    ...

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    step_size (int): the interval to update. It must be a positive integer.
    gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
        It should be less than 1.0. Default: 0.1.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``StepDecay`` instance to schedule learning rate.


Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_grad()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r'   	step_sizer)   r   c                   > [        U[        5      (       d  [        S[        U5       S35      eUS:  a  [	        S5      eUS:  a  [        U[        5      (       d   S5       eX l        X0l        [        TU ]!  XU5        g )Nz4The type of 'step_size' must be 'int', but received rR   r   r   r   z( 'step_size' must be a positive integer.)	r>   r'   r?   r@   rA   r   r   r   rF   )rD   rE   r   r   r(   r<   rV   s         r7   rF   StepDecay.__init__!  s     )S))FtIFWWXY  C<5661}Is!;!; 	
6	
; #
G<r6   c                l    U R                   U R                  -  nU R                  U R                  U-  -  $ ru   )r(   r   r:   r   r   s     r7   rS   StepDecay.get_lr7  s+    OOt~~-||tzz1}--r6   )r   r   ro   )rE   r)   r   r'   r   r)   r(   r'   r<   r;   rr   rs   rt   r   r   s   @r7   r   r     so    Sj NL == = 	=
 = = 
= =,. .r6   r   c                  X   ^  \ rS rSr% SrS\S'     S       S	U 4S jjjrS rSrU =r	$ )
r   i<  al  
Sets the learning rate of ``optimizer`` by function ``lr_lambda`` . ``lr_lambda`` is function which receives ``epoch`` .

The algorithm can be described as the code below.

.. code-block:: text

    learning_rate = 0.5        # init learning_rate
    lr_lambda = lambda epoch: 0.95 ** epoch

    learning_rate = 0.5        # epoch 0, 0.5*0.95**0
    learning_rate = 0.475      # epoch 1, 0.5*0.95**1
    learning_rate = 0.45125    # epoch 2, 0.5*0.95**2

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    lr_lambda (function): A function which computes a factor by ``epoch`` , and then multiply the initial learning rate by this factor.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``LambdaDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x: 0.95**x, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x: 0.95**x, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
Callable[[int], float]	lr_lambdac                   > [        U5      (       d  [        S[        U5       S35      eX l        [        TU ]  XU5        g )NzJThe type of 'lr_lambda' in 'LambdaDecay' must be 'function', but received rR   callabler?   r@   r   r   rF   rD   rE   r   r(   r<   rV   s        r7   rF   LambdaDecay.__init__  sH     	""\]abk]l\mmno  #G<r6   c                R    U R                   U R                  U R                  5      -  $ ru   )r:   r   r(   rI   s    r7   rS   LambdaDecay.get_lr  s    ||dnnT__===r6   r   r   )rE   r)   r   r   r(   r'   r<   r;   r   r   s   @r7   r   r   <  sT    N` &% == *= 	=
 = => >r6   r   c                      \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
\S'   S\S'   S\S'   S\S'            S                     SS jjrSS jr S     SS jjrSS jr	Sr
g)r   i  a6  
Reduce learning rate when ``metrics`` has stopped descending. Models often benefit from reducing the learning rate
by 2 to 10 times once model performance has no longer improvement.

The ``metrics`` is the one which has been pass into ``step`` , it's shape must [] or [1]. When ``metrics``
stop descending for a ``patience`` number of epochs, the learning rate will be reduced to ``learning_rate * factor`` .
(Specially, ``mode`` can also be set to ``'max`` , in this case, when ``metrics`` stop ascending for a ``patience``
number of epochs, the learning rate will be reduced.)

In addition, After each reduction, it will wait a ``cooldown`` number of epochs before resuming above operation.

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    mode (str, optional): ``'min'`` or ``'max'`` can be selected. Normally, it is ``'min'`` , which means that the
        learning rate will reduce when ``loss`` stops descending. Specially, if it's set to ``'max'`` ,  the learning
        rate will reduce when ``loss`` stops ascending. Default: ``'min'`` .
    factor (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * factor`` .
        It should be less than 1.0. Default: 0.1.
    patience (int, optional): When ``loss`` doesn't improve for this number of epochs, learning rate will be reduced.
        Default: 10.
    threshold (float, optional): ``threshold`` and ``threshold_mode`` will determine the minimum change of ``loss`` .
        This make tiny changes of ``loss`` will be ignored. Default: 1e-4.
    threshold_mode (str, optional): ``'rel'`` or ``'abs'`` can be selected. In ``'rel'`` mode, the minimum change of ``loss``
        is ``last_loss * threshold`` , where ``last_loss`` is ``loss`` in last epoch. In ``'abs'`` mode, the minimum
        change of ``loss`` is ``threshold`` . Default: ``'rel'`` .
    cooldown (int, optional): The number of epochs to wait before resuming normal operation. Default: 0.
    min_lr (float, optional): The lower bound of the learning rate after reduction. Default: 0.
    epsilon (float, optional): Minimal decay applied to lr. If the difference between new and old lr is smaller than epsilon,
        the update is ignored. Default: 1e-8.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False``.


Returns:
    ``ReduceOnPlateau`` instance to schedule learning rate.


Examples:
    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step(loss)  # If you update learning rate each step
        ...     # scheduler.step(loss)        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step(out[0])  # If you update learning rate each step
        ...     # scheduler.step(out[0])        # If you update learning rate each epoch
Literal['min', 'max']moder)   factorr'   patience	thresholdLiteral['rel', 'abs']threshold_modecooldownmin_lrepsilonc                &   UR                  5       nUS;  a  [        SU-   S-   5      eX l        US:  a  [        S5      eX0l        UR                  5       nUS;  a  [        SU-   S-   5      eX`l        [        U[        [        45      (       d  [        S[        U5       S	35      eX@l
        XPl        X`l        Xpl        Xl        Xl        S
U l        S U l        S
U l        [        U5      U l        [        U5      U l        S
U l        Xl        S U l        g )N)r   maxzmode: z is unknown!r   z5new_lr = origin_lr * gamma and gamma should be < 1.0.)relabszthreshold mode: zOThe type of 'learning_rate' in 'ReduceOnPlateau' must be 'float', but received rR   r   )lowerrA   r   r   r   r>   r)   r'   r?   r@   r   r   r   r   r   r,   r-   r.   r:   r*   r(   r<   rB   )rD   rE   r   r   r   r   r   r   r   r   r<   s              r7   rF   ReduceOnPlateau.__init__  s    zz|~%X_~=>>	S=G  '--//"^3nD  --%66abfgtbuavvwx  !",  !	 ]+]+r6   c                    / SQU l         g )N)r,   r-   r.   r(   r*   rc   rI   s    r7   rZ   ReduceOnPlateau.state_keys>  s    
	r6   Nc           	     0   Uc  U R                   S-   U l         OX l         [        U[        R                  R                  [
        R                  45      (       a&  UR                  S:X  d   SUR                   S35       eOP[        U[        [        [
        R                  [
        R                  45      (       d  [        S[        U5       35      eU R                  S:  a  U =R                  S-  sl        gU R                  b   U R!                  XR                  5      (       a  Xl        SU l        OU =R"                  S-  sl        U R"                  U R$                  :  a  U R&                  U l        SU l        [)        U R*                  U R,                  -  U R.                  5      nU R*                  U-
  U R0                  :  aV  X0l        U R2                  (       a>  [5        SU R                    SU R6                  R8                   S	U R*                   S
35        gggg)a  
step should be called after `optimizer.step()` . It will update the learning rate in optimizer according to ``metrics`` .
The new learning rate will take effect on next epoch.

Args:
    metrics (Tensor|numpy.ndarray|float): Which will be monitored to determine whether the learning rate will reduce.
        If it stop descending for a ``patience`` number of epochs, the learning rate will reduce. If it's 'Tensor' or
        'numpy.ndarray', its numel must be 1.
    epoch (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1.

Returns:
    None

Examples:
    Please refer to the example of current LRScheduler.
NrM   z?the size of metrics must be 1, but the current metrics.size is z=. Maybe that you should call paddle.mean to process it first.z^metrics must be 'int', 'float', 'np.float64', 'numpy.ndarray' or 'paddle.Tensor', but receive r   rO   rP   rQ   rR   )r(   r>   r   eagerr
   numpyndarrayr]   r'   r)   float32float64r?   r@   r,   r-   
_is_betterr.   r   r   r   r*   r   r   r   r<   rU   rV   r0   )rD   metricsrW   new_lrs       r7   rC   ReduceOnPlateau.stepG  s   * ="oo1DO#O g

 1 15==ABB<<1$ QRYR^R^Q_ `C C$ c5%--?
 
 pquv}q~p  A    1$!!Q&!yy DOOGYY$G$G#	&'###q(#""T]]2(,%&'#T\\DKK7E<<&(4<<7#)L||$T__$5R8O8O7PPfgkgsgsfttuv $ 8	 3r6   c                R   U R                   S:X  a#  U R                  S:X  a  XX R                  -  -
  :  $ U R                   S:X  a!  U R                  S:X  a  XU R                  -
  :  $ U R                   S:X  a#  U R                  S:X  a  XX R                  -  -   :  $ XU R                  -   :  $ )Nr   r   r   r   )r   r   r   )rD   currentr-   s      r7   r  ReduceOnPlateau._is_better  s    99$"5"5">D>>$9999YY%D$7$75$@DNN222YY%D$7$75$@D>>$9999 DNN222r6   )rB   r:   r-   r   r,   r   r   r[   r(   r*   r   r   r.   r   r   r   r<   )	r   rp   
   r   r   r   r   g:0yE>F)rE   r)   r   r   r   r)   r   r'   r   r)   r   r   r   r'   r   r)   r   r)   r<   r;   rr   rs   ry   ru   )r  z!Tensor | npt.NDArray[Any] | floatrW   rw   rr   rs   )rr   r;   )r0   r1   r2   r3   r{   r4   rF   rZ   rC   r  r5   r/   r6   r7   r   r     s    Zx  MM))MMN
 ',0533 $3 	3
 3 3 .3 3 3 3 3 
3l
 !929 9 
	9v3r6   r   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'      S           SU 4S jjjrSS	 jrS
 rSr	U =r
$ )r   i  a+  

Set the learning rate using a cosine annealing schedule, where :math:`\eta_{max}` is set to
the initial learning_rate. :math:`T_{cur}` is the number of epochs since the last restart in
SGDR.

The algorithm can be described as following.

.. math::

    \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
    + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
    & T_{cur} \neq (2k+1)T_{max};

    \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
    \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
    & T_{cur} = (2k+1)T_{max}.

It has been proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts <https://arxiv.org/abs/1608.03983>`_.
Note that this only implements the cosine annealing part of SGDR, and not the restarts.

Args:
    learning_rate (float): The initial learning rate, that is :math:`\eta_{max}` . It can be set to python float or int number.
    T_max (int): Maximum number of iterations. It is half of the decay cycle of learning rate. It must be a positive integer.
    eta_min (float|int, optional): Minimum learning rate, that is :math:`\eta_{min}` . Default: 0.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``CosineAnnealingDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
r'   T_maxr)   eta_minr(   c                V  > [        U[        5      (       d  [        S[        U5       S35      e[        U[        [        45      (       d  [        S[        U5       S35      eUS:  a  [        U[        5      (       d   S5       eX l        [	        U5      U l        [        TU ]!  XU5        g )NzJThe type of 'T_max' in 'CosineAnnealingDecay' must be 'int', but received rR   zSThe type of 'eta_min' in 'CosineAnnealingDecay' must be 'float, int', but received r   z$ 'T_max' must be a positive integer.)	r>   r'   r?   r@   r)   r  r  r   rF   )rD   rE   r  r  r(   r<   rV   s         r7   rF   CosineAnnealingDecay.__init__  s     %%%\]abg]h\iijk  'E3<00efjkrfsettuv  qyZs33 	
2	
3 
W~G<r6   c                   U R                   S:X  a  U R                  $ U R                   S-
  U R                  -
  SU R                  -  -  S:X  a^  U R                  U R                  U R                  -
  S[
        R                  " [
        R                  U R                  -  5      -
  -  S-  -   $ S[
        R                  " [
        R                  U R                   -  U R                  -  5      -   S[
        R                  " [
        R                  U R                   S-
  -  U R                  -  5      -   -  U R                  U R                  -
  -  U R                  -   $ )Nr   rM      )r(   r:   r  r*   r  r   cospirI   s    r7   rS   CosineAnnealingDecay.get_lr  s   ??a<<oo!DJJ.1tzz>BaG<<$,,.txx$** 4557 DHHTWWt6CDDDOOa$784::EFF
\\DLL(*,0LL9 	9r6   c                    U R                   U R                  U R                   -
  S[        R                  " [        R                  U R
                  -  U R                  -  5      -   -  S-  -   $ NrM   r  )r  r:   r   r  r  r(   r  rI   s    r7   rN   (CosineAnnealingDecay._get_closed_form_lr  sX    LL||dll*488DGGdoo5

BCCE	
r6   )r  r  )r   rq   F)rE   r)   r  r'   r  r)   r(   r'   r<   r;   rr   rs   rt   )r0   r1   r2   r3   r{   r4   rF   rS   rN   r5   r   r   s   @r7   r   r     sy    Vp JNO == = 	=
 = = 
= =.9
 
r6   r   c                  `   ^  \ rS rSr% SrS\S'     S         S	U 4S jjjrS
S jrSrU =r	$ )r   i  a  
Multiply the learning rate of ``optimizer`` by the factor given in function ``lr_lambda`` .

The algorithm can be described as the code below.

.. code-block:: text

    learning_rate = 0.5        # init learning_rate
    lr_lambda = lambda epoch: 0.95

    learning_rate = 0.5        # epoch 0,
    learning_rate = 0.475      # epoch 1, 0.5*0.95
    learning_rate = 0.45125    # epoch 2, 0.475*0.95

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    lr_lambda (function): A function which computes a factor by ``epoch`` , and then multiply the last learning rate by this factor.
    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``MultiplicativeDecay`` instance to schedule learning rate.

Examples:

    .. code-block:: python

        >>> import paddle

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(20):
        ...     for batch_id in range(5):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()    # If you update learning rate each step
        ...     # scheduler.step()        # If you update learning rate each epoch
        ...
r   r   c                   > [        U5      (       d  [        S[        U5       S35      eX l        [        TU ]  XU5        g )NzRThe type of 'lr_lambda' in 'MultiplicativeDecay' must be 'function', but received rR   r   r   s        r7   rF   MultiplicativeDecay.__init__M  sH     	""deijsetduuvw  #G<r6   c                    U R                   n[        SU R                  S-   5       H  nXR                  U5      -  nM     U$ r   )r:   r   r(   r   )rD   cur_lrrW   s      r7   rS   MultiplicativeDecay.get_lr\  s;    1doo12EnnU33F 3r6   r   r   )
rE   r)   r   r   r(   r'   r<   r;   rr   rs   rt   r   r   s   @r7   r   r     s]    ,\ &% == *= 	=
 = 
= = r6   r   c                     ^  \ rS rSrSr       S                   S	U 4S jjjr        S
S jr        S
S jrSS jrSr	U =r
$ )r    ic  u  

Sets the learning rate according to the one cycle learning rate scheduler.
The scheduler adjusts the learning rate from an initial learning rate to the maximum learning rate and then
from that maximum learning rate to the minimum learning rate, which is much less than the initial learning rate.

It has been proposed in `Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates <https://arxiv.org/abs/1708.07120>`_.

Please note that the default behaviour of this scheduler follows the fastai implementation of one cycle,
which claims that “unpublished work has shown even better results by using only two phases”.
If you want the behaviour of this scheduler to be consistent with the paper, please set ``three_phase=True`` .

Also note that you should update learning rate each step.

Args:
    max_learning_rate (float): The maximum learning rate. It is a python float number. Functionally, it defines the initial learning rate by ``divide_factor`` .
    total_steps (int): Number of total training steps.
    divide_factor (float, optional): Initial learning rate will be determined by initial_learning_rate = max_learning_rate / divide_factor. Default: 25.
    end_learning_rate (float, optional): The minimum learning rate during training, it should be much less than initial learning rate.
    phase_pct (float): The percentage of total steps which used to increasing learning rate. Default: 0.3.
    anneal_strategy (str, optional): Strategy of adjusting learning rate.'cos' for cosine annealing, 'linear' for linear annealing. Default: 'cos'.
    three_phase (bool, optional): Whether to use three phase.

        If ``True``:

            1. The learning rate will first increase from initial learning rate to maximum learning rate.
            2. Then it will decrease to initial learning rate. Number of step in this phase is the same as the one in first phase.
            3. Finally, it will decrease to minimum learning rate which is much less than initial learning rate.

        If ``False``:

            1. The learning rate will increase to maximum learning rate.
            2. Then it will directly decrease to minimum learning rate.

    last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``OneCycleLR`` instance to schedule learning rate.

Examples:
    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(5):
        ...     for batch_id in range(20):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # You should update learning rate each step

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(5):
        ...     for batch_id in range(20):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # You should update learning rate each step
c
                h  > [        U[        [        45      (       d  [        S[	        U5       35      eUS:  a  [        S5      e[        U[        [        45      (       d  [        S[	        U5       35      eUS:  a  [        S5      e[        U[        5      (       d  [        S[	        U5       35      eUS::  a  [        S5      eX l        [        U[        5      (       d  [        S[	        U5       35      eUS:  d  US	:  a  [        S
U 35      e[        U[        [        45      (       d  [        S[	        U5       35      eU[        U5      -  n
[        U5      nU(       a  US:  a  [        S5      eSXPR                  -  S	-
  SU-  U R                  -  S-
  U R                  S	-
  U R                  S	-
  /U l        U R                  S	   U R                  S   -
  U R                  S   U R                  S	   -
  U R                  S   U R                  S   -
  U R                  S   U R                  S   -
  /U l        U
UU
U/U l	        OSXPR                  -  S	-
  U R                  S	-
  U R                  S	-
  /U l        U R                  S	   U R                  S   -
  U R                  S   U R                  S	   -
  U R                  S   U R                  S	   -
  /U l        XU/U l	        US:X  a  U R                  U l        O&US:X  a  U R                  U l        O[        SU 35      e[        TU ]9  XU	5        g )N;'max_learning_rate' must be 'float' or 'int', but received r   z/'max_learning_rate' must be a positive integer.z;'end_learning_rate' must be 'float' or 'int', but received z/'end_learning_rate' must be a positive integer.z)'total_step' must be 'int', but received z('total_step' must be a positive integer.z*'phase_pct' must be 'float', but received rM   z2'phase_pct' must be between 0 and 1, but received z7'divide_factor' must be 'float' or 'int', but received       ?z;When three_phase is True, 'phase_pct' must be less than 0.5r     r  linearzA'anneal_strategy' must by one of 'cos' or 'linear', but received )r>   r)   r'   r?   r@   rA   total_steps_step_config_steps_size
_lr_config_cos_annealinganneal_func_linear_annealingr   rF   )rD   max_learning_rater(  divide_factorend_learning_rate	phase_pctanneal_strategythree_phaser(   r<   
initial_lrr   rV   s               r7   rF   OneCycleLR.__init__  s    +eS\::MdSdNeMfg  q NOO +eS\::MdSdNeMfg  q NOO +s++;D<M;NO  !GHH& )U++<T)_<MN  q=IMDYKP 
 -%66I$}J]I^_  '})==
()C Q 
 ,,,q0I 0 0014  1$  1$!D !!!$t'8'8';;!!!$t'8'8';;!!!$t'8'8';;!!!$##A&'	 D !	DO ,,,q0  1$  1$	!D !!!$t'8'8';;!!!$t'8'8';;!!!$t'8'8';; D
  *fEDO e##22D(#55DSTcSde  	9r6   c                p    [         R                  " [         R                  U-  5      S-   nX!U-
  S-  U-  -   $ NrM   g       @)r   r  r  )rD   r   r   pctcos_outs        r7   r,  OneCycleLR._cos_annealing/  s7     ((477S=)A-F*c1G;;;r6   c                    X!-
  U-  U-   $ ru   r/   )rD   r   r   r9  s       r7   r.  OneCycleLR._linear_annealing5  s     !S(833r6   c                   U R                   nXR                  :  a  [        SU SU R                   35      e[        [	        U R
                  SS  U R                  5      5       Ho  u  nu  p4X::  d  U[        U R                  5      S-
  :X  d  M+  XR
                  U   -
  U-  nU R                  U R                  U   U R                  US-      U5      s  $    g )NzTried to step z- times. However the number of total steps is rM   r  )
r(   r(  rA   	enumeratezipr)  r*  rg   r+  r-  )rD   current_stepr   end_stepr   
percentages         r7   rS   OneCycleLR.get_lr:  s    *** .[\`\l\l[mn  )2!!!"%t'7'78)
$A$ '1DOO0Dq0H+H*->->q-AAYN
''OOA&A(>
 )
r6   )r+  r)  r*  r-  r(  )g      9@r   g333333?r  Frq   F)r/  r)   r(  r'   r0  r)   r1  r)   r2  r)   r3  zLiteral['cos', 'linear']r4  r;   r(   r'   r<   r;   rr   rs   )r   r)   r   r)   r9  r)   rr   r)   rt   )r0   r1   r2   r3   r{   rF   r,  r.  rS   r5   r   r   s   @r7   r    r    c  s    \D  $#)49!k: k: k: 	k:
 !k: k: 2k: k: k: k: 
k: k:Z<<',<38<	<44',4384	4
 r6   r    c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'          S                     SU 4S jjjrSS jrSS jrSS jr	SS jr
SrU =r$ )r!   iN  a  
Set the learning rate according to the cyclic learning rate (CLR) scheduler.
The scheduler regards the process of learning rate adjustment as one cycle after another.
It cycles the learning rate between two boundaries with a constant frequency.
The distance between the two boundaries can be scaled on a per-iteration or per-cycle basis.

It has been proposed in `Cyclic Learning Rates for Training Neural Networks <https://arxiv.org/abs/1506.01186>`_.

According to the paper, the cyclic learning rate schedule has three built-in scale methods:

* "triangular": A basic triangular cycle without any amplitude scaling.
* "triangular2": A basic triangular cycle that reduce initial amplitude by half each cycle.
* "exp_range": A cycle that scales initial amplitude by scale function which is defined as :math:`gamma^{iterations}` .

The initial amplitude is defined as max_learning_rate - base_learning_rate.
Also note that you should update learning rate each step.

Args:
    base_learning_rate (float): Initial learning rate, which is the lower boundary in the cycle. The paper recommends
        that set the base_learning_rate to 1/3 or 1/4 of max_learning_rate.
    max_learning_rate (float): Maximum learning rate in the cycle. It defines the cycle amplitude as above.
        Since there is some scaling operation during process of learning rate adjustment,
        max_learning_rate may not actually be reached.
    step_size_up (int): Number of training steps, which is used to increase learning rate in a cycle.
        The step size of one cycle will be defined by step_size_up + step_size_down. According to the paper, step
        size should be set as at least 3 or 4 times steps in one epoch.
    step_size_down (int, optional): Number of training steps, which is used to decrease learning rate in a cycle.
        If not specified, it's value will initialize to `` step_size_up `` . Default: None
    mode (str, optional): one of 'triangular', 'triangular2' or 'exp_range'.
        If scale_fn is specified, this argument will be ignored. Default: 'triangular'
    exp_gamma (float): Constant in 'exp_range' scaling function: exp_gamma**iterations. Used only when mode = 'exp_range'. Default: 1.0
    scale_fn (function, optional): A custom scaling function, which is used to replace three built-in methods.
        It should only have one argument. For all x >= 0, 0 <= scale_fn(x) <= 1.
        If specified, then 'mode' will be ignored. Default: None
    scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether scale_fn is evaluated on cycle
        number or cycle iterations (total iterations since start of training). Default: 'cycle'
    last_epoch (int, optional): The index of last epoch. Can be set to restart training.Default: -1, means initial learning rate.
    verbose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``CyclicLR`` instance to schedule learning rate.

Examples:
    .. code-block:: pycon
        :name: code-example1

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.CyclicLR(
        ...     base_learning_rate=0.5,
        ...     max_learning_rate=1.0,
        ...     step_size_up=15,
        ...     step_size_down=5,
        ...     verbose=True,
        ... )
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(5):
        ...     for batch_id in range(20):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()  # You should update learning rate each step

    .. code-block:: pycon
        :name: code-example2

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.CyclicLR(
        ...         base_learning_rate=0.5,
        ...         max_learning_rate=1.0,
        ...         step_size_up=15,
        ...         step_size_down=5,
        ...         verbose=True,
        ...     )
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(5):
        ...     for batch_id in range(20):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # You should update learning rate each step
r)   
cycle_sizestep_up_pctmax_lr	amplitude1Literal['triangular', 'triangular2', 'exp_range']r   r   zCallable[[float], float]scale_fnLiteral['cycle', 'iterations']
scale_modec                <  > [        U[        [        45      (       d  [        S[	        U5       35      eUS:  a  [        SU 35      e[        U[        5      (       d  [        S[	        U5       35      eUS::  a  [        SU 35      eUb@  [        U[        5      (       d  [        S[	        U5       35      eUS::  a  [        SU 35      e[        U[        5      (       d  [        S[	        U5       35      e[        U5      nUb  [        U5      OUnX4-   U l        X0R                  -  U l        [        U5      U l        U R                  U-
  U l	        US	;  a  Uc  [        S
5      eUS;  a  [        S5      eXPl
        X`l        Uc{  U R                  S:X  a  U R                  U l        SU l        O^U R                  S:X  a  U R                  U l        SU l        O5U R                  S:X  a  U R                   U l        SU l        OXpl        Xl        ["        TU ]I  XU
5        g )Nr$  r   z='max_learning_rate' must be a positive integer, but received z5The type of 'step_size_up' must be int, but received z8'step_size_up' must be a positive integer, but received z7The type of 'step_size_down' must be int, but received z:'step_size_down' must be a positive integer, but received z4The type of 'exp_gamma' must be float, but received )
triangulartriangular2	exp_rangeza'mode' is invalid and 'scale_fn' is not specified, make sure one of 'mode' or 'scale_fn' is valid)r   
iterationsz2'scale_mode' must be one of 'cycle' or 'iterationsrO  r   rP  rQ  rR  )r>   r)   r'   r?   r@   rA   rF  rG  rH  rI  r   r   _triangular_scale_fnrK  rM  _triangular2_scale_fn_exp_range_scale_fnr   rF   )rD   base_learning_rater/  step_size_upstep_size_downr   	exp_gammarK  rM  r(   r<   rV   s              r7   rF   CyclicLR.__init__  sN    +eS\::MdSdNeMfg  q OPaObc 
 ,,,G\HZG[\  1J<.Y 
 %nc22MdSaNbMcd  " PQ_P`a 
 )U++FtIFWX  \* ) .! 	 '7'//9-.'99 BB s  44D  	
yyL( $ 9 9")m+ $ : :")k) $ 8 8".$M(O+Ar6   c                    g)Nr   r/   rD   xs     r7   rS  CyclicLR._triangular_scale_fn 	  s    r6   c                    SSUS-
  -  -  $ r8  r/   r\  s     r7   rT  CyclicLR._triangular2_scale_fn#	  s    CAEN##r6   c                     U R                   U-  $ ru   r   r\  s     r7   rU  CyclicLR._exp_range_scale_fn&	  s    zz1}r6   c                R   U R                   nSXR                  -  -   nSXR                  -  -   U-
  nX0R                  ::  a  X0R                  -  nOSU-
  SU R                  -
  -  nU R                  U-  nU R                  XPR                  [        U R                  5      5      -  -   nU$ )NrM   r   )r(   rF  rG  rI  r:   rK  evalrM  )rD   rR  r   pct_per_cyclescale_factorbase_heightlrs          r7   rS   CyclicLR.get_lr)	  s    __
J//11j??::UB,,,(+;+;;L-!d6F6F2FGLnn|3\\K--T__8M*NNN	r6   )rI  rF  r   rH  r   rK  rM  rG  )NrO  r   Nr   rq   F)rV  r)   r/  r)   rW  r'   rX  rw   r   rJ  rY  r)   rK  zCallable[[float], float] | NonerM  rL  r(   r'   r<   r;   rr   rs   )r]  r)   rr   r)   rt   )r0   r1   r2   r3   r{   r4   rF   rS  rT  rU  rS   r5   r   r   s   @r7   r!   r!   N  s    jX M
;;L&&.. &*BN485<ZB!ZB !ZB 	ZB
 #ZB @ZB ZB 2ZB 3ZB ZB ZB 
ZB ZBx$ r6   r!   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'       S             SU 4S jjjrSS	 jrS
rU =r	$ )r"   i;	  a  
Set the learning rate according to linear scheduler.
The learning rate will be firstly multiplied by start_factor and linearly increase to end learning rate.

Args:
    learning_rate (float): The initial learning rate. It is a python float number.
    total_steps (int): Number of iterations that the learning_rate reaches end learning_rate.
    start_factor (float): Start learning rate is defined by `start_factor * learning_rate` . Default: 1./3.
    end_factor (float) End learning rate is defined by `end_factor * learning_rate`. Default: 1.0.
    last_epoch (int, optional): The index of last epoch. Can be set to restart training.Default: -1, means initial learning rate.
    verbose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

Returns:
    ``LinearLR`` instance to schedule learning rate.

Examples:
    .. code-block:: pycon
        :name: code-dynamic

        >>> # Example1: train on default dynamic graph mode
        >>> import paddle
        >>> import numpy as np

        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.LinearLR(learning_rate=0.5, total_steps=5, verbose=True)
        >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(5):
        ...     for batch_id in range(20):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         sgd.step()
        ...         sgd.clear_gradients()
        ...         scheduler.step()

    .. code-block:: pycon
        :name: code-static

        >>> # Example2: train on static graph mode
        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.LinearLR(learning_rate=0.5, total_steps=5, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(5):
        ...     for batch_id in range(20):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...         scheduler.step()  # You should update learning rate each step
r)   start_factor
end_factorr'   r(  c                   > US:  d  US::  a  [        SU 35      eUS:  d  US:  a  [        SU 35      eUS::  a  [        SU 35      eX0l        X@l        X l        [        TU ]  XU5        g )Nr   r   zF`start_factor` must be greater than 0 and less or equal to 1, but got z=`end_factor` must be greater than 0 and less than 1, but got z.`total_steps` must be greater than 0, but got )rA   rk  rl  r(  r   rF   )rD   rE   r(  rk  rl  r(   r<   rV   s          r7   rF   LinearLR.__init__	  s     #!2XYeXfg  zA~OPZ|\  !@N  )$&G<r6   c                V   U R                   S:X  a  U R                  U R                  -  $ U R                   U R                  :  a  U R                  $ U R                  U R                  -  nU R
                  U R                  -
  nSUXR                   S-
  U-  -   -  -   nU R                  U-  $ )Nr   r   rM   )r(   r:   rk  r(  r*   rl  )rD   r:   
cur_factorr   s       r7   rS   LinearLR.get_lr	  s    ??a<<$"3"333__t///<<&&):)::G4+<+<<J:??Q.*<< F <<&((r6   )rl  rk  r(  )gUUUUUU?r   rq   F)rE   r)   r(  r'   rk  r)   rl  r)   r(   r'   r<   r;   rr   rs   rt   r   r   s   @r7   r"   r"   ;	  s    CJ  &== = 	=
 = = = 
= =<) )r6   r"   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S	'       S           SU 4S
 jjjrSS jrSSS jjrSr	U =r
$ )r#   i	  aX  
Set the learning rate of each parameter group using a cosine annealing
schedule, where :math:`\eta_{max}` is set to the initial lr, :math:`T_{cur}`
is the number of epochs since the last restart and :math:`T_{i}` is the number
of epochs between two warm restarts in SGDR:

.. math::
    \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
    \cos\left(\frac{T_{cur}}{T_{i}}\pi\right)\right)

When :math:`T_{cur}=T_{i}`, set :math:`\eta_t = \eta_{min}`.
When :math:`T_{cur}=0` after restart, set :math:`\eta_t=\eta_{max}`.

It has been proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts <https://arxiv.org/abs/1608.03983>`_.

Args:
    learning_rate (float): Initial learning rate.
    T_0 (int): Number of iterations for the first restart.
    T_mult (int, optional): A factor increases :math:`T_{i}` after a restart. Default: 1.
    eta_min (float, optional): Minimum learning rate. Default: 0.
    last_epoch (int, optional): The index of last epoch. Default: -1, means initial learning rate.
    verbose (bool, optional): If ``True``, prints a message to stdout for
        each update. Default: ``False``.

Returns:
    ``CosineAnnealingWarmRestarts`` instance to schedule learning rate.

Examples:
    .. code-block:: pycon
        :name: code-example1

        >>> import paddle
        >>> import numpy as np
        >>> # train on default dynamic graph mode
        >>> linear = paddle.nn.Linear(10, 10)
        >>> scheduler = paddle.optimizer.lr.CosineAnnealingWarmRestarts(learning_rate=0.5, T_0=1, T_mult=2, verbose=True)
        >>> adam = paddle.optimizer.Adam(learning_rate=scheduler, parameters=linear.parameters())
        >>> for epoch in range(10):
        ...     for batch_id in range(10):
        ...         x = paddle.uniform([10, 10])
        ...         out = linear(x)
        ...         loss = paddle.mean(out)
        ...         loss.backward()
        ...         adam.step()
        ...         adam.clear_grad()
        ...     scheduler.step(epoch)  # You should update learning rate each epoch

    .. code-block:: pycon
        :name: code-example2

        >>> import paddle
        >>> import numpy as np
        >>> paddle.enable_static()
        >>> main_prog = paddle.static.Program()
        >>> start_prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_prog, start_prog):
        ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
        ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
        ...     z = paddle.static.nn.fc(x, 100)
        ...     loss = paddle.mean(z)
        ...     scheduler = paddle.optimizer.lr.CosineAnnealingWarmRestarts(learning_rate=0.5, T_0=1, T_mult=2, verbose=True)
        ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
        ...     sgd.minimize(loss)
        >>> exe = paddle.static.Executor()
        >>> exe.run(start_prog)
        >>> for epoch in range(10):
        ...     for batch_id in range(10):
        ...         out = exe.run(
        ...             main_prog,
        ...             feed={
        ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
        ...                 'y': np.random.randn(3, 4, 5).astype('float32'),
        ...             },
        ...             fetch_list=[loss],
        ...         )
        ...     scheduler.step(epoch)  # You should update learning rate each epoch
r'   T_0T_iT_multr)   r  T_curc                  > US::  d  [        U[        5      (       d  [        SU 35      eUS:  d  [        U[        5      (       d  [        SU 35      eX l        X l        X0l        X@l        XPl        [        TU ]%  XU5        g )Nr   z'Expected positive integer T_0, but got rM   z&Expected integer T_mult >= 1, but got )
r>   r'   rA   rs  rt  ru  r  rv  r   rF   )rD   rE   rs  ru  r  r(   r<   rV   s          r7   rF   $CosineAnnealingWarmRestarts.__init__
  sy     !8:c3//FseLMMA:Z44EfXNOO
G<r6   c                    U R                   U R                  U R                   -
  S[        R                  " [        R                  U R
                  -  U R                  -  5      -   -  S-  -   $ r  )r  r:   r   r  r  rv  rt  rI   s    r7   rS   "CosineAnnealingWarmRestarts.get_lr
  sW    LL||dll*488DGGdjj0488;<<>	
r6   c           	     $   Uc  U R                   S:  a  SnUcz  U R                   S-   nU R                  S-   U l        U R                  U R                  :  a<  U R                  U R                  -
  U l        U R                  U R                  -  U l        OUS:  a  [	        SU 35      eXR
                  :  a  U R                  S:X  a  XR
                  -  U l        O[        [        R                  " XR
                  -  U R                  S-
  -  S-   U R                  5      5      nXR
                  U R                  U-  S-
  -  U R                  S-
  -  -
  U l        U R
                  U R                  U-  -  U l        OU R
                  U l        Xl        [        R                  " U5      U l         U R                  5       U l        U R                  (       a>  [        SU R                    SU R                  R                   SU R                   S35        gg)	a]  
step should be called after `optimizer.step()` . It will update the learning rate in optimizer.
The new learning rate will take effect on next epoch.

Args:
    epoch (int|None, optional): specify current epoch. Default: None. Auto-increment from last_epoch=-1.

Returns:
    None

Examples:
    Please refer to the example of current LRScheduler.
Nr   rM   z%Expected non-negative epoch, but got rO   rP   rQ   rR   )r(   rv  rt  ru  rA   rs  r'   r   logfloorrS   r*   r<   rU   rV   r0   )rD   rW   ns      r7   rC    CosineAnnealingWarmRestarts.step"
  s    =T__q0E=OOa'EaDJzzTXX%!ZZ$((2
88dkk1qy ;E7C   ;;!#!&!1DJ"XX-qAAE KKA "'T[[!^a5G)Ha* "DJ  $xx$++!*<<DH88"
**U+{{}<<)DNN,C,C+DDZ[_[g[gZhhij r6   )rs  rv  rt  ru  r  r(   r*   )rM   r   rq   F)rE   r)   rs  r'   ru  r'   r  r)   r(   r'   r<   r;   rt   ru   rv   )r0   r1   r2   r3   r{   r4   rF   rS   rC   r5   r   r   s   @r7   r#   r#   	  s    L\ 
H	HKNJ == = 	=
 = = = =(
3 3r6   r#   c           	     V   [        S5      nU c  Sn UR                  U SS/SSS9u  pEU(       a{  UR                  U[        R                  R
                  R                  US-
  SS9S9  UR                  R                  5       R                  S	S
U/0SU/0S[        U5      0S9  SUl        U$ )a  
:api_attr: Static Graph

Create an auto-increase variable. which will be automatically increased
by 1 in every iteration. By default, the first return of this counter is 1,
and the step size is 1.

Args:
    counter_name(str, optional): The counter name. Default '@STEP_COUNTER@'.
    begin(int, optional): The first return value of this counter. Default 1.
    step(int, optional): The step size. Default 1.

Returns:
    Variable: The auto-increased Variable with data type int64.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.enable_static()
        >>> global_step = paddle.optimizer.lr.autoincreased_step_counter(
        ...     counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
global_step_counterz@STEP_COUNTER@int64rM   T)namedtypeshapepersistablebelong_to_optimizer)r`   	force_cpu)initializer	incrementXOutrC   )r@   inputsoutputsattrs)r   create_or_get_global_variableset_variable_initializerpaddlennr  ConstantInitializermain_programglobal_block_prepend_opr)   stop_gradient)counter_namebeginrC   helpercounter
is_new_vars         r7   autoincreased_step_counterr  X
  s    0 ./F' >>c  ? G ''		--AAai4 B  	( 	
 	((*66'#WI&5;'	 	7 	
 !%Nr6   c                J    [        SU SS9n[        R                  " US5      nU$ )Nz@LR_DECAY_COUNTER@rM   )r  r  rC   r  )r  r  cast)r  global_steps     r7   _decay_step_counterr  
  s+    ,)QK ++k95Kr6   c                \   [        5       R                  5          [        5       (       a3  [        R                  R
                  R                  XUS9nUsSSS5        $ [        S5      nUS-  nUS-  U-  nX S-  -  [        R                  " XV5      -  nUsSSS5        $ ! , (       d  f       g= f)a  

Noam decay method. The numpy implementation of noam decay as follows.

.. code-block:: python

    >>> import numpy as np
    >>> # set hyper parameters
    >>> base_lr = 0.01
    >>> d_model = 2
    >>> current_steps = 20
    >>> warmup_steps = 200
    >>> # compute
    >>> lr_value = base_lr * np.power(d_model, -0.5) * np.min([
    ...                         np.power(current_steps, -0.5),
    ...                         np.power(warmup_steps, -1.5) * current_steps])

Please reference `attention is all you need <https://arxiv.org/pdf/1706.03762.pdf>`_.

Args:
    d_model(Variable): The dimensionality of input and output of model.
    warmup_steps(Variable): A super parameter.
    learning_rate(Variable|float|int): The initial learning rate. If the type
        is Variable, it's a 0-D Tensor with shape [], the data type can be
        float32 or float64. It also can be set to python int number. Default 1.0

Returns:
    The decayed learning rate.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> warmup_steps = 100
        >>> learning_rate = 0.01
        >>> lr = paddle.optimizer.lr.noam_decay(
        ...                 1/(warmup_steps *(learning_rate ** 2)),
        ...                 warmup_steps,
        ...                 learning_rate)
rE   NrM   r   r   )	r   _lr_schedule_guardr   r  	optimizerrh  r   r  minimum)r~   r   rE   decayr  r   r   lr_values           r7   
noam_decayr  
  s    R 
		2	2	4$$''11] 2 E  
5	4 .a0KT!At#{2A$69MMH 
5	4	4s   9B7B
B+c                   [        5       R                  5          [        5       (       a  [        X5      nUsSSS5        $ [	        5       nXQ-  nU(       a  [
        R                  " U5      nXU-  -  nUsSSS5        $ ! , (       d  f       g= f)a  

Applies exponential decay to the learning rate.

When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed by
'decay_rate' every 'decay_steps' steps.

Decayed learning rate calculates as follows:

.. code-block:: text

    >>> if staircase == True:
    >>>     decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
    >>> else:
    >>>     decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)

Args:
    learning_rate(Variable|float): The initial learning rate. It should be a Variable
        or a float
    decay_steps(int): The learning rate decay steps. See the decay computation above.
    decay_rate(float): The learning rate decay rate. See the decay computation above.
    staircase(bool): If True, decay the learning rate at discrete intervals, which
        means the learning rate will be decayed by `decay_rate` every
        `decay_steps`. If False, learning rate will be decayed continuously
        and following the formula above. Default: False

Returns:
    Variable: The decayed learning rate. The data type is float32.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> paddle.enable_static()
        >>> base_lr = 0.1
        >>> lr = paddle.optimizer.lr.exponential_decay(
        ...     learning_rate=base_lr,
        ...     decay_steps=10000,
        ...     decay_rate=0.5,
        ...     staircase=True
        ... )
N)r   r  r   r   r  r  r}  rE   r   
decay_rate	staircaser  r  r   
decayed_lrs           r7   exponential_decayr  
  sq    Z 
		2	2	4$]?E 
5	4
 ./K!/G ,,w/&g*=>J 
5	4	4s   A<3A<<
B
c                J   [        5       R                  5          [        5       (       a  [        X5      nUsSSS5        $ [	        5       nXQ-  nU(       a  [
        R                  " U5      nU [
        R                  " SU-  U-  5      -  nUsSSS5        $ ! , (       d  f       g= f)aF  

Applies natural exponential decay to the initial learning rate.

When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed by
natural exponential power 'decay_rate' every 'decay_steps' steps.

Decayed learning rate calculates as follows:

.. code-block:: text

    >>> if not staircase:
    >>>     decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
    >>> else:
    >>>     decayed_learning_rate = learning_rate * exp(- decay_rate * floor(global_step / decay_steps))

Args:
    learning_rate(Variable|float): The initial learning rate. It should be a Variable
        or a float
    decay_steps(int): The learning rate decay steps. See the decay computation above.
    decay_rate(float): The learning rate decay rate. See the decay computation above.
    staircase(bool): If True, decay the learning rate at discrete intervals, which
        means the learning rate will be decayed by natural exponential power
        `decay_rate` every `decay_steps`. If False, learning rate will be
        decayed continuously and following the formula above. Default: False

Returns:
    The decayed learning rate. The data type is float32.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> paddle.enable_static()
        >>> base_lr = 0.1
        >>> lr = paddle.optimizer.lr.natural_exp_decay(
        ...     learning_rate=base_lr,
        ...     decay_steps=10000,
        ...     decay_rate=0.5,
        ...     staircase=True
        ... )
Nrq   )r   r  r   r   r  r  r}  r   r  s           r7   natural_exp_decayr  
  s    Z 
		2	2	4#M>E 
5	4
 ./K!/G ,,w/&BOg4M)NNJ 
5	4	4s   BAB
B"c                    [        5       R                  5          [        5       (       a  [        X5      nUsSSS5        $ [	        5       nXQ-  nU(       a  [
        R                  " U5      nU SX&-  -   -  nUsSSS5        $ ! , (       d  f       g= f)a  
Applies inverse time decay to the initial learning rate.

When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, an inverse decay function will be
applied to the initial learning rate.

Decayed learning rate calculates as follows:

.. code-block:: text

    >>> if staircase == True:
    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
    >>> else:
    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)

Args:
    learning_rate(Variable|float): The initial learning rate. It should be a Variable
        or a float
    decay_steps(int): The learning rate decay steps. See the decay computation above.
    decay_rate(float): The learning rate decay rate. See the decay computation above.
    staircase(bool): If True, decay the learning rate at discrete intervals, which
        means the learning rate will be decayed by `decay_rate` times
        every `decay_steps`. If False, learning rate will be decayed
        continuously and following the formula above. Default: False

Returns:
    Variable: The decayed learning rate. The data type is float32.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.enable_static()
        >>> base_lr = 0.1
        >>> lr = paddle.optimizer.lr.inverse_time_decay(
        ...     learning_rate=base_lr,
        ...     decay_steps=10000,
        ...     decay_rate=0.5,
        ...     staircase=True
        ... )
NrM   )r   r  r   r   r  r  r}  r  s           r7   inverse_time_decayr  F  sv    V 
		2	2	4$]?E 
5	4
 ./K!/G ,,w/&!j.B*BCJ 
5	4	4s   A?6A??
Bc                  ^^ [        5       R                  5          [        5       (       a  [        XX#U5      nUsSSS5        $ [	        5       nU(       a  [
        R                  " Xa-  5      m[
        R                  R                  S/SSS9n[
        R                  R                  S/SSS9m[
        R                  R                  R                  Xg:H  U4S jU4S j5      n[
        R                  " UTS	9  UT-  nO=[
        R                  R                  S/S[        U5      S9n	[
        R                  " XiS
9nX-
  SXa-  -
  U-  -  U-   n
U
sSSS5        $ ! , (       d  f       g= f)aN  
Applies polynomial decay to the initial learning rate.

.. code-block:: text

    if cycle:
        decay_steps = decay_steps * ceil(global_step / decay_steps)
    else:
        global_step = min(global_step, decay_steps)
        decayed_learning_rate = (learning_rate - end_learning_rate) *
                (1 - global_step / decay_steps) ^ power + end_learning_rate

Args:
    learning_rate(Variable|float32): A scalar float32 value or a Variable. This
        will be the initial learning rate during training.
    decay_steps(int32): A Python `int32` number.
    end_learning_rate(float): A Python `float` number.
    power(float): A Python `float` number.
    cycle(bool): If set true, decay the learning rate every decay_steps.

Returns:
    Variable: The decayed learning rate

Examples:
    .. code-block:: python

        >>> import paddle
        >>> start_lr = 0.01
        >>> total_step = 5000
        >>> end_lr = 0
        >>> lr = paddle.optimizer.lr.polynomial_decay(
        ...     start_lr,
        ...     total_step,
        ...     end_lr,
        ...     power=1
        ... )
NrM   r  r   r  r  r`   r   c                    > T $ ru   r/   )one_vars   r7   <lambda>"polynomial_decay.<locals>.<lambda>  s    Wr6   c                    > T $ ru   r/   )r   s   r7   r  r    s    gr6   )output)r]  y)r   r  r   r   r  r  r   tensorfill_constantstaticr  condassignr)   r  )rE   r   r1  r   r   r  r  zero_vardiv_valdecay_steps_varr  r   r  s              @@r7   polynomial_decayr    sO   P 
		2	2	4#,=eE  
5	4 ./K ++k&?@!==66#Yc 7  !--55#Yc 6  !--**//+_o gg6)G3"(--"="=#YeK6H #> # %nn{N';[..58!"J C 
5	4	4s   ED	E
E$c                   [        5       R                  5          [        U5      [        U 5      -
  S:w  a  [        S5      e[	        5       (       a  [        X5      nUsSSS5        $ [        5       n[        R                  R                  S/SSSSS9n[        R                  R                  R                  R                  5        n[        [        U 5      5       Hw  n[        R                  R                  S/S[!        X   5      SS	9nUR#                  X7:  5         [        R                  R                  S/S[!        X   5      US
9  SSS5        My     UR%                  5          [        R                  R                  S/S[!        U[        U5      S-
     5      US
9  SSS5        SSS5        UsSSS5        $ ! , (       d  f       M  = f! , (       d  f       N3= f! , (       d  f       N<= f! , (       d  f       g= f)a  
Applies piecewise decay to the initial learning rate.

The algorithm can be described as the code below.

.. code-block:: text

    boundaries = [10000, 20000]
    values = [1.0, 0.5, 0.1]
    if step < 10000:
        learning_rate = 1.0
    elif 10000 <= step < 20000:
        learning_rate = 0.5
    else:
        learning_rate = 0.1

Args:
    boundaries: A list of steps numbers.
    values: A list of learning rate values that will be picked during
        different step boundaries.

Returns:
    The decayed learning rate.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.enable_static()
        >>> boundaries = [10000, 20000]
        >>> values = [1.0, 0.5, 0.1]
        >>> optimizer = paddle.optimizer.Momentum(
        ...     momentum=0.9,
        ...     learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
        ...     weight_decay=paddle.regularizer.L2Decay(1e-4)
        ... )
rM   z)len(values) - len(boundaries) should be 1Nr   r  TrE   r  r`   r  r  r  )r  r  r`   r  )r  r  r`   out)r   r  rg   rA   r   r   r  r  r  create_global_varr  control_flowSwitchr   r  r  r)   casedefault)r   r   r  r  rh  switchr   boundary_vals           r7   piecewise_decayr    s   L 
		2	2	4v;Z(A-HII":6E 
5	4 ./K00c $ 1 B !!..5576s:/A#)==#>#> c'#JM2"&	 $? $L  [%?@33#$#"+"'	"2 "	 4  A@ 0 ^^%MM// c'#F3v;?$;<	 0  & 8, O 
5	42 A@ &% 87# 
5	4s\   AG,%AG,AG-F8	G#:G
G%	G,8
GG

GG
G)	%G,,
G:c                   [        U S[        [        4S5        [        5       R	                  5          [        5       (       a  [        X5      nUsSSS5        $ [        5       n[        R                  " XA-  5      nU S-  [        R                  " U[        R                  -  U-  5      S-   -  nUsSSS5        $ ! , (       d  f       g= f)a  

Applies cosine decay to the learning rate.

when training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed by
following cosine decay strategy.

.. math::

    decayed\_lr = learning\_rate * 0.5 * (math.cos * (epoch * \\frac{math.pi}{epochs} ) + 1)

Args:
    learning_rate(Variable|float): The initial learning rate.
    step_each_epoch(int): the number of steps in an epoch.
    epochs(int): the number of epochs.

Returns:
    Variable: The decayed learning rate.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> base_lr = 0.1
        >>> lr = paddle.optimizer.lr.cosine_decay(
        >>> learning_rate = base_lr, step_each_epoch=10000, epochs=120)
rE   cosine_decayNr%  rM   )r   r)   r   r   r  r   r   r  r  r}  r  r   r  )rE   step_each_epochepochsr  r  	cur_epochr  s          r7   r  r    s    : (9> 
		2	2	4(?E 
5	4
 ./K[%BCI::i$''1F:;a?A 
  
5	4	4s   B7AB77
Cc           	       ^ ^^^^ Sn[        T [        5      (       a  T R                  n[        U5      [        T5      -
  m[	        5       R                  5          [        5       (       a  [        T TTU5      nUsSSS5        $ [        R                  R                  S/SUSSS9n[        5       m[        T [        5      (       d)  [        R                  R                  S/U[        T 5      S9m [        R                  R                  R                  TT:  UUUU4S	 j4/U 4S
 jS9n[        R                   " Xe5        UsSSS5        $ ! , (       d  f       g= f)a  

This operator use the linear learning rate warm up strategy to adjust the learning rate preliminarily before the normal learning rate scheduling.
For more information, please refer to `Bag of Tricks for Image Classification with Convolutional Neural Networks <https://arxiv.org/abs/1812.01187>`_

When global_step < warmup_steps, learning rate is updated as:

.. code-block:: text

        linear_step = end_lr - start_lr
        lr = start_lr + linear_step * (global_step / warmup_steps)

where start_lr is the initial learning rate, and end_lr is the final learning rate;

When global_step >= warmup_steps, learning rate is updated as:

.. code-block:: text

    lr = learning_rate

where lr is the learning_rate after warm-up.

Args:
    learning_rate (Variable|float): Learning_rate after warm-up, it could be 1D-Tensor or single value with the data type of float32.
    warmup_steps (int): Steps for warm up.
    start_lr (float): Initial learning rate of warm up.
    end_lr (float): Final learning rate of warm up.

Returns:
    Variable: Warm-up learning rate with the same data type as learning_rate.

Examples:

    .. code-block:: python

        >>> import paddle
        >>> paddle.enable_static()
        >>> boundaries = [100, 200]
        >>> lr_steps = [0.1, 0.01, 0.001]
        >>> learning_rate = paddle.optimizer.lr.piecewise_decay(boundaries, lr_steps) # case1, 1D-Tensor
        >>> # learning_rate = 0.1  # case2, single-value
        >>> warmup_steps = 50
        >>> start_lr = 0.1
        >>> end_lr = 1. / 3.
        >>> decayed_lr = paddle.optimizer.lr.linear_lr_warmup(
        ...     learning_rate,
        ...     warmup_steps,
        ...     start_lr,
        ...     end_lr
        ... )
        >>> place = paddle.CPUPlace()
        >>> exe = paddle.static.Executor(place)
        >>> exe.run(paddle.static.default_startup_program())
        >>> out, = exe.run(fetch_list=[decayed_lr.name])
        >>> print(out)
        [0.1]
r  NrM   r   Tlearning_rate_warmupr  r  c                 ,   > TTT [        T5      -  -  -   $ ru   )r)   )r  linear_stepr   r   s   r7   r  "linear_lr_warmup.<locals>.<lambda>  s    %u\7J)JK!Lr6   c                    > T $ ru   r/   r  s   r7   r  r    s    r6   )pred_fn_pairsr  )r>   r   r  r)   r   r  r   r   r  r  r  r  r  r  r  r  r  )	rE   r   r   r   r  rh  lr_valr  r  s	   ```    @@r7   linear_lr_warmupr  N  s/   t E-**##-%/1K			2	2	4m\8VLB 
5	4
 00c + 1 B ./KmX66 & ; ;#U%2F !< ! ]]%%** $l2L . + 	F MM&%; 
5	4	4s   EB:E
E)NrM   rM   )r   )r   )F)r   r   F);
__future__r   r   rh   typingr   r   r   r   r   r  numpy.typingnpttyping_extensionsr	   r  r
   paddle.baser   paddle.base.data_feederr   paddle.base.frameworkr   r   r   paddle.base.layer_helperr   collections.abcr   __all__r%   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r  r  r  r  r  r  r  r  r  r  r/   r6   r7   <module>r     s   #   C C   )    . 
 1(,%9 %\" \"~h? h?Vo1[ o1d[Jk [J|[A{ [A|Ik IX^&; ^&B^<{ ^<BwC[ wCtq. q.hc>+ c>Lk3k k3\I
; I
XD+ DNh hVj{ jZs){ s)ld+ dN1h6r9x9x8x LQIXM`.b\r6   