
    ϑi<4                        S SK Jr  S SKJr  S SKrS SKJrJr  S SKJ	r
  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  \(       a  S SKJr  S SKJr  S SKJr  / r " S S\5      rg)    )annotations)TYPE_CHECKINGN)	frameworkunique_name)base)Variable)LayerHelper)in_pir_mode)	Optimizer)create_parameter)Tensor)Operator)Programc                  ,  ^  \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   S\S'   Sr   S         SU 4S jjjrU 4S jr\	R                  \R                  SS j5       5       rS rS rS r\R                     S         SS jj5       rSrU =r$ )	LookAhead$   a  
This implements the Lookahead optimizer of the
paper : https://arxiv.org/abs/1907.08610.

Lookahead keeps two sets of params: the fast_params and
the slow_params. inner_optimizer update fast_params every
training step. Lookahead updates the slow_params and fast_params
every k training steps as follows:

.. math::

    slow\_param_t &= slow\_param_{t-1} + \\alpha * (fast\_param_{t-1} - slow\_param_{t-1})

    fast\_param_t &=  slow\_param_t

Args:
    inner_optimizer (Optimizer): The optimizer that update fast params step by step.
    alpha (float, optional): The learning rate of Lookahead. The default value is 0.5.
    k (int, optional): The slow params is updated every k steps. The default value is 5.
    name (str, optional): Normally there is no need for user to set this property.
        For more information, please refer to :ref:`api_guide_Name`.
        The default value is None.

Examples:

    .. code-block:: python

        >>> import numpy as np
        >>> import paddle
        >>> import paddle.nn as nn

        >>> BATCH_SIZE = 16
        >>> BATCH_NUM = 4
        >>> EPOCH_NUM = 4

        >>> IMAGE_SIZE = 784
        >>> CLASS_NUM = 10
        >>> # define a random dataset
        >>> class RandomDataset(paddle.io.Dataset): # type: ignore[type-arg]
        ...     def __init__(self, num_samples):
        ...         self.num_samples = num_samples
        ...     def __getitem__(self, idx):
        ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
        ...         label = np.random.randint(0, CLASS_NUM - 1,
        ...                                 (1, )).astype('int64')
        ...         return image, label
        ...     def __len__(self):
        ...         return self.num_samples

        >>> class LinearNet(nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
        ...         self.bias = self._linear.bias
        ...     @paddle.jit.to_static
        ...     def forward(self, x):
        ...         return self._linear(x)

        >>> def train(layer, loader, loss_fn, opt):
        ...     for epoch_id in range(EPOCH_NUM):
        ...         for batch_id, (image, label) in enumerate(loader()):
        ...             out = layer(image)
        ...             loss = loss_fn(out, label)
        ...             loss.backward()
        ...             opt.step()
        ...             opt.clear_grad()
        ...             print("Train Epoch {} batch {}: loss = {}".format(
        ...                 epoch_id, batch_id, np.mean(loss.numpy())))
        >>> layer = LinearNet()
        >>> loss_fn = nn.CrossEntropyLoss()
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.1, parameters=layer.parameters())
        >>> lookahead = paddle.incubate.LookAhead(optimizer, alpha=0.2, k=5)

        >>> # create data loader
        >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
        >>> loader = paddle.io.DataLoader(
        ...     dataset,
        ...     batch_size=BATCH_SIZE,
        ...     shuffle=True,
        ...     drop_last=True,
        ...     num_workers=2)

        >>> # doctest: +SKIP('The run time is too long to pass the CI check.')
        >>> train(layer, loader, loss_fn, lookahead)

r   inner_optimizerfloatalphaintkstrtyper	   helperslowc                  > Uc   S5       eSUs=::  a  S::  d   S5       e   S5       e[        U[        5      (       a  US:  d   S5       eXl        U R                  R                  c;  [        R
                  R                  5       R                  5       R                  5       nOU R                  R                  n[        TU ])  UUS S US9  X l        X0l        SU l        [        U R                  R                   5      U l        S U l        S U l        g )	Nzinner optimizer can not be None              ?zBalpha should be larger or equal to 0.0, and less or equal than 1.0r   zk should be a positive integer)learning_rate
parametersweight_decay	grad_clipname	lookahead)
isinstancer   r   _parameter_listpaddlestaticdefault_main_programglobal_blockall_parameterssuper__init__r   r   r   r	   	__class____name__r   _global_step_var_k_var)selfr   r   r   r#   r    r.   s         c/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/optimizer/lookahead.pyr-   LookAhead.__init__   s    *M,MM*e"s" 	
P	
" 	
P	
" !S!!a!eM-MM+.//7224!  --==J! 	 	
 
	!$.."9"9: $    c                Z   > [         TU ]  X5        U R                  R                  X5        g N)r,   _set_auxiliary_varr   )r2   keyvalr.   s      r3   r8   LookAhead._set_auxiliary_var   s$    "3,//9r5   c                4   U R                   R                  5         U R                  5         / nU R                   HK  nUR                  (       d  M  UR                  5       c  M)  UR                  5       nUR                  X#45        MM     U R                  SSUS9  g)a+  
Execute the optimizer and update parameters once.

Returns:
    None

Examples:

    .. code-block:: python

        >>> import paddle
        >>> inp = paddle.rand([1,10], dtype="float32")
        >>> linear = paddle.nn.Linear(10, 1)
        >>> out = linear(inp)
        >>> loss = paddle.mean(out)
        >>> sgd = paddle.optimizer.SGD(learning_rate=0.1,parameters=linear.parameters())
        >>> lookahead = paddle.incubate.LookAhead(sgd, alpha=0.2, k=5)
        >>> loss.backward()
        >>> lookahead.step()
        >>> lookahead.clear_grad()

N)lossstartup_programparams_grads)r   step_increment_global_varr&   	trainable
_grad_ivarappend_apply_optimize)r2   r?   paramgrad_vars       r3   r@   LookAhead.step   s    2 	!!#""$))E??!- ++-##U$56 * 	t, 	 	
r5   c                    [        U[        R                  [        R                  R                  45      (       d   eU H  nU R                  U R                  U5        M!     g r7   )r%   r   Blockr'   pir_add_accumulator	_slow_str)r2   blockr    ps       r3   _create_accumulatorsLookAhead._create_accumulators   sF    %)//6::3C3C!DEEEEA!!$..!4 r5   c                $   [        5       (       a  U R                  cM  [        SS/[        R                  " S5      S[
        R                  R                  R                  SSS9S9U l        [
        R                  " U R                  S5      U l        g U R                  c;  [
        R                  R                  [        R                  " S5      S/S	SS
S9U l        U R                  R                  SSU R                  /0SU R                  /0SS0S9  g )Nint32   lookahead_stepFr   value	force_cpudtypeshaper#   rB   initializerr   r   Tr#   r[   rW   rZ   persistable	incrementXOutr@   )r   inputsoutputsattrs)r
   r0   r   r   generater'   nnr\   ConstantInitializerr_   r(   create_global_varr   	append_op)r2   s    r3   rA   LookAhead._increment_global_var   s   ==$$,(8!#$--.>?# &		 5 5 I I!U !J !)% %+$4$4T5J5JC$PD!$$,(.(G(G$--.>?#! $ )H )% KK!! d3345!6!6 78sm	 " r5   c                &   [         R                  " S/SSS9n[         R                  " S/SSS9n[        5       (       a\  [	        SS/[
        R                  " S5      S[         R                  R                  R                  [        U R                  5      SS9S	9nO@[         R                  R                  [
        R                  " S5      S/U R                  SS
S9n[         R                  " U R                  U5      n[         R                   " U R                  U5      n[         R"                  " USS9n[         R                   " Xd5      n[         R"                  " USS9nU R%                  U R&                  US   5      n	XrS   -  SU-
  U	-  -   n
[         R(                  " X5        U R*                  US   -  SU R*                  -
  U	-  -   n
X-  SU-
  US   -  -   n[         R(                  " XS   5        X-  SU-
  U	-  -   n[         R(                  " X5        g )NrT   rS   lookahead_ones)r[   rZ   r#   lookahead_zeroslookahead_kFrV   rY   Tr]   float32)rZ   r   r   )r'   oneszerosr
   r   r   re   rf   r\   rg   r   r   r(   rh   	remainderr0   equalcast_get_accumulatorrM   assignr   )r2   rN   param_and_gradone_varzero_vark_varmodcond_1cond_2slow_vartmp_var	tmp_var_1s               r3   _append_optimize_opLookAhead._append_optimize_op   s   ++QCw=MN<<#W+<
 ==$c ))-8"II11EE-5 F E MM33 ))-8cff  4 E t44e<d33W=V95c,V95((9JK!,,F
h/FFg(**~a00C$**4D3PP$F
nQ6G'GG	i!23$F
h'>>	i*r5   c                    [        U[        [        R                  R                  45      (       d   S5       eU R
                  R                  UUUUS9u  pVU R                  5         U R                  XUS9nXV4$ )a  
Add operations to minimize ``loss`` by updating ``parameters``.

Args:
    loss (Tensor): A ``Tensor`` containing the value to minimize.
    startup_program (Program, optional): :ref:`api_paddle_static_Program` for
        initializing parameters in ``parameters``. The default value
        is None, at this time :ref:`api_paddle_static_default_startup_program` will be used.
    parameters (list, optional): List of ``Tensor`` or ``Tensor.name`` to update
        to minimize ``loss``. The default value is None, at this time all parameters
        will be updated.
    no_grad_set (set, optional): Set of ``Tensor``  or ``Tensor.name`` that don't need
        to be updated. The default value is None.

Returns:
    tuple: tuple (optimize_ops, params_grads), A list of operators appended
    by minimize and a list of (param, grad) tensor pairs, param is
    ``Parameter``, grad is the gradient value corresponding to the parameter.
    In static graph mode, the returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to
    indicate program pruning. If so, the program will be pruned by ``feed`` and
    ``fetch_list`` before run, see details in ``Executor``.

Examples:

    .. code-block:: python

        >>> import paddle

        >>> inp = paddle.rand([1, 10], dtype="float32")
        >>> linear = paddle.nn.Linear(10, 1)
        >>> out = linear(inp)
        >>> loss = paddle.mean(out)
        >>> sgd = paddle.optimizer.SGD(learning_rate=0.1,parameters=linear.parameters())
        >>> lookahead = paddle.incubate.LookAhead(sgd, alpha=0.2, k=5)
        >>> loss.backward()
        >>> lookahead.minimize(loss)
        >>> lookahead.clear_grad()

zThe loss should be an Tensor.)r>   r    no_grad_set)r>   r?   )	r%   r   r'   rK   Valuer   minimizerA   rE   )r2   r=   r>   r    r   optimize_opsr?   _s           r3   r   LookAhead.minimize&  s    ^ $6::+;+; <== 	
+	
=
 &*%9%9%B%B+!#	 &C &
" 	""$   ! 
 ))r5   )r0   r1   r   r   r   r   r   )g      ?   N)
r   r   r   r   r   r   r#   z
str | NonereturnNone)r   r   )NNN)
r=   r   r>   zProgram | Noner    zlist[Tensor] | list[str] | Noner   zset[Tensor] | set[str] | Noner   z2tuple[list[Operator], list[tuple[Tensor, Tensor]]])r/   
__module____qualname____firstlineno____doc____annotations__rM   r-   r8   r   dygraph_onlyimperative_baseno_gradr@   rP   rA   r   r   __static_attributes____classcell__)r.   s   @r3   r   r   $   s   Un L
F
II
 $"$ $ 	$
 $ 
$ $L: $
  $
L5<*+X  +/6:59@*@* (@* 4	@*
 3@* 
<@* @*r5   r   )
__future__r   typingr   r'   paddle.baser   r   paddle.base.dygraphr   r   paddle.base.frameworkr   paddle.base.layer_helperr	   paddle.frameworkr
   paddle.optimizerr   paddle.pir.corer   r   r   paddle.staticr   __all__r    r5   r3   <module>r      sF    #    . 7 * 0 ( & ,.% C*	 C*r5   