
    ϑi%                     ^    S SK r S SKJrJrJr  S SKJr  S SKJrJ	r	  S SK
Jr   " S S\5      rg)    N)_C_ops_legacy_C_opspir)	framework)in_dynamic_modein_pir_mode)	Optimizerc                   T   ^  \ rS rSrSrSr          SU 4S jjrS rS rSr	U =r
$ )	LarsMomentumOptimizer   a  
Momentum optimizer with LARS support

The update equations are as follows:

.. math::

    & local\_learning\_rate = learning\_rate * lars\_coeff * \\
      \\frac{||param||}{||gradient|| + lars\_weight\_decay * ||param||}

    & velocity = mu * velocity + local\_learning\_rate * (gradient + lars\_weight\_decay * param + epsilon)

    & param = param - velocity

Parameters:
    learning_rate (float|Variable): The learning rate used to update parameters. \
        Can be a float value or a Variable with one float value as data element. \
        momentum (float): momentum factor
    lars_coeff (float): Defines how much we trust the layer to change its weights.
    lars_weight_decay (float): Weight decay coefficient for decaying using LARS.
    parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
        This parameter is required in dygraph mode. \
        The default value is None in static graph mode, at this time all parameters will be updated.
    regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
         :ref:`api_paddle_regularizer_L1Decay` , :ref:`api_paddle_regularizer_L2Decay` . If a parameter has set \
        regularizer using :ref:`api_paddle_ParamAttr` already, the regularization setting here in optimizer will be \
        ignored for this parameter. Otherwise, the regularization setting here in optimizer will take effect.  \
        Default None, meaning there is no regularization.
    grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
        some derived class of ``GradientClipBase`` . There are three clipping strategies
        ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
        :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
    name (str, optional): This parameter is used by developers to print debugging information. \
        For details, please refer to :ref:`api_guide_Name`. Default is None.
    exclude_from_weight_decay (list[str], optional): Name string of layers which will be exclude from lars weight decay. Default is None.
    epsilon (float, optional): Epsilon to avoid Division by Zero when calculate local lr. Default is 0.
    multi_precision (bool, optional): Whether to use multi-precision during weight updating.
    rescale_grad (float, optional): Multiply the gradient with `rescale_grad` \
        before updating. Often choose to be `1.0/batch_size`.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import numpy as np

        >>> paddle.enable_static()
        >>> np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        >>> inp = paddle.static.data(
        ...     name="inp", shape=[2, 2], dtype='float32')
        >>> out = paddle.static.nn.fc(inp, size=3)
        >>> out = paddle.sum(out)
        >>> optimizer = paddle.incubate.optimizer.LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        >>> optimizer.minimize(out)

        >>> exe = paddle.static.Executor(paddle.CPUPlace())
        >>> exe.run(paddle.static.default_startup_program())
        >>> exe.run(
        ...     feed={"inp": np_inp},
        ...     fetch_list=[out.name])
velocityc                   > Uc   eUc   e[         TU ]  UUUUUS9  SU l        X l        [	        U5      U l        [	        U5      U l        [	        U
5      U l        U	c  / U l        OXl        Xl	        [	        U5      U l
        0 U l        g )N)learning_rate
parametersweight_decay	grad_clipnamelars_momentum)super__init__type	_momentumfloat_lars_coeff_lars_weight_decay_epsilon_exclude_from_weight_decay_multi_precision_rescale_grad_master_weights)selfr   momentum
lars_coefflars_weight_decayparameter_listregularizationr   r   exclude_from_weight_decayepsilonmulti_precisionrescale_grad	__class__s                g/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/optimizer/lars_momentum.pyr   LarsMomentumOptimizer.__init__Z   s     (((###'%' 	 	
 $	! ,"'(9":g$,.0D+.G+ /"<0!    c                    [        U[        R                  [        R                  45      (       d  [	        S5      eU H  nU R
                  (       aO  U R                  UR                  5      (       a/  U R                  U5      nU R                  U R                  U5        Mc  U R                  UR                  5      (       a'  U R
                  (       d  [        R                  " S5        U R                  U R                  U5        M     g )Nblock is not instance of Block.zAccumulating with FP16/BF16 in optimizer can lead to poor accuracy or slow convergence.Consider using multi_precision=True option of the Lars optimizer.)
isinstancer   Blockr   	TypeErrorr   _is_dtype_fp16_or_bf16dtype_create_master_weight_add_accumulator_velocity_acc_strwarningswarn)r!   blockr   pmaster_ps        r,   _create_accumulators*LarsMomentumOptimizer._create_accumulators   s    %)//399!=>>=>>A$$)D)DQWW)M)M55a8%%d&<&<hG++AGG44--X !!$"8"8!< r.   c                     [        U[        R                  [        R                  45      (       d  [	        S5      eU R
                  nUS   R                  n[        U R                  5      S:  a  U R                   H  nXT;   d  M
  Sn  O   U R                  U R                  US   5      nU R                  U5      nU R                  =(       a    U R                  US   R                  5      nU(       a  U R                  US   R                     OS n	U R                   U R"                  U/UU R$                  U R&                  S.n
US   US   UUS.nUS   US.nU(       a  XS'   XS	'   [)        5       (       ab  [*        R,                  " US   /US   /U/U/US   /U/S
U R                   SU R"                  SU/SUSU R$                  SU R&                  5      u  pg [/        5       (       az  [        U	[        R0                  5      (       a  U	/n	[2        R4                  " US   /US   /U/U/U	U R                   U R"                  U/U R$                  UU R&                  5      u      ng UR7                  U R8                  UUU
SS9nU$ )Nr0   r   g        )mur#   r$   r)   r(   r*      )ParamGradVelocityLearningRate)ParamOutVelocityOutMasterParamMasterParamOutrA   r#   r$   r)   r(   r*   T)r   inputsoutputsattrsstop_gradient)r1   r   r2   r   r3   r   r   lenr   _get_accumulator_masterr8   _create_param_lrr   r4   r5   r    r   r   r   r   r   r   r   r   Valuer   lars_momentum_	append_opr   )r!   r;   param_and_gradr   
param_namer   velocity_acclrfind_mastermaster_weightrM   rK   rL   tmptmp2_momentum_ops                    r,   _append_optimize_op)LarsMomentumOptimizer._append_optimize_op   s   %)//399!=>>=>>!44#A&++
t../!377%),& 8
 33""N1$5
 "">2++ 
0K0K1##1

    !2!7!78 	 ..**"4!5*}} ..
 $A&"1%$	
  .a0N$1=!(5$%%33"#"#"#  ##$!""%IC( ]]-33!.++"#"#  #$""GAq!   //YY" * K r.   )	r   r   r   r   r    r   r   r   r   )
gMbP?gMb@?NNNNNr   Fg      ?)__name__
__module____qualname____firstlineno____doc__r8   r   r>   r_   __static_attributes____classcell__)r+   s   @r,   r   r      sH    <| #  "&#"J=$_ _r.   r   )r9   paddler   r   r   paddle.baser   paddle.frameworkr   r   paddle.optimizerr	   r    r.   r,   <module>rm      s+     - - ! 'WI Wr.   