
    ёi+                        S SK Jr  S SKrS SKJr  S SKrS SKJr  S SKJr  SSK	J
r
  SSKJr  S	S
KJr  \(       a5  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S	SKJr  S	SKJr   " S S\5      r/ r " S S\5      rg)    )annotationsN)TYPE_CHECKING)_C_ops)in_dynamic_or_pir_mode   )	framework)no_grad   )	Optimizer)Sequence)NotRequired)Tensor)GradientClipBase)WeightDecayRegularizer)LRScheduler)_ParameterConfigc                  *    \ rS rSr% S\S'   S\S'   Srg)_AdadeltaParameterConfig'   zNotRequired[float]epsilonrho N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r       Y/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/optimizer/adadelta.pyr   r   '   s    ##r   r   c                     ^  \ rS rSr% SrS\S'   SrSr       S               SU 4S jjjrS r	S	 r
S
 rSrU =r$ )Adadelta/   av  
**Notes: This API does not support sparse parameter optimization.**

Adadelta Optimizer. Please refer to this for details:
`ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.

The update is done as follows:

.. math::

    E(g_t^2) &= \rho * E(g_{t-1}^2) + (1-\rho) * g^2

    learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \epsilon ) / ( E(g_t^2) + \epsilon ) }

    E(dx_t^2) &= \rho * E(dx_{t-1}^2) + (1-\rho) * (-g*learning\_rate)^2

Args:
    learning_rate (float|Tensor|LRScheduler, optional): The learning rate used to update ``Parameter``.
        It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
    epsilon (float): a small float number for numeric stability. Default 1.0e-6.
    rho (float): a floating point value indicating the decay rate. Default 0.95.
    parameters (list|tuple|None, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
        This parameter is required in dygraph mode. And you can specify different options for \
        different parameter groups such as the learning rate, weight decay, etc, \
        then the parameters are list of dict. Note that the learning_rate in parameter groups \
        represents the scale of base learning_rate. \
        The default value is None in static graph mode, at this time all parameters will be updated.
    weight_decay (int|float|WeightDecayRegularizer|None, optional): The strategy of regularization. \
        It can be a int or float value as coeff of L2 regularization or \
        :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
        If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
        the regularization setting here in optimizer will be ignored for this parameter. \
        Otherwise, the regularization setting here in optimizer will take effect. \
        Default None, meaning there is no regularization.
    grad_clip (GradientClipBase|None, optional): Gradient clipping strategy, it's an instance of
        some derived class of ``GradientClipBase`` . There are three clipping strategies
        ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
        :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
    name (str|None, optional): The default value is None. Normally there is no need for user
            to set this property. For more information, please refer to
            :ref:`api_guide_Name` .

Examples:
    .. code-block:: python

        >>> import paddle

        >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
        >>> linear = paddle.nn.Linear(10, 10)
        >>> out = linear(inp)
        >>> loss = paddle.mean(out)
        >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
        >>> beta2 = paddle.to_tensor([0.99], dtype="float32")
        >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
        >>> back = out.backward()
        >>> adadelta.step()
        >>> adadelta.clear_grad()

        >>> # Note that the learning_rate of linear_2 is 0.01.
        >>> linear_1 = paddle.nn.Linear(10, 10)
        >>> linear_2 = paddle.nn.Linear(10, 10)
        >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
        >>> out = linear_1(inp)
        >>> out = linear_2(out)
        >>> loss = paddle.mean(out)
        >>> adadelta = paddle.optimizer.Adadelta(
        ...     learning_rate=0.1,
        ...     parameters=[{  # type: ignore
        ...         'params': linear_1.parameters()
        ...     }, {
        ...         'params': linear_2.parameters(),
        ...         'weight_decay': 0.001,
        ...         'learning_rate': 0.1,
        ...     }],
        ...     weight_decay=0.01)
        >>> out.backward()
        >>> adadelta.step()
        >>> adadelta.clear_grad()

strtype_avg_squared_grad_avg_squared_updatec                   > Uc  [        S5      eUc  [        S5      eUc  [        S5      e[        TU ]	  UUUUUS9  SU l        0 U l        SU l        X l        X0l        UUS.U l        g )Nzlearning_rate is not set.zepsilon is not set.zrho is not set.)learning_rate
parametersweight_decay	grad_clipnameFadadelta)r   r   )	
ValueErrorsuper__init___multi_precision_master_weightsr%   _epsilon_rho_default_dict)	selfr)   r   r   r*   r+   r,   r-   	__class__s	           r    r1   Adadelta.__init__   s      899?233;.//'!% 	 	
 !&!		
r   c                   [        U[        R                  [        R                  R                  45      (       d  [        S5      e[        U[        5      (       a  UR                  S5      nU GHe  nUR                  U R                  ;   a  M   U R                  (       a  U R                  UR                  5      (       ap  U R                  U5      nU R                  U R                  U5        U R                  U R                   U5        U R                  R#                  UR                  5        M  U R                  UR                  5      (       a'  U R                  (       d  [$        R&                  " S5        U R                  U R                  U5        U R                  U R                   U5        U R                  R#                  UR                  5        GMh     g )N)block is not instance of framework.Block.paramszAccumulating with FP16/BF16 in optimizer can lead to poor accuracy or slow convergence.Consider using multi_precision=True option of the Lars optimizer.)
isinstancer   Blockpaddlepir	TypeErrordictgetr-   _already_create_accumulatorr2   _is_dtype_fp16_or_bf16dtype_create_master_weight_add_accumulator_avg_squared_grad_acc_str_avg_squared_update_acc_straddwarningswarn)r7   blockr*   pmaster_ps        r    _create_accumulatorsAdadelta._create_accumulators   sW   %)//6::3C3C!DEEGHHj$''#1JAvv999$$)D)DQWW)M)M55a8%%d&D&DhO%%44h 0044QVV<++AGG44--X !!$"@"@!D!!$"B"BAF,,008+ r   c                   [        U[        5      (       a  U R                  U5      nU R                  U R                  US   5      nU R                  U R
                  US   5      nU R                  =(       a    U R                  US   R                  5      nU(       a  U R                  US   R                     OS n[        5       (       a[  [        5          [        R                  " US   US   UUU R                  U5      UU R                   U R"                  U5	        S S S 5        g [        U[$        R&                  [(        R*                  R&                  45      (       d  [-        S5      eUS   US   UUU R                  U5      S.nUS   UUS.nU(       a  XgS'   XhS'   UR/                  U R0                  UUU R"                  U R                   US.S	S
9n	U	$ ! , (       d  f       g = f)Nr   r
   r;   )ParamGradAvgSquaredGradAvgSquaredUpdateLearningRate)ParamOutAvgSquaredGradOutAvgSquaredUpdateOutMasterParamMasterParamOut)r   r   multi_precisionT)r%   inputsoutputsattrsstop_gradient)r=   rB   _update_param_group_get_accumulator_masterrI   rJ   r2   rE   rF   r3   r-   r   r	   r   	adadelta__create_param_lrr5   r4   r   r>   r?   r@   rA   	append_opr%   )
r7   rN   param_and_gradavg_squared_grad_accavg_squared_update_accfind_mastermaster_weightr_   r`   adadelta_ops
             r    _append_optimize_opAdadelta._append_optimize_op   s   nd++!55nEN#;;**N1,= 
 "&!=!=,,nQ.?"
 ++ 
0K0K1##1

    !2!7!78 	 "##  "1%"1%(*)).9!IIMM
  eioovzz7G7G%HII KLL (*&q)"6$: $ 5 5n EF +1-%9'=G
 (5}%,9()//YY#}}99'2
 # * 
K [  s   AG
G#c                    UR                  SU R                  S   5      U l        UR                  SU R                  S   5      U l        UR                  S5      nU$ )Nr   r   r<   )rC   r6   r4   r5   )r7   r*   s     r    rc   Adadelta._update_param_group  sP    "y$2D2DY2OPNN5$*<*<U*CD	^^H-
r   )r6   r4   r3   r2   r5   r%   )gMbP?gư>gffffff?NNNN)r)   zfloat | Tensor | LRSchedulerr   floatr   rr   r*   z<Sequence[Tensor] | Sequence[_AdadeltaParameterConfig] | Noner+   z%float | WeightDecayRegularizer | Noner,   zGradientClipBase | Noner-   z
str | NonereturnNone)r   r   r   r   __doc__r   rI   rJ   r1   rQ   rn   rc   r   __classcell__)r8   s   @r    r"   r"   /   s    Ob I 3"7 7< >B-1!
3!
 !
 	!
 I!
 <!
 +!
 !
 
!
 !
F9:AF r   r"   )
__future__r   rL   typingr   r?   r   paddle.base.frameworkr   baser   base.dygraphr	   	optimizerr   collections.abcr   typing_extensionsr   r   paddle.nn.clipr   paddle.regularizerr   lrr   r   r   __all__r"   r   r   r    <module>r      s\    #      8  "  (-/9+ #3  
 ]y ]r   