
    ёi                        S SK Jr  S SKrS SKJr  S SKJrJr  SSKJ	r	  SSK
Jr  SSKJr  S	S
KJr  \(       a$  S SKJr  S SKJr  S SKJr  S SKJr  S	SKJr  S	SKJr  / r " S S\5      rg)    )annotationsN)TYPE_CHECKING)_C_opspir   )	framework)no_grad)in_dynamic_or_pir_mode   )	Optimizer)Sequence)Tensor)GradientClipBase)WeightDecayRegularizer)LRScheduler)_ParameterConfigc                     ^  \ rS rSr% SrS\S'         S
             SU 4S jjjrS r\S 5       r	S r
S	rU =r$ )SGD(   aD	  
Optimizer of the stochastic gradient descent algorithm.

.. math::

    param\_out = param - learning\_rate * grad

Parameters:
    learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
        It can be a float value or a LRScheduler. The default value is 0.001.
    parameters (list|tuple|None, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
        This parameter is required in dygraph mode. \
        The default value is None in static graph mode, at this time all parameters will be updated.
    weight_decay (int|float|WeightDecayRegularizer|None, optional): The strategy of regularization. \
        It can be a int or float value as coeff of L2 regularization or \
        :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
        If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
        the regularization setting here in optimizer will be ignored for this parameter. \
        Otherwise, the regularization setting here in optimizer will take effect. \
        Default None, meaning there is no regularization.
    grad_clip (GradientClipBase|None, optional): Gradient clipping strategy, it's an instance of
        some derived class of ``GradientClipBase`` . There are three clipping strategies
        ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
        :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
    multi_precision (bool, optional): Whether to use multi-precision during weight updating.
    name (str|None, optional): The default value is None. Normally there is no need for user
            to set this property. For more information, please refer to
            :ref:`api_guide_Name` .

Examples:
    .. code-block:: python

        >>> import paddle

        >>> inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
        >>> linear = paddle.nn.Linear(10, 10)
        >>> inp = paddle.to_tensor(inp)
        >>> out = linear(inp)
        >>> loss = paddle.mean(out)
        >>> sgd = paddle.optimizer.SGD(
        ...     learning_rate=0.1,
        ...     parameters=linear.parameters(),
        ...     weight_decay=0.01
        ... )
        >>> out.backward()
        >>> sgd.step()
        >>> sgd.clear_grad()

strtypec                l   > Uc  [        S5      e[        TU ]	  UUUUUS9  SU l        XPl        0 U l        g )Nzlearning_rate is not set)learning_rate
parametersweight_decay	grad_clipnamesgd)
ValueErrorsuper__init__r   _multi_precision_master_weights)selfr   r   r   r   multi_precisionr   	__class__s          T/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/optimizer/sgd.pyr!   SGD.__init__]   sO      788'!% 	 	
 	 /!    c                d   [        U[        R                  [        R                  45      (       d   e[        U[        5      (       a  U R                  U5      nU H  nUR                  U R                  ;   a  M  U R                  (       aX  U R                  UR                  5      (       a8  U R                  U5      nU R                  R                  UR                  5        M  U R                  UR                  5      (       d  M  U R                  (       a  M  [        R                  " S5        M     g )NzAccumulating with FP16/BF16 in optimizer can lead to poor accuracy or slow convergence.Consider using multi_precision=True option of the Adam optimizer.)
isinstancer   Blockr   dict_update_param_groupr   _already_create_accumulatorr"   _is_dtype_fp16_or_bf16dtype_create_master_weightaddwarningswarn)r$   blockr   pmaster_ps        r'   _create_accumulatorsSGD._create_accumulatorss   s    %)//399!=>>>>j$''11*=J Avv999$$)D)DQWW)M)M55a80044QVV<++AGG44---X r)   c                H   [        U[        5      (       a  U R                  U5      nU R                  =(       a    U R	                  US   R
                  5      nU(       a  U R                  US   R                     OS nU R                  U5      n[        5       (       a!  [        R                  " US   UUS   UU5        g [        U[        R                  5      (       d   eUS   US   US.nSUS   0nSU0nU(       a  XFS'   XGS'   UR                  U R                  UUUSS	9n	U	$ )
Nr   r   )ParamGradLearningRateParamOutr%   MasterParamMasterParamOutT)r   inputsoutputsattrsstop_gradient)r+   r-   r.   r"   r0   r1   r#   r   _create_param_lrr
   r   sgd_r   r,   	append_opr   )
r$   r6   param_and_gradfind_mastermaster_weightlrrB   rC   rD   sgd_ops
             r'   _append_optimize_opSGD._append_optimize_op   sF   nd++!55nEN++ 
0K0K1##1

    !2!7!78 	 "">2!##KKq!q! eY__5555 (*&q) "F ">!#45G&4E(5}%,9()__YY" % F Mr)   c                (    UR                  S5      nU$ )Nparams)get)r$   r   s     r'   r.   SGD._update_param_group   s    ^^H-
r)   )r#   r"   r   )gMbP?NNNFN)r   zfloat | LRSchedulerr   z4Sequence[Tensor] | Sequence[_ParameterConfig] | Noner   z%float | WeightDecayRegularizer | Noner   zGradientClipBase | Noner%   boolr   z
str | NonereturnNone)__name__
__module____qualname____firstlineno____doc____annotations__r!   r9   r	   rN   r.   __static_attributes____classcell__)r&   s   @r'   r   r   (   s    0d I .3KO>B-1 %"*" I" <	"
 +" " " 
" ",, 0 0d r)   r   )
__future__r   r4   typingr   paddler   r   baser   base.dygraphr	   base.frameworkr
   	optimizerr   collections.abcr   r   paddle.nn.clipr   paddle.regularizerr   rL   r   r   __all__r    r)   r'   <module>rk      sG    #      " 3  (/9+
V) Vr)   