
    ͑i                    :   S SK Jr  S SKrS SKJr  S SKJr  S SKJrJ	r	J
r
  S SKrS SKrS SKJrJr  S SKJrJr  S SKJr  S S	KJrJrJr  S S
KJr  SSKJr  \(       a  S SKJr  S SKJr  S SK J!r!   " S S\
5      r" " S S\5      r#S r$ " S S5      r% " S S\%5      r&g)    )annotationsN)defaultdict)Enum)TYPE_CHECKINGAny	TypedDict)_C_ops_legacy_C_ops)coreunique_name)
check_type)Operator_dygraph_tracerin_pir_mode)in_dynamic_mode   )amp_global_state)Tensor)OptimizerWithMixedPrecision)	Optimizerc                  f    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   Srg)_ScaleStateDict)   r   scalefloat
incr_ratio
decr_ratiointincr_every_n_stepsdecr_every_n_nan_or_inf
incr_count
decr_countbooluse_dynamic_loss_scaling N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r%       V/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/amp/grad_scaler.pyr   r   )   s/    !$$"&&r,   r   c                       \ rS rSrSrSrSrSrg)OptimizerState4   r   r      r%   N)r&   r'   r(   r)   INITUNSCALEDSTEPPEDr+   r%   r,   r-   r/   r/   4   s    DHGr,   r/   c                 &    S[         R                  0$ )Nstate)r/   r2   r%   r,   r-   _refresh_optimizer_stater7   :   s    ^(())r,   c                     \ rS rSrSr       S               SS jjrSS jr        SS jrS rS r	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jrS S jrS!S jrS"S jrS!S jr    S#S jrS$S jrS%S jrSrg)&	AmpScaler>   a$	  
AmpScaler is used for Auto-Mixed-Precision training/inferring in imperative
mode. It controls the scaling of loss, helps avoiding numerical overflow.
The object of this class has seventeen methods `scale()`, `unscale_()`, `minimize()` and `get`/`set` api of parameters.

`scale()` is used to multiply the loss by a scale ratio.
`unscale_()` is used to unscale the gradients of parameters, multiplies the gradients of parameters by 1/(scale ratio)
`minimize()` is similar as `optimizer.minimize()`, performs parameters updating, and it will update the loss_scaling.

Commonly, it is used together with `amp_guard` to achieve Auto-Mixed-Precision in
imperative mode.

Args:
    enable(bool, optional): Enable loss scaling or not. Default is True.
    init_loss_scaling (float, optional): The initial loss scaling factor. Default is 2**15.
    incr_ratio(float, optional): The multiplier to use when increasing the loss
                    scaling. Default is 2.0.
    decr_ratio(float, optional): The less-than-one-multiplier to use when decreasing
                    the loss scaling. Default is 0.5.
    incr_every_n_steps(int, optional): Increases loss scaling every n consecutive
                            steps with finite gradients. Default is 1000.
    decr_every_n_nan_or_inf(int, optional): Decreases loss scaling every n
                                accumulated steps with nan or inf gradients. Default is 2.
    use_dynamic_loss_scaling(bool, optional): Whether to use dynamic loss scaling. If False, fixed loss_scaling is used. If True, the loss scaling is updated dynamically. Default is True.
Returns:
    An AmpScaler object.

Examples:

    .. code-block:: python

        >>> import numpy as np
        >>> import paddle

        >>> data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
        >>> model = paddle.nn.Conv2D(3, 2, 3)
        >>> optimizer = paddle.optimizer.SGD(
        ...         learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
        >>> data = paddle.to_tensor(data)
        >>> with paddle.amp.amp_guard():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)
        ...     scaled = scaler.scale(loss)
        ...     scaled.backward()
        ...     scaler.minimize(optimizer, scaled)
c           	     N   [        5       (       a  [        5       nU(       d  [        S5      eU(       a  UR                  R	                  5       (       dd  UR                  R                  5       (       dE  UR                  R                  5       (       d&  [        R                  " SUR                   S35        SnXl	        SU l
        SU l        S U l        U R                  (       Ga  US:  d   S5       eUS:  d   S5       eX l        X0l        X@l        XPl        X`l        SU l        SU l        Xpl
        ['        5       (       at  [(        R*                  R,                  R/                  S	S
/[0        R2                  " S5      [(        R4                  R6                  R9                  U R                  S9S9U l        g [(        R:                  " [<        R>                  " S/5      RA                  [<        RB                  5      5      U l"        [(        R:                  " [<        R>                  " S/5      RA                  [<        RB                  5      5      U l#        [(        R:                  " [<        R>                  " S/5      RA                  [<        RB                  5      5      U l$        [(        R:                  " [<        R>                  " S/5      RA                  [<        RB                  5      5      U l%        [(        R:                  " [<        R>                  " S/5      RA                  [<        RB                  5      5      U l&        [(        R:                  " [<        R>                  " U R                  /5      RA                  [<        RN                  5      5      U l        S U l(        [S        [T        5      U l+        g g )Nz;current_tracer is None, maybe it is not in imperative mode.zWAmpScaler can only be enabled on CUDAPlace, XPUPlace and CustomPlace, current place is z, so it makes no effect.F      ?zThe incr_ratio must be > 1.0.zThe decr_ratio must be < 1.0.r   float32r   loss_scaling)value)dtypeshapenameinitializer),r   r   
ValueError_expected_placeis_gpu_placeis_xpu_placeis_custom_placewarningswarn_enable_use_dynamic_loss_scaling_init_loss_scaling_scale_incr_ratio_decr_ratio_incr_every_n_steps_decr_every_n_nan_or_inf_incr_count_decr_countr   paddlepirr   create_persistable_valuer   generatennrC   ConstantInitializer	to_tensornparrayastypebool_
_found_inf_temp_found_inf_value_false_temp_found_inf_fp16_temp_found_inf_bf16_temp_found_inf_fp32r=   _cache_found_infr   r7   _optimizer_states)	selfenableinit_loss_scalingr   r   r   r    r$   tracers	            r-   __init__AmpScaler.__init__o   s    $&F Q  &&3355))6688))99;;mnt  oE  oE  nF  F^  _ ).&"%<<<#D%DD##D%DD#&7#))'9$,C) D D-E*}}$jjooFF##$--n= &		 5 5 I I"55 !J !	 G  #)"2"2HHaSM((2# 4:3C3CHHaSM((240 -3,<,<HHaSM((2-) -3,<,<HHaSM((2-) -3,<,<HHaSM((2-) %..HHd5567>>rzzJ )-%)45M)N&U r,   c                   [        US[        R                  [        R                  R                  4S5        U R
                  (       aj  [        5       R                  S:w  aR  U R                  (       aA  SU l        SU l        SU l	        [        R                  " S[        5       R                   S35        [        5       (       GaL  UR                  [        R                  R                   :w  a  UR#                  S5      nU R                  (       d  U$ [        R$                  R'                  XR(                  5      nUR+                  5       nUR+                  5       nUR,                  (       a  UR,                  (       a  [        R.                  R0                  R                  R3                  UR,                  R4                  UR,                  R7                  5       UR,                  R9                  5       UR,                  R:                  5      Ul        U$ U R
                  (       a  UR=                  5       (       d  U$ XR(                  -  $ )	a  
Multiplies a Tensor by the scale factor and returns scaled outputs.
If this instance of :class:`AmpScaler` is not enabled, output are returned unmodified.

Args:
    var (Tensor):  The Tensor to scale.
Returns:
    The scaled Tensor or original Tensor.

Examples:

    .. code-block:: python

        >>> import numpy as np
        >>> import paddle

        >>> data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
        >>> model = paddle.nn.Conv2D(3, 2, 3)
        >>> optimizer = paddle.optimizer.SGD(
        ...         learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
        >>> data = paddle.to_tensor(data)
        >>> with paddle.amp.amp_guard():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)
        ...     scaled = scaler.scale(loss)
        ...     scaled.backward()
        ...     scaler.minimize(optimizer, scaled)
varzAmpScaler.scale()float16Fr<   z6It is not recommended to use dynamic loss scaling for z&, so GradScaler is disable by default.r=   )r   rU   r   rV   ValuerK   r   	amp_dtyperL   rM   rI   rJ   r   r@   r   DataTypeFLOAT32r^   r	   multiplyrN   get_defining_op	dist_attrbase	libpaddlecreate_op_dist_attributeprocess_meshoperandsresultschunk_id_is_initialized)rg   rn   	scale_outmultiply_op
src_var_ops        r-   r   AmpScaler.scale   s   < 	]]FJJ,,-		
 LL ",,	9.. DL-2D*&)D#MMHIYI[IeIeHf  gM  N ==yyDMM111jj+11
..sKK@I#335K,,.J$$)=)=KK))--FF#--::#--668#--557",,55	 %  ||3#6#6#8#8J[[  r,   c                V   [        5       (       a  [        U[        R                  R                  R
                  R                  5      (       d   eU R                  Ul        U R                  Ul        U R                  Ul
        US   Ul        U R                  (       aR  U R                  Ul        U R                  Ul        U R                  Ul        U R                  Ul        SUl        SUl        UR$                  " U0 UD6$ U R&                  (       d  UR$                  " U0 UD6$ U R(                  [+        U5         nUS   [,        R.                  L a  U R1                  U5        Su  pV[3        US5      (       aG  UR5                  SU R6                  5        UR$                  " U0 UD6u  pVUR9                  S5      U l        O4U R6                  (       a  SU l        OUR$                  " U0 UD6u  pVSU l        U R                  (       a  U R=                  5         [?        [@        5      U l        XV4$ )	a  
This function is similar as `Optimizer.minimize()`, which performs parameters updating.

If the scaled gradients of parameters contains NAN or INF, the parameters updating is skipped.
Otherwise, if `unscale_()` has not been called, it first unscales the scaled gradients of parameters, then updates the parameters.

Finally, the loss scaling ratio is updated.

Args:
    optimizer(Optimizer):  The optimizer used to update parameters.
    args:  Arguments, which will be forward to `Optimizer.minimize()`.
    kwargs: Keyword arguments, which will be forward to `Optimizer.minimize()`.

Examples:

    .. code-block:: python

        >>> import numpy as np
        >>> import paddle

        >>> data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
        >>> model = paddle.nn.Conv2D(3, 2, 3)
        >>> optimizer = paddle.optimizer.SGD(
        ...     learning_rate=0.01,
        ...     parameters=model.parameters()
        ... )
        >>> scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
        >>> data = paddle.to_tensor(data)
        >>> with paddle.amp.amp_guard():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)
        ...     scaled = scaler.scale(loss)
        ...     scaled.backward()
        ...     scaler.minimize(optimizer, scaled)
r   Nr6   )NN_set_auxiliary_var	found_infTF)!r   
isinstancerU   staticamp	decoratorr   rL   rM   rN   _loss_scaling_scaled_lossrQ   rR   rO   rP   _num_good_steps_num_bad_stepsminimizerK   rf   idr/   r2   _unscalehasattrr   r`   _get_auxiliary_varre   _updater   r7   )rg   	optimizerargskwargsoptimizer_stateoptimize_opsparams_gradss          r-   r   AmpScaler.minimize  s   T ==!!++GG    372P2PI/+/+B+BI(&*kkI#%)!WI"--040H0H	-11 2 )-(8(8	%(,(8(8	%,0	)+/	(%%t6v66||%%t6v6600I? 7#~':'::MM)$%1"9233((dooF)2););T)LV)L&L$-$@$@$MD!(,%-6-?-?-P-P*(-%))LLN!,-E!F))r,   c                   U R                   (       d  gU R                  [        U5         nUS   [        R                  L a  [        S5      eUS   [        R                  L a  [        S5      e[        USS5      (       Ga7  [        UR                  S   [        5      (       Ga  / n/ n/ n/ nUR                   H  nUS    H  nUR                  5       c  M  UR                  UR                  5       5        UR                  5       R                  [        R                  :X  a!  UR                  UR                  5       5        M  UR                  5       R                  [        R                   :X  a!  UR                  UR                  5       5        M  UR                  UR                  5       5        M     M     GO[#        5       (       a.  [$        R&                  R)                  UR*                  5      u  nnnOUR*                   Vs/ s H&  nUR                  5       c  M  UR                  5       PM(     nnU Vs/ s H%  nUR                  [        R                  :X  d  M#  UPM'     nnU Vs/ s H%  nUR                  [        R                   :X  d  M#  UPM'     nnU Vs/ s H%  nUR                  [        R,                  :X  d  M#  UPM'     nnU R.                  U l        [3        U5      (       a]  [4        R6                  " UU R8                  UU R:                  5        [<        R>                  " U R0                  U R:                  5      U l        [3        U5      (       a]  [4        R6                  " UU R8                  UU R@                  5        [<        R>                  " U R0                  U R@                  5      U l        [3        U5      (       a]  [4        R6                  " UU R8                  UU RB                  5        [<        R>                  " U R0                  U RB                  5      U l        [        R                  US'   gs  snf s  snf s  snf s  snf )aG  
Unscale the gradients of parameters, multiplies the gradients of parameters by 1/(loss scaling ratio).
If this instance of :class:`GradScaler` is not enabled, output are returned unmodified.
Args:
    optimizer(Optimizer):  The optimizer used to update parameters.
Returns:
    The unscaled parameters or original parameters.
Nr6   zMunscale_() has already been called on this optimizer since the last update().z(unscale_() is being called after step()._param_groupsr   params)"rK   rf   r   r/   r3   RuntimeErrorr4   getattrr   r   dict
_grad_ivarappendr@   rU   ro   bfloat16r   r   eagerget_grads_lists_parameter_listr=   ra   r`   lenr
   check_finite_and_unscalerN   rb   r	   
bitwise_orrc   rd   )	rg   r   r   param_gradsparam_grads_fp16param_grads_bf16param_grads_fp32groupparams	            r-   r   AmpScaler._unscaleb  s    ||00I?7#~'>'>>_  W%)?)??IJJ9ot44##A&:
 :
 K!!!"00"8_E'')5#**5+;+;+=> ++-33v~~E,33E4D4D4FG"--/55H,33E4D4D4FG,33E4D4D4FG - 1    JJ..y/H/HI	$$$ "+!:!:!:'') 'E$$&!:   "-$!,{{fnn4 !, ! $ "-$!,{{foo5 !, ! $ "-$!,{{fnn4 !, ! $
 ::  22  ))	 %//!:!:DO   22  ))	 %//!:!:DO   22  ))	 %//!:!:DO $2#:#: i
$
$
$s0   P3P35"P8P8'"P=P="Q?Qc           
     N   U R                   (       d  gU R                  (       a  SU l        U R                  S-   U l        U R                  U R                  :X  ar  [        S[        U R                  5       S[        U R                  5       S[        U R                  5       35        U R                  U R                  -  U l        SU l        gSU l        U R                  S-   U l        U R                  U R                  :X  a%  U R                  U R                  -  U l        SU l        g)z
Updates the loss_scaling.
Nr   r   z$Found inf or nan, current scale is: z, decrease to: *)rK   re   rS   rT   rR   printr   rN   rP   rQ   rO   rg   s    r-   r   AmpScaler._update  s    ||   D#//!3D4#@#@@:5;M:No^cdhdodo^p]qqrsxy}  zJ  zJ  tK  sL  M #kkD,<,<<#$  	  !D#//!3D4#;#;;"kkD,<,<<#$ r,   c                    U R                   $ )zd
Enable loss scaling or not.

Returns:
    bool: enable loss scaling return True else return False.
)rK   r   s    r-   	is_enableAmpScaler.is_enable  s     ||r,   c                    U R                   $ )z
Whether to use dynamic loss scaling.

Returns:
    bool: if fixed loss_scaling is used return False, if the loss scaling is updated dynamically return true.
)rL   r   s    r-   is_use_dynamic_loss_scaling%AmpScaler.is_use_dynamic_loss_scaling  s     ---r,   c                    U R                   $ )z`
Return the initial loss scaling factor.

Returns:
    float:  the initial loss scaling factor.
)rM   r   s    r-   get_init_loss_scalingAmpScaler.get_init_loss_scaling  s     &&&r,   c                    Xl         [        R                  " [        R                  " U R                   /5      R                  [        R                  5      5      U l        g)z
Set the initial loss scaling factor by `new_init_loss_scaling`.

Args:
    new_init_loss_scaling(int):  The new_init_loss_scaling used to update initial loss scaling factor.s
N)rM   rU   r[   r\   r]   r^   r=   rN   )rg   new_init_loss_scalings     r-   set_init_loss_scalingAmpScaler.set_init_loss_scaling  s@     #8&&HHd--./66rzzB
r,   c                    U R                   $ )z
Return the multiplier to use when increasing the loss scaling.

Returns:
    float:  the multiplier to use when increasing the loss scaling.
rO   r   s    r-   get_incr_ratioAmpScaler.get_incr_ratio
       r,   c                *    US:  d   S5       eXl         g)z
Set the multiplier to use when increasing the loss scaling by `new_incr_ratio`, `new_incr_ratio` should > 1.0.

Args:
    new_incr_ratio(float):  The new_incr_ratio used to update the multiplier to use when increasing the loss scaling.
r<   z!The new_incr_ratio must be > 1.0.Nr   )rg   new_incr_ratios     r-   set_incr_ratioAmpScaler.set_incr_ratio       #H%HH#)r,   c                    U R                   $ )z
Get the less-than-one-multiplier to use when decreasing the loss scaling.

Returns:
    float:  the less-than-one-multiplier to use when decreasing the loss scaling.
rP   r   s    r-   get_decr_ratioAmpScaler.get_decr_ratio  r   r,   c                *    US:  d   S5       eXl         g)a	  
Set the less-than-one-multiplier to use when decreasing the loss scaling by `new_incr_ratio`, `new_decr_ratio` should < 1.0.

Args:
    new_decr_ratio(float):  The new_decr_ratio used to update the less-than-one-multiplier to use when decreasing the loss scaling.
r<   z!The new_decr_ratio must be < 1.0.Nr   )rg   new_decr_ratios     r-   set_decr_ratioAmpScaler.set_decr_ratio&  r   r,   c                    U R                   $ )z
Return the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.

Returns:
    int:  the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.
rQ   r   s    r-   get_incr_every_n_steps AmpScaler.get_incr_every_n_steps0  s     '''r,   c                    Xl         g)a>  
Set the num `n` by `new_incr_every_n_steps`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.

Args:
    new_incr_every_n_steps(int):  The new_incr_every_n_steps used to update the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.
Nr   )rg   new_incr_every_n_stepss     r-   set_incr_every_n_steps AmpScaler.set_incr_every_n_steps9  s
     $: r,   c                    U R                   $ )z
Return the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.

Returns:
    int:  the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.
rR   r   s    r-   get_decr_every_n_nan_or_inf%AmpScaler.get_decr_every_n_nan_or_infB  s     ,,,r,   c                    Xl         g)aU  
Set the num `n` by `new_decr_every_n_nan_or_inf`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.

Args:
    new_decr_every_n_nan_or_inf(int):  The new_decr_every_n_nan_or_inf used to update the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.
Nr   )rg   new_decr_every_n_nan_or_infs     r-   set_decr_every_n_nan_or_inf%AmpScaler.set_decr_every_n_nan_or_infK  s     )D%r,   c           	         U R                   (       ai  U R                  R                  5       U R                  U R                  U R
                  U R                  U R                  U R                  U R                  S.$ 0 $ )ay  
Returns the state of the scaler as a `dict`, If this instance is not enabled, returns an empty dict.

Returns:
    A dict of scaler includes:
    scale (tensor): The loss scaling factor.
    incr_ratio(float): The multiplier to use when increasing the loss scaling.
    decr_ratio(float): The less-than-one-multiplier to use when decreasing the loss scaling.
    incr_every_n_steps(int): Increases loss scaling every n consecutive steps with finite gradients.
    decr_every_n_nan_or_inf(int): Decreases loss scaling every n accumulated steps with nan or inf gradients.
    incr_count(int): The number of recent consecutive unskipped steps.
    decr_count(int): The number of recent consecutive skipped steps.
    use_dynamic_loss_scaling(bool): Whether to use dynamic loss scaling. If False, fixed loss_scaling is used. If True, the loss scaling is updated dynamically. Default is True.
)r   r   r   r   r    r!   r"   r$   )
rK   rN   numpyrO   rP   rQ   rR   rS   rT   rL   r   s    r-   
state_dictAmpScaler.state_dictV  sp    4 || **,".."..&*&>&>+/+H+H".."..,0,J,J		
 	
r,   c                   U R                   (       d  g[        U5      S:X  a  [        S5      eUS   S   U l        [        R
                  " [        R                  " U R                  /5      R                  [        R                  5      5      U l
        US   U l        US   U l        US   U l        US   U l        US	   U l        US
   U l        US   U l        g)z
Loads the scaler state.

Args:
   state_dict(dict): scaler state. Should be an object returned from a call to `AmpScaler.state_dict()`.
Nr   zdThe input state dict is empty, possibly because it was saved from a disabled instance of GradScaler.r   r   r   r   r    r!   r"   r$   )rK   r   r   rM   rU   r[   r\   r]   r^   r=   rN   rO   rP   rQ   rR   rS   rT   rL   )rg   r   s     r-   load_state_dictAmpScaler.load_state_dictt  s     ||z?a: 
 #-W"5a"8&&HHd--./66rzzB
 &l3%l3#-.B#C (23L(M%%l3%l3)34N)O&r,   )re   rT   rR   rP   rK   r`   rS   rQ   rO   rM   rf   rN   rc   rb   rd   ra   rL   N)Tg      @       @      ?i  r   Trh   r#   ri   r   r   r   r   r   r   r   r    r   r$   r#   returnNonern   r   r   r   r   z'Optimizer | OptimizerWithMixedPrecisionr   r   r   r   r   z2tuple[list[Operator], list[tuple[Tensor, Tensor]]]r   r#   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r&   r'   r(   r)   __doc__rk   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r%   r,   r-   r9   r9   >   s#   .d #*"&'()-JOJO !JO 	JO
 JO  JO "%JO #'JO 
JOXH!T[*:[* [* 	[*
 
<[*zf;P2.'

 * *(:-	D+.	D		D
<Pr,   r9   c                    ^  \ rS rSrSr       S               SU 4S jjjrSU 4S jjr        SU 4S jjrSS jrSS jr	U 4S jr
SU 4S	 jjrSU 4S
 jjrSU 4S jjrS U 4S jjrSU 4S jjrS!U 4S jjrSU 4S jjrS"U 4S jjrS#U 4S jjrS$U 4S jjrS#U 4S jjr    S%U 4S jjrS&U 4S jjrS'U 4S jjrSrU =r$ )(
GradScaleri  a	  
GradScaler is used for Auto-Mixed-Precision training in dynamic graph mode.
It controls the scaling of loss, helps avoiding numerical overflow.
The object of this class has nineteen methods `scale()`, `unscale_()`, `minimize()`, `step()`, `update()` and `get`/`set` api of parameters.

`scale()` is used to multiply the loss by a scale ratio.
`unscale_()` is used to unscale the gradients of parameters, multiplies the gradients of parameters by 1/(scale ratio)
`minimize()` is similar as `optimizer.minimize()`, performs parameters updating, and it will update the loss_scaling, it equal to `step()` + `update()`.
`step()` is similar as `optimizer.step()`, which performs parameters updating.
`update` is used to update the loss_scaling.


Commonly, it is used together with `paddle.amp.auto_cast` to achieve Auto-Mixed-Precision in
dynamic graph mode.

Args:
    enable(bool, optional): Enable loss scaling or not. Default is True.
    init_loss_scaling (float, optional): The initial loss scaling factor. Default is 65536.0.
    incr_ratio(float, optional): The multiplier to use when increasing the loss
                    scaling. Default is 2.0.
    decr_ratio(float, optional): The less-than-one-multiplier to use when decreasing
                    the loss scaling. Default is 0.5.
    incr_every_n_steps(int, optional): Increases loss scaling every n consecutive
                            steps with finite gradients. Default is 2000.
    decr_every_n_nan_or_inf(int, optional): Decreases loss scaling every n
                                accumulated steps with nan or inf gradients. Default is 1.
    use_dynamic_loss_scaling(bool, optional): Whether to use dynamic loss scaling. If False, fixed loss_scaling is used. If True, the loss scaling is updated dynamically. Default is True.
Returns:
    An GradScaler object.

Examples:

    .. code-block:: python

        >>> import paddle

        >>> model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        >>> data = paddle.rand([10, 3, 32, 32])

        >>> with paddle.amp.auto_cast():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)

        >>> scaled = scaler.scale(loss)  # scale the loss
        >>> scaled.backward()            # do backward
        >>> scaler.minimize(optimizer, scaled)  # update parameters
        >>> optimizer.clear_grad()
c           	     0   > [         TU ]  UUUUUUU5        g )N)superrk   )	rg   rh   ri   r   r   r   r    r$   	__class__s	           r-   rk   GradScaler.__init__  s'     	#$	
r,   c                "   > [         TU ]  U5      $ )a  
Multiplies a Tensor by the scale factor and returns scaled outputs.
If this instance of :class:`GradScaler` is not enabled, output are returned unmodified.

Args:
    var (Tensor):  The tensor to scale.
Returns:
    The scaled tensor or original tensor.

Examples:

    .. code-block:: python

        >>> import paddle

        >>> model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        >>> data = paddle.rand([10, 3, 32, 32])

        >>> with paddle.amp.auto_cast():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)

        >>> scaled = scaler.scale(loss)  # scale the loss
        >>> scaled.backward()            # do backward
        >>> scaler.minimize(optimizer, scaled)  # update parameters
        >>> optimizer.clear_grad()
)r  r   )rg   rn   r  s     r-   r   GradScaler.scale  s    < w}S!!r,   c                ,   > [         TU ]  " U/UQ70 UD6$ )a  
This function is similar as `optimizer.minimize()`, which performs parameters updating.

If the scaled gradients of parameters contains NAN or INF, the parameters updating is skipped.
Otherwise, if `unscale_()` has not been called, it first unscales the scaled gradients of parameters, then updates the parameters.

Finally, the loss scaling ratio is updated.

Args:
    optimizer(Optimizer):  The optimizer used to update parameters.
    args:  Arguments, which will be forward to `optimizer.minimize()`.
    kwargs: Keyword arguments, which will be forward to `optimizer.minimize()`.

Examples:

    .. code-block:: python

        >>> import paddle

        >>> model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        >>> data = paddle.rand([10, 3, 32, 32])

        >>> with paddle.amp.auto_cast():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)

        >>> scaled = scaler.scale(loss)  # scale the loss
        >>> scaled.backward()            # do backward
        >>> scaler.minimize(optimizer, scaled)  # update parameters
        >>> optimizer.clear_grad()
)r  r   )rg   r   r   r   r  s       r-   r   GradScaler.minimize  s     N w	;D;F;;r,   c                   U R                   (       d  UR                  5       $ U R                  [        U5         nUS   [        R
                  L a  [        S5      eUS   [        R                  L a  U R                  U5        [        US5      (       aC  UR                  SU R                  5        UR                  5         UR                  S5      U l        O0U R                  (       a  SU l        OUR                  5         SU l        [        R
                  US'   U R                  (       d  [        [         5      U l        gg)a  
This function is similar as `optimizer.step()`, which performs parameters updating.

If the scaled gradients of parameters contains NAN or INF, the parameters updating is skipped.
Otherwise, if `unscale_()` has not been called, it first unscales the scaled gradients of parameters, then updates the parameters.

Args:
    optimizer(Optimizer):  The optimizer used to update parameters.

Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')

        >>> model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        >>> data = paddle.rand([10, 3, 32, 32])
        >>> with paddle.amp.auto_cast():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)
        >>> scaled = scaler.scale(loss)  # scale the loss
        >>> scaled.backward()            # do backward
        >>> scaler.step(optimizer)       # update parameters
        >>> scaler.update()              # update the loss scaling ratio
        >>> optimizer.clear_grad()
r6   z7step() has already been called since the last update().r   r   TFN)rK   steprf   r   r/   r4   r   r2   r   r   r   r`   r   re   rL   r   r7   )rg   r   r   s      r-   r	  GradScaler.step"  s    > ||>>##00I?7#~'='==I 
 7#~':'::MM)$9233((dooFNN$-$@$@$MD!(,% (-%#1#9#9 --%01I%JD" .r,   c                    U R                   (       d  gU R                  (       a$  U R                  5         [        [        5      U l        g)a`  
Updates the loss_scaling.

Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        >>> paddle.device.set_device('gpu')
        >>> model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        >>> data = paddle.rand([10, 3, 32, 32])
        >>> with paddle.amp.auto_cast():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)
        >>> scaled = scaler.scale(loss)     # scale the loss
        >>> scaled.backward()               # do backward
        >>> scaler.step(optimizer)          # update parameters
        >>> scaler.update()                 # update the loss scaling ratio
        >>> optimizer.clear_grad()
N)rK   rL   r   r   r7   rf   r   s    r-   updateGradScaler.update^  s1    2 ||))LLN%01I%JD"r,   c                "   > [         TU ]  U5      $ )a}  
Unscale the gradients of parameters, multiplies the gradients of parameters by 1/(loss scaling ratio).
If this instance of :class:`GradScaler` is not enabled, output are returned unmodified.

Args:
    optimizer(Optimizer):  The optimizer used to update parameters.

Returns:
    The unscaled parameters or original parameters.

Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        >>> paddle.device.set_device('gpu')
        >>> model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        >>> scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        >>> data = paddle.rand([10, 3, 32, 32])
        >>> with paddle.amp.auto_cast():
        ...     conv = model(data)
        ...     loss = paddle.mean(conv)
        >>> scaled = scaler.scale(loss)  # scale the loss
        >>> scaled.backward()            # do backward
        >>> scaler.unscale_(optimizer)    # unscale the parameter
        >>> scaler.step(optimizer)
        >>> scaler.update()
        >>> optimizer.clear_grad()
)r  r   )rg   r   r  s     r-   unscale_GradScaler.unscale_~  s    B w	**r,   c                    > [         TU ]  5       $ )ah  
Enable loss scaling or not.

Returns:
    bool: enable loss scaling return True else return False.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> enable = scaler.is_enable()
        >>> print(enable)
        True
)r  r   rg   r  s    r-   r   GradScaler.is_enable  s    2 w ""r,   c                    > [         TU ]  5       $ )a  
Whether to use dynamic loss scaling.

Returns:
    bool: if fixed loss_scaling is used return False, if the loss scaling is updated dynamically return True.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> use_dynamic_loss_scaling = scaler.is_use_dynamic_loss_scaling()
        >>> print(use_dynamic_loss_scaling)
        True
)r  r   r  s    r-   r   &GradScaler.is_use_dynamic_loss_scaling      2 w244r,   c                    > [         TU ]  5       $ )a  
Return the initial loss scaling factor.

Returns:
    float:  the initial loss scaling factor.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> init_loss_scaling = scaler.get_init_loss_scaling()
        >>> print(init_loss_scaling)
        1024
)r  r   r  s    r-   r    GradScaler.get_init_loss_scaling  s    2 w,..r,   c                $   > [         TU ]  U5        g)aM  
Set the initial loss scaling factor by `new_init_loss_scaling`.

Args:
    new_init_loss_scaling(float):  The new_init_loss_scaling used to update initial loss scaling factor.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> print(scaler.get_init_loss_scaling())
        1024
        >>> new_init_loss_scaling = 1000
        >>> scaler.set_init_loss_scaling(new_init_loss_scaling)
        >>> print(scaler.get_init_loss_scaling())
        1000
N)r  r   )rg   r   r  s     r-   r    GradScaler.set_init_loss_scaling  s    8 	%&;<r,   c                    > [         TU ]  5       $ )a  
Return the multiplier to use when increasing the loss scaling.

Returns:
    float:  the multiplier to use when increasing the loss scaling.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> incr_ratio = scaler.get_incr_ratio()
        >>> print(incr_ratio)
        2.0
)r  r   r  s    r-   r   GradScaler.get_incr_ratio      2 w%''r,   c                $   > [         TU ]  U5        g)ac  
Set the multiplier to use when increasing the loss scaling by `new_incr_ratio`, `new_incr_ratio` should > 1.0.

Args:
    new_incr_ratio(float):  The new_incr_ratio used to update the multiplier to use when increasing the loss scaling.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> print(scaler.get_incr_ratio())
        2.0
        >>> new_incr_ratio = 3.0
        >>> scaler.set_incr_ratio(new_incr_ratio)
        >>> print(scaler.get_incr_ratio())
        3.0
N)r  r   )rg   r   r  s     r-   r   GradScaler.set_incr_ratio+      8 	~.r,   c                    > [         TU ]  5       $ )a  
Get the less-than-one-multiplier to use when decreasing the loss scaling.

Returns:
    float:  the less-than-one-multiplier to use when decreasing the loss scaling.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> decr_ratio = scaler.get_decr_ratio()
        >>> print(decr_ratio)
        0.5
)r  r   r  s    r-   r   GradScaler.get_decr_ratioI  r  r,   c                $   > [         TU ]  U5        g)a  
Set the less-than-one-multiplier to use when decreasing the loss scaling by `new_incr_ratio`, `new_decr_ratio` should < 1.0.

Args:
    new_decr_ratio(float):  The new_decr_ratio used to update the less-than-one-multiplier to use when decreasing the loss scaling.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> print(scaler.get_decr_ratio())
        0.5
        >>> new_decr_ratio = 0.1
        >>> scaler.set_decr_ratio(new_decr_ratio)
        >>> print(scaler.get_decr_ratio())
        0.1
N)r  r   )rg   r   r  s     r-   r   GradScaler.set_decr_ratiod  r   r,   c                    > [         TU ]  5       $ )a  
Return the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.

Returns:
    int:  the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> incr_every_n_steps = scaler.get_incr_every_n_steps()
        >>> print(incr_every_n_steps)
        1000
)r  r   r  s    r-   r   !GradScaler.get_incr_every_n_steps  s    2 w-//r,   c                $   > [         TU ]  U5        g)a  
Set the num `n` by `new_incr_every_n_steps`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.

Args:
    new_incr_every_n_steps(int):  The new_incr_every_n_steps used to update the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> print(scaler.get_incr_every_n_steps())
        1000
        >>> new_incr_every_n_steps = 2000
        >>> scaler.set_incr_every_n_steps(new_incr_every_n_steps)
        >>> print(scaler.get_incr_every_n_steps())
        2000
N)r  r   )rg   r   r  s     r-   r   !GradScaler.set_incr_every_n_steps  s    8 	&'=>r,   c                    > [         TU ]  5       $ )a"  
Return the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.

Returns:
    int: the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> decr_every_n_nan_or_inf = scaler.get_decr_every_n_nan_or_inf()
        >>> print(decr_every_n_nan_or_inf)
        2
)r  r   r  s    r-   r   &GradScaler.get_decr_every_n_nan_or_inf  r  r,   c                $   > [         TU ]  U5        g)a  
Set the num `n` by `new_decr_every_n_nan_or_inf`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.

Args:
    new_decr_every_n_nan_or_inf(int):  The new_decr_every_n_nan_or_inf used to update the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle
        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> print(scaler.get_decr_every_n_nan_or_inf())
        2
        >>> new_decr_every_n_nan_or_inf = 3
        >>> scaler.set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)
        >>> print(scaler.get_decr_every_n_nan_or_inf())
        3
N)r  r   )rg   r   r  s     r-   r   &GradScaler.set_decr_every_n_nan_or_inf  s    < 	+,GHr,   c                    > [         TU ]  5       $ )a`  
Returns the state of the scaler as a `dict`, If this instance is not enabled, returns an empty dict.

Returns:
    A dict of scaler includes:
    scale (tensor): The loss scaling factor.
    incr_ratio(float): The multiplier to use when increasing the loss scaling.
    decr_ratio(float): The less-than-one-multiplier to use when decreasing the loss scaling.
    incr_every_n_steps(int): Increases loss scaling every n consecutive steps with finite gradients.
    decr_every_n_nan_or_inf(int): Decreases loss scaling every n accumulated steps with nan or inf gradients.
    incr_count(int): The number of recent consecutive unskipped steps.
    decr_count(int): The number of recent consecutive skipped steps.
    use_dynamic_loss_scaling(bool): Whether to use dynamic loss scaling. If False, fixed loss_scaling is used. If True, the loss scaling is updated dynamically. Default is True.


Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle

        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> scaler_state = scaler.state_dict()
)r  r   r  s    r-   r   GradScaler.state_dict  s    D w!##r,   c                $   > [         TU ]  U5        g)a  
Loads the scaler state.

Args:
    state_dict(dict): scaler state. Should be an object returned from a call to `GradScaler.state_dict()`.

Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU, env:XPU)
        >>> import paddle

        >>> scaler = paddle.amp.GradScaler(
        ...     enable=True,
        ...     init_loss_scaling=1024,
        ...     incr_ratio=2.0,
        ...     decr_ratio=0.5,
        ...     incr_every_n_steps=1000,
        ...     decr_every_n_nan_or_inf=2,
        ...     use_dynamic_loss_scaling=True
        ... )
        >>> scaler_state = scaler.state_dict()
        >>> scaler.load_state_dict(scaler_state)
N)r  r   )rg   r   r  s     r-   r   GradScaler.load_state_dict  s    4 	
+r,   )re   rf   )Tg      @r   r   i  r   Tr   r   r   )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )r&   r'   r(   r)   r   rk   r   r   r	  r  r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   __classcell__)r  s   @r-   r   r     s   1j #*"&'()-

 !
 	

 
  
 "%
 #'
 

 
("@'<:'< '< 	'<
 
<'<R:Kx@!+F#656/6=<(6/<(6/<06?<56I+.I	I@"$H, ,r,   r   )'
__future__r   rI   collectionsr   enumr   typingr   r   r   r   r\   rU   r	   r
   paddle.baser   r   paddle.base.data_feederr   paddle.base.frameworkr   r   r   paddle.frameworkr   	auto_castr   r   paddle.static.amp.decoratorr   !python.paddle.optimizer.optimizerr   r   r/   r7   r9   r   r%   r,   r-   <module>r=     s    #  #     ( ) . H H , 'G;') 'T *P	P P	Pfc
, c
,r,   