
import math
import torch
from torch.optim.optimizer import Optimizer


class AdaBelief(Optimizer):
    r"""Implements AdaBelief algorithm. Modified from Adam in PyTorch

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-16)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)
        decoupled_decay (boolean, optional): (default: True) If set to True, the
            optimizer uses decoupled weight decay as in AdamW
        fixed_decay (boolean, optional): (default: False) This is used when decoupled_decay
            is set to True.
            When fixed_decay == True, the weight decay is performed as
            $W_{new} = W_{old} - W_{old} \times decay$.
            When fixed_decay == False, the weight decay is performed as
            $W_{new} = W_{old} - W_{old} \times decay \times lr$. Note that in this case the
            weight decay ratio decreases with the learning rate (lr).
        rectify (boolean, optional): (default: True) If set to True, perform the rectified
            update similar to RAdam
        degenerated_to_sgd (boolean, optional): (default: True) If set to True, perform an SGD
            update when the variance of the gradient is high
    Reference: AdaBelief Optimizer, adapting stepsizes by the belief in observed gradients, NeurIPS 2020

    For a complete table of recommended hyperparameters, see https://github.com/juntang-zhuang/Adabelief-Optimizer
    For example train/args for EfficientNet, see these gists:
      - link to train_script: https://gist.github.com/juntang-zhuang/0a501dd51c02278d952cf159bc233037
      - link to args.yaml: https://gist.github.com/juntang-zhuang/517ce3c27022b908bb93f78e4f786dc3
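
    Example (an illustrative sketch, not from the upstream docstring; assumes an
    existing ``model``, ``criterion``, and data tensors ``inputs``/``targets``)::

        optimizer = AdaBelief(model.parameters(), lr=1e-3, eps=1e-16, weight_decay=1e-2)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()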
    """

    def __init__(
            self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-16, weight_decay=0, amsgrad=False,
            decoupled_decay=True, fixed_decay=False, rectify=True, degenerated_to_sgd=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))

        if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict):
            for param in params:
                if 'betas' in param and (param['betas'][0] != betas[0] or param['betas'][1] != betas[1]):
                    param['buffer'] = [[None, None, None] for _ in range(10)]

        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            amsgrad=amsgrad,
            degenerated_to_sgd=degenerated_to_sgd,
            decoupled_decay=decoupled_decay,
            rectify=rectify,
            fixed_decay=fixed_decay,
            buffer=[[None, None, None] for _ in range(10)])
        super(AdaBelief, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(AdaBelief, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    @torch.no_grad()
    def reset(self):
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                amsgrad = group['amsgrad']

                # State initialization
                state['step'] = 0
                # Exponential moving average of gradient values
                state['exp_avg'] = torch.zeros_like(p)
                # Exponential moving average of squared gradient values
                state['exp_avg_var'] = torch.zeros_like(p)
                if amsgrad:
                    # Maintains max of all exp. moving avg. of sq. grad. values
                    state['max_exp_avg_var'] = torch.zeros_like(p)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.dtype in {torch.float16, torch.bfloat16}:
                    grad = grad.float()
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBelief does not support sparse gradients, please consider SparseAdam instead')

                p_fp32 = p
                if p.dtype in {torch.float16, torch.bfloat16}:
                    p_fp32 = p_fp32.float()

                amsgrad = group['amsgrad']
                beta1, beta2 = group['betas']
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p_fp32)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_var'] = torch.zeros_like(p_fp32)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_var'] = torch.zeros_like(p_fp32)

                # perform weight decay, check if decoupled weight decay
                if group['decoupled_decay']:
                    if not group['fixed_decay']:
                        p_fp32.mul_(1.0 - group['lr'] * group['weight_decay'])
                    else:
                        p_fp32.mul_(1.0 - group['weight_decay'])
                else:
                    if group['weight_decay'] != 0:
                        grad.add_(p_fp32, alpha=group['weight_decay'])

                # get current state variables
                exp_avg, exp_avg_var = state['exp_avg'], state['exp_avg_var']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                # Update first and second moment running averages
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                grad_residual = grad - exp_avg
                exp_avg_var.mul_(beta2).addcmul_(grad_residual, grad_residual, value=1 - beta2)

                if amsgrad:
                    max_exp_avg_var = state['max_exp_avg_var']
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_var, exp_avg_var.add_(group['eps']), out=max_exp_avg_var)

                    # Use the max. for normalizing running avg. of gradient
                    denom = (max_exp_avg_var.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                else:
                    denom = (exp_avg_var.add_(group['eps']).sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])

                # update
                if not group['rectify']:
                    # Default update
                    step_size = group['lr'] / bias_correction1
                    p_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    # Rectified update, forked from RAdam
                    buffered = group['buffer'][int(state['step'] % 10)]
                    if state['step'] == buffered[0]:
                        num_sma, step_size = buffered[1], buffered[2]
                    else:
                        buffered[0] = state['step']
                        beta2_t = beta2 ** state['step']
                        num_sma_max = 2 / (1 - beta2) - 1
                        num_sma = num_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                        buffered[1] = num_sma

                        # more conservative since it's an approximated value
                        if num_sma >= 5:
                            step_size = math.sqrt(
                                (1 - beta2_t) *
                                (num_sma - 4) / (num_sma_max - 4) *
                                (num_sma - 2) / num_sma *
                                num_sma_max / (num_sma_max - 2)) / (1 - beta1 ** state['step'])
                        elif group['degenerated_to_sgd']:
                            step_size = 1.0 / (1 - beta1 ** state['step'])
                        else:
                            step_size = -1
                        buffered[2] = step_size

                    if num_sma >= 5:
                        denom = exp_avg_var.sqrt().add_(group['eps'])
                        p_fp32.addcdiv_(exp_avg, denom, value=-step_size * group['lr'])
                    elif step_size > 0:
                        p_fp32.add_(exp_avg, alpha=-step_size * group['lr'])

                if p.dtype in {torch.float16, torch.bfloat16}:
                    p.copy_(p_fp32)

        return loss