
    RЦiA              %          S r SSKrSSKJrJrJr  SSKrSSKJr  SSKJ	r	  SSK
Jr   " S S	\	5      r  SS
\\   S\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\\   SS4$S jjrS
\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\\   4 S jrS
\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\\   4 S jrg) aL  AdamW Optimizer
Impl copied from PyTorch master

References for added functionality:
    Cautious Optimizers: https://arxiv.org/abs/2411.16085
    Why Gradients Rapidly Increase Near the End of Training: https://arxiv.org/abs/2506.02285

NOTE: This impl has been deprecated in favour of torch.optim.AdamW and remains as a reference
    N)ListOptionalTuple)Tensor)	Optimizer   )ParamsTc                      ^  \ rS rSrSr          SS\S\S\\\4   S\S\S\S	\S
\S\S\	\   S\4U 4S jjjr
U 4S jr\R                  " 5       SS j5       rSrU =r$ )AdamWLegacy   a  Implements AdamW algorithm.

NOTE: This impl has been deprecated in favour of torch.optim.AdamW and remains as a reference

References:
    - Adam: A Method for Stochastic Optimization: https://arxiv.org/abs/1412.6980
    - Decoupled Weight Decay Regularization: https://arxiv.org/abs/1711.05101
    - On the Convergence of Adam and Beyond: https://openreview.net/forum?id=ryQu7f-RZ

Args:
    params: iterable of parameters to optimize or dicts defining parameter groups
    lr: learning rate
    betas: coefficients used for computing running averages of gradient and its square
    eps: term added to the denominator to improve numerical stability
    weight_decay: weight decay coefficient
    amsgrad: whether to use the AMSGrad variant of this algorithm
        from the paper `On the Convergence of Adam and Beyond`
    caution: apply caution when using AdamW
    corrected_weight_decay: apply corrected weight decay (lr**2 / max_lr)
    maximize: maximize the params based on the objective, instead of minimizing
    foreach: whether foreach implementation of optimizer is used.
        If unspecified by the user (so foreach is None), we will try to use
        foreach over for-loop implementation on CUDA, since it is faster in general.
    capturable: whether this instance is safe to capture in a CUDA graph.
        Passing True can impair ungraphed performance, so if you don't intend to
        graph capture this instance, leave it False
paramslrbetasepsweight_decayamsgradcautioncorrected_weight_decaymaximizeforeach
capturablec                   > SU::  d  [        SR                  U5      5      eSU::  d  [        SR                  U5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      e[        UUUUUUUU
U	US	9
n[        [        U ]  X5        g )
N        zInvalid learning rate: {}zInvalid epsilon value: {}r         ?z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {})
r   r   r   r   r   r   r   r   r   r   )
ValueErrorformatdictsuperr   __init__)selfr   r   r   r   r   r   r   r   r   r   r   defaults	__class__s                O/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/adamw.pyr   AdamWLegacy.__init__1   s     by8??CDDcz8??DEEeAh$$DKKERSHUVVeAh$$DKKERSHUVV%#9!
 	k4)&;    c                 <  > [         [        U ]  U5        [        U R                  R                  5       5      n[        U5      S:g  =(       a    [        R                  " US   S   5      nU(       d.  U H(  n[        R                  " [        US   5      5      US'   M*     U R                   Ho  nUR                  SS5        UR                  SS5        UR                  SS5        UR                  SS 5        UR                  SS5        UR                  S	S5        Mq     g )
Nr   stepr   Fr   r   r   r   r   )r   r   __setstate__liststatevalueslentorch	is_tensortensorfloatparam_groups
setdefault)r    r*   state_valuesstep_is_tensorsgroupr"   s         r#   r(   AdamWLegacy.__setstate__U   s    k4-e4DJJ--/0l+q0^eoolSToV\F]6^!!LLqy)9:&	 "&&EY.Y.5u=Y-Z/\51 'r%   c                    U R                  5         SnUb%  [        R                  " 5          U" 5       nSSS5        U R                   GH  n/ n/ n/ n/ n/ n/ n	US   u  pUS   nUS    GHy  nUR                  c  M  UR                  U5        UR                  R                  (       a  [        S5      eUR                  UR                  5        U R                  U   n[        U5      S:X  a  [        R                  " S5      US'   [        R                  " U[        R                  S	9US
'   [        R                  " U[        R                  S	9US'   U(       a&  [        R                  " U[        R                  S	9US'   UR                  US
   5        UR                  US   5        U(       a!  UR                  UR                  SS5      5        U	R                  US   5        GM|     [        UUUUUU	US   UU
UUS   US   US   US   US   US   US   (       a  U R                  S   OSS9  GM     U$ ! , (       d  f       GN= f)zPerforms a single optimization step.

Arguments:
    closure (callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr   r   r   z'AdamW does not support sparse gradientsr   r   r'   )memory_formatexp_avg
exp_avg_sqmax_exp_avg_sqr   r   r   r   r   r   r   r   )r   r   beta1beta2r   r   r   r   r   r   max_lr) _cuda_graph_capture_health_checkr-   enable_gradr1   gradappend	is_sparseRuntimeErrorr*   r,   r/   
zeros_likepreserve_formatgetadamwr!   )r    closurelossr6   params_with_gradgradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepsr=   r>   r   pr*   s                  r#   r'   AdamWLegacy.stepd   s*    	--/""$y % &&E!EHK OK >LEI&G8_66> ''*66##&'PQQQVV$

1 u:?$)LL$4E&M','7'7I^I^'_E)$*/*:*:1ELaLa*bE,'272B2B1TYTiTi2j./i 01""5#67#**5995Et+LM""5=13 %6  i(;">2%Li(z* ..34L.Mt}}T*SW#K 'r y %$s   H44
I )
MbP?)g?g+?g:0yE>g{Gz?FFFFNFN)__name__
__module____qualname____firstlineno____doc__r	   r0   r   boolr   r   r(   r-   no_gradr'   __static_attributes____classcell__)r"   s   @r#   r   r      s    > )5"&!!+0"&*$"<"< "< &	"<
 "<  "< "< "< %)"< "< d^"< "< "<H2 ]]_G Gr%   r   r   rM   rN   rO   rP   rQ   r   r   r   r=   r>   r   r   r   r   r   r?   returnc       	            [        S U 5       5      (       d  [        S5      eUcD   U(       + =(       d5    S[        R                  R                  R
                  R                  5       ;   nU(       a*  [        R                  R                  5       (       d  [        nO[        nU" U UUUUUUU	U
UUUUUUUS9  g!   Sn NS= f)z]Functional API that performs AdamW algorithm computation.
See AdamWLegacy class for details.
c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7frV   )
isinstancer-   r   ).0ts     r#   	<genexpr>adamw.<locals>.<genexpr>   s     @Kqz!U\\**Ks   ')zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNScalarF)
r   r=   r>   r   r   r   r   r   r   r?   )allrE   r-   opsaten_foreach_maximum_	overloadsjitis_scripting_multi_tensor_adamw_single_tensor_adamw)r   rM   rN   rO   rP   rQ   r   r   r   r=   r>   r   r   r   r   r   r?   funcs                     r#   rI   rI      s    0 @K@@@!" 	" 	!k]X1Q1Q1[1[1]%]G uyy--//"#!!	Gs   AB7 7B=c       
            [        U 5       GH}  u  nnU(       d  UU   OUU   * nUU   nUU   nUU   nUS-  nUc  U	OU	S-  U-  nUR                  SUU
-  -
  5        UR                  U5      R                  USU-
  S9  UR                  U5      R                  UUSU-
  S9  U(       a  UU   n[        R
                  " UUUS9  UnOUnU(       a  UnS[        R                  " UU5      -
  nS[        R                  " UU5      -
  nU	U-  nUR                  5       nUR                  5       nUR                  5       UU-  -  R                  UU-  5      nU(       aR  UU-  S:  R                  UR                  5      n U R                  U R                  5       R                  SS	95        UU -  nUR                  UU5        GM  UR                  5       nSUU-  -
  nSUU-  -
  nU	U-  n[         R                  " U5      nUR                  5       U-  R                  U5      nU(       aR  UU-  S:  R                  UR                  5      n U R                  U R                  5       R                  SS	95        UU -  nUR                  UUU* S9  GM     g )
Nr      r   alpha)value)outr   rU   )min)	enumeratemul_add_addcmul_r-   maxpownegsqrttodtypediv_meanclamp_addcdiv_itemmath)!r   rM   rN   rO   rP   rQ   r   r=   r>   r   r   r   r   r   r   r?   iparamrB   r:   r;   step_twd_scaler<   
denom_baser'   bias_correction1bias_correction2	step_sizestep_size_negbias_correction2_sqrtdenommasks!                                    r#   rq   rq      s   ( f%5'uQxeAhY1+ ^
Q 	!  2R1Wv-=

2<//0 	U  QY 7''d!e)'D,Q/NIInjnE'J#JD  !599UD#99 599UD#99--I%MMOM$4$9$9$;!__&*?-*OPVVWZ]jWjkE  $*..tzz:		$))+,,,67!D.NN7E*;;=D 5D=0 5D=0--I$(II.>$?!__&)>>DDSIE$*..tzz:		$))+,,,67!D.NN7E)N< &r%   c       
            [        U 5      S:X  a  g U(       a'  [        S [        X5       5       5      (       d   S5       eU(       a  [        R                  " [        U5      5      nU Vs/ s H6  n[        R                  " U5      (       a  [        R                  " U5      OUPM8     nnU Vs/ s H6  n[        R                  " U5      (       a  [        R                  " U5      OUPM8     nnU Vs/ s H6  n[        R                  " U5      (       a  [        R                  " U5      OUPM8     nnU  Vs/ s H6  n[        R                  " U5      (       a  [        R                  " U5      OUPM8     n n[        R                  " US5        Uc  U	OU	S-  U-  n[        R                  " U SUU
-  -
  5        [        R                  " X'5        [        R                  " X!SU-
  S9  [        R                  " X85        [        R                  " X1USU-
  5        U(       Ga  U Vs/ s H  n[        R                  " UU5      PM     nnU Vs/ s H  n[        R                  " UU5      PM     nn[        R                  " US5        [        R                  " US5        [        R                  " U5        [        R                  " U5        [        R                  " UU	5      n[        R                  " U5        [        R                  " U5        [        R                   " U5      nU(       ap  U Vs/ s H6  n[        R                  " U5      (       a  [        R                  " U5      OUPM8     nn[        R"                  " XC5        [        R                   " U5      nO[        R                   " U5      n[        R$                  " U[        R&                  " UU5      5        [        R                  " UU5      n[        R                  " U5        [        R(                  " UU5      nU(       a  [        R&                  " X!5      n[        UU5       VVs/ s H$  u  nnUS:  R+                  UR,                  5      PM&     nnnU Vs/ s H  nUR/                  5       PM     nn[        R"                  " US5        [        R$                  " UU5        [        R&                  " UU5      n[        R0                  " XU5        g U Vs/ s H  nSUUR3                  5       -  -
  PM     nnU Vs/ s H  nSUUR3                  5       -  -
  PM     nnU Vs/ s H  nU	U-  S-  PM     nnU Vs/ s H  n[4        R6                  " U5      PM     nnU(       ap  U Vs/ s H6  n[        R                  " U5      (       a  [        R                  " U5      OUPM8     nn[        R"                  " XC5        [        R                   " U5      nO[        R                   " U5      n[        R$                  " UU5        [        R                  " UU5        U(       a  [        R&                  " X!5      n[        UU5       VVs/ s H$  u  nnUS:  R+                  UR,                  5      PM&     nnnU Vs/ s H  nUR/                  5       PM     nn[        R"                  " US5        [        R$                  " UU5        [        R&                  " UU5      n[        R0                  " XUU5        g s  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snnf s  snf s  snf s  snf s  snf s  snf s  snf s  snnf s  snf )	Nr   c              3   d   #    U  H&  u  pUR                   =(       a    UR                   v   M(     g 7frV   )is_cuda)rd   rR   r'   s      r#   rf   &_multi_tensor_adamw.<locals>.<genexpr>Y  s&      
6N71AII&$,,&6Ns   .0z@If capturable=True, params and state_steps must be CUDA tensors.r   rt   ru   rU   )r,   ri   zipr-   _foreach_negtuple
is_complexview_as_real_foreach_add__foreach_mul__foreach_addcmul_r   _foreach_sub__foreach_neg__foreach_div_foreach_reciprocal__foreach_sqrtrl   _foreach_div__foreach_mul_foreach_addr   r   r   _foreach_addcdiv_r   r   r   )r   rM   rN   rO   rP   rQ   r   r=   r>   r   r   r   r   r   r   r?   xr   r'   r   r   r   r   r   eps_over_step_sizer   masksmg
mask_scalebcs                                  r#   rp   rp   B  s   & 6{a 
69&6N
 
 
 	NM	N 
 ""5<0JOP%Qe&6&6q&9&9U"q@%EPMUVX)9)9!)<)<""1%!CXHVP[\P[1E,<,<Q,?,?5%%a(QFP[K\KQR6au'7'7':':e  #A6FR 
Q' ^rq6)9H	X%< <= 
(	q5y9	+	Kq5yA?JK{tEIIeT2{K?JK{tEIIeT2{K,a0,a0,-,- &&'7<	""9-I& % 3 34D E\kl\kWX8H8H8K8Ku11!4QRR\kOl##OA,,_=J,,[9J4i@	
 #//	3?""#56"":/AB&&x7E585FG5FTQa!eZZ(5FEG,12Eq!&&(EJ2##J5z2))(E:H%8ALMA 44MALMA 44M.>?.>b2g^.>	?9I J9I229I J\kl\kWX8H8H8K8Ku11!4QRR\kOl##OA''8E''4EE#89E3'&&x7E585FG5FTQa!eZZ(5FEG,12Eq!&&(EJ2##J5z2))(E:H%C{ QV\R& LK  m" H2 NM? J m H2s`   )=\,=\/=\2=\!\!\$%=\)+\.5\42 \9 \>>] ]=]+]])NF)r[   r   typingr   r   r   r-   r   torch.optim.optimizerr   _typesr	   r   r\   r0   rI   rq   rp   rT   r%   r#   <module>r      s    ( (   + X) XD #' :V:F|: v,: &\	:
 f: &\: $: : : : : : : :  !:" #:$ %:& 
':zS=VS=F|S= v,S= &\	S=
 fS= &\S= S= S= S= S= S= S= S= S=  !S=" #S=l{DV{DF|{D v,{D &\	{D
 f{D &\{D {D {D {D {D {D {D {D {D  !{D" #{Dr%   