
    RЦi,                        S r SSKJrJr  SSKrSSKJr  SSKJr   SSKJrJ	r	  Sr
S	S
KJr  SS/r " S S\5      r  SSS.S\\   S\\   S\\\      S\S\\   S\S\S\S\S\S\S\S\\   4S jjjrS\\   S\\   S\\\      S\S\S\S\S\S\S\S\S\\   4S jrS\\   S\\   S\\\      S\S\S\S\S\S\S\S\S\\   4S jrg! \ a    Sr
 Nf = f) a   SGD with decoupled weight-decay.

References for added functionality:
    Cautious Optimizers: https://arxiv.org/abs/2411.16085
    Why Gradients Rapidly Increase Near the End of Training: https://arxiv.org/abs/2506.02285

Hacked together by Ross Wightman
    )ListOptionalN)Tensor)	Optimizer)_use_grad_for_differentiable_default_to_fused_or_foreachTF   )ParamsTSGDWsgdwc                      ^  \ rS rSr     SSSSSSS.S\S\S\S\S	\S
\S\S\S\S\\   S\4U 4S jjjjrU 4S jr	S r
\R                  " 5       SS j5       rSrU =r$ )r      FN)cautioncorrected_weight_decaymaximizeforeachdifferentiableparamslrmomentum	dampeningweight_decaynesterovr   r   r   r   r   c                   > US:  a  [        SU 35      eUS:  a  [        SU 35      eUS:  a  [        SU 35      e[        UUUUUUUU	U
US9
nU(       a  US::  d  US:w  a  [        S5      e[        TU ]  X5        g )N        zInvalid learning rate: zInvalid momentum value: zInvalid weight_decay value: )
r   r   r   r   r   r   r   r   r   r   r   z8Nesterov momentum requires a momentum and zero dampening)
ValueErrordictsuper__init__)selfr   r   r   r   r   r   r   r   r   r   r   defaults	__class__s                N/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/sgdw.pyr   SGDW.__init__   s     86rd;<<c>7zBCC#;L>JKK%#9)
 Q)q.WXX*    c                 "  > [         TU ]  U5        U R                   Ho  nUR                  SS5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS 5        UR                  SS5        Mq     g )Nr   Fr   r   r   r   r   )r   __setstate__param_groups
setdefault)r    stategroupr"   s      r#   r'   SGDW.__setstate__@   s    U#&&EY.5u=Z/Z/Y--u5 'r%   c                 J   SnUS    H  nUR                   c  M  UR                  U5        UR                  UR                   5        UR                   R                  (       a  SnU R                  U   nSU;  a  UR                  S 5        M  UR                  US   5        M     U$ )NFr   Tmomentum_buffer)gradappend	is_sparser*   )r    r+   params_with_gradgradsmomentum_buffer_listhas_sparse_gradpr*   s           r#   _init_groupSGDW._init_groupJ   s    xAvv! ''*QVV$66##&*O

1$E1(//5(//6G0HI ! r%   c                    SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   H  n/ n/ n/ nU R                  X4XV5      n[	        UUUUS   US   US   US   US   US   US   UUS	   US
   (       a  U R
                  S   OSS9  [        XF5       H  u  pU R                  U   n
XS'   M     M     U$ ! , (       d  f       N= f)zPerforms a single optimization step.

Args:
    closure (Callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr   r   r   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r5   r   max_lrr.   )torchenable_gradr(   r7   r   r!   zipr*   )r    closurelossr+   r2   r3   r4   r5   r6   r.   r*   s              r#   step	SGDW.step^   s    ""$y % &&E!E#% "..udO $">2z*;,z*i(z* /i(.34L.Mt}}T*SW" '**:&Q"

1+:'( 'R1 '8 ? %$s   C
C )MbP?r   r   r   FN)__name__
__module____qualname____firstlineno__r
   floatboolr   r   r'   r7   r;   no_gradr@   __static_attributes____classcell__)r"   s   @r#   r   r      s      !"$"$+ "+0"&*#($+$+ $+ 	$+
 $+  $+ $+ $+ %)$+ $+ d^$+ !$+ $+L6( ]]_( (r%   )r:   r   r3   r4   r5   r   r   r   r   r   r   r   r   r:   c                   [         (       a  [        [        S5      (       al  Uc3  [        R                  R                  5       (       d  [        U SSS9u  pOSnU(       a.  [        R                  R                  5       (       a  [        S5      eOSnU(       a*  [        R                  R                  5       (       d  [        nO[        nU" U UUUUUUU	U
UUUS9  g)zdFunctional API that performs SGD algorithm computation.

See :class:`~torch.optim.SGD` for details.
"_group_tensors_by_device_and_dtypeNF)r   	use_fusedz6torch.jit.script not supported with foreach optimizers)	r   r   r   r   r   r   r5   r   r:   )
has_recent_pthasattrr   r;   jitis_scriptingr   RuntimeError_multi_tensor_sgdw_single_tensor_sgdw)r   r3   r4   r5   r   r   r   r   r   r   r   r   r:   _funcs                  r#   r   r      s    , },PQQ? 99))++9&QVbgh
7uyy--//WXXuyy--//!"!'r%   c       	         |   [        U 5       GH,  u  pU	(       d  X   OX   * nUc  UOUS-  U-  nUR                  SX-  -
  5        US:w  a  X,   nUc)  [        R                  " U5      R	                  5       nUX,'   O"UR                  U5      R                  USU-
  S9  U(       aj  U(       a  UR                  UUS9nUU-  S:  R                  UR                  5      nUR                  UR                  5       R                  SS95        UU-  nOU(       a  UR                  UUS9nOUnUR                  X* S9  GM/     g )N         ?r   r	   alpharC   )min)	enumeratemul_r;   clonedetachadd_addtodtypediv_meanclamp_)r   r3   r4   r   r   r   r   r   r   r   r5   r:   iparamr/   wd_scalebufmasks                     r#   rW   rW      s'    f%'uxehY2R1Wv-=

2//0q=&)C{kk$'..0*-$'"''A	M'B((3h(7Cd
Q**4::6		$))+,,,67Tz88Cx88DD

4s
#9 &r%   c       	         (   [        U 5      S:X  a  g [        R                  " XU/SS9nUR                  5        GH  u  u  pnnU
=(       a    [	        S U 5       5      nU	(       a  [
        R                  " U5      nUc  UOUS-  U-  n[
        R                  " U SUU-  -
  5        US:w  Ga  / nSn[        [        U5      5       H!  nUU   c  Sn  OUR                  UU   5        M#     U(       a1  [
        R                  " UU5        [
        R                  " UUSU-
  S	9  O/ n[        [        U5      5       Hy  nUU   c5  [
        R                  " UU   5      R                  5       =n=UU'   UUU   '   O*UU   nUR                  U5      R                  UU   SU-
  S	9  UR                  U5        M{     U(       a  U(       a  [
        R                  " UUUS	9n[
        R                   " UU5      n[#        UU5       VVs/ s H$  u  nnUS:  R%                  UR&                  5      PM&     nnnU Vs/ s H  nUR)                  5       PM     nn[
        R*                  " US
5        [
        R,                  " UU5        [
        R                   " UU5      nO U(       a  [
        R                  " UUUS	9  OUnU(       d  [
        R                  " XU* S	9  GM  [        [        U5      5       H  nUU   R                  UU   U* S	9  M     GM     g s  snnf s  snf )Nr   T)with_indicesc              3   8   #    U  H  oR                   v   M     g 7frD   )r1   ).0r/   s     r#   	<genexpr>%_multi_tensor_sgdw.<locals>.<genexpr>  s     8aT`DT`s   r[   r\   Fr	   r]   rC   )lenr   rO   valuesanyr;   _foreach_neg_foreach_mul_ranger0   _foreach_add_rb   rc   ra   rd   _foreach_add_foreach_mulr=   rf   rg   ri   _foreach_maximum__foreach_div_)r   r3   r4   r   r   r   r   r   r   r   r5   r:   grouped_tensorsdevice_paramsdevice_gradsdevice_momentum_buffer_listindicesdevice_has_sparse_gradrm   bufsall_states_with_momentum_bufferrk   rn   masksmg
mask_scales                              r#   rV   rV      s    6{aBB	,-DBOQ`QgQgQiM	C-'BW!0!aS8aT`8a5a --l;L2R1Wv-=FBL)@$@Aq=D.2+3:;<.q196;3KK ;A >? = /##D(3##D,a)mLs#>?@A215=!KKQ8??AB B9!<?ST[\]T^?_ :!<*//Qq9}/UKK$ A  --lDQD**4>9<UL9QR9QA!a%AGG,9QR0561affh
6''
D9##E:6$11$>''d(K#'L%B3G 3}-.a %%l1obS%A /q RjR S6s   =+L	/L)NN)__doc__typingr   r   r;   r   torch.optim.optimizerr   r   r   rQ   ImportError_typesr
   __all__r   rJ   rI   r   rW   rV   rB   r%   r#   <module>r      sC   "   +`M 6
n9 nn !%"&6  #'!6V6F|6 #8F#346 6 $6 6 6 6 6 6 6 6  !6r+$V+$F|+$ #8F#34+$
 +$ +$ +$ +$ +$ +$ +$ +$ +$\MBVMBF|MB #8F#34MB
 MB MB MB MB MB MB MB MB MBA  Ms   
C CC