
    RЦi-              "       T   S r SSKrSSKJrJrJr  SSKrSSKJr  SSKJ	r	   " S S\
5      r " S S	\	5      rS
\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\S\4"S jrS
\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\S\4"S jrg)zAdan Optimizer

Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models[J]. arXiv preprint arXiv:2208.06677, 2022.
    https://arxiv.org/abs/2208.06677

Implementation adapted from https://github.com/sail-sg/Adan
    N)ListOptionalTuple)Tensor)	Optimizerc                   (    \ rS rSrSrSrS rS rSrg)MultiTensorApply   Fc                      S[         l        Xl        g ! [         a   nS[         l        U[         l         S nAg S nAff = f)NTF)r	   	available
chunk_sizeImportError
import_err)selfr   errs      N/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/adan.py__init__MultiTensorApply.__init__"   s:    	.)-&(O 	.).&*-''	.s    
>9>c                 *    U" U R                   X#/UQ76 $ Nr   )r   opnoop_flag_buffertensor_listsargss        r   __call__MultiTensorApply.__call__*   s    $//#3IDII    r   N)	__name__
__module____qualname____firstlineno__r   warnedr   r   __static_attributes__ r   r   r	   r	      s    IF.Jr   r	   c                      ^  \ rS rSrSr       SS\S\\\\4   S\S\S\S\S	\\   4U 4S
 jjjr	U 4S jr
\R                  " 5       S 5       r\R                  " 5       SS j5       rSrU =r$ )Adan.   a  Implements a pytorch variant of Adan.

Adan was proposed in Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models
https://arxiv.org/abs/2208.06677

Arguments:
    params: Iterable of parameters to optimize or dicts defining parameter groups.
    lr: Learning rate.
    betas: Coefficients used for first- and second-order moments.
    eps: Term added to the denominator to improve numerical stability.
    weight_decay: Decoupled weight decay (L2 penalty)
    no_prox: How to perform the weight decay
    caution: Enable caution from 'Cautious Optimizers'
    foreach: If True would use torch._foreach implementation. Faster but uses slightly more memory.
lrbetasepsweight_decayno_proxcautionforeachc	           
        > SU::  d  [        SR                  U5      5      eSU::  d  [        SR                  U5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eSUS	   s=::  a  S:  d  O  [        S
R                  US	   5      5      e[        UUUUUUUS9n	[        T
U ]  X5        g )N        zInvalid learning rate: {}zInvalid epsilon value: {}r         ?z%Invalid beta parameter at index 0: {}   z%Invalid beta parameter at index 1: {}   z%Invalid beta parameter at index 2: {})r)   r*   r+   r,   r-   r.   r/   )
ValueErrorformatdictsuperr   )r   paramsr)   r*   r+   r,   r-   r.   r/   defaults	__class__s             r   r   Adan.__init__?   s     by8??CDDcz8??DEEeAh$$DKKERSHUVVeAh$$DKKERSHUVVeAh$$DKKERSHUVV%
 	*r   c                    > [         [        U ]  U5        U R                   H'  nUR	                  SS5        UR	                  SS5        M)     g )Nr-   Fr.   )r8   r'   __setstate__param_groups
setdefault)r   stategroupr;   s      r   r>   Adan.__setstate___   sB    dD&u-&&EY.Y. 'r   c                 &   U R                    H  nSUS'   US    Hp  nUR                  (       d  M  U R                  U   n[        R                  " U5      US'   [        R                  " U5      US'   [        R                  " U5      US'   Mr     M     g )Nr   stepr9   exp_avg
exp_avg_sqexp_avg_diff)r?   requires_gradrA   torch
zeros_like)r   rB   prA   s       r   restart_optAdan.restart_opte   s    &&EE&M8_??? JJqME (-'7'7':E)$*/*:*:1*=E,',1,<,<Q,?E.) % 'r   c                    SnUb%  [         R                  " 5          U" 5       nSSS5         S[         R                  R                  R                  R                  5       ;   nU R                   GH  n/ n/ n/ n/ n/ n	/ n
US   u  pnSU;   a  US==   S-  ss'   OSUS'   SXS   -  -
  nSXS   -  -
  nSXS   -  -
  nUS    GH%  nUR                  c  M  UR                  U5        UR                  UR                  5        U R                  U   n[        U5      S	:X  aK  [         R                  " U5      US
'   [         R                  " U5      US'   [         R                  " U5      US'   SU;  d	  US   S:X  a  UR                  R                  5       * US'   UR                  US
   5        UR                  US   5        U	R                  US   5        U
R                  US   5        GM(     U(       d  GM  US   c  US   (       + =(       d    UnOUS   nU(       a  [        nO[        nU" UU4UUU	U
UUUUU[        R                   " U5      US   US   US   US   US   S.6  GM     U$ ! , (       d  f       GNN= f!   Sn GN"= f)z$Performs a single optimization step.NScalarFr*   rE   r3   r2   r9   r   rF   rG   rH   neg_pre_gradr/   r.   r)   r,   r+   r-   )exp_avgsexp_avg_sqsexp_avg_diffsneg_pre_gradsbeta1beta2beta3bias_correction1bias_correction2bias_correction3_sqrtr)   r,   r+   r-   r.   )rJ   enable_gradopsaten_foreach_maximum_	overloadsr?   gradappendrA   lenrK   clone_multi_tensor_adan_single_tensor_adanmathsqrt)r   closurelosshas_scalar_maximumrB   params_with_gradgradsrR   rS   rT   rU   rV   rW   rX   rY   rZ   bias_correction3rL   rA   use_foreachfuncs                        r   rE   	Adan.stepu   s    ""$y %	'!)UYY^^-M-M-W-W-Y!Y &&E!EHKMM"'.E% f" !f"UFm%;;"UFm%;;"UFm%;;8_66> ''*QVV$

1u:?','7'7':E)$*/*:*:1*=E,',1,<,<Q,?E.)!.%-12D-.VV\\^OE.)i 01""5#67$$U>%:;$$U>%:;% %( $Y'"'	"22H6H#I.)*  "'++!1!1&*ii0@&A;">2%Li(i(#k 'R c %$
	'!&s   I*5I< *
I9<Jr%   )MbP?)g\(\?gq=
ףp?gGz?g:0yE>r1   FFNr   )r   r    r!   r"   __doc__floatr   boolr   r   r>   rJ   no_gradrM   rE   r$   __classcell__)r;   s   @r   r'   r'   .   s    $ 0B"%!!&*++ u,-+ 	+
  + + + d^+ +@/ ]]_@ @ ]]_U Ur   r'   r9   rm   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r)   r,   r+   r-   r.   c                   [        U 5       GH  u  nnUU   nUU   nUU   nUU   nUU   nUR                  U5        UR                  U5      R                  USU-
  S9  UR                  U5      R                  USU-
  S9  UR                  U5      R                  U5        UR                  U5      R                  UUSU-
  S9  UR	                  5       U-  R                  U5      nX-  U
-  nX-  nU(       aR  UU-  S:  R                  UR                  5      nUR                  UR                  5       R                  SS95        UU-  nU(       a;  UR                  SX-  -
  5        UR                  UUU* S9  UR                  UUU* S9  O:UR                  UUU* S9  UR                  UUU* S9  UR                  SX-  -   5        UR                  5       R                  USS9  GM     g )Nr3   alphavaluer   rr   )min      )	enumerateadd_mul_addcmul_rh   todtypediv_meanclamp_addcdiv_zero_)r9   rm   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r)   r,   r+   r-   r.   iparamra   rF   rG   rH   neg_grad_or_diffdenomstep_size_diff	step_sizemasks                               r   rf   rf      s   ( f%5Qx1+ ^
$Q'(+ 	d#U  QY 7% %%&6a%i%He$))$/''(8:JRSV[R['\"%::@@E&66)	dNQ&**4::6DIIdiik((T(23nGJJq2,,-NN7E)N<NN<~oNFNN7E)N<NN<~oNFJJq2,,- %%d$%7G &r   c                ~   [        U 5      S:X  a  g [        R                  " XQ5        [        R                  " X&5        [        R                  " X!SU-
  S9  [        R                  " XG5        [        R                  " XESU-
  S9  [        R                  " XW5        [        R                  " XQ5        [        R                  " X85        [        R                  " X5USU-
  S9  [        R
                  " U5      n[        R                  " UU5        [        R                  " UU5        X-  U
-  nX-  nU(       a  [        R                  " X!5      n[        UU5       VVs/ s H$  u  nnUS:  R                  UR                  5      PM&     nnnU Vs/ s H  nUR                  5       PM     nn[        R                  " US5        [        R                  " UU5        [        R                  " UU5      nU(       aK  [        R                  " U SX-  -
  5        [        R                  " XUU* S9  [        R                  " XUU* S9  OJ[        R                  " XUU* S9  [        R                  " XUU* S9  [        R                  " U SX-  -   5        [        R                  " U5        [        R                  " XQSS9  g s  snnf s  snf )Nr   r3   ry   r{   rr   r~   )rc   rJ   _foreach_add__foreach_mul__foreach_addcmul__foreach_sqrt_foreach_div__foreach_mulzipr   r   r   r_   _foreach_addcdiv__foreach_zero_)r9   rm   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r)   r,   r+   r-   r.   r   r   r   masksmg
mask_scales                           r   re   re     s)   ( 6{a 
-	(	q5y9	-	AIF	-	-	+	KQQVYW,E	45	s#Z"22N%I""8314UE1BC1BA!a%AGG$1BC(-.1affh
.
D1E:.%%h6FA(9$9:%	zJu^OT%	zJu^OTFA(9$9:	'	D9! D.s   +J4J:)rs   rg   typingr   r   r   rJ   r   torch.optim.optimizerr   objectr	   r'   rt   ru   rf   re   r%   r   r   <module>r      s  *  ( (   +Jv J ]9 ]@78V78F|78 v,78 &\	78
 F|78 F|78 78 78 78  78  78  %78 78 78  !78" #78$ %78t?:V?:F|?: v,?: &\	?:
 F|?: F|?: ?: ?: ?:  ?:  ?:  %?: ?: ?:  !?:" #?:$ %?:r   