
    RЦi6L              +       r   S r SSKJrJrJrJrJr  SSKrSSKJr  SSK	J
r
  SSKJr  SS	/rS
 rS'S jrS rS r " S S\
5      rS\\   S\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\\\4   S\S\\   S\\   S\S\S\S \S!\S"\4(S# jrS\\   S\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\\\4   S\S\\   S\\   S\S\S\S \S!\S"\4(S$ jr      S(S\\   S\\   S\\   S\\   S\\   S%\\   S!\S"\S\\   S\\   S\S\S\S\\\4   S\S\\   S\\   S\S\S\S \4*S& jjrg))u?  ADOPT PyTorch Optimizer

ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate: https://arxiv.org/abs/2411.02853

Modified for reduced dependencies on PyTorch internals from original at: https://github.com/iShohei220/adopt

@inproceedings{taniguchi2024adopt,
 author={Taniguchi, Shohei and Harada, Keno and Minegishi, Gouki and Oshima, Yuta and Jeong, Seong Cheol and Nagahara, Go and Iiyama, Tomoshi and Suzuki, Masahiro and Iwasawa, Yusuke and Matsuo, Yutaka},
 booktitle = {Advances in Neural Information Processing Systems},
 title = {ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate},
 year = {2024}
}

References for added functionality:
    Cautious Optimizers: https://arxiv.org/abs/2411.16085
    Why Gradients Rapidly Increase Near the End of Training: https://arxiv.org/abs/2506.02285
    )castListOptionalTupleUnionN)Tensor)	Optimizer   )ParamsTAdoptadoptc                     [        U 5       H_  u  p#[        R                  " U5      (       d  M"  [        R                  " X   5      X'   U H  n[        R                  " XB   5      XB'   M     Ma     g N)	enumeratetorch
is_complexview_as_real)paramsstate_and_gradsipss        O/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/adopt.py_view_as_realr      sV    &!A**695FI$))!$/ % "    c                     U (       a  [         R                  $ [         R                  " 5       [         R                  :X  a  [         R                  $ [         R                  $ r   )r   float32get_default_dtypefloat64)is_fuseds    r   _get_scalar_dtyper!   $   s;    }}002emmCINr   c                      [        [        S5      (       a=  [        [        R                  S5      (       a  [        R                  R                  5       $ g)Ncompileris_compilingF)hasattrr   r#   r$    r   r   _is_compilingr'   ,   s5    uj!!gennn&M&M~~**,,r   c                     [         R                  R                  5       (       d  [        5       (       a  U $ [	        U [         R
                  5      (       a  U R                  5       $ U $ r   )r   jitis_scriptingr'   
isinstancer   item)xs    r   
_get_valuer.   3   sA    99!!##%a66qvvx=A=r   c                      ^  \ rS rSrSr       SSSSSSS.S\S\\\4   S\	\\4   S\S	\
\   S
\S\S\S\S\
\   S\S\S\4U 4S jjjjrU 4S jrS r\R                   " 5       SS j5       rSrU =r$ )r   ;   ui   
ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate: https://arxiv.org/abs/2411.02853

F)cautionforeachmaximize
capturabledifferentiabler   lrbetasepsclip_expweight_decay	decoupledcorrected_weight_decayr1   r2   r3   r4   r5   c	                  > [        U[        5      (       a8  U
(       a  U(       d  [        S5      eUR                  5       S:w  a  [        S5      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSUS   s=::  a  S:  d  O  [        S	US    35      eSUS   s=::  a  S:  d  O  [        S
US    35      eSU::  d  [        SU 35      e[	        UUUUUUUU	UU
UUS9n[
        TU ]  X5        g )NElr as a Tensor is not supported for capturable=False and foreach=Truer
   zTensor lr must be 1-element        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0: z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r6   r7   r8   r:   r9   r;   r<   r1   r3   r2   r4   r5   )r+   r   
ValueErrornumeldictsuper__init__)selfr   r6   r7   r8   r9   r:   r;   r<   r1   r2   r3   r4   r5   defaults	__class__s                  r   rE   Adopt.__init__@   s   " b&!!z [  xxzQ !>??by6rd;<<cz6se<==eAh$$B58*MNNeAh$$B58*MNNl";L>JKK%#9!)
 	*r   c                   > [         TU ]  U5        U R                   GH;  nUR                  SS5        UR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS 5        UR                  SS5        UR                  SS5        US	    H  nU R                  R                  U/ 5      n[        U5      S
:w  d  M0  [        R                  " US   5      (       a  MP  [        US   5      nUS   (       a(  [        R                  " U[        5       UR                  S9O[        R                  " U[        5       S9US'   M     GM>     g )Nr3   Fr2   r4   r5   r9   r1   r<   r   r   stepdtypedevicerM   )rD   __setstate__param_groups
setdefaultstategetlenr   	is_tensorfloattensorr!   rN   )rF   rS   groupr   p_statestep_valrH   s         r   rP   Adopt.__setstate__s   s%   U#&&EZ/Y-\51-u5Z.Y.5u=8_**..B/w<1$U__WV_-M-M$WV_5H !. $"3"5#$88 #\\(:K:MN FO	 % 'r   c                    SnUS    GH  nUR                   c  M  U[        R                  " U5      -  nUR                  U5        UR                   R                  (       a  [        S5      eUR                  UR                   5        U R                  U   n	[        U	5      S:X  a  US   (       a2  [        R                  " S[        5       UR                   R                  S9O[        R                  " S[        5       S	9U	S
'   [        R                  " UR                   [        R                  S9U	S'   [        R                  " UR                   [        R                  S9U	S'   UR                  U	S   5        UR                  U	S   5        US   (       a  U	S
   R                  (       a  [        S5      eUS   (       a3  [        R                  " US   5      (       a  US   (       d  [        S5      eUR                  U	S
   5        GM     U$ )NFr   z'ADOPT does not support sparse gradientsr   r4   r&   rL   r?   rO   rK   )memory_formatexp_avg
exp_avg_sqr5   zB`requires_grad` is not supported for `step` in differentiable moder2   r6   r>   )gradr   r   append	is_sparseRuntimeErrorrS   rU   zerosr!   rN   rX   
zeros_likepreserve_formatrequires_gradrV   )
rF   rY   params_with_gradgradsexp_avgsexp_avg_sqsstate_stepshas_complexr   rS   s
             r   _init_groupAdopt._init_group   s    xAvv~5++A..K##A&vv"#LMMLL JJqME5zQ \* KK*;*=affmmTc1B1DE f $)#3#3AFF%J_J_#`i &+&6&6qvvUMbMb&cl#OOE),-u\23%&5=+F+F"#ghh YEOOE$K$@$@|I\"#jkkuV}-G !H r   c                 &   U R                  5         SnUb%  [        R                  " 5          U" 5       nSSS5        U R                   H  n/ n/ n/ n/ n/ nUS   u  pU R	                  UUUUUU5      n[        UUUUU40 SU_SU	_SU
_SUS   _SUS   _SUS   _S	US
   (       a  U R                  S   OS_SUS   _SUS   _SUS   _SUS   _SUS   _SUS   _SUS   _S[        U SS5      _S[        U SS5      _6  M     U$ ! , (       d  f       N= f)zPerform a single optimization step.

Args:
    closure (Callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr7   rn   beta1beta2r6   r:   r9   max_lrr<   r;   r8   r1   r3   r2   r4   r5   
grad_scale	found_inf) _cuda_graph_capture_health_checkr   enable_gradrQ   ro   r   rG   getattr)rF   closurelossrY   ri   rj   rk   rl   rm   rr   rs   rn   s               r   rK   
Adopt.step   s    	--/""$y % &&E-/"$E%'H(*K(*K >LE** K   (   ; #>2 z* /44L.Mt}}T*SW  , %L i(  z*!" i(#$ !.%&  %%56'( #4t<)* "$T:+# 'R Y %$s   D
Dr&   )MbP?)g?gH.?gư>gZd;O?r?   FFr   )__name__
__module____qualname____firstlineno____doc__r   r   rW   r   r   r   boolrE   rP   ro   r   no_gradrK   __static_attributes____classcell__)rH   s   @r   r   r   ;   s     (,)6(-"%#+01+ "&+"$#(1+1+ eVm$1+ &	1+
 1+ uo1+  1+ 1+ %)1+ 1+ d^1+ 1+ 1+ !1+ 1+f0.b ]]_7 7r   r   rj   rk   rl   rm   ru   rv   rn   rr   rs   r6   r:   r9   rt   r;   r8   r1   r3   r4   r5   c                   Uc  Ub   e[         R                  R                  5       (       a  [        U
[        5      (       d   e[        U 5       GH  u  nnU(       d  UU   OUU   * nUU   nUU   nUU   nU(       ao  [        5       (       d`  SSKJn  U" 5       nUR                  R                  UR                  R                  :X  a  UR                  R                  U;   d   SU S35       eUS-  n[         R                  " U5      (       a^  [         R                  " U5      nUb  [         R                  " U5      nUb  [         R                  " U5      n[         R                  " U5      nUS:w  a  U(       d  UR                  UUS9nU(       d  U(       a  UO
[        U5      nUS:X  a#  UR                  UUR!                  5       5        GM}  US:w  a(  U(       a!  Ub  U
S-  U-  OU
nUR#                  UU* U-  S9  [         R$                  " UR'                  5       U5      nUR)                  U5      nUb  US-
  U-  n UR+                  U * U 5        UR-                  USU-
  5        U(       aR  UU-  S:  R/                  UR0                  5      n!U!R3                  U!R5                  5       R+                  SS	95        UU!-  nUR#                  UU
* S9  UR7                  U	5      R                  UUR!                  5       SU	-
  S
9  GM     g )Nr   !_get_capturable_supported_devicesIIf capturable=True, params and state_steps must be on supported devices: .r
   alpha   r}   )minvalue)r   r)   r*   r+   rW   r   r'   torch.optim.optimizerr   rN   typer   r   addr.   addcmul_conjadd_clampsqrtdivclamp_lerp_torM   div_meanmul_)"r   rj   rk   rl   rm   ru   rv   rn   rr   rs   r6   r:   r9   rt   r;   r8   r1   r3   r4   r5   r   paramra   r_   r`   step_tr   capturable_supported_devicesrK   wd_scaledenomnormed_gradclip_valmasks"                                     r   _single_tensor_adoptr      s   . )"333yy "e$$$$f%5'uQxeAhY1+ ^
Q mooO+L+N(<<$$(:(::u||?P?PTp?p |[\x[yyz{|p 	!E""%%d+D",,W5%"//
;
&&u-E1Y88E86D#~v:f;M19diik21+1+=rQw'2HJJuXI$<J=JOO-s3hhuoqX-Hy(3k1u9-dNQ&**4::6DIIdiik((T(23nG

72#
&''diikU'Kk &r   c          	      X  ^) [        U 5      S:X  a  g [        U
[        5      (       a  U(       d  [        S5      eU(       aI  [	        5       (       d:  SSKJn  U" SS9m)[        U)4S j[        X5       5       5      (       d   ST) S35       eUc  Ub   eU(       a   S	5       e[        R                  " XX#U/5      nUR                  5        GHS  u  u  nnnnnn[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n U(       a  [        UUUU5        U(       a  [        R                   " U5      n[	        5       (       d>  U S   R"                  (       a*  [        R$                  " U [        R&                  " S
SS9S
S9  O[        R$                  " U S5        US:w  a;  U(       d4  U(       a  [        R$                  " UUUS9  O[        R(                  " UUUS9nU S   S:X  a  [        R*                  " UUU5        GM{  US:w  a.  U(       a'  Ub  U
S-  U-  OU
n![        R$                  " UUU!* U-  S9  [        R,                  " U5      n"[        R.                  " U"U5        [        R0                  " UU"5      n#Ub:  U S   S-
  U-  n$[        R.                  " U#U$* 5        [        R2                  " U#U$5        [        R4                  " UU#SU-
  5        U(       a  [        R6                  " UU5      n%[        U%U5       V&V's/ s H$  u  n&n'U&S:  R9                  U'R:                  5      PM&     n%n&n'U% V&s/ s H  n&U&R=                  5       PM     n(n&[        R.                  " U(S5        [        R>                  " U%U(5        [        R6                  " UU%5      n[        R$                  " UUU
* S9  [        R@                  " UU	5        [        R*                  " UUUSU	-
  S9  GMV     g s  sn'n&f s  sn&f )Nr   r>   r   F)supports_xlac              3      >#    U  HT  u  pUR                   R                  UR                   R                  :H  =(       a    UR                   R                  T;   v   MV     g 7fr   )rN   r   ).0r   rK   r   s      r   	<genexpr>&_multi_tensor_adopt.<locals>.<genexpr>s  sG      
3 HHMMT[[---_!((--C_2__3s   AAr   r   z#_foreach ops don't support autogradr@   cpu)rN   r   r
   r   r}   r   )!rU   r+   r   rd   r'   r   r   allzipr	   "_group_tensors_by_device_and_dtypevaluesr   r   r   r   _foreach_negis_cpu_foreach_add_rX   _foreach_add_foreach_addcmul__foreach_sqrt_foreach_maximum__foreach_div_foreach_minimum__foreach_lerp__foreach_mulr   rM   r   _foreach_div__foreach_mul_)*r   rj   rk   rl   rm   ru   rv   rn   rr   rs   r6   r:   r9   rt   r;   r8   r1   r3   r4   r5   r   grouped_tensorsdevice_params_device_grads_device_exp_avgs_device_exp_avg_sqs_device_state_steps__device_paramsdevice_gradsdevice_exp_avgsdevice_exp_avg_sqsdevice_state_stepsr   exp_avg_sq_sqrtr   r   masksmg
mask_scaler   s*                                            @r   _multi_tensor_adoptr   N  s   . 6{a"fjS
 	

 -//K'H(
$  
v3
 
 
 	w WWsVttuv	w 

 )"333DDDBB	{;O ""$		 	T&\>:DL-8tF|-=>!$v,0CD!$v,0CD -HZ[ --l;L #5a#8#?#? 2ELLU4S[^_ 2A61Y##L-|T$11,Uaba A%##$6lS1+1+=rQw'2H}XIP\D\]--.@A5((G*1-1h>H##K(;##K:_k1u9E&&EE585MN5MTQa!eZZ(5MEN,12Eq!&&(EJ2##J5z2#00%HOM?2#F.6 2L,VWZ_V_`} %j O2s    +P!2P'r2   c       
         ~   Uc  Sn[        5       (       d"  [        S U 5       5      (       d  [        S5      eU(       a.  [        R                  R                  5       (       a  [        S5      eU(       a*  [        R                  R                  5       (       d  [        nO[        nU" U UUUU4U
UUUUUUUUUUUUUU	S.6  g)z?Functional API that performs ADOPT algorithm computation.

    NFc              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7fr   )r+   r   r   )r   ts     r   r   adopt.<locals>.<genexpr>  s     &XKqz!U\\'B'BKs   ')zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsz6torch.jit.script not supported with foreach optimizers)rn   rr   rs   r6   r:   r9   rt   r;   r8   r1   r3   r4   r5   ru   rv   )r'   r   rd   r   r)   r*   r   r   )r   rj   rk   rl   rm   r2   r4   r5   ru   rv   rn   rr   rs   r6   r:   r9   rt   r;   r8   r1   r3   funcs                         r   r   r     s    :  ??3&XK&X#X#X^
 	
 599))++STTuyy--//"#  !%)r   r   )NFFNNF)r   typingr   r   r   r   r   r   r   r   r	   _typesr   __all__r   r!   r'   r.   r   r   rW   r   r   r   r&   r   r   <module>r      s  " 6 5   + G
0>yI yxTLVTLF|TL v,TL &\	TL
 &\TL V$TL F#TL TL TL TL %- TL TL 5/TL TL  !TL" #TL$ %TL& 'TL( )TL* +TLnuaVuaF|ua v,ua &\	ua
 &\ua V$ua F#ua ua ua ua %- ua ua 5/ua ua  !ua" #ua$ %ua& 'ua( )ua* +uaB #' $'+&*!DVDF|D v,D &\	D
 &\D $D D D V$D F#D D D  !D" %- #D$ %D& 5/'D( )D* +D, -D. /D0 1Dr   