
    RЦi              $          S r SSKrSSKJrJr  SSKrSSKJr  SSKJ	r	  S\R                  S\R                  S	\R                  S
\R                  S\S\S\S\S\R                  S\S\S\S\S\S\S\S\\\4   S\4$S jr " S S\5      rg)a  PyTorch MARS Optimizer

Code simplified from https://github.com/AGI-Arena/MARS

Paper: MARS: Unleashing the Power of Variance Reduction for Training Large Models - https://arxiv.org/abs/2411.10438

@article{yuan2024mars,
  title={MARS: Unleashing the Power of Variance Reduction for Training Large Models},
  author={Yuan, Huizhuo and Liu, Yifeng and Wu, Shuang and Zhou, Xun and Gu, Quanquan},
  journal={arXiv preprint arXiv:2411.10438},
  year={2024}
}
    N)OptionalTuple)	Optimizer   )ParamsTpgradexp_avg
exp_avg_sqlrweight_decaybeta1beta2	last_gradepsstepgamma	mars_type
is_grad_2doptimize_1dlr_1d_factorbetas_1dcautionc                 X   U(       d  U(       Ga  SU-
  nU
S:X  a  UnOHX-
  R                  XU-  -  5      R                  U5      n[        R                  " U5      nUS:  a  UU-  nUR                  U5      R                  UUS9  U(       aQ  X!-  S:  R	                  UR
                  5      nUR                  UR                  5       R                  SS95        UU-  nUS:X  a  UR                  U5      R                  UUSU-
  S9  SXj-  -
  nSXz-  -
  nUR                  5       [        R                  " U5      -  R                  U	5      nX-  UU-  R                  U5      -   nOUS	:X  a  X-  UR                  5       -   nO eU R                  UU* S9  X#4$ Uu  nnUR                  U5      R                  USU-
  S9  UR                  U5      R                  XSU-
  S9  SUU
-  -
  nSUU
-  -
  nUR                  5       [        R                  " U5      -  R                  U	5      nU(       aQ  X!-  S:  R	                  UR
                  5      nUR                  UR                  5       R                  SS95        UU-  nX-  UU-  R                  U5      -   nU R                  UXO-  * S9  X#4$ )
N      ?r   )alphar   gMbP?)minadamw)valuelion)mul_add_torchnormtodtypediv_meanclamp_addcmul_sqrtmathsign)r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   one_minus_beta1c_tc_t_normmaskbias_correction1bias_correction2denomupdatebeta1_1dbeta2_1ds                               N/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/mars.py_mars_single_tensor_stepr9      s   * ju*19C#))%?3J*KLQQRVWCzz#H"}HnU  O <NQ&**4::6DIIdiik((T(23nGOOE"++CBJ+G"U]2"U]2__&3C)DDJJ3OE%3C)C(I(I%(PPF& %6F5	vbS!  &(X##DX#>!**4R(]*KT!11T!11"TYY/?%@@FFsKNQ&**4::6DIIdiik((T(23nG!W/?%?$E$Ee$LL	vr012    c                      ^  \ rS rSrSr          SS\S\S\\\4   S\S\S\S	\S
\	S\S\
\\\4      S\	4U 4S jjjrU 4S jr\R                  " 5       SS j5       rSrU =r$ )Mars[   zMARS Optimizer

Paper: MARS: Unleashing the Power of Variance Reduction for Training Large Models
    https://arxiv.org/abs/2411.10438

paramsr   betasr   r   r   r   r   r   r   r   c                   > SU::  d  [        SR                  U5      5      eSU::  d  [        SR                  U5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eUS	;   d   S
5       e[        UUUUUUUU	U
=(       d    UUS9
n[        [        U ]  X5        g )N        zInvalid learning rate: {}zInvalid epsilon value: {}r   r   z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {})r   r    zMARS type not supported)
r   r?   r   r   r   r   r   r   r   r   )
ValueErrorformatdictsuperr<   __init__)selfr>   r   r?   r   r   r   r   r   r   r   r   defaults	__class__s                r8   rF   Mars.__init__b   s     by8??CDDcz8??DEEeAh$$DKKERSHUVVeAh$$DKKERSHUVV--H/HH-%#%&
 	dD"64r:   c                 v   > [         [        U ]  U5        U R                   H  nUR	                  SS5        M     g )Nr   F)rE   r<   __setstate__param_groups
setdefault)rG   stategrouprI   s      r8   rL   Mars.__setstate__   s2    dD&u-&&EY. 'r:   c                    SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   GH+  nUS    GH  nUR                  c  M  UR                  nUR                  (       a  [        S5      eU R                  U   n[        U5      S::  aP  SUS'   [         R                  " U5      US'   [         R                  " U5      US'   [         R                  " U5      US	'   US==   S-  ss'   US   nUS   nUS	   n	US   n
US
   nUS   nUS   u  pUR                  S:  n[        UUUU	UUUUU
US   UUS   US   UUS   US   US   US   S9  XVS'   GM      GM.     U$ ! , (       d  f       GNM= f)zPerforms a single optimization step.

Arguments:
    closure (callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr>   zJAdam does not support sparse gradients, please consider SparseAdam insteadr   r   r   r
   r   r   r   r   r?      r   r   r   r   r   r   r   )r   r   r   r   r   r   )r#   enable_gradrM   r	   	is_sparseRuntimeErrorrO   len
zeros_likendimr9   )rG   closurelossrP   r   r	   rO   r   r
   r   r   r   wdr   r   r   s                   r8   r   	Mars.step   s    ""$y % &&E8_66>vv>>&'stt

1u:?$%E&M','7'7':E)$).)9)9!)<E+&*/*:*:1*=E,'f"V}	*"<0
!+.	4[>*$W~!YY!^
 )%L'N#K0) %m 4!&~!6":.!),%* &*k"e % 'j q %$s   E))
E8 )
g~jth?)g?gGz?g:0yE>rA   g?r   Fr   NF)N)__name__
__module____qualname____firstlineno____doc__r   floatr   strboolr   rF   rL   r#   no_gradr   __static_attributes____classcell__)rI   s   @r8   r<   r<   [   s     )4"$ $ %"%6:!$5$5 $5 &	$5
 $5  $5 $5 $5 $5  $5 uUE\23$5 $5 $5L/
 ]]_A Ar:   r<   )rc   r,   typingr   r   r#   torch.optim.optimizerr   _typesr   Tensorrd   intre   rf   r9   r<   r^   r:   r8   <module>ro      s     "  + ?<<?ll? ? LL	?
 ? ? ? ? <<? ? ? ? ? ? ?  !?" u%#?$ %?Dt9 tr:   