
    RЦi"                        S r SSKJrJrJr  SSKrSSKJr  SSKJ	r	   " S S\5      r
  SSS	.S
\\R                     S\\R                     S\\R                     S\S\S\S\S\S\S\S\\   4S jjjrS
\\R                     S\\R                     S\\R                     S\S\S\S\S\S\S\\   4S jrS
\\R                     S\\R                     S\\R                     S\S\S\S\S\S\S\\   4S jrg)ag  Lion Optimizer
Paper: `Symbolic Discovery of Optimization Algorithms` - https://arxiv.org/abs/2302.06675
Original Impl: https://github.com/google/automl/tree/master/lion

References for added functionality:
    Cautious Optimizers: https://arxiv.org/abs/2411.16085
    Why Gradients Rapidly Increase Near the End of Training: https://arxiv.org/abs/2506.02285
    )ListOptionalTupleN)	Optimizer   )ParamsTc                      ^  \ rS rSrSr       SS\S\S\\\4   S\S\S\S	\S
\	\   4U 4S jjjr
U 4S jr\R                  " 5       SS j5       rSrU =r$ )Lion   zImplements Lion algorithm.paramslrbetasweight_decaycautioncorrected_weight_decaymaximizeforeachc	           
      >  > SU::  d  [        SR                  U5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      e[        UUUUUUUS9n	[        T
U ]  X5        g	)
av  Initialize the hyperparameters.

Args:
    params: iterable of parameters to optimize or dicts defining parameter groups
    lr: learning rate
    betas: coefficients used for computing running averages of gradient and its square
    weight_decay: weight decay coefficient
    caution: apply caution
    corrected_weight_decay: apply corrected weight decay (lr**2 / max_lr)
        zInvalid learning rate: {}r   g      ?z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {})r   r   r   r   r   r   r   N)
ValueErrorformatdictsuper__init__)selfr   r   r   r   r   r   r   r   defaults	__class__s             N/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/lion.pyr   Lion.__init__"   s    , by8??CDDeAh$$DKKERSHUVVeAh$$DKKERSHUVV%#9
 	*    c                    > [         TU ]  U5        U R                   HK  nUR                  SS5        UR                  SS5        UR                  SS5        UR                  SS 5        MM     g )Nr   Fr   r   r   )r   __setstate__param_groups
setdefault)r   stategroupr   s      r   r"   Lion.__setstate__I   sa    U#&&EY.5u=Z/Y-	 'r    c                    SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   GH  n/ n/ n/ nUS   u  pxUS    H  n	U	R                  c  M  UR	                  U	5        U	R                  R
                  (       a  [        S5      eUR	                  U	R                  5        U R                  U	   n
[        U
5      S:X  a&  [         R                  " U	[         R                  S9U
S'   UR	                  U
S   5        M     [        UUUUUUS   US	   US
   US   US   US   (       a  U R                  S   OSS9  GM     U$ ! , (       d  f       GN3= f)zPerforms a single optimization step.

Args:
    closure: A closure that reevaluates the model and returns the loss.

Returns:
    the loss.
Nr   r   z&Lion does not support sparse gradientsr   )memory_formatexp_avgr   r   r   r   r   r   )beta1beta2r   r   r   r   r   max_lr)torchenable_gradr#   gradappend	is_sparseRuntimeErrorr%   len
zeros_likepreserve_formatlionr   )r   closurelossr&   params_with_gradgradsexp_avgsr+   r,   pr%   s              r   step	Lion.stepQ   sV    ""$y % &&E!EH >LE8_66> ''*66##&'OPPQVV$

1 u:?','7'7I^I^'_E)$i 01 %   ;">2i(z*i(.34L.Mt}}T*SW- 'H O %$s   E
E )g-C6?)g?gGz?r   FFFN)N)__name__
__module____qualname____firstlineno____doc__r   floatr   boolr   r   r"   r.   no_gradr>   __static_attributes____classcell__)r   s   @r   r
   r
      s    %
 )4"%!+0"&*%+%+ %+ &	%+
  %+ %+ %)%+ %+ d^%+ %+N. ]]_2 2r    r
   )r-   r   r;   r<   r   r   r+   r,   r   r   r   r-   c                   UcD   U	(       + =(       d5    S[         R                  R                  R                  R	                  5       ;   nU(       a.  [         R                  R                  5       (       a  [        S5      eU(       a*  [         R                  R                  5       (       d  [        nO[        nU" U UUUUUUU	UU
S9
  g! [
         a    Sn Nf = f)z=Functional API that performs Lion algorithm computation.
    NScalarFz6torch.jit.script not supported with foreach optimizers)r+   r,   r   r   r   r   r-   )r.   opsaten_foreach_maximum_	overloads	Exceptionjitis_scriptingr3   _multi_tensor_lion_single_tensor_lion)r   r;   r<   r   r   r+   r,   r   r   r   r-   funcs               r   r7   r7      s    $ 	!k]X1Q1Q1[1[1]%]G 599))++STTuyy--//!"!  	G	s   AC CCc                   [        U 5       GHS  u  pU(       d  X   OX   * nX*   n[        R                  " U5      (       aB  [        R                  " U5      n[        R                  " U5      n[        R                  " U5      nU	c  UOUS-  U	-  nUR	                  SX-  -
  5        UR                  U5      R                  USU-
  S9R                  5       nU(       a]  X-  S:  R                  UR                  5      nUR                  UR                  5       R                  SS95        UR	                  U5        UR                  X* S9  UR                  USU-
  5        GMV     g )N   r   alphar   MbP?)min)	enumerater.   
is_complexview_as_realmul_muladd_sign_todtypediv_meanclamp_lerp_)r   r;   r<   r+   r,   r   r   r   r   r-   iparamr0   r*   wd_scaleupdatemasks                    r   rU   rU      s1    f%'uxehY+E""%%d+D((1G&&u-E  2R1Wv-=

1x../ U#((QY(?EEGMA%))$**5DIIdiik((T(23KK

6
% 	dAI&3 &r    c                ,   [        U 5      S:X  a  g U(       a  [        R                  " [        U5      5      nU V
s/ s H6  n
[        R                  " U
5      (       a  [        R
                  " U
5      OU
PM8     nn
U V
s/ s H6  n
[        R                  " U
5      (       a  [        R
                  " U
5      OU
PM8     nn
U  V
s/ s H6  n
[        R                  " U
5      (       a  [        R
                  " U
5      OU
PM8     n n
U	c  UOUS-  U	-  n[        R                  " U SX-  -
  5        [        R                  " X#5      n[        R                  " XSU-
  S9  U Vs/ s H  oR                  5       PM     nnU(       a  [        R                  " X5      n[        X5       VVs/ s H$  u  nnUS:  R                  UR                  5      PM&     nnnU Vs/ s H  oR                  5       PM     nn[        R                  " US5        [        R                  " UU5        [        R                  " X5        [        R                  " XU* S9  [        R                  " X$5        [        R                  " X!SU-
  S9  g s  sn
f s  sn
f s  sn
f s  snf s  snnf s  snf )Nr   rX   r   rY   r[   )r4   r.   _foreach_negtupler^   r_   _foreach_mul__foreach_mul_foreach_add_rc   ziprd   re   rg   rO   _foreach_div_)r   r;   r<   r+   r,   r   r   r   r   r-   xrl   updatesumasksmg
mask_scales                     r   rT   rT      s    6{a""5<0JOP%Qe&6&6q&9&9U"q@%EPMUVX)9)9!)<)<""1%!CXHVKQR6au'7'7':':e  #A6FR ^rq6)9H	H$; ;<   1G	a%i8")*'Qwwy'G*""7214U1BC1BA!a%AGG$1BC(-.1ffh
.
D1E:.G+	s3 
(	q5y95 QVR +
 D.s#   =I7>=I<=JJ!+JJ)FN)rE   typingr   r   r   r.   torch.optim.optimizerr   _typesr   r
   TensorrG   rF   r7   rU   rT   r@   r    r   <module>r      s  , ) (  + e9 e\ , #',U\\",ELL!, u||$, , , , , , , , ,^&'U\\"&'ELL!&' u||$&'
 &' &' &' &' &' &' &'R-:U\\"-:ELL!-: u||$-:
 -: -: -: -: -: -: -:r    