
    RЦi#              	           S r SSKrSSKJs  Jr  SSKJr  SSKrS\R                  4S jr
S\R                  4S jrSS\S\S	\S
\4S jjr " S S\5      rg)a  
AdamP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/adamp.py

Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
Code: https://github.com/clovaai/AdamP


References for added functionality:
    Cautious Optimizers: https://arxiv.org/abs/2411.16085
    Spherical Cautious Optimizer: https://openreview.net/forum?id=4lTJbNpWr3 
Copyright (c) 2020-present NAVER Corp.
MIT license
    N)	Optimizerreturnc                 D    U R                  U R                  S5      S5      $ )Nr   )reshapesizexs    O/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/adamp.py_channel_viewr      s    99QVVAY##    c                 &    U R                  SS5      $ )N   r   )r   r	   s    r   _layer_viewr      s    99Qr   deltawd_ratioepscautionc                 N   SnSS[        U R                  5      S-
  -  -   n[        [        4 GH  n	U	" U 5      n
U	" U5      n[        R
                  " XSUS9R                  5       nUR                  5       U[        R                  " U
R                  S5      5      -  :  d  Mt  X
R                  SSS9R                  U5      R                  U5      -  nX-U	" X-  5      R                  SS9R                  U5      -  -  nU(       a  X" X-  5      R                  SS9R                  U5      -  nX-
  nX/-  S	:  R                  UR                   5      nUR#                  UR%                  5       R'                  S
S95        UR)                  U5        X-U	" X-  5      R                  SS9R                  U5      -  -  nUnX'4s  $    U(       a]  X!-  S	:  R                  UR                   5      nUR#                  UR%                  5       R'                  S
S95        UR)                  U5        X'4$ )N      ?)r   )r   r   )dimr      )pr   )r   r   MbP?min)lenshaper   r   Fcosine_similarityabs_maxmathsqrtr   normadd_r   sumtodtypediv_meanclamp_mul_)r   gradperturbr   r   r   r   wdexpand_size	view_func
param_view	grad_view
cosine_simp_ngrad_radial	grad_perpmasks                    r   
projectionr:      s   	B$#agg,"233K#[1	q\
dO	((A3OTTV
 >>edii
0B&CCCooqo166s;CCKPPCYs}599a9@HHUUUG!Icj$9$=$=!$=$D$L$L[$YY .	+a/33DJJ?		$))+,,,67T"3=!9!=!=!!=!D!L!L[!YYYB;+ 2. "&&tzz2		$))+$$$./T;r   c                   n   ^  \ rS rSr        SU 4S jjr\R                  " 5       SS j5       rSrU =r	$ )AdamP@   c
                 L   > [        UUUUUUUU	S9n
[        [        U ]  X5        g )N)lrbetasr   weight_decayr   r   nesterovr   )dictsuperr<   __init__)selfparamsr?   r@   r   rA   r   r   rB   r   defaults	__class__s              r   rE   AdamP.__init__A   s8     %	
 	eT#F5r   c                 $   S nUb%  [         R                  " 5          U" 5       nS S S 5        U R                   GHB  nUS    GH4  nUR                  c  M  UR                  nUS   u  pgUS   nUR	                  SS5      n	U R
                  U   n
[        U
5      S:X  a7  SU
S'   [         R                  " U5      U
S'   [         R                  " U5      U
S	'   U
S   U
S	   pU
S==   S
-  ss'   S
XjS   -  -
  nS
XzS   -  -
  nUR                  U5      R                  US
U-
  S9  UR                  U5      R                  XUS
U-
  S9  UR                  5       [        R                  " U5      -  R                  US   5      nUS   U-  nU(       a  Xk-  S
U-
  U-  -   U-  nOX-  nSn[        UR                  5      S
:  a  [        XEUUS   US   US   U	5      u  nnOeU	(       a^  UU-  S:  R                  UR                   5      nUR#                  UR%                  5       R'                  SS95        UR                  U5        US   S:  a   UR                  SUS   US   -  U-  -
  5        UR                  UU* S9  GM7     GME     U$ ! , (       d  f       GNd= f)NrG   r@   rB   r   Fr   stepexp_avg
exp_avg_sqr   )alpha)valuer   r?   r   r   r   r   r   rA   )torchenable_gradparam_groupsr.   getstater   
zeros_liker-   r&   addcmul_r$   r#   r   r:   r(   r)   r*   r+   r,   )rF   closurelossgroupr   r.   beta1beta2rB   r   rU   rM   rN   bias_correction1bias_correction2denom	step_sizer/   r   r9   s                       r   rL   
AdamP.stepY   s   ""$y % &&E8_66>vv$W~ ,))Iu5

1 u:?$%E&M','7'7':E)$*/*:*:1*=E,' ',I&6l8Kf"#$uf'=#= #$uf'=#= U#((QY(?&//!e)/L#*TYY7G-HHNNuUZ|\!$K*::	$!e)t1CCuLG%oG qww<!#(2%.%
:KUSX\[b)%GX #dNQ.224::>DIIdiik00T0:;LL& (1,FF2deN.C Ch NNO wyj1i % 'n u %$s   J  
J )r   )g?g+?g:0yE>r   皙?rc   FF)N)
__name__
__module____qualname____firstlineno__rE   rQ   no_gradrL   __static_attributes____classcell__)rI   s   @r   r<   r<   @   s<     60 ]]_= =r   r<   )F)__doc__rQ   torch.nn.functionalnn
functionalr   torch.optim.optimizerr   r#   Tensorr   r   floatboolr:   r<   rb   r   r   <module>rs      sl       + $ $ell      U  UY  FWI Wr   