
    RЦi<                     N    S r SSKr " S S\R                  R                  5      rg)zAdaHessian Optimizer

Lifted from https://github.com/davda54/ada-hessian/blob/master/ada_hessian.py
Originally licensed MIT, Copyright 2020, David Samuel
    Nc                      ^  \ rS rSrSr        S
U 4S jjr\S 5       rS rS r	\
R                  " 5       S 5       r\
R                  " 5       SS j5       rS	rU =r$ )
Adahessian	   a  
Implements the AdaHessian algorithm from "ADAHESSIAN: An Adaptive Second OrderOptimizer for Machine Learning"

Arguments:
    params (iterable): iterable of parameters to optimize or dicts defining parameter groups
    lr (float, optional): learning rate (default: 0.1)
    betas ((float, float), optional): coefficients used for computing running averages of gradient and the
        squared hessian trace (default: (0.9, 0.999))
    eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-8)
    weight_decay (float, optional): weight decay (L2 penalty) (default: 0.0)
    hessian_power (float, optional): exponent of the hessian trace (default: 1.0)
    update_each (int, optional): compute the hessian trace approximation only after *this* number of steps
        (to save time) (default: 1)
    n_samples (int, optional): how many times to sample `z` for the approximation of the hessian trace (default: 1)
c
                 T  > SU::  d  [        SU 35      eSU::  d  [        SU 35      eSUS   s=::  a  S:  d  O  [        SUS    35      eSUS   s=::  a  S:  d  O  [        SUS    35      eSUs=::  a  S::  d  O  [        S	U 35      eXl        Xpl        Xl        S
U l        [
        R                  " 5       R                  U R                  5      U l        [        UUUUUS9n
[        [        U ]3  X5        U R                  5        H  nSUl        SU R                  U   S'   M     g )N        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid Hessian power value: i)lrbetasepsweight_decayhessian_powerhessian step)
ValueError	n_samplesupdate_eachavg_conv_kernelseedtorch	Generatormanual_seed	generatordictsuperr   __init__
get_paramshessstate)selfparamsr
   r   r   r   r   r   r   r   defaultsp	__class__s               T/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/adahessian.pyr   Adahessian.__init__   s4    by6rd;<<cz6se<==eAh$$B58*MNNeAh$$B58*MNNm*s*<]OLMM"&. 	*66tyyA%'
 	j$(:"AAF,-DJJqM.) #    c                     g)NT r   s    r$   is_second_orderAdahessian.is_second_orderF   s    r&   c                 (    S U R                    5       $ )z8
Gets all parameters in all param_groups with gradients
c              3   b   #    U  H%  oS      H  o"R                   (       d  M  Uv   M     M'     g7f)r    N)requires_grad).0groupr"   s      r$   	<genexpr>(Adahessian.get_params.<locals>.<genexpr>O   s"     ]0e/Q__/0s   //)param_groupsr)   s    r$   r   Adahessian.get_paramsJ   s    
 ^t00]]r&   c                     U R                  5        Hc  n[        UR                  [        5      (       a  M$  U R                  U   S   U R
                  -  S:X  d  MI  UR                  R                  5         Me     g)z+
Zeros out the accumulated hessian traces.
r   r   N)r   
isinstancer   floatr   r   zero_)r   r"   s     r$   zero_hessianAdahessian.zero_hessianQ   sT    
 "Aaffe,,A~1NQUQaQa1aef1f #r&   c                    / n[        S U R                  5       5       HQ  nU R                  U   S   U R                  -  S:X  a  UR	                  U5        U R                  U   S==   S-  ss'   MS     [        U5      S:X  a  gU R                  R                  US   R                  :w  aA  [        R                  " US   R                  5      R                  U R                  5      U l        U Vs/ s H  o"R                  PM     nn[        U R                  5       H  nU Vs/ s HC  n[        R                  " SSUR!                  5       U R                  UR                  S9S-  S	-
  PME     nn[        R"                  R                  X1US
X@R                  S-
  :  S9n[%        XeU5       H*  u  pxnU=R&                  Xx-  U R                  -  -  sl        M,     M     gs  snf s  snf )zm
Computes the Hutchinson approximation of the hessian trace and accumulates it for each trainable parameter.
c                     U R                   S L$ N)grad)r"   s    r$   <lambda>(Adahessian.set_hessian.<locals>.<lambda>a   s    !&&"4r&   r   r   r	   N   )r   deviceg       @r   T)grad_outputsonly_inputsretain_graph)filterr   r   r   appendlenr   rB   r   r   r   r   r>   ranger   randintsizeautogradzipr   )	r   r    r"   gradsizsh_zsh_zzs	            r$   set_hessianAdahessian.set_hessianZ   s    4doo6GHAzz!}^,t/?/??1Da JJqM.)Q.) I
 v;!>>  F1I$4$44"__VAY-=-=>JJ499UDN!'(A(t~~&Apvwpvkl%--1affh$..QRQYQYZ]``cffpvBw>>&&BDqSaSadeSeOe ' gD 62	#'DNN22 3 ' ) xs   /G"A
G!c                 v   SnUb  U" 5       nU R                  5         U R                  5         U R                   GH  nUS    GH  nUR                  b  UR                  c  M   U R
                  (       ap  UR                  5       S:X  a\  [        R                  " UR                  5      R                  SS/SS9R                  UR                  5      R                  5       Ul        UR                  SUS	   US
   -  -
  5        U R                  U   n[        U5      S:X  a7  SUS'   [        R                  " U5      US'   [        R                  " U5      US'   US   US   pvUS   u  pUS==   S-  ss'   UR                  U5      R!                  UR                  SU-
  S9  UR                  U	5      R#                  UR                  UR                  SU	-
  S9  SXS   -  -
  n
SXS   -  -
  nUS   nX{-  R%                  US-  5      R!                  US   5      nUS	   U
-  nUR'                  XmU* S9  GM     GM     U$ )z
Performs a single optimization step.
Arguments:
    closure (callable, optional) -- a closure that reevaluates the model and returns the loss (default: None)
Nr       rA      T)dimkeepdimr	   r
   r   r   stepexp_avgexp_hessian_diag_sqr   )alpha)valuer   r   )r9   rT   r3   r>   r   r   rY   r   absmean	expand_asclonemul_r   rH   
zeros_likeadd_addcmul_pow_addcdiv_)r   closurelossr0   r"   r   r\   r]   beta1beta2bias_correction1bias_correction2kdenom	step_sizes                  r$   r[   Adahessian.stepv   s)    9D&&E8_66>QVV^''AEEGqL"YYqvv.33A3MWWXYX^X^_eegAF q5;~)>>>?

1 u:?$%E&M','7'7':E)$383C3CA3FE/0/4Y/?G\A],$W~f" U#((q5y(A#((/88qSXy8Y#$uf'=#= #$uf'=#= /*,?EEa!eLQQRWX]R^_ "$K*::	

7)
<I % 'N r&   )r   r   r   r   r   )g?)g?g+?g:0yE>r   r   r	   r	   Fr=   )__name__
__module____qualname____firstlineno____doc__r   propertyr*   r   r9   r   no_gradrT   r[   __static_attributes____classcell__)r#   s   @r$   r   r   	   s{    & !*.X  ^ ]]_3 36 ]]_5 5r&   r   )rx   r   optim	Optimizerr   r(   r&   r$   <module>r      s'   
 c&& cr&   