
    RЦiY                     :    S r SSKrSSKJr  SSKr " S S\5      rg)a5  Nvidia NovoGrad Optimizer.
Original impl by Nvidia from Jasper example:
    - https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechRecognition/Jasper
Paper: `Stochastic Gradient Methods with Layer-wise Adaptive Moments for Training of Deep Networks`
    - https://arxiv.org/abs/1905.11286
    N)	Optimizerc                   z   ^  \ rS rSrSr      SU 4S jjrU 4S jr\R                  " 5       SS j5       r	Sr
U =r$ )	
NvNovoGrad   a  
Implements Novograd algorithm.

Args:
    params (iterable): iterable of parameters to optimize or dicts defining
        parameter groups
    lr (float, optional): learning rate (default: 1e-3)
    betas (Tuple[float, float], optional): coefficients used for computing
        running averages of gradient and its square (default: (0.95, 0.98))
    eps (float, optional): term added to the denominator to improve
        numerical stability (default: 1e-8)
    weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
    grad_averaging: gradient averaging
    amsgrad (boolean, optional): whether to use the AMSGrad variant of this
        algorithm from the paper `On the Convergence of Adam and Beyond`_
        (default: False)
c           	        > SU::  d  [        SR                  U5      5      eSU::  d  [        SR                  U5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      eSUS   s=::  a  S:  d  O  [        SR                  US   5      5      e[        UUUUUUS	9n[        [        U ]  X5        g )
Ng        zInvalid learning rate: {}zInvalid epsilon value: {}r   g      ?z%Invalid beta parameter at index 0: {}   z%Invalid beta parameter at index 1: {})lrbetasepsweight_decaygrad_averagingamsgrad)
ValueErrorformatdictsuperr   __init__)
selfparamsr	   r
   r   r   r   r   defaults	__class__s
            T/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/optim/nvnovograd.pyr   NvNovoGrad.__init__    s     by8??CDDcz8??DEEeAh$$DKKERSHUVVeAh$$DKKERSHUVV%)
 	j$(:    c                 v   > [         [        U ]  U5        U R                   H  nUR	                  SS5        M     g )Nr   F)r   r   __setstate__param_groups
setdefault)r   stategroupr   s      r   r   NvNovoGrad.__setstate__=   s2    j$,U3&&EY. 'r   c                 n   SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   GHg  nUS    GHY  nUR                  c  M  UR                  nUR                  (       a  [        S5      eUS   nU R                  U   n[        U5      S:X  a  SUS'   [         R                  " U5      US'   [         R                  " / 5      R                  US   R                  5      US'   U(       a5  [         R                  " / 5      R                  US   R                  5      US	'   US   US   pU(       a  US	   n
US
   u  pUS==   S-  ss'   [         R                  " [         R                  " US5      5      nU	S:X  a  U	R                  U5        O"U	R                  U5      R!                  USU-
  S9  U(       a8  [         R"                  " W
XS9  U
R%                  5       R!                  US   5      nO"U	R%                  5       R!                  US   5      nUR'                  U5        US   S:w  a  UR!                  XCS   S9  US   (       a  UR                  SU-
  5        UR                  U5      R!                  U5        UR!                  XS   * S9  GM\     GMj     U$ ! , (       d  f       GN= f)zPerforms a single optimization step.

Arguments:
    closure (callable, optional): A closure that reevaluates the model
    and returns the loss.
Nr   z#Sparse gradients are not supported.r   r   stepexp_avg
exp_avg_sqmax_exp_avg_sqr
   r      )alpha)outr   r   r   r	   )torchenable_gradr   grad	is_sparseRuntimeErrorr   len
zeros_likezerostodevicesumpowcopy_mul_add_maxsqrtdiv_)r   closurelossr    pr,   r   r   r$   r%   r&   beta1beta2normdenoms                  r   r#   NvNovoGrad.stepB   sW    ""$y % &&E8_66>vv>>&'LMM	*

1 u:?$%E&M','7'7':E)$*/++b/*<*<U9=M=T=T*UE,'27++b/2D2DU9EUE\E\2]./&+I&6l8K%*+;%<N$W~f"yy4!34?$$T*OOE*//AI/FIInjM*//166uU|DE&OO-225<@E		% (A-IIa^'<I=)*IIa%i(U#((.wT{l3e % 'j q %$s   J%%
J4 )gMbP?)gffffff?g\(\?g:0yE>r   FF)N)__name__
__module____qualname____firstlineno____doc__r   r   r*   no_gradr#   __static_attributes____classcell__)r   s   @r   r   r      sB    *  ;:/
 ]]_A Ar   r   )rI   r*   torch.optim.optimizerr   mathr   rD   r   r   <module>rO      s#     + w wr   