
    RЦi!                     <   S r SSKJrJr  SSKrSSKJr   SSKJ	r	  Sr
 SSKJrJr  Sr\" \S	5      rSqSS
\4S jjrSS
\4S jjrS rSS jr   S S\R.                  S\S\\R.                     S\\R.                     S\S\R.                  4S jjr   S S\R.                  S\\   S\\R.                     S\\R.                     S\S\R.                  4S jjr  S!S\R.                  S\\   S\\R.                     S\4S jjr  S!S\R.                  S\\   S\\R.                     S\S\R.                  4
S jjr  S!S\R.                  S\\   S\\R.                     S\4S jjr  S!S\R.                  S\\   S\\R.                     S\S\R.                  4
S jjr  S!S\R.                  S\\   S\\R.                     S\4S jjr   S!S\R.                  S\\   S\\R.                     S\S\R.                  4
S jjr!g! \ a    Sr
 GNf = f! \ a    Sr GNf = f)"a  'Fast' Normalization Functions

For GroupNorm and LayerNorm these functions bypass typical AMP upcast to float32.

Additionally, for LayerNorm, the APEX fused LN is used if available (which also does not upcast)

Hacked together by / Copyright 2022 Ross Wightman
    )ListOptionalN)
functional)fused_layer_norm_affineTF)fused_rms_norm_affinefused_rms_normrms_normdevicec                      [         R                  " U 5      $ ! [        [        4 a=    U S:X  a  [         R                  " 5       s $ U S:X  d   e[         R
                  " 5       s $ f = fNcpucuda)torchget_autocast_dtypeAttributeError	TypeErrorget_autocast_cpu_dtypeget_autocast_gpu_dtyper
   s    T/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/layers/fast_norm.pyr   r   !   s_    2''//I& 2U?//11V###//112s    +A%A%$A%c                      [         R                  " U 5      $ ! [         a=    U S:X  a  [         R                  " 5       s $ U S:X  d   e[         R                  " 5       s $ f = fr   )r   is_autocast_enabledr   is_autocast_cpu_enabledr   s    r   r   r   -   sZ    /((00 /U?0022V###,,../s    %AAAc                      [         $ N_USE_FAST_NORM     r   is_fast_normr    9   s    r   c                     U q g r   r   )enables    r   set_fast_normr#   =   s    Nr   x
num_groupsweightbiasepsreturnc                 @   [         R                  R                  5       (       a  [        R                  " XX#U5      $ [        U R                  R                  5      (       a[  [        U R                  R                  5      nU R                  U5      Ub  UR                  U5      OS Ub  UR                  U5      OS p2n [         R                  R                  U R                  R                  SS9   [        R                  " XX#U5      sS S S 5        $ ! , (       d  f       g = fNFdevice_typeenabled)r   jitis_scriptingF
group_normr   r
   typer   toampautocast)r$   r%   r&   r'   r(   dts         r   fast_group_normr8   B   s     yy||A6==188==))  .DDH#/FIIbMT+DGGBK  
		u		E||A6= 
F	E	Es   -D
Dnormalized_shapec                 p   [         R                  R                  5       (       a  [        R                  " XX#U5      $ [
        (       a  [        XX1U5      $ [        U R                  R                  5      (       a[  [        U R                  R                  5      nU R                  U5      Ub  UR                  U5      OS Ub  UR                  U5      OS p2n [         R                  R                  U R                  R                  SS9   [        R                  " XX#U5      sS S S 5        $ ! , (       d  f       g = fr+   )r   r/   r0   r1   
layer_normhas_apexr   r   r
   r3   r   r4   r5   r6   )r$   r9   r&   r'   r(   r7   s         r   fast_layer_normr=   [   s     yy||AsCCx&q$#NN188==))  .DDH#/FIIbMT+DGGBK  
		u		E||AsC 
F	E	Es   D''
D5c                    [        U5      nU R                  S5      n[        R                  R	                  5       (       a-  US:X  d   e[        R
                  " USS9R                  S5      nO/[        [        SU* S-
  S5      5      n[        R
                  " XVSS9nU [        R                  " XS-   5      -  n Ub  X-  n U $ )N      dimTrC   keepdim)
lenpowr   r/   r0   mean	unsqueezetuplerangersqrtr$   r9   r&   r(   	norm_ndimvdimss          r   r	   r	   w   s     $%I	aAyy A~~JJqb!++B/U2	zA~r23JJqD1	EKK  AJHr   c                 z   [         R                  R                  5       (       a  [        XX#5      $ [        (       a  Uc  [        XU5      $ [        XX5      $ [        U R                  R                  5      (       aE  [        U R                  R                  5      nU R                  U5      Ub  UR                  U5      OS p [         R                  R                  U R                  R                  SS9   [        (       a  [        R                  " XX#5      n O[        XX#5      n S S S 5        U $ ! , (       d  f       U $ = fr+   )r   r/   r0   r	   has_apex_rmsnormr   r   r   r
   r3   r   r4   r5   r6   has_torch_rms_normr1   r$   r9   r&   r(   r7   s        r   fast_rms_normrU      s     yyV99>!!s;;(4DJJ188==))  .DDHv/Afiimt6			u		E

1<Af:A	 
F H 
F	E Hs   10D++
D:c                     [        U5      S:X  d   eU R                  S5      n[        R                  " USSS9nU [        R                  " XC-   5      -  n Ub  XR                  SSSS5      -  n U $ )Nr@   r?   TrD   rA   )rF   rG   r   rH   rL   reshape)r$   r9   r&   r(   rO   s        r   
rms_norm2drX      sn      A%%%	aA

1!T*A	EKK  Aq"a++Hr   c                    [         R                  R                  5       (       a  [        XX#5      $ [        (       aD  U R                  SSSS5      n Uc  [        XU5      n O[        XX5      n U R                  SSSS5      n [        U R                  R                  5      (       aE  [        U R                  R                  5      nU R                  U5      Ub  UR                  U5      OS p [         R                  R                  U R                  R                  SS9   [        XX#5      n S S S 5        U $ ! , (       d  f       U $ = f)Nr   r?      r@   Fr,   )r   r/   r0   rX   rR   permuter   r   r   r
   r3   r   r4   r5   r6   rT   s        r   fast_rms_norm2dr\      s     yy!v;;IIaAq!>qC8A%a1AGAIIaAq!188==))  .DDHv/Afiimt6			u		EqF8 
F H 
F	E Hs   D11
E c                 ^   [        U5      n[        R                  R                  5       (       a-  US:X  d   e[        R                  " U SS9R                  S5      nO/[        [        SU* S-
  S5      5      n[        R                  " XSS9nU [        R                  " XS-   5      -  n Ub  X-  n U $ )Nr@   rA   rB   TrD   )	rF   r   r/   r0   varrI   rJ   rK   rL   rM   s          r   simple_normr_      s     $%Iyy A~~IIaR **2.U2	zA~r23IIa40	EKK  AJHr   c                    [         R                  R                  5       (       a  [        XX#5      $ [	        U R
                  R                  5      (       aE  [        U R
                  R                  5      nU R                  U5      Ub  UR                  U5      OS p [         R                  R                  U R
                  R                  SS9   [        XX#5      n S S S 5        U $ ! , (       d  f       U $ = fr+   )r   r/   r0   r_   r   r
   r3   r   r4   r5   r6   rT   s        r   fast_simple_normra      s     yy1<<188==))  .DDHv/Afiimt6			u		EV9 
FH 
F	EHs   C""
C1)r   )T)NNh㈵>)Nrb   )"__doc__typingr   r   r   torch.nnr   r1   #apex.normalization.fused_layer_normr   r<   ImportErrorr   r   rR   hasattrrS   r   strr   r   r    r#   Tensorintfloatr8   r=   r	   rU   rX   r\   r_   ra   r   r   r   <module>rm      s   "  $KHY
 Q
+  	2s 	2	/ 	/ &*#'>||>> U\\"> 5<<
 	>
 
> \\>8 &*#'D||D3iD U\\"D 5<<
 	D
 
D \\D> &*	||3i U\\" 
	4 &*	||3i U\\" 
	
 \\D &*	||3i U\\" 
	$ &*	||3i U\\" 
	
 \\B &*	||3i U\\" 
	2 &*	||3i U\\" 
	
 \\  H  s"   H  
H  HHHH