
    RЦiar                     2   S r S/rSSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJr  SSKrSSKJr  SSKJrJr  SSKJrJrJrJr  S	S
KJr  S	SKJr  S	SKJrJr  S	SK J!r!J"r"   " S S\R                   RF                  5      r$ " S S\R                   RF                  5      r% " S S\R                   RL                  5      r' " S S\R                   RL                  5      r( " S S\R                   RL                  5      r) " S S\R                   RL                  5      r* " S S\R                   RL                  5      r+ " S S\R                   RL                  5      r, " S S\R                   RL                  5      r- " S  S!\R                   RF                  5      r. " S" S\RL                  5      r/S.S# jr0\"" \0" S$S%9\0" S$S%9\0" S$S%9\0" S$S%9\0" S$S%9\0" S$S%9S&.5      r1S/S' jr2\!S/S( j5       r3\!S/S) j5       r4\!S/S* j5       r5\!S/S+ j5       r6\!S/S, j5       r7\!S/S- j5       r8g)0zEfficientViT (by MSRA)

Paper: `EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention`
    - https://arxiv.org/abs/2305.07027

Adapted from official impl at https://github.com/microsoft/Cream/tree/main/EfficientViT
EfficientVitMsra    N)OrderedDict)partial)DictListOptionalTupleTypeUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)SqueezeExciteSelectAdaptivePool2dtrunc_normal__assert   )build_model_with_cfg)feature_take_indices)
checkpointcheckpoint_seq)register_modelgenerate_default_cfgsc                      ^  \ rS rSr        SS\S\S\S\S\S\S\S	\4U 4S
 jjjr\R                  " 5       S 5       r	Sr
U =r$ )ConvNorm   in_chsout_chsksstridepaddilationgroupsbn_weight_initc           	         > XS.n[         TU ]  5         [        R                  " XX4XVU4SS0UD6U l        [        R
                  " U40 UD6U l        [        R                  R                  R                  U R                  R                  U5        g )NdevicedtypebiasF)super__init__nnConv2dconvBatchNorm2dbntorchinit	constant_weight)selfr   r   r   r    r!   r"   r#   r$   r'   r(   dd	__class__s               \/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/efficientvit_msra.pyr+   ConvNorm.__init__   so     /IIfr3&cW\c`bc	../B/?    c           
      :   U R                   U R                  p!UR                  UR                  UR                  -   S-  -  nUR                  US S 2S S S 4   -  nUR
                  UR                  UR                  -  UR                  UR                  -   S-  -  -
  n[        R                  R                  UR                  S5      U R                   R                  -  UR                  S5      UR                  SS  U R                   R                  U R                   R                  U R                   R                  U R                   R                  S9nUR                  R                   R#                  U5        UR
                  R                   R#                  U5        U$ )N      ?r   r      )r    paddingr"   r#   )r.   r0   r4   running_varepsr)   running_meanr1   r,   r-   sizer#   shaper    r>   r"   datacopy_)r5   cr0   wbms         r8   fuseConvNorm.fuse.   s3   		4772II"&&0366HHqD$,--GGboo		1^^bff$s*+ +HHOOFF1I		(((!&&)QWWQR[99##TYY->->I[I[dhdmdmdtdt  v 	
A	!r:   )r0   r.   )r   r   r   r   r   r   NN)__name__
__module____qualname____firstlineno__intfloatr+   r1   no_gradrJ   __static_attributes____classcell__r7   s   @r8   r   r      s    
 $%@@ @ 	@
 @ @ @ @ "@ @& ]]_ r:   r   c                   |   ^  \ rS rSr     S
S\S\S\S\S\4
U 4S jjjr\R                  " 5       S 5       r
S	rU =r$ )
NormLinear=   in_featuresout_featuresr)   stddropc                   > XgS.n[         T	U ]  5         [        R                  " U40 UD6U l        [        R
                  " U5      U l        [        R                  " X4SU0UD6U l        [        U R                  R                  US9  U R                  R                  b5  [        R                  R                  U R                  R                  S5        g g )Nr&   r)   )r[   r   )r*   r+   r,   BatchNorm1dr0   Dropoutr\   Linearlinearr   r4   r)   r2   r3   )
r5   rY   rZ   r)   r[   r\   r'   r(   r6   r7   s
            r8   r+   NormLinear.__init__>   s     /..33JJt$	iiKKKdkk((c2;;'GGdkk..2 (r:   c                 H   U R                   U R                  p!UR                  UR                  UR                  -   S-  -  nUR
                  U R                   R                  U R                   R                  -  UR                  UR                  -   S-  -  -
  nUR                  US S S 24   -  nUR
                  c#  X@R                  R                  R                  -  nO<UR                  US S 2S 4   -  R                  S5      U R                  R
                  -   n[        R                  R                  UR                  S5      UR                  S5      5      nUR                  R                  R                  U5        UR
                  R                  R                  U5        U$ )Nr<   r   r   )r0   ra   r4   r?   r@   r)   rA   Tviewr1   r,   r`   rB   rD   rE   )r5   r0   ra   rG   rH   rI   s         r8   rJ   NormLinear.fuseR   s2   WWdkkFII"&&0366GGdgg**GGNN nnrvv5;< <MMAdAgJ&;;KK&&(((A1d7+11"58H8HHAHHOOAFF1Iqvvay1	A	!r:   )r0   r\   ra   )Tg{Gz?        NN)rL   rM   rN   rO   rP   boolrQ   r+   r1   rR   rJ   rS   rT   rU   s   @r8   rW   rW   =   sh    
 33 3 	3
 3 3 3( ]]_ r:   rW   c                   B   ^  \ rS rSr  SS\S\4U 4S jjjrS rSrU =r$ )PatchMergingc   dimout_dimc                 ,  > X4S.n[         TU ]  5         [        US-  5      n[        XSSS40 UD6U l        [
        R                  R                  5       U l        [        XfSSS4SU0UD6U l	        [        US40 UD6U l        [        XbSSS40 UD6U l        g )	Nr&      r   r      r=   r#   g      ?)r*   r+   rP   r   conv1r1   r,   ReLUactconv2r   seconv3)r5   rm   rn   r'   r(   r6   hid_dimr7   s          r8   r+   PatchMerging.__init__d   s     /cAg,cAq!:r:
88==?g1aNN2N
33g1a>2>
r:   c                     U R                  U R                  U R                  U R                  U R                  U R	                  U5      5      5      5      5      5      nU$ N)rw   rv   rt   ru   rr   r5   xs     r8   forwardPatchMerging.forwardt   sA    JJtwwtxx

488DJJqM3J(KLMNr:   )rt   rr   ru   rw   rv   NN	rL   rM   rN   rO   rP   r+   r~   rS   rT   rU   s   @r8   rk   rk   c   s1    
 ?? ? ?  r:   rk   c                   R   ^  \ rS rSrSS\R
                  S\4U 4S jjjrS rSr	U =r
$ )ResidualDropy   rI   r\   c                 :   > [         TU ]  5         Xl        X l        g r{   )r*   r+   rI   r\   )r5   rI   r\   r7   s      r8   r+   ResidualDrop.__init__z   s    	r:   c           
      z   U R                   (       a  U R                  S:  a  XR                  U5      [        R                  " UR                  S5      SSSUR                  S9R                  U R                  5      R                  SU R                  -
  5      R                  5       -  -   $ XR                  U5      -   $ )Nr   r   )r'   )
trainingr\   rI   r1   randrB   r'   ge_divdetachr|   s     r8   r~   ResidualDrop.forward   s    ==TYY]vvay5::q	1a188$558S^CCDIIDVW]W]W_` ` ` vvay= r:   )r\   rI   )rh   )rL   rM   rN   rO   r,   ModulerQ   r+   r~   rS   rT   rU   s   @r8   r   r   y   s)    ")) 5  
! !r:   r   c                   B   ^  \ rS rSr  SS\S\4U 4S jjjrS rSrU =r$ )ConvMlp   edhc                    > X4S.n[         TU ]  5         [        X40 UD6U l        [        R
                  R                  5       U l        [        X!4SS0UD6U l        g )Nr&   r$   r   )	r*   r+   r   pw1r1   r,   rs   rt   pw2)r5   r   r   r'   r(   r6   r7   s         r8   r+   ConvMlp.__init__   sP     /B(R(88==?A:!:r:r:   c                 d    U R                  U R                  U R                  U5      5      5      nU$ r{   )r   rt   r   r|   s     r8   r~   ConvMlp.forward   s&    HHTXXdhhqk*+r:   )rt   r   r   r   r   rU   s   @r8   r   r      s1    
 ;; ; ; r:   r   c                      ^  \ rS rSr% \\\R                  4   \S'          SS\	S\	S\	S\	S\	S\
\	S	4   4U 4S
 jjjrSS jrSS jrSS jr\R                  " 5       SU 4S jj5       rS\R"                  S\R                  4S jrS rSrU =r$ )CascadedGroupAttention   attention_bias_cacherm   key_dim	num_heads
attn_ratio
resolutionkernels.c	                   > XxS.n	[         TU ]  5         X0l        US-  U l        X l        [        XB-  5      U l        X@l        / n
/ n[        U5       H  nU
R                  [        X-  U R                  S-  U R                  -   40 U	D65        UR                  [        U R                  U R                  Xl   SXl   S-  4SU R                  0U	D65        M     [        R                  R                  U
5      U l        [        R                  R                  U5      U l        [        R                  R!                  [        R                  R#                  5       [        U R                  U-  U4SS0U	D65      U l        XPl        XU-  nXU-  n[        R                  R)                  [        R*                  " X>40 U	D65      U l        U R/                  S[        R*                  " X4U[        R0                  S9S	S
9  0 U l        U R5                  5         g )Nr&   g      r=   r   r#   r$   r   attention_bias_idxsF)
persistent)r*   r+   r   scaler   rP   val_dimr   rangeappendr   r1   r,   
ModuleListqkvsdws
Sequentialrs   projr   	Parameteremptyattention_biasesregister_bufferlongr   reset_parameters)r5   rm   r   r   r   r   r   r'   r(   r6   r   r   iNnum_offsetsr7   s                  r8   r+   CascadedGroupAttention.__init__   s    /"_
:/0$y!AKK!14<<!3Cdll3RYVXYZJJxdllGJ7:YZ?vcgcocovsuvw " HH''-	88&&s+HH''HHMMOT\\I-sK1KK
	
 %# - % 2 25;;y3\Y[3\ ]!KKvUZZ@ 	 	

 %'! 	r:   returnc                     [         R                  R                  R                  U R                  5        U R                  5         g)z"Initialize parameters and buffers.N)r1   r,   r2   zeros_r   _init_buffersr5   s    r8   r   'CascadedGroupAttention.reset_parameters   s*    T223r:   c                    [        [        R                  " [        U R                  5      [        U R                  5      5      5      n0 n/ nU HY  nU HP  n[        US   US   -
  5      [        US   US   -
  5      4nXb;  a  [        U5      X&'   UR                  X&   5        MR     M[     U R                  R                  [        R                  " U[        R                  S9R                  [        U5      [        U5      5      5        g)z.Compute and fill non-persistent buffer values.r   r   )r(   N)list	itertoolsproductr   r   abslenr   r   rE   r1   tensorr   rf   )r5   pointsattention_offsetsidxsp1p2offsets          r8   r   $CascadedGroupAttention._init_buffers   s    i''doo(>doo@VWXBbebem,c"Q%"Q%-.@A2034E0F%--56	   	  &&u||D

'K'P'PQTU[Q\^abh^i'jkr:   c                 $    U R                  5         g)z"Initialize non-persistent buffers.N)r   r   s    r8   init_non_persistent_buffers2CascadedGroupAttention.init_non_persistent_buffers   s    r:   c                 f   > [         TU ]  U5        U(       a  U R                  (       a  0 U l        g g g r{   )r*   trainr   )r5   moder7   s     r8   r   CascadedGroupAttention.train   s)    dD--(*D% .4r:   r'   c                 J   [         R                  R                  5       (       d  U R                  (       a  U R                  S S 2U R
                  4   $ [        U5      nX R                  ;  a*  U R                  S S 2U R
                  4   U R                  U'   U R                  U   $ r{   )r1   jit
is_tracingr   r   r   strr   )r5   r'   
device_keys      r8   get_attention_biases+CascadedGroupAttention.get_attention_biases   s    99!!T]]((D,D,D)DEEVJ!:!::8<8M8MaQUQiQiNi8j))*5,,Z88r:   c                    UR                   u  p#pEUR                  [        U R                  5      SS9n/ nUS   nU R	                  UR
                  5      n	[        [        U R                  U R                  5      5       GH  u  n
u  pU
S:  a  XU
   -   nU" U5      nUR                  USXE5      R                  U R                  U R                  U R                  /SS9u  pnU" U5      nUR                  S5      UR                  S5      UR                  S5      pnXR                  -  nUR                  SS5      U-  nUX   -   nUR!                  SS9nUUR                  SS5      -  nUR                  X R                  XE5      nUR#                  U5        GM     U R%                  [&        R(                  " US5      5      nU$ )Nr   )rm   r   rd   r=   )rC   chunkr   r   r   r'   	enumeratezipr   rf   splitr   r   flattenr   	transposesoftmaxr   r   r1   cat)r5   r}   BCHWfeats_in	feats_outfeat	attn_biashead_idxqkvr   qkvattns                    r8   r~   CascadedGroupAttention.forward   s   WW
a773tyy>q71	{--ahh7	$-c$))TXX.F$G Hjs!|x00t9Dii2q,22DLL$,,PTP\P\3]cd2eGA!AAiilAIIaL!))A,!AJJA;;r2&*D)--D<<B<'Dt~~b"--D99Qa3DT" %H IIeii	1-.r:   )r   r   r   r   r   r   r   r   r   r   r   )   rp         r   r   r   NN)r   NT)rL   rM   rN   rO   r   r   r1   Tensor__annotations__rP   r	   r+   r   r   r   rR   r   r'   r   r~   rS   rT   rU   s   @r8   r   r      s    sELL011	  '3, ,  ,  	, 
 ,  ,  38_,  , \
l ]]_+ +
95<< 9ELL 9 r:   r   c                   n   ^  \ rS rSrSr       SS\S\S\S\S\S\S	\\S
4   4U 4S jjjrS rSr	U =r
$ )LocalWindowAttentioni  a  Local Window Attention.

Args:
    dim (int): Number of input channels.
    key_dim (int): The dimension for query and key.
    num_heads (int): Number of attention heads.
    attn_ratio (int): Multiplier for the query dim for value dimension.
    resolution (int): Input resolution.
    window_resolution (int): Local window resolution.
    kernels (List[int]): The kernel size of the dw conv on query.
rm   r   r   r   r   window_resolutionr   .c
                    > XS.n
[         TU ]  5         Xl        X0l        XPl        US:  d   S5       eX`l        [        Xe5      n[        XU4UUUS.U
D6U l        g )Nr&   r   z"window_size must be greater than 0)r   r   r   )	r*   r+   rm   r   r   r   minr   r   r5   rm   r   r   r   r   r   r   r'   r(   r6   r7   s              r8   r+   LocalWindowAttention.__init__  sv     /"$ 1$J&JJ$!2 1>*)
!(	

 
	r:   c           	         U R                   =p#UR                  u  pEpg[        X&:H  SX#4 SXg4 35        [        X7:H  SX#4 SXg4 35        X R                  ::  a"  X0R                  ::  a  U R	                  U5      nU$ UR                  SSSS5      nU R                  X R                  -  -
  U R                  -  nU R                  X0R                  -  -
  U R                  -  n	[        R                  R                  R                  USSSU	SU45      nX(-   X9-   pXR                  -  nXR                  -  nUR                  XLU R                  XR                  U5      R                  SS5      nUR                  XL-  U-  U R                  U R                  U5      R                  SSSS5      nU R	                  U5      nUR                  SSSS5      R                  XLXR                  U R                  U5      nUR                  SS5      R                  XJX5      nUS S 2S U2S U24   R                  5       nUR                  SSSS5      nU$ )Nz%input feature has wrong size, expect z, got r   r=   rq   r   )r   rC   r   r   r   permuter1   r,   
functionalr!   rf   r   reshape
contiguous)r5   r}   r   r   r   r   H_W_pad_bpad_rpHpWnHnWs                 r8   r~   LocalWindowAttention.forward:  s3   wwb@!PRxjYZ@!PRxjYZ&&&10F0F+F		!A& # 		!Q1%A++a2H2H.HHDLbLbbE++a2H2H.HHDLbLbbE##''Aq!UAu+EFAY	---B---Bqd44b:P:PRST^^_`bcdA		!&2+t'='=t?U?UWXYaabcefhiklmA		!A		!Q1%**1"6L6LdNdNdfghAAq!))!7A!RaR!)'')A		!Q1%Ar:   )r   rm   r   r   r   )r   rp   r      r   NN)rL   rM   rN   rO   __doc__rP   r	   r+   r~   rS   rT   rU   s   @r8   r   r     s    
  %&'3

 
 	

 
 
  #
 38_
 
8 r:   r   c                   n   ^  \ rS rSrSrSSSS/ SQSS4S	\S
\S\S\S\S\S\\   4U 4S jjjrS rSr	U =r
$ )EfficientVitBlockiW  a  A basic EfficientVit building block.

Args:
    dim (int): Number of input channels.
    key_dim (int): Dimension for query and key in the token mixer.
    num_heads (int): Number of attention heads.
    attn_ratio (int): Multiplier for the query dim for value dimension.
    resolution (int): Input resolution.
    window_resolution (int): Local window resolution.
    kernels (List[int]): The kernel size of the dw conv on query.
r   rp   r   r  r   Nrm   r   r   r   r   r   r   c
           
        > XS.n
[         TU ]  5         [        [        XSSS4USS.U
D65      U l        [        [        U[        US-  5      40 U
D65      U l        [        [        XU4UUUUS.U
D65      U l	        [        [        XSSS4USS.U
D65      U l
        [        [        U[        US-  5      40 U
D65      U l        g )Nr&   rq   r   rh   )r#   r$   r=   )r   r   r   r   )r*   r+   r   r   dw0r   rP   ffn0r   mixerdw1ffn1r   s              r8   r+   EfficientVitBlock.__init__c  s     /1a `3WY `]_ `a c#'l!Ab!AB	! i%%"3 	

  1a `3WY `]_ `a c#'l!Ab!AB	r:   c                     U R                  U R                  U R                  U R                  U R	                  U5      5      5      5      5      $ r{   )r  r  r  r  r  r|   s     r8   r~   EfficientVitBlock.forward  s4    yy$**TYYtxx{-C"DEFFr:   )r  r  r  r  r  )rL   rM   rN   rO   r  rP   r   r+   r~   rS   rT   rU   s   @r8   r  r  W  s    
  %&!-CC C 	C
 C C  #C #YC C@G Gr:   r  c                      ^  \ rS rSrSSSSS/ SQSS	S	4	S
\S\S\S\\\4   S\S\S\S\S\\   S\4U 4S jjjrS r	Sr
U =r$ )EfficientVitStagei   r   r   rp   r   r  r   r   Nin_dimrn   r   
downsampler   r   r   r   r   depthc                   > XS.n[         TU ]  5         US   S:X  Ga)  US-
  US   -  S-   U l        / nUR                  S[        R
                  R                  [        [        XSSS4SU0UD65      [        [        U[        US-  5      40 UD65      5      45        UR                  S	[        X40 UD645        UR                  S
[        R
                  R                  [        [        X"SSS4SU0UD65      [        [        U[        US-  5      40 UD65      5      45        [
        R                  " [        U5      5      U l        O'X:X  d   e[
        R                  " 5       U l        Xpl        / n[        U
5       H.  nUR                  [!        UUUUU R                  UU	40 UD65        M0     [
        R                  " U6 U l        g )Nr&   r   	subsampler   res1rq   r#   r=   
patchmergeres2)r*   r+   r   r   r1   r,   r   r   r   r   rP   rk   r   r"  Identityr   r  blocks)r5   r!  rn   r   r"  r   r   r   r   r   r#  r'   r(   r6   down_blocksr*  dr7   s                    r8   r+   EfficientVitStage.__init__  s    /a=K')A~*Q-?!CDOK## &!Q!W&!WTV!WX VaZ!GB!GH   l6.Qb.QRS## 'Aq!!ZG!ZWY!Z[ #gk2B!Ib!IJ   !mmK,DEDO$$$ kkmDO(OuAMM+!	 	 	  mmV,r:   c                 J    U R                  U5      nU R                  U5      nU$ r{   )r"  r*  r|   s     r8   r~   EfficientVitStage.forward  s"    OOAKKNr:   )r*  r"  r   )rL   rM   rN   rO   rP   r	   r   r   r+   r~   rS   rT   rU   s   @r8   r  r    s     +2 %&!-5-5- 5- 	5-
 c3h5- 5- 5- 5-  #5- #Y5- 5- 5-n r:   r  c                   <   ^  \ rS rSr  SS\S\4U 4S jjjrSrU =r$ )PatchEmbeddingi  in_chansrm   c           
      n  > [         TU ]  5         X4S.nU R                  S[        XS-  SSS40 UD65        U R                  S[        R
                  R                  5       5        U R                  S[        US-  US	-  SSS40 UD65        U R                  S
[        R
                  R                  5       5        U R                  S[        US	-  US-  SSS40 UD65        U R                  S[        R
                  R                  5       5        U R                  S[        US-  USSS40 UD65        SU l        g )Nr&   rr   r   rq   r=   r   relu1ru   rp   relu2rw   relu3conv4   )r*   r+   
add_moduler   r1   r,   rs   
patch_size)r5   r2  rm   r'   r(   r6   r7   s         r8   r+   PatchEmbedding.__init__  s     	/(1HaA!L!LM1#(C1HaA!L!LM1#(C1HaA!L!LM1#(CAq!GB!GHr:   )r:  r   )rL   rM   rN   rO   rP   r+   rS   rT   rU   s   @r8   r1  r1    s'    
   r:   r1  c                   4  ^  \ rS rSr              S*S\S\S\S\\S4   S\\S4   S	\\S4   S
\\S4   S\\S4   S\\S4   S\\\\4   S4   S\S\4U 4S jjjrS+S\	4S jjr
S+S\R                  S\	SS4S jjr\R                  R                   S 5       r\R                  R                   S,S j5       r\R                  R                   S+S j5       r\R                  R                   S\R                  4S j5       rS-S\S\\   4S jjr     S.S\R.                  S\\\\\   4      S\	S\	S\S \	S\\\R.                     \\R.                  \\R.                     4   4   4S! jjr   S/S\\\\   4   S"\	S#\	4S$ jjrS% rS,S&\	4S' jjrS( rS)rU =r $ )0r   i  Nimg_sizer2  num_classes	embed_dim.r   r#  r   window_sizer   down_opsglobal_pool	drop_ratec                   > [         TU ]  5         XS.nSU l        X0l        X l        Xl        [        X$S   40 UD6U l        U R                  R                  nXR                  R                  -  n[        [        U5      5       Vs/ s H  nUU   UU   UU   -  -  PM     nn/ U l        / nUS   n[        [        XEXgUX5      5       H|  u  nu  nnnnnnn[        SUUUUUUUUU	US.
UD6nUnUS   S:X  a  US:w  a  UUS   -  nUR                  nUR!                  U5        U =R                  [#        UUSU 3S9/-  sl        M~     [$        R&                  " U6 U l        US	:X  a  [+        US
S9U l        O"US:X  d   e[$        R.                  " 5       U l        US   =U l        U l        US:  a#  [5        U R0                  U4SU R
                  0UD6O[6        R$                  R/                  5       U l        U R;                  SS9  g s  snf )Nr&   Fr   )
r!  rn   r   r"  r   r   r   r   r   r#  r%  r   zstages.)num_chs	reductionmoduleavgT	pool_typer   rd   r\   needs_reset )r*   r+   grad_checkpointingr>  r2  rC  r1  patch_embedr:  r   r   feature_infor   r   r  r   r   dictr,   r   stagesr   rB  r)  num_featureshead_hidden_sizerW   r1   headinit_weights)r5   r=  r2  r>  r?  r   r#  r   r@  r   rA  rB  rC  r'   r(   r6   r    r   r   r   rR  pre_edr   kddpthnharwddostager7   s                                 r8   r+   EfficientVitMsra.__init__  sJ   " 	/"'& " *(aLGBG!!,,!1!1!<!<<
JOPST]P^J_`J_Qilgaj9Q<&?@J_
` 11:I*k\2^-A-Bb"b"% %"$ E F!u#Q"Q%))JMM% $rVgVWUXM"Z![[)2^* mmV,%3kSWXD!###!{{}D4=bMAD1JUXY/ {G15GCEG_d_g_g_p_p_r 		 	e,O as   HrL  c                 J    U R                  [        U R                  US95        g )NrK  )applyr   _init_weights)r5   rL  s     r8   rV  EfficientVitMsra.init_weights  s    

74--;GHr:   rI   r   c                 X    U(       a#  [        US5      (       a  UR                  5         g g g )Nr   )hasattrr   )r5   rI   rL  s      r8   rb  EfficientVitMsra._init_weights"  s%    71&899  :;r:   c                 z    U R                  5       R                  5        Vs1 s H  nSU;   d  M  UiM     sn$ s  snf )Nr   )
state_dictkeysr|   s     r8   no_weight_decay EfficientVitMsra.no_weight_decay&  s4    ??,113O3a7IQ7N3OOOs   
88c                 0    [        SU(       a  SOSS/S9nU$ )Nz^patch_embedz^stages\.(\d+))z^stages\.(\d+).downsample)r   )z^stages\.(\d+)\.\w+\.(\d+)N)stemr*  )rQ  )r5   coarsematchers      r8   group_matcherEfficientVitMsra.group_matcher*  s'     (.$455
 r:   c                     Xl         g r{   )rN  )r5   enables     r8   set_grad_checkpointing'EfficientVitMsra.set_grad_checkpointing5  s    "(r:   c                 .    U R                   R                  $ r{   )rU  ra   r   s    r8   get_classifierEfficientVitMsra.get_classifier9  s    yyr:   c                     Xl         Ub8  US:X  a  [        USS9U l        O"US:X  d   e[        R                  " 5       U l        US:  a$  [        U R                  XR                  S9U l	        g [        R                  R	                  5       U l	        g )NrH  TrI  r   )r\   )
r>  r   rB  r,   r)  rW   rS  rC  r1   rU  )r5   r>  rB  s      r8   reset_classifier!EfficientVitMsra.reset_classifier=  s    &"e##7+W[#\ "a'''#%;;= DORSO {A	Y^YaYaYjYjYl 		r:   r}   indicesnorm
stop_early
output_fmtintermediates_onlyc                    US;   d   S5       e/ n[        [        U R                  5      U5      u  pU R                  U5      n[        R
                  R                  5       (       d  U(       d  U R                  n
OU R                  SU	S-    n
[        U
5       He  u  pU R                  (       a/  [        R
                  R                  5       (       d  [        X5      nOU" U5      nX;   d  MT  UR                  U5        Mg     U(       a  U$ X4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to compatible intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWzOutput shape must be NCHW.Nr   )r   r   rR  rO  r1   r   is_scriptingr   rN  r   r   )r5   r}   r|  r}  r~  r  r  intermediatestake_indices	max_indexrR  feat_idxr^  s                r8   forward_intermediates&EfficientVitMsra.forward_intermediatesH  s    * Y&D(DD&"6s4;;7G"Q Q99!!##:[[F[[)a-0F(0OH&&uyy/E/E/G/Gu(!H'$$Q'  1   r:   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr   r   r   )r   r   rR  rz  )r5   r|  r  r  r  r  s         r8   prune_intermediate_layers*EfficientVitMsra.prune_intermediate_layersv  sK     #7s4;;7G"Qkk.9q=1!!!R(r:   c                     U R                  U5      nU R                  (       a;  [        R                  R	                  5       (       d  [        U R                  U5      nU$ U R                  U5      nU$ r{   )rO  rN  r1   r   r  r   rR  r|   s     r8   forward_features!EfficientVitMsra.forward_features  sX    Q""599+A+A+C+Ct{{A.A  AAr:   
pre_logitsc                 X    U R                  U5      nU(       a  U$ U R                  U5      $ r{   )rB  rU  )r5   r}   r  s      r8   forward_headEfficientVitMsra.forward_head  s'    Qq0DIIaL0r:   c                 J    U R                  U5      nU R                  U5      nU$ r{   )r  r  r|   s     r8   r~   EfficientVitMsra.forward  s'    !!!$a r:   )rC  rP  rB  rN  rU  rT  r2  r>  rS  rO  rR  )   rq     @         )r8  r8  r8  r   r=   rq   rp   rp   rp   r  r  r  r   )r  r%  r=   r  rH  rh   NNr   Fr{   )NFFr  F)r   FT)!rL   rM   rN   rO   rP   r	   r   rQ   r+   ri   rV  r,   r   rb  r1   r   ignorerj  rp  rt  rw  r   rz  r   r   r   r  r  r  r  r~   rS   rT   rU   s   @r8   r   r     s     #)7'3%.)2+4'34a$!C-C- C- 	C-
 S#XC- 38_C- c?C- S#XC- sCxC- 38_C- E#s(OS01C- C- C- C-JI I!ryy !t !t ! YYP P YY  YY) ) YY 		    	mC 	mhsm 	m 8<$$',, ||,  eCcN34,  	, 
 ,  ,  !%,  
tELL!5tELL7I)I#JJ	K, ` ./$#	3S	>*  	1$ 1 r:   c           
      .    U S[         [        SSSSSS.	UE$ )Nr  zpatch_embed.conv1.convzhead.linearT)rp   rp   mit)	urlr>  meanr[   
first_conv
classifierfixed_input_size	pool_sizelicenser   )r  kwargss     r8   _cfgr    s1    %#.#   r:   ztimm/)	hf_hub_id)zefficientvit_m0.r224_in1kzefficientvit_m1.r224_in1kzefficientvit_m2.r224_in1kzefficientvit_m3.r224_in1kzefficientvit_m4.r224_in1kzefficientvit_m5.r224_in1kc           	      b    UR                  SS5      n[        [        U U4S[        SUS90UD6nU$ )Nout_indices)r   r   r=   feature_cfgT)flatten_sequentialr  )popr   r   rQ  )variant
pretrainedr  r  models        r8   _create_efficientvit_msrar    sG    **]I6K  DkJ	
 E Lr:   c           	      `    [        S/ SQ/ SQ/ SQ/ SQ/ SQS9n[        S	SU 0[        U40 UD6D6$ )
Nr  r  r  r  r  r   r=  r?  r#  r   r@  r   r  )efficientvit_m0rQ  r  r  r  
model_argss      r8   r  r    s@     J %l:lQUV`QkdjQkllr:   c           	      `    [        S/ SQ/ SQ/ SQ/ SQ/ SQS9n[        S	SU 0[        U40 UD6D6$ )
Nr  )r     r  r  )r=   rq   rq   r  r  r   rq   rq   r  r  )efficientvit_m1r  r  s      r8   r  r    @    !J %l:lQUV`QkdjQkllr:   c           	      `    [        S/ SQ/ SQ/ SQ/ SQ/ SQS9n[        S	SU 0[        U40 UD6D6$ )
Nr  )r  r  r  r  )rp   rq   r=   r  r  r  r  )efficientvit_m2r  r  s      r8   r  r  	  r  r:   c           	      `    [        S/ SQ/ SQ/ SQ/ SQ/ SQS9n[        S	SU 0[        U40 UD6D6$ )
Nr  )r     i@  r  )rp   rq   rp   r  r   r  r  )efficientvit_m3r  r  s      r8   r  r    r  r:   c           	      `    [        S/ SQ/ SQ/ SQ/ SQ/ SQS9n[        S	SU 0[        U40 UD6D6$ )
Nr  )r       r  r  r  r  r  r  )efficientvit_m4r  r  s      r8   r  r  #  r  r:   c           	      `    [        S/ SQ/ SQ/ SQ/ SQ/ SQS9n[        S	SU 0[        U40 UD6D6$ )
Nr  )r  i   r  )r   rq   rp   )rq   rq   rp   r  r  r  r  )efficientvit_m5r  r  s      r8   r  r  0  r  r:   )r   r  )9r  __all__r   collectionsr   	functoolsr   typingr   r   r   r	   r
   r   r1   torch.nnr,   	timm.datar   r   timm.layersr   r   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   r   r   rW   r   rk   r   r   r   r   r  r  r1  r   r  default_cfgsr  r  r  r  r  r  r  rM  r:   r8   <module>r     s   
  #  ; ;   A S S * + 3 < uxx""  F#$$ #L588?? ,!588?? !ehhoo &uUXX__ upB588?? BJ-G -G`; ;|UXX(( (zryy z~ %!%" "&" "&" "&" "&" "&"+& 8	 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	m 	mr:   