
    RЦi                        S SK r S SKJr  S SKJrJrJrJrJr  S SK	r	S SK
Jr  S SKJrJrJrJr  S SKJrJrJrJrJrJrJrJrJr  SSKJr  SSKJr  SS	KJ r   SS
K!J"r"J#r#  S/r$S r% " S S\RL                  5      r' " S S\RL                  5      r(\RR                  SSSS4S\*S\*S\\RL                     S\+S\+S\RX                  4S jjr- " S S\RL                  5      r. " S S\RL                  5      r/ " S S\RL                  5      r0 " S  S!\RL                  5      r1 " S" S#\RL                  5      r2 " S$ S%\RL                  5      r3 " S& S'\RL                  5      r4 " S( S)\RL                  5      r5 " S* S+\RL                  5      r6 " S, S\RL                  5      r7ScS- jr8\#" 0 S.\8" S/S09_S1\8" S/S09_S2\8" S/S09_S3\8" S/S09_S4\8" S/S09_S5\8" S/S09_S6\8" S/S7S89_S9\8" S/S09_S:\8" S/S09_S;\8" S/S09_S<\8" S/S09_S=\8" S/S09_S>\8" S/S09_S?\8" S/S7S89_S@\8" SASBS7SCSDSESFSG9_SH\8" SISJS7SCSDSESFSG9_SK\8" SLSMS7SCSDSESFSG9_\8" S/SNSCSDSESFSO9\8" S/S7SCSDSESFSO9\8" S/S7SP\\SQSRSFSS9\8" S/S7SP\\SQSRSFSS9ST.E5      r9SU r:SdSV jr;\"SdSW j5       r<\"SdSX j5       r=\"SdSY j5       r>\"SdSZ j5       r?\"SdS[ j5       r@\"SdS\ j5       rA\"SdS] j5       rB\"SdS^ j5       rC\"SdS_ j5       rD\"SdS` j5       rE\"SdSa j5       rF\"SdSb j5       rGg)e    N)partial)ListOptionalTupleTypeUnion)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)	DropPathcalculate_drop_path_ratestrunc_normal_create_conv2dConvNormActSqueezeExciteuse_fused_attnClassifierHeadLayerNorm2d   )build_model_with_cfg)feature_take_indices)checkpoint_seq)register_modelgenerate_default_cfgsFastVitc                 .    U (       d  gX-  S:X  d   eX-  $ )Nr   r    )
group_sizechannelss     R/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/fastvit.py
num_groupsr"   "   s$     $)))%%    c                     ^  \ rS rSrSrSSSSSSSS\R                  SS4S\S	\S
\S\S\S\S\S\S\S\S\S\	\R                     SS4U 4S jjjrS\R                  S\R                  4S jrS rS\\R                  \R                  4   4S jrS\\R&                  \R(                  4   S\\R                  \R                  4   4S jrSrU =r$ )MobileOneBlock+   a  MobileOne building block.

This block has a multi-branched architecture at train-time
and plain-CNN style architecture at inference time
For more details, please refer to our paper:
`An Improved One millisecond Mobile Backbone` -
https://arxiv.org/pdf/2206.04040.pdf
r   r   FTNin_chsout_chskernel_sizestridedilationr   inference_modeuse_seuse_actuse_scale_branchnum_conv_branches	act_layerreturnc                   > XS.n[         TU ]  5         Xpl        [        Xa5      U l        X@l        XPl        X0l        Xl        X l	        Xl
        U(       a  [        U4SS0UD6O[        R                  " 5       U l        U(       a$  [        UU4UUUU R                  SS.UD6U l        GO	SU l        X!:X  a  US:X  a  [        R"                  " SSU0UD6OSU l        US:  ax  [        R&                  " [)        U R                  5       Vs/ s H=  n[+        U R                  U R                  4UU R
                  U R                  S	S
.UD6PM?     sn5      U l        OSU l        SU l        US:  aF  U
(       a?  [+        U R                  U R                  4SU R
                  U R                  S	S
.UD6U l        U	(       a  U" 5       U l        g[        R                  " 5       U l        gs  snf )aO  Construct a MobileOneBlock module.

Args:
    in_chs: Number of channels in the input.
    out_chs: Number of channels produced by the block.
    kernel_size: Size of the convolution kernel.
    stride: Stride size.
    dilation: Kernel dilation factor.
    group_size: Convolution group size.
    inference_mode: If True, instantiates model in inference mode.
    use_se: Whether to use SE-ReLU activations.
    use_act: Whether to use activation. Default: ``True``
    use_scale_branch: Whether to use scale branch. Default: ``True``
    num_conv_branches: Number of linear conv branches.
devicedtype
rd_divisorr   Tr)   r*   r+   groupsbiasNnum_featuresr   Fr)   r*   r9   	apply_actr   )super__init__r,   r"   r9   r*   r+   r)   r'   r(   r0   r   nnIdentityser   reparam_convBatchNorm2didentity
ModuleListranger   conv_kxk
conv_scaleact)selfr'   r(   r)   r*   r+   r   r,   r-   r.   r/   r0   r1   r5   r6   dd_	__class__s                    r!   r?   MobileOneBlock.__init__5   s   @ /, 4 &!2 AG-<A<<BKKM -	! (!{{	! 	!D !%D $1 9F9b9 M !1$ "  %T%;%;<
/  =!   %0#{{#{{"'   =
/ 
! !% #DOQ#3"-KKLL# !";;;;## # #*9;r{{}7
/s   AG xc                    U R                   b/  U R                  U R                  U R                  U5      5      5      $ SnU R                  b  U R                  U5      nSnU R                  b  U R	                  U5      nX2-   nU R
                  b  U R
                   H  nXE" U5      -  nM     U R                  U R                  U5      5      $ )zApply forward pass.r   )rC   rJ   rB   rE   rI   rH   )rK   rP   identity_out	scale_outoutrcs         r!   forwardMobileOneBlock.forward   s     (88DGGD$5$5a$89:: ==$==+L 	??&*I &==$mmr!u $ xx%%r#   c           
      D   U R                   b  gU R                  5       u  p[        U R                  U R                  U R
                  U R                  U R                  U R                  SS9U l         XR                   R                  l
        X R                   R                  l
        U R                  5        H  u  p4SU;   a  M  UR                  5         M     U R                  S5        U R                  S5        [        U S5      (       a  U R                  S5        SU l        g)zFollowing works like `RepVGG: Making VGG-style ConvNets Great Again` -
https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
architecture used at training time to obtain a plain CNN-like structure
for inference.
NT)in_channelsout_channelsr)   r*   r+   r9   r:   rC   rH   rI   rE   )rC   _get_kernel_biasr   r'   r(   r)   r*   r+   r9   weightdatar:   named_parametersdetach___delattr__hasattrr,   )rK   kernelr:   nameparas        r!   reparameterizeMobileOneBlock.reparameterize   s     (,,.)((;;]];;
 )/  %&*# //1JD%LLN 2
 	$&4$$Z("r#   c                    SnSnU R                   bX  U R                  U R                   5      u  pU R                  S-  n[        R                  R
                  R                  XX3U/5      nSnSnU R                  b  U R                  U R                  5      u  pESnSnU R                  bD  [        U R                  5       H+  nU R                  U R                  U   5      u  pXi-  nXz-  nM-     Xa-   U-   nXr-   U-   nX4$ )zMethod to obtain re-parameterized kernel and bias.
Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83

Returns:
    Tuple of (kernel, bias) after fusing branches.
r      )rI   _fuse_bn_tensorr)   torchr@   
functionalpadrE   rH   rG   r0   )rK   kernel_scale
bias_scalerl   kernel_identitybias_identitykernel_conv	bias_convix_kernel_biaskernel_final
bias_finals                r!   r[   MobileOneBlock._get_kernel_bias   s	    
??&'+';';DOO'L$L""a'C 88..22<sQTAUVL ==$-1-A-A$---P*O 	==$D223!%!5!5dmmB6G!H&"	 4
 #1OC+m;
''r#   branchc                    [        U[        5      (       a  UR                  R                  nUR                  R
                  nUR                  R                  nUR                  R                  nUR                  R                  nUR                  R                  nGO9[        U[        R                  5      (       d   e[        U S5      (       d  U R                  U R                  -  n[        R                  " U R                  XR                   U R                   4UR                  R"                  UR                  R$                  S9n	['        U R                  5       H(  n
SU	XU-  U R                   S-  U R                   S-  4'   M*     Xl        U R(                  nUR
                  nUR                  nUR                  nUR                  nUR                  nXG-   R+                  5       nX[-  R-                  SSSS5      nX,-  XcU-  U-  -
  4$ )zMethod to fuse batchnorm layer with preceding conv layer.
Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95

Args:
    branch: Sequence of ops to be fused.

Returns:
    Tuple of (kernel, bias) after fusing batchnorm.
	id_tensorr6   r5   r   rh   )
isinstancer   convr\   bnrunning_meanrunning_varr:   epsr@   rD   ra   r'   r9   rj   zerosr)   r6   r5   rG   r{   sqrtreshape)rK   ry   rb   r   r   gammabetar   	input_dimkernel_valueistdts                r!   ri   MobileOneBlock._fuse_bn_tensor   s    fk**[[''F!9911L ))//KII$$E99>>D))--Cfbnn55554-- KK4;;6	${{[[)-=-=t?O?OP ----!==// 
 t{{+A  !y=$*:*:a*?AQAQUVAVV , ".^^F!..L ,,KMME;;D**C &&([!!"aA.z4"6"<<<<r#   )rJ   rH   rI   r+   r9   r{   rE   r'   r,   r)   r0   r(   rC   rB   r*   )__name__
__module____qualname____firstlineno____doc__r@   GELUintboolr   Moduler?   rj   TensorrV   re   r   r[   r   
SequentialrD   ri   __static_attributes____classcell__rN   s   @r!   r%   r%   +   sQ    #(  %)%&)+`=`= `= 	`=
 `= `= `= !`= `= `= #`=  #`= BII`=  
!`= `=D& &%,, &2!#F!(%ell(B"C !(F*="--78*= 
u||U\\)	**= *=r#   r%   c                     ^  \ rS rSrSr      SS\S\S\S\S\S	\\   S
\S\\R                     S\SS4U 4S jjjr
S\R                  S\R                  4S jrS\\R                  \R                  4   4S jrSS jr\S\R$                  S\R&                  S\\R                  \R                  4   4S j5       rSrU =r$ )ReparamLargeKernelConvi#  zBuilding Block of RepLKNet

This class defines overparameterized large kernel conv block
introduced in `RepLKNet <https://arxiv.org/abs/2203.06717>`_

Reference: https://github.com/DingXiaoH/RepLKNet-pytorch
Nr'   r(   r)   r*   r   small_kernelr-   r1   r,   r2   c           	      d  > XS.n[         TU ]  5         X@l        [        XQ5      U l        Xl        X l        X0l        X`l        U	(       a#  [        UU4UUSU R                  SS.UD6U l
        OlSU l
        [        UU4UU R                  U R                  SS.UD6U l        Ub7  Xc::  d   S5       e[        UU4UU R                  U R                  SS.UD6U l        U(       a  [        U4S	S
0UD6O[        R                   " 5       U l        Ub  U" 5       U l        g[        R                   " 5       U l        g)a  Construct a ReparamLargeKernelConv module.

Args:
    in_chs: Number of input channels.
    out_chs: Number of output channels.
    kernel_size: Kernel size of the large kernel conv branch.
    stride: Stride size. Default: 1
    group_size: Group size. Default: 1
    small_kernel: Kernel size of small kernel conv branch.
    act_layer: Activation module. Default: ``nn.GELU``
    inference_mode: If True, instantiates model in inference mode. Default: ``False``
r4   r   Tr8   NFr<   zDThe kernel size for re-param cannot be larger than the large kernel!rd_ratiog      ?)r>   r?   r*   r"   r9   r'   r(   r)   r   r   rC   r   
large_conv
small_convr   r@   rA   rB   rJ   )rK   r'   r(   r)   r*   r   r   r-   r1   r,   r5   r6   rL   rN   s                r!   r?   ReparamLargeKernelConv.__init__,  sO   4 / 4&( -	! ({{	! 	!D !%D) ({{{{ DO ' /ZYZ/"-# !-;;;;## # BH-=$="=R[[]"+"79;R[[]r#   rP   c                     U R                   b  U R                  U5      nO1U R                  U5      nU R                  b  X R                  U5      -   nU R                  U5      nU R	                  U5      nU$ N)rC   r   r   rB   rJ   )rK   rP   rT   s      r!   rV   ReparamLargeKernelConv.forwardv  sf    (##A&C//!$C*OOA..ggclhhsm
r#   c                    U R                  U R                  R                  U R                  R                  5      u  p[	        U S5      (       a  U R                  U R
                  R                  U R
                  R                  5      u  p4X$-  nU[        R                  R                  X0R                  U R                  -
  S-  /S-  5      -  nX4$ )zMethod to obtain re-parameterized kernel and bias.
Reference: https://github.com/DingXiaoH/RepLKNet-pytorch

Returns:
    Tuple of (kernel, bias) after fusing branches.
r   rh      )_fuse_bnr   r   r   ra   r   r@   rk   rl   r)   r   )rK   eq_keq_bsmall_ksmall_bs        r!   get_kernel_bias&ReparamLargeKernelConv.get_kernel_bias  s     ]]4??#7#79K9KL
4&&#}}T__-A-A4??CUCUVGODBMM%%++d.?.??AEFJ D zr#   c           	         U R                  5       u  p[        U R                  U R                  U R                  U R
                  U R                  SS9U l        XR                  R                  l	        X R                  R                  l	        U R                  S5        [        U S5      (       a  U R                  S5        gg)z
Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
architecture used at training time to obtain a plain CNN-like structure
for inference.
Tr)   r*   r9   r:   r   r   N)r   r   r'   r(   r)   r*   r9   rC   r\   r]   r:   r`   ra   )rK   r   r   s      r!   re   %ReparamLargeKernelConv.reparameterize  s     ))+
)KKLL((;;;;
 )-  %&*#&4&&\* 'r#   r   r   c                     U R                   nUR                  nUR                  nUR                   nUR                  nUR                  nXG-   R                  5       nXX-  R                  SSSS5      n	X)-  XcU-  U-  -
  4$ )zMethod to fuse batchnorm layer with conv layer.

Args:
    conv: Convolutional kernel weights.
    bn: Batchnorm 2d layer.

Returns:
    Tuple of (kernel, bias) after fusing batchnorm.
r}   r   )r\   r   r   r:   r   r   r   )
r   r   rb   r   r   r   r   r   r   r   s
             r!   r   ReparamLargeKernelConv._fuse_bn  s}     nn		wwff &&([!!"aA.z4"6"<<<<r#   )rJ   r9   r'   r)   r   r(   rC   rB   r   r   r*   )NFNFNNr2   N)r   r   r   r   r   r   r   r   r@   r   r?   rj   r   rV   r   r   re   staticmethodConv2drD   r   r   r   r   s   @r!   r   r   #  s4    +/ -1#(HKHK HK 	HK
 HK HK #3-HK HK  		*HK !HK 
HK HKT	 	%,, 	u||U\\'A!B  +. =))== 
u||U\\)	*= =r#   r   FTr'   r(   r1   r,   r/   r2   c                     XVS.n[         R                  " [        SU USSUUUS.UD6[        SUUSSSUUUS.UD6[        SUUSSUUUS.UD65      $ )a  Build convolutional stem with MobileOne blocks.

Args:
    in_chs: Number of input channels.
    out_chs: Number of output channels.
    inference_mode: Flag to instantiate model in inference mode. Default: ``False``

Returns:
    nn.Sequential object with stem elements.
r4      rh   )r'   r(   r)   r*   r1   r,   r/   r   )r'   r(   r)   r*   r   r1   r,   r/   r   )r@   r   r%   )r'   r(   r1   r,   r/   r5   r6   rL   s           r!   convolutional_stemr     s    & 	+B== 		
)-		
 		
 	 
	
)-
	
 
	
 	 		
)-		
 		
-   r#   c                      ^  \ rS rSr% Sr\R                  R                  \   \	S'         SS\
S\
S\S\S	\S
S4U 4S jjjrS\R                  S
\R                  4S jrSrU =r$ )	Attentioni  zMulti-headed Self Attention module.

Source modified from:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py

fused_attnNdimhead_dimqkv_bias	attn_drop	proj_dropr2   c                 ~  > XgS.n[         T	U ]  5         X-  S:X  d   S5       eX l        X-  U l        US-  U l        [        5       U l        [        R                  " XS-  4SU0UD6U l	        [        R                  " U5      U l        [        R                  " X40 UD6U l        [        R                  " U5      U l        g)aE  Build MHSA module that can handle 3D or 4D input tensors.

Args:
    dim: Number of embedding dimensions.
    head_dim: Number of hidden dimensions per head. Default: ``32``
    qkv_bias: Use bias or not. Default: ``False``
    attn_drop: Dropout rate for attention tensor.
    proj_drop: Dropout rate for projection tensor.
r4   r   z#dim should be divisible by head_dimg      r   r:   N)r>   r?   r   	num_headsscaler   r   r@   LinearqkvDropoutr   projr   )
rK   r   r   r   r   r   r5   r6   rL   rN   s
            r!   r?   Attention.__init__   s    & /~"I$II" %
(*99S'??B?I.IIc-"-	I.r#   rP   c                 F   UR                   u  p#pEXE-  nUR                  S5      R                  SS5      nU R                  U5      R	                  X&SU R
                  U R                  5      R                  SSSSS5      nUR                  S5      u  pn
U R                  (       aQ  [        R                  R                  R                  XU
U R                  (       a  U R                  R                   OSS	9nOFXR"                  -  nXR                  SS5      -  nUR%                  SS
9nU R                  U5      nX-  nUR                  SS5      R	                  X&U5      nU R'                  U5      nU R)                  U5      nUR                  SS5      R	                  X#XE5      nU$ )Nrh   r}   r   r   r   r           )	dropout_p)r   )shapeflatten	transposer   r   r   r   permuteunbindr   rj   r@   rk   scaled_dot_product_attentiontrainingr   pr   softmaxr   r   )rK   rP   BCHWNr   qkvattns               r!   rV   Attention.forward   s]   WW
aEIIaL""2r*HHQKWQ1dnndmm<WQ1a# 	
 **Q-a??##@@a.2mm$..** A A
 JJA{{2r**D<<B<'D>>$'DAKK1%%aA.IIaLNN1KKB''a3r#   )r   r   r   r   r   r   r   r   )    Fr   r   NN)r   r   r   r   r   rj   jitFinalr   __annotations__r   floatr?   r   rV   r   r   r   s   @r!   r   r     s    
 		%%
 """// / 	/
 / / 
/ /@ %,,  r#   r   c                      ^  \ rS rSrSr\R                  SSSSS4S\S\S\S\S	\\R                     S
\
S\
S\
SS4U 4S jjjrS\R                  S\R                  4S jrSrU =r$ )
PatchEmbedi?  z$Convolutional patch embedding layer.FN
patch_sizer*   r'   	embed_dimr1   lkc_use_actr-   r,   r2   c                    > XS.n[         TU ]  5         [        R                  " [	        SUUUUSSUU(       a  UOSUS.	UD6[        SUUSSSUUS.UD65      U l        g)	aC  Build patch embedding layer.

Args:
    patch_size: Patch size for embedding computation.
    stride: Stride for convolutional embedding layer.
    in_chs: Number of channels of input tensor.
    embed_dim: Number of embedding dimensions.
    inference_mode: Flag to instantiate model in inference mode. Default: ``False``
r4   r   r   N)	r'   r(   r)   r*   r   r   r-   r1   r,   F)r'   r(   r)   r*   r-   r1   r,   r   )r>   r?   r@   r   r   r%   r   )rK   r   r*   r'   r   r1   r   r-   r,   r5   r6   rL   rN   s               r!   r?   PatchEmbed.__init__B  s    , /MM" !&'2)-   	 !#-	 	
	r#   rP   c                 (    U R                  U5      nU$ r   r   rK   rP   s     r!   rV   PatchEmbed.forwards  s    IIaLr#   r   )r   r   r   r   r   r@   r   r   r   r   r   r?   rj   r   rV   r   r   r   s   @r!   r   r   ?  s    . *, % #(/
/
 /
 	/

 /
 BII/
 /
 /
 !/
 
/
 /
b %,,  r#   r   c                   J   ^  \ rS rSr    SS\S\S\4U 4S jjjrS rSr	U =r
$ )	LayerScale2dix  r   init_valuesinplacec                    > [         TU ]  5         X0l        [        R                  " U[
        R                  " USSXES9-  5      U l        g )Nr   r4   )r>   r?   r   r@   	Parameterrj   onesr   )rK   r   r   r   r5   r6   rN   s         r!   r?   LayerScale2d.__init__y  s9     	\\+

31V0a"ab
r#   c                 v    U R                   (       a  UR                  U R                  5      $ XR                  -  $ r   )r   mul_r   r   s     r!   rV   LayerScale2d.forward  s&    %)\\qvvdjj!Eq::~Er#   )r   r   )h㈵>FNN)r   r   r   r   r   r   r   r?   rV   r   r   r   s   @r!   r   r   x  sH     "&!
c
c 
c 	
c 
cF Fr#   r   c            	          ^  \ rS rSrSr     SS\S\S\\   S\4U 4S jjjr	S\
R                  S	\
R                  4S
 jrSS jrSrU =r$ )RepMixeri  zReparameterizable token mixer.

For more details, please refer to our paper:
`FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization <https://arxiv.org/pdf/2303.14189.pdf>`_
r   r)   layer_scale_init_valuer,   c           	        > XVS.n[         TU ]  5         Xl        X l        X@l        U(       aY  [
        R                  " U R                  U R                  4U R                  SU R                  S-  U R                  SS.UD6U l        gSU l        [        UUU4SSSSS	.UD6U l	        [        UUU4SSS
.UD6U l
        Ub  [        X40 UD6U l        g[
        R                  " 5       U l        g)ah  Build RepMixer Module.

Args:
    dim: Input feature map dimension. :math:`C_{in}` from an expected input of size :math:`(B, C_{in}, H, W)`.
    kernel_size: Kernel size for spatial mixing. Default: 3
    layer_scale_init_value: Initial value for layer scale. Default: 1e-5
    inference_mode: If True, instantiates model in inference mode. Default: ``False``
r4   r   rh   Tr)   r*   paddingr9   r:   NFr   )r   r.   r/   r0   )r   r.   )r>   r?   r   r)   r,   r@   r   rC   r%   normmixerr   layer_scalerA   )	rK   r   r)   r  r,   r5   r6   rL   rN   s	           r!   r?   RepMixer.__init__  s   " /&, "			! !,,((A-xx	! 	!D !%D&	 !&"#	 	DI (  DJ &1#/#Rr#R #%;;= r#   rP   r2   c                     U R                   b  U R                  U5      nU$ XR                  U R                  U5      U R                  U5      -
  5      -   nU$ r   )rC   r  r  r  r   s     r!   rV   RepMixer.forward  sT    (!!!$A  $$TZZ]TYYq\%ABBAr#   c           	      |   U R                   (       a  gU R                  R                  5         U R                  R                  5         [	        U R
                  [        5      (       a  U R                  R                  U R
                  R                  R                  S5      U R                  R                  R                  U R                  R                  R                  -
  -  -   n[        R                  " U R
                  R                  5      U R                  R                  R                  U R                  R                  R                  -
  -  nOU R                  R                  U R                  R                  R                  -   U R                  R                  R                  -
  nU R                  R                  R                  U R                  R                  R                  -
  n[        U R                   U R                   U R"                  SU R                   SS9U l
        XR                  R                  l        X R                  R                  l        U R'                  5        H  u  p4SU;   a  M  UR)                  5         M     U R+                  S5        U R+                  S5        U R+                  S	5        g)
zYReparameterize mixer and norm into a single
convolutional layer for efficient inference.
Nr}   r   Tr   rC   r  r  r  )r,   r  re   r  r~   r  r   r{   r   	unsqueezerC   r\   rj   squeezer:   r   r   r)   r]   r^   r_   r`   )rK   wbrc   rd   s        r!   re   RepMixer.reparameterize  s    

!!#		  "d&&55

$$t'7'7'='='G'G'K

''..1G1G1N1NN( A d..445

'',,tyy/E/E/J/JJA
 

$$**))001))((//0 
 

'',,tyy/E/E/J/JJA)HHHH((88
 )*  %&'#//1JD%LLN 2 	! 'r#   )r   r,   r)   r  r  r  rC   )r   r   FNNr   )r   r   r   r   r   r   r   r   r   r?   rj   r   rV   re   r   r   r   s   @r!   r   r     ss      !6:#(9191 91 %-UO	91
 !91 91v %,, *( *(r#   r   c                      ^  \ rS rSrSrSS\R                  SSS4S\S\\   S\\   S\	\R                     S	\S
S4U 4S jjjrS\R                  S
S4S jrS\R                  S
\R                  4S jrSrU =r$ )ConvMlpi  zConvolutional FFN Module.Nr   r'   hidden_channelsr(   r1   dropr2   c                 ~  > XgS.n[         T	U ]  5         U=(       d    UnU=(       d    Un[        UU4SUSS.UD6U l        [        R
                  " X4SS0UD6U l        U" 5       U l        [        R
                  " X#4SS0UD6U l        [        R                  " U5      U l
        U R                  U R                  5        g)a'  Build convolutional FFN module.

Args:
    in_chs: Number of input channels.
    hidden_channels: Number of channels after expansion. Default: None
    out_chs: Number of output channels. Default: None
    act_layer: Activation layer. Default: ``GELU``
    drop: Dropout rate. Default: ``0.0``.
r4      F)r)   r9   r=   r)   r   N)r>   r?   r   r   r@   r   fc1rJ   fc2r   r  apply_init_weights)
rK   r'   r  r(   r1   r  r5   r6   rL   rN   s
            r!   r?   ConvMlp.__init__  s    & /#V)3V
 
 
	 99VJ!JrJ;99_K1KKJJt$	

4%%&r#   mc                     [        U[        R                  5      (       aM  [        UR                  SS9  UR
                  b+  [        R                  R                  UR
                  S5        g g g )N{Gz?r   r   )r~   r@   r   r   r\   r:   init	constant_rK   r  s     r!   r  ConvMlp._init_weights'  sM    a##!((-vv!!!!&&!, " $r#   rP   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  U5      nU$ r   )r   r  rJ   r  r  r   s     r!   rV   ConvMlp.forward-  sU    IIaLHHQKHHQKIIaLHHQKIIaLr#   )rJ   r   r  r  r  )r   r   r   r   r   r@   r   r   r   r   r   r   r?   r  rj   r   rV   r   r   r   s   @r!   r  r    s    #
 .2%))+#'#' &c]#' c]	#'
 BII#' #' 
#' #'J-ryy -T - %,,  r#   r  c                      ^  \ rS rSrSr     SS\S\\   S\\\\\4   4   S\	SS4
U 4S	 jjjr
S
\R                  S\R                  4S jrSS jrSrU =r$ )RepConditionalPosEnci7  a  Implementation of conditional positional encoding.

For more details refer to paper:
`Conditional Positional Encodings for Vision Transformers <https://arxiv.org/pdf/2102.10882.pdf>`_

In our implementation, we can reparameterize this module to eliminate a skip connection.
Nr   dim_outspatial_shaper,   r2   c           
        > XVS.n[         TU ]  5         [        U[        5      (       a  [	        U/S-  5      n[        U[
        5      (       d   S[        U5       S35       e[        U5      S:X  d   S[        U5       S35       eX0l        Xl	        U=(       d    UU l
        Xl        U(       aR  [        R                  " U R                  U R                  4U R                  SUS   S-  U R                  SS	.UD6U l        g
S
U l        [        R                  " U R                  U R                  US[        US   S-  5      4U R                  SS.UD6U l        g
)aD  Build reparameterizable conditional positional encoding

Args:
    dim: Number of input channels.
    dim_out: Number of embedding dimensions. Default: 768
    spatial_shape: Spatial shape of kernel for positional encoding. Default: (7, 7)
    inference_mode: Flag to instantiate block in inference mode. Default: ``False``
r4   rh   z/"spatial_shape" must by a sequence or int, get z	 instead.z+Length of "spatial_shape" should be 2, got r   r   Tr  N)r9   r:   )r>   r?   r~   r   tupler   typelenr)  r   r(  r9   r@   r   rC   pos_enc)	rK   r   r(  r)  r,   r5   r6   rL   rN   s	           r!   r?   RepConditionalPosEnc.__init__@  sh   " /mS))!=/A"56M-// 	
&'y2	
/ =!Q& 	
}%&i1	
&
 +~# "			! !..%a(A-{{	! 	!D !%D99M!$)*	 {{	 	DLr#   rP   c                 n    U R                   b  U R                  U5      nU$ U R                  U5      U-   nU$ r   )rC   r.  r   s     r!   rV   RepConditionalPosEnc.forward{  s>    (!!!$A  Q!#Ar#   c           
         U R                   U R                  -  n[        R                  " U R                   UU R                  S   U R                  S   4U R
                  R                  R                  U R
                  R                  R                  S9n[        U R                   5       H.  nSUUX1-  U R                  S   S-  U R                  S   S-  4'   M0     UnX@R
                  R                  -   nU R
                  R                  n[        R                  " U R                   U R                  U R                  S[        U R                  S   S-  5      U R                  SS9U l        XPR                  R                  l        X`R                  R                  l        U R#                  5        H  u  pxSU;   a  M  UR%                  5         M     U R'                  S5        g )	Nr   r   r|   rh   Tr  rC   r.  )r   r9   rj   r   r)  r.  r\   r6   r5   rG   r:   r@   r   r(  r   rC   r]   r^   r_   r`   )	rK   r   r   r   r{   w_finalb_finalrc   rd   s	            r!   re   #RepConditionalPosEnc.reparameterize  s   HH+	{{""1%""1%	 ,,%%++<<&&--	
 txxA  ""1%*""1%*, ! !	 ll111,,## IIHHLL****1-23;;
 )0  %&-#//1JD%LLN 2 	#r#   )r   r(  r9   r.  rC   r)  )Nr  r  FNNr   )r   r   r   r   r   r   r   r   r   r   r?   rj   r   rV   re   r   r   r   s   @r!   r'  r'  7  s     &*9?#(99 c]9 !eCHo!56	9
 !9 
9 9v %,, +$ +$r#   r'  c                      ^  \ rS rSrSrSS\R                  SSSSSS4	S	\S
\S\S\	\R                     S\S\S\S\4U 4S jjjrS rSrU =r$ )RepMixerBlocki  zImplementation of Metaformer block with RepMixer as token mixer.

For more details on Metaformer structure, please refer to:
`MetaFormer Is Actually What You Need for Vision <https://arxiv.org/pdf/2111.11418.pdf>`_
r         @r   r   FNr   r)   	mlp_ratior1   r   	drop_pathr  r,   c                 T  > XS.n[         TU ]  5         [        U4UUUS.UD6U l        [	        SU[        X-  5      UUS.UD6U l        Ub  [        X40 UD6U l        O[        R                  " 5       U l        US:  a  [        U5      U l        g[        R                  " 5       U l        g)a  Build RepMixer Block.

Args:
    dim: Number of embedding dimensions.
    kernel_size: Kernel size for repmixer. Default: 3
    mlp_ratio: MLP expansion ratio. Default: 4.0
    act_layer: Activation layer. Default: ``nn.GELU``
    proj_drop: Dropout rate. Default: 0.0
    drop_path: Drop path rate. Default: 0.0
    layer_scale_init_value: Layer scale value at initialization. Default: 1e-5
    inference_mode: Flag to instantiate block in inference mode. Default: ``False``
r4   )r)   r  r,   r'   r  r1   r  Nr   r   )r>   r?   r   token_mixerr  r   mlpr   r  r@   rA   r   r;  )rK   r   r)   r:  r1   r   r;  r  r,   r5   r6   rL   rN   s               r!   r?   RepMixerBlock.__init__  s    2 /#
##9)	

 
  
0	

 
 "-+CN2ND!{{}D09C),R[[]r#   c                     U R                  U5      nXR                  U R                  U R                  U5      5      5      -   nU$ r   )r>  r;  r  r?  r   s     r!   rV   RepMixerBlock.forward  s;    Qt//<==r#   )r;  r  r?  r>  )r   r   r   r   r   r@   r   r   r   r   r   r   r?   rV   r   r   r   s   @r!   r8  r8    s      !")+"",0#(/S/S /S 	/S
 BII/S /S /S %*/S !/S /Sb r#   r8  c                      ^  \ rS rSrSrS\R                  \R                  SSSSS4S\S\	S	\
\R                     S
\
\R                     S\	S\	S\	4U 4S jjjrS rSrU =r$ )AttentionBlocki  zImplementation of metaformer block with MHSA as token mixer.

For more details on Metaformer structure, please refer to:
`MetaFormer Is Actually What You Need for Vision <https://arxiv.org/pdf/2111.11418.pdf>`_
r9  r   r   Nr   r:  r1   
norm_layerr   r;  r  c
                   > XS.n
[         TU ]  5         U" U40 U
D6U l        [        SSU0U
D6U l        Ub  [        X40 U
D6U l        O[        R                  " 5       U l        US:  a  [        U5      O[        R                  " 5       U l
        [        SU[        X-  5      UUS.U
D6U l        Ub  [        X40 U
D6U l        O[        R                  " 5       U l        US:  a  [        U5      U l        g[        R                  " 5       U l        g)a  Build Attention Block.

Args:
    dim: Number of embedding dimensions.
    mlp_ratio: MLP expansion ratio. Default: 4.0
    act_layer: Activation layer. Default: ``nn.GELU``
    norm_layer: Normalization layer. Default: ``nn.BatchNorm2d``
    proj_drop: Dropout rate. Default: 0.0
    drop_path: Drop path rate. Default: 0.0
    layer_scale_init_value: Layer scale value at initialization. Default: 1e-5
r4   r   Nr   r=  r   )r>   r?   r  r   r>  r   layer_scale_1r@   rA   r   
drop_path1r  r   r?  layer_scale_2
drop_path2)rK   r   r:  r1   rE  r   r;  r  r5   r6   rL   rN   s              r!   r?   AttentionBlock.__init__  s    . /s)b)	$333!-!-c!PR!PD!#D1:S(9-bkkm 
0	

 
 "-!-c!PR!PD!#D1:S(9-bkkmr#   c           
          XR                  U R                  U R                  U R                  U5      5      5      5      -   nXR	                  U R                  U R                  U5      5      5      -   nU$ r   )rH  rG  r>  r  rJ  rI  r?  r   s     r!   rV   AttentionBlock.forward$  sZ     2 243C3CDIIaL3Q RSS 2 2488A; ?@@r#   )rH  rJ  rG  rI  r?  r  r>  )r   r   r   r   r   r@   r   rD   r   r   r   r   r?   rV   r   r   r   s   @r!   rD  rD    s      #)+*,.."",0-T-T -T BII	-T
 RYY-T -T -T %*-T -T^ r#   rD  c            %       *  ^  \ rS rSrSSSSSSS\R
                  \R                  S	S	S
SSSS4S\S\S\S\S\	S\	S\S\S\
\R                     S\S\S\\R                     S\\R                     S\S\\\   \4   S\
\   S\	S\	4$U 4S jjjrS rSrU =r$ ) FastVitStagei*  TFr  rh   Nr   r9  r   r   r   r(  depthtoken_mixer_type
downsamplese_downsampledown_patch_sizedown_stridepos_emb_layerr)   r:  r1   rE  proj_drop_ratedrop_path_rater  r   r,   c                 X  > [         TU ]  5         UUS.nSU l        U(       a  [        SUUUUUUUUS.UD6U l        O!X:X  d   e[
        R                  " 5       U l        U	b  U	" U4SU0UD6U l        O[
        R                  " 5       U l        / n[        U5       Hv  nUS:X  a(  UR                  [        U4U
UUUUU   UUS.UD65        M1  US:X  a'  UR                  [        U4UUUUUU   US	.UD65        M^  [        S
R                  U5      5      e   [
        R                  " U6 U l        g)a  FastViT stage.

Args:
    dim: Number of embedding dimensions.
    depth: Number of blocks in stage
    token_mixer_type: Token mixer type.
    kernel_size: Kernel size for repmixer.
    mlp_ratio: MLP expansion ratio.
    act_layer: Activation layer.
    norm_layer: Normalization layer.
    proj_drop_rate: Dropout rate.
    drop_path_rate: Drop path rate.
    layer_scale_init_value: Layer scale value at initialization.
    inference_mode: Flag to instantiate block in inference mode.
r4   F)r   r*   r'   r   r-   r1   r   r,   Nr,   repmixer)r)   r:  r1   r   r;  r  r,   	attention)r:  r1   rE  r   r;  r  z"Token mixer type: {} not supportedr   )r>   r?   grad_checkpointingr   rR  r@   rA   pos_embrG   appendr8  rD  
ValueErrorformatr   blocks)rK   r   r(  rP  rQ  rR  rS  rT  rU  rV  r)   r:  r1   rE  rW  rX  r  r   r,   r5   r6   rL   ra  	block_idxrN   s                           r!   r?   FastVitStage.__init__+  sl   L 	/"'( 
*"!$#'-
 
DO >!> kkmDO$(VVSUVDL;;=DLuI:-m
 +'',,Y7+A#1
 
 
 "[0n	''),,Y7+A	 	 	 !8??@PQ 3 &8 mmV,r#   c                    U R                  U5      nU R                  U5      nU R                  (       a;  [        R                  R                  5       (       d  [        U R                  U5      nU$ U R                  U5      nU$ r   )rR  r]  r\  rj   r   is_scriptingr   ra  r   s     r!   rV   FastVitStage.forward  sc    OOALLO""599+A+A+C+Ct{{A.A  AAr#   )ra  rR  r\  r]  )r   r   r   r   r@   r   rD   r   strr   r   r   r   r   r   r   r?   rV   r   r   r   s   @r!   rO  rO  *  sC     $"'#$ 15 ")+*,..$'8;6: %#(+\-\- \- 	\-
 "\- \-  \- !\- \- $BII.\- \- \- BII\- RYY\- "\-  "$u+u"45!\-" %-UO#\-$ %\-& !'\- \-| r#   rO  c            3         ^  \ rS rSr% \R
                  R                  \   \S'    SSSSSSS	SS
SSSSSSSSSSSS\	R                  \	R                  SSS4S\S\\S4   S\\S4   S\\S4   S\\S4   S\\S4   S\\S4   S\S\S\\\	R"                     S4   S \S!\S"\S#\S$\S%\S&\S'\S\S(\S)\S*\\	R"                     S+\\	R"                     S,\S-S42U 4S. jjjrS/\	R"                  S-S4S0 jr\R
                  R*                  S1 5       r\R
                  R*                  SES2 j5       r\R
                  R*                  SFS3 j5       r\R
                  R*                  S-\	R"                  4S4 j5       rSGS\S)\\   4S5 jjr     SHS6\R6                  S7\\\\\   4      S8\S9\S:\S;\S-\\\R6                     \\R6                  \\R6                     4   4   4S< jjr   SIS7\\\\   4   S=\S>\4S? jjrS6\R6                  S-\R6                  4S@ jr SES6\R6                  SA\4SB jjr!S6\R6                  S-\R6                  4SC jr"SDr#U =r$$ )Jr   i  	fork_featr   rh   rh      rh   rZ  rZ  rZ  rZ  @            r   r   r   r   )FTTT)FFFF  )NNNNr  rh   r   r   FTg       @avgNin_chanslayers.token_mixers
embed_dims
mlp_ratiosdownsamplesse_downsamplesrepmixer_kernel_sizenum_classespos_embsrT  rU  	drop_raterW  rX  r  r   stem_use_scale_branch	cls_ratioglobal_poolrE  r1   r,   r2   c                   > [         T(U ]  5         UUS.nU(       a  SOU	U l        UU l        UU l        / U l        [        UUS   UU4SU0UD6U l        US   nSn[        XSS9n/ n[        [        U5      5       H  n UU    =(       d    UUU    :g  n![        S$0 SU_SUU    _S	UU    _S
U!_SUU    _SU_SU_SU
U    _SUU    _SU_SUU    _SU_SU_SU_SUU    _SU_SU_SU_UD6n"UR                  U"5        UU    nU!(       a  US-  nU =R
                  [        USU-  SU  3S9/-  sl        M     [        R                  " U6 U l        [        U R                   5      U l        U=U l        U l        U R                  (       a  / SQU l        [+        U R(                  5       Hk  u  n#n$U#S:X  a<  [,        R.                  R1                  SS 5      (       a   [        R2                  " 5       n%OU" UU#   40 UD6n%SU$ 3n&U R5                  U&U%5        Mm     OQ[7        US    U-  5      =U l        =U l        n'[9        S$US    U'S!SSUSUSS".	UD6U l        [=        U'U	4UUS#.UD6U l        U RA                  U RB                  5        g )%Nr4   r   r/   r   T)	stagewiser   r(  rP  rR  rS  rT  rU  rV  rQ  r)   r:  r1   rE  rW  rX  r  r   r,   rh   r   stages.)num_chs	reductionmoduler   r   rh   r   
FORK_LAST3r  r}   r   )	r'   r(   r)   r*   r   r,   r-   r1   r0   )	pool_typer  r   )"r>   r?   r}  ri  r  feature_infor   stemr   rG   r-  rO  r^  dictr@   r   stages
num_stagesr;   head_hidden_sizeout_indices	enumerateosenvirongetrA   
add_moduler   r%   
final_convr   headr  r  ))rK   ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  rT  rU  r  rW  rX  r  r   r  ri  r  r  rE  r1   r,   r5   r6   rL   prev_dimr   dprr  r   rR  stagei_embi_layerlayer
layer_namefinal_featuresrN   s)                                           r!   r?   FastVit.__init__  s=   : 	/ )1{"& 'qM	

 3
 
	 a='$Os6{#A$QD8z!}+DJ  "1 Qi &	
 -Q/ !0 ( 'qk ".a 1 %Q- $ &  .  #1v  (>!" (#$  .'E* MM% !!}H
$x1u9W^_`^aUb"c!dd7 $8 mmV,dkk*4<<D1 >>  ,D"+D,<,<"=wA:"**..t"D"D KKME&z%'8?B?E#G9-

E2 #> JMZXZ^^gMgIhhDh 5, !"~&-#"# DO ' &#	
 DI 	

4%%&r#   r  c                    [        U[        R                  5      (       am  [        UR                  SS9  [        U[        R                  5      (       a9  UR
                  b+  [        R                  R                  UR
                  S5        gggg)zInit. for classificationr  r  Nr   )r~   r@   r   r   r\   r:   r   r!  r"  s     r!   r  FastVit._init_weights  s`    a##!((-!RYY''AFF,>!!!&&!, -?' $r#   c                     [        5       $ r   )setrK   s    r!   no_weight_decayFastVit.no_weight_decay  s	    ur#   c                 0    [        SU(       a  SS9$ / SQS9$ )Nz^stemz^stages\.(\d+)))z^stages\.(\d+).downsampler   )z^stages\.(\d+).pos_embr  )z^stages\.(\d+)\.\w+\.(\d+)N)r  ra  )r  )rK   coarses     r!   group_matcherFastVit.group_matcher!  s'    (.$
 	
5
 	
r#   c                 6    U R                    H	  nXl        M     g r   )r  r\  )rK   enabless      r!   set_grad_checkpointingFastVit.set_grad_checkpointing,  s    A#)  r#   c                 .    U R                   R                  $ r   )r  fcr  s    r!   get_classifierFastVit.get_classifier1  s    yy||r#   c                 F    Xl         U R                  R                  X5        g r   )r}  r  reset)rK   r}  r  s      r!   reset_classifierFastVit.reset_classifier5  s    &		1r#   rP   indicesr  
stop_early
output_fmtintermediates_onlyc                    US;   d   S5       e/ n[        [        U R                  5      U5      u  pU R                  U5      nU R                  S-
  n
[
        R                  R                  5       (       d  U(       d  U R                  nOU R                  SU	S-    nSn[        U5       H%  u  pU" U5      nX;   d  M  UR                  U5        M'     U(       a  U$ X:X  a  U R                  U5      nX4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to compatible intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWzOutput shape must be NCHW.r   Nr   )r   r-  r  r  r  rj   r   re  r  r^  r  )rK   rP   r  r  r  r  r  intermediatestake_indices	max_indexlast_idxr  feat_idxr  s                 r!   forward_intermediatesFastVit.forward_intermediates9  s    * Y&D(DD&"6s4;;7G"Q IIaL??Q&99!!##:[[F[[)a-0F(0OHaA'$$Q'  1
   "Ar#   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr   r    )r   r-  r  r  )rK   r  r  r  r  r  s         r!   prune_intermediate_layers!FastVit.prune_intermediate_layersg  sK     #7s4;;7G"Qkk.9q=1!!!R(r#   c                 X   U R                  U5      n/ n[        U R                  5       HY  u  p4U" U5      nU R                  (       d  M   X0R                  ;   d  M1  [        U SU 35      nU" U5      nUR                  U5        M[     U R                  (       a  U$ U R                  U5      nU$ )Nr  )r  r  r  ri  r  getattrr^  r  )rK   rP   outsidxblockrE  x_outs          r!   forward_featuresFastVit.forward_featuresu  s    IIaL#DKK0JCaA~~~***!(cU|!<J&qMEKK& 1 >>KOOAr#   
pre_logitsc                 R    U(       a  U R                  USS9$ U R                  U5      $ )NT)r  )r  )rK   rP   r  s      r!   forward_headFastVit.forward_head  s$    0:tyyty,L		!Lr#   c                 p    U R                  U5      nU R                  (       a  U$ U R                  U5      nU$ r   )r  ri  r  r   s     r!   rV   FastVit.forward  s3    !!!$>>Ha r#   )r  r  ri  r  r  r  r}  r;   r  r  r  r  F)Tr   )NFFr  F)r   FT)%r   r   r   r   rj   r   r   r   r   r@   rD   r   r   r   rg  r   r   r   r   r?   r  ignorer  r  r  r  r  r   r   r   r  r  r  r  rV   r   r   r   s   @r!   r   r     s   yyt$$ &2,\*=,4,E/K()#8C#$ "$'$',0 %*.#"$*,..)+#(7z'z' #s(Oz'  S/	z'
 c3hz' eSj)z' tSy)z' "$),z' #&z' z' HRYY/45z' !z' z' z' "z'  "!z'" %*#z'$ %z'& $('z'( )z'* +z', -z'. RYY/z'0 BII1z'2 !3z'8 
9z' z'x-ryy -T - YY  YY
 
 YY* * YY		  2C 2hsm 2 8<$$',, ||,  eCcN34,  	, 
 ,  ,  !%,  
tELL!5tELL7I)I#JJ	K, ` ./$#	3S	>*  	%,, 5<< "Mell M M %,,  r#   c                 2    U SSSSS[         S[        SSS	.UE$ )
Nrs  )r   rp  rp  )   r  g?bicubiczfastvit-license)stem.0.conv_kxk.0.convzstem.0.conv_scale.convzhead.fc)urlr}  
input_size	pool_sizecrop_pctinterpolationmeanlicenser   
first_conv
classifier)r	   r
   )r  kwargss     r!   _cfgr    s7    #"%$#J  r#   zfastvit_t8.apple_in1kztimm/)	hf_hub_idzfastvit_t12.apple_in1kzfastvit_s12.apple_in1kzfastvit_sa12.apple_in1kzfastvit_sa24.apple_in1kzfastvit_sa36.apple_in1kzfastvit_ma36.apple_in1kgffffff?)r  r  zfastvit_t8.apple_dist_in1kzfastvit_t12.apple_dist_in1kzfastvit_s12.apple_dist_in1kzfastvit_sa12.apple_dist_in1kzfastvit_sa24.apple_dist_in1kzfastvit_sa36.apple_dist_in1kzfastvit_ma36.apple_dist_in1kzfastvit_mci0.apple_mclipzapple/mobileclip_s0_timmzXhttps://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s0.ptrq  )r   r   r   )      ?r  r  z
apple-amlr)r  r  r  r}  r  r   r  zfastvit_mci1.apple_mclipzapple/mobileclip_s1_timmzXhttps://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s1.ptzfastvit_mci2.apple_mclipzapple/mobileclip_s2_timmzXhttps://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s2.ptr  )r  r  r}  r  r   r     )r   r   r  )r  r  r}  r  r   r  r  r  )z!fastvit_mci0.apple_mclip2_dfndr2bz!fastvit_mci2.apple_mclip2_dfndr2bz!fastvit_mci3.apple_mclip2_dfndr2bz!fastvit_mci4.apple_mclip2_dfndr2bc                 H   SU ;   a  U $ SU ;   aN  U R                  5        VVs0 s H0  u  p#UR                  S5      (       d  M  UR                  SS5      U_M2     snn$ U R                  SU 5      n SU ;   a  SnOSnS	S
KnS	S
Kn/ nU R                  5        HI  u  p#UR                  SU5      nU(       d  M   UR                  [        UR                  S5      5      5        MK     [        [        [        U5      5      5      n0 n	U R                  5        GH  u  p#U(       a  XB;  a  M  UR                  US5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS U5      nUR                  S!5      (       a  UR                  S!S"5      nUR                  S#S$5      nUR                  S%5      (       a  US&:X  a  [        UR                   S'5      (       ax  [#        UR                   R$                  [&        R(                  5      (       aE  UR                  S&S(5      nUR*                  n[,        R.                  " UR0                  S	   5      U	S)'   OUR                  S%S*5      nUR                  S+U5      nS,u  pU(       a+  [        UR                  S-5      5      nUR3                  X{5      n
U
bb  S.U 3nS/U
 3nUS0-   U;   a  UR                  US0-   US1-   5      nO6US2-   U;   a  UR                  US2-   US3-   5      nOUR                  XS4-   5      nX9U'   GM     U	$ s  snnf )5z#Remap original checkpoints -> timm zstem.0.conv_kxk.0.conv.weightz1module.visual.trunk.stem.0.conv_kxk.0.conv.weightzmodule.visual.trunkzmodule.visual.trunk.r  
state_dictz8image_encoder.model.patch_embed.0.rbr_conv.0.conv.weightzimage_encoder.model.r   Nz^(.*?)network\.(\d+)\.proj.*rh   patch_embedr  rbr_convrH   	rbr_scalerI   rbr_skiprE   conv_expr  
lkb_originr   convffnr?  z	se.reducezse.fc1z	se.expandzse.fc2zlayer_scale_([0-9])zlayer_scale_\1.gammar  zlayer_scale.gamma	dist_head	head_distzhead.z	head.projr  zhead.fc.weightzhead.fc.biaszhead.fc.z^network\.(\d+))NNr   znetwork.r  z.projz.downsample.projz.pez.pos_emb.pos_encz.blocks)items
startswithreplacer  rebisectmatchr^  r   grouplistsortedr  subendswithra   r  r~   r  r@   r   Trj   r   r   bisect_right)r  modelr   r   prefixr  r   
stage_endsr  out_dict	stage_idxnet_idx
net_prefixstage_prefixs                 r!   checkpoint_filter_fnr    sQ   &*4:jHEOEUEUEWEWTQ[\[g[gh}[~8		0"5q8EWj9JAZO' J  "8!<5c%++a.12 # fS_-.JH  "		&"%A IImV,IIj*-IIk<0IIj*-IIj,/IIlL1IIi'IIk8,IIk8,FF)+BAF::m$$		-)<=AIIk;/<<  KGEJJ$=$=*UZZ]]\^\e\eBfBfIIk+;<CC+0;;qwwqz+B(IIgz2 +Q/'	%++a.)G++J@I #G9-J$YK0LG#q(IIj72LCU4UVe#q(IIj50,AS2STIIj*BC[ #\ OG @s   NNc           	      l    UR                  SS5      n[        [        U U4[        [	        SUS9S.UD6nU$ )Nr  r  T)flatten_sequentialr  )pretrained_filter_fnfeature_cfg)popr   r   r  r  )variant
pretrainedr  r  r	  s        r!   _create_fastvitr  M  sJ    **]L9K  2DkJ E Lr#   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z%Instantiate FastViT-T8 model variant.)rh   rh   r   rh   )0   `        r   r   r   r   rl  rv  rx  ry  rw  r  )
fastvit_t8r  r  r  r  
model_argss      r!   r!  r!  Z  s:     %E	J ]J]$zB\U[B\]]r#   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z&Instantiate FastViT-T12 model variant.rj  rm  r  rl  r   r  )fastvit_t12r"  r#  s      r!   r&  r&  f  :     &E	J ^Z^4
C]V\C]^^r#   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z&Instantiate FastViT-S12 model variant.rj  rm  rr  rl  r   r  )fastvit_s12r"  r#  s      r!   r)  r)  r  r'  r#   c                 j    [        SSSSSS[        [        SS94SS9n[        S
S	U 0[        U40 UD6D6$ )z'Instantiate FastViT-SA12 model variant.rj  rm  rr  Nr6  r)  rZ  rZ  rZ  r[  rv  rx  ry  r~  rw  r  )fastvit_sa12r  r   r'  r  r#  s      r!   r.  r.  ~  sO     &dG,@PV$WXFJ _j_DD^W]D^__r#   c                 j    [        SSSSSS[        [        SS94SS9n[        S
S	U 0[        U40 UD6D6$ )z'Instantiate FastViT-SA24 model variant.)r   r      r   rm  rr  Nr6  r+  r,  r-  r  )fastvit_sa24r/  r#  s      r!   r2  r2    O     &dG,@PV$WXFJ _j_DD^W]D^__r#   c                 j    [        SSSSSS[        [        SS94SS9n[        S
S	U 0[        U40 UD6D6$ )z'Instantiate FastViT-SA36 model variant.rk  rk     rk  rm  rr  Nr6  r+  r,  r-  r  )fastvit_sa36r/  r#  s      r!   r7  r7    r3  r#   c                 j    [        SSSSSS[        [        SS94SS9n[        S
S	U 0[        U40 UD6D6$ )z'Instantiate FastViT-MA36 model variant.r5  )L      i0  i`  rr  Nr6  r+  r,  r-  r  )fastvit_ma36r/  r#  s      r!   r;  r;    r3  r#   c                 n    [        SSSSSSS[        [        SS94SS	S
9n[        SSU 0[        U40 UD6D6$ )zInstantiate MCi0 model variant.)rh   rk  
   rh   rm  r  FFTTNr6  r+  r,  Trv  rx  ry  r{  r~  rw  r   r  )fastvit_mci0r/  r#  s      r!   r@  r@    sU     &1dG,@PV$WXFJ _j_DD^W]D^__r#   c                 n    [        SSSSSSS[        [        SS94SS	S
9n[        SSU 0[        U40 UD6D6$ )zInstantiate MCi1 model variant.)r   r1     r   rm  r  r>  Nr6  r+  r,  Tr?  r  )fastvit_mci1r/  r#  s      r!   rC  rC    U     &1dG,@PV$WXFJ _j_DD^W]D^__r#   c                 n    [        SSSSSSS[        [        SS94SS	S
9n[        SSU 0[        U40 UD6D6$ )zInstantiate MCi2 model variant.)r   r1     r   )P      i@  i  r  r>  Nr6  r+  r,  Tr?  r  )fastvit_mci2r/  r#  s      r!   rI  rI    rD  r#   c                     [        SSSSSSSS[        [        SS9[        [        SS94S	S
[        [        SS9SS9
n[	        SSU 0[        U40 UD6D6nU$ )zInstantiate L model variant.)rh   r1  rF  r   rh   )r  r  r  r  i   r   r   r   r   r   FFFFFFTTTTNr6  r+  rZ  rZ  rZ  r[  r[  Tr   r   F
rv  rx  ry  r{  rz  r~  rw  r   rE  r  r  )fastvit_mci3r  r   r'  r   r  r  r  r$  r	  s       r!   rQ  rQ    s{      ,":3(?(?
 T;D1#!J$ `z`T*E_X^E_`ELr#   c                     [        SSSSSSSS[        [        SS9[        [        SS94S	S
[        [        SS9SS9
n[	        SSU 0[        U40 UD6D6nU$ )zInstantiate XL model variant.)rh   r1  rF  r   r   )ro  rp  rq  i   i   rK  rL  rM  Nr6  r+  rN  Tr   rO  FrP  r  )fastvit_mci4rR  rS  s       r!   rU  rU    s{      .":3(?(?
 T;D1#!J& `z`T*E_X^E_`ELr#   )r  r  )Hr  	functoolsr   typingr   r   r   r   r   rj   torch.nnr@   	timm.datar	   r
   r   r   timm.layersr   r   r   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   __all__r"   r   r%   r   r   r   r   r   r   r   r   r   r   r  r'  r8  rD  rO  r   r  default_cfgsr  r  r!  r&  r)  r.  r2  r7  r;  r@  rC  rI  rQ  rU  r   r#   r!   <module>ra     s   
  5 5   d d
 
 
 + + ' <+&u=RYY u=p[=RYY [=B &(WW$!%444 		?4 	4
 4 ]]4nD		 DN6 6rF299 F"s(ryy s(l5bii 5pv$299 v$r;BII ;|9RYY 9xf299 fR{bii {|" % \&T\& d\& d\& t \& t \& t \& t \&& !$#'\&* "4$+\&0 "4$1\&4 #D%5\&8 #D%9\&< #D%=\&B #D%C\&L ,f|\!M\&Z ,f|\![\&h ,f|\!i\&x *.|* *.|* *.?+* *.?+*g\& \~IX
 ^ ^ _ _ _ _ 	` 	` 	` 	` 	` 	` 	` 	` ` ` ` ` ` `  0  r#   