
    RЦi                     <   S r SSKrSSKJr  SSKJr  SSKJrJr  SSK	J
r
JrJrJrJrJr  SSKrSSKJr  SSKJrJr  SS	KJrJrJrJrJrJr  S
SKJr  S
SKJr  S
SK J!r!  S
SK"J#r#  S
SK$J%r%J&r&  SS/r'\ " S S5      5       r(S r) " S S\RT                  5      r+\! SVS\\,   S\-S\\R\                  \\R\                     4   4S jj5       r/\! SWS\,S\\R\                     S\\R\                  \\,   4   4S jj5       r0\!S\R\                  S\R\                  S\-S \\,   S!\\,   S"\R\                  S#\R\                  4S$ j5       r1 " S% S&\RT                  5      r2 " S' S(\RT                  5      r3 " S) S*\RT                  5      r4 " S+ S,\RT                  5      r5 " S- S\RT                  5      r6S. r7\8" \(" S/S09\(" S1S09\(" S2S09\(" S3S4S5S6S79\(" S1SS89\(" S2SS89\(" S3S4S5SSS99\(" S:S;S<SSS99S=9r9SXS> jr:SYS@ jr;\&" \;" SASBSC9\;" SDSBSC9\;" SESBSC9\;" SFSBSC9\;" S?SG9\;" SHSBSISJ9\;" SKSBSISJ9\;" SLSBSISJ9SM.5      r<\%SZS\64SN jj5       r=\%SZS\64SO jj5       r>\%SZS\64SP jj5       r?\%SZS\64SQ jj5       r@\%SZS\64SR jj5       rA\%SZS\64SS jj5       rB\%SZS\64ST jj5       rC\%SZS\64SU jj5       rDg)[a  Multi-Scale Vision Transformer v2

@inproceedings{li2021improved,
  title={MViTv2: Improved multiscale vision transformers for classification and detection},
  author={Li, Yanghao and Wu, Chao-Yuan and Fan, Haoqi and Mangalam, Karttikeya and Xiong, Bo and Malik, Jitendra and Feichtenhofer, Christoph},
  booktitle={CVPR},
  year={2022}
}

Code adapted from original Apache 2.0 licensed impl at https://github.com/facebookresearch/mvit
Original copyright below.

Modifications and timm support by / Copyright 2022, Ross Wightman
    N)OrderedDict)	dataclass)partialreduce)UnionListTupleOptionalAnyType)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpDropPathcalculate_drop_path_ratestrunc_normal_tf_get_norm_layer	to_2tuple   )build_model_with_cfg)feature_take_indices)register_notrace_function)
checkpoint)register_modelgenerate_default_cfgsMultiScaleVitMultiScaleVitCfgc                      \ rS rSr% Sr\\S4   \S'   Sr\	\\\S4   4   \S'   Sr
\	\\\S4   4   \S'   S	r\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\\4   \S'   Sr\\\\\4         \S'   Sr\\\\\4         \S'   Sr\\\\4      \S'   Sr\\\4   \S'   Sr\\\4   \S'   Sr\\\4   \S '   S!r\\S"'   S#r\\S$'   S%r \	\\\\4   4   \S&'   S'r!\	\\\\4   4   \S('   S)r"\\S*'   S+ r#S,r$g)-r   $            r$   .depths`   	embed_dimr   	num_heads      @	mlp_ratioF
pool_firstTexpand_attnqkv_biasuse_cls_tokenuse_abs_posresidual_poolingconvmoder$   r$   
kernel_qkv)r   r   r#   r#   r7   r7   stride_qN	stride_kv   r;   stride_kv_adaptive   r>   patch_kernelpatch_stridepatch_paddingmax	pool_typespatialrel_pos_typegelu	act_layer	layernorm
norm_layergư>norm_epsc           	      P  ^  [        T R                  5      n[        T R                  [        [
        45      (       d#  [	        U 4S j[        U5       5       5      T l        [        T R                  5      U:X  d   e[        T R                  [        [
        45      (       d#  [	        U 4S j[        U5       5       5      T l        [        T R                  5      U:X  d   eT R                  b  T R                  c  T R                  n/ n[        U5       H|  n[        T R                  U   5      S:  aC  [        [        U5      5       Vs/ s H$  n[        X%   T R                  U   U   -  S5      PM&     nnUR                  [	        U5      5        M~     [	        U5      T l	        g g g s  snf )Nc              3   H   >#    U  H  nTR                   S U-  -  v   M     g7fr#   N)r(   .0iselfs     Q/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/mvitv2.py	<genexpr>1MultiScaleVitCfg.__post_init__.<locals>.<genexpr>A         "VDUq4>>AF#:DU   "c              3   H   >#    U  H  nTR                   S U-  -  v   M     g7frM   )r)   rN   s     rR   rS   rT   E   rU   rV   r   )lenr&   
isinstancer(   tuplelistranger)   r<   r9   minr8   rB   append)rQ   
num_stages
_stride_kvpool_kv_striderP   ds   `     rR   __post_init__MultiScaleVitCfg.__post_init__>   sY   %
$..5$-88""VE*DU"VVDN4>>"j000$..5$-88""VE*DU"VVDN4>>"j000"".4>>3I00JN:&t}}Q'(1, "'s:!7"!7A JMT]]1-=a-@@!D!7  " %%eJ&78 ' #>2DN 4J.
"s   +F#)r(   r)   r9   )%__name__
__module____qualname____firstlineno__r&   r	   int__annotations__r(   r   r)   r+   floatr,   boolr-   r.   r/   r0   r1   r3   strr5   r8   r
   r9   r<   r?   r@   rA   rC   rE   rG   rI   rJ   rc   __static_attributes__     rR   r   r   $   s   +FE#s(O+-/IuS%S/)*/-.IuS%S/)*.IuJKHdM4K!d!D#"(Jc3h(1QHhuU38_-.Q26IxeCHo./64:sCx1:$*L%S/*$*L%S/*%+M5c?+Is!L#!-3IuS%S/)*3.9Jc5c?*+9He3rp   c                 8    [        [        R                  U S5      $ )Nr   )r   operatormul)iterables    rR   prodru   U   s    (,,!,,rp   c                      ^  \ rS rSrSr       SS\S\S\\\4   S\\\4   S\\\4   4
U 4S jjjrS	\\R                  \
\   4   4S
 jrSrU =r$ )
PatchEmbedY   z
PatchEmbed.
dim_indim_outkernelstridepaddingc                 l   > [         T	U ]  5         XgS.n[        R                  " UU4UUUS.UD6U l        g )Ndevicedtype)kernel_sizer|   r}   )super__init__r   Conv2dproj)
rQ   ry   rz   r{   r|   r}   r   r   dd	__class__s
            rR   r   PatchEmbed.__init__^   sH     	/II
 
 
	rp   returnc                     U R                  U5      nUR                  S5      R                  SS5      UR                  SS  4$ )Nr#   r   )r   flatten	transposeshaperQ   xs     rR   forwardPatchEmbed.forwardt   s9    IIaLyy|%%a+QWWRS\99rp   )r   )r$   i   r=   r:   r4   NN)re   rf   rg   rh   __doc__ri   r	   r   torchTensorr   r   rn   __classcell__r   s   @rR   rw   rw   Y   s     &,&,'-

 
 #s(O	

 #s(O
 38_
 
,:E%,,S	"9: : :rp   rw   T	feat_sizehas_cls_tokenr   c                     Uu  p4U(       a"  U S S 2S S 2S S2S S 24   U S S 2S S 2SS 2S S 24   pOS nU R                  SX4U R                  S   5      R                  SSSS5      R                  5       n X4$ )Nr   r   r$   r#   )reshaper   permute
contiguous)r   r   r   HWcls_toks         rR   reshape_pre_poolr   z   st     DAq!RaR{^Qq!QR{^			"aAGGBK(00Aq!<GGIA:rp   r)   r   c                    U R                   S   U R                   S   /nU R                   S   U R                   S   -  nU R                  SXR                   S   U5      R                  SS5      n Ub  [        R                  " X 4SS9n X4$ )Nr#   r$   r   r   dim)r   r   r   r   cat)r   r)   r   r   L_pooleds        rR   reshape_post_poolr      s     QWWQZ(IwwqzAGGAJ&H			"iX6@@AFAIIwl*<rp   attnqq_sizek_size	rel_pos_h	rel_pos_wc                    U(       a  SOSnUu  pUu  p[        X-  S5      n[        X-  S5      n[        R                  " XR                  [        R                  S9R                  S5      U-  [        R                  " XR                  [        R                  S9R                  S5      U-  -
  nXS-
  U-  -  n[        X-  S5      n[        X-  S5      n[        R                  " XR                  [        R                  S9R                  S5      U-  [        R                  " XR                  [        R                  S9R                  S5      U-  -
  nUUS-
  U-  -  nX^R	                  5          nUUR	                  5          nUR                  u  nnnnUSS2SS2US24   R                  UUXU5      n[        R                  " SUU5      n[        R                  " SUU5      nU SS2SS2US2US24   R                  USXX5      UR                  S5      -   UR                  S	5      -   R                  USX-  X-  5      U SS2SS2US2US24'   U $ )
z)
Spatial Relative Positional Embeddings.
r   r   g      ?r   r   Nzbyhwc,hkc->byhwkzbyhwc,wkc->byhwkr   )
rB   r   aranger   long	unsqueezer   r   einsumview)r   r   r   r   r   r   r   sp_idxq_hq_wk_hk_w	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_wrel_hrel_wBn_headq_Nr   r_qs                            rR   cal_rel_pos_typer      s"     QQFHCHC CIs#ICIs#IS<FFrJYVS<FFqIIU	V  Qw)##FCIs#ICIs#IS<FFrJYVS<FFqIIU	V  sQw)##Fkkm$Efkkm$E''AvsC
Aq&'M

"
"1fc
<CLL+S%8ELL+S%8E 	Q67FG#$))!R3D
//"
	
//"
	 d1b#)SY'	 	Avw	  Krp   c                      ^  \ rS rSrSSSSSSSSSS\R
                  SS4S\S	\S
\\\4   S\S\S\	S\\\4   S\\\4   S\\\4   S\\\4   S\S\	S\S\
\R                     4U 4S jjjrS
\\   4S jrSrU =r$ )MultiScaleAttentionPoolFirst      Tr2   r6   rD   Nr   rz   r   r)   r.   r3   kernel_q	kernel_kvr8   r9   r   rE   r1   rI   c           	        > UUS.n[         TU ]  5         X@l        X l        X$-  U l        U R                  S-  U l        Xl        [        U Vs/ s H  n[        US-  5      PM     sn5      n[        U Vs/ s H  n[        US-  5      PM     sn5      n[        R                  " X4SU0UD6U l        [        R                  " X4SU0UD6U l        [        R                  " X4SU0UD6U l        [        R                  " X"40 UD6U l        [        U5      S:X  a  [        U	5      S:X  a  S n[        U5      S:X  a  [        U
5      S:X  a  S nX`l        US:H  U l        Su  U l        U l        U l        Su  U l        U l        U l        US;   a_  US	:X  a  [        R0                  O[        R2                  nU(       a  U" XyU5      U l        U(       a  U" XU5      U l        U" XU5      U l        OUS
:X  d  US:X  a  US
:X  a  X-  OUnU(       a1  [        R4                  " UUU4U	UUSS.UD6U l        U" U40 UD6U l        U(       ab  [        R4                  " UUU4U
UUSS.UD6U l        U" U40 UD6U l        [        R4                  " UUU4U
UUSS.UD6U l        U" U40 UD6U l        O[7        SU 35      eXl        U R8                  S:X  a  US   US   :X  d   eUS   n[;        U	5      S:  a  UU	S   -  OUn[;        U
5      S:  a  UU
S   -  OUnS[=        UU5      -  S-
  n[        R>                  " [@        RB                  " UU R                  40 UD65      U l"        [        R>                  " [@        RB                  " UU R                  40 UD65      U l#        [I        U RD                  SS9  [I        U RF                  SS9  Xl%        g s  snf s  snf )Nr         r#   biasr   conv_unsharedNNNavgrB   rB   r2   Fr|   r}   groupsr   Unsupported model rD   r   {Gz?std)&r   r   r)   rz   head_dimscaler   rZ   ri   r   Linearr   kvr   ru   r3   unsharedpool_qpool_kpool_vnorm_qnorm_knorm_v	MaxPool2d	AvgPool2dr   NotImplementedErrorrE   rX   rB   	Parameterr   zerosr   r   r   r1   rQ   r   rz   r   r)   r.   r3   r   r   r8   r9   r   rE   r1   rI   r   r   r   r   	padding_qkv
padding_kvpool_opdim_convsizer   kv_size
rel_sp_dimr   s                               rR   r   %MultiScaleAttentionPoolFirst.__init__   s   & /",]]d*
*913qAv;9:	9=9RCaL9=>
3=h="=3=h="=3=h="=IIg5"5	 >Q4>Q#6H	?aDOq$8I	/0@-T[$+0@-T[$+>!&*embllG%h)D%iJG%iJGV^t6+/6>s'sH ii	 $%#	 	 )8R8 ii	 %&#	 	 )8R8 ii	 %&#	 	 )8R8%(:4&&ABB )	)Q<9Q<///Q<D,/MA,=TXa[(4F.1)nq.@dil*dGS11A5J\\%++j$--*VSU*VWDN\\%++j$--*VSU*VWDNT^^6T^^6 0] :=s   O5Oc           	         UR                   u  p4nU R                  (       a  SOU R                  nUR                  X4US5      R	                  SSSS5      nU=n=pU R
                  bB  [        XrU R                  5      u  pzU R                  U5      n[        XpR                  U
5      u  p{OUnU R                  b  U R                  U5      nU R                  bB  [        XU R                  5      u  pU R                  U5      n[        XR                  U5      u  pOUnU R                  b  U R                  U5      nU R                  bB  [        XU R                  5      u  pU R                  U	5      n	[        XR                  U5      u  pOUnU R                  b  U R                  U	5      n	US   US   -  [        U R                  5      -   nUR                  SS5      R                  UUS5      nU R!                  U5      R                  UUU R                  S5      R                  SS5      nUS   US   -  [        U R                  5      -   nUR                  SS5      R                  UUS5      nU R#                  U5      R                  UUU R                  S5      nUS   US   -  [        U R                  5      -   nU	R                  SS5      R                  UUS5      n	U R%                  U	5      R                  UUU R                  S5      R                  SS5      n	XpR&                  -  U-  nU R(                  S:X  a/  [+        UUU R                  UUU R,                  U R.                  5      nUR1                  SS9nUU	-  nU R2                  (       a  X-   nUR                  SS5      R                  USU R4                  5      nU R7                  U5      nX4$ )Nr   r   r   r#   r$   rD   r   )r   r   r)   r   r   r   r   r   r   r   r   r   r   r   ri   r   r   r   r   r   rE   r   r   r   softmaxr1   rz   r   )rQ   r   r   r   N_fold_dimr   r   r   q_tokr   k_tokr   v_tokv_sizer   k_Nv_Nr   s                       rR   r   $MultiScaleAttentionPoolFirst.forward5  sP   ''a14>>IIaHb)11!Q1=A;;"'d6H6HIHAAA)!^^UCIAvF;;"AA;;"'d6H6HIHAAA)!^^UCIAvF;;"AA;;"'d6H6HIHAAA)!^^UCIAvF;;"AAQi&)#c$*<*<&==KK1%%ab1FF1Iadnnb9CCAqIQi&)#c$*<*<&==KK1%%ab1FF1Iadnnb9Qi&)#c$*<*<&==KK1%%ab1FF1Iadnnb9CCAqIJJ!#	)#""D |||#1H  AKK1%%aT\\:IIaLyrp   )rz   r   r   r   r3   r   r   r   r)   r   r   r   r   r   r   rE   r   r1   r   r   r   re   rf   rg   rh   r   	LayerNormri   r	   rl   rm   r   Moduler   r   r   rn   r   r   s   @rR   r   r      s    !(.)/(.)/"& )%)*,,,#h1h1 h1 S#X	h1
 h1 h1 h1 CHoh1 S#Xh1 CHoh1 S#Xh1  h1 h1 #h1 RYYh1 h1TBDI B Brp   r   c                      ^  \ rS rSrSSSSSSSSSS\R
                  SS4S\S	\S
\\\4   S\S\S\	S\\\4   S\\\4   S\\\4   S\\\4   S\S\	S\S\
\R                     4U 4S jjjrS
\\   4S jrSrU =r$ )MultiScaleAttentioniz  r   Tr2   r6   rD   Nr   rz   r   r)   r.   r3   r   r   r8   r9   r   rE   r1   rI   c           	      (  > UUS.n[         TU ]  5         X@l        X l        X$-  U l        U R                  S-  U l        Xl        [        U Vs/ s H  n[        US-  5      PM     sn5      n[        U Vs/ s H  n[        US-  5      PM     sn5      n[        R                  " XS-  4SU0UD6U l        [        R                  " X"40 UD6U l        [        U5      S:X  a  [        U	5      S:X  a  S n[        U5      S:X  a  [        U
5      S:X  a  S nX`l        US:H  U l        Su  U l        U l        U l        Su  U l        U l        U l        US	;   a_  US
:X  a  [        R,                  O[        R.                  nU(       a  U" XyU5      U l        U(       a  U" XU5      U l        U" XU5      U l        OUS:X  d  US:X  a  US:X  a  X$-  OUnU(       a1  [        R0                  " UUU4U	UUSS.UD6U l        U" U40 UD6U l        U(       ab  [        R0                  " UUU4U
UUSS.UD6U l        U" U40 UD6U l        [        R0                  " UUU4U
UUSS.UD6U l        U" U40 UD6U l        O[3        SU 35      eXl        U R4                  S:X  a  US   US   :X  d   eUS   n[7        U	5      S:  a  UU	S   -  OUn[7        U
5      S:  a  UU
S   -  OUnS[9        UU5      -  S-
  n[        R:                  " [<        R>                  " UU R                  40 UD65      U l         [        R:                  " [<        R>                  " UU R                  40 UD65      U l!        [E        U R@                  SS9  [E        U RB                  SS9  Xl#        g s  snf s  snf )Nr   r   r#   r$   r   r   r   r   r   rB   r2   Fr   r   rD   r   r   r   )$r   r   r)   rz   r   r   r   rZ   ri   r   r   qkvr   ru   r3   r   r   r   r   r   r   r   r   r   r   r   rE   rX   rB   r   r   r   r   r   r   r1   r   s                               rR   r   MultiScaleAttention.__init__{  s   & /",]]d*
*913qAv;9:	9=9RCaL9=>
99SA+CHCCIIg5"5	 >Q4>Q#6H	?aDOq$8I	/0@-T[$+0@-T[$+>!&*embllG%h)D%iJG%iJGV^t6/3v~w+7H ii	 $%#	 	 )8R8 ii	 %&#	 	 )8R8 ii	 %&#	 	 )8R8%(:4&&ABB )	)Q<9Q<///Q<D,/MA,=TXa[(4F.1)nq.@dil*dGS11A5J\\%++j$--*VSU*VWDN\\%++j$--*VSU*VWDNT^^6T^^6 0Y :=s   N
5Nc           	         UR                   u  p4nU R                  U5      R                  X4SU R                  S5      R	                  SSSSS5      nUR                  SS9u  pxn	U R                  bB  [        XrU R                  5      u  pzU R                  U5      n[        XpR                  U
5      u  p{OUnU R                  b  U R                  U5      nU R                  bB  [        XU R                  5      u  pU R                  U5      n[        XR                  U5      u  pOUnU R                  b  U R                  U5      nU R                  bA  [        XU R                  5      u  pU R                  U	5      n	[        XR                  U5      u  pU R                  b  U R                  U	5      n	XpR                  -  UR!                  SS5      -  nU R"                  S	:X  a/  [%        UUU R                  UUU R&                  U R(                  5      nUR+                  SS9nX-  nU R,                  (       a  X-   nUR!                  SS5      R                  USU R.                  5      nU R1                  U5      nX4$ )
Nr$   r   r#   r   r   r;   r   r   rD   )r   r  r   r)   r   unbindr   r   r   r   r   r   r   r   r   r   r   rE   r   r   r   r   r1   rz   r   )rQ   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   s                   rR   r   MultiScaleAttention.forward  s   ''ahhqk!!!4>>2>FFq!QPQSTU***#a;;"'d6H6HIHAAA)!^^UCIAvF;;"AA;;"'d6H6HIHAAA)!^^UCIAvF;;"AA;;"'d6H6HIHAAA$Q>DA;;"AAJJ!++b""55	)#""D |||#H  AKK1%%aT\\:IIaLyrp   )rz   r   r   r3   r   r   r   r)   r   r   r   r   r  r   rE   r   r1   r   r   r  r   s   @rR   r  r  z  s    !(.)/(.)/"& )%)*,,,#f1f1 f1 S#X	f1
 f1 f1 f1 CHof1 S#Xf1 CHof1 S#Xf1  f1 f1 #f1 RYYf1 f1P3DI 3 3rp   r  c            %          ^  \ rS rSrSSS\R
                  SSSSSSSSSSS	S	4S
\S\S\S\\\4   S\S\	S\S\
\R                     S\\\4   S\\\4   S\\\4   S\\\4   S\S\	S\	S\	S\S\	4$U 4S jjjrS\\   4S jrS\\   4S jrSrU =r$ ) MultiScaleBlocki  r*   T        r6   r2   FrD   Nr   rz   r)   r   r+   r.   	drop_pathrI   r   r   r8   r9   r3   r   r-   r,   rE   r1   c                   > UUS.n[         TU ]  5         X:g  nXl        X l        Xl        U" U40 UD6U l        U(       a  U(       a  [        R                  " X40 UD6OS U l        U(       aj  [        U5      S:  a[  U Vs/ s H  nUS:  a  US-   OUPM     nnUnU Vs/ s H  n[        US-  5      PM     nn[        R                  " UUU5      U l        OS U l        U(       a  UOUnU(       a  [        O[        nU" UU4UUUU	U
UUUUUUUS.UD6U l        US:  a  [!        U5      O[        R"                  " 5       U l        U" U40 UD6U l        UnU(       a  U(       d  [        R                  " X40 UD6OS U l        [+        SU[        UU-  5      US.UD6U l        US:  a  [!        U5      U l        g [        R"                  " 5       U l        g s  snf s  snf )Nr   r   r#   )r)   r   r.   r   r   r8   r9   rI   r   r3   rE   r1   r  )in_featureshidden_featuresout_featuresro   )r   r   r   rz   r   norm1r   r   shortcut_proj_attnru   ri   r   shortcut_pool_attnr   r  r   r   Identity
drop_path1norm2shortcut_proj_mlpr   mlp
drop_path2) rQ   r   rz   r)   r   r+   r.   r  rI   r   r   r8   r9   r3   r   r-   r,   rE   r1   r   r   r   proj_neededskernel_skipstride_skipskippadding_skipatt_dim
attn_layermlp_dim_outr   s                                   rR   r   MultiScaleBlock.__init__  s   . /n**r*
CNS^"))C"?B"?dhX*:BC(QAE1q5q0(KC"K7BC{tC	N{LC&(ll;\&ZD#&*D#('c5?1EX

  !'%-
 
	" 2;S(9-bkkm.2.
BMVa3!>2!>gk 
) 34$
 	
 2;S(9-bkkmK DCs   ?GGc                    U R                   c  U$ U R                  (       a  US S 2S S2S S 24   US S 2SS 2S S 24   pOS nUR                  u  pEnUu  pxUR                  XGX5      R	                  SSSS5      R                  5       nU R                  U5      nUR                  XFS5      R                  SS5      nUb  [        R                  " X14SS9nU$ )Nr   r   r$   r#   r   r   )	r  r   r   r   r   r   r   r   r   )	rQ   r   r   r   r   LCr   r   s	            rR   _shortcut_poolMultiScaleBlock._shortcut_poolc  s    ""*H1bqb!8a12qkQG''aIIaA!))!Q15@@B##A&IIaB))!Q/		7,A.Arp   c                 ~   U R                  U5      nU R                  c  UOU R                  U5      nU R                  XB5      nU R                  X25      u  pX@R	                  U5      -   nU R                  U5      nU R                  c  UOU R                  U5      nX@R                  U R                  U5      5      -   nX4$ N)	r  r  r+  r   r  r  r  r  r  )rQ   r   r   x_norm
x_shortcutfeat_size_news         rR   r   MultiScaleBlock.forwards  s    A119Qt?V?VW]?^
((?
99V7++A008Qd>T>TU[>\
&)9::rp   )r   r   rz   r  r  r   r  r  r  r  r  r  )re   rf   rg   rh   r   r  ri   r	   rk   rl   r   r  rm   r   r   r+  r   rn   r   r   s   @rR   r  r    ss     #!"*,,,(.)/(.)/"& %$ )%)+GTGT GT 	GT
 S#XGT GT GT GT RYYGT CHoGT S#XGT CHoGT S#XGT GT  GT  !GT" #GT$ %GT& #'GT GTR49   DI    rp   r  c            *          ^  \ rS rSrSSSSSSSSSSSS\R
                  SS	S	4S
\S\S\S\S\\\4   S\S\	S\\\4   S\\\4   S\\\4   S\\\4   S\
S\	S\	S\	S\
S\	S\\R                     S\\\\   4   4&U 4S jjjrS\\   4S jrSrU =r$ ) MultiScaleVitStagei  r*   Tr6   r2   FrD   r  Nr   rz   depthr)   r   r+   r.   r   r   r8   r9   r3   r   r-   r,   rE   r1   rI   r  c                 b  > UUS.n[         TU ]  5         SU l        [        R                  " 5       U l        U(       a  U4U-  nOU4US-
  -  U4-   n[        U5       H  n[        S0 SU_SUU   _SU_SU_SU_S	U_S
U_SU	_SUS:X  a  U
OS_SU_SU_SU_SU_SU_SU_SU_SU_S[        U[        [        45      (       a  UU   OU_UD6nUU   nU R
                  R                  U5        US:X  d  M  [        [        XZ5       VVs/ s H  u  nnUU-  PM     snn5      nM     XPl        g s  snnf )Nr   Fr   r   rz   r)   r   r+   r.   r   r   r8   r   r6   r9   r3   r   r,   rE   r1   r-   rI   r  ro   )r   r   grad_checkpointingr   
ModuleListblocksr\   r  rY   r[   rZ   r^   zipr   )rQ   r   rz   r5  r)   r   r+   r.   r   r   r8   r9   r3   r   r-   r,   rE   r1   rI   r  r   r   r   out_dimsrP   attention_blockr   r|   r   s                               rR   r   MultiScaleVitStage.__init__  s   0 /"'mmozE)Hv+wj8HuA-   $ $	
 $ " " $ &'!V $  , & * "2  (!" &#$ +5Yu*N*N)A,T]'O* 1+CKK/Av!c)F^"_F^ldF46>F^"_`	3 6 # #`s   D+c                     U R                    HQ  nU R                  (       a3  [        R                  R	                  5       (       d  [        X1U5      u  pMG  U" X5      u  pMS     X4$ r.  )r9  r7  r   jitis_scriptingr   )rQ   r   r   blks       rR   r   MultiScaleVitStage.forward  sS    ;;C&&uyy/E/E/G/G)#)<9"109	 
 |rp   )r9  r   r7  )re   rf   rg   rh   r   r  ri   r	   rk   rl   rm   r   r  r   r   r   r   rn   r   r   s   @rR   r4  r4    sS     #!(.)/(.)/"& %$ )%)*,,,36-=#=# =# 	=#
 =# S#X=# =# =# CHo=# S#X=# CHo=# S#X=# =#  =# =#  !=#" #=#$ #%=#& RYY'=#( UDK/0)=# =#~DI  rp   r4  c                     ^  \ rS rSrSr        S!S\S\\\4   S\S\\	   S\S\
S	\
4U 4S
 jjjrS r\R                  R                  S 5       r\R                  R                  S"S j5       r\R                  R                  S#S j5       r\R                  R                  S\R(                  4S j5       rS$S\S\\	   4S jjr     S%S\R.                  S\\\\\   4      S\S\S\	S\S\\\R.                     \\R.                  \\R.                     4   4   4S jjr   S&S\\\\   4   S\S\4S jjrS rS"S\4S jjrS rS r U =r!$ )'r   i  a  
Improved Multiscale Vision Transformers for Classification and Detection
Yanghao Li*, Chao-Yuan Wu*, Haoqi Fan, Karttikeya Mangalam, Bo Xiong, Jitendra Malik,
    Christoph Feichtenhofer*
https://arxiv.org/abs/2112.01526

Multiscale Vision Transformers
Haoqi Fan*, Bo Xiong*, Karttikeya Mangalam*, Yanghao Li*, Zhicheng Yan, Jitendra Malik,
    Christoph Feichtenhofer*
https://arxiv.org/abs/2104.11227
cfgimg_sizein_chansglobal_poolnum_classesdrop_path_rate	drop_ratec
                 t	  > [         TU ]  5         XS.n
[        U5      n[        [	        UR
                  5      UR                  S9nXPl        X0l        Xpl	        Uc  UR                  (       a  SOSnX@l        [        UR                  5      U l        UR                  U l        UR                  S   n[!        S#UUUR"                  UR$                  UR&                  S.U
D6U l        US   UR$                  S   -  US   UR$                  S   -  4n[+        U5      nUR                  (       a?  [,        R.                  " [0        R2                  " SSU40 U
D65      U l        SU l        US-   nOSU l        S U l        UnUR8                  (       a2  [,        R.                  " [0        R2                  " SX40 U
D65      U l        OS U l        [=        UR                  5      nUn[?        UR$                  5      n[A        XaR                  SS	9n[,        RB                  " 5       U l"        / U l#        [I        U5       GH  nUR                  (       a  UR                  U   nOUR                  [K        US-   US-
  5         n[M        S#0 S
U_SU_SUR                  U   _SURN                  U   _SU_SURP                  _SURR                  _SURT                  _SURV                  _SUR                  _SURX                  _SURX                  _SURZ                  U   _SUR\                  U   _SUR                  _SUR^                  _SUR`                  _SU_SUU   _U
D6nU[?        URZ                  U   5      -  nU =RF                  [c        SU 3UUS9/-  sl#        UnURd                  nU RD                  Rg                  U5        GM     U=U l4        U l5        U" U40 U
D6U l6        [,        Rn                  " [q        S[,        Rr                  " U R                  5      4S US:  a"  [,        Rt                  " U Rh                  U40 U
D6O[,        Rv                  " 5       4/5      5      U l<        U R:                  b  [{        U R:                  S!S"9  U R4                  b  [{        U R4                  S!S"9  U R}                  U R~                  5        g )$Nr   )epstokenr   r   )ry   rz   r{   r|   r}   r   T)	stagewiser   rz   r5  r)   r   r+   r.   r3   r,   r-   r   r   r8   r9   r   rE   r1   rI   r  zblock.)modulenum_chs	reductiondropfcr   r   ro   )@r   r   r   r   r   rI   rJ   rH  rF  rJ  r/   rG  rZ   r&   r-   r(   rw   r?   r@   rA   patch_embedru   r   r   r   r   	cls_tokennum_prefix_tokensr0   	pos_embedrX   rB   r   r8  stagesfeature_infor\   r]   r4  r)   r+   r.   r3   r,   r5   r8   r9   rE   r1   dictr   r^   num_featureshead_hidden_sizenorm
Sequentialr   Dropoutr   r  headr   apply_init_weights)rQ   rD  rE  rF  rG  rH  rI  rJ  r   r   r   rI   r(   
patch_dimsnum_patchespos_embed_dimr_   r   curr_stridedprrP   rz   stager   s                          rR   r   MultiScaleVit.__init__  sV    	/X&^CNN;N
& "%(%6%6'EK&CJJ'??MM!$	% 
####%%
 
 qkS%5%5a%88(1+IYIYZ[I\:\]
:&\\%++aI*L*LMDN%&D"'!OM%&D"!DN'M??\\%++a*XUW*XYDN!DN'
	#**+'

dSmmoz"A--*--AE:>(BC&  jjm --*	
 $ --  XX >>  OO  .. a --* "//  !--!" "%!5!5#$ &%& a&)E, 3s||A//K$qclGWb"c!ddIIKKu%? #B 5>=D1y/B/	MM+RZZ/0kTUo299T..BrB[][f[f[hi/
 # 	
 >>%T^^6>>%T^^6

4%%&rp   c                    [        U[        R                  5      (       am  [        UR                  SS9  [        U[        R                  5      (       a9  UR
                  b+  [        R                  R                  UR
                  S5        g g g g )Nr   r   r  )rY   r   r   r   weightr   init	constant_)rQ   ms     rR   rb  MultiScaleVit._init_weights?  s`    a##QXX40!RYY''AFF,>!!!&&#. -?' $rp   c                    ^ U R                  5        V^Vs1 s H$  u  mn[        U4S jS 5       5      (       d  M"  TiM&     snn$ s  snnf )Nc              3   ,   >#    U  H	  oT;   v   M     g 7fr.  ro   )rO   nr   s     rR   rS   0MultiScaleVit.no_weight_decay.<locals>.<genexpr>H  s     \'[!Av'[s   )rW  r   r   rU  )named_parametersany)rQ   r   r   s    ` rR   no_weight_decayMultiScaleVit.no_weight_decayE  sH    "335 ^5da\'[\\ 5 ^ 	^ ^s
   !AAc                     [        SSS/S9nU$ )Nz^patch_embed)z^stages\.(\d+)N)z^norm)i )stemr9  )rZ  )rQ   coarsematchers      rR   group_matcherMultiScaleVit.group_matcherJ  s     -/CD
 rp   c                 6    U R                    H	  nXl        M     g r.  )rX  r7  )rQ   enabler  s      rR   set_grad_checkpointing$MultiScaleVit.set_grad_checkpointingR  s    A#)  rp   r   c                 .    U R                   R                  $ r.  )r`  rS  )rQ   s    rR   get_classifierMultiScaleVit.get_classifierW  s    yy||rp   c                 h   Xl         Ub  X l        [        U R                  R                  S5      (       a*  U R                  R                  R
                  R                  OS n[        U R                  R                  S5      (       a*  U R                  R                  R
                  R                  OS n[        R                  " [        S[        R                  " U R                  5      4SUS:  a   [        R                  " U R                  XUS9O[        R                  " 5       4/5      5      U l        g )Nrk  rR  rS  r   r   )rH  rG  hasattrr`  rS  rk  r   r   r   r^  r   r_  rJ  r   r[  r  )rQ   rH  rG  r   r   s        rR   reset_classifierMultiScaleVit.reset_classifier[  s    &"*/6tyy||X/N/N$$++TX-4TYY\\8-L-L		##))RVMM+RZZ/0\gjk\k299T..RWXqsq|q|q~/
 # 	rp   r   indicesr]  
stop_early
output_fmtintermediates_onlyc                 2   US;   d   S5       eUS:H  n/ n[        [        U R                  5      U5      u  pU R                  U5      u  pUR                  S   nU R
                  b3  U R
                  R                  USS5      n[        R                  " X4SS9nU R                  b  XR                  -   n[        U R                  5      S-
  n[        U R                  5       H  u  nnU" X5      u  pX;   d  M  U(       a  X:X  a  U R                  U5      nOUnU(       aC  U R
                  b  USS2SS24   nUR                  XS   US   S5      R                  SS	SS
5      nUR                  U5        M     U(       a  U$ WU:X  a  U R                  U5      nX4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWNLCz!Output shape must be NCHW or NLC.r  r   Nr   r   r   r$   r#   )r   rX   rX  rT  r   rU  expandr   r   rW  	enumerater]  r   r   r^   )rQ   r   r  r]  r  r  r  r   intermediatestake_indices	max_indexr   r   
cls_tokenslast_idxfeat_idxrh  x_inters                     rR   forward_intermediates#MultiScaleVit.forward_intermediatesf  s   * _,Q.QQ,&"6s4;;7G"Q ''*GGAJ>>%..q"b9J		:/q1A>>%NN"At{{#a'(5OHe .LA'H0"iilGG~~1")!QR%.%ooa1y|RPXXYZ\]_`bcdG$$W-  6   x		!Arp   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU(       a  [        R                  " 5       U l        U(       a  U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        r    )r   rX   rX  r   r  r]  r  )rQ   r  r  r  r  r  s         rR   prune_intermediate_layers'MultiScaleVit.prune_intermediate_layers  sG     #7s4;;7G"Q DI!!!R(rp   c                 \   U R                  U5      u  pUR                  u  p4nU R                  b3  U R                  R                  USS5      n[        R
                  " Xa4SS9nU R                  b  XR                  -   nU R                   H  nU" X5      u  pM     U R                  U5      nU$ )Nr   r   r   )	rT  r   rU  r  r   r   rW  rX  r]  )rQ   r   r   r   r   r*  r  rh  s           rR   forward_featuresMultiScaleVit.forward_features  s    ''*''a>>%..q"b9J		:/q1A>>%NN"A[[E .LAy ! IIaLrp   
pre_logitsc                     U R                   (       a>  U R                   S:X  a%  US S 2U R                  S 24   R                  S5      nO	US S 2S4   nU(       a  U$ U R                  U5      $ )Nr   r   r   )rG  rV  meanr`  )rQ   r   r  s      rR   forward_headMultiScaleVit.forward_head  s^    5(a//00166q9adGq0DIIaL0rp   c                 J    U R                  U5      nU R                  U5      nU$ r.  )r  r  r   s     rR   r   MultiScaleVit.forward  s'    !!!$a rp   )rU  r&   rJ  r-   rY  rG  r`  r\  rF  r]  rH  r[  rV  rT  rW  rX  ))   r  r$   N  r  r  NNFTr.  )NFFr  F)r   FT)"re   rf   rg   rh   r   r   r	   ri   r
   rm   rk   r   rb  r   r?  ignorerv  r|  r  r   r  r  r  r   r   r   rl   r  r  r  r  r   rn   r   r   s   @rR   r   r     s   
 )3)-#$&!e'!e' CHoe' 	e'
 "#e' e' "e' e' e'N/ YY^ ^ YY  YY* * YY		  	C 	hsm 	 8<$$',9 ||9  eCcN349  	9 
 9  9  !%9  
tELL!5tELL7I)I#JJ	K9 z ./$#	3S	>*  	""1$ 1 rp   c           
        ^ SU ;   a  U R                  5        H  nSU;   d  M  X   nUR                  5       U   R                  nUR                  S   US   :w  d  MD  [        R                  R
                  R                  UR                  SUR                  S   S5      R                  SSS5      US   SS9nUR                  SUS   5      R                  SS5      X'   M     U $ SS K	nS	U ;   a  U S	   n [        US
S 5      n[        USS5      nUc   S5       e0 mSn	[        U5       H;  u  pTR                  [        XU-   5       Vs0 s H	  oXU	-
  4_M     sn5        X-  n	M=     0 nU R                  5        Hf  u  p.UR                  SU4S jU5      nU(       a  UR                  SSU5      nOUR                  SSU5      nSU;   a  UR!                  SS5      nXU'   Mh     U$ s  snf )Nzstages.0.blocks.0.norm1.weightrel_posr   r   r   r#   linear)r   r3   model_stater&   r-   Tz3model requires depth attribute to remap checkpointszblocks\.(\d+)c           	         > ST[        U R                  S5      5         S    ST[        U R                  S5      5         S    3$ )Nzstages.r   r   z.blocks.)ri   group)r   	depth_maps    rR   <lambda>&checkpoint_filter_fn.<locals>.<lambda>  sH    	#aggaj/ :1 =>hyQTUVU\U\]^U_Q`GabcGdFefrp   z stages\.(\d+).blocks\.(\d+).projz&stages.\1.blocks.\2.shortcut_proj_attnz%stages.\1.blocks.\2.shortcut_proj_mlpr`  zhead.projectionhead.fc)keys
state_dictr   r   r   
functionalinterpolater   r   regetattrr  updater\   itemssubreplace)r  modelr   r  dest_rel_pos_shaperel_pos_resizedr  r&   r-   	block_idx	stage_idxrb   rP   out_dictr   r  s                  @rR   checkpoint_filter_fnr    s	   ':5"AA~$-%*%5%5%7%:%@%@"==#'9!'<<&+hh&9&9&E&E7==+;R@HHAqQ/2% 'F 'O
 %4$;$;B@RST@U$V$^$^_`bc$dJM # 
".
UHd+F%5KTTTII!&)	y^_R_A`aA`AiY77A`ab	 * H  "FFf
 :>fijkA:>ehijAQ;		+Y7A # O% bs   G#
)r   r#      r#   )r&   )r   r#      r#   r"   )r#      r!   r;      r#   F)r&   r(   r)   r-   )r&   r/   )r&   r(   r)   r/   r-   )r;   r   <   r      r$   )mvitv2_tinymvitv2_smallmvitv2_basemvitv2_largemvitv2_small_clsmvitv2_base_clsmvitv2_large_clsmvitv2_huge_clsc           
          UR                  SS5      n[        [        U U4U(       d	  [        U    O[        U   [        [        USS9S.UD6$ )Nout_indicesr;   getter)r  feature_cls)	model_cfgpretrained_filter_fnfeature_cfg)popr   r   
model_cfgsr  rZ  )variantcfg_variant
pretrainedkwargsr  s        rR   _create_mvitv2r  *  sW    **]A.K .9*W%j>U1[hG  rp   r  c                 4    U SSS SS[         [        SSSSS	.UE$ )
Nr  )r$   r  r  g?bicubiczpatch_embed.projr  Tz
apache-2.0)urlrH  
input_size	pool_sizecrop_pctinterpolationr  r   
first_conv
classifierfixed_input_sizelicenser   )r  r  s     rR   _cfgr  7  s6    =t%.B(	 	 	 	rp   zDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_T_in1k.pythztimm/)r  	hf_hub_idzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_S_in1k.pythzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in1k.pythzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in1k.pyth)r  zEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in21k.pythiJ  )r  r  rH  zEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in21k.pythzEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_H_in21k.pyth)zmvitv2_tiny.fb_in1kzmvitv2_small.fb_in1kzmvitv2_base.fb_in1kzmvitv2_large.fb_in1kr  zmvitv2_base_cls.fb_inw21kzmvitv2_large_cls.fb_inw21kzmvitv2_huge_cls.fb_inw21kc                     [        SSU 0UD6$ )Nr  )r  r  r  r  s     rR   r  r  _      IJI&IIrp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r  d      JZJ6JJrp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r  i  r  rp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r  n  r  rp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r  s      NNvNNrp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r  x      M
MfMMrp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r  }  r  rp   c                     [        SSU 0UD6$ )Nr  )r  r  r  s     rR   r  r    r  rp   r  r.  )NF)r  r  )Er   rr   collectionsr   dataclassesr   	functoolsr   r   typingr   r   r	   r
   r   r   r   r   	timm.datar   r   timm.layersr   r   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   	_registryr   r   __all__r   ru   r  rw   ri   rl   r   r   r   r   r   r  r  r4  r   r  rZ  r  r  r  default_cfgsr  r  r  r  r  r  r  r  ro   rp   rR   <module>r     s    # ! % : :   A m m * + 3 # <.
/ -3 -3 -3`-: :B  #9  5<<%,,//0	   +/

 %,,'
 5<<c"#	
 
 /ll/<</ / S		/
 S	/ <</ <</ /dm299 m`\")) \~e bii e PG GTCBII CL,^   " ! "	 & % & %A'
T

 %R !%k$j %k !%S" #'S# "&S")& 6 J} J J K K K J} J J K K K OM O O N= N N OM O O N= N Nrp   