
    RЦi+A                        S r  SSKJrJrJrJr  SSKrSSKJr  SSK	J
r
Jr  SSKJrJrJrJrJrJrJr  SSKJr  SSKJr  SS	KJrJr  S
/r\ " S S\R8                  5      5       r " S S\R8                  5      r " S S\R8                  5      r " S S
\R8                  5      r SS jr!SS jr"\" \"" SS9\"" SS9\"" SS9S.5      r#\SS\ 4S jj5       r$\SS\ 4S jj5       r%\SS\ 4S jj5       r&g)a  ConViT Model

@article{d2021convit,
  title={ConViT: Improving Vision Transformers with Soft Convolutional Inductive Biases},
  author={d'Ascoli, St{'e}phane and Touvron, Hugo and Leavitt, Matthew and Morcos, Ari and Biroli, Giulio and Sagun, Levent},
  journal={arXiv preprint arXiv:2103.10697},
  year={2021}
}

Paper link: https://arxiv.org/abs/2103.10697
Original code: https://github.com/facebookresearch/convit, original copyright below

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    )OptionalUnionTypeAnyNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathcalculate_drop_path_ratestrunc_normal_
PatchEmbedMlp	LayerNormHybridEmbed   )build_model_with_cfg)register_notrace_module)register_modelgenerate_default_cfgsConVitc                      ^  \ rS rSr       SS\S\S\S\S\S\4U 4S jjjrS	 rS
 r	SS jr
S rS\S\R                  4S jrSrU =r$ )GPSA'   dim	num_headsqkv_bias	attn_drop	proj_droplocality_strengthc	                 p  > XxS.n	[         TU ]  5         X l        Xl        X-  n
U
S-  U l        X`l        [        R                  " XS-  4SU0U	D6U l        [        R                  " X4SU0U	D6U l	        [        R                  " U5      U l        [        R                  " X40 U	D6U l        [        R                  " SU40 U	D6U l        [        R                  " U5      U l        [        R                  " [         R"                  " U R                  40 U	D65      U l        [         R&                  " S0 U	D6U l        g )Ndevicedtype         bias   )r   r   r   r'   )super__init__r   r   scaler   nnLinearqkvDropoutr   projpos_projr   	Parametertorchonesgating_paramzerosrel_indices)selfr   r   r   r   r   r   r"   r#   ddhead_dim	__class__s              Q/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/convit.pyr)   GPSA.__init__)   s     /"#%
!2))Cq>x>2>39(9b9I.IIc-"-			!Y5"5I.LLDNN)Ib)IJ).)F2)F    c                    UR                   u  p#nU R                  b  U R                  R                   S   U:w  a  U R                  U5      U l        U R                  U5      nU R	                  U5      R                  X#U R                  X@R                  -  5      R                  SSSS5      nXV-  R                  SS5      R                  X#U5      nU R                  U5      nU R                  U5      nU$ )Nr   r   r%   r'   )shaper7   get_rel_indicesget_attentionr.   reshaper   permute	transposer0   r   )r8   xBNCattnr.   s          r<   forwardGPSA.forwardF   s    ''a#t'7'7'='=a'@A'E#33A6D!!!$FF1IaDNNA4GHPPQRTUWXZ[\X  A&..qQ7IIaLNN1r>   c                    UR                   u  p#nU R                  U5      R                  X#SU R                  X@R                  -  5      R	                  SSSSS5      nUS   US   pvU R
                  R                  USSS5      nU R                  U5      R	                  SSSS5      nXgR                  SS5      -  U R                  -  n	U	R                  SS9n	UR                  SS9nU R                  R                  SSSS5      n
S	[        R                  " U
5      -
  U	-  [        R                  " U
5      U-  -   nXR                  SS9R!                  S5      -  nU R#                  U5      nU$ )
Nr%   r   r'   r      r         ?)r@   r-   rC   r   rD   r7   expandr1   rE   r*   softmaxr5   viewr3   sigmoidsum	unsqueezer   )r8   rF   rG   rH   rI   r-   qk	pos_scorepatch_scoregatingrJ   s               r<   rB   GPSA.get_attentionQ   sU   ''aWWQZann9LMUUVWYZ\]_`bcd!ube1$$++Ar2r:	MM),44Q1a@	;;r2..$**<!))b)1%%"%-	""''2q!4U]]6**k9EMM&<QT]<]]R **2..~~d#r>   c                     U R                  U5      R                  S5      nU R                  R                  5       S S 2S S 2S4   S-  n[        R
                  " SXC45      UR                  S5      -  nU(       a  XS4$ U$ )Nr   rO         ?	nm,hnm->h)rB   meanr7   squeezer3   einsumsize)r8   rF   
return_mapattn_map	distancesdists         r<   get_attention_mapGPSA.get_attention_mapa   ss    %%a(--a0$$,,.q!Rx8B>	||K))>?)..QRBSS>!Kr>   c                    U R                   R                  R                  R                  [        R
                  " U R                  5      5        Sn[        U R                  S-  5      nUS-  S:X  a  US-
  S-  OUS-  n[        U5       H  n[        U5       H  nXBU-  -   nSU R                  R                  R                  US4'   SXC-
  -  U-  U R                  R                  R                  US4'   SXS-
  -  U-  U R                  R                  R                  US4'   M     M     U R                  R                  =R                  U R                  -  sl        g )Nr   r`   r%   r   rO   )r.   weightdatacopy_r3   eyer   intr   ranger1   r   )r8   locality_distancekernel_sizecenterh1h2positions          r<   
local_initGPSA.local_initj   s'     488!45$..B./*5/Q*>+/Q&KSTDT$BK(b 009;$$))(A+69:bk9JM^9^$$))(A+69:bk9JM^9^$$))(A+6	 ) % 	!!T%;%;;!r>   num_patchesreturnc                    [        US-  5      n[        R                  " SXS5      n[        R                  " U[        R                  S9R                  SS5      [        R                  " U[        R                  S9R                  SS5      -
  nUR                  X"5      nUR                  USS9R                  USS9nUS-  US-  -   nUR                  S5      US S 2S S 2S S 2S4'   UR                  S5      US S 2S S 2S S 2S4'   UR                  S5      US S 2S S 2S S 2S4'   U R                  R                  R                  nU R                  R                  R                  n	UR                  XS	9$ )
Nr`   r   r'   r#   rO   r   rQ   r%   r!   )rq   r3   r6   arangefloat32rU   repeatrepeat_interleaverX   r-   rm   r"   r#   to)
r8   r{   img_sizer7   indindxindyinddr"   r#   s
             r<   rA   GPSA.get_rel_indicesx   s<   {b()kk![qAXU]];@@BG,,xu}}=BB2qIJ 	 zz(-$$X1$5GGVWGXqy419$"&.."3Aq!QJ"&.."3Aq!QJ"&.."3Aq!QJ&&$$~~V~99r>   )r   r   r5   r   r   r1   r0   r   r-   r7   r*   r.   )   F        r   rR   NNF)__name__
__module____qualname____firstlineno__rq   boolfloatr)   rK   rB   rj   ry   r3   TensorrA   __static_attributes____classcell__r;   s   @r<   r   r   '   s    
 "!!')GG G 	G
 G G  %G G:	 <:3 :5<< : :r>   r   c                   `   ^  \ rS rSr      SS\S\S\S\S\4
U 4S jjjrSS jrS	 r	S
r
U =r$ )MHSA   r   r   r   r   r   c                 8  > XgS.n[         T
U ]  5         X l        X-  n	U	S-  U l        [        R
                  " XS-  4SU0UD6U l        [        R                  " U5      U l        [        R
                  " X40 UD6U l	        [        R                  " U5      U l
        g )Nr!   r$   r'   r&   )r(   r)   r   r*   r+   r,   qkvr/   r   r0   r   )r8   r   r   r   r   r   r"   r#   r9   r:   r;   s             r<   r)   MHSA.__init__   s     /"#%
99S'??B?I.IIc-"-	I.r>   c                 X   UR                   u  p4nU R                  U5      R                  X4SU R                  XPR                  -  5      R	                  SSSSS5      nUS   US   US   pnXxR                  SS5      -  U R                  -  n
U
R                  SS9R                  S5      n
[        US	-  5      n[        R                  " U[        R                  S
9R                  SS5      [        R                  " U[        R                  S
9R                  SS5      -
  nUR                  X5      nUR                  USS9R                  USS9nUS-  US-  -   nUS	-  nUR!                  U
R"                  U
R$                  5      n[        R&                  " SUU
45      U-  nU(       a  UU
4$ U$ )Nr'   r%   r   r   rN   rP   rO   rQ   r`   r~   ra   )r@   r   rC   r   rD   rE   r*   rT   rb   rq   r3   r   r   rU   r   r   r   r"   r#   rd   )r8   rF   rf   rG   rH   rI   r   rY   rZ   r.   rg   r   r   r   r   r   rh   ri   s                     r<   rj   MHSA.get_attention_map   s   ''ahhqk!!!4>>1;NOWWXY[\^_abdefa&#a&#a&aB++tzz9###+003qBw<XU]];@@BG,,xu}}=BB2qIJ 	 zz(-$$X1$5GGVWGXqy419$BJ	LL(..A	||K)X)>?!C>!Kr>   c                    UR                   u  p#nU R                  U5      R                  X#SU R                  X@R                  -  5      R	                  SSSSS5      nUR                  S5      u  pgnXgR                  SS5      -  U R                  -  n	U	R                  SS9n	U R                  U	5      n	X-  R                  SS5      R                  X#U5      nU R                  U5      nU R                  U5      nU$ )	Nr'   r%   r   r   rN   rP   rO   rQ   )r@   r   rC   r   rD   unbindrE   r*   rT   r   r0   r   )
r8   rF   rG   rH   rI   r   rY   rZ   r.   rJ   s
             r<   rK   MHSA.forward   s    ''ahhqk!!!4>>1;NOWWXY[\^_abdef**Q-aKKB''4::5|||#~~d#X  A&..qQ7IIaLNN1r>   )r   r   r0   r   r   r*   )r   Fr   r   NNr   )r   r   r   r   rq   r   r   r)   rj   rK   r   r   r   s   @r<   r   r      s`     "!!// / 	/
 / / /*0 r>   r   c                      ^  \ rS rSrSSSSS\R
                  \SSSS4S\S	\S
\S\	S\S\S\S\
\R                     S\
\R                     S\	S\4U 4S jjjrS rSrU =r$ )Block         @Fr   TrR   Nr   r   	mlp_ratior   r   r   	drop_path	act_layer
norm_layeruse_gpsar   c           	        > XS.n[         TU ]  5         U	" U40 UD6U l        Xl        U R                  (       a  [	        U4UUUUUS.UD6U l        O[        U4UUUUS.UD6U l        US:  a  [        U5      O[        R                  " 5       U l
        U	" U40 UD6U l        [        X-  5      n[        SUUUUS.UD6U l        g )Nr!   )r   r   r   r   r   )r   r   r   r   r   )in_featureshidden_featuresr   drop )r(   r)   norm1r   r   rJ   r   r
   r+   Identityr   norm2rq   r   mlp)r8   r   r   r   r   r   r   r   r   r   r   r   r"   r#   r9   mlp_hidden_dimr;   s                   r<   r)   Block.__init__   s      /*r*
 ==#!##"3 DI #!## DI 1:B),BKKM*r*
S_- 
*	

 
r>   c                     XR                  U R                  U R                  U5      5      5      -   nXR                  U R                  U R	                  U5      5      5      -   nU$ N)r   rJ   r   r   r   r8   rF   s     r<   rK   Block.forward   sK    tyyA788txx

1677r>   )rJ   r   r   r   r   r   )r   r   r   r   r+   GELUr   rq   r   r   r   Moduler)   rK   r   r   r   s   @r<   r   r      s      ""!!!)+*3!')0
0
 0
 	0

 0
 0
 0
 0
 BII0
 RYY0
 0
  %0
 0
d r>   r   c            )         ^  \ rS rSrSrSSSSSSS	S	S
SSSSSSS\SSSSS4S\S\S\S\S\S\S\S\S\S\	S\S\S\S\S\S\
\   S \\R                     S!\S"\S#\	4(U 4S$ jjjrS% r\R$                  R&                  S& 5       r\R$                  R&                  S1S' j5       r\R$                  R&                  S2S( j5       r\R$                  R&                  S)\R                  4S* j5       rS3S\S\
\   4S+ jjrS, rS1S-\	4S. jjrS/ rS0rU =r$ )4r   i  zHVision Transformer with support for patch or hybrid CNN input stage
          r'     tokeni      r   Fr   NrR   Tr   
patch_sizein_chansnum_classesglobal_pool	embed_dimdepthr   r   r   	drop_ratepos_drop_rateproj_drop_rateattn_drop_ratedrop_path_ratehybrid_backboner   local_up_to_layerr   use_pos_embedc                   > [         TU ]  5         UUS.nUS;   d   eXh-  nX@l        X0l        XPl        UU l        U=U l        =U l        U l        UU l	        UU l
        Ub  [        U4UUUS.UD6U l        O[        SUUUUS.UD6U l        U R                  R                  nUU l        [        R                   " ["        R$                  " SSU40 UD65      U l        [        R(                  " US9U l        U R                  (       aF  [        R                   " ["        R$                  " SUU40 UD65      U l        [/        U R,                  SS9  [1        X5      n[        R2                  " [5        U5       Vs/ s H  n[7        SUUU	U
UUUU   UUU:  US	.
UD6PM!     sn5      U l        U" U40 UD6U l        [=        US
SS9/U l        [        R(                  " U5      U l         US
:  a  [        RB                  " Xd40 UD6O[        RD                  " 5       U l#        [/        U R&                  SS9  U RI                  U RJ                  5        U RM                  5        H)  u  nn[O        US5      (       d  M  URQ                  5         M+     g s  snf )Nr!   ) avgr   )r   r   r   )r   r   r   r   r   )p{Gz?std)
r   r   r   r   r   r   r   r   r   r   r   head)num_chs	reductionmodulery   r   ))r(   r)   r   r   r   r   num_featureshead_hidden_sizer   r   r   r   patch_embedr   r{   r+   r2   r3   r6   	cls_tokenr/   pos_drop	pos_embedr   r   
ModuleListrr   r   blocksnormdictfeature_info	head_dropr,   r   r   apply_init_weightsnamed_moduleshasattrry   )r8   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   r#   r9   r{   dprinmr;   s                                r<   r)   ConVit.__init__  s}   2 	/2222	& &!2ENNND1DN!2*&* !!#	 
  D  *  !%!#	 
  D &&22&ekk!Q	&HR&HI

]3\\%++ai*VSU*VWDN$..c2'>mm U|%% $!  ##!((a&%.."3  $%% & y/B/	 ")qPQI.?JQBIIi;;TVT_T_Ta	dnn#.

4%%&&&(DAqq,'' )/%%s   2&Jc                    [        U[        R                  5      (       am  [        UR                  SS9  [        U[        R                  5      (       a9  UR
                  b+  [        R                  R                  UR
                  S5        g g g [        U[        R                  5      (       aU  [        R                  R                  UR
                  S5        [        R                  R                  UR                  S5        g g )Nr   r   r   rR   )	
isinstancer+   r,   r   rm   r&   init	constant_r   )r8   r   s     r<   r   ConVit._init_weights`  s    a##!((,!RYY''AFF,>!!!&&!, -?'2<<((GGaffa(GGahh, )r>   c                 
    SS1$ )Nr   r   r   r8   s    r<   no_weight_decayConVit.no_weight_decayi  s    [))r>   c                     [        SSS/S9$ )Nz ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )r8   coarses     r<   group_matcherConVit.group_matcherm  s    4-/CD
 	
r>   c                      U(       a   S5       eg )Nz$gradient checkpointing not supportedr   )r8   enables     r<   set_grad_checkpointingConVit.set_grad_checkpointingt  s    AAAz6r>   r|   c                     U R                   $ r   )r   r   s    r<   get_classifierConVit.get_classifierx  s    yyr>   c                     Xl         Ub  US;   d   eX l        US:  a'  [        R                  " U R                  U5      U l        g [        R
                  " 5       U l        g )N)r   r   r   r   )r   r   r+   r,   r   r   r   )r8   r   r   s      r<   reset_classifierConVit.reset_classifier|  sO    &""6666*>IAoBIIdnnk:	SUS^S^S`	r>   c                    U R                  U5      nU R                  (       a  XR                  -   nU R                  U5      nU R                  R                  UR                  S   SS5      n[        U R                  5       H2  u  p4X0R                  :X  a  [        R                  " X!4SS9nU" U5      nM4     U R                  U5      nU$ )Nr   rO   r   rQ   )r   r   r   r   r   rS   r@   	enumerater   r   r3   catr   )r8   rF   
cls_tokensublks        r<   forward_featuresConVit.forward_features  s    QNN"AMM!^^**1771:r2>
,FA***IIzo15AA - IIaLr>   
pre_logitsc                     U R                   (       a1  U R                   S:X  a  US S 2SS 24   R                  SS9OUS S 2S4   nU R                  U5      nU(       a  U$ U R                  U5      $ )Nr   r   rQ   r   )r   rb   r   r   )r8   rF   r  s      r<   forward_headConVit.forward_head  s_    (,(8(8E(A!QR%!$qAwANN1q0DIIaL0r>   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r  r   s     r<   rK   ConVit.forward  s'    !!!$a r>   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r{   r   r   r   r   r   )Tr   )r   r   r   r   __doc__r   rq   strr   r   r   r   r   r+   r   r)   r   r3   jitignorer   r   r   r  r  r  r  rK   r   r   r   s   @r<   r   r     s   
   #& !"!#%$&$&$&-1*3%&')"&/YY Y 	Y
 Y Y Y Y Y Y Y Y !Y "Y "Y  "!Y" &c]#Y$ RYY%Y&  #'Y(  %)Y*  +Y Yv- YY* * YY
 
 YYB B YY		  aC ahsm a1$ 1 r>   c                 h    UR                  SS 5      (       a  [        S5      e[        [        X40 UD6$ )Nfeatures_onlyz<features_only not implemented for Vision Transformer models.)getRuntimeErrorr   r   )variant
pretrainedkwargss      r<   _create_convitr!    s1    zz/4((YZZFvFFr>   c                 0    U SSS [         [        SSSSS.
UE$ )Nr   )r'   r   r   Tzpatch_embed.projr   z
apache-2.0)
urlr   
input_size	pool_sizerb   r   fixed_input_size
first_conv
classifierlicenser   )r#  r   s     r<   _cfgr*    s0    =t%.BX\(<	
  r>   ztimm/)	hf_hub_id)zconvit_tiny.fb_in1kzconvit_small.fb_in1kzconvit_base.fb_in1kr|   c           	      N    [        SSSSS9n[        SSU S.[        U40 UD6D6nU$ )	N
   rR   0   rN   r   r   r   r   convit_tinyr  r  r   r   r!  r  r   
model_argsmodels       r<   r0  r0    s;    rQPJf=Zf4PZKe^dKefELr>   c           	      N    [        SSSSS9n[        SSU S.[        U40 UD6D6nU$ )	Nr-  rR   r.  	   r/  convit_smallr1  r   r2  r3  s       r<   r8  r8    s;    rQPJg>jgDQ[Lf_eLfgELr>   c           	      N    [        SSSSS9n[        SSU S.[        U40 UD6D6nU$ )	Nr-  rR   r.  r   r/  convit_baser1  r   r2  r3  s       r<   r:  r:    s;    rRQJf=Zf4PZKe^dKefELr>   r   )r   )'r  typingr   r   r   r   r3   torch.nnr+   	timm.datar   r	   timm.layersr
   r   r   r   r   r   r   _builderr   _features_fxr   	_registryr   r   __all__r   r   r   r   r   r!  r*  default_cfgsr0  r8  r:  r   r>   r<   <module>rD     s  ( . -   A s s s * 1 < * _:299 _: _:D:299 :z7BII 7tXRYY XvG %'2 73'2	&  v      v  r>   