
    RЦiS                     p   S r SSKrSSKJrJr  SSKJr  SSKJrJ	r	J
r
  SSKrSSKJr  SSKJrJr  SSKJrJrJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJrJ r   SSK!J"r"J#r#  \ " S S5      5       r$\ " S S5      5       r%SHS jr& " S S\RN                  5      r( " S S\RN                  5      r) " S S\RN                  5      r* " S S\RN                  5      r+ " S S\RN                  5      r, " S S\RN                  5      r-SIS jr.SHS jr/\ " \/" S S!S"9\/" S#S$S"9\/" S%S!S"9\/" S&S!S"9\/" S'S!S"9\/" S(S!S)S*S+9\/" S,S!S-S*S+9\/" S.S!S/S*S+9\/" S0S1S"9\/" S2S1S)S*S+9\/" S3S1S-S*S+9\/" S4S1S/S*S+9\/" S5S6S)S*S+9\/" S7S6S-S*S+9\/" S8S6S/S*S+9S9.5      r0\SJS:\"4S; jj5       r1\SJS:\"4S< jj5       r2\SJS:\"4S= jj5       r3\SJS:\"4S> jj5       r4\SJS:\"4S? jj5       r5\SJS:\"4S@ jj5       r6\SJS:\"4SA jj5       r7\SJS:\"4SB jj5       r8\SJS:\"4SC jj5       r9\SJS:\"4SD jj5       r:\SJS:\"4SE jj5       r;\SJS:\"4SF jj5       r<\SJS:\"4SG jj5       r=g)Kaa  ViTamin

Paper: Designing Scalable Vison Models in the Vision-Language Era
A family of model weights on Huggingface: https://huggingface.co/collections/jienengchen/vitamin-family-661048126b72debdaca060bf

@inproceedings{chen2024vitamin,
  title={ViTamin: Designing Scalable Vision Models in the Vision-language Era},
  author={Chen, Jieneng and Yu, Qihang and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year={2024}
}

Based on Apache 2.0 licensed code at https://github.com/ViTamin/ViTamin

Modifications and timm support by Jieneng Chen 2024

Reference:
https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py
https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer_hybrid.py
    N)	dataclassfield)partial)OptionalUnionTupleOPENAI_CLIP_MEANOPENAI_CLIP_STD)create_act_layerget_norm_layerget_norm_act_layercreate_conv2dmake_divisibleDropPathHybridEmbed   )build_model_with_cfg)named_applycheckpoint_seq)register_modelgenerate_default_cfgs)VisionTransformercheckpoint_filter_fnc                       \ rS rSr% Sr\\S'   Sr\\S'   Sr	\
\S'   Sr\
\S	'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\   \S'   Sr\\S'   Srg)
VitConvCfg'         @expand_ratioTexpand_output   kernel_sizer   
group_sizeFpre_norm_actdwstride_modeavg2	pool_typedownsample_pool_typegelu	act_layer 
norm_layergh㈵>norm_epsdown_shortcutmlp N)__name__
__module____qualname____firstlineno__r   float__annotations__r    boolr"   intr#   r$   r&   strr(   r)   r+   r-   r.   r/   r   r0   __static_attributes__r1       R/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/vitamin.pyr   r   '   s    L%M4KJL$KIs &#&IsJHe$(M8D>(Cr<   r   c                       \ rS rSr% Sr\\\\\S4   4   S4   \S'   Sr	\\\\\S4   4   S4   \S'   Sr
\\S'   \" \S	9r\\S
'   Sr\\S'   Srg)VitCfg8   )`           .	embed_dim)   r!      rF   depths@   
stem_width)default_factoryconv_cfgr,   	head_typer1   N)r2   r3   r4   r5   rE   r   r   r9   r7   rH   rJ   r   r   rL   rM   r:   r;   r1   r<   r=   r?   r?   8   sq    9LIuU3c3h/0#56L6BFE%U38_,-s23BJ <Hj<Isr<   r?   c                    [        U [        R                  5      (       a  U R                  S   U R                  S   -  U R                  -  nX0R
                  -  n[        R                  R                  U R                  S[        R                  " SU-  5      5        U R                  b*  [        R                  R                  U R                  5        g g g )Nr   r          @)
isinstancennConv2dr"   out_channelsgroupsinitnormal_weightmathsqrtbiaszeros_)modulenameschemefan_outs       r=   
_init_convr`   A   s    &"))$$$$Q'&*<*<Q*??&BUBUUMM!
q$))C'M*BC;;"GGNN6;;' #	 %r<   c                   Z   ^  \ rS rSr      SS\S\S\S\S\S\4U 4S jjjrS	 r	S
r
U =r$ )StemJ   in_chsout_chsr+   r-   r.   rZ   c	                    > XxS.n	[         TU ]  5         [        [        XC5      US9n
X l        [        XS4SUS.U	D6U l        U
" U40 U	D6U l        [        X"S4SUS.U	D6U l        [        [        U 5        g )Ndevicedtypeepsr!   rF   striderZ   r   )super__init__r   r   re   r   conv1norm1conv2r   r`   )selfrd   re   r+   r-   r.   rZ   rh   ri   ddnorm_act_layer	__class__s              r=   ro   Stem.__init__K   s     / !3J!JPXY"6AQadQbQ
#G2r2
"7QRqtRrR
J%r<   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ N)rp   rq   rr   rs   xs     r=   forwardStem.forwarda   s.    JJqMJJqMJJqMr<   )rp   rr   rq   re   )r*   layernorm2dư>TNN)r2   r3   r4   r5   r9   r:   r6   r8   ro   r|   r;   __classcell__rv   s   @r=   rb   rb   J   se    
 $+"&& & 	&
 & & & &, r<   rb   c            	       N   ^  \ rS rSr    S	S\S\S\S\4U 4S jjjrS rSr	U =r
$ )
Downsample2dh   dimdim_outr(   rZ   c                    > XVS.n[         TU ]  5         [        R                  " SSSSS9U l        X:w  a   [        R
                  " XS4SU0UD6U l        g [        R                  " 5       U l        g )Nrg   r!   rF   r   F)r"   rm   paddingcount_include_padrZ   )rn   ro   rQ   	AvgPool2dpoolrR   expandIdentity)	rs   r   r   r(   rZ   rh   ri   rt   rv   s	           r=   ro   Downsample2d.__init__i   s]     /LLQq!W\]	>))C!E$E"EDK++-DKr<   c                 J    U R                  U5      nU R                  U5      nU$ ry   )r   r   rz   s     r=   r|   Downsample2d.forward{   s!    IIaLKKNr<   )r   r   )r'   TNN)r2   r3   r4   r5   r9   r:   r8   ro   r|   r;   r   r   s   @r=   r   r   h   sK    
 $(( ( 	(
 ( ($ r<   r   c                   \   ^  \ rS rSrSr       SS\S\S\S\S\4
U 4S jjjrS	 rS
rU =r	$ )StridedConv   zdownsample 2d as well
    r"   rm   r   in_chansrE   c                    > XgS.n[         T
U ]  5         [        [        S5      SS9n	[        R
                  " XE4XUS.UD6U l        U	" U40 UD6U l        g )Nrg   r~   r   rj   )r"   rm   r   )rn   ro   r   r   rQ   rR   projnorm)rs   r"   rm   r   r   rE   rh   ri   rt   r-   rv   s             r=   ro   StridedConv.__init__   sX     /^M:E
IIhq{cjqnpq	x.2.	r<   c                 J    U R                  U5      nU R                  U5      nU$ ry   r   r   rz   s     r=   r|   StridedConv.forward   s!    IIaLIIaLr<   r   )r!   rF   r   r!   rD   NN)
r2   r3   r4   r5   __doc__r9   ro   r|   r;   r   r   s   @r=   r   r      sc      ! // / 	/
 / / /" r<   r   c                   z   ^  \ rS rSrSr         SS\S\S\S\S\S\S	\S
\S\4U 4S jjjrSS jr	S r
SrU =r$ )MbConvLNBlock   zKPre-Norm Conv Block - 1x1 - kxk - 1x1, w/ inverted bottleneck (expand)
    rd   re   rm   	drop_pathr"   r-   r.   r+   r   c                   > XS.n[         TU ]  5         X1UsU l        U l        U l        [        X)-  5      n[        [        Xh5      US9nUS:X  a  [        X4SSS.UD6U l	        O?X:w  a   [        R                  " XS4SS0UD6U l	        O[        R                  " 5       U l	        U" U4S	S
0UD6U l        [        R                  " 5       U l        [        XS4SSS.UD6U l        [#        USS9U l        [        XU4USUSS.UD6U l        [#        USS9U l        [        XS4SS0UD6U l        US:  a  [-        U5      U l        g [        R                  " 5       U l        g )Nrg   rj   rF   avgT)r(   rZ   r   rZ   	apply_actFrl   )inplace)rm   dilationrT   rZ           )rn   ro   rm   rd   re   r   r   r   r   shortcutrQ   rR   r   pre_normdownr   	conv1_1x1r   act1	conv2_kxkact2	conv3_1x1r   r   )rs   rd   re   rm   r   r"   r-   r.   r+   r   rh   ri   rt   mid_chsprenorm_act_layerrv   s                  r=   ro   MbConvLNBlock.__init__   sX    /17.T[$, !78#$6z$MS[\Q;([EPT[XZ[DMIIfqJtJrJDMKKMDM)&HEHRHKKM	&vU!$URTU$Y=	&kg281W[_gceg$Y=	&wLLL09B),BKKMr<   c                 4    [        [        [        US9U 5        g )N)r^   )r   r   r`   )rs   r^   s     r=   init_weightsMbConvLNBlock.init_weights   s    GJv6=r<   c                 >   U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU$ ry   )	r   r   r   r   r   r   r   r   r   )rs   r{   r   s      r=   r|   MbConvLNBlock.forward   s    ==#MM!IIaL NN1IIaL NN1IIaL NN1NN1(r<   )r   r   r   r   r   r   r   rd   re   r   r   rm   )	r   r   r!   r~   r   r*   r   NNr,   )r2   r3   r4   r5   r   r9   r6   r:   ro   r   r|   r;   r   r   s   @r=   r   r      s     ! +"#"%#R#R #R 	#R
 #R #R #R #R #R  #R #RL> r<   r   c            	       b   ^  \ rS rSrSr    S	S\S\\\\\4   4   S\4U 4S jjjr	S r
SrU =r$ )
MbConvStages   z2MobileConv for stage 1 and stage 2 of ViTamin
    cfgimg_sizer   c                   > XES.n[         TU ]  5         SU l        [        S	UUR                  S.UD6U l        / n[        UR                  5      U l        [        UR                  S S 5       H  u  pUS:  a  UR                  US-
     OUR                  n
[        UR                  U   5       Vs/ s H"  n[        S	US:X  a  U
OU	U	US:X  a  SOSS.UD6PM$     nnU[        R                  " U6 /-  nM     [        R                  " U6 U l        [!        S	SUR                  S   UR                  S   S.UD6U l        g s  snf )
Nrg   F)rd   re   rF   r   r   )rd   re   rm   )rm   r   rE   r1   )rn   ro   grad_checkpointingrb   rJ   stemlenrE   
num_stages	enumeraterangerH   r   rQ   
Sequentialstagesr   r   )rs   r   r   r   rh   ri   rt   r   sr   stage_in_chsdblocksrv   s                r=   ro   MbConvStages.__init__   s[    /"' 
NN
 
	 cmm,bq 12FA1213==1-#..L szz!}- .A  -.T\s!"#q&Qa 	 .   r}}f-..F 3 mmV, 
]]1%mmA&
 	
	s   /)D<c                    U R                  U5      nU R                  (       a:  [        R                  R	                  5       (       d  [        U R                  U5      nOU R                  U5      nU R                  U5      nU$ ry   )r   r   torchjitis_scriptingr   r   r   rz   s     r=   r|   MbConvStages.forward  sY    IIaL""599+A+A+C+Ct{{A.AAAIIaLr<   )r   r   r   r   r   )   r!   NN)r2   r3   r4   r5   r   r?   r   r9   r   ro   r|   r;   r   r   s   @r=   r   r      sY    
 58'
'
 CsCx01'
 	'
 '
R r<   r   c                   `   ^  \ rS rSr      SS\S\S\S\\   S\S\4U 4S jjjr	S	 r
S
rU =r$ )GeGluMlpi  in_featureshidden_featuresr+   r-   rZ   dropc	                 R  > XxS.n	[         T
U ]  5         [        [        U=(       d    S5      SS9nU" U40 U	D6U l        [
        R                  " X4SU0U	D6U l        [        U5      U l	        [
        R                  " X4SU0U	D6U l
        [
        R                  " X!4SU0U	D6U l        g )Nrg   	layernormr   rj   rZ   )rn   ro   r   r   r   rQ   Linearw0r   actw1w2)rs   r   r   r+   r-   rZ   r   rh   ri   rt   rv   s             r=   ro   GeGluMlp.__init__  s     /^J,E+FDQ
{1b1	))KJtJrJ#I.))KJtJrJ))OJtJrJr<   c                     U R                  U5      nU R                  U R                  U5      5      U R                  U5      -  nU R	                  U5      nU$ ry   )r   r   r   r   r   rz   s     r=   r|   GeGluMlp.forward(  sD    IIaLHHTWWQZ 4771:-GGAJr<   )r   r   r   r   r   )r*   NTr   NN)r2   r3   r4   r5   r9   r:   r   r8   r6   ro   r|   r;   r   r   s   @r=   r   r     sq    
 $(,KK !K 	K
 !K K K K* r<   r   c           	      >   UR                  SS5      nUc   eUR                  SS 5      UR                  SS 5      S.n[        SX#R                  SS5      S.UD6n[        [        USS	9US
'   UR                  SS5        [        [        U U4[        [        USS9S.UD6$ )Nout_indicesr!   rh   ri   rg   r   )r   r   F)backboner   embed_layer
patch_sizer   getter)r   feature_cls)pretrained_filter_fnfeature_cfgr1   )
popgetr   r   r   
setdefaultr   r   r   dict)variant
pretrained	embed_cfgkwargsr   rt   r   s          r=   _create_vitaminr   /  s    **]A.K   JJx.GT9R	SBT	JJz14MTQSTH#K(OF=
lA& 2[hG  r<   c                 4    U SSS SSS[         [        SSSS	.UE$ )
Ni  )r!   r   r   g?bicubicTzpatch_embed.backbone.stem.conv1headmit)urlnum_classes
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanstd
first_conv
classifierlicenser	   )r   r   s     r=   _cfgr  A  s4    =t 7  r<   zjienengchen/ViTamin-S-LTTrD   )	hf_hub_idr   zjienengchen/ViTamin-SrC   zjienengchen/ViTamin-B-LTTzjienengchen/ViTamin-Bzjienengchen/ViTamin-L-224pxzjienengchen/ViTamin-L-256px)r!      r  g      ?)r  r   r   r   zjienengchen/ViTamin-L-336px)r!   P  r  zjienengchen/ViTamin-L-384px)r!   rC   rC   zjienengchen/ViTamin-L2-224px   zjienengchen/ViTamin-L2-256pxzjienengchen/ViTamin-L2-336pxzjienengchen/ViTamin-L2-384pxzjienengchen/ViTamin-XL-256px  zjienengchen/ViTamin-XL-336pxzjienengchen/ViTamin-XL-384px)z%vitamin_small_224.datacomp1b_clip_lttz!vitamin_small_224.datacomp1b_clipz$vitamin_base_224.datacomp1b_clip_lttz vitamin_base_224.datacomp1b_clipz!vitamin_large_224.datacomp1b_clipz!vitamin_large_256.datacomp1b_clipz!vitamin_large_336.datacomp1b_clipz!vitamin_large_384.datacomp1b_clipz"vitamin_large2_224.datacomp1b_clipz"vitamin_large2_256.datacomp1b_clipz"vitamin_large2_336.datacomp1b_clipz"vitamin_large2_384.datacomp1b_clipz"vitamin_xlarge_256.datacomp1b_clipz"vitamin_xlarge_336.datacomp1b_clipz"vitamin_xlarge_384.datacomp1b_clipreturnc                     [        SSS[        SSS9SS9n[        S	S
S[        SSSUS9n[	        SSU 0[        U40 UD6D6nU$ )N)rI      rC   rF      r   rI   r~   r   r-   r.   1drE   rH   rJ   rL   rM   rC         rO   Fr   rE   depth	num_heads	mlp_layer	mlp_ratioclass_tokenglobal_poolr   r   )vitamin_small_224r?   r   r   r   r   r   r   r   
model_argsmodels        r=   r  r  x  sj     $
 	I R1Bu	J eJe$zJd]cJdeELr<   c                     [        SSS[        SSS9SS9n[        S	S
S[        SSSUS9n[	        SSU 0[        U40 UD6D6nU$ )N)r
  r  rD   r  r
  r~   r   r  r  r  rD   r     rO   Fr   r  r   )vitamin_base_224r  r  s        r=   r   r     sk    !$
 	I R2Ru	CJ d:djIc\bIcdELr<   c                     [        SSS[        SSS9SS9n[        S	S
S[        SSSUS9n[	        SSU 0[        U40 UD6D6nU$ )N   i@  r  r  r#  r~   r   r  r  r  r        rO   Fr   r  r   )vitamin_large_224r  r  s        r=   r&  r&    sj    "$
 	I bB(bu	J eJe$zJd]cJdeELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSUS9	n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  r  r  r$  r%  rO   Fr   	r   rE   r  r  r  r  r  r  r   r   )vitamin_large_256r  r  s        r=   r)  r)    n    "$
 	I B"\^u	CJ eJe$zJd]cJdeELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSUS9	n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  r  r  r$  r%  rO   Fr   r(  r   )vitamin_large_336r  r  s        r=   r,  r,    sm    "$
 	I B"\^u	J eJe$zJd]cJdeELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSUS9	n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  rC   r  r$  r%  rO   Fr   r(  r   )vitamin_large_384r  r  s        r=   r.  r.    r*  r<   c                     [        SSS[        SSS9SS9n[        S	S
S[        SSSUS9n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  r  r$  r%  rO   Fr   r  r   )vitamin_large2_224r  r  s        r=   r0  r0    sk    "$
 	I bB(bu	J fZf4PZKe^dKefELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSUS9	n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  r  r  r$  r%  rO   Fr   r(  r   )vitamin_large2_256r  r  s        r=   r2  r2    o    "$
 	I B"\^u	CJ fZf4PZKe^dKefELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSUS9	n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  r  r  r$  r%  rO   Fr   r(  r   )vitamin_large2_336r  r  s        r=   r5  r5    sn    "$
 	I B"\^u	J fZf4PZKe^dKefELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSUS9	n[	        SSU 0[        U40 UD6D6nU$ )Nr"  r  r#  r~   r   r  r  r  rC   r  r$  r%  rO   Fr   r(  r   )vitamin_large2_384r  r  s        r=   r7  r7  (  r3  r<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSSUS9
n[	         SSU 0[        U40 UD6D6nU$ )NrB   rC   r  r  rB   r~   r   r  r  r  r  r      r%  rO   Fr   none
r   rE   r  r  r  r  r  r  	pos_embedr   r   vitamin_xlarge_256r  r  s        r=   r?  r?  ;  s}    "$
 	I B"\^u)UJ S)3S7;J7Q&7QSELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSSUS9
n[	        SSU 0[        U40 UD6D6nU$ )Nr9  r  rB   r~   r   r  r  r  r  r  r:  r%  rO   Fr   r;  r<  r   r>  r  r  s        r=   vitamin_xlarge_336rA  O  q    "$
 	I B"\^u)UJ fZf4PZKe^dKefELr<   c                     [        SSS[        SSS9SS9n[        S	S
SS[        SSSSUS9
n[	        SSU 0[        U40 UD6D6nU$ )Nr9  r  rB   r~   r   r  r  r  rC   r  r:  r%  rO   Fr   r;  r<  r   )vitamin_xlarge_384r  r  s        r=   rD  rD  b  rB  r<   r   )FN)F)>r   rX   dataclassesr   r   	functoolsr   typingr   r   r   r   torch.nnrQ   	timm.datar
   r   timm.layersr   r   r   r   r   r   r   _builderr   _manipulater   r   	_registryr   r   vision_transformerr   r   r   r?   r`   Modulerb   r   r   r   r   r   r   r  default_cfgsr  r   r&  r)  r,  r.  r0  r2  r5  r7  r?  rA  rD  r1   r<   r=   <module>rQ     se  *  (  ) )   7* * * * 4 < G       (299 <299 2")) 4>BII >B3299 3lryy :$	 %-1-3.@)-)s*<,0-3-@(,)s)<)-/S*B)-/S 3*0 *./S 3*0 *./S 3*0 +/0d+D*.0d 3+0 +/0d 3+0 +/0d 3+0 +/0d 3+0 +/0d 3+0 +/0d 3+0K(& (V 5F  & 4E  $ 5F  & 5F  $ 5F  & 5F  $ 6G  & 6G  $ 6G  & 6G  $ 6G  & 6G  $ 6G  r<   