
    RЦiv                        S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
   SSKJr  SSKrSSKJr  SSKJr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJr  S	S
K J!r!  S	SK"J#r#  S	SK$J%r%J&r&  S	SK'J(r(J)r)  S	SK*J+r+  S/r,\RZ                  " \.5      r/ " S S\R`                  5      r1 " S S\R`                  5      r2 " S S\R`                  5      r3 " S S\R`                  5      r4S8S jr5S9S jr6\(" \6" SSSS9\6" SSS9\6" S SS!9\6" S"SS!9\6" S#SS!9\6" S$SS!9\6" S%SS!9\6" S&SS!9\6" 5       \6" S'SS!9\6" 5       \6" S(SS!9\6" 5       S).5      r7\)S8S*\44S+ jj5       r8\)S8S*\44S, jj5       r9\)S8S*\44S- jj5       r:\)S8S*\44S. jj5       r;\)S8S*\44S/ jj5       r<\)S8S*\44S0 jj5       r=\)S8S*\44S1 jj5       r>\)S8S*\44S2 jj5       r?\)S8S*\44S3 jj5       r@\)S8S*\44S4 jj5       rA\)S8S*\44S5 jj5       rB\)S8S*\44S6 jj5       rC\)S8S*\44S7 jj5       rDg! \ a
    SSKJr   GNf = f):zRelative Position Vision Transformer (ViT) in PyTorch

NOTE: these models are experimental / WIP, expect changes

Hacked together by / Copyright 2022, Ross Wightman
    N)partial)ListOptionalTupleTypeUnion)Literal)FinalIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STD)	
PatchEmbedMlp
LayerScaleDropPathcalculate_drop_path_rates	RelPosMlp
RelPosBiasuse_fused_attn	LayerType   )build_model_with_cfg)feature_take_indices)named_apply
checkpoint)generate_default_cfgsregister_model)get_init_weights_vitVisionTransformerRelPosc                      ^  \ rS rSr% \\   \S'   SSSSSS\R                  SS4	S\	S\	S	\S
\S\
\\R                        S\S\S\\R                     4U 4S jjjrSS\
\R                      4S jjrSrU =r$ )RelPosAttention,   
fused_attn   FN        dim	num_headsqkv_biasqk_normrel_pos_cls	attn_drop	proj_drop
norm_layerc                   > XS.n[         TU ]  5         X-  S:X  d   S5       eX l        X-  U l        U R                  S-  U l        [        5       U l        [        R                  " XS-  4SU0UD6U l	        U(       a  U" U R                  40 UD6O[        R                  " 5       U l        U(       a  U" U R                  40 UD6O[        R                  " 5       U l        U(       a
  U" SSU0UD6OS U l        [        R                  " U5      U l        [        R                  " X40 UD6U l        [        R                  " U5      U l        g )	Ndevicedtyper   z$dim should be divisible by num_headsg         biasr'    )super__init__r'   head_dimscaler   r#   nnLinearqkvIdentityq_normk_normrel_posDropoutr+   projr,   )selfr&   r'   r(   r)   r*   r+   r,   r-   r0   r1   dd	__class__s               d/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/vision_transformer_relpos.pyr6   RelPosAttention.__init__/   s    /!#K%KK#"(]]d*
(*99S'??B?9@j5"5bkkm9@j5"5bkkmAL{=Y="=RVI.IIc-"-	I.    shared_rel_posc                    UR                   u  p4nU R                  U5      R                  X4SU R                  U R                  5      R                  SSSSS5      nUR                  S5      u  pxn	U R                  U5      nU R                  U5      nU R                  (       a  U R                  b  U R                  R                  5       n
OUb  Un
OS n
[        R                  R                  R                  XxU	U
U R                   (       a  U R"                  R$                  OSS9nOjXpR&                  -  nXxR)                  SS	5      -  nU R                  b  U R                  XS
9nOUb  X-   nUR+                  S	S9nU R#                  U5      nX-  nUR)                  SS5      R                  X4U5      nU R-                  U5      nU R/                  U5      nU$ )Nr2      r   r      r%   )	attn_mask	dropout_prH   r&   )shaper;   reshaper'   r7   permuteunbindr=   r>   r#   r?   get_biastorchr9   
functionalscaled_dot_product_attentiontrainingr+   pr8   	transposesoftmaxrA   r,   )rB   xrH   BNCr;   qkv	attn_biasattns               rE   forwardRelPosAttention.forwardL   s   ''ahhqk!!!4>>4==IQQRSUVXY[\^_`**Q-aKKNKKN??||' LL113	+*	 	##@@a#.2mm$..** A A JJA{{2r**D||'||D|H+,<<B<'D>>$'DAKK1%%aA.IIaLNN1rG   )r+   r#   r7   r>   r'   rA   r,   r=   r;   r?   r8   N)__name__
__module____qualname____firstlineno__r
   bool__annotations__r9   	LayerNormintr   r   Modulefloatr6   rW   Tensorrg   __static_attributes____classcell__rD   s   @rE   r!   r!   ,   s    d
 "!59!!*,,,// / 	/
 / "$ryy/2/ / / RYY/ /:"%,,)? " "rG   r!   c                   &  ^  \ rS rSrSSSSSSSS\R
                  \R                  SS4S\S\S\S	\	S
\	S\
\\R                        S\
\   S\S\S\S\\R                     S\\R                     4U 4S jjjrSS\
\R                     4S jjrSrU =r$ )RelPosBlockq         @FNr%   r&   r'   	mlp_ratior(   r)   r*   init_valuesr,   r+   	drop_path	act_layerr-   c           
      H  > XS.n[         TU ]  5         U" U40 UD6U l        [        UU4UUUU	UUS.UD6U l        U(       a  [        U4SU0UD6O[        R                  " 5       U l        U
S:  a  [        U
5      O[        R                  " 5       U l
        U" U40 UD6U l        [        SU[        X-  5      UUS.UD6U l        U(       a  [        U4SU0UD6O[        R                  " 5       U l        U
S:  a  [        U
5      U l        g [        R                  " 5       U l        g )Nr/   r(   r)   r*   r+   r,   r-   r}   r%   in_featureshidden_featuresr   dropr4   )r5   r6   norm1r!   rf   r   r9   r<   ls1r   
drop_path1norm2r   rq   mlpls2
drop_path2rB   r&   r'   r|   r(   r)   r*   r}   r,   r+   r~   r   r-   r0   r1   rC   rD   s                   rE   r6   RelPosBlock.__init__s   s%   " /*r*
#

 #!

 

	 FQ:cA{AbAVXVaVaVc1:R(9-R[[]*r*
 
0	

 
 FQ:cA{AbAVXVaVaVc1:R(9-R[[]rG   rH   c           
         XR                  U R                  U R                  U R                  U5      US95      5      -   nXR	                  U R                  U R                  U R                  U5      5      5      5      -   nU$ NrP   )r   r   rf   r   r   r   r   r   rB   r^   rH   s      rE   rg   RelPosBlock.forward   sb    4::a=Q_)` abb$**Q-)@ ABBrG   )rf   r   r   r   r   r   r   r   ri   )rj   rk   rl   rm   r9   GELUrp   rq   rs   rn   r   r   rr   r6   rW   rt   rg   ru   rv   rw   s   @rE   ry   ry   q   s      ""!59+/!!!)+*,,,,S,S ,S 	,S
 ,S ,S "$ryy/2,S "%,S ,S ,S ,S BII,S RYY,S ,S\%,,)?  rG   ry   c                   ,  ^  \ rS rSrSSSSSSSS\R
                  \R                  SS4S\S\S\S	\	S
\	S\
\\R                        S\
\   S\S\S\S\\R                     S\\R                     4U 4S jjjrS rSS\
\R                      4S jjrSrU =r$ )ResPostRelPosBlock   r{   FNr%   r&   r'   r|   r(   r)   r*   r}   r,   r+   r~   r   r-   c           
        > XS.n[         TU ]  5         Xpl        [        UU4UUUU	UUS.UD6U l        U" U40 UD6U l        U
S:  a  [        U
5      O[        R                  " 5       U l	        [        SU[        X-  5      UUS.UD6U l        U" U40 UD6U l        U
S:  a  [        U
5      O[        R                  " 5       U l        U R                  5         g )Nr/   r   r%   r   r4   )r5   r6   r}   r!   rf   r   r   r9   r<   r   r   rq   r   r   r   init_weightsr   s                   rE   r6   ResPostRelPosBlock.__init__   s    " /&#

 #!

 

	  *r*
1:R(9-R[[] 
0	

 
  *r*
1:R(9-R[[]rG   c                    U R                   b}  [        R                  R                  U R                  R
                  U R                   5        [        R                  R                  U R                  R
                  U R                   5        g g ri   )r}   r9   init	constant_r   weightr   rB   s    rE   r   ResPostRelPosBlock.init_weights   s[    'GGdjj//1A1ABGGdjj//1A1AB (rG   rH   c           
          XR                  U R                  U R                  XS95      5      -   nXR                  U R	                  U R                  U5      5      5      -   nU$ r   )r   r   rf   r   r   r   r   s      rE   rg   ResPostRelPosBlock.forward   sN    

499Q9+V WXX

488A; 788rG   )rf   r   r   r}   r   r   r   ri   )rj   rk   rl   rm   r9   r   rp   rq   rs   rn   r   r   rr   r6   r   rW   rt   rg   ru   rv   rw   s   @rE   r   r      s      ""!59+/!!!)+*,,,-- - 	-
 - - "$ryy/2- "%- - - - BII- RYY- -^C%,,)?  rG   r   c            8         ^  \ rS rSrSrSSSSSSS	S	S
SSSSSSSSSSSSSS\SS\SS4S\\\	\\4   4   S\\\	\\4   4   S\S\S\
S   S\S\S\S\S\S\S\\   S\S \S!\S"\\   S#\S$\S%\S&\S'\S(\
S)   S*\S+\\R"                     S,\\   S-\\   S.\\R"                     46U 4S/ jjjrSJS1 jrSKS2\S3\S0S4S4 jjr\R.                  R0                  S5 5       r\R.                  R0                  SLS6 j5       r\R.                  R0                  SMS7 j5       r\R.                  R0                  S0\R"                  4S8 j5       rSNS\S\\   4S9 jjr      SOS:\R<                  S;\\\\\   4      S<\S=\S>\S?\S@\S0\\\R<                     \	\R<                  \\R<                     4   4   4SA jjr    SPS;\\\\   4   SB\SC\4SD jjr!SE r"SLSF\4SG jjr#SH r$SIr%U =r&$ )Qr      aO  Vision Transformer w/ Relative Position Bias

Differing from classic vit, this impl
  * uses relative position index (swin v1 / beit) or relative log coord + mlp (swin v2) pos embed
  * defaults to no class token (can be enabled)
  * defaults to global avg pool for head (can be changed)
  * layer-scale (residual branch gain) enabled
      r2     avg      r{   TFư>r   Nr%   resetimg_size
patch_sizein_chansnum_classesglobal_pool) r   tokenmap	embed_dimdepthr'   r|   r(   r)   r}   class_tokenfc_normrel_pos_typerel_pos_dimrH   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)skipr   jaxmocor   fix_initembed_layerr-   r   block_fnc                 j  > [         T%U ]  5         UUS.nUS;   d   eU(       d  US:w  d   eU=(       d    [        [        R                  SS9nU=(       d    [        R
                  nX@l        X0l        XPl        U=U l	        =U l
        U l        U(       a  SOSU l        SU l        U" SUUUUS	.UD6U l        U R                  R                  n[!        U R                  S
5      (       a  U R                  R#                  5       OUn [%        UU R                  S9n!UR'                  S5      (       a(  U(       a  UU!S'   SU;   a  SU!S'   [        [(        40 U!D6n"O[        [*        40 U!D6n"SU l        U(       a  U"" SSU0UD6U l        Sn"U(       a7  [        R.                  " [0        R2                  " SU R                  U40 UD65      OSU l        [7        UU5      n#[        R8                  " [;        U5       V$s/ s H  n$U" SUUU	U
UU"UUUU#U$   UUS.UD6PM     sn$5      U l        [;        U5       V$s/ s H  n$[%        SU$ 3UU S9PM     sn$U l        U(       d	  U" U40 UD6O[        R@                  " 5       U l!        U(       a	  U" U40 UD6O[        R@                  " 5       U l"        [        RF                  " U5      U l$        US:  a"  [        RJ                  " U R                  U40 UD6O[        R@                  " 5       U l&        US:X  a  SOUU l'        UU l(        US:w  a  U RS                  SS9  ggs  sn$f s  sn$f )am  
Args:
    img_size: input image size
    patch_size: patch size
    in_chans: number of input channels
    num_classes: number of classes for classification head
    global_pool: type of global pooling for final sequence (default: 'avg')
    embed_dim: embedding dimension
    depth: depth of transformer
    num_heads: number of attention heads
    mlp_ratio: ratio of mlp hidden dim to embedding dim
    qkv_bias: enable bias for qkv if True
    qk_norm: Enable normalization of query and key in attention
    init_values: layer-scale init values
    class_token: use class token (default: False)
    fc_norm: use pre classifier norm instead of pre-pool
    rel_pos_type: type of relative position
    shared_rel_pos: share relative pos across all blocks
    drop_rate: dropout rate
    proj_drop_rate: projection dropout rate
    attn_drop_rate: attention dropout rate
    drop_path_rate: stochastic depth rate
    weight_init: weight init scheme
    fix_init: apply weight initialization fix (scaling w/ layer index)
    embed_layer: patch embedding layer
    norm_layer: normalization layer
    act_layer: MLP activation layer
r/   r   r   r   r   r   )epsr   r   F)r   r   r   r   
feat_ratio)window_sizeprefix_tokensr   
hidden_dimswinmodeNr'   )r&   r'   r|   r(   r)   r*   r}   r,   r+   r~   r-   r   zblocks.)modulenum_chs	reductionr   r   needs_resetr4   )*r5   r6   r   r9   rp   r   r   r   r   num_featureshead_hidden_sizer   num_prefix_tokensgrad_checkpointingpatch_embed	grid_sizehasattrr   dict
startswithr   r   rH   	ParameterrW   zeros	cls_tokenr   
ModuleListrangeblocksfeature_infor<   normr   r@   	head_dropr:   headweight_init_moder   r   )&rB   r   r   r   r   r   r   r   r'   r|   r(   r)   r}   r   r   r   r   rH   r   r   r   r   r   r   r   r-   r   r   r0   r1   rC   	feat_sizerrel_pos_argsr*   dprirD   s&                                        rE   r6    VisionTransformerRelPos.__init__   s   x 	/2222kW444B72<<T#B
(	& &ENNND1DN&1q"'& 
!	

 
 $$..	-4T5E5E|-T-TD'')Zd	AWAWX""5))-8\*%'-V$!)<|<K!*==K""-"H	"HR"HDKbmekk!T5K5KY&]Z\&]^sw'>mm  5\!%#  "  ##!''((a&%#  "!%# $$ QVV[P\^P\1D'!yAFP\^7>Jy/B/BKKM	 7>z)2r22;;=I.DORSOBIIdnnk@R@Y[YdYdYf	+6&+@k & %0 !;%#"^s   2"L+-L0returnc                    [         R                  " 5          [        U R                  5       H  u  p[        R
                  " SUS-   -  5      nUR                  R                  R                  R                  U5        UR                  R                  R                  R                  U5        M     SSS5        g! , (       d  f       g= f)z9Apply weight initialization fix (scaling w/ layer index).g       @r   N)rW   no_grad	enumerater   mathsqrtrf   rA   r   div_r   fc2)rB   layer_idlayerr8   s       rE   fix_init_weight'VisionTransformerRelPos.fix_init_weightt  sy    ]]_#,T[[#9		#A"67

&&++E2		$$))%0 $: __s   BB88
Cr   r   c                 f   U=(       d    U R                   nUS;   d   eSU;   a!  [        R                  " U R                  5      * OSnU R                  b(  [
        R                  R                  U R                  SS9  [        [        XUS9U 5        U R                  (       a  U R                  5         gg)a(  Initialize model weights.

Args:
    mode: Weight initialization mode ('jax', 'jax_nlhb', 'moco', or '').
    needs_reset: If True, call reset_parameters() on modules (default for after to_empty()).
        If False, skip reset_parameters() (for __init__ where modules already self-initialized).
)r   jax_nlhbr   r   r   nlhbr%   Nr   )stdr   )r   r   logr   r   r9   r   normal_r   r   r   r   )rB   r   r   	head_biass       rE   r   $VisionTransformerRelPos.init_weights|  s     ,t,,????39T>TXXd..//r	>>%GGOODNNO5(kRTXY==  " rG   c                     S1$ )Nr   r4   r   s    rE   no_weight_decay'VisionTransformerRelPos.no_weight_decay  s
    }rG   c                     [        SSS/S9$ )Nz^cls_token|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )rB   coarses     rE   group_matcher%VisionTransformerRelPos.group_matcher  s    *-/CD
 	
rG   c                     Xl         g ri   )r   )rB   enables     rE   set_grad_checkpointing.VisionTransformerRelPos.set_grad_checkpointing  s    "(rG   c                     U R                   $ ri   )r   r   s    rE   get_classifier&VisionTransformerRelPos.get_classifier  s    yyrG   c                     X4S.nXl         Ub  US;   d   eX l        US:  a(  [        R                  " U R                  U40 UD6U l        g [        R
                  " 5       U l        g )Nr/   r   r   )r   r   r9   r:   r   r<   r   )rB   r   r   r0   r1   rC   s         rE   reset_classifier(VisionTransformerRelPos.reset_classifier  s]    /&""6666*DORSOBIIdnnk@R@	Y[YdYdYf	rG   r^   indicesreturn_prefix_tokensr   
stop_early
output_fmtintermediates_onlyc           	         US;   d   S5       eUS:H  n/ n	[        [        U R                  5      U5      u  pUR                  u  ppU R	                  U5      nU R
                  b?  [        R                  " U R
                  R                  UR                  S   SS5      U4SS9nU R                  b  U R                  R                  5       OSn[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SUS-    n[        U5       H~  u  nnU R                  (       a/  [        R                  R                  5       (       d  [        UUUS	9nOU" UUS	9nUU
;   d  MU  U	R!                  U(       a  U R#                  U5      OU5        M     U R$                  (       aJ  U	 Vs/ s H  nUSS2SU R$                  24   PM     nnU	 Vs/ s H  nUSS2U R$                  S24   PM     n	nU(       ac  U R                  R'                  X45      u  nnU	 Vs/ s H7  nUR)                  UUUS5      R+                  SS
SS5      R-                  5       PM9     n	n[        R                  R                  5       (       d  U(       a  [/        [1        U	W5      5      n	U(       a  U	$ U R#                  U5      nX4$ s  snf s  snf s  snf )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    return_prefix_tokens: Return both prefix and spatial intermediate tokens
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWNLCz)Output format must be one of NCHW or NLC.r  Nr   rO   r   rQ   rP   r2   rJ   )r   lenr   rR   r   r   rW   catexpandrH   rV   jitis_scriptingr   r   r   appendr   r   dynamic_feat_sizerS   rT   
contiguouslistzip)rB   r^   r  r  r   r  r  r  rS   intermediatestake_indices	max_indexr_   _heightwidthrH   r   r   blkyr   HWs                           rE   forward_intermediates-VisionTransformerRelPos.forward_intermediates  sf   . _,Y.YY,&"6s4;;7G"Q  ggfQ>>%		4>>00RDaHaPA;?;N;N;Z,,557`d99!!##:[[F[[)a-0F'FAs&&uyy/E/E/G/GsAnE.9L $$TTYYq\qA ( !!ERS]Qq!D$:$:"::;]MSDQRMqQq$"8"8"99:MMR##55voFDAq^kl^kYZQYYq!Q3;;Aq!QGRRT^kMlyy%%'',@ ]M!BCM  IIaL TR ms   6KK&>K
prune_norm
prune_headc                    [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a,  [        R                  " 5       U l        U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr   r   r   )r   r  r   r9   r<   r   r   r  )rB   r  r1  r2  r&  r'  s         rE   prune_intermediate_layers1VisionTransformerRelPos.prune_intermediate_layers  sh     #7s4;;7G"Qkk.9q=1DI;;=DL!!!R(rG   c                    U R                  U5      nU R                  b?  [        R                  " U R                  R	                  UR
                  S   SS5      U4SS9nU R                  b  U R                  R                  5       OS nU R                   HI  nU R                  (       a/  [        R                  R                  5       (       d  [        X1US9nMC  U" XS9nMK     U R                  U5      nU$ )Nr   rO   r   rQ   rP   )r   r   rW   r  r  rR   rH   rV   r   r   r  r  r   r   )rB   r^   rH   r+  s       rE   forward_features(VisionTransformerRelPos.forward_features  s    Q>>%		4>>00RDaHaPA;?;N;N;Z,,557`d;;C&&uyy/E/E/G/GsnE9	 
 IIaLrG   
pre_logitsc                    U R                   (       a;  U R                   S:X  a"  US S 2U R                  S 24   R                  SS9OUS S 2S4   nU R                  U5      nU R	                  U5      nU(       a  U$ U R                  U5      $ )Nr   r   rQ   r   )r   r   meanr   r   r   )rB   r^   r9  s      rE   forward_head$VisionTransformerRelPos.forward_head  s~    =A=M=MQV=V!T++,,-22q29\]^_ab^b\cALLONN1q0DIIaL0rG   c                 J    U R                  U5      nU R                  U5      nU$ ri   )r7  r<  )rB   r^   s     rE   rg   VisionTransformerRelPos.forward  s'    !!!$a rG   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rH   r   )r   N)r   TF)T)NNN)NFFFr  F)r   FT)'rj   rk   rl   rm   __doc__r   ry   r   rq   r   r	   rs   rn   r   strr   r9   rr   r   r6   r   r   rW   r  ignorer  r  r  r  r  rt   r   r/  r4  r7  r<  rg   ru   rv   rw   s   @rE   r   r      s    5868#>C !!!+/ %! %)-#(!$&$&$&GN"+5.2-1(3=D1CsCx01D1 c5c?23D1 	D1
 D1 !!:;D1 D1 D1 D1 D1 D1 D1 "%D1 D1 D1  !D1" "##D1$ !%D1& 'D1( ")D1* "+D1, "-D1. !!CD/D10 1D12 bii3D14 !+5D16  	*7D18 299o9D1 D1L1# # # #& YY  YY
 
 YY) ) YY		  gC ghsm g 8<).$$',B ||B  eCcN34B  #'	B 
 B  B  B  !%B  
tELL!5tELL7I)I#JJ	KB L ./$#	3S	>*  	"1$ 1 rG   c           	      `    UR                  SS5      n[        [        X4S[        USS90UD6nU$ )Nout_indicesr2   feature_cfggetter)rE  feature_cls)popr   r   r   )variant
pretrainedkwargsrE  models        rE   !_create_vision_transformer_relposrN    sA    **]A.K [hG E
 LrG   r   c                 4    U SSS SSS[         [        SSSS	.UE$ )
Nr   )r2   r   r   g?bicubicTzpatch_embed.projr   z
apache-2.0)urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer;  r   
first_conv
classifierlicenser   )rQ  rL  s     rE   _cfgrZ  $  s5    =t'0F(  rG   zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_replos_base_patch32_plus_rpn_256-sw-dd486f51.pthztimm/)r2      r[  )rQ  	hf_hub_idrR  )r2      r]  )rQ  rR  zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_small_patch16_224-sw-ec2778b4.pth)rQ  r\  zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_224-sw-11c174af.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_base_patch16_224-sw-49049aed.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_srelpos_small_patch16_224-sw-6cdb8849.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_srelpos_medium_patch16_224-sw-ad702b8c.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_cls_224-sw-cfe8e259.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_base_patch16_gapcls_224-sw-1a341d6c.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_rpn_224-sw-5d2befd8.pth)z,vit_relpos_base_patch32_plus_rpn_256.sw_in1kz*vit_relpos_base_patch16_plus_240.untrainedz$vit_relpos_small_patch16_224.sw_in1kz%vit_relpos_medium_patch16_224.sw_in1kz#vit_relpos_base_patch16_224.sw_in1kz%vit_srelpos_small_patch16_224.sw_in1kz&vit_srelpos_medium_patch16_224.sw_in1kz)vit_relpos_medium_patch16_cls_224.sw_in1kz)vit_relpos_base_patch16_cls_224.untrainedz*vit_relpos_base_patch16_clsgap_224.sw_in1kz*vit_relpos_small_patch16_rpn_224.untrainedz)vit_relpos_medium_patch16_rpn_224.sw_in1kz)vit_relpos_base_patch16_rpn_224.untrainedr   c           	      X    [        SSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )z_ViT-Base (ViT-B/32+) w/ relative log-coord position and residual post-norm, no class token
          r      )r   r   r   r'   r   rK  )$vit_relpos_base_patch32_plus_rpn_256r   r   rN  rK  rL  
model_argsrM  s       rE   rb  rb  X  sG     s"UghJ-.e;EeIMjIc\bIceELrG   c           	      N    [        SSSSS9n[         SSU 0[        U40 UD6D6nU$ )zHViT-Base (ViT-B/16+) w/ relative log-coord position, no class token
    r   r`  r   ra  )r   r   r   r'   rK  ) vit_relpos_base_patch16_plus_240r   rN  rd  s       rE   rg  rg  b  sD     s"KJ-*a7AaEI*E_X^E_aELrG   c           	      R    [        SSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
GViT-Base (ViT-B/16) w/ relative log-coord position, no class token
    r     r      FTr   r   r   r'   r(   r   rK  )vit_relpos_small_patch16_224rh  rd  s       rE   rn  rn  l  sJ     s"TYcghJ-&]3=]AEjA[TZA[]ELrG   c           	      R    [        SSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
rj  r      r   r$   FTrm  rK  )vit_relpos_medium_patch16_224rh  rd  s       rE   rq  rq  v  sM     B!eUY[J-'^4>^BFzB\U[B\^ELrG   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	rj  r   r   r   FTrm  rK  )vit_relpos_base_patch16_224rh  rd  s       rE   rs  rs    sM     B"uVZ\J-%\2<\@DZ@ZSY@Z\ELrG   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
NViT-Base (ViT-B/16) w/ shared relative log-coord position, no class token
    r   rk  r   rl  FTr   r   r   r'   r(   r   r   rH   rK  )vit_srelpos_small_patch16_224rh  rd  s       rE   rw  rw    sS     B!eUZ.J .'^4>^BFzB\U[B\^ELrG   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
ru  r   rp  r   r$   FTrv  rK  )vit_srelpos_medium_patch16_224rh  rd  s       rE   ry  ry    sS     B!eUZ.J .(_5?_CG
C]V\C]_ELrG   c                 X    [        SSSSSSSSSS	9	n[         SS
U 0[        U40 UD6D6nU$ )zLViT-Base (ViT-M/16) w/ relative log-coord position, class token present
    r   rp  r   r$   Fr[  Tr   )	r   r   r   r'   r(   r   r   r   r   rK  )!vit_relpos_medium_patch16_cls_224rh  rd  s       rE   r{  r{    sV     B!eUZTw@J .+b8BbFJ:F`Y_F`bELrG   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
zLViT-Base (ViT-B/16) w/ relative log-coord position, class token present
    r   r   r   FTr   )r   r   r   r'   r(   r   r   rK  )vit_relpos_base_patch16_cls_224rh  rd  s       rE   r}  r}    sP     B"uZ^lsuJ-)`6@`DHD^W]D^`ELrG   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	a  ViT-Base (ViT-B/16) w/ relative log-coord position, class token present
NOTE this config is a bit of a mistake, class token was enabled but global avg-pool w/ fc-norm was not disabled
Leaving here for comparisons w/ a future re-train as it performs quite well.
r   r   r   FT)r   r   r   r'   r(   r   r   rK  )"vit_relpos_base_patch16_clsgap_224rh  rd  s       rE   r  r    sP     B"uVZhlnJ-,c9CcGKJGaZ`GacELrG   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )	^ViT-Base (ViT-B/16) w/ relative log-coord position and residual post-norm, no class token
    r   rk  r   rl  Fr   r   r   r'   r(   r   rK  ) vit_relpos_small_patch16_rpn_224rc  rd  s       rE   r  r    sM     B!eVhjJ-*a7AaEI*E_X^E_aELrG   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )	r  r   rp  r   r$   Fr  rK  )!vit_relpos_medium_patch16_rpn_224rc  rd  s       rE   r  r    sM     B!eVhjJ-+b8BbFJ:F`Y_F`bELrG   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )r  r   r   r   Fr  rK  )vit_relpos_base_patch16_rpn_224rc  rd  s       rE   r  r    sM     B"uWikJ-)`6@`DHD^W]D^`ELrG   r@  )r   )ErA  loggingr   	functoolsr   typingr   r   r   r   r   r	   ImportErrortyping_extensionsrW   torch.nnr9   	torch.jitr
   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   vision_transformerr   __all__	getLoggerrj   _loggerrr   r!   ry   r   r   rN  rZ  default_cfgsrb  rg  rn  rq  rs  rw  ry  r{  r}  r  r  r  r  r4   rG   rE   <module>r     s      5 5*    E
 
 
 + + 0 < 4$
%


H
%Bbii BJ3")) 3l: :zsbii sl		 %48 X 5" 372-2X,0 P- .2 Q. ,0 O, .2 Q. /3 R/ 26 U2 2626 V3 37&15 U2 26I%& %P H_   D[   @W   AX   ?V   AX   BY   E\   CZ   	F] 	 	 D[   E\   CZ  c  *))*s   G8 8HH