
    RЦil                    "   S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
JrJrJr  SSKrSSKJr  SSKJs  Jr  SSKJrJrJrJr  SSKJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-  SSK.J/r/  SS	K0J1r1  SS
K2J3r3  SSK4J5r5J6r6  S/r7 " S S\Rp                  5      r9 " S S\Rp                  5      r: " S S\Rp                  5      r; " S S\Rp                  5      r< SS\\=\R|                  4   S\Rp                  S\=S\\=\R|                  4   4S jjr?  SS\\=\R|                  4   S\Rp                  S\=S\@S\\=\R|                  4   4
S jjrASS\=S\@S\<4S jjrBSS\=S\\=\4   4S  jjrCSS\=S\\=\4   4S! jjrDSS\=S\\=\4   4S" jjrE\5" 0 S#\C" S$S%9_S&\C" S$S'S(S)S*9_S+\C" S$\\S'S(S)S,9_S-\C" S$\\S.S(S)S,9_S/\C" S$S0S(S)S*9_S1\C" S$S0S(S)S*9_S2\C" S$S0S(S)S*9_S3\C" S$S'S(S49_S5\C" S$S'S(S49_S6\C" S$S0S(S49_S7\C" S$S0S(S49_S8\C" S$S0S(S49_S9\C" S$S0S(S)S:S;9_S<\C" S$S0S(S)S:S;9_S=\C" S$S0S(S)S:S;9_S>\C" S$SS?9_S@\C" S$SS?9_0 SA\C" S$SS?9_SB\C" S$SS?9_SC\C" S$SS?9_SD\C" S$SES?9_SF\C" S$SES?9_SG\C" S$SHS?9_SI\C" S$SJS?9_SK\C" S$S'S(SJSL9_SM\C" S$SES?9_SN\C" S$SES?9_SO\C" SSP9_SQ\C" S$SRSSSTSTSU9_SV\C" S$SRSSSTSTSU9_SW\C" S$SRSSS49_SX\C" S$SRSSSTSTSU9_SY\D" S$SZSHS[9_S\\D" S$SZSHS[9_E0 S]\D" S$S^SES[9_S_\D" S$S'SES[9_S`\D" S$S0SaS[9_Sb\D" S$S0SS[9_Sc\D" S$S0SS[9_Sd\D" S$S0SS[9_Se\D" S$S0SS[9_Sf\D" S$SgSS[9_Sh\D" S$SgSS[9_Si\D" S$SgSS[9_Sj\D" S$S0SS[9_Sk\D" S$S0SS[9_Sl\C" S$\\SmSn9_So\C" S$\\SmSn9_Sp\C" S$\\SmSn9_Sq\C" S$\\SmSn9_Sr\C" S$\\SmSn9_E0 Ss\C" S$\\SmSn9_St\C" S$\\SmSn9_Su\C" S$\\SmSn9_Sv\C" S$\\SmSn9_Sw\C" S$\\SmSn9_Sx\C" S$\\SmSn9_Sy\C" S$\\SmSn9_Sz\E" S$S%9_S{\E" S$S%9_S|\E" S$S%9_S}\E" S$S%9_S~\E" S$S%9_S\E" S$S%9_S\E" S$S%9_S\E" S$S%9_S\E" S$SSS9_S\E" S$SSS9_E\E" S$S%9\E" S$S%9\E" S$S%9\E" S$SSS9S.E5      rF\6SS\@S\<4S jj5       rG\6SS\@S\<4S jj5       rH\6SS\@S\<4S jj5       rI\6SS\@S\<4S jj5       rJ\6SS\@S\<4S jj5       rK\6SS\@S\<4S jj5       rL\6SS\@S\<4S jj5       rM\6SS\@S\<4S jj5       rN\6SS\@S\<4S jj5       rO\6SS\@S\<4S jj5       rP\6SS\@S\<4S jj5       rQ\6SS\@S\<4S jj5       rR\6SS\@S\<4S jj5       rS\6SS\@S\<4S jj5       rT\6SS\@S\<4S jj5       rU\6SS\@S\<4S jj5       rV\6SS\@S\<4S jj5       rW\6SS\@S\<4S jj5       rX\6SS\@S\<4S jj5       rY\6SS\@S\<4S jj5       rZ\6SS\@S\<4S jj5       r[\6SS\@S\<4S jj5       r\\6SS\@S\<4S jj5       r]\6SS\@S\<4S jj5       r^\6SS\@S\<4S jj5       r_\6SS\@S\<4S jj5       r`\6SS\@S\<4S jj5       ra\6SS\@S\<4S jj5       rb\6SS\@S\<4S jj5       rc\6SS\@S\<4S jj5       rd\6SS\@S\<4S jj5       re\6SS\@S\<4S jj5       rf\6SS\@S\<4S jj5       rg\6SS\@S\<4S jj5       rh\6SS\@S\<4S jj5       ri\6SS\@S\<4S jj5       rj\6SS\@S\<4S jj5       rk\6SS\@S\<4S jj5       rl\6SS\@S\<4S jj5       rm\6SS\@S\<4S jj5       rn\6SS\@S\<4S jj5       ro\6SS\@S\<4S jj5       rp\6SS\@S\<4S jj5       rq\6SS\@S\<4S jj5       rr\6SS\@S\<4S jj5       rs\6SS\@S\<4S jj5       rt\6SS\@S\<4S jj5       ru\6SS\@S\<4S jj5       rv\6SS\@S\<4S jj5       rw\6SS\@S\<4S jj5       rx\6SS\@S\<4S jj5       ry\6SS\@S\<4S jj5       rz\6SS\@S\<4S jj5       r{\6SS\@S\<4S jj5       r|\6SS\@S\<4S jj5       r}g)aJ  EVA

EVA ViT from https://github.com/baaivision/EVA , paper: https://arxiv.org/abs/2211.07636

This file contains a number of ViT variants the utilise ROPE position embeddings, SwiGLU and other additions:
 * EVA & EVA02 model implementations that evolved from BEiT, additional models in vision_transformer.py.
 * `timm` original SBB ViT w/ ROPE position embeddings
 * Perception Encoder (PE) ViT from Meta (https://arxiv.org/abs/2504.13181)
 * ROPE-ViT from Naver AI (https://arxiv.org/abs/2403.13298)
 * DINOv3 from META AI Research (https://arxiv.org/abs/2508.10104)

@article{EVA,
  title={EVA: Exploring the Limits of Masked Visual Representation Learning at Scale},
  author={Fang, Yuxin and Wang, Wen and Xie, Binhui and Sun, Quan and Wu, Ledell and Wang, Xinggang and Huang,
  Tiejun and Wang, Xinlong and Cao, Yue},
  journal={arXiv preprint arXiv:2211.07636},
  year={2022}
}

EVA-02: A Visual Representation for Neon Genesis - https://arxiv.org/abs/2303.11331
@article{EVA02,
  title={EVA-02: A Visual Representation for Neon Genesis},
  author={Fang, Yuxin and Sun, Quan and Wang, Xinggang and Huang, Tiejun and Wang, Xinlong and Cao, Yue},
  journal={arXiv preprint arXiv:2303.11331},
  year={2023}
}

@article{bolya2025perception,
  title={Perception encoder: The best visual embeddings are not at the output of the network},
  author={Bolya, Daniel and Huang, Po-Yao and Sun, Peize and Cho, Jang Hyun and Madotto, Andrea and Wei, Chen and Ma,
    Tengyu and Zhi, Jiale and Rajasegaran, Jathushan and Rasheed, Hanoona and others},
  journal={arXiv preprint arXiv:2504.13181},
  year={2025}
}

@inproceedings{heo2024rotary,
  title={Rotary position embedding for vision transformer},
  author={Heo, Byeongho and Park, Song and Han, Dongyoon and Yun, Sangdoo},
  booktitle={European Conference on Computer Vision},
  pages={289--305},
  year={2024},
  organization={Springer}
}

@article{simeoni2025dinov3,
  title={{DINOv3}},
  author={Sim{'e}oni, Oriane and Vo, Huy V. and Seitzer, Maximilian and Baldassarre, Federico and Oquab, Maxime
    and Jose, Cijo and Khalidov, Vasil and Szafraniec, Marc and Yi, Seungeun and Ramamonjisoa, Micha{"e}l
    and Massa, Francisco and Haziza, Daniel and Wehrstedt, Luca and Wang, Jianyuan and Darcet, Timoth{'e}e
    and Moutakanni, Th{'e}o and Sentana, Leonel and Roberts, Claire and Vedaldi, Andrea and Tolan, Jamie
    and Brandt, John and Couprie, Camille and Mairal, Julien and J{'e}gou, Herv{'e} and Labatut, Patrick
    and Bojanowski, Piotr},
  year={2025},
  eprint={2508.10104},
  url={https://arxiv.org/abs/2508.10104},
}

DINOv3 code was a modification of existing EVA model and support modules, so licensed under Apache-2.0 like timm.
Weights from META remain under DINOv3 License (https://ai.meta.com/resources/models-and-libraries/dinov3-license/).

Modifications by / Copyright 2023 Ross Wightman, original copyrights below
    N)partial)AnyCallableDictListOptionalSetTupleUnion)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)
PatchEmbedMlpGluMlpSwiGLU	LayerNormDropPathcalculate_drop_path_ratesPatchDropoutWithIndicescreate_rope_embedapply_rot_embed_catapply_keep_indices_nlctrunc_normal_resample_patch_embedresample_abs_pos_embedglobal_pool_nlc	to_2tupleuse_fused_attnmaybe_add_maskAttentionRopeAttentionPoolLatent   )build_model_with_cfg)feature_take_indices)
checkpoint)generate_default_cfgsregister_modelEvac                   6  ^  \ rS rSr% Sr\R                  R                  \   \	S'                 SS\
S\
S\S\S\S	\
S
\S\S\\
   S\\   S\S\S\4U 4S jjjrSS jrSS jr  SS\\R"                     S\\R"                     4S jjrSS jrSrU =r$ )EvaAttentionh   zFEVA Attention with ROPE, no k-bias, and fused/unfused qkv options
    
fused_attndim	num_headsqkv_bias	qkv_fusedqkv_bias_separatenum_prefix_tokens	attn_drop	proj_dropattn_head_dim
norm_layerqk_norm
scale_normrotate_halfc                   > XS.n[         TU ]  5         U(       d  U(       a
  U
c   S5       eX l        X-  nU	b  U	nUU R                  -  nUS-  U l        X`l        [        5       U l        XPl        Xl        U(       a  [        R                  " UUS-  4SS0UD6U l        S=U l        =U l        U l        U(       a  [        R                  " [         R"                  " U40 UD65      U l        U R'                  S[         R"                  " U40 UD6SS	9  [        R                  " [         R"                  " U40 UD65      U l        OS=U l        =U l        U l        Oy[        R                  " UU4SU0UD6U l        [        R                  " UU4SS0UD6U l        [        R                  " UU4SU0UD6U l        SU l        S=U l        =U l        U l        U(       a  U
" U R,                  40 UD6O[        R.                  " 5       U l        U(       a  U
" U R,                  40 UD6O[        R.                  " 5       U l        [        R4                  " U5      U l        U(       a	  U
" U40 UD6O[        R.                  " 5       U l        [        R                  " UU40 UD6U l        [        R4                  " U5      U l        U R?                  5         g)
a  
Args:
    dim: Input dimension of the token embeddings
    num_heads: Number of attention heads
    qkv_bias: Whether to add a bias term to the query, key, and value projections
    qkv_fused: Whether qkv projections are fused into one projection or separate
    qkv_bias_separate: Whether to apply bias to qkv as a separate addition or part of F.linear() call
    num_prefix_tokens: Number of reg/cls tokens at the beginning of the sequence that
        should not have position embeddings applied
    attn_drop: Dropout rate for attention weights
    proj_drop: Dropout rate for the output projection
    attn_head_dim: Dimension of each attention head (if None, computed as dim // num_heads)
    norm_layer: Normalization layer constructor to use for QK and scale normalization
    qk_norm: Enable normalization of query (Q) and key (K) vectors with norm_layer
    scale_norm: Enable normalization (scaling) of attention output with norm_layer
    rotate_half: Use half rotation layout instead of interleaved
devicedtypeNz<norm_layer must be provided if qk_norm or scale_norm is Trueg         biasFk_bias)
persistent) super__init__r0   scaler4   r    r.   r3   r;   nnLinearqkvq_projk_projv_proj	Parametertorchemptyq_biasregister_bufferv_biasrB   head_dimIdentityq_normk_normDropoutr5   normprojr6   reset_parameters)selfr/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r>   r?   ddrS   attn_dim	__class__s                      N/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/eva.pyrE   EvaAttention.__init__m   s<   F /)i+ii)"#$$Hdnn,%
!2(*!2&yyhlEE"EDH6::DK:$+ ll5;;x+F2+FG$$Xu{{8/Jr/JW\$] ll5;;x+F2+FG:>>>dkDK))CGGBGDK))CDDDDK))CGGBGDKDH6::DK:$+9@j5"5bkkm9@j5"5bkkmI.2<Jx.2."++-	IIh2r2	I. 	    c                     U R                   bR  [        R                  R                  U R                   5        [        R                  R                  U R                  5        U R                  5         g)z"Initialize parameters and buffers.N)rP   rG   initzeros_rR   _init_buffersr[   s    r_   rZ   EvaAttention.reset_parameters   s@    ;;"GGNN4;;'GGNN4;;'ra   c                 T    U R                   b  U R                   R                  5         gg)z.Compute and fill non-persistent buffer values.N)rB   zero_rf   s    r_   re   EvaAttention._init_buffers   s!    ;;"KK #ra   rope	attn_maskc                    UR                   u  pEnU R                  b  U R                  c  U R                  U5      nO[        R                  " U R                  U R
                  U R                  45      nU R                  (       a  U R                  U5      nXx-  nO)[        R                  " XR                  R                  US9nUR                  XESU R                  S5      R                  SSSSS5      nUR                  S5      u  pnOU R                  U5      R                  XEU R                  S5      R!                  SS5      n	U R#                  U5      R                  XEU R                  S5      R!                  SS5      n
U R%                  U5      R                  XEU R                  S5      R!                  SS5      nU R'                  U	5      U R)                  U
5      pUb  U R*                  n[-        U S	S
5      n[        R                  " U	SS2SS2SU2SS24   [/        U	SS2SS2US2SS24   X-S9/SS9R1                  U5      n	[        R                  " U
SS2SS2SU2SS24   [/        U
SS2SS2US2SS24   X-S9/SS9R1                  U5      n
U R2                  (       a?  [        R4                  " XUUU R6                  (       a  U R8                  R:                  OSS9nOQXR<                  -  n	XR!                  SS5      -  n[?        X5      nURA                  SS9nU R9                  U5      nX-  nUR!                  SS5      R                  XEU5      nU RC                  U5      nU RE                  U5      nU RG                  U5      nU$ )aY  Forward pass for the attention module.

Args:
    x: Input tensor of shape (batch_size, sequence_length, embedding_dim)
    rope: Rotary position embeddings tensor for position-aware attention
    attn_mask: Optional attention mask to apply during attention computation

Returns:
    Tensor of shape (batch_size, sequence_length, embedding_dim)
N)weightrA   r@      r   r$      r;   F)halfr/           )rl   	dropout_p)$shaperI   rP   rN   catrB   rR   r3   Flinearrn   reshaper0   permuteunbindrJ   	transposerK   rL   rU   rV   r4   getattrr   type_asr.   scaled_dot_product_attentiontrainingr5   prF   r!   softmaxrX   rY   r6   )r[   xrk   rl   BNCrI   r1   qkvnptrr   attns                  r_   forwardEvaAttention.forward   s     ''a88{{"hhqk 99dkk4;;%LM))((1+COC((1XX__8LC++aAt~~r:BB1aAqQCjjmGA!A&&qT^^R@JJ1aPAA&&qT^^R@JJ1aPAA&&qT^^R@JJ1aPA{{1~t{{1~1((C46D		1Q4C4]+-@1aq=AQSW-cdjklttuvwA		1Q4C4]+-@1aq=AQSW-cdjklttuvwA??..a#.2mm$..**A JJAB++D!$2D<<B<'D>>$'DAKK1%%aA.IIaLIIaLNN1ra   c                 $    U R                  5         g)z"Initialize non-persistent buffers.N)re   rf   s    r_   init_non_persistent_buffers(EvaAttention.init_non_persistent_buffers  s    ra   )r5   r.   rB   rV   rK   rX   r0   r4   rY   r6   rP   rU   rJ   rI   r3   r;   rF   rR   rL   )   TTFr$   rt   rt   NNFTFNNreturnNNN)__name__
__module____qualname____firstlineno____doc__rN   jitFinalbool__annotations__intfloatr   r   rE   rZ   re   Tensorr   r   __static_attributes____classcell__r^   s   @r_   r,   r,   h   s2   		%%
 !"&+%&!!+/-1!# %!I I  I  	I 
 I   $I   #I  I  I  $C=I  !*I  I  I  I  I V  ,004	> 5<<(>  -	>@ ra   r,   c            (       J  ^  \ rS rSrSSSSSSSSSSSSSS	\R
                  \S	S	S	4S
\S\S\S\S\	S\S\S\S\S\S\
S\S\	S\	S\	S\\	   S\S\S\\   4&U 4S jjjrS%S jr  S&S \R                   S!\\R                      S"\\R                      S\R                   4S# jjrS$rU =r$ )'EvaBlocki	  T      @Fr   r$   evart   Nr/   r0   r1   r2   	mlp_ratio
swiglu_mlpswiglu_align_to	scale_mlpscale_attn_innerr4   	attn_typer;   r6   r5   	drop_pathinit_values	act_layerr8   r7   c                   > UUS.n[         TU ]  5         U" U40 UD6U l        US:X  a  [        O[        nU" U4UUUU
UUUUU	US.
UD6U l        UU l        Ub+  [        R                  " [        R                  " U40 UD65      OSU l        US:  a  [        U5      O[        R                  " 5       U l        U" U40 UD6U l        [!        X-  5      nU(       a`  U(       d  U(       a   [#        SUUU(       a  UOSUUS.UD6U l        OQ['        SUUS-  U(       a  UOS[        R(                  SUS	.UD6U l        O[+        SUUUU(       a  UOSUS
.UD6U l        Ub+  [        R                  " [        R                  " U40 UD65      OSU l        US:  a  [        U5      O[        R                  " 5       U l        U R1                  5         g)a>  Initialize the EVA transformer block.

Args:
  dim: Input dimension of the token embeddings
    num_heads: Number of attention heads
    qkv_bias: Whether to use bias terms in query, key, value projections
    qkv_fused: Whether to use a single projection for query, key, value
    mlp_ratio: Ratio of MLP hidden dimension to input dimension
    swiglu_mlp: Whether to use SwiGLU activation in the MLP
    scale_mlp: Whether to use normalization in the MLP
    scale_attn_inner: Whether to use normalization within the attention mechanism
    num_prefix_tokens: Number of tokens at the beginning of the sequence (class tokens, etc.)
    attn_type: Type of attention module to use ('eva' or 'rope')
    proj_drop: Dropout rate for projection layers
    attn_drop: Dropout rate for attention matrix
    drop_path: Stochastic depth rate
    init_values: Initial value for LayerScale, None = no LayerScale
    act_layer: Activation layer constructor
    norm_layer: Normalization layer constructor
    attn_head_dim: Dimension of each attention head (if None, computed as dim // num_heads)
r=   rk   
r0   r1   r2   r4   r5   r6   r7   r8   r:   r;   Nrt   in_featureshidden_featuresr8   dropalign_torp   Fr   r   r8   r   	gate_lastr   r   r   r   r8   r    )rD   rE   norm1r"   r,   r   r   rG   rM   rN   rO   gamma_1r   rT   
drop_path1norm2r   r   mlpr   SiLUr   gamma_2
drop_path2rZ   )r[   r/   r0   r1   r2   r   r   r   r   r   r4   r   r;   r6   r5   r   r   r   r8   r7   r>   r?   kwargsr\   attn_clsr   r^   s                             r_   rE   EvaBlock.__init__  s   \ /*r*
$-$7=\
/'!'#
 
	 '?J?Vr||EKK$:r$:;\`1:R(9-R[[]*r*
co.O!  #$3-6zD",  "  #$3a$7-6zD gg#"    /#)2: DH @K?Vr||EKK$:r$:;\`1:R(9-R[[] 	ra   r   c                     U R                   bi  [        R                  R                  U R                   U R                  5        [        R                  R                  U R
                  U R                  5        gg)zInitialize parameters.N)r   rG   rc   	constant_r   r   rf   s    r_   rZ   EvaBlock.reset_parametersw  sM    <<#GGdllD,<,<=GGdllD,<,<= $ra   r   rk   rl   c           
         U R                   cc  XR                  U R                  U R                  U5      X#S95      -   nXR	                  U R                  U R                  U5      5      5      -   nU$ XR                  U R                   U R                  U R                  U5      X#S9-  5      -   nXR	                  U R                  U R                  U R                  U5      5      -  5      -   nU$ N)rk   rl   )r   r   r   r   r   r   r   r   r[   r   rk   rl   s       r_   r   EvaBlock.forward}  s     <<OODIIdjjm$I$\]]AOODHHTZZ]$;<<A  OODLL499TZZ]QU93k$kllAOODLL488DJJqM3J$JKKAra   )	r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   rG   GELUr   r   r   r   strr   r   rE   rZ   rN   r   r   r   r   r   s   @r_   r   r   	  s}    ""!$#$#%*%&" %!!!+/"$''#,+/-j j  j  	j 
 j  j  j  !j  j  #j   #j  j  j  j  j   !j " "%#j $  %j & !'j ( $C=)j  j X> ,004	|| 5<<(  -	
 
 ra   r   c            (       X  ^  \ rS rSrSrSSSSSSSSSSS	S	S	S
\R                  \R                  S
S
S
4S\S\S\	S\	S\
S\S\	S\	S\S\	S\	S\S\
S\
S\
S\\
   S\S\S\\   4&U 4S jjjr  S%S\R                   S \\R                      S!\\R                      S"\R                   4S# jjrS$rU =r$ )&EvaBlockPostNormi  zEEVA block w/ post-norm and support for swiglu, MLP norm scale, ROPE. Tr   r   Fr   r$   rt   Nr/   r0   r1   r2   r   r   r;   r   r   r   r   r4   r6   r5   r   r   r   r8   r7   c                   > UUS.n[         TU ]  5         US:X  a  [        O[        nU" U4UUUUUUUUUUS.
UD6U l        U" U40 UD6U l        US:  a  [        U5      O[        R                  " 5       U l	        [        X-  5      nU(       aY  U
(       a   [        SUUU
(       a  UOSUU	S.UD6U l        OQ[        SUUS-  U
(       a  UOS[        R                  SUS	.UD6U l        O[        SUUUU
(       a  UOSUS
.UD6U l        U" U40 UD6U l        US:  a  [        U5      U l        g[        R                  " 5       U l        g)al  Initialize the post-norm EVA transformer block.

Args:
  dim: Input dimension of the token embeddings
    num_heads: Number of attention heads
    qkv_bias: Whether to use bias terms in query, key, value projections
    qkv_fused: Whether to use a single projection for query, key, value
    mlp_ratio: Ratio of MLP hidden dimension to input dimension
    swiglu_mlp: Whether to use SwiGLU activation in the MLP
    scale_mlp: Whether to use normalization in the MLP
    scale_attn_inner: Whether to use normalization within the attention mechanism
    num_prefix_tokens: Number of tokens at the beginning of the sequence (class tokens, etc.)
    attn_type: Type of attention module to use ('eva' or 'rope')
    proj_drop: Dropout rate for projection layers
    attn_drop: Dropout rate for attention matrix
    drop_path: Stochastic depth rate
    init_values: Initial value for LayerScale, None = no LayerScale (NOTE: ignored for post-norm block)
    act_layer: Activation layer constructor
    norm_layer: Normalization layer constructor
    attn_head_dim: Dimension of each attention head (if None, computed as dim // num_heads)
r=   rk   r   rt   Nr   rp   Fr   r   r   )rD   rE   r"   r,   r   r   r   rG   rT   r   r   r   r   r   r   r   r   r   )r[   r/   r0   r1   r2   r   r   r;   r   r   r   r   r4   r6   r5   r   r   r   r8   r7   r>   r?   r   r\   r   r   r^   s                             r_   rE   EvaBlockPostNorm.__init__  so   \ /$-$7=\
/'!'#
 
	  *r*
1:R(9-R[[]co.!  #$3-6zD",  "  #$3a$7-6zD gg#"    /#)2: DH  *r*
1:R(9-R[[]ra   r   rk   rl   r   c                     XR                  U R                  U R                  XUS95      5      -   nXR                  U R	                  U R                  U5      5      5      -   nU$ r   )r   r   r   r   r   r   r   s       r_   r   EvaBlockPostNorm.forward  sR     

499QY9+W XYY

488A; 788ra   )r   r   r   r   r   r   r   )r   r   r   r   r   rG   r   r   r   r   r   r   r   r   rE   rN   r   r   r   r   r   s   @r_   r   r     s   P
 ""!" %$#$#%*%&!!!+/"$''#%<<+/-dSdS dS 	dS
 dS dS dS dS dS !dS dS #dS  #dS dS dS  !dS" "%#dS$  %dS& !'dS( $C=)dS dSR ,004	|| 5<<(  -	
 
 ra   r   c            [         ^  \ rS rSrSrSSSSSSS	S	S
S
SSSSSSSSSSSS\SS
SSS
SSSSSSSSSSSSSSSSSS4.S\\\\\4   4   S\\\\\4   4   S\S\S\	S\S\S\S\
S\
S\S \
S!\S"\
S#\
S$\	S%\S&\S'\S(\S)\S*\S+\S,\\   S-\
S.\S/\
S0\
S1\
S2\\	   S3\S4\	S5\S6\
S7\
S8\
S9\\
   S:\\
   S;\\   S<\\   S=\
S>\
S?\\\\\4   \4      S@\4XU 4SA jjjrScSB\
4SC jjrSdSE jrScSF\R$                  SB\
SDS4SG jjr\R*                  R,                  SD\\	   4SH j5       r\R*                  R,                  ScSI\
SDS4SJ jj5       r\R*                  R,                  SeSK\
SD\\	\4   4SL jj5       r\R*                  R,                  SD\R$                  4SM j5       rSfS\S\\	   SDS4SN jjr  SgS\\\\4      S\\\\4      SDS4SO jjrSD\\R@                  \\R@                     4   4SP jr!      ShSQ\R@                  SR\\\\"\   4      SS\
ST\
SU\
SV\	SW\
SD\\"\R@                     \\R@                  \"\R@                     4   4   4SX jjr#   SiSR\\\"\   4   SY\
SZ\
4S[ jjr$SfSQ\R@                  S\\\	   SD\R@                  4S] jjr%SQ\R@                  SD\R@                  4S^ jr&SeSQ\R@                  S_\
SD\R@                  4S` jjr'SQ\R@                  SD\R@                  4Sa jr(Sbr)U =r*$ )jr*   i  a  Eva Vision Transformer w/ Abs & Rotary Pos Embed

This class implements the EVA and EVA02 models that were based on the BEiT ViT variant
  * EVA - abs pos embed, global avg pool
  * EVA02 - abs + rope pos embed, global avg pool, SwiGLU, scale Norm in MLP (ala normformer)
      r@     avg      Tr   Fr   r   rt   Nrx   ijg     @gMbP?img_size
patch_sizein_chansnum_classesglobal_pool	embed_dimdepthr0   r1   r2   r   r   r   r   r   r   	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rater8   r   class_tokennum_reg_tokensno_embed_classuse_abs_pos_embuse_rot_pos_emb	rope_typerope_grid_offsetrope_grid_indexingrope_temperaturerope_rotate_halfuse_post_normuse_pre_transformer_normuse_post_transformer_normuse_fc_normattn_pool_num_headsattn_pool_mlp_ratiodynamic_img_sizedynamic_img_padref_feat_shapehead_init_scalec/                 	  > [         T;U ]  5         U-U.S.n/US;   d   eX@l        X0l        XPl        U=U l        =U l        U l        U(       a  SOSU-   U l        UU l	        U)U l
        SU l        U$n0U&b  U&n1OUS:H  n1U%b  U%n2OU1(       + n20 n3U)(       a  U3R                  [        SSS	95        [        S+UUUUU*U$(       + S
.U3DU/D6U l        U R                  R                   n4[#        U R                  S5      (       a  U R                  R%                  5       OUn5U(       a-  [&        R(                  " [*        R,                  " SSU40 U/D65      OSU l        U(       a-  [&        R(                  " [*        R,                  " SUU40 U/D65      OSU l        U=(       a    U R0                  SL U l        U(       a  U4OU4U R                  -   n6U(       a-  [&        R(                  " [*        R,                  " SU6U40 U/D65      OSU l        [&        R6                  " US9U l        US:  a  [;        UU R                  S9U l        OSU l        SU l        U(       a  U+b  [A        U+5      OSn+[        S+UUU)(       a  SOU R                  RB                  U!U S.U/D6n7US:X  a   U7R                  [        US95        SU l        O US:X  a  U7R                  [        SUU+S95        [E        S+SU0U7D6U l#        OSU l#        U0(       a	  U" U40 U/D6O[&        RH                  " 5       U l%        [M        UU5      n8U#(       a  [N        O[P        n9[&        RR                  " [U        U5       V:s/ s HK  n:U9" S+0 SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU"_S U R                  _S!U_S"U_S#U8U:   _S$U_S%U_U/D6PMM     sn:5      U l+        [U        U5       V:s/ s H  n:[        S&U: 3UU5S'9PM     sn:U l,        U2(       a	  U" U40 U/D6O[&        RH                  " 5       U l-        US(:X  aA  []        U R                  4U'=(       d    UU(=(       d    UU[&        R^                  S).U/D6U l0        OSU l0        U1(       a	  U" U40 U/D6O[&        RH                  " 5       U l1        [&        R6                  " U5      U l2        US:  a  [&        Rf                  " Xd40 U/D6O[&        RH                  " 5       U l4        U,U l5        U Rm                  SS*9  gs  sn:f s  sn:f ),a  Initialize the EVA Vision Transformer model.

Args:
    img_size: Input image size (single int for square, or tuple for rectangular)
    patch_size: Patch size to divide image into tokens (single int for square, or tuple)
    in_chans: Number of input image channels
    num_classes: Number of classes (output dim) for classification head (final projection), 0 for pass-through
    global_pool: Type of global pooling for final sequence ('avg', 'token', 'map', etc.)
    embed_dim: Embedding dimension for tokens
    depth: Number of transformer blocks
    num_heads: Number of attention heads
    qkv_bias: Enable bias for query, key, value projections
    qkv_fused: Use a single projection for query, key, value
    mlp_ratio: Ratio of mlp hidden dim to embedding dim
    swiglu_mlp: Use SwiGLU activation in MLP
    scale_mlp: Apply scaling normalization in MLP (normformer style)
    scale_attn_inner: Apply scaling normalization inside attention
    attn_type: Type of attention module to use
    drop_rate: Dropout rate after final projection and pooling
    pos_drop_rate: Dropout rate for positional embeddings
    patch_drop_rate: Rate of dropping patches during training
    proj_drop_rate: Dropout rate for projections
    attn_drop_rate: Dropout rate for attention
    drop_path_rate: Stochastic depth rate
    norm_layer: Normalization layer constructor
    init_values: Initial layer-scale values
    class_token: Use class token
    num_reg_tokens: Number of additional learnable 'register' tokens to add to the sequence
    no_embed_class: Don't include position embeddings for class (or reg) tokens
    use_abs_pos_emb: Use absolute (learned) positional embeddings
    use_rot_pos_emb: Use rotary position embeddings
    rope_type: Type of RoPE to use ('cat', 'mixed', 'dinov3', etc.).
    rope_grid_offset: Offset for rotary position embedding grid
    rope_grid_indexing: Indexing mode for rotary position embeddings ('ij' or 'xy')
    rope_temperature: Temperature parameter for ROPE frequency computation
    rope_rotate_half: Use half rotation layout (rotate D/2 dims), else use interleaved rotation layout
    use_post_norm: Use post-norm transformer block type
    use_pre_transformer_norm: Use normalization layer before transformer blocks
    use_post_transformer_norm: Use normalization layer after transformer blocks
    use_fc_norm: Use normalization layer after pooling, before final classifier
    attn_pool_num_heads: Number of heads in attention pooling
    attn_pool_mlp_ratio: MLP ratio in attention pooling
    dynamic_img_size: Support dynamic image sizes in forward pass
    dynamic_img_pad: Apply dynamic padding for irregular image sizes
    ref_feat_shape: Reference feature shape for rotary position embedding scale
    head_init_scale: Initialization scale for classification head weights
r=   ) r   avgmaxmaxtokenmapr$   r   FNr   NHWC)strict_img_size
output_fmt)r   r   r   r   r   rA   
feat_ratio)r   )r4   )r/   r0   
feat_shapetemperaturegrid_indexingmixed)r   Trx   )	in_pixelsgrid_offsetr   r   r/   r0   r1   r2   r   r   r   r   r   r   r;   r4   r6   r5   r   r8   r   blocks.)modulenum_chs	reductionr  )r0   r   r8   r   needs_resetr   )7rD   rE   r   r   r   num_featureshead_hidden_sizer   r4   r   r   grad_checkpointingupdatedictr   patch_embednum_patcheshasattrr
  rG   rM   rN   rO   	cls_token	reg_token	cls_embed	pos_embedrW   pos_dropr   
patch_drop
rope_mixedr   	grid_sizer   rk   rT   norm_prer   r   r   
ModuleListrangeblocksfeature_inforX   r#   r   	attn_poolfc_norm	head_droprH   headr   init_weights)<r[   r   r   r   r   r   r   r   r0   r1   r2   r   r   r   r   r   r   r   r   r   r   r   r   r8   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r?   r\   activate_pre_normactivate_fc_normactivate_post_norm
embed_argsr  rnum_pos_tokensrope_kwargsdprblock_fnir^   s<                                                              r_   rE   Eva.__init__  s   @ 	/JJJJ& &ENNND1DN'2!^!K, 0"' 5"**e3$0!:%5!5
d5VLM% 	
!+--	
 	
 	
 &&22-4T5E5E|-T-TD'')ZdMXekk!Q	&HR&HI^bZhekk!^Y&URT&UVnr$?4)?(6K$J`J`<`Ziekk!^Y&URT&UVos

]3Q5oY]YoYopDO"DO:H:TY~6Z^N  ##349I9I9S9S,0 K G#""4e#45"&e#""4# 0#1$  *MIMMDIDI7H
933bkkm'>'4#(mm* 5\+%#* ")  # " $	
 $ & !0 $ "2 $ - #'"8"8 ) ) a&  &!" (%( "+%# $. QVV[P\^P\1D'!yAFP\^ 4FJy/B/2;;=	%0-:-:%'' DN "DN6Fz)2r2BKKMI.?JQBIIi;;TVT_T_Ta	. 	e,Y%#,^s   
AS 5S%r  c                    U R                  [        U R                  US95        U R                  b  [	        U R                  SS9  U R
                  b  [	        U R
                  SS9  U R                  b  [	        U R                  SS9  U R                  5         U R                  (       a  [        U R                  [        R                  5      (       a  [	        U R                  R                  SS9  [        R                  " 5          U R                  R                  R!                  U R                  5        U R                  R"                  R!                  U R                  5        S S S 5        g g g ! , (       d  f       g = f)Nr  {Gz?std)applyr   _init_weightsr"  r   r  r   fix_init_weightr   
isinstancer/  rG   rH   rn   rN   no_gradmul_rA   )r[   r  s     r_   r0  Eva.init_weights  s    

74--;GH>>%$..c2>>%$..c2>>%$..c2Jtyy"))$D$D$))**4		  %%d&:&:;		##D$8$89 ! %E s   AE..
E<r   c                    [         R                  " 5          [        U R                  5       H  u  p[        R
                  " SUS-   -  5      nUR                  R                  R                  R                  U5        UR                  R                  R                  R                  U5        M     SSS5        g! , (       d  f       g= f)z=Fix initialization weights by rescaling based on layer depth.g       @r$   N)rN   rD  	enumerater*  mathsqrtr   rY   rn   div_r   fc2)r[   layer_idlayerrF   s       r_   rB  Eva.fix_init_weight  sy    ]]_#,T[[#9		#A"67

&&++E2		$$))%0 $: __s   BB88
Cmc                 8   [        U[        R                  5      (       aL  [        UR                  SS9  UR
                  b*  [        R                  R                  UR
                  5        ggU(       a(  [        US5      (       a  XLa  UR                  5         gggg)zInitialize weights for Linear layers and call reset_parameters on modules.

Args:
    m: Module to initialize.
    needs_reset: Whether to call reset_parameters() on modules.
r=  r>  NrZ   )
rC  rG   rH   r   rn   rA   rc   rd   r  rZ   )r[   rP  r  s      r_   rA  Eva._init_weights  sr     a##!((,vv!qvv& "WQ(:;;  AN;[ra   c                     SS1n[        U SS5      =n(       a7  [        US5      (       a&  XR                  5        Vs1 s H  nSU 3iM
     sn-  $ U$ s  snf )z(Parameters to exclude from weight decay.r"  r  rk   Nno_weight_decayzrope.)r   r  rT  )r[   nwdrk   r   s       r_   rT  Eva.no_weight_decay  sc     K(D&$//D/WTCT5U5U/C/C/EF/E!E!+/EFFF
 Gs   Aenablec                     Xl         g)z)Enable or disable gradient checkpointing.N)r  )r[   rW  s     r_   set_grad_checkpointingEva.set_grad_checkpointing  s
     #)ra   coarsec                     [        SSS/S9nU$ )z(Create layer groupings for optimization.z ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr*  )r  )r[   r[  matchers      r_   group_matcherEva.group_matcher  s!     4-/CD
 ra   c                     U R                   $ N)r/  rf   s    r_   get_classifierEva.get_classifier%  s    yyra   c                     Xl         Ub  X l        US:  a'  [        R                  " U R                  U5      U l        g[        R
                  " 5       U l        g)zsReset the classifier head.

Args:
    num_classes: Number of output classes.
    global_pool: Global pooling type.
Nr   )r   r   rG   rH   r   rT   r/  )r[   r   r   s      r_   reset_classifierEva.reset_classifier)  sB     '"*>IAoBIIdnnk:	SUS^S^S`	ra   c           
      &   U R                   R                  nU R                   R                  XS9  U R                  b  U R                  (       a  SOU R
                  nU R                   R                  U-   nXPR                  R                  S   :w  aD  [        R                  " [        U R                  U R                   R                  UUSS95      U l        U R                  b0  U R                  R                  U R                   R                  5        gg)zUpdate the input image resolution and patch size.

Args:
    img_size: New input resolution, if None current resolution is used.
    patch_size: New patch size, if None existing patch size is used.
)r   r   Nr   r$   T)new_sizeold_sizer4   verbose)r  r&  set_input_sizer"  r   r4   r  rw   rG   rM   r   rk   update_feat_shape)r[   r   r   prev_grid_sizer4   num_new_tokenss         r_   rl  Eva.set_input_size5  s     ))33'''Q>>%%)%8%8d>T>T!--99<MMN!5!5a!88!#.DNN!--77+&7 / " 99 II''(8(8(B(BC !ra   c                    U R                   (       a  UR                  u  p#pEU R                  bK  U R                  R                  n[        U R                  X44UU R                  (       a  SOU R                  S9nOS nUR                  USU5      nU R                  b  U R                  R                  X44S9OS nO5U R                  nU R                  b  U R                  R                  5       OS n/ n	U R                  b9  U	R                  U R                  R                  UR                  S   SS5      5        U R                  b9  U	R                  U R                  R                  UR                  S   SS5      5        U R                  (       a'  Ub  X-   nU	(       a  [        R                   " X/-   SS9nO&U	(       a  [        R                   " X/-   SS9nUb  X-   nU R#                  U5      nU R$                  b]  U R%                  U5      u  pUbG  U
bD  ['        XU
5      n[)        U SS5      (       a  UR+                  SS5      nX4$ UR-                  S5      nX4$ )	Nr   )ri  rj  r4   ro   )rw   r$   rs   r%  F)r   rw   r"  r  r&  r   r   r4   viewrk   	get_embedr  appendexpandr   rN   rx   r#  r$  r   r   r~   	unsqueeze)r[   r   r   HWr   rn  r"  rot_pos_embedto_catkeep_indicess              r_   
_pos_embedEva._pos_embedR  s     JA!~~)!%!1!1!;!;2NNV++/+>+>aDDZDZ		 !	q"a AAEAVDII//qf/=\`MI59YY5JDII//1PTM>>%MM$..//
BCD>>%MM$..//
BCD$MIIfsl2 IIfsl2$MMM! ??&"ooa0OA(\-E 6q V4u55$1$;$;Aq$AM
  %2$;$;A$>Mra   r   indicesreturn_prefix_tokensrX   
stop_earlyr	  intermediates_onlyc           	      T   US;   d   S5       eUS:H  n/ n	[        [        U R                  5      U5      u  pUR                  u  ppU R	                  U5      nU R                  U5      u  nnU R                  U5      n[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SUS-    n[        U SS5      (       a  Ub  [        U5       H  u  nnU R                  (       a2  [        R                  R                  5       (       d  [        UUUU   S9nO
U" UUU   S9nUU
;   d  M[  U	R                  U(       a  U R                  U5      OU5        M     O[        U5       H~  u  nnU R                  (       a/  [        R                  R                  5       (       d  [        UUUS9nOU" UUS9nUU
;   d  MU  U	R                  U(       a  U R                  U5      OU5        M     U R                   (       aJ  U	 Vs/ s H  nUSS2S	U R                   24   PM     nnU	 Vs/ s H  nUSS2U R                   S24   PM     n	nU(       ac  U R                  R#                  X45      u  nnU	 Vs/ s H7  nUR%                  UUUS
5      R'                  S	SSS5      R)                  5       PM9     n	n[        R                  R                  5       (       d  U(       a  [+        [-        U	W5      5      n	U(       a  U	$ U R                  U5      nX4$ s  snf s  snf s  snf )a  Forward features that returns intermediates.
Args:
    x: Input image tensor
    indices: Take last n blocks if an int, if is a sequence, select by matching indices
    return_prefix_tokens: Return both prefix and spatial intermediate tokens
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
)NCHWNLCz>Output format for EVA-ViT features must be one of NCHW or NLC.r  Nr$   r%  Frk   r   ro   r@   rp   )r&   lenr*  rw   r  r|  r'  rN   r   is_scriptingr   rH  r  r'   rt  rX   r4   dynamic_feat_sizer{   r|   
contiguouslistzip)r[   r   r~  r  rX   r  r	  r  r{   intermediatestake_indices	max_indexr   _heightwidthry  r*  r:  blkyprefix_tokensrw  rx  s                           r_   forward_intermediatesEva.forward_intermediates  s   ( _,n.nn,&"6s4;;7G"Q  ggfQ??1-=MM!99!!##:[[F[[)a-0F 4u---2K#F+3**5993I3I3K3K"3a0@AAAM!$45A$!((11E , $F+3**5993I3I3K3K"3>AAM2A$!((11E , !!ERS]Qq!D$:$:"::;]MSDQRMqQq$"8"8"99:MMR##55voFDAq^kl^kYZQYYq!Q3;;Aq!QGRRT^kMlyy%%'',@ ]M!BCM  IIaL TR ms   L4L ?>L%
prune_norm
prune_headc                 ,   [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a3  SU l        [        R                  " 5       U l        U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr$   r   r  )	r&   r  r*  rG   rT   rX   r,  r-  rf  )r[   r~  r  r  r  r  s         r_   prune_intermediate_layersEva.prune_intermediate_layers  so     #7s4;;7G"Qkk.9q=1DI!DN;;=DL!!!R(ra   	pool_typec                     U R                   b  U R                  U5      nU$ Uc  U R                  OUn[        XU R                  S9nU$ )N)r  r4   )r,  r   r   r4   )r[   r   r  s      r_   poolEva.pool  sH    >>%q!AH(1(9D$$y	AdF\F\]ra   c                 B   U R                  U5      nU R                  U5      u  pU R                  U5      n[        U SS5      (       am  Ubj  [	        U R
                  5       HP  u  p4U R                  (       a1  [        R                  R                  5       (       d  [        XAX#   S9nMG  U" XU   S9nMR     OYU R
                   HI  nU R                  (       a/  [        R                  R                  5       (       d  [        XAUS9nMC  U" XS9nMK     U R                  U5      nU$ )zjForward pass through feature extraction layers.

Args:
    x: Input tensor.

Returns:
    Feature tensor.
r%  Fr  )r  r|  r'  r   rH  r*  r  rN   r   r  r'   rX   )r[   r   ry  r:  r  s        r_   forward_featuresEva.forward_features  s     Q??1-MM!4u---2K $DKK0**5993I3I3K3K"30@AAA!$45A	 1 {{**5993I3I3K3K"3>AA2A	 # IIaLra   
pre_logitsc                     U R                  U5      nU R                  U5      nU R                  U5      nU(       a  U$ U R                  U5      $ )zForward pass through classifier head.

Args:
    x: Feature tensor.
    pre_logits: Return pre-logits if True.

Returns:
    Output tensor.
)r  r-  r.  r/  )r[   r   r  s      r_   forward_headEva.forward_head  sA     IIaLLLONN1q0DIIaL0ra   c                 J    U R                  U5      nU R                  U5      nU$ )zGForward pass.

Args:
    x: Input tensor.

Returns:
    Output tensor.
)r  r  )r[   r   s     r_   r   Eva.forward  s)     !!!$a ra   )r,  r*  r!  r  r   r   r-  r+  r   r  r/  r.  r  r   r   r   rX   r'  r   r  r4   r$  r  r#  r"  r   rk   r%  )Tr   Frb  r   )NFFFr  F)r$   FT)+r   r   r   r   r   r   r   r   r
   r   r   r   r   r   rE   r0  rB  rG   ModulerA  rN   r   ignorer	   rT  rY  r   r   r_  rc  rf  rl  r   r|  r   r  r  r  r  r  r   r   r   r   s   @r_   r*   r*     s    5868#$ !"!$#$#%*"!#%%'$&$&$&#,+/ $"##($($)',&(&*&,%*"'-28<*.1537%*$)DH%*__-CsCx01_- c5c?23_- 	_-
 _- _- _- _- _- _- _- _- _- !_- _-  #!_-" #_-$ %_-& !'_-( #)_-* "+_-, "-_-. "/_-0 !1_-2 "%3_-4 5_-6  7_-8 !9_-: ";_-< "=_->  }?_-@ $A_-B !$C_-D $E_-F #G_-H  I_-J '+K_-L (0~M_-N "$O_-P "*#Q_-R "*%S_-T #U_-V "W_-X %U5c?C+?%@AY_-Z #[_- _-B: :"1!ryy !t !t ! YYS   YY)T )T ) ) YYD T#s(^   YY		  
aC 
ahsm 
aW[ 
a 3748DuS#X/D !sCx1D 
	D:5 uU\\8ELL3I%IJ 5 t 8<).$$',G ||G  eCcN34G  #'	G 
 G  G  G  !%G  
tELL!5tELL7I)I#JJ	KG V ./$#	3S	>*  	$ell x}  %,, 5<< @1ell 1 1 1 %,,  ra   
state_dictmodelprefixr   c                    U R                  SU 5      n U R                  5        VVs0 s H  u  p4UR                  SS5      U_M     n nn0 n/ SQn[        U5      nU R                  5        GH  u  p4U(       a  UR	                  U5      (       d  M%  X7S nU H  nUR                  US   US   5      nM     UR	                  S5      (       a  UR                  S	S5      nUR                  S
S5      nUR                  SS5      nUR	                  S5      (       a`  UR
                  S   S-  n	UR                  S5      (       a  USU	 US'   XIS US'   O%UR                  S5      (       a  USU	 US'   XIS US'   GM  OUS:X  a;  SnUR                  SS5      n[        R                  " UR
                  S   5      US'   O@US:X  a#  SnUR                  S5      R                  S5      nOUS:X  a  UR                  S5      nXEU'   GM     U$ s  snnf )zConvert Perception Encoder weights.

Args:
    state_dict: State dictionary to convert.
    model: Target model instance.
    prefix: Prefix to strip from keys.

Returns:
    Converted state dictionary.
r  zmodule.r  ))conv1patch_embed.proj)positional_embeddingr"  )ztransformer.resblocks.r  )ln_prer'  )ln_postrX   )ln_rX   )z
ls_1.gammar   )z
ls_2.gammar   )in_proj_zqkv.)out_projrY   )zmlp.c_fcmlp.fc1)z
mlp.c_projmlp.fc2Nr   r$   r,  zattn_pool.attnzattn_pool.layernormzattn_pool.normzattn_pool.probezattn_pool.latentzattn_pool.qkvr@   rn   zattn_pool.q.weightzattn_pool.kv.weightrA   zattn_pool.q.biaszattn_pool.kv.biasrY   zhead.weightz	head.biasclass_embeddingr  r"  )getitemsreplacer  
startswithrw   endswithr~   rN   zerosrv  )
r  r  r  r   r   out_dictswaps
len_prefixspr/   s
             r_   _convert_per  )  s    4J:D:J:J:LM:L$!!))Ir*A-:LJMHE VJ  "<<''+AB		"Q%A'A  <<$$		*K8A		/1ABA		+-?@A||O,,ggajAo::h''56tWH1267gH23ZZ''34Tc7H/045dGH01 - &[AAq!A$)KK
$;H[!##AA((+A+AA? #B Og Ns   Hinterpolation	antialiasc           
      x   0 nU R                  SU 5      n U R                  SU 5      n U R                  SU 5      n U R                  SU 5      n SU ;   a  [        X5      $ SU ;   a
  [        XSS9$ S	U ;   a  S
nOSU ;   a  SnOSnSU ;   nU(       + =(       a    US-   U ;   nUS-   U ;   n[        U5      n	U R                  5        GH  u  pU(       a  U
R	                  U5      (       d  M%  XS n
SU
;   a  U
S:X  d  M7  U(       a  [        S Vs/ s H  oR                  U5      PM     sn5      (       a  Mo  U
R	                  S5      (       a  M  U
R                  S5      (       aR  U
R                  SS5      n UR                  U5        UR                  SSS9u  nnnXU'   UXJR                  SS5      '   M  U
R                  SS5      n
U
R                  SS5      n
U
R                  SS5      n
O/U(       a(  U
S ;   a"  U
S!:X  d  U
S":X  a  U
R                  S#S$5      n
OGMU  S%U
;   ae  UR                  R                  R                  R                  u    nnnUR                  S   U:w  d  UR                  S&   U:w  a  [!        UUU4UUS'S(9nOsU
S):X  am  UR                  S*   UR"                  R                  S*   :w  aC  [%        US+S,5      (       a  S-O[%        US.S*5      n['        UUR                  R(                  UUUS'S/9nU
R                  S0S15      n
U
R                  S2S35      n
U
R                  S4S55      n
U
R                  S6S75      n
U
R                  S8S95      n
U
R                  S:S;5      n
U(       a$  U
R                  SS<5      n
U
R                  SS=5      n
XU
'   GM     U$ s  snf ! [         a  n[        U5         SnAGM  SnAff = f)>a:  Convert patch embedding weight from manual patchify + linear proj to conv.

Args:
    state_dict: Checkpoint state dictionary.
    model: Target model instance.
    interpolation: Interpolation method for resizing.
    antialias: Whether to use antialiasing when resizing.

Returns:
    Filtered state dictionary.
	model_emar  r  r  zvisual.conv1.weightzconv1.weightr  )r  zvisual.trunk.pos_embedzvisual.trunk.zvisual.pos_embedvisual.storage_tokens
mask_tokenzblocks.0.attn.q_proj.weightNrk   z
rope.freqs)z.periodsz
.bias_maskr  local_cls_normzqkv.biasrP   r@   ro   rs   rR   z	ls1.gammar   z	ls2.gammar   r   )r  zlm_head.weightzlm_head.biasnorm.weight	norm.biasr  r  rX   r-  zpatch_embed.proj.weightrv   T)r  r  rk  r"  r$   r   Fr   r4   )ri  r4   r  r  rk  z
mlp.ffn_lnzmlp.normzattn.inner_attn_lnz	attn.normzmlp.w12r  zmlp.w1z	mlp.fc1_gzmlp.w2z	mlp.fc1_xzmlp.w3r  zq_proj.biaszv_proj.bias)r  r  r  r  r  anyr  r  get_parameter	Exceptionprintchunkr  rY   rn   rw   r   r"  r   r   r&  )r  r  r  r  r  r  dinov3_weightsmim_weightsno_qkvr  r   r   fq_bias_keqvkvvvr  rw  rx  r4   s                         r_   checkpoint_filter_fnr  o  s   " HZ8J4J*5Jj9J 
*:--	:	%:R88  :- 	z	)%3N$$L,)>*)LK33zAFVJ  "<<''+AQ;qL0+ST+SaJJqM+STUU||,--zz*%%99Z: ''1 WWQBW/
B%'"<>:x89		+y1A		+y1A		*K8AQ"nnM!Q+%5IIfi0 $)**//66<<JAq!Qwwr{a1772;!#3(F"/'  +!''!*0E0Ea0H"H%,U4De%L%LRYZ_atvwRx&**44"3+#A IIlJ/II*K8IIi+IIh,IIh,IIh	*		(M2A		(M2AY #\ OG U ! !Hs   N
.N
N9"N44N9variant
pretrainedc           	      
   UR                  SS5      n[        R                  R                  SS5      S:H  nUc  UnU(       a  SSKJn  U" X40 UD6$ UR                  SS	5      n[        [        X4[        [        US
S9S.UD6nU$ )zCreate an EVA model.

Args:
    variant: Model variant name.
    pretrained: Load pretrained weights.
    **kwargs: Additional model arguments.

Returns:
    Instantiated Eva model.

use_naflexNTIMM_USE_NAFLEX01r$   )_create_naflexvit_from_evaout_indicesr@   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)
poposenvironr  	naflexvitr  r%   r*   r  r  )r  r  r   r  _USE_NAFLEX_DEFAULTr  r  r  s           r_   _create_evar    s     L$/J**..):C@CG(
9)'HHH**]A.K W1[hG 	E Lra   urlc                 4    U SSSSSS[         [        SSS	S
.UE$ )zGenerate default configuration for EVA models.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Model configuration dictionary.
r   r@   r   r   Ng?bicubicTr  r/  mitr  r   
input_size	pool_sizecrop_pctr  fixed_input_sizemeanr?  
first_conv
classifierlicense)r   r   r  r   s     r_   _cfgr  
  s6     =t ( # ra   c                 $    U SSSSSSSSSS	S
S.UE$ )zGenerate default configuration for Perception Encoder models.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Model configuration dictionary.
r   r  N      ?r  T      ?r  r  r  r/  
apache-2.0r  r   r   s     r_   _pe_cfgr    s6     D)( $* ra   c                 4    U SSSSSS[         [        SSS	S
.UE$ )zGenerate default configuration for DINOv3 models.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Model configuration dictionary.
r   r@      r
  Nr  r  Tr  r/  zdinov3-licenser  )r   r   r   s     r_   _dinov3_cfgr  2  s7     D)%.B(# (. ra   z"eva_giant_patch14_224.clip_ft_in1kztimm/)	hf_hub_idz"eva_giant_patch14_336.clip_ft_in1k)r@   P  r  r  squash)r  r  r  	crop_modez(eva_giant_patch14_336.m30m_ft_in22k_in1k)r  r  r?  r  r  r  z(eva_giant_patch14_560.m30m_ft_in22k_in1k)r@   0  r  z.eva02_base_patch14_448.mim_in22k_ft_in22k_in1k)r@     r  z/eva02_large_patch14_448.mim_in22k_ft_in22k_in1kz.eva02_large_patch14_448.mim_m38m_ft_in22k_in1kz(eva02_tiny_patch14_336.mim_in22k_ft_in1k)r  r  r  z)eva02_small_patch14_336.mim_in22k_ft_in1kz(eva02_base_patch14_448.mim_in22k_ft_in1kz)eva02_large_patch14_448.mim_in22k_ft_in1kz(eva02_large_patch14_448.mim_m38m_ft_in1kz)eva02_base_patch14_448.mim_in22k_ft_in22kiQU  )r  r  r  r  r   z*eva02_large_patch14_448.mim_in22k_ft_in22kz)eva02_large_patch14_448.mim_m38m_ft_in22kz eva02_tiny_patch14_224.mim_in22k)r  r   z!eva02_small_patch14_224.mim_in22kz eva02_base_patch14_224.mim_in22kz!eva02_large_patch14_224.mim_in22kz eva02_large_patch14_224.mim_m38mz$eva_giant_patch14_clip_224.laion400m   z#eva_giant_patch14_clip_224.merged2bz$eva02_base_patch16_clip_224.merged2b   z%eva02_large_patch14_clip_224.merged2br   z%eva02_large_patch14_clip_336.merged2b)r  r  r  r   z'eva02_enormous_patch14_clip_224.laion2bz,eva02_enormous_patch14_clip_224.laion2b_plusz(eva02_enormous_patch14_clip_224.pretrain)r   z-vit_medium_patch16_rope_reg1_gap_256.sbb_in1kr	  gffffff?r  )r  r  r  r  r?  z.vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1kz.vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1kz+vit_base_patch16_rope_reg1_gap_256.sbb_in1kzvit_pe_core_tiny_patch16_384.fb)r@     r  )r  r  r   z vit_pe_core_small_patch16_384.fbzvit_pe_core_base_patch16_224.fbr  z vit_pe_core_large_patch14_336.fbz#vit_pe_core_gigantic_patch14_448.fb   z vit_pe_lang_large_patch14_448.fbz'vit_pe_lang_large_patch14_448.fb_tilingz#vit_pe_lang_gigantic_patch14_448.fbz*vit_pe_lang_gigantic_patch14_448.fb_tilingz"vit_pe_spatial_tiny_patch16_512.fb)r@   r  r  z#vit_pe_spatial_small_patch16_512.fbz"vit_pe_spatial_base_patch16_512.fbz#vit_pe_spatial_large_patch14_448.fbz&vit_pe_spatial_gigantic_patch14_448.fbz%vit_small_patch16_rope_224.naver_in1kr  )r  r  r?  r  z$vit_base_patch16_rope_224.naver_in1kz%vit_large_patch16_rope_224.naver_in1kz+vit_small_patch16_rope_mixed_224.naver_in1kz*vit_base_patch16_rope_mixed_224.naver_in1kz+vit_large_patch16_rope_mixed_224.naver_in1kz)vit_small_patch16_rope_ape_224.naver_in1kz(vit_base_patch16_rope_ape_224.naver_in1kz)vit_large_patch16_rope_ape_224.naver_in1kz/vit_small_patch16_rope_mixed_ape_224.naver_in1kz.vit_base_patch16_rope_mixed_ape_224.naver_in1kz/vit_large_patch16_rope_mixed_ape_224.naver_in1kz!vit_small_patch16_dinov3.lvd1689mz&vit_small_patch16_dinov3_qkvb.lvd1689mz&vit_small_plus_patch16_dinov3.lvd1689mz+vit_small_plus_patch16_dinov3_qkvb.lvd1689mz vit_base_patch16_dinov3.lvd1689mz%vit_base_patch16_dinov3_qkvb.lvd1689mz!vit_large_patch16_dinov3.lvd1689mz&vit_large_patch16_dinov3_qkvb.lvd1689mz vit_large_patch16_dinov3.sat493m)gQ?gM?gl?)g$C?g+?gM?)r  r  r?  z%vit_large_patch16_dinov3_qkvb.sat493m)z%vit_huge_plus_patch16_dinov3.lvd1689mz*vit_huge_plus_patch16_dinov3_qkvb.lvd1689mzvit_7b_patch16_dinov3.lvd1689mzvit_7b_patch16_dinov3.sat493mc           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	,EVA-g model https://arxiv.org/abs/2211.07636     (   r   tE]t@r   r   r   r0   r   r  )eva_giant_patch14_224r  r  r  r   
model_argsr  s       r_   r  r    9     t2WbcJeJe$zJd]cJdeELra   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	r  r  r  r  r   r  r  r  )eva_giant_patch14_336r  r  s       r_   r#  r#    r!  ra   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	r  r  r  r  r   r  r  r  )eva_giant_patch14_560r  r  s       r_   r%  r%    r!  ra   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ )+EVA02 Tiny https://arxiv.org/abs/2303.11331r   r     r   r@   UUUUUU@Tr   r   	r   r   r   r   r0   r   r   r   r   r  )eva02_tiny_patch14_224r  r  s       r_   r,  r,    N     
J fZf4PZKe^dKefELra   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ ),EVA02 Small https://arxiv.org/abs/2303.11331r   r  r  r      r)  Tr*  r+  r  )eva02_small_patch14_224r  r  s       r_   r1  r1    N     
J gjgDQ[Lf_eLfgELra   c                 Z    [        SSSSSSSSSSSS	9n[        SS
U 0[        U40 UD6D6nU$ )+EVA02 Base https://arxiv.org/abs/2303.11331r   r  r   r   Fr)  Tr*  r   r   r   r   r0   r2   r   r   r   r   r   r  )eva02_base_patch14_224r  r  s       r_   r6  r6    T     J fZf4PZKe^dKefELra   c                 Z    [        SSSSSSSSSSS	S
9n[        SSU 0[        U40 UD6D6nU$ ),EVA02 Large https://arxiv.org/abs/2303.11331r   r  r     r   r)  FTr*  r   r   r   r   r0   r   r2   r   r   r   r   r  )eva02_large_patch14_224r  r  s       r_   r<  r<  (  T     J gjgDQ[Lf_eLfgELra   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ )r'  r  r  r(  r   r@   r)  Tr*  r+  r  )eva02_tiny_patch14_336r  r  s       r_   r?  r?  <  r-  ra   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ )r/  r  r  r  r   r0  r)  Tr*  r+  r  )eva02_small_patch14_336r  r  s       r_   rA  rA  N  r2  ra   c                 Z    [        SSSSSSSSSSSS	9n[        SS
U 0[        U40 UD6D6nU$ )r4  r  r  r   r   Fr)  Tr*  r5  r  )eva02_base_patch14_448r  r  s       r_   rC  rC  `  r7  ra   c                 Z    [        SSSSSSSSSSS	S
9n[        SSU 0[        U40 UD6D6nU$ )r9  r  r  r  r:  r   r)  FTr*  r;  r  )eva02_large_patch14_448r  r  s       r_   rE  rE  t  r=  ra   c                 p    [        SSSSSUR                  SS5      S9n[        S
S	U 0[        U40 UD6D6nU$ )z?EVA-g CLIP model (only difference from non-CLIP is the pooling)r  r  r  r   r  r   r  )r   r   r   r0   r   r   r  )eva_giant_patch14_clip_224r  r  r  r  s       r_   rG  rG    sL     R2JJ}g68J jjtT^OibhOijELra   c                 ~    [        SSSSSSSSSSSSUR                  S	S
5      S9n[        SSU 0[        U40 UD6D6nU$ )zUAn EVA-CLIP specific variant that adds additional attn scale layer-norm to eva02_baser   r   r   r   Fr)  Tr*  r   r  )r   r   r   r   r0   r2   r   r   r   r   r   r   r   r  )eva02_base_patch16_clip_224rH  r  s       r_   rJ  rJ    sf     JJ}g6J k*kPTU_PjciPjkELra   c                 ~    [        SSSSSSSSSSSS	UR                  S
S5      S9n[        SSU 0[        U40 UD6D6nU$ )VAn EVA-CLIP specific variant that adds additional attn scale layer-norm to eva02_larger   r  r  r:  r   r)  FTr*  r   r  r   r   r   r   r0   r   r2   r   r   r   r   r   r   r  )eva02_large_patch14_clip_224rH  r  s       r_   rN  rN    f     JJ}g6J l:lQUV`QkdjQklELra   c                 ~    [        SSSSSSSSSSSS	UR                  S
S5      S9n[        SSU 0[        U40 UD6D6nU$ )rL  r  r  r  r:  r   r)  FTr*  r   r  rM  r  )eva02_large_patch14_clip_336rH  r  s       r_   rQ  rQ    rO  ra   c                 t    [        SSSSSSSUR                  SS	5      S
9n[        SSU 0[        U40 UD6D6nU$ )zCAn EVA-CLIP specific variant that uses residual post-norm in blocksr   r  i   @   r   gI$I$!@Tr   r  )r   r   r   r   r0   r   r   r   r  )eva02_enormous_patch14_clip_224rH  r  s       r_   rT  rT    sW     JJ}g6	J ojoTXYcTngmTnoELra   c                 ^    [        SSSSSSSSSS	SSS
S9n[        SSU 0[        U40 UD6D6nU$ )timm SBB ViT with ROPEr
  r   r  r   r   Th㈵>Fr$   r*  r   r   r   r   r0   r2   r1   r   r   r   r   r   r   r  )$vit_medium_patch16_rope_reg1_gap_256r  r  s       r_   rY  rY    s[     J t:tY]^hYslrYstELra   c                 ^    [        SSSSSSSSSS	SSS
S9n[        SSU 0[        U40 UD6D6nU$ )rV  r
  r   r     r   TFrW  r$   r*  rX  r  )%vit_mediumd_patch16_rope_reg1_gap_256r  r  s       r_   r\  r\    s[     J uJuZ^_iZtmsZtuELra   c                 ^    [        SSSSSSSSSS	SSS
S9n[        SSU 0[        U40 UD6D6nU$ )rV  r
  r   i  r   
   TrW  Frq   r*  rX  r  )%vit_betwixt_patch16_rope_reg4_gap_256r  r  s       r_   r_  r_    s[     J uJuZ^_iZtmsZtuELra   c                 ^    [        SSSSSSSSSSSSS	S
9n[        SSU 0[        U40 UD6D6nU$ )rV  r
  r   r   r   TrW  Fr$   r*  rX  r  )"vit_base_patch16_rope_reg1_gap_256r  r  s       r_   ra  ra  '  s[     J rrW[\fWqjpWqrELra   c                 v    [        SSSSSSSSSS	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )HPerception Encoder (PE) ViT from Meta (https://arxiv.org/abs/2504.13181)r   r(  r   r@   r   r  rk   Tr:  r:  r  xyr   rW  epsr   r   r   r0   r   r   r   r   r   r   r   r   r   r   r8   r  )vit_pe_core_tiny_patch16_384r  r   r   r  r  r   r   s      r_   ri  ri  =  sd     !%9$/J$ k*kPTU_PjciPjkkra   c                 v    [        SSSSSSSSSS	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r   r  r   r0  r   r  rk   Trd  r  re  r   rW  rf  rh  r  )vit_pe_core_small_patch16_384rj  rk  s      r_   rm  rm  V  sd     !%9$/J$ l:lQUV`QkdjQkllra   c                 v    [        SSSSSSSSSSS	S
SS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r   r   r   r   r  rk   T)r  r  r  re  r   rW  rf  rh  r  )vit_pe_core_base_patch16_224rj  rk  s      r_   ro  ro  n  sd     !%9$/J$ k*kPTU_PjciPjkkra   c                 v    [        SSSSSSSSSS	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r  r  r:  r   r   r  rk   Trd  r  re  r   rW  rf  rh  r  )vit_pe_core_large_patch14_336rj  rk  s      r_   rq  rq    sd     !%9$/J$ l:lQUV`QkdjQkllra   c                 v    [        SSSSSSSSS	S	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r     2   r   UUUUUU@r  rk   FT    rw  re  r   r   rW  rf  )r   r   r   r0   r   r   r   r   r   r   r   r   r   r   r8   r  ) vit_pe_core_gigantic_patch14_448rj  rk  s      r_   rx  rx    sd     !%9$/J$ ojoTXYcTngmTnoora   c           
          [        S 0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S!SU 0[        U40 UD6D6$ )"rc  r   r  r   r  r      r0   r   r   r   r   rk   r   Tr   r   rv  r   r  r   re  r   r   Fr   r   皙?r8   rW  rf  r  r   )vit_pe_lang_large_patch14_448rj  rk  s      r_   r|  r|    s        	
          "& #(    9$/!J& l:lQUV`QkdjQkllra   c                 v    [        SSSSSSSSS	S
SSSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r  rs  /   r   ru  rk   FTrv  re  r{  rW  rf  r   r   r   r0   r   r   r   r   r   r   r   r   r   r   r8   r  ) vit_pe_lang_gigantic_patch14_448rj  rk  s      r_   r  r    sd     !%"'9$/J$ ojoTXYcTngmTnoora   c                 t    [        SSSSSSSSSSS	S
S[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r   r(  r   r@   r   rk   TFrv  r  re  rW  rf  r   r   r   r0   r   r   r   r   r   r   r   r   r   r8   r  )vit_pe_spatial_tiny_patch16_512rj  rk  s      r_   r  r    sa     !%"'9$/J" nZnSWXbSmflSmnnra   c                 t    [        SSSSSSSSSSS	S
S[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r   r  r   r0  r   rk   TFrv  r  re  rW  rf  r  r  ) vit_pe_spatial_small_patch16_512rj  rk  s      r_   r  r    sa     !%"'9$/J" ojoTXYcTngmTnoora   c                 t    [        SSSSSSSSSSSS	S
[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r   r   r   r   rk   TFrv  r  re  rW  rf  r  r  )vit_pe_spatial_base_patch16_512rj  rk  s      r_   r  r  	  sa     !%"'9$/J" nZnSWXbSmflSmnnra   c                 t    [        SSSSSSSSSSS	S
S[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r  r  r:  r   r   rk   TFrv  r  re  rW  rf  r  r  ) vit_pe_spatial_large_patch14_448rj  rk  s      r_   r  r  ,	  sa     !%"'9$/J" ojoTXYcTngmTnoora   c                 v    [        SSSSSSSSS	S
SSSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rc  r  rs  rt  r   ru  rk   FTrv  re  r{  rW  rf  r  r  )#vit_pe_spatial_gigantic_patch14_448rj  rk  s      r_   r  r  C	  sd     !%"'9$/J$ rrW[\fWqjpWqrrra   c                 `    [        SSSSSSSSSS	S
SSSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Axial ViT-S/16 from https://github.com/naver-ai/rope-vitr   r  r   r0  rq   rk   TrW  r  Fre        Y@r   r   r   r0   r   r   r1   r   r   r   r   r   r   r   r  )vit_small_patch16_rope_224r  r  s       r_   r  r  \	  s]     J  jjtT^OibhOijELra   c                 b    [        SSSSSSSSSSS	SSS
SS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Axial ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   rq   rk   FTrW  r  re  r  )r   r   r   r0   r   r   r   r1   r   r   r   r   r   r   r   r  )vit_base_patch16_rope_224r  r  s       r_   r  r  s	  s`     J" i
idS]NhagNhiELra   c                 `    [        SSSSSSSSSSS	SS
SS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Axial ViT-L/16 from https://github.com/naver-ai/rope-vitr   r  r:  rq   rk   TrW  r  Fre  r  r  r  )vit_large_patch16_rope_224r  r  s       r_   r  r  	  s]     J  jjtT^OibhOijELra   c                 b    [        SSSSSSSSSS	S
SSSSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Mixed ViT-S/16 from https://github.com/naver-ai/rope-vitr   r  r   r0  rq   rk   TrW  r  Fre        $@r  r   r   r   r0   r   r   r1   r   r   r   r   r   r   r   r   r  ) vit_small_patch16_rope_mixed_224r  r  s       r_   r  r  	  sa     J" pzpUYZdUohnUopELra   c                 b    [        SSSSSSSSSSS	SS
SSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Mixed ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   rq   Trk   rW  r  Fre  r  r  )r   r   r   r0   r   r1   r   r   r   r   r   r   r   r   r   r  )vit_base_patch16_rope_mixed_224r  r  s       r_   r  r  	  sa     J" ojoTXYcTngmTnoELra   c                 b    [        SSSSSSSSSSS	SS
SSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Mixed ViT-L/16 from https://github.com/naver-ai/rope-vitr   r  r:  rq   rk   TrW  r  Fre  r  r  r  r  ) vit_large_patch16_rope_mixed_224r  r  s       r_   r  r  	  sa     J" pzpUYZdUohnUopELra   c                 b    [        SSSSSSSSSS	SSSS
SS9n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Axial + APE ViT-S/16 from https://github.com/naver-ai/rope-vitr   r  r   r0  rq   rk   TrW  r  re  r  r   r   r   r0   r   r   r1   r   r   r   r   r   r   r   r   r  )vit_small_patch16_rope_ape_224r  r  s       r_   r  r  	  sa     J" nZnSWXbSmflSmnELra   c                 b    [        SSSSSSSSSSSSSS	S
S9n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Axial + APE ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   rq   rk   TrW  r  re  r  r  r  )vit_base_patch16_rope_ape_224r  r  s       r_   r  r  
  sa     J$ mJmRVWaRlekRlmELra   c                 b    [        SSSSSSSSSSSSSS	S
S9n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Axial + APE ViT-L/16 from https://github.com/naver-ai/rope-vitr   r  r:  rq   rk   TrW  r  re  r  r  r  )vit_large_patch16_rope_ape_224r  r  s       r_   r  r  
  sa     J$ nZnSWXbSmflSmnELra   c           	          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_6n[        SSU 0[        U40 UD6D6nU$ ) zCRoPE-Mixed + APE ViT-S/16 from https://github.com/naver-ai/rope-vitr   r   r   r  r   r   r0   r0  r   rq   r   rk   r1   Tr   rW  r   r   r  r   r   r   r   re  r   r  r   r  r  r   )$vit_small_patch16_rope_mixed_ape_224r  r  s       r_   r  r  5
  s        	
              !J& t:tY]^hYslrYstELra   c           	          [        S0 SS_SS_SS_SS_SS	_S
S_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_6n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Mixed + APE ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   r   r   r   r0   r   rq   r   rk   r1   Tr   rW  r   r   r  r   r   r   r   re  r   r  r   r  r  r   )#vit_base_patch16_rope_mixed_ape_224r  r  s       r_   r  r  O
  s        	
              !J$ s*sX\]gXrkqXrsELra   c           	          [        S0 SS_SS_SS_SS_SS	_S
S_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_6n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Mixed + APE ViT-L/16 from https://github.com/naver-ai/rope-vitr   r   r   r  r   r:  r0   r   rq   r   rk   r1   Tr   rW  r   r   r  r   r   r   r   re  r   r  r   r  r  r   )$vit_large_patch16_rope_mixed_ape_224r  r  s       r_   r  r  h
  s        	
              !J$ t:tY]^hYslrYstELra   c                 z    [        SSSSSSSSS	SSSS
S[        [        SS9S9n[        SSU 0[        U40 UD6D6nU$ )z,DINOv3 S/16 https://arxiv.org/abs/2508.10104r   Tr  r   r0  FrW  dinov3d   rq   rf  r   r   r   r   r0   r1   r   r   r   r   r   r   r   r   r8   r  )vit_small_patch16_dinov3rj  r  s       r_   r  r  
  sg     9$/!J$ hzhTR\Mg`fMghELra   c                 z    [        SSSSSSSSSSSS	S
S	[        [        SS9S9n[        SSU 0[        U40 UD6D6nU$ )zKDINOv3 S/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104r   Tr  r   r0  rW  r  r  Frq   rf  r  r  )vit_small_patch16_dinov3_qkvbrj  r  s       r_   r  r  
  sh     9$/!J$ mJmRVWaRlekRlmELra   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!z1DINOv3 S/16 Plus https://arxiv.org/abs/2508.10104r   r   r   Tr   r  r   r   r0   r0  r1   Fr   rW  r   r  r   r  r   r   r   r   r   r   r   rq   r   r8   rf  r  r   )vit_small_plus_patch16_dinov3rj  r  s       r_   r  r  
  s        	
            !" #$ 9$/%J( mJmRVWaRlekRlmELra   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!zMDINOv3 S/16 Plus w/ QKV bias enabled (but 0) https://arxiv.org/abs/2508.10104r   r   r   Tr   r  r   r   r0   r0  r1   r   rW  r   r  r   r  r   r   r   Fr   r   r   r   rq   r   r8   rf  r  r   )"vit_small_plus_patch16_dinov3_qkvbrj  r  s       r_   r  r  
  s        	
            !" #$ 9$/%J( rrW[\fWqjpWqrELra   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )z,DINOv3 B/16 https://arxiv.org/abs/2508.10104r   Tr   r   FrW  r  r  rq   rf  r  r  )vit_base_patch16_dinov3rj  r  s       r_   r  r  
  sg     9$/!J$ gjgDQ[Lf_eLfgELra   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )zKDINOv3 B/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104r   Tr   r   rW  r  r  Frq   rf  r  r  )vit_base_patch16_dinov3_qkvbrj  r  s       r_   r  r    sh     9$/!J$ l:lQUV`QkdjQklELra   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )z,DINOv3 L/16 https://arxiv.org/abs/2508.10104r   Tr  r:  FrW  r  r  rq   rf  r   r   r   r   r0   r1   r   r   r   r   r   r   r   r   r8   r  )vit_large_patch16_dinov3rj  r  s       r_   r  r    sg     9$/!J$ hzhTR\Mg`fMghELra   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )zFDINOv3 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104r   Tr  r:  rW  r  r  Frq   rf  r  r  )vit_large_patch16_dinov3_qkvbrj  r  s       r_   r  r  4  sh     9$/!J$ mJmRVWaRlekRlmELra   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!z1DINOv3 H/16 Plus https://arxiv.org/abs/2508.10104r   r   r   Tr   r  r   rw  r0   r[  r1   Fr   rW  r   r  r   r  r   r   r   r   r   r   r   rq   r   r8   rf  r  r   )vit_huge_plus_patch16_dinov3rj  r  s       r_   r  r  M  s        	
            !" #$ 9$/%J* l:lQUV`QkdjQklELra   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!zPDINOv3 H/16 Plus w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104r   r   r   Tr   r  r   rw  r0   r[  r1   r   rW  r   r  r   r  r   r   Fr   r   r   r   r   rq   r   r8   rf  r  r   )!vit_huge_plus_patch16_dinov3_qkvbrj  r  s       r_   r  r  i  s        	
            !" #$ 9$/%J* q
qVZ[eVpioVpqELra   c           
          [        S!0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S"S U 0[        U40 UD6D6nU$ )#z-DINOv3 7B/16 https://arxiv.org/abs/2508.10104r   r   r   Tr   i   r   r  r0   rw  r1   Fr   rp   r   rW  r   r  r   r  r   r   r   r   r   rS  r   rq   r   r8   rf  r  r   )vit_7b_patch16_dinov3rj  r  s       r_   r  r    s        	
           " #$ %& 9$/'J, eJe$zJd]cJdeELra   )r  )r  Tr  )r  )~r   rI  r  	functoolsr   typingr   r   r   r   r   r	   r
   r   rN   torch.nnrG   torch.nn.functional
functionalry   	timm.datar   r   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   _builderr%   	_featuresr&   _manipulater'   	_registryr(   r)   __all__r  r,   r   r   r*   r   r   r  r   r  r  r  r  r  default_cfgsr  r#  r%  r,  r1  r6  r<  r?  rA  rC  rE  rG  rJ  rN  rQ  rT  rY  r\  r_  ra  ri  rm  ro  rq  rx  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   ra   r_   <module>r     sl  =@  	  I I I     d d     * + + # <'^299 ^B@ryy @Fpryy pfg")) gZ CS%,,&'C99C C 
#u||
	CR '	yell*+yyyy y 	y
 
#u||
yx $ S >c T#s(^ ( S#X (S DcN & % P& )$+P& )$ 3(+DP& /"(< 3(	1DP&$ /"(< 3(	1D%P&2 5d 3(73P&< 6t 3(8=P&F 5d 3(7GP&T / 31UP&^ 0 32_P&h / 31iP&r 0 32sP&| / 31}P&J 0 3(PU2KP&T 1$ 3(PU3UP&^ 0 3(PU2_P&l ')mP&v (*wP&@ ')AP&J (*KP&T ')UP&b +D -cP&p *4 ,qP&~ +D -P&L ,T .MP&Z ,T  3.[P&j .t 0kP&x 3D 5yP&F /1GP&P 4T 4/6QP&Z 5d 4/7[P&d 5d 47eP&l 24 4/4mP&z &w !({P&H ' !)IP&V &w !(WP&d ' !)eP&r *7 !,sP&B ' !)CP&P .w !0QP&^ *7 !,_P&l 1' !3mP&| )' !+}P&J *7 !,KP&X )' !+YP&f *7 !,gP&t -g !/uP&F	 ,T"(<.G	P&P	 +D"(<-Q	P&Z	 ,T"(<.[	P&d	 24"(<4e	P&n	 1$"(<3o	P&x	 24"(<4y	P&B
 0"(<2C
P&L
 /"(<1M
P&V
 0"(<2W
P&`
 6t"(<8a
P&j
 5d"(<7k
P&t
 6t"(<8u
P&D (*EP&J -k/KP&P -k/QP&V 2;4WP&\ ')]P&b ,[.cP&h (*iP&n -k/oP&t '"(=)uP&| ,["(=.}P&D .9. 3>3 '2' &1"(=&WP& Pf d    d    d    t #  "  3  " t #  &  3  & t #  "  3  " t #  &  3  & 4 c   D s  * T   * T   *  3    T PS  * d QT  * d QT  * 4 c  * lT l l l0 md m m m. lT l l l. md m m m. p pC p p. md m m m0 p pC p p. o o3 o o, p pC p p, o o3 o o, p pC p p, sD ss s s0 4 c  , $ S  . 4 c  ,  C  .  3  .  C  0 t #  . d   0 t #  0 T PS  2 D s  0 T PS  0  C  0 d   0 d   4 4 c  4  3  0 T   0  C  0 d   0 T   6 $ S  4 d   ra   