
    RЦiH                        S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
JrJr  SSKrSSKJr  SSKJrJr  SSKJrJrJr  SS	KJr  SS
KJr  SSKJrJr  SSKJr  S/r " S S\R@                  5      r! " S S\RD                  5      r# " S S\RD                  5      r$ " S S\RD                  5      r% " S S\RD                  5      r&S r'S(S jr(S)S jr)\" \)" SS9\)" SS9\)" SS9\)" SS9\)" SSS9\)" SSS9\)" SSS9\)" SSS9S.5      r*\S(S\&4S  jj5       r+\S(S\&4S! jj5       r,\S(S\&4S" jj5       r-\S(S\&4S# jj5       r.\S(S\&4S$ jj5       r/\S(S\&4S% jj5       r0\S(S\&4S& jj5       r1\S(S\&4S' jj5       r2g)*a  Pooling-based Vision Transformer (PiT) in PyTorch

A PyTorch implement of Pooling-based Vision Transformers as described in
'Rethinking Spatial Dimensions of Vision Transformers' - https://arxiv.org/abs/2103.16302

This code was adapted from the original version at https://github.com/naver-ai/pit, original copyright below.

Modifications for timm by / Copyright 2020 Ross Wightman
    N)partial)ListOptionalSequenceTupleUnionTypeAny)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)trunc_normal_	to_2tuplecalculate_drop_path_rates   )build_model_with_cfg)feature_take_indices)register_modelgenerate_default_cfgs)BlockPoolingVisionTransformerc                       \ rS rSrSrS\\R                  \R                  4   S\\R                  \R                  4   4S jrSr	g)SequentialTuple!   zHThis module exists to work around torchscript typing issues list -> listxreturnc                 (    U  H  nU" U5      nM     U$ N )selfr   modules      N/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/pit.pyforwardSequentialTuple.forward#   s    Fq	A     r    N)
__name__
__module____qualname____firstlineno____doc__r   torchTensorr$   __static_attributes__r    r&   r#   r   r   !   s?    Su||U\\9: uU\\SXS_S_E_?` r&   r   c                   
  ^  \ rS rSr       SS\S\S\S\S\\   S\S\S	\\\      S
\\	\
R                        4U 4S jjjrS\\R                  \R                  4   S\\R                  \R                  4   4S jrSrU =r$ )Transformer)   base_dimdepthheads	mlp_ratiopool	proj_drop	attn_dropdrop_path_prob
norm_layerc                 T  > XS.n[         TU ]  5         X-  nXPl        U	(       a	  U	" U40 UD6O[        R                  " 5       U l        [        R                  " [        U5       Vs/ s H/  n[        SUUUSUUX   [        [        R                  SS9S.UD6PM1     sn6 U l        g s  snf )NdevicedtypeTư>)eps)dim	num_headsr5   qkv_biasr7   r8   	drop_pathr:   r    )super__init__r6   r   Identitynorm
Sequentialranger   r   	LayerNormblocks)r!   r2   r3   r4   r5   r6   r7   r8   r9   r:   r=   r>   dd	embed_dimi	__class__s                  r#   rF   Transformer.__init__*   s     /$		3=Jy/B/2;;=	mm 5\&# "  
###(+"2<<T:
 
 "&# $ &#s   %6B%r   r   c                    Uu  pUR                   S   nU R                  b  U R                  X5      u  pUR                   u  pEpgUR                  S5      R                  SS5      n[        R
                  " X!4SS9nU R                  U5      nU R                  U5      nUS S 2S U24   nUS S 2US 24   nUR                  SS5      R                  XEXg5      nX4$ )Nr      )rA   )	shaper6   flatten	transposer,   catrH   rL   reshape)r!   r   
cls_tokenstoken_lengthBCHWs           r#   r$   Transformer.forwardL   s    !''*99  IIa4MAWW
aIIaL""1a(IIzo1-IIaLKKNq-<-'(
aKK1%%aA1}r&   )rL   rH   r6   )N        r`   NNNN)r'   r(   r)   r*   intfloatr   r
   r   r	   r   ModulerF   r   r,   r-   r$   r.   __classcell__rP   s   @r#   r0   r0   )   s     #'!!4848 $ $  $ 	 $
  $ 3- $  $  $ %T%[1 $ !bii1 $  $Du||U\\9: uU\\SXS_S_E_?`  r&   r0   c            	          ^  \ rS rSr   S
S\S\S\S\4U 4S jjjrS\\R                  \R                  4   4S jr
S	rU =r$ )Pooling`   
in_featureout_featurestridepadding_modec           	         > XVS.n[         TU ]  5         [        R                  " UU4US-   US-  UUUS.UD6U l        [        R
                  " X40 UD6U l        g )Nr<   r   rS   )kernel_sizepaddingrk   rl   groups)rE   rF   r   Conv2dconvLinearfc)	r!   ri   rj   rk   rl   r=   r>   rM   rP   s	           r#   rF   Pooling.__init__a   sm     /II	
 
aK%	
 	
	 ))J:r:r&   r   c                 L    U R                  U5      nU R                  U5      nX4$ r   rr   rt   )r!   r   	cls_tokens      r#   r$   Pooling.forwardy   s%    IIaLGGI&	|r&   rw   )zerosNN)r'   r(   r)   r*   ra   strrF   r   r,   r-   r$   r.   rd   re   s   @r#   rg   rg   `   sa     !(;; ; 	;
 ; ;0uU\\5<<-G'H  r&   rg   c                   Z   ^  \ rS rSr      SS\S\S\S\S\S\4U 4S jjjrS	 rS
rU =r$ )ConvEmbedding   in_channelsout_channelsimg_size
patch_sizerk   ro   c	                   > XxS.n	[         T
U ]  5         Un[        U5      U l        [        U5      U l        [
        R                  " U R                  S   SU-  -   U R                  S   -
  U-  S-   5      U l        [
        R                  " U R                  S   SU-  -   U R                  S   -
  U-  S-   5      U l        U R                  U R                  4U l	        [        R                  " UU4UUUSS.U	D6U l        g )Nr<   r   rS   r   T)rn   rk   ro   bias)rE   rF   r   r   r   mathfloorheightwidth	grid_sizer   rq   rr   )r!   r   r   r   r   rk   ro   r=   r>   rM   rP   s             r#   rF   ConvEmbedding.__init__   s     /!(+#J/jj$--"2Q["@4??STCU"UY_!_bc!cdZZq!1AK!?$//RSBT!TX^ ^ab bc
++tzz2II
 #
 
	r&   c                 (    U R                  U5      nU$ r   )rr   r!   r   s     r#   r$   ConvEmbedding.forward   s    IIaLr&   )rr   r   r   r   r   r   )         r   NN)	r'   r(   r)   r*   ra   rF   r$   r.   rd   re   s   @r#   r}   r}      se    
   

 
 	

 
 
 
 
< r&   r}   c            #         ^  \ rS rSrSr                   S+S\S\S\S\S\\   S\\   S	\\   S
\S\S\S\S\	S\S\S\S\S\4"U 4S jjjr
S r\R                  R                  S 5       r\R                  R                  S,S j5       r\R                  R                  S,S j5       rS\R&                  4S jrS-S\S\\   4S jjr     S.S\R.                  S\\\\\   4      S\	S\	S \S!\	S\\\R.                     \\R.                  \\R.                     4   4   4S" jjr   S/S\\\\   4   S#\	S$\	4S% jjrS& rS0S'\	S\R.                  4S( jjrS) rS*r U =r!$ )1r      zPooling-based Vision Transformer

A PyTorch implement of 'Rethinking Spatial Dimensions of Vision Transformers'
    - https://arxiv.org/abs/2103.16302
r   r   rk   	stem_type	base_dimsr3   r4   r5   num_classesin_chansglobal_pool	distilled	drop_ratepos_drop_drateproj_drop_rateattn_drop_ratedrop_path_ratec           
        > [         TU ]  5         UUS.nUS;   d   eXPl        Xpl        US   US   -  nXl        Xl        Xl        U(       a  SOSU l        / U l        [        U
UXU40 UD6U l
        [        R                  " [        R                  " SUU R                  R                  U R                  R                   40 UD65      U l        [        R                  " [        R                  " SU R                  U40 UD65      U l        [        R&                  " US9U l        / n[+        UUSS9nUn[-        [/        U5      5       Hv  nS nUU   UU   -  nUS:  a  [1        UU4S	S0UD6nU[3        UU   UU   UU   U4UUUUU   S
.UD6/-  nUnU =R                  [5        UUS-
  SU-  -  SU 3S9/-  sl        Mx     [7        U6 U l        [        R:                  " US   US   -  4SS0UD6U l        U=U l        =U l         U l!        [        R&                  " U5      U l"        U	S:  a"  [        RF                  " U RB                  U	40 UD6O[        RH                  " 5       U l%        S U l&        U(       aL  U	S:  a,  [        RF                  " U RB                  U R                  40 UD6O[        RH                  " 5       U l&        SU l'        [Q        U R"                  SS9  [Q        U R$                  SS9  U RS                  U RT                  5        g )Nr<   )tokenr   rS   r   )pT)	stagewiserk   )r6   r7   r8   r9   transformers.)num_chs	reductionr"   r@   r?   Fg{Gz?)std)+rE   rF   r   r4   r   r   r   
num_tokensfeature_infor}   patch_embedr   	Parameterr,   randnr   r   	pos_embedrx   Dropoutpos_dropr   rJ   lenrg   r0   dictr   transformersrK   rH   num_featureshead_hidden_sizerN   	head_droprs   rG   head	head_distdistilled_trainingr   apply_init_weights)r!   r   r   rk   r   r   r3   r4   r5   r   r   r   r   r   r   r   r   r   r=   r>   rM   rN   r   dprprev_dimrO   r6   rP   s                              r#   rF   !PoolingVisionTransformer.__init__   s   , 	/j((("
aL58+	& &(!a(9hTZa^`aekk!Y@P@P@W@WY]YiYiYoYo&vsu&vwekk!T__i&VSU&VW

^4'Ns5z"AD!!uQx/I1u  	 [!aa	

 (("1v
 
 
 
L !H$xFQJRSUVRVCVanopnq_r"s!tt- #0 ,\:LL2r!:KKK	ENNND1DN I.DORSOBIIdnnk@R@Y[YdYdYf	R]`aRaRYYt~~t7G7GN2NgigrgrgtDN"'dnn#.dnn#.

4%%&r&   c                     [        U[        R                  5      (       aU  [        R                  R	                  UR
                  S5        [        R                  R	                  UR                  S5        g g )Nr   g      ?)
isinstancer   rK   init	constant_r   weight)r!   ms     r#   r   &PoolingVisionTransformer._init_weights   sH    a&&GGaffa(GGahh, 'r&   c                 
    SS1$ )Nr   rx   r    r!   s    r#   no_weight_decay(PoolingVisionTransformer.no_weight_decay  s    [))r&   c                     Xl         g r   )r   r!   enables     r#   set_distilled_training/PoolingVisionTransformer.set_distilled_training  s    "(r&   c                      U(       a   S5       eg )Nz$gradient checkpointing not supportedr    r   s     r#   set_grad_checkpointing/PoolingVisionTransformer.set_grad_checkpointing
  s    AAAz6r&   r   c                 d    U R                   b  U R                  U R                   4$ U R                  $ r   )r   r   r   s    r#   get_classifier'PoolingVisionTransformer.get_classifier  s)    >>%99dnn,,99r&   c                 F   Xl         Ub  X l        [        U R                  S5      (       a   U R                  R                  R
                  OS n[        U R                  S5      (       a   U R                  R                  R                  OS nUS:  a   [        R                  " U R                  XUS9O[        R                  " 5       U l        U R                  bK  US:  a*  [        R                  " U R                  U R                   X4S9O[        R                  " 5       U l        g g )Nr   r   r<   )r   r   hasattrr   r   r=   r>   r   rs   rN   rG   r   )r!   r   r   r=   r>   s        r#   reset_classifier)PoolingVisionTransformer.reset_classifier  s    &"*,3DIIx,H,H!!((d*1$))X*F*F		  &&DZehiZiBIIdnnkPUVoqozozo|	>>%hsvwhwRYYt~~t7G7GPVd}  ~I  ~I  ~KDN &r&   r   indicesrH   
stop_early
output_fmtintermediates_onlyc                    US;   d   S5       e/ n[        [        U R                  5      U5      u  pU R                  U5      nU R	                  XR
                  -   5      nU R                  R                  UR                  S   SS5      n
[        U R                  5      S-
  n[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SU	S-    n[        U5       H(  u  pU" X45      u  pX;   d  M  UR                  U5        M*     U(       a  U$ WU:X  a  U R                  U
5      n
X4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to compatible intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWzOutput shape must be NCHW.r   r   r   N)r   r   r   r   r   r   rx   expandrT   r,   jitis_scripting	enumerateappendrH   )r!   r   r   rH   r   r   r   intermediatestake_indices	max_indexrY   last_idxstagesfeat_idxstages                  r#   forward_intermediates.PoolingVisionTransformer.forward_intermediates  s&   * Y&D(DD&"6s4;L;L7Mw"W QMM!nn,-^^**1771:r2>
t(()A-99!!##:&&F&&~	A6F(0OH!1/2MA'$$Q'  1
   x:.J((r&   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a  U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr   r    )r   r   r   r   rG   rH   r   )r!   r   r   r   r   r   s         r#   prune_intermediate_layers2PoolingVisionTransformer.prune_intermediate_layersO  s`     #7s4;L;L7Mw"W --ny1}=DI!!!R(r&   c                     U R                  U5      nU R                  XR                  -   5      nU R                  R	                  UR
                  S   SS5      nU R                  X45      u  pU R                  U5      nU$ )Nr   r   )r   r   r   rx   r   rT   r   rH   )r!   r   rY   s      r#   forward_features)PoolingVisionTransformer.forward_features_  so    QMM!nn,-^^**1771:r2>
))1/:YYz*
r&   
pre_logitsc                    U R                   b  U R                  S:X  d   eUS S 2S4   US S 2S4   p1U R                  U5      nU R                  U5      nU(       d"  U R                  U5      nU R                  U5      nU R                  (       a7  U R
                  (       a&  [        R                  R                  5       (       d  X4$ X-   S-  $ U R                  S:X  a	  US S 2S4   nU R                  U5      nU(       d  U R                  U5      nU$ )Nr   r   r   rS   )	r   r   r   r   r   trainingr,   r   r   )r!   r   r   x_dists       r#   forward_head%PoolingVisionTransformer.forward_headg  s    >>%##w...!Q$1a4vq!A^^F+FIIaL/&&4==AWAWAYAYy  
a''7*adGq!AIIaLHr&   c                 J    U R                  U5      nU R                  U5      nU$ r   )r   r   r   s     r#   r$    PoolingVisionTransformer.forward~  s'    !!!$a r&   )r   rx   r   rN   r   r   r   r   r   r   r4   r   rH   r   r   r   r   r   r   r   )r   r   r   overlap0   r   r   rS         rS   r  r   r       r   Fr`   r`   r`   r`   r`   NN)Tr   )NFFr   F)r   FTF)"r'   r(   r)   r*   r+   ra   r{   r   rb   boolrF   r   r,   r   ignorer   r   r   r   rc   r   r   r   r-   r   r   r   r   r   r   r   r$   r.   rd   re   s   @r#   r   r      s      &'3#,#, #&#!$&$&$&$&)R'R' R' 	R'
 R'  }R' C=R' C=R' R' R' R' R' R' R' "R'  "!R'" "#R'$ "%R' R'h-
 YY* * YY) ) YYB B		 KC Khsm K 8<$$',/)||/) eCcN34/) 	/)
 /) /) !%/) 
tELL!5tELL7I)I#JJ	K/)f ./$#	3S	>*  	 $ 5<< . r&   c                     0 n[         R                  " S5      nU R                  5        H  u  pEUR                  S U5      nXRU'   M     U$ )zpreprocess checkpoints zpools\.(\d)\.c                 D    S[        U R                  S5      5      S-    S3$ )Nr   r   z.pool.)ra   group)exps    r#   <lambda>&checkpoint_filter_fn.<locals>.<lambda>  s     }S15F5J4K6%Rr&   )recompileitemssub)
state_dictmodelout_dictp_blockskvs         r#   checkpoint_filter_fnr    sM    Hzz*+H  "
 LLRTUV # Or&   c           	          [        [        S5      5      nUR                  SU5      n[        [        U U4[
        [        SUS9S.UD6nU$ )Nr  out_indiceshook)feature_clsr  )pretrained_filter_fnfeature_cfg)tuplerJ   popr   r   r  r   )variant
pretrainedkwargsdefault_out_indicesr  r  s         r#   _create_pitr%    sY    a/**],?@K   2VE E Lr&   c                 4    U SSS SSS[         [        SSSS	.UE$ )
Nr  )r  r   r   g?bicubicTzpatch_embed.convr   z
apache-2.0)urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanr   
first_conv
classifierlicenser   )r(  r#  s     r#   _cfgr2    s5    =t%.B(  r&   ztimm/)	hf_hub_id)r   r   )r3  r0  )zpit_ti_224.in1kzpit_xs_224.in1kzpit_s_224.in1kzpit_b_224.in1kzpit_ti_distilled_224.in1kzpit_xs_distilled_224.in1kzpit_s_distilled_224.in1kzpit_b_distilled_224.in1kr   c           	      X    [        SS/ SQ/ SQ/ SQSS9n[        SU 40 [        U40 UD6D6$ )	N      @   r8  r8  r  r   r  r  r   r   r  r   rk   r   r3   r4   r5   	pit_b_224r   r%  r"  r#  
model_argss      r#   r<  r<    <    J {JM$z2LV2LMMr&   c           	      X    [        SS/ SQ/ SQ/ SQSS9n[        SU 40 [        U40 UD6D6$ )	Nr   r   r   r   r  r      r  r;  	pit_s_224r=  r>  s      r#   rD  rD    r@  r&   c           	      X    [        SS/ SQ/ SQ/ SQSS9n[        SU 40 [        U40 UD6D6$ )	Nr   r   r   r   r  r  r;  
pit_xs_224r=  r>  s      r#   rF  rF    <    J |ZN4
3Mf3MNNr&   c           	      X    [        SS/ SQ/ SQ/ SQSS9n[        SU 40 [        U40 UD6D6$ )	Nr   r       rJ  rJ  r   r  r  r;  
pit_ti_224r=  r>  s      r#   rK  rK    rG  r&   c           
      Z    [        SS/ SQ/ SQ/ SQSSS9n[        S	U 40 [        U40 UD6D6$ )
Nr5  r6  r7  r9  r:  r  Tr   rk   r   r3   r4   r5   r   pit_b_distilled_224r=  r>  s      r#   rN  rN    @    J ,jWD<Vv<VWWr&   c           
      Z    [        SS/ SQ/ SQ/ SQSSS9n[        S	U 40 [        U40 UD6D6$ )
Nr   r   r   r   rB  r  TrM  pit_s_distilled_224r=  r>  s      r#   rQ  rQ    rO  r&   c           
      Z    [        SS/ SQ/ SQ/ SQSSS9n[        S	U 40 [        U40 UD6D6$ )
Nr   r   r   r   r  r  TrM  pit_xs_distilled_224r=  r>  s      r#   rS  rS    A    J -zXT*=WPV=WXXr&   c           
      Z    [        SS/ SQ/ SQ/ SQSSS9n[        S	U 40 [        U40 UD6D6$ )
Nr   r   rI  r   r  r  TrM  pit_ti_distilled_224r=  r>  s      r#   rV  rV     rT  r&   r  )r   )3r+   r   r  	functoolsr   typingr   r   r   r   r   r	   r
   r,   r   	timm.datar   r   timm.layersr   r   r   _builderr   	_featuresr   	_registryr   r   vision_transformerr   __all__rI   r   rc   r0   rg   r}   r   r  r%  r2  default_cfgsr<  rD  rF  rK  rN  rQ  rS  rV  r    r&   r#   <module>ra     s    	  D D D   A K K * + < % &
&bmm 4")) 4nbii >!BII !H^ryy ^B	 %g.g.W-W-!%("* "&("* !%(!* !%(!*& * 	N-E 	N 	N 	N-E 	N 	N 	O.F 	O 	O 	O.F 	O 	O 
X7O 
X 
X 
X7O 
X 
X 
Y8P 
Y 
Y 
Y8P 
Y 
Yr&   