
    RЦi&                        S r SSKrSSKrSSKJrJrJrJrJrJ	r	J
r
JrJr  SSKrSSKJr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJr  SSK J!r!  SSK"J#r#  SS	K$J%r%  SS
K&J'r'J(r(  SSK)J*r*J+r+J,r,  SSK-J.r.  S/r/\R`                  " \15      r2\\3\
\3\34   4   r4S\Rj                  S\
\3\34   S\Rj                  4S jr6\%S\Rj                  S\
\3\34   S\3S\3S\Rj                  4
S j5       r7SbS\3S\3S\Rj                  4S jjr8 " S S\Rr                  5      r: " S S\Rr                  5      r; " S S\Rr                  5      r< " S S \Rr                  5      r= " S! S\Rr                  5      r>S"\?S#\Rr                  S\\@\Rj                  4   4S$ jrAScS%\@S&\BS\>4S' jjrCSdS(\@S\\@\4   4S) jjrD\*" 0 S*\D" S+S,S-9_S.\D" S+S/S-9_S0\D" S+S1S2S3S4S59_S6\D" S+S7S-9_S8\D" S+S9S2S3S4S59_S:\D" S+S;S-9_S<\D" S+S=S-9_S>\D" S+S?S-9_S@\D" S+SAS2S3S4S59_SB\D" S+SCS-9_SD\D" S+SESFSG9_SH\D" S+SISFSG9_SJ\D" S+SKSFSG9_SL\D" S+SMS2S3S4SFSN9_SO\D" S+SPSFSG9_SQ\D" S+SRS2S3S4SFSN9_SS\D" S+STS-9_\D" S+SUS-9\D" S+SVS-9SW.E5      rE\+ScS\>4SX jj5       rF\+ScS\>4SY jj5       rG\+ScS\>4SZ jj5       rH\+ScS\>4S[ jj5       rI\+ScS\>4S\ jj5       rJ\+ScS\>4S] jj5       rK\+ScS\>4S^ jj5       rL\+ScS\>4S_ jj5       rM\+ScS\>4S` jj5       rN\," \1SJSLSOSQSa.5        g)ea  Swin Transformer
A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`
    - https://arxiv.org/pdf/2103.14030

Code/weights from https://github.com/microsoft/Swin-Transformer, original copyright/license info below

S3 (AutoFormerV2, https://arxiv.org/abs/2111.14725) Swin weights from
    - https://github.com/microsoft/Cream/tree/main/AutoFormerV2

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    N)	AnyDictCallableListOptionalSetTupleUnionTypeIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
PatchEmbedMlpDropPathcalculate_drop_path_ratesClassifierHead	to_2tuple	to_ntupletrunc_normal_use_fused_attnresize_rel_pos_bias_tableresample_patch_embedndgrid   )build_model_with_cfg)feature_take_indices)register_notrace_function)checkpoint_seqnamed_apply)generate_default_cfgsregister_modelregister_model_deprecations)get_init_weights_vitSwinTransformerxwindow_sizereturnc                     U R                   u  p#pEU R                  X#US   -  US   XAS   -  US   U5      n U R                  SSSSSS5      R                  5       R                  SUS   US   U5      nU$ )zPartition into non-overlapping windows.

Args:
    x: Input tokens with shape [B, H, W, C].
    window_size: Window size.

Returns:
    Windows after partition with shape [B * num_windows, window_size, window_size, C].
r   r               shapeviewpermute
contiguous)r&   r'   BHWCwindowss          [/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/swin_transformer.pywindow_partitionr:   *   s     JA!	q{1~%{1~qN7JKXYN\]^Aii1aAq)446;;BAP[\]P^`abGN    r8   r5   r6   c                     U R                   S   nU R                  SX!S   -  X1S   -  US   US   U5      nUR                  SSSSSS5      R                  5       R                  SX#U5      nU$ )zReverse window partition.

Args:
    windows: Windows with shape (num_windows*B, window_size, window_size, C).
    window_size: Window size.
    H: Height of image.
    W: Width of image.

Returns:
    Tensor with shape (B, H, W, C).
r.   r   r   r*   r+   r,   r-   r/   )r8   r'   r5   r6   r7   r&   s         r9   window_reverser=   =   sy     	bAR!n,aq>.A;q>S^_`SacdeA			!Q1a#..055b!BAHr;   win_hwin_wc                    [         R                  " [        [         R                  " X[         R                  S9[         R                  " X[         R                  S95      5      n[         R
                  " US5      nUSS2SS2S4   USS2SSS24   -
  nUR                  SSS5      R                  5       nUSS2SS2S4==   U S-
  -  ss'   USS2SS2S4==   US-
  -  ss'   USS2SS2S4==   SU-  S-
  -  ss'   UR                  S5      $ )zGet pair-wise relative position index for each token inside the window.

Args:
    win_h: Window height.
    win_w: Window width.

Returns:
    Relative position index tensor.
devicedtyper   Nr+   r   r.   )	torchstackr   arangelongflattenr2   r3   sum)r>   r?   rB   coordscoords_flattenrelative_coordss         r9   get_relative_position_indexrM   P   s     [[U<U< F ]]61-N$Q4Z0>!T1*3MMO%--aA6AACOAq!G	)Aq!G	)Aq!GE	A-r""r;   c                   V  ^  \ rS rSr% Sr\R                  R                  \   \	S'          SS\
S\
S\\
   S\S	\S
\S\4U 4S jjjrSS jrSS jrS\\
\
4   SS4S jrS\R&                  4S jrSS\R&                  S\\R&                     S\R&                  4S jjrSS jrSrU =r$ )WindowAttentionh   zWindow based multi-head self attention (W-MSA) module with relative position bias.

Supports both shifted and non-shifted windows.

fused_attnNdim	num_headshead_dimr'   qkv_bias	attn_drop	proj_dropc
           
      
  > XS.n
[         TU ]  5         Xl        [        U5      U l        U R                  u  pX-  U l        X l        U=(       d    X-  nX2-  nUS-  U l        [        SS9U l	        [        R                  " [        R                  " SU-  S-
  SU-  S-
  -  U40 U
D65      U l        U R                  S[        R                  " X-  X-  U[        R                   S9SS	9  [        R"                  " XS
-  4SU0U
D6U l        [        R&                  " U5      U l        [        R"                  " X40 U
D6U l        [        R&                  " U5      U l        [        R.                  " SS9U l        U R3                  5         g)an  
Args:
    dim: Number of input channels.
    num_heads: Number of attention heads.
    head_dim: Number of channels per head (dim // num_heads if not set)
    window_size: The height and width of the window.
    qkv_bias:  If True, add a learnable bias to query, key, value.
    attn_drop: Dropout ratio of attention weight.
    proj_drop: Dropout ratio of output.
rA   g      T)experimentalr+   r   relative_position_indexF
persistentr*   biasr.   )rR   N)super__init__rR   r   r'   window_arearS   scaler   rQ   nn	ParameterrD   emptyrelative_position_bias_tableregister_bufferrG   LinearqkvDropoutrV   projrW   Softmaxsoftmaxreset_parameters)selfrR   rS   rT   r'   rU   rV   rW   rB   rC   ddr>   r?   attn_dim	__class__s                 r9   r_   WindowAttention.__init__o   sW   , /$[1'' ="/s/'%
(d; -/LLKKUQ1u9q=99KK-M) 	%KKu}V5::V 	 	
 99SQ,DXDDI.IIh2r2	I.zzb) 	r;   r(   c                 L    [        U R                  SS9  U R                  5         g)"Initialize parameters and buffers.g{Gz?)stdN)r   re   _init_buffersrn   s    r9   rm    WindowAttention.reset_parameters   s    d77SAr;   c                     U R                   u  pU R                  R                  [        XU R                  R                  S95        g).Compute and fill non-persistent buffer values.rB   N)r'   rZ   copy_rM   rB   )rn   r>   r?   s      r9   rv   WindowAttention._init_buffers   s9    ''$$**'T=Y=Y=`=`a	
r;   c           
         [        U5      nXR                  :X  a  gXl        U R                  u  p#X#-  U l        [        R                  " 5          SU-  S-
  SU-  S-
  -  U R
                  4n[        R                  " [        U R                  U R                  US95      U l	        U R                  S[        X#U R                  R                  S9SS9  SSS5        g! , (       d  f       g= f)	zbUpdate window size & interpolate position embeddings
Args:
    window_size (int): New window size
Nr+   r   new_window_sizenew_bias_shaperZ   r{   Fr[   )r   r'   r`   rD   no_gradrS   rb   rc   r   re   rf   rM   rB   )rn   r'   r>   r?   r   s        r9   set_window_sizeWindowAttention.set_window_size   s    
  ,***&'' =]]_%i!mE	A>NN02)55$($4$4#11D-   )+EAbAbAiAij  !  __s   BC
C(c                     U R                   U R                  R                  S5         R                  U R                  U R                  S5      nUR	                  SSS5      R                  5       nUR                  S5      $ )Nr.   r+   r   r   )re   rZ   r1   r`   r2   r3   	unsqueeze)rn   relative_position_biass     r9   _get_rel_pos_bias!WindowAttention._get_rel_pos_bias   ss    !%!B!B((--b1"33748H8H$JZJZ\^3_ 	!7!?!?1a!H!S!S!U%//22r;   r&   maskc                    UR                   u  p4nU R                  U5      R                  X4SU R                  S5      R	                  SSSSS5      nUR                  S5      u  pxn	U R                  (       a  U R                  5       n
Uba  UR                   S   nUR                  SUSXD5      R                  X;-  SU R                  SS5      nXR                  SU R                  XD5      -   n
[        R                  R                  R                  XxU	U
U R                  (       a  U R                  R                   OSS9nOXpR"                  -  nXxR%                  S	S5      -  nXR                  5       -   nUbj  UR                   S   nUR                  SXR                  XD5      UR'                  S5      R'                  S5      -   nUR                  SU R                  XD5      nU R)                  U5      nU R                  U5      nX-  nUR%                  SS5      R                  X4S5      nU R+                  U5      nU R-                  U5      nU$ )
zForward pass.

Args:
    x: Input features with shape of (num_windows*B, N, C).
    mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None.

Returns:
    Output features with shape of (num_windows*B, N, C).
r*   r.   r+   r   r   r,           )	attn_mask	dropout_p)r0   rh   reshaperS   r2   unbindrQ   r   r1   expandrD   rb   
functionalscaled_dot_product_attentiontrainingrV   pra   	transposer   rl   rj   rW   )rn   r&   r   B_Nr7   rh   qkvr   num_winattns                r9   forwardWindowAttention.forward   s    77qhhqk!!"DNNB?GG1aQRTUV**Q-a??..0I**Q-yyGQ5<<R]BPTP^P^`bdfg%R(NN	##@@a#.2mm$..** A A JJA{{2r**D0022D**Q-yyWnnaCdnnUVFWFaFabcFddyyT^^Q:<<%D>>$'DAKK1%%bR0IIaLNN1r;   c                 $    U R                  5         gz"Initialize non-persistent buffers.Nrv   rw   s    r9   init_non_persistent_buffers+WindowAttention.init_non_persistent_buffers       r;   )rV   rR   rQ   rS   rj   rW   rh   re   ra   rl   r`   r'   )N   Tr   r   NNr(   NN)__name__
__module____qualname____firstlineno____doc__rD   jitFinalbool__annotations__intr   _int_or_tuple_2_tfloatr_   rm   rv   r	   r   Tensorr   r   r   __static_attributes____classcell__rq   s   @r9   rO   rO   h   s     		%% '+-.!!!4 4  4  sm	4 
 +4  4  4  4  4 l

5c? t 235<< 3( (Xell-C (u|| (T r;   rO   c                    x  ^  \ rS rSrSrSSSSSSSS	S
S
S
\R                  \R                  SS4S\S\	S\S\
\   S\	S\S\S\S\S\S\S\S\S\\R                     S\\R                     4U 4S jjjrS+S jrS+S jr   S,S\
\R&                     S\
\R(                     S \
\R*                     S\
\R&                     4S! jjr S-S"\\\\\4   4   S#\
\\\\\4   4      S\\\\4   \\\4   4   4S$ jjr S-S%\\\4   S\\\4   S\
\   4S& jjrS' rS\R&                  S\R&                  4S( jrS+S) jrS*rU =r$ ).SwinTransformerBlock   zcSwin Transformer Block.

A transformer block with window-based self-attention and shifted windows.
r,   Nr   r   F      @Tr   rR   input_resolutionrS   rT   r'   
shift_sizealways_partitiondynamic_mask	mlp_ratiorU   rW   rV   	drop_path	act_layer
norm_layerc           
        > UUS.n[         TU ]  5         Xl        X l        [	        U5      U l        Xpl        Xl        U R                  XV5      u  U l	        U l
        U R                  S   U R                  S   -  U l        Xl        U" U40 UD6U l        [        U4UUU R                  U
UUS.UD6U l        US:  a  [!        U5      O["        R$                  " 5       U l        U" U40 UD6U l        [+        SU[-        X-  5      UUS.UD6U l        US:  a  [!        U5      O["        R$                  " 5       U l        U R3                  SSS	S
9  U R5                  5         g)ae  
Args:
    dim: Number of input channels.
    input_resolution: Input resolution.
    window_size: Window size.
    num_heads: Number of attention heads.
    head_dim: Enforce the number of channels per head
    shift_size: Shift size for SW-MSA.
    always_partition: Always partition into full windows and shift
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    proj_drop: Dropout rate.
    attn_drop: Attention dropout rate.
    drop_path: Stochastic depth rate.
    act_layer: Activation layer.
    norm_layer: Normalization layer.
rA   r   r   )rS   rT   r'   rU   rV   rW   r   )in_featureshidden_featuresr   dropr   NFr[    )r^   r_   rR   r   r   target_shift_sizer   r   _calc_window_shiftr'   r   r`   r   norm1rO   r   r   rb   Identity
drop_path1norm2r   r   mlp
drop_path2rf   rm   )rn   rR   r   rS   rT   r'   r   r   r   r   rU   rW   rV   r   r   r   rB   rC   ro   rq   s                      r9   r_   SwinTransformerBlock.__init__  sd   J / 0!*:!6 0(,0,C,CK,\)$/++A.1A1A!1DD"*r*
#	
((	
 	
	 2;R(9-R[[]*r*
 
0	

 
 2;R(9-R[[] 	[$5A 	r;   r(   c                 $    U R                  5         g)rt   Nr   rw   s    r9   rm   %SwinTransformerBlock.reset_parametersR  r   r;   c                     U R                   (       da  U R                  R                  R                  nU R                  R                  R                  nU R                  XS9nU R                  SUSS9  gg)rz   rA   r   Fr[   N)r   r   weightrB   rC   get_attn_maskrf   )rn   rB   rC   r   s       r9   rv   "SwinTransformerBlock._init_buffersV  sb      ZZ&&--FJJ%%++E**&*FI  iE J	 !r;   r&   rB   rC   c           	      h   [        U R                  5      (       Ga  Ub6  UR                  S   UR                  S   pTUR                  nUR                  nOU R
                  u  pEUnUn[        R                  " X@R                  S   -  5      U R                  S   -  n[        R                  " XPR                  S   -  5      U R                  S   -  n[        R                  " SXES4X2S9nSnSU R                  S   * 4U R                  S   * U R                  S   * 4U R                  S   * S 44 Hn  nSU R                  S   * 4U R                  S   * U R                  S   * 4U R                  S   * S 44 H$  n	XvS S 2US   US   2U	S   U	S   2S S 24'   US-  nM&     Mp     [        X`R                  5      n
U
R                  SU R                  5      n
U
R                  S5      U
R                  S5      -
  nUR                  US:g  [!        S5      5      R                  US:H  [!        S5      5      nU$ S nU$ )Nr   r+   r   )rC   rB   r.   g      Yr   )anyr   r0   rB   rC   r   mathceilr'   rD   zerosr:   r1   r`   r   masked_fillr   )rn   r&   rB   rC   r5   r6   img_maskcnthwmask_windowsr   s               r9   r   "SwinTransformerBlock.get_attn_mask^  sE    t}wwqz1771:1,,		!..q112T5E5Ea5HHA		!..q112T5E5Ea5HHA{{AqQ<uLHC))!,,-&&q))DOOA,>+>?ooa(($/ T--a001**1--0B/BC//!,,d3A
 <?Q!QqT	1Q4!9a781HC ,H6F6FGL',,R1A1ABL$..q1L4J4J14MMI!--i1neFmLXXYbfgYginoristI  Ir;   target_window_sizer   c                    [        U5      nUc-  U R                  n[        U5      (       a  US   S-  US   S-  4nO[        U5      nU R                  (       a  X4$ [	        U R
                  U5       VVs/ s H  u  p4X4::  a  UOUPM     nnn[	        U R
                  XR5       VVVs/ s H  u  p4ocU::  a  SOUPM     nnnn[        U5      [        U5      4$ s  snnf s  snnnf )Nr   r+   r   )r   r   r   r   zipr   tuple)rn   r   r   rr   r'   sr   s           r9   r   'SwinTransformerBlock._calc_window_shift  s    
 ''9:$ $ 6 6$%%%7%:a%?ASTUAVZ[A[$\! )*; <  %8869$:O:OQc6de6ddaAFq)6de8;D<Q<QS^8rs8rWQ16aq(8r
s[!5#444 fss   4C&C	feat_sizec                    Xl         Ub  X0l        U R                  U5      u  U l        U l        U R                  S   U R                  S   -  U l        U R                  R                  U R                  5        U R                  b  U R                  R                  OSnU R                  b  U R                  R                  OSnU R                  SU R                  (       a  SOU R                  XES9SS9  g)z
Args:
    feat_size: New input resolution
    window_size: New window size
    always_partition: Change always_partition attribute if not None
Nr   r   r   rA   Fr[   )r   r   r   r'   r   r`   r   r   r   rB   rC   rf   r   r   )rn   r   r'   r   rB   rC   s         r9   set_input_size#SwinTransformerBlock.set_input_size  s     !*'$4!,0,C,CK,P)$/++A.1A1A!1DD		!!$"2"23*...*D&&$(,(B$$%%D4+=+=V+=+Y 	 	
r;   c           	         UR                   u  p#pE[        U R                  5      nU(       a4  [        R                  " XR                  S   * U R                  S   * 4SS9nOUnU R
                  S   X0R
                  S   -  -
  U R
                  S   -  nU R
                  S   X@R
                  S   -  -
  U R
                  S   -  n	[        R                  R                  R                  USSSU	SU45      nUR                   u  pp[        XpR
                  5      nUR                  SU R                  U5      n[        U SS5      (       a  U R                  U5      nOU R                  nU R                  XS9nUR                  SU R
                  S   U R
                  S   U5      n[!        XR
                  X5      nUS S 2S U2S U2S S 24   R#                  5       nU(       a!  [        R                  " XpR                  SS9nU$ UnU$ )	Nr   r   )r   r+   )shiftsdimsr.   r   F)r   )r0   r   r   rD   rollr'   rb   r   padr:   r1   r`   getattrr   r   r   r=   r3   )rn   r&   r4   r5   r6   r7   	has_shift	shifted_xpad_hpad_w_HpWp	x_windowsr   attn_windowss                   r9   _attnSwinTransformerBlock._attn  s   WW
a (	

1q/A.ADOOTUDVCV-W^deII !!!$q+;+;A+>'>>$BRBRSTBUU!!!$q+;+;A+>'>>$BRBRSTBUUHH''++I1a57QR	 r %Y0@0@A	NN2t'7'7;	 4//**95IIyyy; $((T-=-=a-@$BRBRSTBUWXY"<1A1A2J	a!RaRl+668	 

9__6JA  Ar;   c                 2   UR                   u  p#pEXR                  U R                  U R                  U5      5      5      -   nUR	                  USU5      nXR                  U R                  U R                  U5      5      5      -   nUR	                  X#XE5      nU$ )z{Forward pass.

Args:
    x: Input features with shape (B, H, W, C).

Returns:
    Output features with shape (B, H, W, C).
r.   )r0   r   r   r   r   r   r   r   )rn   r&   r4   r5   r6   r7   s         r9   r   SwinTransformerBlock.forward  s|     WW
a

4::a= 9::IIaQA 788IIaA!r;   c                 $    U R                  5         gr   r   rw   s    r9   r   0SwinTransformerBlock.init_non_persistent_buffers  r   r;   )r   r   rR   r   r   r   r   r   r   r   r   r   r   r`   r'   r   )NNNr   ) r   r   r   r   r   rb   GELU	LayerNormr   r   r   r   r   r   Moduler_   rm   rv   rD   r   rB   rC   r   r
   r	   r   r   r   r   r   r   r   r   s   @r9   r   r      s9    &*-.%*!&!!!!!)+*,,,%K K  0K  	K 
 smK  +K  K  #K  K  K  K  K  K  K  BIIK   RYY!K  K ZK )--1+/	&%& U\\*& EKK(	&
 
%,,	&V HL5 %c5c?&: ;5  (c5c?.B(CD5 
uS#Xc3h/	0	52 04	
S#X
 sCx
 'tn	
4%N %,,   r;   r   c                      ^  \ rS rSrSrS\R                  SS4S\S\\   S\	\R                     4U 4S jjjrS\R                  S	\R                  4S
 jrSrU =r$ )PatchMergingi  zNPatch Merging Layer.

Downsample features by merging 2x2 neighboring patches.
NrR   out_dimr   c                    > XES.n[         TU ]  5         Xl        U=(       d    SU-  U l        U" SU-  40 UD6U l        [
        R                  " SU-  U R                  4SS0UD6U l        g)z
Args:
    dim: Number of input channels.
    out_dim: Number of output channels (or 2 * dim if None)
    norm_layer: Normalization layer.
rA   r+   r,   r]   FN)r^   r_   rR   r  normrb   rg   	reduction)rn   rR   r  r   rB   rC   ro   rq   s          r9   r_   PatchMerging.__init__  sf     /)!c'q3w-"-	1s7DLLKuKKr;   r&   r(   c                 \   UR                   u  p#pESSSUS-  SUS-  4n[        R                  R                  X5      nUR                   u  pspGUR	                  X#S-  SUS-  SU5      R                  SSSSSS5      R                  S5      nU R                  U5      nU R                  U5      nU$ )zForward pass.

Args:
    x: Input features with shape (B, H, W, C).

Returns:
    Output features with shape (B, H//2, W//2, out_dim).
r   r+   r   r*   r,   r-   )	r0   rb   r   r   r   r2   rH   r  r  )rn   r&   r4   r5   r6   r7   
pad_valuesr   s           r9   r   PatchMerging.forward  s     WW
aAq1uaQ/
MMa,WW
aIIaaAFAq199!Q1aKSSTUVIIaLNN1r;   )rR   r  r  r  )r   r   r   r   r   rb   r  r   r   r   r  r_   rD   r   r   r   r   r   s   @r9   r
  r
    sq     &**,,,LL c]L RYY	L L* %,,  r;   r
  c            "       D  ^  \ rS rSrSrSSSSSSSSS	S	S	\R                  SS4S
\S\S\\\4   S\S\	S\S\
\   S\S\	S\	S\S\	S\S\S\\\   \4   S\\R                      4 U 4S jjjr S!S\\\4   S\S\
\	   4S jjrS\R(                  S\R(                  4S jrS rU =r$ )"SwinTransformerStagei!  ztA basic Swin Transformer layer for one stage.

Contains multiple Swin Transformer blocks and optional downsampling.
Tr,   Nr   Fr   r   rR   r  r   depth
downsamplerS   rT   r'   r   r   r   rU   rW   rV   r   r   c                 r  > UUS.n[         TU ]  5         Xl        X0l        U(       a  [	        S U 5       5      OUU l        X@l        SU l        [        U5      n[	        U Vs/ s H  nUS-  PM
     sn5      nU(       a  [        S	UUUS.UD6U l
        O!X:X  d   e[        R                  " 5       U l
        [        R                  " [        U5       Vs/ s HL  n[        S	UU R
                  UUUUS-  S:X  a  SOUU	U
UUUU[!        U["        5      (       a  UU   OUUS.UD6PMN     sn6 U l        gs  snf s  snf )
ar  
Args:
    dim: Number of input channels.
    out_dim: Number of output channels.
    input_resolution: Input resolution.
    depth: Number of blocks.
    downsample: Downsample layer at the end of the layer.
    num_heads: Number of attention heads.
    head_dim: Channels per head (dim // num_heads if not set)
    window_size: Local window size.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    proj_drop: Projection dropout rate.
    attn_drop: Attention dropout rate.
    drop_path: Stochastic depth rate.
    norm_layer: Normalization layer.
rA   c              3   *   #    U  H	  oS -  v   M     g7fr+   Nr   .0is     r9   	<genexpr>0SwinTransformerStage.__init__.<locals>.<genexpr>Q  s     &H7G!Av7G   Fr+   )rR   r  r   r   )rR   r   rS   rT   r'   r   r   r   r   rU   rW   rV   r   r   Nr   )r^   r_   rR   r   r   output_resolutionr  grad_checkpointingr   r
  r  rb   r   
Sequentialranger   
isinstancelistblocks)rn   rR   r  r   r  r  rS   rT   r'   r   r   r   rU   rW   rV   r   r   rB   rC   ro   r   r   r  rq   s                          r9   r_   SwinTransformerStage.__init__'  sX   L / 0LV&H7G&H!H\l
"',K8KqAFK89
 * % 	DO >!> kkmDO mm$ 5\%&#$ "# ! !%!7!7#!'!"Q!1*!1)#!##*4Y*E*E)A,9% " "%&# $ 9&#s   "D/AD4r   c                     Xl         [        U R                  [        R                  5      (       a  Xl        O[        S U 5       5      U l        U R                   H  nUR                  U R
                  UUS9  M      g)zUpdates the resolution, window size and so the pair-wise relative positions.

Args:
    feat_size: New input (feature) resolution
    window_size: New window size
    always_partition: Always partition / shift the window
c              3   *   #    U  H	  oS -  v   M     g7fr  r   r  s     r9   r  6SwinTransformerStage.set_input_size.<locals>.<genexpr>  s     *E9a69r  r   r'   r   N)	r   r$  r  rb   r   r   r   r&  r   )rn   r   r'   r   blocks        r9   r   #SwinTransformerStage.set_input_sizex  sh     !*door{{33%."%**E9*E%ED"[[E  00'!1 !  !r;   r&   r(   c                     U R                  U5      nU R                  (       a;  [        R                  R	                  5       (       d  [        U R                  U5      nU$ U R                  U5      nU$ )zKForward pass.

Args:
    x: Input features.

Returns:
    Output features.
)r  r!  rD   r   is_scriptingr   r&  rn   r&   s     r9   r   SwinTransformerStage.forward  sX     OOA""599+A+A+C+Ct{{A.A  AAr;   )r&  r  rR   r  r!  r   r   r   )r   r   r   r   r   rb   r  r   r	   r   r   r   r   r
   r   r   r  r_   r   rD   r   r   r   r   r   s   @r9   r  r  !  sg     $&*-.%*!&!!!!35*,,,'O$O$ O$ $CHo	O$
 O$ O$ O$ smO$ +O$ #O$ O$ O$ O$ O$ O$  T%[%/0!O$" RYY#O$ O$j 04	S#X  'tn	2 %,,  r;   r  c            ,         ^  \ rS rSrSrSSSSSSS	S
SSSSSSSSSS\\R                  SSS4S\S\	S\	S\	S\
S\	S\\	S4   S\\	S4   S\\	   S\S\S\S \S!\S"\S#\S$\S%\S&\\R                      S'\\
\\R                      4   S(\
4*U 4S) jjjr\R(                  R*                  SFS*\
S+\S,S4S- jj5       r\R(                  R*                  S,\\
   4S. j5       r     SGS\\\	\	4      S\\\	\	4      S\\\	\	4      S/\	S\\   S,S4S0 jjr\R(                  R*                  SHS1\S,\\
\4   4S2 jj5       r\R(                  R*                  SIS3\S,S4S4 jj5       r\R(                  R*                  S,\R                   4S5 j5       rSJS\	S\\
   S,S4S6 jjr     SKS7\R@                  S8\\\	\!\	   4      S9\S:\S;\
S<\S,\\!\R@                     \\R@                  \!\R@                     4   4   4S= jjr"   SLS8\\	\!\	   4   S>\S?\S,\!\	   4S@ jjr#S7\R@                  S,\R@                  4SA jr$SHS7\R@                  SB\S,\R@                  4SC jjr%S7\R@                  S,\R@                  4SD jr&SEr'U =r($ )Mr%   i  zSwin Transformer.

A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -
      https://arxiv.org/pdf/2103.14030
   r,   r*     avg`   r+   r+      r+   r*   r8        Nr   FTr   r   g? img_size
patch_sizein_chansnum_classesglobal_pool	embed_dimdepths.rS   rT   r'   r   strict_img_sizer   rU   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rateembed_layerr   weight_initc                   > [         T!U ]  5         UUS.nUS;   d   eX@l        X0l        XPl        SU l        [        U5      U l        X`l        [        USU R                  S-
  -  -  5      =U l
        U l        / U l        [        U[        [        45      (       d4  [!        U R                  5       Vs/ s H  n[        USU-  -  5      PM     nnU" S"UUUUS   UUSS.UD6U l        U R"                  R$                  n['        U R                  5      " U	5      n	[        U
[        [        45      (       d  ['        U R                  5      " U
5      n
O[        U
5      S:X  a  U
4U R                  -  n
[        U
5      U R                  :X  d   e['        U R                  5      " U5      n[)        UUSS	9n/ nUS   nSn[!        U R                  5       H  nUU   n U[+        S"0 S
U_SU _SUS   U-  US   U-  4_SUU   _SUS:  _SUU   _SU	U   _SU
U   _SU_SU(       + _SUU   _SU_SU_SU_SUU   _SU_UD6/-  nU nUS:  a  US-  nU =R                  [-        U UU-  SU 3S9/-  sl        M     [.        R0                  " U6 U l        U" U R                  40 UD6U l        [7        U R                  U4UUU R
                  S.UD6U l        US:X  a  SOUU l        US:w  a  U R=                  SS 9  g!g!s  snf )#a  
Args:
    img_size: Input image size.
    patch_size: Patch size.
    in_chans: Number of input image channels.
    num_classes: Number of classes for classification head.
    embed_dim: Patch embedding dimension.
    depths: Depth of each Swin Transformer layer.
    num_heads: Number of attention heads in different layers.
    head_dim: Dimension of self-attention heads.
    window_size: Window size.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    drop_rate: Dropout rate.
    attn_drop_rate (float): Attention dropout rate.
    drop_path_rate (float): Stochastic depth rate.
    embed_layer: Patch embedding layer.
    norm_layer (nn.Module): Normalization layer.
rA   )r<  r5  NHWCr+   r   r   )r=  r>  r?  rB  r   rD  
output_fmtT)	stagewiserR   r  r   r  r  rS   rT   r'   r   r   r   rU   rW   rV   r   r   layers.)num_chsr  module)	pool_typerE  	input_fmtskipresetF)needs_resetNr   )r^   r_   r@  r?  rA  rM  len
num_layersrB  r   num_featureshead_hidden_sizefeature_infor$  r   r%  r#  patch_embed	grid_sizer   r   r  dictrb   r"  layersr  r   headweight_init_modeinit_weights)"rn   r=  r>  r?  r@  rA  rB  rC  rS   rT   r'   r   rD  r   rU   rE  rF  rG  rH  rI  r   rJ  rB   rC   kwargsro   r  
patch_griddprr_  in_dimra   r  rq   s"                                    r9   r_   SwinTransformer.__init__  s   \ 	/k)))& & f+"47	A$//\]J]D^8^4__D1)eT]33:?:PQ:PQYa/0:PIQ ' 	
!l!+	
 	
 %%//
 T__-h7+e}55#DOO4[AK"&.4??:K;4??222doo.y9	'$O1t'AlG+  qMU*qMU*" Qi q5 $A, "! (N "2 "10 $A, "  )!" )#$ a&%& &)  F, F1u
$w*uBT_fghfi]j"k!ll7 (8 mmV,t007B7	"
 "oo
 
	 ,7&+@k& %0 !K Rs   /K/moderV  r(   c                     U=(       d    U R                   nUS;   d   eSU;   a!  [        R                  " U R                  5      * OSn[	        [        XUS9U 5        g)a  Initialize model weights.

Args:
    mode: Weight initialization mode ('jax', 'jax_nlhb', 'moco', or '').
    needs_reset: If True, call reset_parameters() on modules that have it.
        Set to False when modules have already self-initialized in __init__.
)jaxjax_nlhbmocorU  r<  nlhbr   )	head_biasrV  N)ra  r   logr@  r    r$   )rn   rh  rV  rn  s       r9   rb  SwinTransformer.init_weights.  sV     ,t,,????39T>TXXd..//r	(P[\^bcr;   c                 ~    [        5       nU R                  5        H  u  p#SU;   d  M  UR                  U5        M      U$ )z,Parameters that should not use weight decay.re   )setnamed_parametersadd)rn   nwdnr   s       r9   no_weight_decaySwinTransformer.no_weight_decay<  s:     e))+DA-2
 , 
r;   window_ratioc                 X   Uc  Ub/  U R                   R                  XS9  U R                   R                  nUc  [        W Vs/ s H  owU-  PM	     sn5      n[	        U R
                  5       H6  u  pS[        US-
  S5      -  n
U	R                  WS   U
-  US   U
-  4UUS9  M8     gs  snf )a  Update the image resolution and window size.

Args:
    img_size: New input resolution, if None current resolution is used.
    patch_size: New patch size, if None use current patch size.
    window_size: New window size, if None based on new_img_size // window_div.
    window_ratio: Divisor for calculating window size from grid size.
    always_partition: Always partition into windows and shift (even if window size < feat size).
N)r=  r>  r+   r   r   r+  )r\  r   r]  r   	enumerater_  max)rn   r=  r>  r'   ry  r   rd  pgindexstagestage_scales              r9   r   SwinTransformer.set_input_sizeE  s    " :#9++X+U))33Jj Ij|!3j IJK%dkk2LEs519a00K  %a=K7A+9UV'!1 !  3 !Js   B'coarsec                 0    [        SU(       a  SS9$ / SQS9$ )z"Group parameters for optimization.z^patch_embedz^layers\.(\d+)))z^layers\.(\d+).downsample)r   )z^layers\.(\d+)\.\w+\.(\d+)N)z^norm)i )stemr&  )r^  )rn   r  s     r9   group_matcherSwinTransformer.group_matchere  s)      (.$
 	
5
 	
r;   enablec                 6    U R                    H	  nXl        M     g)z)Enable or disable gradient checkpointing.N)r_  r!  )rn   r  ls      r9   set_grad_checkpointing&SwinTransformer.set_grad_checkpointingq  s     A#)  r;   c                 .    U R                   R                  $ )zGet the classifier head.)r`  fcrw   s    r9   get_classifierSwinTransformer.get_classifierw  s     yy||r;   c                 B    Xl         U R                  R                  XS9  g)zReset the classifier head.

Args:
    num_classes: Number of classes for new classifier.
    global_pool: Global pooling type.
)rR  N)r@  r`  rU  )rn   r@  rA  s      r9   reset_classifier SwinTransformer.reset_classifier|  s     '		;r;   r&   indicesr  
stop_earlyrM  intermediates_onlyc                 l   US;   d   S5       e/ n[        [        U R                  5      U5      u  pU R                  U5      n[        U R                  5      n
[        R
                  R                  5       (       d  U(       d  U R                  nOU R                  SU	S-    n[        U5       Hj  u  pU" U5      nX;   d  M  U(       a  XS-
  :X  a  U R                  U5      nOUnUR                  SSSS5      R                  5       nUR                  U5        Ml     U(       a  U$ U R                  U5      nX4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor.
    indices: Take last n blocks if int, all if None, select matching indices if sequence.
    norm: Apply norm layer to compatible intermediates.
    stop_early: Stop iterating over blocks when last desired intermediate hit.
    output_fmt: Shape of intermediate feature outputs.
    intermediates_only: Only return intermediate features.

Returns:
    List of intermediate features or tuple of (final features, intermediates).
)NCHWzOutput shape must be NCHW.Nr   r   r*   r+   )r   rW  r_  r\  rD   r   r/  r{  r  r2   r3   append)rn   r&   r  r  r  rM  r  intermediatestake_indices	max_index
num_stagesstagesr  r  x_inters                  r9   forward_intermediates%SwinTransformer.forward_intermediates  s   , Y&D(DD&"6s4;;7G"Q Q%
99!!##:[[F[[)a-0F!&)HAaA Aa/"iilGG!//!Q15@@B$$W- *   IIaLr;   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a  U R                  SS5        U$ )a  Prune layers not required for specified intermediates.

Args:
    indices: Indices of intermediate layers to keep.
    prune_norm: Whether to prune normalization layer.
    prune_head: Whether to prune the classifier head.

Returns:
    List of indices that were kept.
Nr   r   r<  )r   rW  r_  rb   r   r  r  )rn   r  r  r  r  r  s         r9   prune_intermediate_layers)SwinTransformer.prune_intermediate_layers  s[      #7s4;;7G"Qkk.9q=1DI!!!R(r;   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ )z/Forward pass through feature extraction layers.)r\  r_  r  r0  s     r9   forward_features SwinTransformer.forward_features  s1    QKKNIIaLr;   
pre_logitsc                 R    U(       a  U R                  USS9$ U R                  U5      $ )zForward pass through classifier head.

Args:
    x: Feature tensor.
    pre_logits: Return features before final classifier.

Returns:
    Output tensor.
T)r  )r`  )rn   r&   r  s      r9   forward_headSwinTransformer.forward_head  s&     1;tyyty,L		!Lr;   c                 J    U R                  U5      nU R                  U5      nU$ )zGForward pass.

Args:
    x: Input tensor.

Returns:
    Output logits.
)r  r  r0  s     r9   r   SwinTransformer.forward  s)     !!!$a r;   )rB  r[  rA  r`  rZ  r?  r_  r  r@  rY  rX  rM  r\  ra  )r<  T)NNN   NF)Tr   )NFFr  F)r   FT))r   r   r   r   r   r   rb   r  r   r   strr	   r   r   r   r   r  r
   r_   rD   r   ignorerb  r   rw  r   r   r   r  r  r  r  r   r   r  r  r  r  r   r   r   r   s   @r9   r%   r%     s;    +.#$&2)7&*-.%*$(!!!$&$&$'+568ll!1B1'B1 B1 	B1
 B1 B1 B1 #s(OB1 S#XB1 smB1 +B1 #B1 "B1 B1 B1  !B1" "#B1$ "%B1& "'B1( bii)B1* c4		?23+B1, -B1 B1H YYd d d d d YYS   374859 !/3uS#X/ !sCx1 "%S/2	
  'tn 
@ YY	
D 	
T#s(^ 	
 	
 YY*T *T * *
 YY		  <C <hsm <W[ < 8<$$',1 ||1  eCcN341  	1 
 1  1  !%1  
tELL!5tELL7I)I#JJ	K1 j ./$#	3S	>*  	
 
c0%,, 5<< 
Mell 
M 
M 
M %,,  r;   
state_dictmodelc           	      X   SnSU ;   a  SnSSK n0 nU R                  SU 5      n U R                  SU 5      n U R                  5        GH[  u  pV[        S Vs/ s H  owU;   PM	     sn5      (       a  M+  S	U;   ab  UR                  R
                  R                  R                  u    pn
UR                  S
   U	:w  d  UR                  S   U
:w  a  [        UX4SSSS9nUR                  S5      (       a  UR                  USS 5      nUR                  UR                  R                  :w  d   UR                  S   UR                  S   :w  a)  [        UUR                  UR                  R                  S9nU(       a&  UR                  SS U5      nUR                  SS5      nXdU'   GM^     U$ s  snf )zConvert patch embedding weight from manual patchify + linear proj to conv.

Args:
    state_dict: State dictionary from checkpoint.
    model: Model instance.

Returns:
    Filtered state dictionary.
Tzhead.fc.weightFr   Nr  r  )rZ   r   zpatch_embed.proj.weightr   r.   bicubic)interpolation	antialiasverbosere   ir   r   zlayers.(\d+).downsamplec                 D    S[        U R                  S5      5      S-    S3$ )NrO  r   z.downsample)r   group)r&   s    r9   <lambda>&checkpoint_filter_fn.<locals>.<lambda>  s"    ws177ST:YZGZF[[f=gr;   zhead.zhead.fc.)regetitemsr   r\  rj   r   r0   r   endswithget_submodulere   r'   r   subreplace)r  r  old_weightsr  out_dictr   r   rv  r   r5   r6   ms               r9   checkpoint_filter_fnr    s    K:%H4Jj9J  " HI H1Q HIJJ$)**//66<<JAqQwwr{a1772;!#3(F"+"  ::455##AdsG,Aww!88>>>!--PQBRVWVcVcdeVfBf-$%MM#$#A#A#G#G 13gijkA		':.A9 #: O9 Js   F'
variant
pretrainedc           	          [        S [        UR                  SS5      5       5       5      nUR                  SU5      n[	        [
        X4[        [        SUS9S.UD6nU$ )zCreate a Swin Transformer model.

Args:
    variant: Model variant name.
    pretrained: Load pretrained weights.
    **kwargs: Additional model arguments.

Returns:
    SwinTransformer model instance.
c              3   *   #    U  H	  u  pUv   M     g 7fr   r   )r  r  r   s      r9   r  +_create_swin_transformer.<locals>.<genexpr>.  s     \.[da.[r  rC  )r   r   r*   r   out_indicesT)flatten_sequentialr  )pretrained_filter_fnfeature_cfg)r   r{  r  popr   r%   r  r^  )r  r  rc  default_out_indicesr  r  s         r9   _create_swin_transformerr  #  sh      \i

8\8Z.[\\**],?@K 1DkJ 	E Lr;   urlc                 4    U SSSSSS[         [        SSS	S
.UE$ )z9Create default configuration for Swin Transformer models.r4  )r*   r3  r3  )r   r   g?r  Tzpatch_embed.projzhead.fcmit)r  r@  
input_size	pool_sizecrop_pctr  fixed_input_sizemeanru   
first_conv
classifierlicenser   )r  rc  s     r9   _cfgr  :  s7     =v%.B(	 # r;   z.swin_small_patch4_window7_224.ms_in22k_ft_in1kztimm/zvhttps://github.com/SwinTransformer/storage/releases/download/v1.0.8/swin_small_patch4_window7_224_22kto1k_finetune.pth)	hf_hub_idr  z-swin_base_patch4_window7_224.ms_in22k_ft_in1kzlhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22kto1k.pthz.swin_base_patch4_window12_384.ms_in22k_ft_in1kzmhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22kto1k.pth)r*     r  )r:  r:  g      ?)r  r  r  r  r  z.swin_large_patch4_window7_224.ms_in22k_ft_in1kzmhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22kto1k.pthz/swin_large_patch4_window12_384.ms_in22k_ft_in1kznhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22kto1k.pthz$swin_tiny_patch4_window7_224.ms_in1kzdhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pthz%swin_small_patch4_window7_224.ms_in1kzehttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pthz$swin_base_patch4_window7_224.ms_in1kzdhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pthz%swin_base_patch4_window12_384.ms_in1kzehttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pthz-swin_tiny_patch4_window7_224.ms_in22k_ft_in1kzuhttps://github.com/SwinTransformer/storage/releases/download/v1.0.8/swin_tiny_patch4_window7_224_22kto1k_finetune.pthz%swin_tiny_patch4_window7_224.ms_in22kzhhttps://github.com/SwinTransformer/storage/releases/download/v1.0.8/swin_tiny_patch4_window7_224_22k.pthiQU  )r  r  r@  z&swin_small_patch4_window7_224.ms_in22kzihttps://github.com/SwinTransformer/storage/releases/download/v1.0.8/swin_small_patch4_window7_224_22k.pthz%swin_base_patch4_window7_224.ms_in22kzhhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pthz&swin_base_patch4_window12_384.ms_in22kzihttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth)r  r  r  r  r  r@  z&swin_large_patch4_window7_224.ms_in22kzihttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pthz'swin_large_patch4_window12_384.ms_in22kzjhttps://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pthzswin_s3_tiny_224.ms_in1kzbhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/s3_t-1d53f6a8.pthzbhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/s3_s-3bb4c69d.pthzbhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/s3_b-a1e95db4.pth)zswin_s3_small_224.ms_in1kzswin_s3_base_224.ms_in1kc           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	z*Swin-T @ 224x224, trained ImageNet-1k
    r,   r   r6  r7  r9  r>  r'   rB  rC  rS   r  )swin_tiny_patch4_window7_224r^  r  r  rc  
model_argss      r9   r  r    sF     R`noJ#&]3=]AEjA[TZA[] ]r;   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zSwin-S @ 224x224
    r,   r   r6  r+   r+      r+   r9  r  r  )swin_small_patch4_window7_224r  r  s      r9   r  r    sF     RaopJ#'^4>^BFzB\U[B\^ ^r;   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zSwin-B @ 224x224
    r,   r      r  r,   r         r  r  )swin_base_patch4_window7_224r  r  s      r9   r  r    sF     SbpqJ#&]3=]AEjA[TZA[] ]r;   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zSwin-B @ 384x384
    r,   r:  r  r  r  r  r  )swin_base_patch4_window12_384r  r  s      r9   r  r    sF     c-cqrJ#'^4>^BFzB\U[B\^ ^r;   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zSwin-L @ 224x224
    r,   r      r  r8  r:  r;  0   r  r  )swin_large_patch4_window7_224r  r  s      r9   r  r    sF     SbqrJ#'^4>^BFzB\U[B\^ ^r;   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zSwin-L @ 384x384
    r,   r:  r  r  r  r  r  )swin_large_patch4_window12_384r  r  s      r9   r  r    sF     c-crsJ#(_5?_CG
C]V\C]_ _r;   c           	      J    [        SSSSSS9n[        SSU 0[        U40 UD6D6$ )	z:Swin-S3-T @ 224x224, https://arxiv.org/abs/2111.14725
    r,   r   r      r   r6  r7  r9  r  r  )swin_s3_tiny_224r  r  s      r9   r  r    s;     -2l^lnJ#l:lQUV`QkdjQkllr;   c           	      J    [        SSSSSS9n[        SSU 0[        U40 UD6D6$ )	z:Swin-S3-S @ 224x224, https://arxiv.org/abs/2111.14725
    r,   )r  r  r  r   r6  r  r9  r  r  )swin_s3_small_224r  r  s      r9   r  r    s;     /RaoqJ#mJmRVWaRlekRlmmr;   c           	      J    [        SSSSSS9n[        SSU 0[        U40 UD6D6$ )	z:Swin-S3-B @ 224x224, https://arxiv.org/abs/2111.14725
    r,   r  r6  )r+   r+      r+   r9  r  r  )swin_s3_base_224r  r  s      r9   r  r    s;     -2m_moJ#l:lQUV`QkdjQkllr;   )"swin_base_patch4_window7_224_in22k#swin_base_patch4_window12_384_in22k#swin_large_patch4_window7_224_in22k$swin_large_patch4_window12_384_in22kr   r  )r<  )Or   loggingr   typingr   r   r   r   r   r   r	   r
   r   rD   torch.nnrb   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   r    	_registryr!   r"   r#   vision_transformerr$   __all__	getLoggerr   _loggerr   r   r   r:   r=   rM   r  rO   r   r
  r  r%   r^  r  r  r   r  r  default_cfgsr  r  r  r  r  r  r  r  r  r   r;   r9   <module>r     s0  
"   O O O   AL L L L * + 3 4 Y Y 4



H
%#uS#X./ <<38_ \\& ELL uS#X 3 SV [`[g[g  $#s #3 # #0Tbii Tno299 od-299 -`299 DLbii L^
.T .")) .S%,,EV@W .bc t Ra .	c 	T#s(^ 	 % H&4d E7HH& 4Tz6}	H& 5d{ Hs7DH& 5d{7~H& 6t| Hs8DH&& +Dr-u'H&, ,Ts.v-H&2 +Dr-u3H&8 ,Ts Hs.D9H&D 4T D6FEH&L ,Tv.MH&T -dw/UH&\ ,Tv.]H&d -dw HsPU/WeH&l -dw/mH&t .tx HsPU0WuH&~ p!rH&D "&p"r !%p!rKH& HV ] ] ] ^ ^ ^ ] ] ] ^ ^ ^ ^ ^ ^ _/ _ _ mO m m n_ n n mO m m H*Q+S+S,U	' r;   