
    RЦi/                     r   S r SSKrSSKJr  SSKJrJrJrJrJ	r	J
r
JrJrJr  SSKrSSKJr  SSKJs  Jr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJ r J!r!J"r"  SSK#J$r$  SS	K%J&r&  SS
K'J(r(  SSK)J*r*  SSK+J,r,J-r-J.r.  S/r/\\0\\0\04   4   r1S\Rd                  S\\0\04   S\Rd                  4S jr3\(S\Rd                  S\\0\04   S\\0\04   S\Rd                  4S j5       r4 " S S\Rj                  5      r6 " S S\Rj                  5      r7 " S S\Rj                  5      r8 " S S\Rj                  5      r9 " S S\Rj                  5      r:S\\;\Rd                  4   S\Rj                  S\\;\Rd                  4   4S  jr<SOS!\;S"\=S\:4S# jjr>SPS$ jr?\," \?" S%S&S'9\?" S%S(S)S*S+S,9\?" S%S-S'9\?" S%S.S)S*S+S,9\?" S%S/S'9\?" S%S0S'9\?" S%S1S'9\?" S%S2S'9\?" S%S3S'9\?" S%S4S'9\?" S%S5S6S7S8S99\?" S%S:S6S7S8S99S;.5      r@\-SOS"\=S\:4S< jj5       rA\-SOS"\=S\:4S= jj5       rB\-SOS"\=S\:4S> jj5       rC\-SOS"\=S\:4S? jj5       rD\-SOS"\=S\:4S@ jj5       rE\-SOS"\=S\:4SA jj5       rF\-SOS"\=S\:4SB jj5       rG\-SOS"\=S\:4SC jj5       rH\-SOS"\=S\:4SD jj5       rI\-SOS"\=S\:4SE jj5       rJ\-SOS"\=S\:4SF jj5       rK\-SOS"\=S\:4SG jj5       rL\." \MSHSISJSKSLSMSN.5        g)QaJ  Swin Transformer V2
A PyTorch impl of : `Swin Transformer V2: Scaling Up Capacity and Resolution`
    - https://arxiv.org/abs/2111.09883

Code/weights from https://github.com/microsoft/Swin-Transformer, original copyright/license info below

Modifications and additions for timm hacked together by / Copyright 2022, Ross Wightman
    N)partial)	AnyCallableDictListOptionalSetTupleTypeUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
PatchEmbedMlpDropPathcalculate_drop_path_rates	to_2tupletrunc_normal_ClassifierHeadresample_patch_embedndgridget_act_layer	LayerType   )build_model_with_cfg)feature_take_indices)register_notrace_function)
checkpoint)generate_default_cfgsregister_modelregister_model_deprecationsSwinTransformerV2xwindow_sizereturnc                     U R                   u  p#pEU R                  X#US   -  US   XAS   -  US   U5      n U R                  SSSSSS5      R                  5       R                  SUS   US   U5      nU$ )zPartition into non-overlapping windows.

Args:
    x: Input tensor of shape (B, H, W, C).
    window_size: Window size (height, width).

Returns:
    Windows tensor of shape (num_windows*B, window_size[0], window_size[1], C).
r   r               shapeviewpermute
contiguous)r$   r%   BHWCwindowss          ^/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/swin_transformer_v2.pywindow_partitionr8   %   s     JA!	q{1~%{1~qN7JKXYN\]^Aii1aAq)446;;BAP[\]P^`abGN    r6   img_sizec                     Uu  p4U R                   S   nU R                  SX1S   -  XAS   -  US   US   U5      nUR                  SSSSSS5      R                  5       R                  SX4U5      nU$ )a  Merge windows back to feature map.

Args:
    windows: Windows tensor of shape (num_windows * B, window_size[0], window_size[1], C).
    window_size: Window size (height, width).
    img_size: Image size (height, width).

Returns:
    Feature map tensor of shape (B, H, W, C).
r,   r   r   r(   r)   r*   r+   r-   )r6   r%   r:   r3   r4   r5   r$   s          r7   window_reverser<   8   s      DAbAR!n,aq>.A;q>S^_`SacdeA			!Q1a#..055b!BAHr9   c                   X  ^  \ rS rSrSr       SS\S\\\4   S\S\S\S	\S
\S\\\4   SS4U 4S jjjr	SS jr
SS jr  SS\\R                  \R                  4   4S jjrS\\\4   SS4S jrSS jrSS\R                  S\\R                     S\R                  4S jjrSrU =r$ )WindowAttentionO   zWindow based multi-head self attention (W-MSA) module with relative position bias.

Supports both shifted and non-shifted window attention with continuous relative
position bias and cosine attention.
Ndimr%   	num_headsqkv_biasqkv_bias_separate	attn_drop	proj_droppretrained_window_sizer&   c           
        > XS.n[         TU ]  5         Xl        X l        [	        U5      U l        X0l        XPl        [        R                  " [        R                  " USS440 UD65      U l        [        R                  " [        R                  " SSS0UD6[        R                  " SS9[        R                  " SU4SS0UD65      U l        [        R                  " XS	-  4SS0UD6U l        U(       a  [        R                  " [        R                  " U40 UD65      U l        U R'                  S
[        R                  " U40 UD6SS9  [        R                  " [        R                  " U40 UD65      U l        OSU l        SU l        SU l        [        R,                  " U5      U l        [        R                  " X40 UD6U l        [        R,                  " U5      U l        [        R4                  " SS9U l        U R                  u  pU R'                  S[        R                  " SSU-  S-
  SU-  S-
  S40 UD6SS9  U R'                  S[        R                  " X-  X-  U	[        R8                  S9SS9  U R;                  5         g)a  Initialize window attention module.

Args:
    dim: Number of input channels.
    window_size: The height and width of the window.
    num_heads: Number of attention heads.
    qkv_bias: If True, add a learnable bias to query, key, value.
    qkv_bias_separate: If True, use separate bias for q, k, v projections.
    attn_drop: Dropout ratio of attention weight.
    proj_drop: Dropout ratio of output.
    pretrained_window_size: The height and width of the window in pre-training.
devicedtyper   r)      biasT)inplaceFr(   k_bias
persistentNr,   r@   relative_coords_tablerelative_position_index)r)   rK   )super__init__r@   r%   r   rF   rA   rC   nn	Parametertorchemptylogit_scale
SequentialLinearReLUcpb_mlpqkvq_biasregister_bufferv_biasrN   DropoutrD   projrE   Softmaxsoftmaxlongreset_parameters)selfr@   r%   rA   rB   rC   rD   rE   rF   rI   rJ   ddwin_hwin_w	__class__s                 r7   rU   WindowAttention.__init__V   s   2 /&&/0F&G#"!2<<Y14E(L(LM }}II.4.2.GGD!IIc9757B7
 99S'<<<,,u{{3'="'=>DK  5;;s+Ab+Ae T,,u{{3'="'=>DKDKDKDKI.IIc-"-	I.zzb) ''#KK1u9q=!e)a-AbA 	 	

 	%KKu}V5::V 	 	
 	r9   c                 ^   [         R                  R                  U R                  [        R
                  " S5      5        U R                  bR  [         R                  R                  U R                  5        [         R                  R                  U R                  5        U R                  5         g)z"Initialize parameters and buffers.
   N)
rV   init	constant_rZ   mathlogr`   zeros_rb   _init_buffersri   s    r7   rh    WindowAttention.reset_parameters   sb    
$**DHHRL9;;"GGNN4;;'GGNN4;;'r9   c                 Z   U R                   b  U R                   R                  5         U R                  U R                  R                  R
                  U R                  R                  R                  S9u  pU R                  R                  U5        U R                  R                  U5        g)z.Compute and fill non-persistent buffer values.NrH   )
rN   zero_"_make_pair_wise_relative_positionsrd   weightrI   rJ   rR   copy_rS   )ri   rR   rS   s      r7   rv   WindowAttention._init_buffers   s    ;;"KK9=9`9`99##**$))2B2B2H2H :a :
6 	""(()>?$$**+BCr9   c                    [         R                  " U R                  S   S-
  * U R                  S   U[         R                  S9n[         R                  " U R                  S   S-
  * U R                  S   U[         R                  S9n[         R                  " [        X45      5      nUR                  SSS5      R                  5       R                  S5      nU R                  S   S:  aO  USS2SS2SS2S4==   U R                  S   S-
  -  ss'   USS2SS2SS2S4==   U R                  S   S-
  -  ss'   ONUSS2SS2SS2S4==   U R                  S   S-
  -  ss'   USS2SS2SS2S4==   U R                  S   S-
  -  ss'   US-  n[         R                  " U5      [         R                  " [         R                  " U5      S-   5      -  [        R                  " S5      -  nUR                  US9n[         R                  " U R                  S   U[         R                  S9n[         R                  " U R                  S   U[         R                  S9n[         R                  " [        Xg5      5      n[         R                   " US5      n	U	SS2SS2S4   U	SS2SSS24   -
  n
U
R                  SSS5      R                  5       n
U
SS2SS2S4==   U R                  S   S-
  -  ss'   U
SS2SS2S4==   U R                  S   S-
  -  ss'   U
SS2SS2S4==   SU R                  S   -  S-
  -  ss'   U
R#                  S	5      nX[4$ )
zCompute pair-wise relative position index and coordinates table.

Returns:
    Tuple of (relative_coords_table, relative_position_index)
r   r   rH   r)   N         ?)rJ   r,   )rX   aranger%   float32stackr   r0   r1   	unsqueezerF   signlog2absrs   torg   flattensum)ri   rI   rJ   relative_coords_hrelative_coords_wrR   coords_hcoords_wcoordscoords_flattenrelative_coordsrS   s               r7   r{   2WindowAttention._make_pair_wise_relative_positions   s    "LLq!A%&(8(8(;FRWR_R_a!LLq!A%&(8(8(;FRWR_R_a %F3D,X Y 5 = =aA F Q Q S ] ]^_ `&&q)A-!!Q1*-$2M2Ma2PST2TU-!!Q1*-$2M2Ma2PST2TU-!!Q1*-$2B2B12E2IJ-!!Q1*-$2B2B12E2IJ-" %

+@ AEJJII+,s2E4 !46:iil!C 5 8 8u 8 E << 0 0 3F%**U<< 0 0 3F%**UVH78vq1(At4~aqj7QQ)11!Q:EEG1a D$4$4Q$7!$;; 1a D$4$4Q$7!$;; 1a A(8(8(;$;a$?? "1"5"5b"9$==r9   c                 "   [        U5      nXR                  :w  au  U R                  c   eU R                  R                  nU R                  R                  nXl        U R                  X#S9u  pEU R                  SUSS9  U R                  SUSS9  gg)zuUpdate window size and regenerate relative position tables.

Args:
    window_size: New window size (height, width).
NrH   rR   FrO   rS   )r   r%   rR   rI   rJ   r{   ra   )ri   r%   rI   rJ   rR   rS   s         r7   set_window_sizeWindowAttention.set_window_size   s      ,***--999//66F..44E*77v7S ;!  !8:O\a b  !:<S`e f +r9   c                 $    U R                  5         g)z"Initialize non-persistent buffers.N)rv   rw   s    r7   init_non_persistent_buffers+WindowAttention.init_non_persistent_buffers   s    r9   r$   maskc                    UR                   u  p4nU R                  c  U R                  U5      nO[        R                  " U R                  U R
                  U R                  45      nU R                  (       a  U R                  U5      nXg-  nO)[        R                  " XR                  R                  US9nUR                  X4SU R                  S5      R                  SSSSS5      nUR                  S5      u  pn
[        R                  " USS9[        R                  " U	SS9R!                  S	S5      -  n[        R"                  " U R$                  [&        R(                  " S
5      S9R+                  5       nX-  nU R-                  U R.                  5      R1                  SU R                  5      nXR2                  R1                  S5         R1                  U R4                  S   U R4                  S   -  U R4                  S   U R4                  S   -  S5      nUR                  SSS5      R7                  5       nS[        R8                  " U5      -  nXR;                  S5      -   nUb|  UR                   S   nUR1                  SXR                  XD5      UR;                  S5      R;                  S5      -   nUR1                  SU R                  XD5      nU R=                  U5      nOU R=                  U5      nU R?                  U5      nX-  R!                  SS5      R                  X4U5      nU RA                  U5      nU RC                  U5      nU$ )zForward pass of window attention.

Args:
    x: Input features with shape of (num_windows*B, N, C).
    mask: Attention mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None.

Returns:
    Output features with shape of (num_windows*B, N, C).
)r|   rL   r(   r,   r)   r   r   r*   rQ   g      Y@)max   )"r.   r`   r_   rX   catrN   rb   rC   Flinearr|   reshaperA   r0   unbind	normalize	transposeclamprZ   rs   rt   expr^   rR   r/   rS   r%   r1   sigmoidr   rf   rD   rd   rE   )ri   r$   r   B_Nr5   r_   rB   qkvattnrZ   relative_position_bias_tablerelative_position_biasnum_wins                   r7   forwardWindowAttention.forward   s    77q;;((1+Cyy$++t{{DKK!HIH%%hhqkhhqxHkk"DNNB7??1aAN**Q-a A2&QB)?)I)I"b)QQkk$"2"28KLPPR!'+||D4N4N'O'T'TUWY]YgYg'h$!=>Z>Z>_>_`b>c!d!i!iQ$"2"21"55t7G7G7JTM]M]^_M`7`bd"f!7!?!?1a!H!S!S!U!#emm4J&K!K66q99jjmG99R..!?$..QRBSB]B]^_B``D99R6D<<%D<<%D~~d#X  A&..ra8IIaLNN1r9   )rD   r^   r@   rN   rZ   rA   rF   rd   rE   r`   r_   rC   rf   rb   r%   )TF        r   )r   r   NNr&   N)NNN)__name__
__module____qualname____firstlineno____doc__intr
   boolfloatrU   rh   rv   rX   Tensorr{   r   r   r   r   __static_attributes____classcell__rm   s   @r7   r>   r>   O   s%    "&+!!6<F F  sCxF  	F 
 F   $F  F  F  %*#s(OF  
F  F PD (> 
u||U\\)	*	(>Tg5c? gt g"1 1Xell-C 1u|| 1 1r9   r>   c                   4  ^  \ rS rSrSrSSSSSSSSSS	\R                  SS
S
4S\S\S\S\S\S\	S\	S\
S\	S\
S\
S\
S\S\\R                     S\4U 4S jjjr   S(S\\R"                     S\\R$                     S\\R&                     S\\R"                     4S jjr S)S \S!\\   S\\\\4   \\\4   4   4S" jjr S)S#\\\4   S\\\4   S\\	   SS
4S$ jjrS\R"                  S\R"                  4S% jrS\R"                  S\R"                  4S& jrS'rU =r$ )*SwinTransformerV2Blocki#  zSwin Transformer V2 Block.

A standard transformer block with window attention and shifted window attention
for modeling long-range dependencies efficiently.
   r   F      @Tr   geluNr@   input_resolutionrA   r%   
shift_sizealways_partitiondynamic_mask	mlp_ratiorB   rE   rD   	drop_path	act_layer
norm_layerrF   c                 6  > UUS.n[         TU ]  5         Xl        [        U5      U l        X0l        [        U5      U l        X`l        Xpl        U R                  XE5      u  U l
        U l        U R                  S   U R                  S   -  U l        Xl        [        U5      n[        U4[        U R                  5      UU	UU
[        U5      S.UD6U l        U" U40 UD6U l        US:  a  [%        U5      O[&        R(                  " 5       U l        [-        SU[/        X-  5      UU
S.UD6U l        U" U40 UD6U l        US:  a  [%        U5      O[&        R(                  " 5       U l        U R7                  SU R                  (       a  SOU R8                  " S0 UD6S	S
9  g)ag  
Args:
    dim: Number of input channels.
    input_resolution: Input resolution.
    num_heads: Number of attention heads.
    window_size: Window size.
    shift_size: Shift size for SW-MSA.
    always_partition: Always partition into full windows and shift
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    proj_drop: Dropout rate.
    attn_drop: Attention dropout rate.
    drop_path: Stochastic depth rate.
    act_layer: Activation layer.
    norm_layer: Normalization layer.
    pretrained_window_size: Window size in pretraining.
rH   r   r   )r%   rA   rB   rD   rE   rF   r   )in_featureshidden_featuresr   drop	attn_maskNFrO    )rT   rU   r@   r   r   rA   target_shift_sizer   r   _calc_window_shiftr%   r   window_arear   r   r>   r   norm1r   rV   Identity
drop_path1r   r   mlpnorm2
drop_path2ra   get_attn_mask)ri   r@   r   rA   r%   r   r   r   r   rB   rE   rD   r   r   r   rF   rI   rJ   rj   rm   s                      r7   rU   SwinTransformerV2Block.__init__*  s   J / )*: ;"!*:!6 0(,0,C,CK,\)$/++A.1A1A!1DD"!),	#	
!$"2"23#,-C#D	
 	
	  *r*
1:R(9-R[[] 
0	

 
  *r*
1:R(9-R[[]%%D4+=+=+C+C 	 	
r9   r$   rI   rJ   r&   c           	         [        U R                  5      (       Ga  Uc&  [        R                  " S/U R                  QSP7X#S9nOH[        R                  " SUR
                  S   UR
                  S   S4UR                  UR                  S9nSnSU R                  S   * 4U R                  S   * U R                  S   * 4U R                  S   * S44 Hn  nSU R                  S   * 4U R                  S   * U R                  S   * 4U R                  S   * S44 H$  nXTSS2US   US   2US   US   2SS24'   US-  nM&     Mp     [        X@R                  5      nUR                  SU R                  5      nUR                  S5      UR                  S5      -
  n	U	R                  U	S:g  [        S5      5      R                  U	S:H  [        S5      5      n	U	$ Sn	U	$ )	zGenerate attention mask for shifted window attention.

Args:
    x: Input tensor for dynamic shape calculation.

Returns:
    Attention mask or None if no shift.
Nr   rH   r)   r   r,   g      Yr   )anyr   rX   zerosr   r.   rI   rJ   r%   r8   r/   r   r   masked_fillr   )
ri   r$   rI   rJ   img_maskcnthwmask_windowsr   s
             r7   r   $SwinTransformerV2Block.get_attn_masky  s    ty ;;'ED,A,A'E1'Efb ;;1771:qwwqz1'Eahh^_^e^efC))!,,-&&q))DOOA,>+>?ooa(($/ T--a001**1--0B/BC//!,,d3A
 <?Q!QqT	1Q4!9a781HC ,H6F6FGL',,R1A1ABL$..q1L4J4J14MMI!--i1neFmLXXYbfgYginoristI  Ir9   target_window_sizer   c                    [        U5      nUc-  U R                  n[        U5      (       a  US   S-  US   S-  4nO[        U5      nU R                  (       a  X4$ [        U5      n[        U5      n[	        U R
                  U5       VVs/ s H  u  p4X4::  a  UOUPM     nnn[	        U R
                  XR5       VVVs/ s H  u  p4ocU::  a  SOUPM     nnnn[        U5      [        U5      4$ s  snnf s  snnnf )zCalculate window size and shift size based on input resolution.

Args:
    target_window_size: Target window size.
    target_shift_size: Target shift size.

Returns:
    Tuple of (adjusted_window_size, adjusted_shift_size).
r   r)   r   )r   r   r   r   zipr   tuple)ri   r   r   rr   r%   sr   s           r7   r   )SwinTransformerV2Block._calc_window_shift  s    ''9:$ $ 6 6$%%%7%:a%?ASTUAVZ[A[$\! )*; <  %88&'9:%&7869$:O:OQc6de6ddaAFq)6de8;D<Q<QS^8rs8rWQ16aq(8r
s[!5#444 fss   
C,<C2	feat_sizec                    Xl         Ub  X0l        U R                  [        U5      5      u  U l        U l        U R                  S   U R                  S   -  U l        U R                  R                  U R                  5        U R                  b  U R                  R                  OSnU R                  b  U R                  R                  OSnU R                  SU R                  (       a  SOU R                  XES9SS9  g)zSet input size and update window configuration.

Args:
    feat_size: New feature map size.
    window_size: New window size.
    always_partition: Override always_partition setting.
Nr   r   r   rH   FrO   )r   r   r   r   r%   r   r   r   r   r   rI   rJ   ra   r   r   )ri   r   r%   r   rI   rJ   s         r7   set_input_size%SwinTransformerV2Block.set_input_size  s     !*'$4!,0,C,CIkDZ,[)$/++A.1A1A!1DD		!!$"2"23*...*D&&$(,(B$$%%D4+=+=V+=+Y 	 	
r9   c           	         UR                   u  p#pE[        U R                  5      nU(       a4  [        R                  " XR                  S   * U R                  S   * 4SS9nOUnU R
                  S   X0R
                  S   -  -
  U R
                  S   -  nU R
                  S   X@R
                  S   -  -
  U R
                  S   -  n	[        R                  R                  R                  USSSU	SU45      nUR                   u  pp[        XpR
                  5      nUR                  SU R                  U5      n[        U SS5      (       a  U R                  U5      nOU R                  nU R                  XS9nUR                  SU R
                  S   U R
                  S   U5      n[!        XR
                  X45      nUS	S	2S	U2S	U2S	S	24   R#                  5       nU(       a!  [        R                  " XpR                  SS9nU$ UnU$ )
zApply windowed attention with optional shift.

Args:
    x: Input tensor of shape (B, H, W, C).

Returns:
    Output tensor of shape (B, H, W, C).
r   r   )r   r)   )shiftsdimsr,   r   F)r   N)r.   r   r   rX   rollr%   rV   
functionalpadr8   r/   r   getattrr   r   r   r<   r1   )ri   r$   r2   r3   r4   r5   	has_shift	shifted_xpad_hpad_w_HpWp	x_windowsr   attn_windowss                   r7   _attnSwinTransformerV2Block._attn  s    WW
a (	

1q/A.ADOOTUDVCV-W^deII!!!$q+;+;A+>'>>$BRBRSTBUU!!!$q+;+;A+>'>>$BRBRSTBUUHH''++I1a57QR	 r %Y0@0@A	NN2t'7'7;	 4//**95IIyyy; $((T-=-=a-@$BRBRSTBUWXY"<1A1AB8L	a!RaRl+668	 

9__6JA  Ar9   c                 2   UR                   u  p#pEXR                  U R                  U R                  U5      5      5      -   nUR	                  USU5      nXR                  U R                  U R                  U5      5      5      -   nUR	                  X#XE5      nU$ )Nr,   )r.   r   r   r  r   r   r   r   )ri   r$   r2   r3   r4   r5   s         r7   r   SwinTransformerV2Block.forward  sz    WW
a

4::a= 9::IIaQ

488A; 788IIaA!r9   )r   r   r@   r   r   r   r   r   r   r   r   rA   r   r   r   r%   )NNNr   )r   r   r   r   r   rV   	LayerNormr   _int_or_tuple_2_tr   r   r   r   ModulerU   r   rX   r   rI   rJ   r   r
   r   r   r  r   r   r   r   s   @r7   r   r   #  s    ./,-%*!&!!!!!#)*,,,89%M
M
 0M
 	M

 +M
 *M
 #M
 M
 M
 M
 M
 M
 M
 !M
 RYYM
  %6!M
 M
b )--1+/	'%' U\\*' EKK(	'
 
%,,	'X >B5 15  ((9:5 
uS#Xc3h/	0	5J 04	
S#X
 sCx
 'tn	

 

8,u|| , ,\ %,,  r9   r   c                      ^  \ rS rSrSrS\R                  SS4S\S\\   S\	\R                     4U 4S jjjrS\R                  S	\R                  4S
 jrSrU =r$ )PatchMergingi  zPatch Merging Layer.

Merges 2x2 neighboring patches and projects to higher dimension,
effectively downsampling the feature maps.
Nr@   out_dimr   c                    > XES.n[         TU ]  5         Xl        U=(       d    SU-  U l        [        R
                  " SU-  U R                  4SS0UD6U l        U" U R                  40 UD6U l        g)z
Args:
    dim (int): Number of input channels.
    out_dim (int): Number of output channels (or 2 * dim if None)
    norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
rH   r)   r*   rL   FN)rT   rU   r@   r  rV   r\   	reductionnorm)ri   r@   r  r   rI   rJ   rj   rm   s          r7   rU   PatchMerging.__init__  sf     /)!c'1s7DLLKuKKt||2r2	r9   r$   r&   c                 \   UR                   u  p#pESSSUS-  SUS-  4n[        R                  R                  X5      nUR                   u  pspGUR	                  X#S-  SUS-  SU5      R                  SSSSSS5      R                  S5      nU R                  U5      nU R                  U5      nU$ )Nr   r)   r   r(   r*   r+   )	r.   rV   r   r   r   r0   r   r  r  )ri   r$   r2   r3   r4   r5   
pad_valuesr  s           r7   r   PatchMerging.forward2  s    WW
aAq1uaQ/
MMa,WW
aIIaaAFAq199!Q1aKSSTUVNN1IIaLr9   )r@   r  r  r  )r   r   r   r   r   rV   r
  r   r   r   r  rU   rX   r   r   r   r   r   s   @r7   r  r    sl     &**,,,33 c]3 RYY	3 3*
 
%,, 
 
r9   r  c            '       b  ^  \ rS rSrSrSSSSSSSSS\R                  SSS	S	4S
\S\S\S\S\S\S\	S\	S\	S\
S\	S\
S\
S\
S\\\\R                     4   S\\R                     S\S\	SS	4&U 4S jjjr S$S\\\4   S\S\\	   SS	4S jjrS \R(                  S\R(                  4S! jrS%S" jrS#rU =r$ )&SwinTransformerV2Stagei?  zA Swin Transformer V2 Stage.

A single stage consisting of multiple Swin Transformer blocks with
optional downsampling at the beginning.
Fr   Tr   r   r   Nr@   r  r   depthrA   r%   r   r   
downsampler   rB   rE   rD   r   r   r   rF   output_nchwr&   c                   > UUS.n[         TU ]  5         Xl        X0l        U	(       a  [	        S U 5       5      OUU l        X@l        UU l        SU l        [        U5      n[	        U Vs/ s H  nUS-  PM
     sn5      nU	(       a  [        S	XUS.UD6U l        O!X:X  d   e[        R                  " 5       U l        [        R                  " [        U5       Vs/ s HM  n[!        S	UU R
                  UUUS-  S:X  a  SOUUUU
UUU[#        U[$        5      (       a  UU   OUUUUS.UD6PMO     sn5      U l        gs  snf s  snf )
ak  
Args:
    dim: Number of input channels.
    out_dim: Number of output channels.
    input_resolution: Input resolution.
    depth: Number of blocks.
    num_heads: Number of attention heads.
    window_size: Local window size.
    always_partition: Always partition into full windows and shift
    dynamic_mask: Create attention mask in forward based on current input size
    downsample: Use downsample layer at start of the block.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    proj_drop: Projection dropout rate
    attn_drop: Attention dropout rate.
    drop_path: Stochastic depth rate.
    act_layer: Activation layer type.
    norm_layer: Normalization layer.
    pretrained_window_size: Local window size in pretraining.
    output_nchw: Output tensors on NCHW format instead of NHWC.
rH   c              3   *   #    U  H	  oS -  v   M     g7fr)   Nr   .0is     r7   	<genexpr>2SwinTransformerV2Stage.__init__.<locals>.<genexpr>v  s     &H7G!Av7G   Fr)   )r@   r  r   r   )r@   r   rA   r%   r   r   r   r   rB   rE   rD   r   r   r   rF   Nr   )rT   rU   r@   r   r   output_resolutionr  r  grad_checkpointingr   r  r  rV   r   
ModuleListranger   
isinstancelistblocks)ri   r@   r  r   r  rA   r%   r   r   r  r   rB   rE   rD   r   r   r   rF   r  rI   rJ   rj   r   r   r!  rm   s                            r7   rU   SwinTransformerV2Stage.__init__F  sU   X / 0LV&H7G&H!H\l
&"',K8KqAFK89
 *asPZa^`aDO>!> kkmDO mm& 5\'%#& "% # !%!7!7#'!"Q!1*!1)#!##*4Y*E*E)A,9#%'=  !$ "'%# $ 9%#s   )D9AD>r   c                 <   Xl         [        U R                  [        R                  5      (       a  Xl        O8[        U R                  [        5      (       d   e[        S U 5       5      U l        U R                   H  nUR                  U R
                  UUS9  M      g)zUpdate resolution, window size and relative positions.

Args:
    feat_size: New input (feature) resolution.
    window_size: New window size.
    always_partition: Always partition / shift the window.
c              3   *   #    U  H	  oS -  v   M     g7fr  r   r  s     r7   r"  8SwinTransformerV2Stage.set_input_size.<locals>.<genexpr>  s     *E9a69r$  r   r%   r   N)
r   r)  r  rV   r   r%  r  r   r+  r   )ri   r   r%   r   blocks        r7   r   %SwinTransformerV2Stage.set_input_size  s     !*door{{33%."doo|<<<<%**E9*E%ED"[[E  00'!1 !  !r9   r$   c                     U R                  U5      nU R                   HL  nU R                  (       a0  [        R                  R                  5       (       d  [        X!5      nMD  U" U5      nMN     U$ )zForward pass through the stage.

Args:
    x: Input tensor of shape (B, H, W, C).

Returns:
    Output tensor of shape (B, H', W', C').
)r  r+  r&  rX   jitis_scriptingr   )ri   r$   blks      r7   r   SwinTransformerV2Stage.forward  sU     OOA;;C&&uyy/E/E/G/Gs&F	 
 r9   c                    U R                    H  n[        R                  R                  UR                  R
                  S5        [        R                  R                  UR                  R                  S5        [        R                  R                  UR                  R
                  S5        [        R                  R                  UR                  R                  S5        M     g)z/Initialize residual post-normalization weights.r   N)r+  rV   rq   rr   r   rL   r|   r   )ri   r6  s     r7   _init_respostnorm(SwinTransformerV2Stage._init_respostnorm  s    ;;CGGciinna0GGcii..2GGciinna0GGcii..2	 r9   )r+  r  r@   r  r&  r   r  r%  r   r   )r   r   r   r   r   rV   r
  r   r  r   r   r   strr   r  rU   r
   r   r   rX   r   r   r9  r   r   r   s   @r7   r  r  ?  s    &+!&$!!!!!5;*,,,89 %+R$R$ R$ 0	R$
 R$ R$ +R$ #R$ R$ R$ R$ R$ R$ R$ R$  S$ryy/12!R$" RYY#R$$ %6%R$& 'R$, 
-R$ R$p 04	S#X  'tn	
 
4 %,, $3 3r9   r  c            +         ^  \ rS rSrSrSSSSSSS	S
SSSSSSSSSS\R                  SSS4S\S\S\S\S\	S\S\
\S4   S\
\S4   S\S\S\S \S!\S"\S#\S$\S%\S&\\	\4   S'\\R                      S(\
\S4   4(U 4S) jjjrSGS*\S+S4S, jjrSGS-\R                   S*\S+S4S. jjr     SHS\\
\\4      S\\
\\4      S\\
\\4      S/\\   S\\   4
S0 jjr\R.                  R0                  S+\\	   4S1 j5       r\R.                  R0                  SIS2\S+\\	\4   4S3 jj5       r\R.                  R0                  SGS4\S+S4S5 jj5       r\R.                  R0                  S+\R                   4S6 j5       rSJS\S\\	   S+S4S7 jjr      SKS8\RB                  S9\\\\"\   4      S:\S;\S<\	S=\S+\\"\RB                     \
\RB                  \"\RB                     4   4   4S> jjr#   SLS9\\\"\   4   S?\S@\4SA jjr$S8\RB                  S+\RB                  4SB jr%SIS8\RB                  SC\S+\RB                  4SD jjr&S8\RB                  S+\RB                  4SE jr'SFr(U =r)$ )Mr#   i  a  Swin Transformer V2.

A hierarchical vision transformer using shifted windows for efficient
self-attention computation with continuous position bias.

A PyTorch impl of : `Swin Transformer V2: Scaling Up Capacity and Resolution`
    - https://arxiv.org/abs/2111.09883
   r*   r(     avg`   r)   r)      r)   r(   rB        r   FTr   r   g?r   )r   r   r   r   Nr:   
patch_sizein_chansnum_classesglobal_pool	embed_dimdepths.rA   r%   r   strict_img_sizer   rB   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rater   r   pretrained_window_sizesc                 n  > [         T U ]  5         UUS.nX@l        X0l        US;   d   eXPl        SU l        [        U5      U l        X`l        [        USU R                  S-
  -  -  5      =U l
        U l        / U l        [        U[        [        45      (       d4  [!        U R                  5       Vs/ s H  n[        USU-  -  5      PM     nn[#        S"UUUUS   UUSS.UD6U l        U R$                  R&                  n[)        UUSS	9n/ nUS   nSn[!        U R                  5       H  nUU   nU[+        S"0 S
U_SU_SUS   U-  US   U-  4_SUU   _SUS:  _SUU   _SU	_SU
_SU(       + _SU_SU_SU_SU_SUU   _SU_SU_SUU   _UD6/-  nUnUS:  a  US-  nU =R                  [-        USU-  SU 3S9/-  sl        M     [.        R0                  " U6 U l        U" U R                  40 UD6U l        [7        U R                  U4UUU R
                  S.UD6U l        U R;                  SS 9  g!s  snf )#a  
Args:
    img_size: Input image size.
    patch_size: Patch size.
    in_chans: Number of input image channels.
    num_classes: Number of classes for classification head.
    embed_dim: Patch embedding dimension.
    depths: Depth of each Swin Transformer stage (layer).
    num_heads: Number of attention heads in different layers.
    window_size: Window size.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    drop_rate: Head dropout rate.
    proj_drop_rate: Projection dropout rate.
    attn_drop_rate: Attention dropout rate.
    drop_path_rate: Stochastic depth rate.
    norm_layer: Normalization layer.
    act_layer: Activation layer type.
    patch_norm: If True, add normalization after patch embedding.
    pretrained_window_sizes: Pretrained window sizes of each layer.
    output_fmt: Output tensor format if not None, otherwise output 'NHWC' by default.
rH   ) r?  NHWCr)   r   r   )r:   rF  rG  rJ  r   rL  
output_fmtT)	stagewiser@   r  r   r  r  rA   r%   r   r   r   rB   rE   rD   r   r   r   rF   r*   layers.)num_chsr  module)	pool_typerM  	input_fmtFneeds_resetNr   )rT   rU   rH  rG  rI  rU  len
num_layersrJ  r   num_featureshead_hidden_sizefeature_infor)  r   r*  r(  r   patch_embed	grid_sizer   r  dictrV   r[   layersr  r   headinit_weights)!ri   r:   rF  rG  rH  rI  rJ  rK  rA   r%   r   rL  r   rB   rM  rN  rO  rP  r   r   rQ  rI   rJ   kwargsrj   r!  rd  dprrf  in_dimscaler  rm   s!                                   r7   rU   SwinTransformerV2.__init__  s   ` 	/& k)))& f+"47	A$//\]J]D^8^4__D1)eT]33:?:PQ:PQYa/0:PIQ & 	
!l!+	
 	
 $$..	'$O1t'AlG-  #,A,%"719N!O Qi	
 q5 $A, ( "2 "10 $ " ) ) a& $  &!" (?q'A%  F( F1u
$w!e)V]^_]`Ta"b!cc3 (6 mmV,t007B7	"
 "oo
 
	 	e,u Rs   /H2r]  r&   c                     U R                  [        U R                  US95        U R                   H  nUR	                  5         M     g)zInitialize model weights.

Args:
    needs_reset: If True, call reset_parameters() on modules (default for after to_empty()).
        If False, skip reset_parameters() (for __init__ where modules already self-initialized).
r\  N)applyr   _init_weightsrf  r9  )ri   r]  blys      r7   rh  SwinTransformerV2.init_weightsS  s7     	

74--;GH;;C!!# r9   mc                 0   [        U[        R                  5      (       aM  [        UR                  SS9  UR
                  b+  [        R                  R                  UR
                  S5        ggU(       a#  [        US5      (       a  UR                  5         ggg)zInitialize weights for Linear layers.

Args:
    m: Module to initialize.
    needs_reset: Whether to call reset_parameters() on modules.
g{Gz?)stdNr   rh   )
r)  rV   r\   r   r|   rL   rq   rr   hasattrrh   )ri   rs  r]  s      r7   rp  SwinTransformerV2._init_weights^  sm     a##!((,vv!!!!&&!, "WQ(:;;  <[r9   window_ratioc                 ^   Uc  Ub/  U R                   R                  XS9  U R                   R                  nUc!  Ub  [        W Vs/ s H  owU-  PM	     sn5      n[	        U R
                  5       H6  u  pS[        US-
  S5      -  n
U	R                  WS   U
-  US   U
-  4UUS9  M8     gs  snf )a  Updates the image resolution, window size, and so the pair-wise relative positions.

Args:
    img_size (Optional[Tuple[int, int]]): New input resolution, if None current resolution is used
    patch_size (Optional[Tuple[int, int]): New patch size, if None use current patch size
    window_size (Optional[int]): New window size, if None based on new_img_size // window_div
    window_ratio (int): divisor for calculating window size from patch grid size
    always_partition: always partition / shift windows even if feat size is < window
N)r:   rF  r)   r   r   r0  )rc  r   rd  r   	enumeraterf  r   )ri   r:   rF  r%   rx  r   rd  r   indexstagestage_scales              r7   r    SwinTransformerV2.set_input_sizel  s    " :#9++X+U((22I<#;I FIql!2I FGK%dkk2LEs519a00K  $Q<;6	!8ST'!1 !  3 !Gs   B*c                     [        5       nU R                  5        H;  u  p#[        S Vs/ s H  oDU;   PM	     sn5      (       d  M*  UR                  U5        M=     U$ s  snf )zyGet parameter names that should not use weight decay.

Returns:
    Set of parameter names to exclude from weight decay.
)r^   rZ   )setnamed_modulesr   add)ri   nodnrs  kws        r7   no_weight_decay!SwinTransformerV2.no_weight_decay  sW     e&&(DA&@A&@!G&@ABB
 ) 
 Bs   A
coarsec                 0    [        SU(       a  SS9$ / SQS9$ )zCreate parameter group matcher for optimizer parameter groups.

Args:
    coarse: If True, use coarse grouping.

Returns:
    Dictionary mapping group names to regex patterns.
z^absolute_pos_embed|patch_embedz^layers\.(\d+)))z^layers\.(\d+).downsample)r   )z^layers\.(\d+)\.\w+\.(\d+)N)z^norm)i )stemr+  )re  )ri   r  s     r7   group_matcherSwinTransformerV2.group_matcher  s)     3(.$
 	
5
 	
r9   enablec                 6    U R                    H	  nXl        M     g)zeEnable or disable gradient checkpointing.

Args:
    enable: If True, enable gradient checkpointing.
N)rf  r&  )ri   r  ls      r7   set_grad_checkpointing(SwinTransformerV2.set_grad_checkpointing  s     A#)  r9   c                 .    U R                   R                  $ )zGGet the classifier head.

Returns:
    The classification head module.
)rg  fcrw   s    r7   get_classifier SwinTransformerV2.get_classifier  s     yy||r9   c                 F    Xl         U R                  R                  X5        g)z}Reset the classification head.

Args:
    num_classes: Number of classes for new head.
    global_pool: Global pooling type.
N)rH  rg  reset)ri   rH  rI  s      r7   reset_classifier"SwinTransformerV2.reset_classifier  s     '		1r9   r$   indicesr  
stop_earlyrU  intermediates_onlyc                 l   US;   d   S5       e/ n[        [        U R                  5      U5      u  pU R                  U5      n[        U R                  5      n
[        R
                  R                  5       (       d  U(       d  U R                  nOU R                  SU	S-    n[        U5       Hj  u  pU" U5      nX;   d  M  U(       a  XS-
  :X  a  U R                  U5      nOUnUR                  SSSS5      R                  5       nUR                  U5        Ml     U(       a  U$ U R                  U5      nX4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to compatible intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWzOutput shape must be NCHW.Nr   r   r(   r)   )r   r^  rf  rc  rX   r4  r5  rz  r  r0   r1   append)ri   r$   r  r  r  rU  r  intermediatestake_indices	max_index
num_stagesstagesr!  r|  x_inters                  r7   forward_intermediates'SwinTransformerV2.forward_intermediates  s   * Y&D(DD&"6s4;;7G"Q Q%
99!!##:[[F[[)a-0F!&)HAaA Aa/"iilGG!//!Q15@@B$$W- *   IIaLr9   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a  U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr   r   rS  )r   r^  rf  rV   r   r  r  )ri   r  r  r  r  r  s         r7   prune_intermediate_layers+SwinTransformerV2.prune_intermediate_layers  s[     #7s4;;7G"Qkk.9q=1DI!!!R(r9   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ )zForward pass through feature extraction layers.

Args:
    x: Input tensor of shape (B, C, H, W).

Returns:
    Feature tensor of shape (B, H', W', C).
)rc  rf  r  ri   r$   s     r7   forward_features"SwinTransformerV2.forward_features  s3     QKKNIIaLr9   
pre_logitsc                 R    U(       a  U R                  USS9$ U R                  U5      $ )zForward pass through classification head.

Args:
    x: Feature tensor of shape (B, H, W, C).
    pre_logits: If True, return features before final linear layer.

Returns:
    Logits tensor of shape (B, num_classes) or pre-logits.
T)r  )rg  )ri   r$   r  s      r7   forward_headSwinTransformerV2.forward_head  s&     1;tyyty,L		!Lr9   c                 J    U R                  U5      nU R                  U5      nU$ )zForward pass through the model.

Args:
    x: Input tensor of shape (B, C, H, W).

Returns:
    Logits tensor of shape (B, num_classes).
)r  r  r  s     r7   r   SwinTransformerV2.forward%  s)     !!!$a r9   )rJ  rb  rI  rg  ra  rG  rf  r  rH  r`  r_  rU  rc  )T)NNNr   NFr   )NFFr  F)r   FT)*r   r   r   r   r   rV   r
  r  r   r;  r
   r   r   r   r   r   r  rU   rh  rp  r   r   rX   r4  ignorer	   r  r   r   r  r  r  r  r   r   r  r  r  r  r   r   r   r   s   @r7   r#   r#     s    +.#$&2)7-.%*$(!!!$&$&$'.4*,,,7C/x-'x- x- 	x-
 x- x- x- #s(Ox- S#Xx- +x- #x- "x- x- x- x-  "!x-" "#x-$ "%x-& S(]+'x-( RYY)x-* &+38_+x- x-t	$ 	$ 	$!ryy !t !t !  374859*+/3uS#X/ !sCx1 "%S/2	
 #3- 'tn@ YY
S 
 
 YY
D 
T#s(^ 
 
$ YY*T *T * * YY		  2C 2hsm 2W[ 2 8<$$',0 ||0  eCcN340  	0 
 0  0  !%0  
tELL!5tELL7I)I#JJ	K0 h ./$#	3S	>*  	 %,, 5<< 
Mell 
M 
M 
M %,,  r9   
state_dictmodelc           	         U R                  SU 5      n U R                  SU 5      n SU ;   n0 nSSKnU R                  5        H  u  pV[        S Vs/ s H  owU;   PM	     sn5      (       a  M*  SU;   ab  UR                  R
                  R                  R                  u    pn
UR                  S   U	:w  d  UR                  S	   U
:w  a  [        UX4S
SSS9nU(       d&  UR                  SS U5      nUR                  SS5      nXcU'   M     U$ s  snf )a-  Filter and process checkpoint state dict for loading.

Handles resizing of patch embeddings and relative position tables
when model size differs from checkpoint.

Args:
    state_dict: Checkpoint state dictionary.
    model: Target model to load weights into.

Returns:
    Filtered state dictionary.
r  r  zhead.fc.weightr   N)rS   rR   r   zpatch_embed.proj.weightr   r,   bicubicT)interpolation	antialiasverbosezlayers.(\d+).downsamplec                 D    S[        U R                  S5      5      S-    S3$ )NrW  r   z.downsample)r   group)r$   s    r7   <lambda>&checkpoint_filter_fn.<locals>.<lambda>V  s"    ws177ST:YZGZF[[f=gr9   zhead.zhead.fc.)getreitemsr   rc  rd   r|   r.   r   subreplace)r  r  native_checkpointout_dictr  r   r   r  r  r3   r4   s              r7   checkpoint_filter_fnr  3  s    4Jj9J(J6H  " ab a1Q abcc$)**//66<<JAqQwwr{a1772;!#3(F"+"  !13gijkA		':.A' #* O) cs   D	
variant
pretrainedc           	          [        S [        UR                  SS5      5       5       5      nUR                  SU5      n[	        [
        X4[        [        SUS9S.UD6nU$ )zCreate a Swin Transformer V2 model.

Args:
    variant: Model variant name.
    pretrained: If True, load pretrained weights.
    **kwargs: Additional model arguments.

Returns:
    SwinTransformerV2 model instance.
c              3   *   #    U  H	  u  pUv   M     g 7fr   r   )r   r!  r  s      r7   r"  ._create_swin_transformer_v2.<locals>.<genexpr>h  s     \.[da.[r$  rK  )r   r   r   r   out_indicesT)flatten_sequentialr  )pretrained_filter_fnfeature_cfg)r   rz  r  popr   r#   r  re  )r  r  ri  default_out_indicesr  r  s         r7   _create_swin_transformer_v2r  ]  sh      \i

8\8Z.[\\**],?@K 71DkJ 	E
 Lr9   c                 4    U SSSSSS[         [        SSS	S
.UE$ )Nr>  )r(      r  )r   r   g?r  Tzpatch_embed.projzhead.fcmit)urlrH  
input_size	pool_sizecrop_pctr  fixed_input_sizemeanru  
first_conv
classifierlicenser   )r  ri  s     r7   _cfgr  s  s5    =v%.B(	 # r9   ztimm/z{https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window12to16_192to256_22kto1k_ft.pth)	hf_hub_idr  z{https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window12to24_192to384_22kto1k_ft.pth)r(     r  )rD  rD  r   )r  r  r  r  r  z|https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12to16_192to256_22kto1k_ft.pthz|https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12to24_192to384_22kto1k_ft.pthzfhttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_tiny_patch4_window8_256.pthzghttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_tiny_patch4_window16_256.pthzghttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_small_patch4_window8_256.pthzhhttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_small_patch4_window16_256.pthzfhttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window8_256.pthzghttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window16_256.pthzkhttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window12_192_22k.pthiQU  )r(      r  )rB  rB  )r  r  rH  r  r  zlhttps://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12_192_22k.pth)2swinv2_base_window12to16_192to256.ms_in22k_ft_in1k2swinv2_base_window12to24_192to384.ms_in22k_ft_in1k3swinv2_large_window12to16_192to256.ms_in22k_ft_in1k3swinv2_large_window12to24_192to384.ms_in22k_ft_in1kzswinv2_tiny_window8_256.ms_in1kz swinv2_tiny_window16_256.ms_in1kz swinv2_small_window8_256.ms_in1kz!swinv2_small_window16_256.ms_in1kzswinv2_base_window8_256.ms_in1kz swinv2_base_window16_256.ms_in1k!swinv2_base_window12_192.ms_in22k"swinv2_large_window12_192.ms_in22kc           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z"Swin-T V2 @ 256x256, window 16x16.r   r@  rA  rC  r%   rJ  rK  rA   r  )swinv2_tiny_window16_256re  r  r  ri  
model_argss      r7   r  r    sD     "<SabJ&"Y/9Y=A*=WPV=WY Yr9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z Swin-T V2 @ 256x256, window 8x8.r   r@  rA  rC  r  r  )swinv2_tiny_window8_256r  r  s      r7   r  r    sC     !r,R`aJ&!X.8X<@<Vv<VX Xr9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z"Swin-S V2 @ 256x256, window 16x16.r   r@  r)   r)      r)   rC  r  r  )swinv2_small_window16_256r  r  s      r7   r  r    sD     "=TbcJ&#Z0:Z>B:>XQW>XZ Zr9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z Swin-S V2 @ 256x256, window 8x8.r   r@  r  rC  r  r  )swinv2_small_window8_256r  r  s      r7   r  r    sD     !r-SabJ&"Y/9Y=A*=WPV=WY Yr9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z"Swin-B V2 @ 256x256, window 16x16.r      r  r*   r   r       r  r  )swinv2_base_window16_256r  r  s      r7   r   r     D     "MUcdJ&"Y/9Y=A*=WPV=WY Yr9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z Swin-B V2 @ 256x256, window 8x8.r   r  r  r  r  r  )swinv2_base_window8_256r  r  s      r7   r  r    sC     !s=TbcJ&!X.8X<@<Vv<VX Xr9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z"Swin-B V2 @ 192x192, window 12x12.rD  r  r  r  r  r  )swinv2_base_window12_192r  r  s      r7   r  r    r  r9   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zQSwin-B V2 @ 192x192, trained at window 12x12, fine-tuned to 256x256 window 16x16.r   r  r  r  rD  rD  rD  rB  r%   rJ  rK  rA   rQ  r  )!swinv2_base_window12to16_192to256r  r  s      r7   r	  r	    K     #m~ /1J '+b8BbFJ:F`Y_F`b br9   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zQSwin-B V2 @ 192x192, trained at window 12x12, fine-tuned to 384x384 window 24x24.rE  r  r  r  r  r  r  )!swinv2_base_window12to24_192to384r  r  s      r7   r  r    r
  r9   c           	      J    [        SSSSS9n[         SSU 0[        U40 UD6D6$ )z"Swin-L V2 @ 192x192, window 12x12.rD  r  r  rB  rD  rE  0   r  r  )swinv2_large_window12_192r  r  s      r7   r  r    sD     "MUdeJ&#Z0:Z>B:>XQW>XZ Zr9   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zQSwin-L V2 @ 192x192, trained at window 12x12, fine-tuned to 256x256 window 16x16.r   r  r  r  r  r  r  )"swinv2_large_window12to16_192to256r  r  s      r7   r  r    K     #m /1J ',c9CcGKJGaZ`Gac cr9   c           	      L    [        SSSSSS9n[         SSU 0[        U40 UD6D6$ )	zQSwin-L V2 @ 192x192, trained at window 12x12, fine-tuned to 384x384 window 24x24.rE  r  r  r  r  r  r  )"swinv2_large_window12to24_192to384r  r  s      r7   r  r    r  r9   r  r  r  r  r  r  )swinv2_base_window12_192_22k)swinv2_base_window12to16_192to256_22kft1k)swinv2_base_window12to24_192to384_22kft1kswinv2_large_window12_192_22k*swinv2_large_window12to16_192to256_22kft1k*swinv2_large_window12to24_192to384_22kft1kr  )rS  )Nr   rs   	functoolsr   typingr   r   r   r   r   r	   r
   r   r   rX   torch.nnrV   torch.nn.functionalr   r   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   	_registryr    r!   r"   __all__r   r  r   r8   r<   r  r>   r   r  r  r#   r;  r  r   r  r  default_cfgsr  r  r  r  r   r  r  r	  r  r  r  r  r   r   r9   r7   <module>r)     s     O O O     A; ; ; ; * + 3 # Y Y
#uS#X./ <<38_ \\& 38_ S/ \\	 ,Qbii QhpRYY pf&299 &RM3RYY M3`a		 aH'T#u||*;%< 'RYY 'SWX[]b]i]iXiSj 'T $ Uf , %:> J; ;? J Hs;
 <@ K< <@ K Hs< (,t( )-u) )-u) *.v* (,t( )-u)
 *.ymv*
 +/zmv+e7& 7t Y YDU Y Y X XCT X X Z$ ZEV Z Z Y YDU Y Y Y YDU Y Y X XCT X X Y YDU Y Y b$ bM^ b b b$ bM^ b b Z$ ZEV Z Z c4 cN_ c c c4 cN_ c c H$G1e1e%I2g2g' r9   