
    QЦi`}                        S SK r S SKJr  S SKJr  S SKJrJrJrJ	r	J
r
Jr  S SKrS SKrS SKJs  Jr  S SKJrJr  S SKJrJrJr  S SKJr  S SKJrJr  S S	KJrJr  S S
K J!r!  S SK"J#r#J$r$  S SK%J&r&  / SQr'S\\(\(4   S\(S\(S\(S\\(\(4   4
S jr)S\\(\(4   S\(S\\\(\(4      4S jr*S\(S\(S\R&                  4S jr+ " S S\RX                  5      r- " S S\RX                  5      r. " S S\RX                  5      r/ " S S \RX                  5      r0 " S! S"\RX                  5      r1 " S# S$\RX                  5      r2 " S% S&\RX                  5      r3 " S' S(\RX                  5      r4 " S) S*\RX                  5      r5  S<S+\(S,\\(   S-\\(   S.\6S/\(S0\(S1\	\   S2\7S3\S\54S4 jjr8 " S5 S6\5      r9\" 5       \" S7\9Rt                  4S89SS9S:.S1\	\9   S2\7S3\S\54S; jj5       5       r;g)=    N)OrderedDict)partial)AnyCallableListOptionalSequenceTuple)nnTensor)register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)Conv2dNormActivationSqueezeExcitation)StochasticDepth)ImageClassificationInterpolationMode)_log_api_usage_once)MaxVitMaxVit_T_Weightsmaxvit_t
input_sizekernel_sizestridepaddingreturnc                 R    U S   U-
  SU-  -   U-  S-   U S   U-
  SU-  -   U-  S-   4$ )Nr          )r   r   r   r   s       X/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchvision/models/maxvit.py_get_conv_output_shaper&      sJ    	A	$q7{	2v=A	A	$q7{	2v=A     n_blocksc                     / n[        U SSS5      n[        U5       H"  n[        USSS5      nUR                  U5        M$     U$ )zQUtil function to check that the input size is correct for a MaxVit configuration.   r"   r#   )r&   rangeappend)r   r(   shapesblock_input_shape_s        r%   _make_block_input_shapesr0       sL    F.z1aC8_23DaAN'(  Mr'   heightwidthc                    [         R                  " [         R                  " [         R                  " U 5      [         R                  " U5      /SS95      n[         R                  " US5      nUS S 2S S 2S 4   US S 2S S S 24   -
  nUR                  SSS5      R                  5       nUS S 2S S 2S4==   U S-
  -  ss'   US S 2S S 2S4==   US-
  -  ss'   US S 2S S 2S4==   SU-  S-
  -  ss'   UR                  S5      $ )Nij)indexingr#   r"   r   )torchstackmeshgridarangeflattenpermute
contiguoussum)r1   r2   coordscoords_flatrelative_coordss        r%   _get_relative_position_indexrB   *   s    [[f)=u||E?R(S^bcdF--*K!!Q*-AtQJ0GGO%--aA6AACOAq!G
*Aq!G	)Aq!GE	A-r""r'   c                      ^  \ rS rSrSr SS\S\S\S\S\S\S	\R                  4   S
\S	\R                  4   S\SS4U 4S jjjr
S\S\4S jrSrU =r$ )MBConv5   a  MBConv: Mobile Inverted Residual Bottleneck.

Args:
    in_channels (int): Number of input channels.
    out_channels (int): Number of output channels.
    expansion_ratio (float): Expansion ratio in the bottleneck.
    squeeze_ratio (float): Squeeze ratio in the SE Layer.
    stride (int): Stride of the depthwise convolution.
    activation_layer (Callable[..., nn.Module]): Activation function.
    norm_layer (Callable[..., nn.Module]): Normalization function.
    p_stochastic_dropout (float): Probability of stochastic depth.
in_channelsout_channelsexpansion_ratiosqueeze_ratior   activation_layer.
norm_layerp_stochastic_dropoutr    Nc	                   > [         TU ]  5         U   US:g  =(       d    X:g  n	U	(       aQ  [        R                  " XSSSS9/n
US:X  a  [        R                  " SUSS9/U
-   n
[        R
                  " U
6 U l        O[        R                  " 5       U l        [        X#-  5      n[        X$-  5      nU(       a  [        USS9U l
        O[        R                  " 5       U l
        [        5       nU" U5      US	'   [        UUSSS
UUS S9US'   [        UUSUSUUUS S9	US'   [        X[        R                  S9US'   [        R                  " XSSS9US'   [        R
                  " U5      U l        g )Nr#   T)r   r   biasr"   r*   r   r   r   rowmodepre_normr   )r   r   r   rJ   rK   inplaceconv_a)r   r   r   rJ   rK   groupsrT   conv_b)
activationsqueeze_excitation)rF   rG   r   rN   conv_c)super__init__r   Conv2d	AvgPool2d
SequentialprojIdentityintr   stochastic_depthr   r   r   SiLUlayers)selfrF   rG   rH   rI   r   rJ   rK   rL   should_projr`   mid_channelssqz_channels_layers	__class__s                 r%   r\   MBConv.__init__C   sh    	 	k@[%@IIkQqW[\]D{61MNQUUt,DIDI<9:<78$34Hu$UD!$&KKMD!-(5
0-!	
 1-!

 ):,acahah(i$%II,ghostmmG,r'   xc                 l    U R                  U5      nU R                  U R                  U5      5      nX!-   $ )z
Args:
    x (Tensor): Input tensor with expected layout of [B, C, H, W].
Returns:
    Tensor: Output tensor with expected layout of [B, C, H / stride, W / stride].
)r`   rc   re   rf   rm   ress      r%   forwardMBConv.forward   s0     iil!!$++a.1wr'   )re   r`   rc   )        )__name__
__module____qualname____firstlineno____doc__rb   floatr   r   Moduler\   r   rq   __static_attributes____classcell__rk   s   @r%   rD   rD   5   s    , '*;-;- ;- 	;-
 ;- ;- #3		>2;- S"))^,;- $;- 
;- ;-z	 	F 	 	r'   rD   c                   t   ^  \ rS rSrSrS\S\S\SS4U 4S jjrS\R                  4S	 jr	S
\S\4S jr
SrU =r$ )$RelativePositionalMultiHeadAttention   zRelative Positional Multi-Head Attention.

Args:
    feat_dim (int): Number of input features.
    head_dim (int): Number of features per head.
    max_seq_len (int): Maximum sequence length.
feat_dimhead_dimmax_seq_lenr    Nc                 V  > [         TU ]  5         X-  S:w  a  [        SU SU 35      eX-  U l        X l        [        [        R                  " U5      5      U l        X0l	        [        R                  " XR                  U R                  -  S-  5      U l        US-  U l        [        R                  " U R                  U R                  -  U5      U l        [        R                  R!                  ["        R$                  " SU R                  -  S-
  SU R                  -  S-
  -  U R                  4["        R&                  S95      U l        U R+                  S	[-        U R                  U R                  5      5        ["        R                  R.                  R1                  U R(                  S
S9  g )Nr   z
feat_dim: z  must be divisible by head_dim: r*   g      r"   r#   )dtyperelative_position_index{Gz?std)r[   r\   
ValueErrorn_headsr   rb   mathsqrtsizer   r   Linearto_qkvscale_factormerge	parameter	Parameterr7   emptyfloat32relative_position_bias_tableregister_bufferrB   inittrunc_normal_)rf   r   r   r   rk   s       r%   r\   -RelativePositionalMultiHeadAttention.__init__   sK    	!#z(3ST\S]^__+ 		+./	&ii,,*F*JK$dNYYt}}t||;XF
,.LL,B,BKK!dii-!+DII0ABDLLQY^YfYfg-
) 	68TUYU^U^`d`i`i8jk##D$E$E4#Pr'   c                    U R                   R                  S5      nU R                  U   R                  U R                  U R                  S5      nUR	                  SSS5      R                  5       nUR                  S5      $ )Nr6   r"   r   r#   )r   viewr   r   r<   r=   	unsqueeze)rf   
bias_indexrelative_biass      r%   get_relative_positional_biasARelativePositionalMultiHeadAttention.get_relative_positional_bias   ss    1166r:
99*EJJ4K[K[]a]m]moqr%--aA6AAC&&q))r'   rm   c                    UR                   u  p#pEU R                  U R                  pvU R                  U5      n[        R
                  " USSS9u  pnU	R                  X#XFU5      R                  SSSSS5      n	U
R                  X#XFU5      R                  SSSSS5      n
UR                  X#XFU5      R                  SSSSS5      nXR                  -  n
[        R                  " SX5      nU R                  5       n[        R                  " X-   SS9n[        R                  " S	X5      nUR                  SSSSS5      R                  X#XE5      nU R                  U5      nU$ )
z
Args:
    x (Tensor): Input tensor with expected layout of [B, G, P, D].
Returns:
    Tensor: Output tensor with expected layout of [B, G, P, D].
r*   r6   )dimr   r#   r"      z!B G H I D, B G H J D -> B G H I Jz!B G H I J, B G H J D -> B G H I D)shaper   r   r   r7   chunkreshaper<   r   einsumr   Fsoftmaxr   )rf   rm   BGPDHDHqkvqkvdot_prodpos_biasouts                  r%   rq   ,RelativePositionalMultiHeadAttention.forward   s8    WW
admm2kk!n++c1"-aIIaA"%--aAq!<IIaA"%--aAq!<IIaA"%--aAq!<!!!<< CQJ44699X0b9ll>Lkk!Q1a(00q<jjo
r'   )r   r   r   r   r   r   r   r   )rt   ru   rv   rw   rx   rb   r\   r7   r   r   rq   r{   r|   r}   s   @r%   r   r      s`    QQ Q 	Q
 
Q8*ell * F  r'   r   c                   v   ^  \ rS rSrSrS\S\SS4U 4S jjrS\R                  S\R                  4S	 jr	S
r
U =r$ )SwapAxes   zPermute the axes of a tensor.abr    Nc                 :   > [         TU ]  5         Xl        X l        g N)r[   r\   r   r   )rf   r   r   rk   s      r%   r\   SwapAxes.__init__   s    r'   rm   c                 \    [         R                  " XR                  U R                  5      nU$ r   )r7   swapaxesr   r   ro   s      r%   rq   SwapAxes.forward   s    nnQ/
r'   )r   r   )rt   ru   rv   rw   rx   rb   r\   r7   r   rq   r{   r|   r}   s   @r%   r   r      s@    '# # $ 
 %,,  r'   r   c                   F   ^  \ rS rSrSrS	U 4S jjrS\S\S\4S jrSr	U =r
$ )
WindowPartition   z:
Partition the input tensor into non-overlapping windows.
r    c                 "   > [         TU ]  5         g r   r[   r\   rf   rk   s    r%   r\   WindowPartition.__init__       r'   rm   pc                     UR                   u  p4pVUnUR                  X4XW-  XvU-  U5      nUR                  SSSSSS5      nUR                  X5U-  Xg-  -  Xw-  U5      nU$ )z
Args:
    x (Tensor): Input tensor with expected layout of [B, C, H, W].
    p (int): Number of partitions.
Returns:
    Tensor: Output tensor with expected layout of [B, H/P, W/P, P*P, C].
r   r"   r   r*      r#   r   r   r<   )rf   rm   r   r   Cr   Wr   s           r%   rq   WindowPartition.forward   sl     WW
aIIaAFAAvq1IIaAq!Q'IIaq&QV,aeQ7r'   r$   r    Nrt   ru   rv   rw   rx   r\   r   rb   rq   r{   r|   r}   s   @r%   r   r      s,     C F  r'   r   c            
       N   ^  \ rS rSrSrSU 4S jjrS\S\S\S\S\4
S	 jrS
r	U =r
$ )WindowDepartition   zg
Departition the input tensor of non-overlapping windows into a feature volume of layout [B, C, H, W].
r    c                 "   > [         TU ]  5         g r   r   r   s    r%   r\   WindowDepartition.__init__  r   r'   rm   r   h_partitionsw_partitionsc                     UR                   u  pVpxUn	X4pUR                  XZXX5      nUR                  SSSSSS5      nUR                  XXX-  X-  5      nU$ )a2  
Args:
    x (Tensor): Input tensor with expected layout of [B, (H/P * W/P), P*P, C].
    p (int): Number of partitions.
    h_partitions (int): Number of vertical partitions.
    w_partitions (int): Number of horizontal partitions.
Returns:
    Tensor: Output tensor with expected layout of [B, C, H, W].
r   r   r#   r*   r"   r   r   )rf   rm   r   r   r   r   r   PPr   r   HPWPs               r%   rq   WindowDepartition.forward  s`     ggbBIIaRA)IIaAq!Q'IIaBFBF+r'   r$   r   r   r}   s   @r%   r   r      s;     C s # RX  r'   r   c                      ^  \ rS rSrSrS\S\S\S\S\\\4   S\S	\S
\	R                  4   S\S
\	R                  4   S\S\S\SS4U 4S jjrS\S\4S jrSrU =r$ )PartitionAttentionLayeri  av  
Layer for partitioning the input tensor into non-overlapping windows and applying attention to each window.

Args:
    in_channels (int): Number of input channels.
    head_dim (int): Dimension of each attention head.
    partition_size (int): Size of the partitions.
    partition_type (str): Type of partitioning to use. Can be either "grid" or "window".
    grid_size (Tuple[int, int]): Size of the grid to partition the input tensor into.
    mlp_ratio (int): Ratio of the  feature size expansion in the MLP layer.
    activation_layer (Callable[..., nn.Module]): Activation function to use.
    norm_layer (Callable[..., nn.Module]): Normalization function to use.
    attention_dropout (float): Dropout probability for the attention layer.
    mlp_dropout (float): Dropout probability for the MLP layer.
    p_stochastic_dropout (float): Probability of dropping out a partition.
rF   r   partition_sizepartition_type	grid_size	mlp_ratiorJ   .rK   attention_dropoutmlp_dropoutrL   r    Nc           	        > [         TU ]  5         X-  U l        X l        US   U-  U l        X@l        XPl        US;  a  [        S5      eUS:X  a  X0R                  sU l        U l	        OU R                  UsU l        U l	        [        5       U l        [        5       U l        US:X  a  [        SS5      O[        R                   " 5       U l        US:X  a  [        SS5      O[        R                   " 5       U l        [        R&                  " U" U5      [)        XUS-  5      [        R*                  " U	5      5      U l        [        R&                  " [        R.                  " U5      [        R0                  " XU-  5      U" 5       [        R0                  " X-  U5      [        R*                  " U
5      5      U l        [5        US	S
9U l        g )Nr   )gridwindowz0partition_type must be either 'grid' or 'window'r   r   r"   rP   rQ   )r[   r\   r   r   n_partitionsr   r   r   r   gr   partition_opr   departition_opr   r   ra   partition_swapdepartition_swapr_   r   Dropout
attn_layer	LayerNormr   	mlp_layerr   stochastic_dropout)rf   rF   r   r   r   r   r   rJ   rK   r   r   rL   rk   s               r%   r\    PartitionAttentionLayer.__init__,  su   " 	". %aLN:,"!33OPPX%+->->NDFDF!..NDFDF+-/12@F2Jhr2.PRP[P[P]4Bf4LR 0RTR]R]R_--{# 1XYHYZJJ()
 LL%IIk#:;IIk-{;JJ{#
 #22FU"Sr'   rm   c                    U R                   S   U R                  -  U R                   S   U R                  -  p2[        R                  " U R                   S   U R                  -  S:H  =(       a    U R                   S   U R                  -  S:H  SR	                  U R                   U R                  5      5        U R                  XR                  5      nU R                  U5      nXR                  U R                  U5      5      -   nXR                  U R                  U5      5      -   nU R                  U5      nU R                  XR                  X#5      nU$ )z
Args:
    x (Tensor): Input tensor with expected layout of [B, C, H, W].
Returns:
    Tensor: Output tensor with expected layout of [B, C, H, W].
r   r#   z[Grid size must be divisible by partition size. Got grid size of {} and partition size of {})r   r   r7   _assertformatr   r   r   r   r   r   r   )rf   rm   ghgws       r%   rq   PartitionAttentionLayer.forwardf  s    "dff,dnnQ.?466.IBNN1&!+Oq0ADFF0Ja0Oipp	
 a("''(:;;''q(9::!!!$6622r'   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )rt   ru   rv   rw   rx   rb   strr
   r   r   rz   ry   r\   r   rq   r{   r|   r}   s   @r%   r   r     s    "8T8T 8T
 8T 8T c?8T 8T #3		>28T S"))^,8T !8T 8T $8T  
!8Tt F  r'   r   c                      ^  \ rS rSrSrS\S\S\S\S\S\S	\R                  4   S
\S	\R                  4   S\S\S\S\S\S\S\
\\4   SS4U 4S jjrS\S\4S jrSrU =r$ )MaxVitLayeri  a  
MaxVit layer consisting of a MBConv layer followed by a PartitionAttentionLayer with `window` and a PartitionAttentionLayer with `grid`.

Args:
    in_channels (int): Number of input channels.
    out_channels (int): Number of output channels.
    expansion_ratio (float): Expansion ratio in the bottleneck.
    squeeze_ratio (float): Squeeze ratio in the SE Layer.
    stride (int): Stride of the depthwise convolution.
    activation_layer (Callable[..., nn.Module]): Activation function.
    norm_layer (Callable[..., nn.Module]): Normalization function.
    head_dim (int): Dimension of the attention heads.
    mlp_ratio (int): Ratio of the MLP layer.
    mlp_dropout (float): Dropout probability for the MLP layer.
    attention_dropout (float): Dropout probability for the attention layer.
    p_stochastic_dropout (float): Probability of stochastic depth.
    partition_size (int): Size of the partitions.
    grid_size (Tuple[int, int]): Size of the input feature grid.
rF   rG   rI   rH   r   rK   .rJ   r   r   r   r   rL   r   r   r    Nc                 "  > [         TU ]  5         [        5       n[        UUUUUUUUS9US'   [	        UUUSUU	U[
        R                  UU
US9US'   [	        UUUSUU	U[
        R                  UU
US9US'   [
        R                  " U5      U l        g )N)rF   rG   rH   rI   r   rJ   rK   rL   MBconvr   )rF   r   r   r   r   r   rJ   rK   r   r   rL   window_attentionr   grid_attention)	r[   r\   r   rD   r   r   r   r_   re   )rf   rF   rG   rI   rH   r   rK   rJ   r   r   r   r   rL   r   r   re   rk   s                   r%   r\   MaxVitLayer.__init__  s    * 	)m "#%+'-!!5	
x &=$)#-||/#!5&
!" $;$)!-||/#!5$
  mmF+r'   rm   c                 (    U R                  U5      nU$ zu
Args:
    x (Tensor): Input tensor of shape (B, C, H, W).
Returns:
    Tensor: Output tensor of shape (B, C, H, W).
re   )rf   rm   s     r%   rq   MaxVitLayer.forward  s     KKNr'   r  )rt   ru   rv   rw   rx   rb   ry   r   r   rz   r
   r\   r   rq   r{   r|   r}   s   @r%   r  r    s    (?, ?, 	?,
 ?, ?, ?, S"))^,?, #3		>2?, ?, ?, ?, !?,  $!?,$ %?,& c?'?,( 
)?,B F  r'   r  c                      ^  \ rS rSrSrS\S\S\S\S\S\R                  4   S	\S\R                  4   S
\S\S\S\S\S\
\\4   S\S\\   SS4U 4S jjrS\S\4S jrSrU =r$ )MaxVitBlocki  a  
A MaxVit block consisting of `n_layers` MaxVit layers.

 Args:
    in_channels (int): Number of input channels.
    out_channels (int): Number of output channels.
    expansion_ratio (float): Expansion ratio in the bottleneck.
    squeeze_ratio (float): Squeeze ratio in the SE Layer.
    activation_layer (Callable[..., nn.Module]): Activation function.
    norm_layer (Callable[..., nn.Module]): Normalization function.
    head_dim (int): Dimension of the attention heads.
    mlp_ratio (int): Ratio of the MLP layer.
    mlp_dropout (float): Dropout probability for the MLP layer.
    attention_dropout (float): Dropout probability for the attention layer.
    p_stochastic_dropout (float): Probability of stochastic depth.
    partition_size (int): Size of the partitions.
    input_grid_size (Tuple[int, int]): Size of the input feature grid.
    n_layers (int): Number of layers in the block.
    p_stochastic (List[float]): List of probabilities for stochastic depth for each layer.
rF   rG   rI   rH   rK   .rJ   r   r   r   r   r   input_grid_sizen_layersp_stochasticr    Nc                 p  > [         TU ]  5         [        U5      U:X  d  [        SU SU S35      e[        R
                  " 5       U l        [        USSSS9U l        [        U5       HL  u  nnUS:X  a  SOSnU =R                  [        US:X  a  UOUUUUUUUUUU	U
UU R                  US	9/-  sl        MN     g )
Nz'p_stochastic must have length n_layers=z, got p_stochastic=.r*   r"   r#   rO   r   )rF   rG   rI   rH   r   rK   rJ   r   r   r   r   r   r   rL   )r[   r\   lenr   r   
ModuleListre   r&   r   	enumerater  )rf   rF   rG   rI   rH   rK   rJ   r   r   r   r   r   r  r  r  idxr   r   rk   s                     r%   r\   MaxVitBlock.__init__  s    , 	< H,FxjPcdpcqqrsttmmo/QWXbcd-FC(QFKK/2ax\!-"/$3!)%5%' +&7#1"nn)* K .r'   rm   c                 <    U R                    H  nU" U5      nM     U$ r
  r  )rf   rm   layers      r%   rq   MaxVitBlock.forward,  s      [[EaA !r'   )r   re   )rt   ru   rv   rw   rx   rb   ry   r   r   rz   r
   r   r\   r   rq   r{   r|   r}   s   @r%   r  r    s    *1 1 	1
 1 1 S"))^,1 #3		>21 1 1 1 !1  !1" sCx#1& '1( 5k)1* 
+1f	 	F 	 	r'   r  c            !         ^  \ rS rSrSrS\R                  SSSSSS4S\\\4   S	\S
\S\	\   S\	\   S\S\
S\\S\R                  4      S\S\R                  4   S\
S\
S\S\
S\
S\SS4 U 4S jjjrS\S\4S jrS rSrU =r$ )r   i8  a1  
Implements MaxVit Transformer from the `MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_ paper.
Args:
    input_size (Tuple[int, int]): Size of the input image.
    stem_channels (int): Number of channels in the stem.
    partition_size (int): Size of the partitions.
    block_channels (List[int]): Number of channels in each block.
    block_layers (List[int]): Number of layers in each block.
    stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
    squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
    expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
    norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
    activation_layer (Callable[..., nn.Module]): Activation function Default: nn.GELU.
    head_dim (int): Dimension of the attention heads.
    mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
    mlp_dropout (float): Dropout probability for the MLP layer. Default: 0.0.
    attention_dropout (float): Dropout probability for the attention layer. Default: 0.0.
    num_classes (int): Number of classes. Default: 1000.
Ng      ?r   rs   i  r   stem_channelsr   block_channelsblock_layersr   stochastic_depth_probrK   .rJ   rI   rH   r   r   r   num_classesr    c                   > [         TU ]  5         [        U 5        SnUc  [        [        R
                  SSS9n[        U[        U5      5      n[        U5       H6  u  nnUS   U-  S:w  d  US   U-  S:w  d  M   [        SU SU S	U S
U S3	5      e   [        R                  " [        UUSSUU	SS S9[        X"SSS S SS95      U l        [        USSSS9nX0l        [        R                  " 5       U l        U/US S -   nUn["        R$                  " SU['        U5      5      R)                  5       nSn[+        UUU5       HZ  u  nnnU R                   R-                  [/        UUU
UUU	UUUUUUUUUUU-    S95        U R                   S   R0                  nUU-  nM\     [        R                  " [        R2                  " S5      [        R4                  " 5       [        R6                  " US   5      [        R8                  " US   US   5      [        R:                  " 5       [        R8                  " US   USS95      U l        U R?                  5         g )Nr*   gMbP?g{Gz?)epsmomentumr   r#   zInput size z
 of block z$ is not divisible by partition size zx. Consider changing the partition size or the input size.
Current configuration yields the following block input sizes: r  r"   F)r   rK   rJ   rN   rT   T)r   rK   rJ   rN   rO   r6   )rF   rG   rI   rH   rK   rJ   r   r   r   r   r   r  r  r  )rN   ) r[   r\   r   r   r   BatchNorm2dr0   r  r  r   r_   r   stemr&   r   r  blocksnplinspacer>   tolistzipr,   r  r   AdaptiveAvgPool2dFlattenr   r   Tanh
classifier_init_weights)rf   r   r  r   r  r  r   r   rK   rJ   rI   rH   r   r   r   r!  input_channelsblock_input_sizesr  block_input_sizerF   rG   r  p_idx
in_channelout_channel
num_layersrk   s                              r%   r\   MaxVit.__init__M  s   : 	D!  TDIJ
 5Z^ATU%./@%A!C!"^3q8<LQ<OR`<`de<e !"2!3:cUBfgufv wUUfTgghj  &B MM %!1	 !ad]ahl
	" ,JAaYZ[
, mmo$os(;;%
 {{1&;S=NOVVX36{LR^3_/JZKK *!,"/$3)%5%' +&7#1$.'!-eej6H!I$ R22JZE) 4`0 --  #JJLLL+,IInR(.*<=GGIIInR(+EB
 	r'   rm   c                     U R                  U5      nU R                   H  nU" U5      nM     U R                  U5      nU$ r   )r&  r'  r/  )rf   rm   blocks      r%   rq   MaxVit.forward  s9    IIaL[[EaA !OOAr'   c                 (   U R                  5        GH}  n[        U[        R                  5      (       ab  [        R                  R                  UR                  SS9  UR                  b+  [        R                  R                  UR                  5        M  M  [        U[        R                  5      (       aV  [        R                  R                  UR                  S5        [        R                  R                  UR                  S5        M  [        U[        R                  5      (       d  GM  [        R                  R                  UR                  SS9  UR                  c  GMT  [        R                  R                  UR                  5        GM     g )Nr   r   r#   r   )modules
isinstancer   r]   r   normal_weightrN   zeros_r%  	constant_r   )rf   ms     r%   r0  MaxVit._init_weights  s    A!RYY''d366%GGNN166* &Ar~~..!!!((A.!!!&&!,Aryy))d366%GGNN166*  r'   )r'  r/  r   r&  )rt   ru   rv   rw   rx   r   GELUr
   rb   r   ry   r   r   rz   r\   r   rq   r0  r{   r|   r}   s   @r%   r   r   8  s0   J :>57WW#!" #&7t #s(Ot
 t t S	t 3it t  %t" Xc299n56#t$ #3		>2%t( )t* +t. /t0 1t2 !3t6 7t8 
9t tl F + +r'   r   r  r  r  r   r   r   weightsprogresskwargsc                 d   Ube  [        US[        UR                  S   5      5        UR                  S   S   UR                  S   S   :X  d   e[        USUR                  S   5        UR                  SS5      n	[	        SU UUUUUU	S.UD6n
Ub  U
R                  UR                  US	S
95        U
$ )Nr!  
categoriesmin_sizer   r#   r      rM  )r  r  r  r   r   r   r   T)rG  
check_hashr$   )r   r  metapopr   load_state_dictget_state_dict)r  r  r  r   r   r   rF  rG  rH  r   models              r%   _maxvitrT    s    $ fmSl9S5TU||J'*gll:.Fq.IIIIflGLL4LML*5J 	#%!3%	 	E g44hSW4XYLr'   c                   f    \ rS rSr\" S\" \SS\R                  S9\	SSSSS	S
S.0SSSS.S9r
\
rSrg)r   i  z9https://download.pytorch.org/models/maxvit_t-bc5ab103.pthrM  )	crop_sizeresize_sizeinterpolationirL  zLhttps://github.com/pytorch/vision/tree/main/references/classification#maxvitzImageNet-1KgT@g|?5.X@)zacc@1zacc@5gZd;@gK7]@zThese weights reproduce closely the results of the paper using a similar training recipe.
            They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.)rJ  
num_paramsrK  recipe_metrics_ops
_file_size_docs)url
transformsrO  r$   N)rt   ru   rv   rw   r   r   r   r   BICUBICr   IMAGENET1K_V1DEFAULTr{   r$   r'   r%   r   r     sb    G3CO`OhOh
 /""d##  !g
M. Gr'   r   
pretrained)rF  T)rF  rG  c                 \    [         R                  U 5      n [        SS/ SQ/ SQSSSU US.UD6$ )	aF  
Constructs a maxvit_t architecture from
`MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_.

Args:
    weights (:class:`~torchvision.models.MaxVit_T_Weights`, optional): The
        pretrained weights to use. See
        :class:`~torchvision.models.MaxVit_T_Weights` below for
        more details, and possible values. By default, no pre-trained
        weights are used.
    progress (bool, optional): If True, displays a progress bar of the
        download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.maxvit.MaxVit``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/maxvit.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.MaxVit_T_Weights
    :members:
@   )rf        i   )r"   r"   r   r"       g?   )r  r  r  r   r   r   rF  rG  r$   )r   verifyrT  )rF  rG  rH  s      r%   r   r     sH    . %%g.G 
*!!
 
 
r'   )NF)<r   collectionsr   	functoolsr   typingr   r   r   r   r	   r
   numpyr(  r7   torch.nn.functionalr   
functionalr   r   torchvision.models._apir   r   r   torchvision.models._metar   torchvision.models._utilsr   r   torchvision.ops.miscr   r    torchvision.ops.stochastic_depthr   torchvision.transforms._presetsr   r   torchvision.utilsr   __all__rb   r&   r0   rB   rz   rD   r   r   r   r   r   r  r  r   ry   boolrT  r   rb  r   r$   r'   r%   <module>r{     sX    #  A A      H H 9 T H < R 1uS#X S RU `c hmnqsvnvhw sCx C DQVWZ\_W_Q`La # #S #U\\ #TRYY TnF299 FR
ryy 
bii 4		 <ebii eP^")) ^BR")) Rj^+RYY ^+Z &*'' I	'
 s)' !' ' ' k"' ' '  !'T{ 6 ,0@0N0N!OP6:T !"23 !d !]` !ek ! Q !r'   