
    RЦi                    6   S r SSKrSSKJr  SSKJrJrJr  SSKJ	r	  SSK
JrJrJrJrJrJrJrJr  SSKrSSKJr  SSKJr  SS	KJrJr  SS
KJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3  SSK4J5r5  SSK6J7r7  SSK8J9r9  SSK:J;r;J<r<  SSK=J>r>J?r?  / SQr@\ " S S5      5       rA\ " S S5      5       rB\ " S S5      5       rC " S S\R                  5      rE " S S\R                  5      rF " S S\R                  5      rGGSS\R                  S \HS!\HS"S4S# jjrI " S$ S%\R                  5      rJGSS\R                  S \HS!\HS"S4S& jjrKS'\\L   S(\LS"\L4S) jrM " S* S+\R                  5      rN " S, S-\R                  5      rOS.\R                  S/\\L   S"\R                  4S0 jrQ\9S1\R                  S/\\L   S2\\L   S"\R                  4S3 j5       rRS.\R                  S4\\L   S"\R                  4S5 jrS\9S1\R                  S4\\L   S2\\L   S"\R                  4S6 j5       rTS7\AS/\\L\L4   S"\\   4S8 jrU " S9 S:\R                  5      rV " S; S<\R                  5      rWS.\R                  S/\\L   S"\R                  4S= jrX\9S1\R                  S/\\L   S2\\L   S"\R                  4S> j5       rYS.\R                  S4\\L   S"\R                  4S? jrZ\9S1\R                  S4\\L   S2\\L   S"\R                  4S@ j5       r[ " SA SB\R                  5      r\ " SC SD\R                  5      r] " SE SF\R                  5      r^ " SG SH\R                  5      r_ " SI SJ\R                  5      r`S7\AS2\\L\L4   S"\A4SK jraS7\CSL\S"\C4SM jrb " SN SO\R                  5      rc            GSSU\HSV\HSW\dSX\dSY\HSZ\HS[\dS\\HS]\HS^\\e   S_\HS`\LS"\\H\4   4Sa jjrf            GSSU\HSV\HSW\dSc\eSZ\HS\\HS]\HS/\\\L\L4      Sd\LS^\\e   S_\HS`\LS"\\H\4   4Se jjrg           GSSU\HSV\HSZ\HSh\HS\\HS]\HS/\\\L\L4      Si\dS^\\e\\e\e4   4   S_\HS`\LS"\\H\4   4Sj jjrhS"\\H\4   4Sk jri\j" GS0 Sl\C" GSSmSnSoSp.\g" SRSqSr9D6_Ss\C" GSSmStSoSp.\g" SPSRSqSu9D6_Sv\C" GSSwSxSoSp.\f" SRSQSy9D6_Sz\C" GSSwS{SoSp.\f" SbSRSQS|9D6_S}\C" GSS~S{SSp.\f" SbSS9D6_S\C" GSSS{SSp.\f" SbSSfS9D6_S\C" GSSwSxSoSp.\f" SbSRSQSS9D6_S\C" GSSmStSoSp.\g" SRSqSgSS9D6_S\C" GSSwSxSoSp.\f" SbSgS9D6_S\C" GSSwS{SoSp.\f" SSRSQSgSS9D6_S\C" GSSwS{SoSp.\f" SbSgSTS9D6_S\C" GSS~S{SSp.\f" SbSSfSgS9D6_S\C" GSSS{SSp.\f" SbSSfSgS9D6_S\C" GSSmStSoSS.\f" 5       D6_S\C" GSSmStSoSS.\h" SSSS9D6_S\C" SwSnSSS9_S\C" SwS{SSS9_S\C" S~S{SSS9_S\C" SS{SSS9_S\C" SSSSS9_S\C" SSSSS9_S\C" GSSSSSS.\g" 5       D6_S\C" GSSmSSSoS.\g" 5       D6_S\C" GSSmSSSoS.\g" 5       D6_S\C" GSSmSSSoS.\g" 5       D6_S\C" GSSSSSS.\g" SgS9D6_S\C" GSSmSSSoS.\g" SgS9D6_S\C" GSSmSSSoS.\g" SgS9D6_S\C" GSSwSSSoS.\g" SgSfS9D6_S\C" GSSwS{SSoSS.\g" SgS9D6_S\C" GSSmSSSoSS.\h" 5       D6_S\C" GSSmSSSoS.\h" 5       D6_S\C" GSSwSSSS.\h" 5       D6_S\C" GSSwSSSSS.\h" SRSSS9D6_S\C" GSS~SSSS.\h" SRS9D6_S\C" GSSSSSSS.\h" SRS9D6_S\C" GSSmSSSSRSTS.\i" 5       D6_S\C" GSSwSSSSRSS.\i" 5       D6_S\C" GSSwS{SSSRSS.\i" 5       D6_S\C" GSS~S{SSSRSS.\i" 5       D6_S\C" GSSS{SSSRSS.\i" 5       D6_6rkS\\H\R                  4   S\R                  S"\\H\R                  4   4S jrlGSS\HS\\H   S\dSL\S"\c4
S jjrmGSS\HSL\S"\\H\4   4S jjrn\>" 0 S\n" SS9_S\n" SSSS9_S\n" SSS9_S\n" SSS9_S\n" SS9_S\n" SS9_S\n" SS9_S\n" SSSSSS9_S\n" SS\\SS9_S\n" SSSS9_S\n" SS9_S\n" SSS9_S\n" SSS9_S\n" SS9_GS \n" SS9_GS\n" SGSSS9_GS\n" SGSGS9_0 GS\n" SGSGS9_GS\n" SGSGS9_GS\n" SGSGS9_GS	\n" SS9_GS
\n" SS9_GS\n" SS9_GS\n" SS9_GS\n" SS9_GS\n" SS9_GS\n" SGSGSGS9_GS\n" SGSGSGSGS9_GS\n" SGSS9_GS\n" SGSGSGS9_GS\n" SGSGSGS9_GS\n" SGSGSGSGS9_GS\n" SGSGSGSGS9_GS\n" SGSGSGSGS9_E0 GS \n" SGS!SS9_GS"\n" SGSGSGS9_GS#\n" SS9_GS$\n" SSSSSS9_GS%\n" SGSGS9_GS&\n" SGS'GSGSGS9_GS(\n" SGSGSGS9_GS)\n" SGS*GSGSGS9_GS+\n" SGSGSGS,9_GS-\n" SS9_GS.\n" SSSSSS9_GS/\n" SS9_GS0\n" SGSGS9_GS1\n" S\\GS29_GS3\n" SSSSSS9_GS4\n" SGS5GS6SSS9_GS7\n" S\\GS29_E0 GS8\n" SSSSSS9_GS9\n" SGS5GS6SSS9_GS:\n" S\\GS29_GS;\n" SSSSSS9_GS<\n" SGS5GS6SSS9_GS=\n" S\\GS29_GS>\n" SSSSSS9_GS?\n" SGS5GS6SSS9_GS@\n" SGSAGS9_GSB\n" SSSSSS9_GSC\n" SGS5GS6SSS9_GSD\n" SGSAGS9_GSE\n" SSSSSS9_GSF\n" SGS5SSGSG9_GSH\n" SGSAGS9_GSI\n" SSSSSS9_GSJ\n" SGS5GS6SSS9_E5      ro\?GSS\dSL\S"\c4GSK jj5       rp\?GSS\dSL\S"\c4GSL jj5       rq\?GSS\dSL\S"\c4GSM jj5       rr\?GSS\dSL\S"\c4GSN jj5       rs\?GSS\dSL\S"\c4GSO jj5       rt\?GSS\dSL\S"\c4GSP jj5       ru\?GSS\dSL\S"\c4GSQ jj5       rv\?GSS\dSL\S"\c4GSR jj5       rw\?GSS\dSL\S"\c4GSS jj5       rx\?GSS\dSL\S"\c4GST jj5       ry\?GSS\dSL\S"\c4GSU jj5       rz\?GSS\dSL\S"\c4GSV jj5       r{\?GSS\dSL\S"\c4GSW jj5       r|\?GSS\dSL\S"\c4GSX jj5       r}\?GSS\dSL\S"\c4GSY jj5       r~\?GSS\dSL\S"\c4GSZ jj5       r\?GSS\dSL\S"\c4GS[ jj5       r\?GSS\dSL\S"\c4GS\ jj5       r\?GSS\dSL\S"\c4GS] jj5       r\?GSS\dSL\S"\c4GS^ jj5       r\?GSS\dSL\S"\c4GS_ jj5       r\?GSS\dSL\S"\c4GS` jj5       r\?GSS\dSL\S"\c4GSa jj5       r\?GSS\dSL\S"\c4GSb jj5       r\?GSS\dSL\S"\c4GSc jj5       r\?GSS\dSL\S"\c4GSd jj5       r\?GSS\dSL\S"\c4GSe jj5       r\?GSS\dSL\S"\c4GSf jj5       r\?GSS\dSL\S"\c4GSg jj5       r\?GSS\dSL\S"\c4GSh jj5       r\?GSS\dSL\S"\c4GSi jj5       r\?GSS\dSL\S"\c4GSj jj5       r\?GSS\dSL\S"\c4GSk jj5       r\?GSS\dSL\S"\c4GSl jj5       r\?GSS\dSL\S"\c4GSm jj5       r\?GSS\dSL\S"\c4GSn jj5       r\?GSS\dSL\S"\c4GSo jj5       r\?GSS\dSL\S"\c4GSp jj5       r\?GSS\dSL\S"\c4GSq jj5       r\?GSS\dSL\S"\c4GSr jj5       r\?GSS\dSL\S"\c4GSs jj5       r\?GSS\dSL\S"\c4GSt jj5       r\?GSS\dSL\S"\c4GSu jj5       r\?GSS\dSL\S"\c4GSv jj5       r\?GSS\dSL\S"\c4GSw jj5       r\?GSS\dSL\S"\c4GSx jj5       r\?GSS\dSL\S"\c4GSy jj5       r\?GSS\dSL\S"\c4GSz jj5       r\?GSS\dSL\S"\c4GS{ jj5       r\?GSS\dSL\S"\c4GS| jj5       r\?GSS\dSL\S"\c4GS} jj5       r\?GSS\dSL\S"\c4GS~ jj5       r\?GSS\dSL\S"\c4GS jj5       r\?GSS\dSL\S"\c4GS jj5       r\?GSS\dSL\S"\c4GS jj5       r\?GSS\dSL\S"\c4GS jj5       rg(  a  MaxVit and CoAtNet Vision Transformer - CNN Hybrids in PyTorch

This is a from-scratch implementation of both CoAtNet and MaxVit in PyTorch.

99% of the implementation was done from papers, however last minute some adjustments were made
based on the (as yet unfinished?) public code release https://github.com/google-research/maxvit

There are multiple sets of models defined for both architectures. Typically, names with a
 `_rw` suffix are my own original configs prior to referencing https://github.com/google-research/maxvit.
These configs work well and appear to be a bit faster / lower resource than the paper.

The models without extra prefix / suffix' (coatnet_0_224, maxvit_tiny_224, etc), are intended to
match paper, BUT, without any official pretrained weights it's difficult to confirm a 100% match.

Papers:

MaxViT: Multi-Axis Vision Transformer - https://arxiv.org/abs/2204.01697
@article{tu2022maxvit,
  title={MaxViT: Multi-Axis Vision Transformer},
  author={Tu, Zhengzhong and Talebi, Hossein and Zhang, Han and Yang, Feng and Milanfar, Peyman and Bovik, Alan and Li, Yinxiao},
  journal={ECCV},
  year={2022},
}

CoAtNet: Marrying Convolution and Attention for All Data Sizes - https://arxiv.org/abs/2106.04803
@article{DBLP:journals/corr/abs-2106-04803,
  author    = {Zihang Dai and Hanxiao Liu and Quoc V. Le and Mingxing Tan},
  title     = {CoAtNet: Marrying Convolution and Attention for All Data Sizes},
  journal   = {CoRR},
  volume    = {abs/2106.04803},
  year      = {2021}
}

Hacked together by / Copyright 2022, Ross Wightman
    N)OrderedDict)	dataclassreplacefield)partial)AnyCallableDictListOptionalSetTupleUnion)nn)Final)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpConvMlpDropPathcalculate_drop_path_rates	LayerNorm
LayerScaleLayerScale2dClassifierHeadNormMlpClassifierHeadcreate_attnget_act_layerget_norm_layerget_norm_act_layercreate_conv2dcreate_pool2dtrunc_normal_tf_	to_2tupleextend_tuplemake_divisible_assert	RelPosMlp
RelPosBiasRelPosBiasTfuse_fused_attnresize_rel_pos_bias_table   )build_model_with_cfg)feature_take_indices)register_notrace_function)named_applycheckpoint_seq)generate_default_cfgsregister_model)
MaxxVitCfgMaxxVitConvCfgMaxxVitTransformerCfgMaxxVitc                   l   \ rS rSr% SrSr\\S'   Sr\	\S'   Sr
\\S'   Sr\	\S	'   Sr\	\S
'   Sr\	\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\\\4      \S'   Sr\\\\4      \S'   Sr\	\S'   Sr\	\S'   Sr\\   \S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r \\S$'   S% r!S&r"g)'r7   T   z-Configuration for MaxxVit transformer blocks.    dim_headT
head_first      @expand_ratioexpand_firstshortcut_bias	attn_bias        	attn_drop	proj_dropavg2	pool_typebiasrel_pos_type   rel_pos_dimpartition_ratioNwindow_size	grid_sizeFno_block_attnuse_nchw_attninit_valuesgelu	act_layerlayernorm2d
norm_layer	layernormnorm_layer_clư>norm_epsc                     U R                   b  [        U R                   5      U l         U R                  b:  [        U R                  5      U l        U R                   c  U R                  U l         g g g N)rN   r$   rM   selfs    R/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/maxxvit.py__post_init__#MaxxVitTransformerCfg.__post_init__m   s\    >>%&t~~6DN'()9)9:D~~%!%!1!1 & (    )rN   rM   )#__name__
__module____qualname____firstlineno____doc__r<   int__annotations__r=   boolr?   floatr@   rA   rB   rD   rE   rG   strrI   rK   rL   rM   r   r   rN   rO   rP   rQ   rS   rU   rW   rY   r_   __static_attributes__ ra   r^   r7   r7   T   s    7HcJL%L$M4ItIuIuIsL#KOS-1K%S/*1+/Ixc3h(/M4M4#'K%'Is#J#$M3$He2ra   r7   c                   D   \ rS rSr% SrSr\\S'   Sr\	\S'   Sr
\\S'   S	r\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\	\S'   Sr\\	   \S'   S r\\S!'   Sr\\S"'   Sr\\S#'   S$r\\	   \S%'   S& rS'r g$)(r6   v   z-Configuration for MaxxVit convolution blocks.mbconv
block_typer>   r?   Texpand_output   kernel_sizer-   
group_sizeFpre_norm_actoutput_biasdwstride_moderF   rG   downsample_pool_type padding
attn_earlyse
attn_layersiluattn_act_layer      ?
attn_ratiorX   rQ   rR   rS   rU   rW   NrY   c                 H   U R                   S;   d   eU R                   S:H  nU R                  (       d  U(       a  SOSU l        U R                  (       d  U(       d  SU l        U R                  c  U(       a  SOSU l        U R                  =(       d    U R
                  U l        g )N)rp   convnextrp   batchnorm2drT   rV   h㈵>rX   )rq   rU   rW   rY   rz   rG   )r]   
use_mbconvs     r^   r_   MaxxVitConvCfg.__post_init__   st    "8888__0
/9m}DO!!*!,D== $.DDDM$($=$=$O!ra   )rz   rY   rU   rW   )!rb   rc   rd   re   rf   rq   rk   rh   r?   rj   rr   ri   rt   rg   ru   rv   rw   ry   rG   rz   r|   r}   r   r   r   rQ   r   rS   rU   rW   rY   r_   rl   rm   ra   r^   r6   r6   v   s    7JL%M4KJL$KKIs &#&GSJJ NC J#'K%'IsJM3 $Hhuo$
Pra   r6   c                       \ rS rSr% SrSr\\S4   \S'   Sr	\\S4   \S'   Sr
\\\\\S4   4   S4   \S	'   S
r\\\\\4   4   \S'   Sr\\S'   \" \S9r\\S'   \" \S9r\\S'   Sr\\   \S'   Sr\\S'   Srg)r5      z!Configuration for MaxxVit models.`           .	embed_dim   rs      r   depths)Cr   Tr   rq   @   
stem_widthF	stem_bias)default_factoryconv_cfgtransformer_cfgNhead_hidden_sizevit_effweight_initrm   )rb   rc   rd   re   rf   r   r   rg   rh   r   rq   r   rk   r   r   ri   r   r6   r   r7   r   r   r   r   rl   rm   ra   r^   r5   r5      s    +!4IuS#X4*FE#s(O*:NJeCsCx01367N.0Jc5c?*+0It$^DHnD-2CX-YO*Y&*hsm* K ra   r5   c                      ^  \ rS rSr% Sr\\   \S'             SS\S\	\   S\S\S\S	\S
\	\
   S\S\4U 4S jjjrSS\R                  S\	\R                     S\R                  4S jjrSrU =r$ )Attention2d   z)Multi-head attention for 2D NCHW tensors.
fused_attndimdim_outr<   rH   r@   r=   rel_pos_clsrD   rE   c                   > XS.n[         TU ]  5         U=(       d    UnU(       a  UOUnX-  U l        X0l        X`l        US-  U l        [        5       U l        [        R                  " XS-  S4SU0UD6U l
        U(       a  U" SSU R                  0UD6OSU l        [        R                  " U5      U l        [        R                  " XS4SU0UD6U l        [        R                  " U	5      U l        g)	  
Args:
    dim: Input dimension.
    dim_out: Output dimension (defaults to input dimension).
    dim_head: Dimension per attention head.
    bias: Whether to use bias in qkv and projection.
    expand_first: Whether to expand channels before or after qkv.
    head_first: Whether heads are first in tensor layout.
    rel_pos_cls: Relative position class to use.
    attn_drop: Attention dropout rate.
    proj_drop: Projection dropout rate.
devicedtype      rs   r-   rH   	num_headsNrm   )super__init__r   r<   r=   scaler+   r   r   Conv2dqkvrel_posDropoutrD   projrE   r]   r   r   r<   rH   r@   r=   r   rD   rE   r   r   dddim_attn	__class__s                 r^   r   Attention2d.__init__   s    4 /.S*7!- $%
(*99SQ,CCCFQ{BT^^BrBW[I.IIhDDD	I.ra   xshared_rel_posreturnc                    UR                   u  p4pVU R                  (       aK  U R                  U5      R                  X0R                  U R
                  S-  S5      R                  SSS9u  pxn	OJU R                  U5      R                  USU R                  U R
                  S5      R                  S5      u  pxn	U R                  (       a  S n
U R                  b  U R                  R                  5       n
OUb  Un
[        R                  R                  R                  UR!                  SS5      R#                  5       UR!                  SS5      R#                  5       U	R!                  SS5      R#                  5       U
U R$                  (       a  U R&                  R(                  OSS9R!                  SS5      R                  USXV5      nOXpR*                  -  nUR!                  SS5      U-  nU R                  b  U R                  U5      nOUb  X-   nUR-                  SS9nU R'                  U5      nXR!                  SS5      -  R                  USXV5      nU R/                  U5      nU R1                  U5      nU$ )	Nrs   r   r   r-   rC   	attn_mask	dropout_p)shaper=   r   viewr   r<   chunkreshapeunbindr   r   get_biastorchr   
functionalscaled_dot_product_attention	transpose
contiguoustrainingrD   pr   softmaxr   rE   )r]   r   r   Br   HWqkvrB   attns               r^   forwardAttention2d.forward   s   WW
a??hhqk&&q..$--!:KRPVVWX^_V`GA!hhqk))!QrRYYZ[\GA!??I||' LL113	+*	##@@B#..0B#..0B#..0#.2mm$..** A  iB2q 4  JJA;;r2&*D||'||D)+,<<B<'D>>$'D^^B++11!R>AIIaLNN1ra   
rD   r<   r   r=   r   r   rE   r   r   r   
Nr;   TTTNrC   rC   NNr[   rb   rc   rd   re   rf   r   ri   rh   rg   r   r	   rj   r   r   Tensorr   rl   __classcell__r   s   @r^   r   r      s    3d
 &*!%#.2!!(/(/ c](/ 	(/
 (/ (/ (/ "(+(/ (/ (/ (/T# #x7M #Y^YeYe # #ra   r   c                      ^  \ rS rSr% Sr\\   \S'             SS\S\	\   S\S\S\S	\S
\	\
   S\S\4U 4S jjjrSS\R                  S\	\R                     S\R                  4S jjrSrU =r$ )AttentionCl   z/Channels-last multi-head attention (B, ..., C).r   r   r   r<   rH   r@   r=   r   rD   rE   c                   > XS.n[         TU ]  5         U=(       d    UnU(       a  X!:  a  UOUnX-  S:X  d   S5       eX-  U l        X0l        X`l        US-  U l        [        5       U l        [        R                  " XS-  4SU0UD6U l
        U(       a  U" S	SU R                  0UD6OSU l        [        R                  " U5      U l        [        R                  " X4SU0UD6U l        [        R                  " U	5      U l        g)
r   r   r   z(attn dim should be divisible by head_dimr   rs   rH   r   Nrm   )r   r   r   r<   r=   r   r+   r   r   Linearr   r   r   rD   r   rE   r   s                 r^   r   AttentionCl.__init__  s    4 /.S*w}7#"a'S)SS'!- $%
(*99SQ,@T@R@FQ{BT^^BrBW[I.IIhAdAbA	I.ra   r   r   r   c                 T   UR                   S   nUR                   S S nU R                  (       a\  U R                  U5      R                  USU R                  U R
                  S-  5      R                  SS5      R                  SSS9u  pVnOZU R                  U5      R                  USSU R                  U R
                  5      R                  SS5      R                  S5      u  pVnU R                  (       a  S nU R                  b  U R                  R                  5       nOUb  Un[        R                  R                  R!                  XVUUU R"                  (       a  U R$                  R&                  OSS9nOjXPR(                  -  nXVR                  S	S5      -  n	U R                  b  U R                  XS
9n	OUb  X-   n	U	R+                  SS9n	U R%                  U	5      n	X-  nUR                  SS5      R                  US-   5      nU R-                  U5      nU R/                  U5      nU$ )Nr   r   rs   r-   r   r   rC   r   r   r   )r   )r   r=   r   r   r   r<   r   r   r   r   r   r   r   r   r   r   r   r   rD   r   r   r   r   rE   )
r]   r   r   r   restore_shaper   r   r   rB   r   s
             r^   r   AttentionCl.forward,  s   GGAJ??hhqk&&q"dnndmma>OPZZ[\^_`ffghnofpGA!hhqk))!RDNNDMMR\\]^`abiijklGA!??I||' LL113	+*	##@@a#.2mm$..** A A JJA{{2r**D||'||D|H+,<<B<'D>>$'DAKK1%%me&;<IIaLNN1ra   r   r   r[   r   r   s   @r^   r   r      s    9d
 &*!%#.2!!)/)/ c])/ 	)/
 )/ )/ )/ "(+)/ )/ )/ )/V# #x7M #Y^YeYe # #ra   r   c                      ^  \ rS rSrSr     SS\S\S\S\S\4
U 4S jjjrS	\	R                  S
\	R                  4S jrSrU =r$ )Downsample2diR  a!  A downsample pooling module supporting several maxpool and avgpool modes.

* 'max' - MaxPool2d w/ kernel_size 3, stride 2, padding 1
* 'max2' - MaxPool2d w/ kernel_size = stride = 2
* 'avg' - AvgPool2d w/ kernel_size 3, stride 2, padding 1
* 'avg2' - AvgPool2d w/ kernel_size = stride = 2
r   r   rG   r|   rH   c           	        > [         TU ]  5         US;   d   eUS:X  a  [        SSSU=(       d    SS9U l        O[US:X  a  [        SSU=(       d    SS	9U l        O;US
:X  a  [        S
SSSU=(       d    SS9U l        O[        S
SU=(       d    SS	9U l        X:w  a  [        R
                  " XSXVUS9U l        g[        R                  " 5       U l        g)z
Args:
    dim: Input dimension.
    dim_out: Output dimension.
    pool_type: Type of pooling operation.
    padding: Padding mode.
    bias: Whether to use bias in expansion conv.
)maxmax2avgrF   r   rs   r   r-   )rt   strider|   r   r   )r|   r   F)rt   r   count_include_padr|   )rH   r   r   N)r   r   r"   poolr   r   expandIdentity)	r]   r   r   rG   r|   rH   r   r   r   s	           r^   r   Downsample2d.__init__[  s    $ 	::::%e1glYZ[DI& %eQ1EDI%%1Q%QXQ]\]_DI &eQ1EDI>))C!$UZ[DK++-DKra   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r[   )r   r   r]   r   s     r^   r   Downsample2d.forward~  s!    IIaLKKNra   )r   r   )rF   r{   TNN)rb   rc   rd   re   rf   rg   rk   ri   r   r   r   r   rl   r   r   s   @r^   r   r   R  st     $!(!( !( 	!(
 !( !( !(F %,,  ra   r   r{   modulenameschemer   c                    [        U [        R                  [        R                  45      (       Ga  US:X  a`  [        R                  R                  U R                  SS9  U R                  b*  [        R                  R                  U R                  5        ggUS:X  aL  [        U R                  SS9  U R                  b*  [        R                  R                  U R                  5        ggUS:X  aa  [        R                  R                  U R                  5        U R                  b*  [        R                  R                  U R                  5        gg[        R                  R                  U R                  5        U R                  bY  SU;   a)  [        R                  R                  U R                  SS9  g[        R                  R                  U R                  5        ggg)	z&Initialize transformer module weights.normal{Gz?stdNtrunc_normalxavier_normalmlprX   )
isinstancer   r   r   initnormal_weightrH   zeros_r#   xavier_normal_xavier_uniform_)r   r   r   s      r^   _init_transformerr    s8   &299bii011XGGOOFMMsO3{{&v{{+ '~%V]]4{{&v{{+ '&GG""6==1{{&v{{+ ' GG##FMM2{{&D=GGOOFKKTO:GGNN6;;/	 '! 2ra   c                      ^  \ rS rSrSrSS\" 5       SSS4S\S\S\S	\\   S
\S\	4U 4S jjjr
SS\SS4S jjrSS\R                  S\\R                     S\R                  4S jjrSrU =r$ )TransformerBlock2di  aE  Transformer block with 2D downsampling.

'2D' NCHW tensor layout

Some gains can be seen on GPU using a 1D / CL block, BUT w/ the need to switch back/forth to NCHW
for spatial pooling, the benefit is minimal so ended up using just this variant for CoAt configs.

This impl was faster on TPU w/ PT XLA than the 1D experiment.
r-   NrC   r   r   r   r   cfg	drop_pathc	                   > XxS.n	[         TU ]  5         [        [        UR                  5      UR
                  S9n
[        UR                  5      nUS:X  ap  [        X4UR                  UR                  S.U	D6U l        [        R                  " [        SU
" U40 U	D64S[        X4SUR                  0U	D64/5      5      U l        O/X:X  d   e[        R                   " 5       U l        U
" U40 U	D6U l        [#        UU4UR$                  UR&                  UR(                  UUR*                  UR,                  S.U	D6U l        UR0                  (       a  [3        U4S	UR0                  0U	D6O[        R                   " 5       U l        US
:  a  [7        U5      O[        R                   " 5       U l        U
" U40 U	D6U l        [=        SU[?        X%R@                  -  5      UUR,                  S.U	D6U l!        UR0                  (       a  [3        U4S	UR0                  0U	D6O[        R                   " 5       U l"        US
:  a  [7        U5      U l#        g[        R                   " 5       U l#        g)z
Args:
    dim: Input dimension.
    dim_out: Output dimension.
    stride: Stride for downsampling.
    rel_pos_cls: Relative position class.
    cfg: Transformer block configuration.
    drop_path: Drop path rate.
r   epsr   )rG   rH   normdownrG   )r<   r@   rH   r   rD   rE   rQ   rC   in_featureshidden_featuresrS   dropNrm   )$r   r   r   r   rU   rY   r   rS   r   rG   rA   shortcutr   
Sequentialr   norm1r   r   r<   r@   rB   rD   rE   r   rQ   r   ls1r   
drop_path1norm2r   rg   r?   r  ls2
drop_path2)r]   r   r   r   r   r  r  r   r   r   rU   rS   r   s               r^   r   TransformerBlock2d.__init__  s
   ( /^CNN;N
!#--0	Q;(mUXUfUfmjlmDM{C.2./cN#--N2NO4 ( DJ
 >!>KKMDM#C.2.DJ

 \\))#mmmm

 

	 PS<KS__KKdfdododq1:R(9-R[[].2.
 
*:*: :;	

 
 PS<KS__KKdfdododq1:R(9-R[[]ra   r   r   c                 4    [        [        [        US9U 5        g Nr   )r1   r   r  r]   r   s     r^   init_weightsTransformerBlock2d.init_weights  s    G-f=tDra   r   r   c           
      $   U R                  U5      U R                  U R                  U R                  U R	                  U5      US95      5      -   nXR                  U R                  U R                  U R                  U5      5      5      5      -   nU$ )Nr   )	r  r   r  r   r  r#  r"  r  r!  )r]   r   r   s      r^   r   TransformerBlock2d.forward  sm    MM!ttxx		$**Q-`n	8o/pqq$**Q-)@ ABBra   )	r   r   r#  r  r"  r  r  r!  r  r{   r[   )rb   rc   rd   re   rf   r7   rg   r   r	   rj   r   rk   r)  r   r   r   rl   r   r   s   @r^   r  r    s     .2)>)@!;S;S ;S 	;S
 "(+;S ';S ;S ;SzE3 E E x7M Y^YeYe  ra   r  c                    [        U [        R                  5      (       Ga  US:X  a`  [        R                  R	                  U R
                  SS9  U R                  b*  [        R                  R                  U R                  5        ggUS:X  aL  [        U R
                  SS9  U R                  b*  [        R                  R                  U R                  5        ggUS:X  aa  [        R                  R                  U R
                  5        U R                  b*  [        R                  R                  U R                  5        ggU R                  S   U R                  S   -  U R                  -  nX0R                  -  n[        R                  R	                  U R
                  S[        R                  " S	U-  5      5        U R                  b*  [        R                  R                  U R                  5        ggg)
z&Initialize convolution module weights.r   r  r  Nr  r  r   r-   g       @)r  r   r   r  r	  r
  rH   r  r#   r  rt   out_channelsgroupsmathsqrt)r   r   r   fan_outs       r^   
_init_convr4    sV   &"))$$XGGOOFMMsO3{{&v{{+ '~%V]]4{{&v{{+ '&GG""6==1{{&v{{+ ' ((+f.@.@.CCfFYFYYG%GGGOOFMM1diig.FG{{&v{{+ '% %ra   ru   channelsc                 .    U (       d  gX-  S:X  d   eX-  $ )z3Calculate number of groups for grouped convolution.r-   r   rm   )ru   r5  s     r^   
num_groupsr7    s$     $)))%%ra   c                      ^  \ rS rSrSrSS\" 5       SSS4S\S\S	\S
\\\4   S\S\4U 4S jjjr	SS\
SS4S jjrS\R                  S\R                  4S jrSrU =r$ )MbConvBlocki  zGPre-Norm Conv Block - 1x1 - kxk - 1x1, w/ inverted bottleneck (expand).r-   r-   r-   rC   Nin_chsout_chsr   dilationr  r  c	                 :  > XxS.n	[         TU ]  5         [        [        UR                  UR
                  5      UR                  S9n
[        UR                  (       a  UOUUR                  -  5      n[        UR                  U5      nUS:X  a4  [        X4UR                  UR                  UR                  S.U	D6U l        O["        R$                  " 5       U l        UR&                  S;   d   eSu  pnUR&                  S:X  a  X4S   nnOUR&                  S	:X  a  X4S   nnOX4S
   nnU
" U4SUR(                  0U	D6U l        US:  a)  [        X4UR,                  UR                  S.U	D6U l        O["        R$                  " 5       U l        [1        XS4SU0U	D6U l        U
" U40 U	D6U l        [1        UUUR6                  4UUUUR                  S.U	D6U l        0 n[;        UR<                  [>        5      (       a]  UR<                  S:X  d  UR<                  S:X  a=  UR@                  US'   [C        URD                  UR                  (       a  UOU-  5      US'   URF                  (       a4  [I        UR<                  U40 UDU	D6U l%        U
" U40 U	D6U l&        SU l'        O3SU l%        U
" U40 U	D6U l&        [I        UR<                  U40 UDU	D6U l'        [1        XS4SUR                  0U	D6U l(        US:  a  [S        U5      U l*        g["        R$                  " 5       U l*        g)z
Args:
    in_chs: Input channels.
    out_chs: Output channels.
    stride: Stride for conv.
    dilation: Dilation for conv.
    cfg: Convolution block configuration.
    drop_path: Drop path rate.
r   r  r   )rG   rH   r|   )r   1x1rx   )r-   r-   r-   r   r-   r?  r   	apply_act)rG   r|   r   )r   r=  r0  r|   r~   ecarS   rd_channelsNrH   rC   )+r   r   r   r    rU   rS   rY   r&   rr   r?   r7  ru   r   rG   rw   r|   r  r   r   ry   rv   pre_normrz   r  r!   	conv1_1x1r  rt   	conv2_kxkr  r   rk   r   rg   r   r}   r   se_earlyr!  r~   	conv3_1x1r   r  )r]   r;  r<  r   r=  r  r  r   r   r   norm_act_layermid_chsr0  stride_poolstride_1stride_2
dilation_2attn_kwargsr   s                     r^   r   MbConvBlock.__init__  s   ( / !3CNNCMM!RX[XdXde S->->'FcN^N^!^_CNNG4Q;(k+.==sX[XcXckgikDM KKMDM"7777*1'x??f$&,qkK__%#)A;jHj#)A;jH&vP9I9IPRP?$Vss?W?WadalalsprsDIDI&vQ(QbQ#G2r2
&OO	
 KK	
 	
 cnnc**~~%5)@+.+=+=K(-0cN_N_7el1m-nM* >>'U;URTUDM'626DJDG DM'626DJ!#..'O[OBODG&wWWTVW09B),BKKMra   r   r   c                 4    [        [        [        US9U 5        g r&  r1   r   r4  r(  s     r^   r)  MbConvBlock.init_weightse      GJv6=ra   r   c                    U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  U5      nU R                  b  U R                  U5      nU R                  U5      nU R                  b  U R                  U5      nU R                  U5      nU R                  U5      U-   nU$ r[   )r  rC  r  rD  r  rE  rF  r!  r~   rG  r  r]   r   r  s      r^   r   MbConvBlock.forwardh  s    ==#MM!IIaL NN1JJqM NN1==$a AJJqM77
A NN1NN1(ra   )rD  rE  rG  r  r  r  r!  rC  r~   rF  r  r-  )rb   rc   rd   re   rf   r6   rg   r   rj   r   rk   r)  r   r   r   rl   r   r   s   @r^   r9  r9    s    Q (."0"2!PRPR PR 	PR
 CHoPR  PR PR PRd>3 > > %,,  ra   r9  c                      ^  \ rS rSrSrSSSS\" 5       SSSS4	S	\S
\\   S\S\S\\\4   S\S\	S\
4U 4S jjjrS\R                  S\R                  4S jrSrU =r$ )ConvNeXtBlocki  zConvNeXt Block.N   r-   r:  TrC   r;  r<  rt   r   r=  r  conv_mlpr  c           	        > XS.n[         TU ]  5         U=(       d    Un[        UR                  5      nU(       a.  [	        [        UR                  5      UR                  S9n[        nOSUR                  ;   d   e[        n[        nXpl        US:X  a  [        X40 UD6U l        OIX:w  a*  [        R                  " X4SUR                   S.UD6U l        O[        R"                  " 5       U l        UR$                  S;   d   eSu  nnUR$                  S	:X  a  UnOUnUS:X  a  [        X4S
UR&                  0UD6U l        O[        R"                  " 5       U l        [+        UU4UUUS   SUR                   S.UD6U l        U" U40 UD6U l        U" U[1        UR2                  U-  5      4UR                   US.UD6U l        U(       aB  UR6                  (       a  [9        X&R6                  40 UD6O[        R"                  " 5       U l        OAUR6                  (       a  [=        X&R6                  40 UD6O[        R"                  " 5       U l        US:  a  [?        U5      U l         g[        R"                  " 5       U l         g)a)  
Args:
    in_chs: Input channels.
    out_chs: Output channels.
    kernel_size: Kernel size for depthwise conv.
    stride: Stride for conv.
    dilation: Dilation for conv.
    cfg: Convolution block configuration.
    conv_mlp: Whether to use convolutional MLP.
    drop_path: Drop path rate.
r   r  rV   r   r-   )rt   rH   )r   rx   r:  r   rG   T)rt   r   r=  	depthwiserH   )rH   rS   rC   N)!r   r   r   rS   r   r   rU   rY   r   r   r   use_conv_mlpr   r  r   r   rw   r   ry   rz   r  r!   conv_dwr  rg   r?   r  rQ   r   lsr   r   r  )r]   r;  r<  rt   r   r=  r  rZ  r  r   r   r   rS   rU   	mlp_layerrJ  	stride_dwr   s                    r^   r   ConvNeXtBlock.__init__  s   0 /#V!#--0	 !?S\\RJI#..000"JI$Q;(?B?DMIIfa13??a^`aDMKKMDM.000!%Y??f$ KI!$V^s?W?W^[]^DIDI$	
 $a[	
 	
 w-"-	  7*+
 	

 
 FIool7OOBrB[][f[f[hDGDGOOj//@R@Y[YdYdYfDG09B),BKKMra   r   r   c                    U R                  U5      nU R                  U5      nU R                  U5      nU R                  (       a4  U R	                  U5      nU R                  U5      nU R                  U5      nO[UR                  SSSS5      nU R	                  U5      nU R                  U5      nU R                  U5      nUR                  SSSS5      nU R                  U5      U-   nU$ Nr   r   rs   r-   )	r  r  r^  r]  r  r  r_  permuter  rU  s      r^   r   ConvNeXtBlock.forward  s    ==#IIaLLLO		!AA
A		!Q1%A		!AA
A		!Q1%ANN1(ra   )r^  r  r  r_  r  r  r  r]  )rb   rc   rd   re   rf   r6   rg   r   r   ri   rj   r   r   r   r   rl   r   r   s   @r^   rX  rX    s    
 &* (."0"2!!OROR c]OR 	OR
 OR CHoOR  OR OR OR ORb %,,  ra   rX  r   rM   c                 `   U R                   u  p#pE[        X1S   -  S:H  SU SUS    S35        [        XAS   -  S:H  SU SUS    S35        U R                  X#US   -  US   XAS   -  US   U5      n U R                  SSSSS	S
5      R	                  5       R                  SUS   US   U5      nU$ )z'Partition into non-overlapping windows.r   height () must be divisible by window ()r-   width (rs   r      r   r   r   r'   r   re  r   )r   rM   r   r   r   r   windowss          r^   window_partitionro    s    JA!AA!#xs2QR]^_R`Qaab%cdAA!#wqc1PQ\]^Q_P``a%bc	q{1~%{1~qN7JKXYN\]^Aii1aAq)446;;BAP[\]P^`abGNra   rn  img_sizec                     Uu  p4U R                   S   nU R                  SX1S   -  XAS   -  US   US   U5      nUR                  SSSSSS5      R                  5       R                  SX4U5      nU$ )zReverse window partition.r   r   r-   rs   r   rl  r   r   r   re  r   rn  rM   rp  r   r   r   r   s          r^   window_reversert    s     DAbAR!n,aq>.A;q>S^_`SacdeA			!Q1a#..055b!BAHra   rN   c           	      Z   U R                   u  p#pE[        X1S   -  S:H  SU SUS    35        [        XAS   -  S:H  SU SUS    35        U R                  X!S   X1S   -  US   XAS   -  U5      n U R                  SSSSSS	5      R	                  5       R                  S
US   US   U5      nU$ )z6Partition into overlapping windows with grid striding.r   height  must be divisible by grid r-   width r   rl  rs   r   r   rm  )r   rN   r   r   r   r   rn  s          r^   grid_partitionry    s    JA!A!!WQC/J9UV<.#YZA!!VA3.I)TU,#XY	qA,q\ 19Q<PQlARTUVAii1aAq)446;;B	!iXYl\]^GNra   c                     Uu  p4U R                   S   nU R                  SX1S   -  XAS   -  US   US   U5      nUR                  SSSSSS5      R                  5       R                  SX4U5      nU$ )zReverse grid partition.r   r   r-   rs   rl  r   r   rr  rn  rN   rp  r   r   r   r   s          r^   grid_reverser|    s~     DAbARl*A1,=y|YWX\[\]A			!Q1a#..055b!BAHra   r  c                     SnU R                   S:X  a  [        [        XR                  S9nU$ U R                   S:X  a  [        [        US9nU$ U R                   S:X  a  [        [
        US9nU$ )z,Get relative position class based on config.Nr  )rM   
hidden_dimrH   )rM   bias_tf)rI   r   r(   rK   r)   r*   )r  rM   r   s      r^   get_rel_pos_clsr    ss    K
5 i[__]
 	 
		V	#jkB  
		Y	&lDra   c            	       d   ^  \ rS rSrSrS\" 5       SSS4S\S\S\S	\4U 4S
 jjjr	S r
S rSrU =r$ )PartitionAttentionCli  zJGrid or Block partition + Attn + FFN.

NxC 'channels last' tensor layout.
blockrC   Nr   partition_typer  r  c           
      H  > XVS.n[         TU ]  5         [        [        UR                  5      UR
                  S9n[        UR                  5      n	US:H  U l        [        U R                  (       a  UR                  OUR                  5      U l        [        X0R                  5      n
U" U40 UD6U l        [        UU4UR                   UR"                  UR$                  U
UR&                  UR(                  S.UD6U l        UR,                  (       a  [/        U4SUR,                  0UD6O[0        R2                  " 5       U l        US:  a  [7        U5      O[0        R2                  " 5       U l        U" U40 UD6U l        [=        SU[?        XR@                  -  5      U	UR(                  S.UD6U l!        UR,                  (       a  [/        U4SUR,                  0UD6O[0        R2                  " 5       U l"        US:  a  [7        U5      U l#        g [0        R2                  " 5       U l#        g )	Nr   r  r  r<   rH   r=   r   rD   rE   rQ   rC   r  rm   )$r   r   r   r   rW   rY   r   rS   partition_blockr$   rM   rN   partition_sizer  r  r   r<   rB   r=   rD   rE   r   rQ   r   r   r   r  r   r   r!  r   rg   r?   r  r"  r#  r]   r   r  r  r  r   r   r   rU   rS   r   r   s              r^   r   PartitionAttentionCl.__init__   s    /^C,=,=>CLLQ
!#--0	-8'4;O;OUXUbUbc%c+>+>?*r*


 \\~~#mmmm

 

	 JM:cEsE"E^`^i^i^k1:R(9-R[[]*r*
 
&6&6 67	

 
 JM:cEsE"E^`^i^i^k1:R(9-R[[]ra   c                 <   UR                   SS nU R                  (       a  [        XR                  5      nO[	        XR                  5      nU R                  U5      nU R                  (       a  [        X0R                  U5      nU$ [        X0R                  U5      nU$ )Nr-   rs   )r   r  ro  r  ry  r   rt  r|  r]   r   rp  partitioneds       r^   _partition_attn$PartitionAttentionCl._partition_attnL  s    771Q<*1.A.ABK(,?,?@Kii,{,?,?JA  [*=*=xHAra   c           
         XR                  U R                  U R                  U R                  U5      5      5      5      -   nXR	                  U R                  U R                  U R                  U5      5      5      5      -   nU$ r[   r   r  r  r  r#  r"  r  r!  r   s     r^   r   PartitionAttentionCl.forward[  _    )=)=djjm)L MNN$**Q-)@ ABBra   
r   r   r#  r  r"  r  r  r!  r  r  )rb   rc   rd   re   rf   r7   rg   rk   rj   r   r  r   rl   r   r   s   @r^   r  r    sb     #*)>)@!*S*S  *S '	*S
 *S *SX ra   r  c                      ^  \ rS rSrSr\" 5       SSS4S\S\S\4U 4S jjjrS	\	R                  S
\	R                  4S jrS	\	R                  S
\	R                  4S jrSrU =r$ )ParallelPartitionAttentionia  zIExperimental. Grid and Block partition + single FFN.

NxC tensor layout.
rC   Nr   r  r  c           
        > XES.n[         T
U ]  5         US-  S:X  d   e[        [        UR                  5      UR
                  S9n[        UR                  5      nUR                  UR                  :X  d   e[        UR                  5      U l        [        X R                  5      n	U" U40 UD6U l        [        UUS-  4UR                  UR                   UR"                  U	UR$                  UR&                  S.UD6U l        [        UUS-  4UR                  UR                   UR"                  U	UR$                  UR&                  S.UD6U l        UR,                  (       a  [/        U4SUR,                  0UD6O[0        R2                  " 5       U l        US:  a  [7        U5      O[0        R2                  " 5       U l        U" U40 UD6U l        [=        S
U[?        XR@                  -  5      UUUR&                  S.UD6U l!        UR,                  (       a  [/        U4SUR,                  0UD6O[0        R2                  " 5       U l"        US:  a  [7        U5      U l#        g	[0        R2                  " 5       U l#        g	)zj
Args:
    dim: Input dimension.
    cfg: Transformer block configuration.
    drop_path: Drop path rate.
r   r   r   r  r  rQ   rC   )r  r  out_featuresrS   r  Nrm   )$r   r   r   r   rW   rY   r   rS   rM   rN   r$   r  r  r  r   r<   rB   r=   rD   rE   
attn_block	attn_gridrQ   r   r   r   r  r   r   r!  r   rg   r?   r  r"  r#  )r]   r   r  r  r   r   r   rU   rS   r   r   s             r^   r   #ParallelPartitionAttention.__init__g  s    /Qw!||^C,=,=>CLLQ
!#--0	#--///'8%c+>+>?*r*
%1H

 \\~~#mmmm

 

 %1H

 \\~~#mmmm

 

 JM:cEsE"E^`^i^i^k1:R(9-R[[]*r*
 
&6&6 67
 
 JM:cEsE"E^`^i^i^k1:R(9-R[[]ra   r   r   c                 <   UR                   SS n[        XR                  5      nU R                  U5      n[	        X0R                  U5      n[        XR                  5      nU R                  U5      n[        XPR                  U5      n[        R                  " XF/SS9$ )Nr-   rs   r   r   )
r   ro  r  r  rt  ry  r  r|  r   cat)r]   r   rp  partitioned_blockx_windowpartitioned_gridx_grids          r^   r  *ParallelPartitionAttention._partition_attn  s    771Q<,Q0C0CD OO,=>!"35H5H(S)!-@-@A>>*:;.0C0CXNyy(+44ra   c           
         XR                  U R                  U R                  U R                  U5      5      5      5      -   nXR	                  U R                  U R                  U R                  U5      5      5      5      -   nU$ r[   r  r   s     r^   r   "ParallelPartitionAttention.forward  r  ra   )
r  r  r   r#  r  r"  r  r  r!  r  )rb   rc   rd   re   rf   r7   rg   rj   r   r   r   r  r   rl   r   r   s   @r^   r  r  a  s     *?)@!<S<S '<S 	<S <S|5 5%,, 5 %,,  ra   r  c           	      \   U R                   u  p#pE[        XAS   -  S:H  SU SUS    S35        [        XQS   -  S:H  SU SUS    S35        U R                  X#XAS   -  US   XQS   -  US   5      n U R                  SSSSS	S
5      R	                  5       R                  SX1S   US   5      nU$ )z#Partition windows for NCHW tensors.r   rh  ri  rj  r-   rk  r   rl  rs   r   r   rm  )r   rM   r   r   r   r   rn  s          r^   window_partition_nchwr    s    JA!AA!#xs2QR]^_R`Qaab%cdAA!#wqc1PQ\]^Q_P``a%bc	qQa.(+a.!1~:M{[\~^Aii1aAq)446;;Bq>S^_`SabGNra   c           	          Uu  p4U R                   S   nU R                  SX1S   -  XAS   -  XQS   US   5      nUR                  SSSSSS5      R                  5       R                  SXSU5      nU$ )z*Reverse window partition for NCHW tensors.r-   r   r   rs   rl  r   r   rr  rs  s          r^   window_reverse_nchwr    s~     DAaAR!n,aq>.A1RSnVabcVdeA			!Q1a#..055b!BAHra   c           
      X   U R                   u  p#pE[        XAS   -  S:H  SU SUS    35        [        XQS   -  S:H  SU SUS    35        U R                  X#US   XAS   -  US   XQS   -  5      n U R                  SSSSSS	5      R	                  5       R                  S
X1S   US   5      nU$ )z Grid partition for NCHW tensors.r   rv  rw  r-   rx  rs   r   r   rl  r   rm  )r   rN   r   r   r   r   rn  s          r^   grid_partition_nchwr    s    JA!A!!WQC/J9UV<.#YZA!!VA3.I)TU,#XY	qYq\1!#4ilASTDUVAii1aAq)446;;BQ<QZ[\Q]^GNra   c           	          Uu  p4U R                   S   nU R                  SX1S   -  XAS   -  XQS   US   5      nUR                  SSSSSS5      R                  5       R                  SXSU5      nU$ )z(Reverse grid partition for NCHW tensors.r-   r   r   rs   rl  r   r   rr  r{  s          r^   grid_reverse_nchwr    s}     DAaARl*A1,=qA,PYZ[P\]A			!Q1a#..055b!BAHra   c            	          ^  \ rS rSrSrS\" 5       SSS4S\S\S\S	\4U 4S
 jjjr	S\
R                  S\
R                  4S jrS\
R                  S\
R                  4S jrSrU =r$ )PartitionAttention2di  z@Grid or Block partition + Attn + FFN.

'2D' NCHW tensor layout.
r  rC   Nr   r  r  r  c           
      H  > XVS.n[         TU ]  5         [        [        UR                  5      UR
                  S9n[        UR                  5      n	US:H  U l        [        U R                  (       a  UR                  OUR                  5      U l        [        X0R                  5      n
U" U40 UD6U l        [        UU4UR                   UR"                  UR$                  U
UR&                  UR(                  S.UD6U l        UR,                  (       a  [/        U4SUR,                  0UD6O[0        R2                  " 5       U l        US:  a  [7        U5      O[0        R2                  " 5       U l        U" U40 UD6U l        [=        S	U[?        XR@                  -  5      U	UR(                  S.UD6U l!        UR,                  (       a  [/        U4SUR,                  0UD6O[0        R2                  " 5       U l"        US:  a  [7        U5      U l#        g[0        R2                  " 5       U l#        g)
z
Args:
    dim: Input dimension.
    partition_type: Partition type ('block' or 'grid').
    cfg: Transformer block configuration.
    drop_path: Drop path rate.
r   r  r  r  rQ   rC   r  Nrm   )$r   r   r   r   rU   rY   r   rS   r  r$   rM   rN   r  r  r  r   r<   rB   r=   rD   rE   r   rQ   r   r   r   r  r   r   r!  r   rg   r?   r  r"  r#  r  s              r^   r   PartitionAttention2d.__init__  s     /^CNN;N
!#--0	-8'4;O;OUXUbUbc%c+>+>?*r*


 \\~~#mmmm

 

	 LO??<GGBG`b`k`k`m1:R(9-R[[]*r*
 
&6&6 67	

 
 LO??<GGBG`b`k`k`m1:R(9-R[[]ra   r   r   c                 <   UR                   SS  nU R                  (       a  [        XR                  5      nO[	        XR                  5      nU R                  U5      nU R                  (       a  [        X0R                  U5      nU$ [        X0R                  U5      nU$ )Nr   )r   r  r  r  r  r   r  r  r  s       r^   r  $PartitionAttention2d._partition_attn  s    7723</3F3FGK-a1D1DEKii,#K1D1DhOA  "+/B/BHMAra   c           
         XR                  U R                  U R                  U R                  U5      5      5      5      -   nXR	                  U R                  U R                  U R                  U5      5      5      5      -   nU$ r[   r  r   s     r^   r   PartitionAttention2d.forward(  r  ra   r  )rb   rc   rd   re   rf   r7   rg   rk   rj   r   r   r   r  r   rl   r   r   s   @r^   r  r    s     #*)>)@!1S1S  1S '	1S
 1S 1Sf %,,  %,,  ra   r  c                   |   ^  \ rS rSrSrS\" 5       \" 5       SSS4S\S\S\S	\S
\S\4U 4S jjjr	SS jr
S rSrU =r$ )MaxxVitBlocki.  z;MaxVit conv, window partition + FFN , grid partition + FFN.r-   rC   Nr   r   r   r   r   r  c	                 Z  > XxS.n	[         TU ]  5         UR                  U l        UR                  S:X  a  [
        O[        n
U
" X4X4US.U	D6U l        [        SX%US.U	D6nU R                  (       a  [        O[        nUR                  (       a  SOU" S0 UD6U l        U" SSS0UD6U l        g)	a  Initialize MaxxVitBlock.

Args:
    dim: Input channel dimension.
    dim_out: Output channel dimension.
    stride: Stride for downsampling.
    conv_cfg: Configuration for convolutional blocks.
    transformer_cfg: Configuration for transformer blocks.
    drop_path: Drop path rate.
r   r   r   r  r  r   r  r  Nr  gridrm   )r   r   rP   	nchw_attnrq   rX  r9  convdictr  r  rO   r  r  )r]   r   r   r   r   r   r  r   r   r   conv_clsrN  partition_layerr   s                r^   r   MaxxVitBlock.__init__1  s    * /(66$,$7$7:$E=;Sb&R[b_ab	WwyWTVW26...FZ"1"?"?$_EcWbEc(NN+Nra   c                     U R                   b"  [        [        [        US9U R                   5        [        [        [        US9U R                  5        [        [        [
        US9U R                  5        g r&  )r  r1   r   r  r  r4  r  r(  s     r^   r)  MaxxVitBlock.init_weightsR  sM    ??& 1&A4??SG-f=t~~NGJv6		Bra   c                    U R                  U5      nU R                  (       d  UR                  SSSS5      nU R                  b  U R                  U5      nU R	                  U5      nU R                  (       d  UR                  SSSS5      nU$ rd  )r  r  re  r  r  r   s     r^   r   MaxxVitBlock.forwardX  sp    IIaL~~		!Q1%A??&"ANN1~~		!Q1%Ara   )r  r  r  r  r-  )rb   rc   rd   re   rf   r6   r7   rg   rj   r   r)  r   rl   r   r   s   @r^   r  r  .  s}    E '5'75J5L!OO O 	O
 %O 3O O OBC ra   r  c                      ^  \ rS rSrSrSS\" 5       \" 5       SSS4S\S\S	\S
\S\S\S\4U 4S jjjr	SS\
SS4S jjrS\R                  S\R                  4S jrSrU =r$ )ParallelMaxxVitBlockif  zQMaxVit block with parallel cat(window + grid), one FF.

Experimental timm block.
r-   r   rC   Nr   r   r   num_convr   r   r  c
                 *  > XS.n
[         TU ]  5         UR                  S:X  a  [        O[        nUS:  a:  U" X4X5US.U
D6/nX" X"4XWS.U
D6/US-
  -  -  n[
        R                  " U6 U l        OU" X4X5US.U
D6U l        [        SX&US.U
D6U l	        g)	a  
Args:
    dim: Input dimension.
    dim_out: Output dimension.
    stride: Stride for first conv block.
    num_conv: Number of convolution blocks.
    conv_cfg: Convolution block configuration.
    transformer_cfg: Transformer block configuration.
    drop_path: Drop path rate.
r   r   r-   r  )r  r  r  Nrm   )
r   r   rq   rX  r9  r   r  r  r  r   )r]   r   r   r   r  r   r   r  r   r   r   r  convsr   s                r^   r   ParallelMaxxVitBlock.__init__l  s    , /$,$7$7:$E=;a<cc6S\c`bcdEhwYXYVXYZ^fij^jkkEu-DI ffV_fcefDI.k7[dkhjk	ra   r   r   c                     [        [        [        US9U R                  5        [        [        [        US9U R
                  5        g r&  )r1   r   r  r   r4  r  r(  s     r^   r)  !ParallelMaxxVitBlock.init_weights  s-    G-f=tyyIGJv6		Bra   r   c                     U R                  U5      nUR                  SSSS5      nU R                  U5      nUR                  SSSS5      nU$ rd  )r  re  r   r   s     r^   r   ParallelMaxxVitBlock.forward  sI    IIaLIIaAq!IIaLIIaAq!ra   )r   r  r-  )rb   rc   rd   re   rf   r6   r7   rg   rj   r   rk   r)  r   r   r   rl   r   r   s   @r^   r  r  f  s     '5'75J5L! l l  l 	 l
  l % l 3 l  l  lDC3 C C %,,  ra   r  c                      ^  \ rS rSrSrSSSS\" 5       \" 5       SSS4	S	\S
\S\S\S\\\4   S\	\
\\
   4   S\S\S\	\\\   4   4U 4S jjjrS\R                  S\R                  4S jrSrU =r$ )MaxxVitStagei  zEMaxxVit stage consisting of mixed convolution and transformer blocks.r   rl  )   r  r   rC   Nr;  r<  r   depth	feat_sizeblock_typesr   r   r  c           
        > XS.n[         TU ]  5         SU l        [        Xd5      n/ n[	        U5       H  u  pUS:X  a  UOSnUS;   d   eUS:X  a1  UR
                  S:X  a  [        O[        nUU" UU4UUX   S.UD6/-  nOgUS	:X  a$  [        Xu5      nU[        UU4UUUX   S
.UD6/-  nO=US:X  a  U[        UU4UUUX   S.UD6/-  nOUS:X  a  U[        UU4UUUX   S.UD6/-  nUnM     [        R                  " U6 U l        g)az  
Args:
    in_chs: Input channels.
    out_chs: Output channels.
    stride: Stride for first block.
    depth: Number of blocks in stage.
    feat_size: Feature map size.
    block_types: Block types ('C' for conv, 'T' for transformer, etc).
    transformer_cfg: Transformer block configuration.
    conv_cfg: Convolution block configuration.
    drop_path: Drop path rate(s).
r   Fr   r-   )r   r   MPMr   r   r  r   )r   r   r  r  r  )r   r   r   r  r  N)r   r   grad_checkpointingr%   	enumeraterq   rX  r9  r  r  r  r  r   r  blocks)r]   r;  r<  r   r  r  r  r   r   r  r   r   r   r  itblock_strider  r   r   s                      r^   r   MaxxVitStage.__init__  s   4 /"'";6k*DA%&!V6L----Cx,4,?,?:,M=S^8 ( 'l    c-oI- ( +''l    c< (%$3'l    d/ (%$3'l    FY +Z mmV,ra   r   r   c                     U R                   (       a;  [        R                  R                  5       (       d  [	        U R
                  U5      nU$ U R                  U5      nU$ r[   )r  r   jitis_scriptingr2   r  r   s     r^   r   MaxxVitStage.forward  sH    ""599+A+A+C+Ct{{A.A  AAra   )r  r  )rb   rc   rd   re   rf   r7   r6   rg   r   r   rk   rj   r   r   r   r   r   rl   r   r   s   @r^   r  r    s    O )1255J5L'5'735M-M- M- 	M-
 M- S#XM- sE#J/M- 3M- %M- UDK/0M- M-^ %,,  ra   r  c                      ^  \ rS rSrSr        SS\S\S\S\S\S	\S
\S\4U 4S jjjr	SS\SS4S jjr
S\R                  S\R                  4S jrSrU =r$ )Stemi  z"Stem layer for feature extraction.Nr;  r<  rt   r|   rH   rS   rU   rY   c                 H  > XS.n[         TU ]  5         [        U[        [        45      (       d  [        U5      n[        [        Xv5      US9nUS   U l        SU l	        [        XS   U4SXES.UD6U l        U" US   40 UD6U l        [        US   US   U4SXES.UD6U l        g)	a  
Args:
    in_chs: Input channels.
    out_chs: Output channels.
    kernel_size: Kernel size for convolutions.
    padding: Padding mode.
    bias: Whether to use bias.
    act_layer: Activation layer.
    norm_layer: Normalization layer.
    norm_eps: Normalization epsilon.
r   r  r   r   r   )r   r|   rH   r-   N)r   r   r  listtupler$   r   r    r<  r   r!   conv1r  conv2)r]   r;  r<  rt   r|   rH   rS   rU   rY   r   r   r   rH  r   s                r^   r   Stem.__init__  s    0 /'D%=11(G !3J!JPXYr{"61:{o1V]olno
#GAJ5"5
"71:wqz;sqZasprs
ra   r   r   c                 4    [        [        [        US9U 5        g r&  rQ  r(  s     r^   r)  Stem.init_weights  rS  ra   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r[   )r  r  r  r   s     r^   r   Stem.forward  s.    JJqMJJqMJJqMra   )r  r  r  r<  r   )rs   r{   FrR   r   r   NNr-  )rb   rc   rd   re   rf   rg   rk   ri   rj   r   r)  r   r   r   rl   r   r   s   @r^   r  r    s    ,  !#+"#t#t #t 	#t
 #t #t #t #t #t #tJ>3 > > %,,  ra   r  c                     U R                   b  U R                  (       d   eU $ US   U R                  -  US   U R                  -  4n[        XUS9n U $ )z>Configure window size based on image size and partition ratio.r   r-   )rM   rN   )rM   rN   rL   r   )r  rp  r  s      r^   cfg_window_sizer  &  sV    
"}}}
a[C$7$77!H[H[9[[N
#^
LCJra   kwargsc           	      j   0 n0 n0 nUR                  5        Ha  u  pVUR                  S5      (       a  XbUR                  SS5      '   M1  UR                  S5      (       a  XcUR                  SS5      '   M]  XdU'   Mc     [        U 4[        U R                  40 UD6[        U R                  40 UD6S.UD6n U $ )z-Overlay keyword arguments onto configuration.transformer_r{   conv_)r   r   )items
startswithr   r   r   )r  r  transformer_kwargsconv_kwargsbase_kwargsr   r   s          r^   _overlay_kwargsr  0  s    KK<<''@Aqyy<=\\'""23		'2./N   3 3J7IJ55 	C Jra   c                     ^  \ rS rSrSr        S)S\S\\\\\4   4   S\S\S\	S	\
S
\
S\4U 4S jjjrS*S\R                  S\	S\	SS4S jjr\R"                  R$                  S\\	   4S j5       r\R"                  R$                  S+S\S\\	\4   4S jj5       r\R"                  R$                  S,S\SS4S jj5       r\R"                  R$                  S\R                  4S j5       rS-S\S\\	   SS4S jjr     S.S\R8                  S\\\\\   4      S\S\S\	S\S\\\R8                     \\R8                  \\R8                     4   4   4S jjr   S/S\\\\   4   S \S!\S\\S"4   4S# jjrS\R8                  S\R8                  4S$ jr S+S\R8                  S%\S\R8                  4S& jjr!S\R8                  S\R8                  4S' jr"S(r#U =r$$ )0r8   iE  zsCoaTNet + MaxVit base model.

Highly configurable for different block compositions, tensor layouts, pooling types.
Nr  rp  in_chansnum_classesglobal_pool	drop_ratedrop_path_rater  c
                   > [         TU ]  5         XS.n[        U5      nU
(       a  [        U40 U
D6n[	        UR
                  U5      nX@l        X0l        XPl        UR                  S   =U l
        U l	        X`l        SU l        / U l        [        SUUR                  UR                   R"                  UR$                  UR                   R&                  UR                   R(                  UR                   R*                  S.UD6U l        U R,                  R.                  nU =R                  [1        U R,                  R2                  SSS9/-  sl        [5        [7        U[        U5      5       VVs/ s H	  u  pX-  PM     snn5      n[9        UR                  5      n[9        UR:                  5      U:X  d   e[=        XqR:                  SS	9nU R,                  R2                  n/ n[?        U5       H  nSnUR                  U   n[5        U Vs/ s H  nUS
-
  U-  S
-   PM     sn5      nU[A        UU4UR:                  U   URB                  U   UR                   UUUU   S.UD6/-  nUU-  nUnU =R                  [1        UUSU 3S9/-  sl        M     [D        RF                  " U6 U l$        [K        [M        UR
                  R(                  5      UR
                  R*                  S9nURN                  (       aW  [D        RP                  " 5       U l)        URN                  U l'        [U        U R                  U4U RN                  UUUS.UD6U l+        OHU R                  U l'        U" U R                  40 UD6U l)        [Y        U R                  U4UUS.UD6U l+        URZ                  S;   d   eURZ                  (       a)  []        [K        U R^                  URZ                  S9U 5        ggs  snnf s  snf )aH  
Args:
    cfg: Model configuration.
    img_size: Input image size.
    in_chans: Number of input channels.
    num_classes: Number of classification classes.
    global_pool: Global pooling type.
    drop_rate: Dropout rate.
    drop_path_rate: Drop path rate.
    **kwargs: Additional keyword arguments to overlay on config.
r   r   F)r;  r<  r|   rH   rS   rU   rY   r   stem)num_chs	reductionr   T)	stagewiser-   )r  r  r   r   r  r  zstages.r  )hidden_sizerG   r  rU   )rG   r  )r{   r   r  r  r   r'  Nrm   )0r   r   r$   r  r  r   r  r  r  r   num_featuresr  r  feature_infor  r   r   r|   r   rS   rU   rY   r  r   r  r<  r  ziplenr   r   ranger  rq   r   r  stagesr   r   r   r   r  r   headr   r   r1   _init_weights)r]   r  rp  r  r  r  r  r   r   r   r  r   r   r   r  sr  
num_stagesdprr;  r  stage_strider<  rfinal_norm_layerr   s                            r^   r   MaxxVit.__init__K  s   0 	/X&!#00C)#*=*=xH& &-0]]2->>DN""' 	
NNLL((ll,,||..\\**	
 	
	 !!d499+<+<RXYZZc(If<M.NO.Nda16.NOP	'
3::*,,,'

dS""z"ALmmA&GINIqA,6:INOI|
 jjmNN1- /#a&
 
 
 
F l"FF$w&SZ[\Z]Q^"_!``# #$ mmV,">#2E2E2P2P#QWZWjWjWsWstDI$'$8$8D!-!! !11%#+ DI %)$5$5D!():):AbADI&!! &#	
 DI "\\\\?? 2 23??KTR m P Os   +O
O%
r   r   r   r   c                     [        US5      (       a   UR                  US9  g g ! [         a    UR                  5          g f = f)Nr)  r'  )hasattrr)  	TypeError)r]   r   r   r   s       r^   r  MaxxVit._init_weights  sG    6>**&##6#2 +  &##%&s   $ A Ac                    ^ U R                  5        V^Vs1 s H$  u  mn[        U4S jS 5       5      (       d  M"  TiM&     snn$ s  snnf )Nc              3   ,   >#    U  H	  oT;   v   M     g 7fr[   rm   ).0nr   s     r^   	<genexpr>*MaxxVit.no_weight_decay.<locals>.<genexpr>  s     S#Ra6#Rs   )relative_position_bias_tablezrel_pos.mlp)named_parametersany)r]   r   _s    ` r^   no_weight_decayMaxxVit.no_weight_decay  sJ     //1U1$!QS#RSS 1U 	U Us
   !AAcoarsec                     [        SSS/S9nU$ )Nz^stem)z^stages\.(\d+)N)z^norm)i )r  r  )r  )r]   r&  matchers      r^   group_matcherMaxxVit.group_matcher  s    -/CD
 ra   enablec                 6    U R                    H	  nXl        M     g r[   )r  r  )r]   r+  r  s      r^   set_grad_checkpointingMaxxVit.set_grad_checkpointing  s    A#)  ra   c                 .    U R                   R                  $ r[   )r  fcr\   s    r^   get_classifierMaxxVit.get_classifier  s    yy||ra   c                 F    Xl         U R                  R                  X5        g r[   )r  r  reset)r]   r  r  s      r^   reset_classifierMaxxVit.reset_classifier  s    &		1ra   r   indicesr  
stop_early
output_fmtintermediates_onlyc                 P   US;   d   S5       e/ n[        [        U R                  5      S-   U5      u  pSn
U R                  U5      nX;   a  UR	                  U5        [        U R                  5      n[
        R                  R                  5       (       d  U(       d  U R                  nOU R                  SU	 nU HH  nU
S-  n
U" U5      nX;   d  M  U(       a  X:X  a  U R                  U5      nOUnUR	                  U5        MJ     U(       a  U$ X:X  a  U R                  U5      nX4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to compatible intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWzOutput shape must be NCHW.r-   r   N)	r/   r
  r  r  appendr   r  r  r  )r]   r   r7  r  r8  r9  r:  intermediatestake_indices	max_indexfeat_idxlast_idxr  stagex_inters                  r^   forward_intermediatesMaxxVit.forward_intermediates  s   * Y&D(DD&"6s4;;7G!7KW"U IIaL#  #t{{#99!!##:[[F[[),FEMHaA'H0"iilGG$$W-    		!Ara   
prune_norm
prune_head.c                     [        [        U R                  5      S-   U5      u  pEU R                  SU U l        U(       a  [        R                  " 5       U l        U(       a  U R                  SS5      U l        U$ )z6Prune layers not required for specified intermediates.r-   Nr   r{   )r/   r
  r  r   r   r  r5  r  )r]   r7  rG  rH  r?  r@  s         r^   prune_intermediate_layers!MaxxVit.prune_intermediate_layers  s`     #7s4;;7G!7KW"Ukk*9-DI--a4DIra   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r[   )r  r  r  r   s     r^   forward_featuresMaxxVit.forward_features  s.    IIaLKKNIIaLra   
pre_logitsc                 P    U(       a  U R                  XS9$ U R                  U5      $ )N)rO  )r  )r]   r   rO  s      r^   forward_headMaxxVit.forward_head#  s#    6@tyyy2RdiiPQlRra   c                 J    U R                  U5      nU R                  U5      nU$ r[   )rM  rQ  r   s     r^   r   MaxxVit.forward&  s'    !!!$a ra   )r  r   r  r  r  r  r   r  r  r  r  r  r  )   rs     r   rC   rC   NNr-  F)Tr[   )NFFr<  F)r-   FT)%rb   rc   rd   re   rf   r5   r   rg   r   rk   rj   r   r   r   Moduler  r   r  ignorer   r$  ri   r
   r)  r-  r1  r   r5  r   r   rE  rJ  rM  rQ  r   rl   r   r   s   @r^   r8   r8   E  s    58#$!$&iSiS CsCx01iS 	iS
 iS iS iS "iS iS iSV&BII &S &# &t & YYUS U U
 YYD T#s(^   YY*T *T * * YY		  2C 2hsm 2W[ 2 8<$$',4 ||4  eCcN344  	4 
 4  4  !%4  
tELL!5tELL7I)I#JJ	K4 p ./$#	3S	>*  	
 
sCx%,, 5<< Sell S S S %,,  ra   r8   r   FTrH   rJ   ry   rG   conv_output_biasconv_attn_earlyconv_attn_act_layerconv_norm_layertransformer_shortcut_biastransformer_norm_layertransformer_norm_layer_clrQ   rI   rK   c                 P    [        [        U USSUUUSUS9	[        SUUU	UUU
US9S9$ )a;  RW variant configuration for CoAtNet models.

These models were created and trained before seeing https://github.com/google-research/maxvit

Common differences for initial timm models:
  - pre-norm layer in MZBConv included an activation after norm
  - mbconv expansion calculated from input instead of output chs
  - mbconv shortcut and final 1x1 conv did not have a bias
  - SE act layer was relu, not silu
  - mbconv uses silu in timm, not gelu
  - expansion in attention block done via output proj, not input proj

Variable differences (evolved over training initial models):
  - avg pool with kernel_size=2 favoured downsampling (instead of maxpool for coat)
  - SE attention was between conv2 and norm/act
  - default to avg pool for mbconv downsample instead of 1x1 or dw conv
  - transformer block shortcut has no bias
TFr   )	ry   rG   rv   rr   rw   r}   r   rS   rU   )r@   rA   rG   rQ   rU   rW   rI   rK   r   r   r  r6   r7   )ry   rG   rZ  r[  r\  r]  r^  r_  r`  rQ   rI   rK   s               r^   _rw_coat_cfgrd  ,  sW    @ #(&.&

 .3#-3%#	
 ra   rx   conv_attn_ratior<   c                 N    [        [        U USUUSUS9[        SUUUU	UUU
US9	S9$ )a  RW variant configuration for MaxViT models.

These models were created and trained before seeing https://github.com/google-research/maxvit

Differences of initial timm models:
  - mbconv expansion calculated from input instead of output chs
  - mbconv shortcut and final 1x1 conv did not have a bias
  - mbconv uses silu in timm, not gelu
  - expansion in attention block done via output proj, not input proj
Fr   )ry   rG   rr   rw   r   rS   rU   )	r@   rG   r<   rM   rQ   rU   rW   rI   rK   rb  rc  )ry   rG   rZ  re  r]  r_  r`  rM   r<   rQ   rI   rK   s               r^   _rw_max_cfgrg  e  sS    0 #(&&
 .##-3%#

 ra   rX   r  conv_norm_layer_clrO   c                 p    [        U5      n[        [        SU USUS   UUS9[        SUUUUS   UUU	U
S9	S9$ )z=Configuration for experimental ConvNeXt-based MaxxViT models.r   Fr   )rq   ry   rG   rr   rQ   rU   rW   r-   )	r@   rG   rM   rO   rQ   rU   rW   rI   rK   rb  )r$   r  r6   r7   )ry   rG   r]  rh  r_  r`  rM   rO   rQ   rI   rK   s              r^   	_next_cfgrj    se     K(K!##A&,
 .#'#A-3%#

 ra   c            
      <    [        [        SSSS9[        SSSSS9S	9$ )
z0Configuration matching TensorFlow MaxViT models.gMbP?	gelu_tanhsame)rY   rS   r|   r   Fr  )rY   rS   r=   rI   rb  rc  rm   ra   r^   _tf_cfgrn    s6    !

 .!"	
 ra   coatnet_pico_rw)r         rJ   r   )r;   r   )r   r   r   r   )rZ  re  coatnet_nano_rw)rs   rl     rs   )ry   rZ  re  coatnet_0_rwr   )r   rs   rY  r   )r[  r^  coatnet_1_rw)r   rs  r  r   )ry   r[  r^  coatnet_2_rw)rp  rq  rJ      )r   rp  r   )ry   r\  coatnet_3_rw)r   r   r      )r   r   )ry   r\  rQ   coatnet_bn_0_rwr   )ry   r[  r^  r_  coatnet_rmlp_nano_rwr   )rZ  re  rI   rK   coatnet_rmlp_0_rw)ry   rI   coatnet_rmlp_1_rwr   )rG   r[  r^  rI   rK   coatnet_rmlp_1_rw2)ry   rI   rK   coatnet_rmlp_2_rw)ry   r\  rQ   rI   coatnet_rmlp_3_rwcoatnet_nano_cc)r   r   r   r   r  )r   r   r   rq   coatnext_nano_rwr   )r   r   r   r   )r   N)rI   rQ   	coatnet_0r   r   )r   r   r   r   	coatnet_1	coatnet_2rp  rw  	coatnet_3r   ry  	coatnet_4)r         r   	coatnet_5)rq  rJ         r  maxvit_pico_rw)r;   r   rp  rq  )r   r   r   r   )r  r  r  r  )   r;   )r   r   rq   r   maxvit_nano_rw)r-   r   rs   r-   maxvit_tiny_rwmaxvit_tiny_pm)r  r  r  r  maxvit_rmlp_pico_rw)rI   maxvit_rmlp_nano_rwmaxvit_rmlp_tiny_rwmaxvit_rmlp_small_rwmaxvit_rmlp_base_rw)r   r   rq   r   r   maxxvit_rmlp_nano_rw)r   r   rq   r   r   maxxvit_rmlp_tiny_rwmaxxvit_rmlp_small_rw)0   r   maxxvitv2_nano_rw)rO   rI   maxxvitv2_rmlp_base_rw)r   rs  r  r   )rO   maxxvitv2_rmlp_large_rw)   i@  i  r  )r   rs     r   )P   r  r  maxvit_tiny_tf)r   r   rq   r   r   r   maxvit_small_tfmaxvit_base_tfmaxvit_large_tfmaxvit_xlarge_tf
state_dictmodelc                 ~   UR                  5       n0 nU R                  5        GH  u  pEUR                  S5      (       a  UR                  USS 5      nUR                  UR
                  R                  :w  d   UR                  S   UR                  S   :w  a)  [        UUR                  UR
                  R                  S9nXB;   ao  UR                  X$   R                  :w  aS  UR                  5       X$   R                  5       :X  a/  UR                  S;   d   eUR                  X$   R                  5      nXSU'   GM     U$ )z/Filter checkpoint state dict for compatibility.r   Nir   r-   )new_window_sizenew_bias_shape)r   rl  )r  r  endswithget_submoduler   r   rM   r,   ndimnumelr   )r  r  model_state_dictout_dictr   r   ms          r^   checkpoint_filter_fnr  >  s   '')H  "::455##AdsG,Aww!88>>>!--PQBRVWVcVcdeVfBf-$%MM#$#A#A#G#G  QVV/?/B/G/G%GAGGIYiYlYrYrYtLt66V###		*-334A # Ora   variantcfg_variant
pretrainedc                     Uc0  U [         ;   a  U nO#SR                  U R                  S5      SS 5      n[        [        X4[         U   [        SS9[        S.UD6$ )zCreate a MaxxVit model variant.Nr#  r   T)flatten_sequential)	model_cfgfeature_cfgpretrained_filter_fn)
model_cfgsjoinsplitr.   r8   r  r  )r  r  r  r  s       r^   _create_maxxvitr  T  sg    j !K((7==#5cr#:;K[)D11	
  ra   urlc                 $    U SSSSSSSSSS	S
S.UE$ )z$Create a default configuration dict.rV  )rs   rU  rU  )rY  rY  ffffff?bicubic)      ?r  r  z
stem.conv1zhead.fcTz
apache-2.0)r  r  
input_size	pool_sizecrop_pctinterpolationmeanr  
first_conv
classifierfixed_input_sizelicenserm   )r  r  s     r^   _cfgr  c  s7     4}SY9")  $* ra   zcoatnet_pico_rw_224.untrained)r  zcoatnet_nano_rw_224.sw_in1kztimm/zyhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_nano_rw_224_sw-f53093b4.pthg?)	hf_hub_idr  r  zcoatnet_0_rw_224.sw_in1kzvhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_0_rw_224_sw-a6439706.pth)r  r  zcoatnet_1_rw_224.sw_in1kzvhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_1_rw_224_sw-5cae1ea8.pthz!coatnet_2_rw_224.sw_in12k_ft_in1k)r  z'coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1kz&coatnet_rmlp_2_rw_224.sw_in12k_ft_in1kz&coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k)rs   r   r   )r  r  g      ?squash)r  r  r  r  	crop_modezcoatnet_bn_0_rw_224.sw_in1kzyhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_bn_0_rw_224_sw-c228e218.pthr  )r  r  r  r  r  z coatnet_rmlp_nano_rw_224.sw_in1kz~https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_nano_rw_224_sw-bd1d51b3.pthzcoatnet_rmlp_0_rw_224.untrainedzcoatnet_rmlp_1_rw_224.sw_in1kz{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_1_rw_224_sw-9051e6c3.pthzcoatnet_rmlp_2_rw_224.sw_in1kz{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_2_rw_224_sw-5ccfac55.pthzcoatnet_rmlp_3_rw_224.untrainedzcoatnet_nano_cc_224.untrainedzcoatnext_nano_rw_224.sw_in1kzzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnext_nano_rw_224_ad-22cb71c2.pthzcoatnet_2_rw_224.sw_in12ki-.  )r  r  zcoatnet_3_rw_224.sw_in12kzcoatnet_rmlp_1_rw2_224.sw_in12kzcoatnet_rmlp_2_rw_224.sw_in12kzcoatnet_0_224.untrainedzcoatnet_1_224.untrainedzcoatnet_2_224.untrainedzcoatnet_3_224.untrainedzcoatnet_4_224.untrainedzcoatnet_5_224.untrainedzmaxvit_pico_rw_256.untrained)rs   rq  rq  )   r  )r  r  r  zmaxvit_nano_rw_256.sw_in1kzxhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_nano_rw_256_sw-fb127241.pth)r  r  r  r  zmaxvit_tiny_rw_224.sw_in1kzxhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_tiny_rw_224_sw-7d0dffeb.pthzmaxvit_tiny_rw_256.untrainedzmaxvit_tiny_pm_256.untrainedzmaxvit_rmlp_pico_rw_256.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_pico_rw_256_sw-8d82f2c6.pthzmaxvit_rmlp_nano_rw_256.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_nano_rw_256_sw-c17bb0d6.pthzmaxvit_rmlp_tiny_rw_256.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_tiny_rw_256_sw-bbef0ff5.pthz maxvit_rmlp_small_rw_224.sw_in1kz~https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_small_rw_224_sw-6ef0ae4f.pthz"maxvit_rmlp_small_rw_256.untrainedz(maxvit_rmlp_base_rw_224.sw_in12k_ft_in1kz(maxvit_rmlp_base_rw_384.sw_in12k_ft_in1kz maxvit_rmlp_base_rw_224.sw_in12kz maxxvit_rmlp_nano_rw_256.sw_in1kz~https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_nano_rw_256_sw-0325d459.pthz"maxxvit_rmlp_tiny_rw_256.untrainedz!maxxvit_rmlp_small_rw_256.sw_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_small_rw_256_sw-37e217ff.pthzmaxxvitv2_nano_rw_256.sw_in1k)r  r  r  z+maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1kz+maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1kz%maxxvitv2_rmlp_large_rw_224.untrainedz#maxxvitv2_rmlp_base_rw_224.sw_in12kzmaxvit_tiny_tf_224.in1k)r  r  r  zmaxvit_tiny_tf_384.in1kzmaxvit_tiny_tf_512.in1k)rs   rJ   rJ   )r  r  zmaxvit_small_tf_224.in1kzmaxvit_small_tf_384.in1kzmaxvit_small_tf_512.in1kzmaxvit_base_tf_224.in1kzmaxvit_base_tf_384.in1kzmaxvit_base_tf_512.in1kzmaxvit_large_tf_224.in1kzmaxvit_large_tf_384.in1kzmaxvit_large_tf_512.in1kzmaxvit_base_tf_224.in21kiSU  z maxvit_base_tf_384.in21k_ft_in1kz maxvit_base_tf_512.in21k_ft_in1kzmaxvit_large_tf_224.in21kz!maxvit_large_tf_384.in21k_ft_in1kz!maxvit_large_tf_512.in21k_ft_in1k)r  r  r  r  zmaxvit_xlarge_tf_224.in21kz"maxvit_xlarge_tf_384.in21k_ft_in1kz"maxvit_xlarge_tf_512.in21k_ft_in1kc                     [        SSU 0UD6$ )z)CoatNet Pico model with RW configuration.r  )coatnet_pico_rw_224r  r  r  s     r^   r  r  J	       RZR6RRra   c                     [        SSU 0UD6$ )z)CoatNet Nano model with RW configuration.r  )coatnet_nano_rw_224r  r  s     r^   r  r  P	  r  ra   c                     [        SSU 0UD6$ )z&CoatNet-0 model with RW configuration.r  )coatnet_0_rw_224r  r  s     r^   r  r  V	       O*OOOra   c                     [        SSU 0UD6$ )z&CoatNet-1 model with RW configuration.r  )coatnet_1_rw_224r  r  s     r^   r  r  \	  r  ra   c                     [        SSU 0UD6$ )z&CoatNet-2 model with RW configuration.r  )coatnet_2_rw_224r  r  s     r^   r  r  b	  r  ra   c                     [        SSU 0UD6$ )z&CoatNet-3 model with RW configuration.r  )coatnet_3_rw_224r  r  s     r^   r  r  h	  r  ra   c                     [        SSU 0UD6$ )z4CoatNet-0 model with BatchNorm and RW configuration.r  )coatnet_bn_0_rw_224r  r  s     r^   r  r  n	  r  ra   c                     [        SSU 0UD6$ )z.CoatNet Nano model with Relative Position MLP.r  )coatnet_rmlp_nano_rw_224r  r  s     r^   r  r  t	       W*WPVWWra   c                     [        SSU 0UD6$ )z+CoatNet-0 model with Relative Position MLP.r  )coatnet_rmlp_0_rw_224r  r  s     r^   r  r  z	       TzTVTTra   c                     [        SSU 0UD6$ )z+CoatNet-1 model with Relative Position MLP.r  )coatnet_rmlp_1_rw_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z.CoatNet-1 model with Relative Position MLP v2.r  )coatnet_rmlp_1_rw2_224r  r  s     r^   r  r  	  s     U
UfUUra   c                     [        SSU 0UD6$ )z+CoatNet-2 model with Relative Position MLP.r  )coatnet_rmlp_2_rw_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z6CoatNet-2 model with Relative Position MLP at 384x384.r  )coatnet_rmlp_2_rw_384r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z+CoatNet-3 model with Relative Position MLP.r  )coatnet_rmlp_3_rw_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z(CoatNet Nano model with ConvNeXt blocks.r  )coatnet_nano_cc_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z*CoAtNeXt Nano model with RW configuration.r  )coatnext_nano_rw_224r  r  s     r^   r  r  	  s     SjSFSSra   c                     [        SSU 0UD6$ )zCoatNet-0 model.r  )coatnet_0_224r  r  s     r^   r  r  	       LzLVLLra   c                     [        SSU 0UD6$ )zCoatNet-1 model.r  )coatnet_1_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )zCoatNet-2 model.r  )coatnet_2_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )zCoatNet-3 model.r  )coatnet_3_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )zCoatNet-4 model.r  )coatnet_4_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )zCoatNet-5 model.r  )coatnet_5_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z(MaxViT Pico model with RW configuration.r  )maxvit_pico_rw_256r  r  s     r^   r  r  	       QJQ&QQra   c                     [        SSU 0UD6$ )z(MaxViT Nano model with RW configuration.r  )maxvit_nano_rw_256r  r  s     r^   r
  r
  	  r  ra   c                     [        SSU 0UD6$ )z(MaxViT Tiny model with RW configuration.r  )maxvit_tiny_rw_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z3MaxViT Tiny model with RW configuration at 256x256.r  )maxvit_tiny_rw_256r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z3MaxViT Relative Position MLP Pico RW 256x256 model.r  )maxvit_rmlp_pico_rw_256r  r  s     r^   r  r  	       VVvVVra   c                     [        SSU 0UD6$ )z3MaxViT Relative Position MLP Nano RW 256x256 model.r  )maxvit_rmlp_nano_rw_256r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z3MaxViT Relative Position MLP Tiny RW 256x256 model.r  )maxvit_rmlp_tiny_rw_256r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z4MaxViT Relative Position MLP Small RW 224x224 model.r  )maxvit_rmlp_small_rw_224r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z9MaxViT Small model with Relative Position MLP at 256x256.r  )maxvit_rmlp_small_rw_256r  r  s     r^   r  r  	  r  ra   c                     [        SSU 0UD6$ )z-MaxViT Base model with Relative Position MLP.r  )maxvit_rmlp_base_rw_224r  r  s     r^   r  r  
  r  ra   c                     [        SSU 0UD6$ )z8MaxViT Base model with Relative Position MLP at 384x384.r  )maxvit_rmlp_base_rw_384r  r  s     r^   r  r  

  r  ra   c                     [        SSU 0UD6$ )z'MaxViT Tiny model with parallel blocks.r  )maxvit_tiny_pm_256r  r  s     r^   r  r  
  r  ra   c                     [        SSU 0UD6$ )z4MaxxViT Relative Position MLP Nano RW 256x256 model.r  )maxxvit_rmlp_nano_rw_256r  r  s     r^   r!  r!  
  r  ra   c                     [        SSU 0UD6$ )z.MaxxViT Tiny model with Relative Position MLP.r  )maxxvit_rmlp_tiny_rw_256r  r  s     r^   r#  r#  
  r  ra   c                     [        SSU 0UD6$ )z/MaxxViT Small model with Relative Position MLP.r  )maxxvit_rmlp_small_rw_256r  r  s     r^   r%  r%  "
  s     X:XQWXXra   c                     [        SSU 0UD6$ )zMaxxViT-V2 Nano model.r  )maxxvitv2_nano_rw_256r  r  s     r^   r'  r'  (
  r  ra   c                     [        SSU 0UD6$ )z1MaxxViT-V2 Base model with Relative Position MLP.r  )maxxvitv2_rmlp_base_rw_224r  r  s     r^   r)  r)  .
       YJYRXYYra   c                     [        SSU 0UD6$ )z<MaxxViT-V2 Base model with Relative Position MLP at 384x384.r  )maxxvitv2_rmlp_base_rw_384r  r  s     r^   r,  r,  4
  r*  ra   c                     [        SSU 0UD6$ )z2MaxxViT-V2 Large model with Relative Position MLP.r  )maxxvitv2_rmlp_large_rw_224r  r  s     r^   r.  r.  :
  s     ZZZSYZZra   c                     [        SSU 0UD6$ )z"MaxViT Tiny model from TensorFlow.r  )maxvit_tiny_tf_224r  r  r  s     r^   r0  r0  @
       cjc\bccra   c                     [        SSU 0UD6$ )z-MaxViT Tiny model from TensorFlow at 384x384.r  )maxvit_tiny_tf_384r  r  r  s     r^   r3  r3  F
  r1  ra   c                     [        SSU 0UD6$ )z-MaxViT Tiny model from TensorFlow at 512x512.r  )maxvit_tiny_tf_512r  r  r  s     r^   r5  r5  L
  r1  ra   c                     [        SSU 0UD6$ )z#MaxViT Small model from TensorFlow.r  )maxvit_small_tf_224r  r  r  s     r^   r7  r7  R
       ePZe^deera   c                     [        SSU 0UD6$ )z.MaxViT Small model from TensorFlow at 384x384.r  )maxvit_small_tf_384r  r  r  s     r^   r:  r:  X
  r8  ra   c                     [        SSU 0UD6$ )z.MaxViT Small model from TensorFlow at 512x512.r  )maxvit_small_tf_512r  r  r  s     r^   r<  r<  ^
  r8  ra   c                     [        SSU 0UD6$ )z"MaxViT Base model from TensorFlow.r  )maxvit_base_tf_224r  r  r  s     r^   r>  r>  d
  r1  ra   c                     [        SSU 0UD6$ )z-MaxViT Base model from TensorFlow at 384x384.r  )maxvit_base_tf_384r  r  r  s     r^   r@  r@  j
  r1  ra   c                     [        SSU 0UD6$ )z-MaxViT Base model from TensorFlow at 512x512.r  )maxvit_base_tf_512r  r  r  s     r^   rB  rB  p
  r1  ra   c                     [        SSU 0UD6$ )z#MaxViT Large model from TensorFlow.r  )maxvit_large_tf_224r  r  r  s     r^   rD  rD  v
  r8  ra   c                     [        SSU 0UD6$ )z.MaxViT Large model from TensorFlow at 384x384.r  )maxvit_large_tf_384r  r  r  s     r^   rF  rF  |
  r8  ra   c                     [        SSU 0UD6$ )z.MaxViT Large model from TensorFlow at 512x512.r  )maxvit_large_tf_512r  r  r  s     r^   rH  rH  
  r8  ra   c                     [        SSU 0UD6$ )z$MaxViT XLarge model from TensorFlow.r  )maxvit_xlarge_tf_224r  r  r  s     r^   rJ  rJ  
       gR\g`fggra   c                     [        SSU 0UD6$ )z/MaxViT XLarge model from TensorFlow at 384x384.r  )maxvit_xlarge_tf_384r  r  r  s     r^   rM  rM  
  rK  ra   c                     [        SSU 0UD6$ )z/MaxViT XLarge model from TensorFlow at 512x512.r  )maxvit_xlarge_tf_512r  r  r  s     r^   rO  rO  
  rK  ra   r-  )r   rF   FFrelur{   TrT   rV   NrH   rJ   )rx   rF   Fg      ?r{   rT   rV   Nr;   NrH   rJ   )rx   rF   rT   rV   rT   rV   NFrX   r  rJ   rm   )NFrW  )rf   r1  collectionsr   dataclassesr   r   r   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   r   	torch.jitr   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   _builderr.   	_featuresr/   _features_fxr0   _manipulater1   r2   	_registryr3   r4   __all__r7   r6   r5   rX  r   r   r   rk   r  r  r4  rg   r7  r9  rX  r   ro  rt  ry  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r8   ri   rj   rd  rg  rj  rn  r  r  r  r  r  default_cfgsr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r  r  r  r  r  r!  r#  r%  r'  r)  r,  r.  r0  r3  r5  r7  r:  r<  r>  r@  rB  rD  rF  rH  rJ  rM  rO  rm   ra   r^   <module>r_     s   "H  # 1 1  I I I    A      6 + + 3 4 <
N 2 2 2B !P !P !PH 
! 
! 
!Q")) QhR")) Rj/299 /d0bii 0s 0C 0 02N Nb,ryy , ,S ,$ ,0&8C= &C &C &l")) l^dBII dN 49   ELL tCy DQTI Z_ZfZf  ell tCy U\\  %,, 49 S	 V[VbVb  	. 	U38_ 	QYZbQc 	D299 DNT TnU\\ S	 ell   DI QUVYQZ _d_k_k  5<< DI %,,  u|| S	 TRUY [`[g[g  K299 K\5299 5p1299 1hW299 Wt/299 /d. %S/ Nc  s z *dbii dP "!& %#)!*.&3)4'+"666 6 	6
 !6 6 $(6 !$6 $'6 e_6 6 6 
#s(^6t  !&!'!&3)415'+"--- - 	-
 - !$- $'- eCHo.- - e_- - - 
#s(^-b  ,"-&3)415#9=!$$$ $  	$
 !$$ $'$ eCHo.$ $ 5%u"556$ $ $ 
#s(^$Nc3h "  n % ! 
	n  	%	 ! 
		n*  %  &+
	+n<  	%	  &+
		=nP  	'	  &
		Qnd  	'	  &
		en|  
%
  &+#0	
	
}nR $ 
%
 ! 	
	
Snh ! % 
	inz ! %  &+
	{nR " 	%	 
		Snf ! 
'
  &	
	
gn| ! 
'
  &	
	
}nT  %5	
 .Unb   	%		
 $
	cnz %	{nF %	GnR '	Sn^ '	_nj '	knv (	wnF  $	
 -GnT  %	
 -Unb  %	
 -cnp  %	
 -qn@ # $	
 5
)AnN # %	
 5
)On\ # %	
 5
)]nj $ 	%		
 
	kn~ # 	%	 
	nT $ % +Und $ %	
 +enr % %	
 +snB	 ! 
%
 

C	nX	 & '	
 
Y	nj	 ' 	'	 
	k	nB
  % )C
nT
  % )U
nf
  % )g
nx
  ' )y
nJ   ' )Kn
bT#u||*;%< RYY SWX[]b]i]iXiSj ,S x} QU il qx 	c 	# 	$sCx. 	 % X&#Tb\X& "4 H$X&  E!FX&  E!X&  (*!X&* .t0+X&. -d//X&2 -d Hsh/X3X&< "4 H"(<	$=X&F ' M)GX&N &t|OX&P $T J&KQX&V $T J&KWX&\ &t|]X&^ $Tb\_X&` #D I%aX&l  "mX&r  "sX&x &t(yX&~ %d'X&H t|IX&J t|KX&L t|MX&N t|OX&P t|QX&R t|SX&X #DRMU[$\YX&Z !$ G F#4[X&b !$ G#HcX&h #D F%4iX&n #DRMU[$\oX&t &t L F(4uX&| &t L F(4}X&D &t L F(4EX&L ' M)MX&V )$ F+4WX&` /1aX&f / Hsh1XgX&p ')qX&| ' M F)4}X&D )$2-[a*bEX&F ( N F*4GX&R $T F&4SX&X 244YX&\ 24 Hsh4X]X&b ,Tb\cX&f *4,gX&p t"(< >qX&v t Hsh XwX&| t Hsh X}X&B "(<!>CX&H  Hsh!XIX&N  Hsh!XOX&T t"(< >UX&Z t Hsh X[X&` t Hsh XaX&f "(<!>gX&l  Hsh!XmX&r  Hsh!XsX&z !{X&@ ' Hsh)XAX&F ' Hsh)XGX&L  "MX&R ( Hsh*XSX&X ( 3(*DYX&^ !$#_X&d )$ Hsh+XeX&j )$ Hsh+XkX& Xv SD SC SG S S
 SD SC SG S S
 P P P P P
 P P P P P
 P P P P P
 P P P P P
 SD SC SG S S
 X X X X X
 Ud Uc Ug U U
 Ud Uc Ug U U
 Vt Vs Vw V V
 Ud Uc Ug U U
 Ud Uc Ug U U
 Ud Uc Ug U U
 SD SC SG S S
 TT TS TW T T
 Md Mc Mg M M
 Md Mc Mg M M
 Md Mc Mg M M
 Md Mc Mg M M
 Md Mc Mg M M
 Md Mc Mg M M
 R4 R3 R7 R R
 R4 R3 R7 R R
 R4 R3 R7 R R
 R4 R3 R7 R R
 W W W W W
 W W W W W
 W W W W W
 X X X X X
 X X X X X
 W W W W W
 W W W W W
 R4 R3 R7 R R
 X X X X X
 X X X X X
 Y$ Y# Y' Y Y
 Ud Uc Ug U U
 Z4 Z3 Z7 Z Z
 Z4 Z3 Z7 Z Z
 [D [C [G [ [
 d4 d3 d7 d d
 d4 d3 d7 d d
 d4 d3 d7 d d
 fD fC fG f f
 fD fC fG f f
 fD fC fG f f
 d4 d3 d7 d d
 d4 d3 d7 d d
 d4 d3 d7 d d
 fD fC fG f f
 fD fC fG f f
 fD fC fG f f
 hT hS hW h h
 hT hS hW h h
 hT hS hW h hra   