
    RЦi              
          S r SSKrSSKrSSKJrJr  SSKJrJrJ	r	J
r
  SSKrSSKrSSKJr  SSKJs  Jr  SSKJrJrJrJrJrJrJrJr  SSKJr  SSKJr  SSK J!r!  SS	K"J#r#J$r$  S
SK%J&r&J'r'  SS/r(Sr)Sr*S\+S\+S\\+   4S jr,  S>S\+S\-S\R\                  4S jjr/  S>S\+S\-S\R\                  4S jjr0 " S S\Rb                  5      r2 " S S\Rb                  5      r3S?S\+4S jjr4 " S S\Rb                  5      r5 " S  S!\Rb                  5      r6 " S" S#\Rb                  5      r7 " S$ S%\Rb                  5      r8 " S& S'\Rb                  5      r9 " S( S)\Rb                  5      r: " S* S+\Rb                  5      r; " S, S\Rb                  5      r<S@S- jr=\'" \=" S.S/9\=" S.S0S1S29\=" S.S3S1S49S5.5      r>S6\?S7\Rb                  S\?4S8 jr@SAS9\AS:\-S\<4S; jjrB\&SAS:\-S\<4S< jj5       rC\&SAS:\-S\<4S= jj5       rDg)Ba  CSATv2

A frequency-domain vision model using DCT transforms with spatial attention.

Paper: TBD

This model created by members of MLPA Lab. Welcome feedback and suggestion, questions.
gusdlf93@naver.com
juno.demie.oh@gmail.com

Refined for timm by Ross Wightman
    N)partialreduce)ListOptionalTupleUnion)trunc_normal_DropPathMlpLayerNorm2d	AttentionNormMlpClassifierHead
LayerScaleLayerScale2d)GlobalResponseNorm)build_model_with_cfg)feature_take_indices)
checkpointcheckpoint_seq   )register_modelgenerate_default_cfgsCSATv2csatv2))@g;i#@g_LegmV}b?gZӼiUMu>g{g ]iUMu?gh㈵4?g	k?g)t^cwg-C6*?r   gW8g^/vgdS       g!J>h㈵ga2U0*3g	3mJ?g	.V`Z?9̗iUMugמY"gǺ6h㈵>g/nbg8*5{5?h㈵iUMug-C6*מY?r   -C6
?gGŧ h?h㈵r   gW(?h㈵?r   ga2U0*#r"   r$   g-C6?r&   h㈵?        r(   h㈵ga2U0*#?h㈵>r$   r   r'   r*   r'   r   r+   r   r   r   r#   r   )@g o_@gnqgŏ1w-?gWX?r   g'>i?gWr*   gC8
!g/$?g_LE?r   r   r   g8*5{e?r)   r!   r$   r    r   gVIk?gQ,Z?r'   r   r*   r(   r   gH}M?g9̗'?r    r*   r$   r#   r*   r   gyCnK?r   r*   r)   r'   r*   r"   r)   r!   r+   r!   r'   K8?r)   r#   r,   r"   r!   r!   r!   r!   מYr#   r!   r)   r!   r+   r!   r)   )@gQ1ߤt@gP6
ragVF摷g~tgy^?C8
!?gq@H6?gkC8Sr+   r,   g׆q&g%>?r-   r*   g-C6
gH}]?gyCn;?r+   g-C6r'   r   gcbqmhg{GzT?r"   r#   r*   r*   r*   gמYB?gyCn+?r!   r*   r+   r*   r*   r%   gǺFr"   r"   r%   r&   r   r+   r"   r!   r)   r#   r'   r$   r!   r)   r.   r   r)   r+   r   r*   r&   r#   r)   r!   r*   r!   r   r)   ))@g   AgN@@g->V@gr3܀oԙ@gߖ@g|~!\@ge@gwR~R@g[ D@gMu@gqZq@g-r@gCiw@gqu@gP6bw@gZB>g@gFg@g&:k@gx=\ri@g"[='<b@gQ_@gCVO@g\P@gqY@gK87`@gJ_{A`@ggY@geI)pX@glIFJ@gϽgK@gN],_R@gdBT@gX9vVQ@gH@gi@@gctv2B@gz1}4@g8@g\Ɏ@@@g.lIFF@g %G@gFxD@gꐛn>@gx@ٔ6@g2d:@go%;6:@gd5@g2bj1@gyt].@g!Yd%@g%zr{&@gt)@g?x-.@g.@gHm!@giR
@gMۿҤ@g~@gJF@gؙB@g%X
@gXV@g'>O@ge1?)@g[|
e@ga_Yt@g%;6qr@gQkw^S@g:dwS@g͍	CV@gHȰ@@g2_A@gNG@@g1%2<@g-(@ga7l[,@go1u2@g*Wx0@g]3f[/@g5Ry;@gqrCQ !@g 4$@g72"@g9#J{@gdz@g_5j@gt^>@gq@gіs)j@gxqZ@gEdXY@gЛT[@gN#-7?gN#-@g@g~:p@g5Ry;@g⪲@gJ5o?gsA?gT7?g	;?g]h?gTt<f?gӤt{I?g1Zd?g:ǀ?g';?g {?g2}ƅ?g<!?g^D?gnQfL2?g^D?g|wJ?grt?göE2?gQ?g"^F?gʦ\?g($?gj?gQ?g3d?g~k,	?g6[ ?gLuT5?gu7Ou?)@g[|
ٹ@g8Mr@gM;p@g^P@gmO@g+S@g7<@g*;@g,yp9@g_L7@g#@gnض(&@gqh,@gr)@g9]c)@gEV@g*Ph@gM@g㥛 P@grO@gr/@g@g@@g3.H@g3ı.n@geS@g
@gg?RD@gV	?gd`TR?gu7@gӂ}@g[%X@gs?ga)?g?gû\wb?gM֨ht?gO@a?g>??g]o%?gjt?gdu?g*<?g=~o?g~jt?gd?gmJR?gaۢ?gZM?g7qrCQ?gf?gm?g\='o|?gCVzN?g;6~?gE_A?gsFZ*o?go%;6?g~:p?g|Pk?gO|?gKqUw?goʡ?rowscolsreturnc                    [         R                  " SX-  S5      R                  X5      R                  5       n[	        X-   S-
  5       Vs/ s H  n/ PM     nn/ n[	        U 5       HU  n[	        U5       HC  nXg-   nUS-  S:X  a  XH   R                  SX&   U   5        M+  XH   R                  X&   U   5        ME     MW     U H  n	UR                  U	5        M     U$ s  snf )z0Generate zigzag scan order for DCT coefficients.r   r      )nparangereshapetolistrangeinsertappendextend)
r/   r0   
idx_matrix_diazigzagijsds
             Q/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/csatv2.py_zigzag_permutationrE   X   s    1dk1-55dAHHJJT[1_-
.-!2-C
.F4[tAA1uzaq!12jmA./   a M /s   
Ckernel_sizeorthonormalc                    [        X#S9n[        R                  " U 40 UD6nUR                  5       R	                  5       R                  SU 5      n[        R                  " XfR                  S/5      /SS9n[        R                  R                  USS9SS2SU 24   n[        R                  " SU[        R                  S9[        R                  -  [        R                  " X[        R                  S9SSS24   -  n[        R                  " XpS-  -  5      nXg-  nUR                  nU(       a  USS2S4   [        R                   " [        R                  " SU S	-  -  40 UD65      -  USS2S4'   USS2SS24   [        R                   " [        R                  " SU S-  -  40 UD65      -  USS2SS24'   UR	                  5       R
                  " UR"                  6 nU$ )
z#Generate Type-II DCT kernel matrix.devicedtyper   dimNy             r3   r      )dicttorcheyeclone
contiguousviewcatflipffttensor	complex64pir5   longexprealsqrtshape)rF   rG   rJ   rK   ddxvks           rD   _dct_kernel_type_2re   i   s    
V	)B		+$$A		##B4A		1ffaSk"+A		aR L[L1ASu?%((J
,,{
DT1W
M	N  			!Q'(A	A	AAqD'EJJu||Aq4I'PR'PQQ!Q$QU8ejja;?6K)Rr)RSS!QR%	QWW%AH    c                 T    [         R                  R                  [        XX#5      5      $ )z9Generate Type-III DCT kernel matrix (inverse of Type-II).)rQ   linalginvre   )rF   rG   rJ   rK   s       rD   _dct_kernel_type_3rj      s!     <<.{WXXrf   c            	          ^  \ rS rSrSr    SS\S\S\SS4U 4S jjjrS	\R                  S\R                  4S
 jr
SrU =r$ )Dct1d   z#1D Discrete Cosine Transform layer.NrF   kernel_typerG   r1   c                    > [        XES9n[        T	U ]	  5         [        [        S.nXr    " X40 UD6R
                  nU R                  SUR                  5       5        U R                  SS 5        g )NrI   )23weightsbias)	rP   super__init__re   rj   Tregister_bufferrT   register_parameter)
selfrF   rn   rG   rJ   rK   ra   kerneldct_weights	__class__s
            rD   ru   Dct1d.__init__   sh     -)0BC}.{N2NPPY(>(>(@A-rf   rb   c                 X    [         R                  " XR                  U R                  5      $ N)Flinearrr   rs   ry   rb   s     rD   forwardDct1d.forward   s    xx<<33rf    r3   TNN__name__
__module____qualname____firstlineno____doc__intboolru   rQ   Tensorr   __static_attributes____classcell__r|   s   @rD   rl   rl      sd    -
  ! $.. . 	. 
. .4 4%,, 4 4rf   rl   c            	          ^  \ rS rSrSr    SS\S\S\SS4U 4S jjjrS	\R                  S\R                  4S
 jr
SrU =r$ )Dct2d   z#2D Discrete Cosine Transform layer.NrF   rn   rG   r1   c                 X   > [        XES9n[        TU ]	  5         [        XU40 UD6U l        g NrI   )rP   rt   ru   rl   	transformry   rF   rn   rG   rJ   rK   ra   r|   s          rD   ru   Dct2d.__init__   s-     -{KKrf   rb   c                     U R                  U R                  U5      R                  SS5      5      R                  SS5      $ )NrL   )r   	transposer   s     rD   r   Dct2d.forward   s5    ~~dnnQ/99"bABLLRQSTTrf   )r   r   r   r   s   @rD   r   r      sn    -
  ! $
L
L 
L 	
L 

L 
LU U%,, U Urf   r   out_chsc           
      $  ^^ [        [        R                  U5      m[        U4S jU 5       5      n[	        U5      nX-  S:X  a  X:  d   SU  SU SU SU S3	5       eX-  mU4S jU 5       u  pEnXE-   U-   U :X  a  [        XEU5      S:  d   eXEU4$ )	Nc              3   ,   >#    U  H	  oT-  v   M     g 7fr   r   ).0rb   gs     rD   	<genexpr>!_split_out_chs.<locals>.<genexpr>   s     $e1fe   r   zout_chs=z( can't be split into Y/Cb/Cr with ratio z
 (reduced z!); out_chs must be a multiple of .c              3   ,   >#    U  H	  oT-  v   M     g 7fr   r   )r   riunits     rD   r   r      s     'QrdQr   )r   mathgcdtuplesummin)	r   ratiordenomycbcrr   r   s	          @@rD   _split_out_chsr      s    txxA$e$$AFE?aG$4 
7)CE7 K37wa	A4
 D'Q'IA26B;'!c!nq&888"9rf   c                   X  ^  \ rS rSrSr     SS\S\S\S\SS4
U 4S	 jjjrSS
 jrSS jr	SS jr
S\R                  S\R                  4S jrS\R                  S\R                  4S jrS\R                  S\R                  4S jrS\R                  S\R                  4S jrSrU =r$ )LearnableDct2d   zKLearnable 2D DCT stem with RGB to YCbCr conversion and frequency selection.NrF   rn   rG   r   r1   c                   > [        XVS9n[        TU ]	  5         Xl        [	        XU40 UD6U l        [        X5      U l        [        USS9u  pn
[        R                  " US-  U4SSS.UD6U l        [        R                  " US-  U	4SSS.UD6U l        [        R                  " US-  U
4SSS.UD6U l        U R                  S[        R                   " S	S
XVS9SS9  U R                  S[        R                   " S	S
XVS9SS9  U R                  S[        R                   " S	SSXVS9SS9  U R                  S[        R                   " S	SSXVS9SS9  U R#                  5         g )NrI      rO   rO   )r   r3   r   r   rF   paddingmean   @   F)
persistentvarimagenet_meanimagenet_std)rP   rt   ru   rd   r   r   rE   permutationr   nnConv2dconv_yconv_cbconv_crrw   rQ   emptyreset_parameters)ry   rF   rn   rG   r   rJ   rK   ra   y_chcb_chcr_chr|   s              rD   ru   LearnableDct2d.__init__   s_    -{KK.{H+G:FUyy!14YaQRYVXYyy!15YaQRYVXYyy!15YaQRYVXY 	VU[[Bv%S`efUEKK2f$R_de_ekk!Q&.^kpq^U[[Aq-]jop 	rf   c                 $    U R                  5         g)zInitialize buffers.N_init_buffersry   s    rD   r   LearnableDct2d.reset_parameters       rf   c                    U R                   R                  [        R                  " [        5      5        U R
                  R                  [        R                  " [        5      5        U R                  R                  [        R                  " / SQ5      R                  SSS5      5        U R                  R                  [        R                  " / SQ5      R                  SSS5      5        g)z.Compute and fill non-persistent buffer values.g
ףp=
?gv/?gCl?r   r   gZd;O?gy&1?g?N)
r   copy_rQ   rY   	_DCT_MEANr   _DCT_VARr   rU   r   r   s    rD   r   LearnableDct2d._init_buffers   s    		Y/0u||H-.  .C!D!I!I!QPQ!RS-B C H HAq QRrf   c                 $    U R                  5         g)z"Initialize non-persistent buffers.Nr   r   s    rD   init_non_persistent_buffers*LearnableDct2d.init_non_persistent_buffers   r   rf   rb   c                 p    UR                  U R                  5      R                  U R                  5      S-  $ )z3Convert from ImageNet normalized to [0, 255] range.   )mulr   add_r   r   s     rD   _denormalizeLearnableDct2d._denormalize   s-    uuT&&',,T-?-?@3FFrf   c                     USS2S4   USS2S4   USS2S4   pCnUS-  US-  -   US-  -   nSXE-
  -  S	-   nS
X%-
  -  S	-   n[         R                  " XVU/SS9$ )z5Convert RGB to YCbCr color space (BCHW input/output).Nr   r   r3   gA`"?gbX9?gv/?g?5^I?   g7A`?rM   )rQ   stack)ry   rb   r   r   br   r   r   s           rD   _rgb_to_ycbcrLearnableDct2d._rgb_to_ycbcr   su    AqD'1QT7AadGaIE	!AI-ae_s"ae_s"{{A2;A..rf   c                 H    U R                   S-  S-   nXR                  -
  U-  $ )z8Normalize DCT coefficients using precomputed statistics.g      ?g:0yE>)r   r   )ry   rb   stds      rD   _frequency_normalize#LearnableDct2d._frequency_normalize  s%    hh#o$II$$rf   c                 2   UR                   u  p#pEU R                  U5      nU R                  U5      nUR                  X#X@R                  -  U R                  XPR                  -  U R                  5      nUR                  SSSSSS5      nU R                  U5      nUR                  SX0R                  U R                  -  5      nUS S 2S S 2U R                  4   nU R                  U5      nUR                  X$U R                  -  XPR                  -  US5      nUR                  SSSSS5      R                  5       nU R                  US S 2S4   5      nU R                  US S 2S4   5      nU R                  US S 2S4   5      n[        R                  " XgU/SS9$ )	Nr   r3   rO   r   r      rL   rM   )r`   r   r   r6   rd   permuter   r   r   rT   r   r   r   rQ   rV   )	ry   rb   r   chwx_yx_cbx_crs	            rD   r   LearnableDct2d.forward  s[   WW
aa q!IIaAKffdffEIIaAq!Q'NN1IIb!VVdff_-aD$$$%%%a(IIadffa66k1b9IIaAq!$//1kk!AqD'"||AadG$||AadG$yy#T*22rf   )r   r   r   rd   r   r   )r3   T    NNr1   N)r   r   r   r   r   r   r   ru   r   r   r   rQ   r   r   r   r   r   r   r   r   s   @rD   r   r      s    U
  ! $     	 
   
   <SGell Gu|| G/u|| / /%ell %u|| %
3 3%,, 3 3rf   r   c            	          ^  \ rS rSrSr    SS\S\S\SS4U 4S jjjrS	\R                  S\
\R                  \R                  4   4S
 jrSrU =r$ )
Dct2dStatsi  z5Utility module to compute DCT coefficient statistics.NrF   rn   rG   r1   c                    > [        XES9n[        TU ]	  5         Xl        [	        XU40 UD6U l        [        X5      U l        g r   )rP   rt   ru   rd   r   r   rE   r   r   s          rD   ru   Dct2dStats.__init__  s?     -{KK.{Hrf   rb   c                    UR                   u  p#pEUR                  X#X@R                  -  U R                  XPR                  -  U R                  5      nUR                  SSSSSS5      nU R	                  U5      nUR                  SX0R                  U R                  -  5      nUS S 2S S 2U R
                  4   nUR                  X$U R                  -  -  XPR                  -  -  US5      n[        R                  " SS/5      n[        R                  " SS/5      n[        S5       H?  n[        R                  " US S 2U4   SS	9Xh'   [        R                  " US S 2U4   SS	9Xx'   MA     Xg4$ )
Nr   r3   rO   r   r   r   rL   r   rM   )r`   r6   rd   r   r   r   rQ   zerosr8   r   r   )	ry   rb   r   r   r   r   	mean_listvar_listr@   s	            rD   r   Dct2dStats.forward,  s+   WW
aIIaAKffdffEIIaAq!Q'NN1IIb!VVdff_-aD$$$%IIa;'1;7B?KKB(	;;2w'qA ::a1g15IL))AadG3HK  ""rf   )rd   r   r   r   )r   r   r   r   r   r   r   ru   rQ   r   r   r   r   r   r   s   @rD   r   r     sx    ?
  ! $II I 	I 
I I# #%ell0J*K # #rf   r   c            	          ^  \ rS rSrSr    SS\S\S\\   SS4U 4S jjjrS	\	R                  S\	R                  4S
 jrSrU =r$ )Blocki>  z,ConvNeXt-style block with spatial attention.NrN   	drop_pathls_init_valuer1   c                 d  > [        XES9n[        TU ]	  5         [        R                  " X4SSUS.UD6U l        [        R                  " U4SS0UD6U l        [        R                  " USU-  40 UD6U l	        [        R                  " 5       U l        [        SU-  4SS	0UD6U l        [        R                  " SU-  U40 UD6U l        U(       a  [        U4S
U0UD6O[        R                   " 5       U l        US:  a  [%        U5      O[        R                   " 5       U l        [)        S0 UD6U l        g )NrI      r   )rF   r   groupsepsư>rO   channels_lastTinit_valuesr)   r   )rP   rt   ru   r   r   dwconv	LayerNormnormLinearpwconv1GELUactr   grnpwconv2r   Identitylsr
   r	  SpatialAttentionattn)ry   rN   r	  r
  rJ   rK   ra   r|   s          rD   ru   Block.__init__A  s     -iiUa3URTULL5$5"5	yya#g44779%a#gHTHRHyyS#44HU,sDDD[][f[f[h09B),BKKM$*r*	rf   rb   c                    UnU R                  U5      nUR                  SSSS5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  U5      nU R                  U5      nUR                  SSSS5      nU R                  U5      n[        R                  " X1R                  SS  SSS9nX-  nU R                  U5      nX R                  U5      -   $ )Nr   r3   r   r   bilinearT)sizemodealign_corners)r  r   r  r  r  r  r  r  r   interpolater`   r  r	  )ry   rb   shortcutr  s       rD   r   Block.forwardU  s    KKNIIaAq!IIaLLLOHHQKHHQKLLOIIaAq!yy|}}T*TXYHGGAJ..+++rf   )	r  r  r	  r  r  r  r  r  r  )r)   NNN)r   r   r   r   r   r   floatr   ru   rQ   r   r   r   r   r   s   @rD   r  r  >  sh    6
  "-1++ + $E?	+ 
+ +(, ,%,, , ,rf   r  c                   t   ^  \ rS rSrSr  S S	U 4S jjjrS\R                  S\R                  4S jrSr	U =r
$ )
SpatialTransformerBlockih  zLightweight transformer block for spatial attention (1-channel, 7x7 grid).

This is a simplified transformer with single-head, 1-dim attention over spatial
positions. Used inside SpatialAttention where input is 1 channel at 7x7 resolution.
r1   c                 >  > [        XS9n[        TU ]	  5         [        SSS0UD6U l        [
        R                  " S	0 UD6U l        [
        R                  " S
SS0UD6U l	        [
        R                  " S	0 UD6U l
        [        SS[
        R                  0UD6U l        g )NrI   in_chansr   rs   F)r   rO   r   	act_layerr   )r   )r   r   )rP   rt   ru   PosConv	pos_embedr   r  norm1r  qkvnorm2r   r  mlpry   rJ   rK   ra   r|   s       rD   ru    SpatialTransformerBlock.__init__o  s    
 - 2!2r2\\*r*
99444 \\*r*
8"''8R8rf   rb   c                    UR                   u  p#pEUnUR                  S5      R                  SS5      nU R                  U5      nU R	                  XtU45      nU R                  U5      nUR                  S5      u  pnXR                  SS5      -  R                  SS9nX-  R                  S5      nUR                  SS5      R                  X#XE5      nXg-   nUnUR                  S5      R                  SS5      nU R                  U R                  U5      5      nUR                  SS5      R                  X#XE5      nXg-   nU$ )Nr3   r   rL   r   rM   )r`   flattenr   r0  r/  r1  unbindsoftmax	unsqueezer6   r3  r2  )ry   rb   BCHWr&  x_tr1  qrd   rc   r  s                rD   r   SpatialTransformerBlock.forward  s&   WW
a iil$$Q*jjonnSa&) hhsm**R.aKKB''00R08x""2&mmAq!))!5N iil$$Q*hhtzz#'mmAq!))!5Nrf   )r3  r0  r2  r/  r1  NNr   r   r   r   r   r   ru   rQ   r   r   r   r   r   s   @rD   r*  r*  h  sD     9 
	9 9  %,,  rf   r*  c                   t   ^  \ rS rSrSr  S S	U 4S jjjrS\R                  S\R                  4S jrSr	U =r
$ )
r  i  zBSpatial attention module using channel statistics and transformer.r1   c                    > [        XS9n[        TU ]	  5         [        R                  " S5      U l        [        R                  " SSSS.UD6U l        [        S0 UD6U l	        g )NrI   )r  r  r  r   r   )r3   r   r   )
rP   rt   ru   r   AdaptiveAvgPool2davgpoolr   convr*  r  r4  s       rD   ru   SpatialAttention.__init__  sU    
 -++F3IIC1CC	+1b1	rf   rb   c                     UR                  SSS9nUR                  SSS9n[        R                  " X#/SS9nU R	                  U5      nU R                  U5      nU R                  U5      nU$ )Nr   T)rN   keepdimrM   )r   amaxrQ   rV   rG  rH  r  )ry   rb   x_avgx_maxs       rD   r   SpatialAttention.forward  sd    1d+1d+IIun!,LLOIIaLIIaLrf   )r  rG  rH  rB  r   rC  r   s   @rD   r  r    sB    L 	2 
		2 	2 %,,  rf   r  c                      ^  \ rS rSrSr         SS\S\S\S\S\S	\S
\S\S\\   SS4U 4S jjjr	S\
R                  S\
R                  4S jrSrU =r$ )TransformerBlocki  zQTransformer block with optional downsampling and convolutional position encoding.Ninpoup	num_headsattn_head_dim
downsample	attn_drop	proj_dropr	  r
  r1   c           
      N  > [        XS9n[        TU ]	  5         [        US-  5      nXPl        U R                  (       a\  [
        R                  " SSS5      U l        [
        R                  " SSS5      U l        [
        R                  " XSSS4SS0UD6U l
        ON[
        R                  " 5       U l        [
        R                  " 5       U l        [
        R                  " 5       U l
        [        SS	U0UD6U l        [
        R                  " U40 UD6U l        [!        SUUUUUUS
.UD6U l        U	(       a  [%        U4SU	0UD6O[
        R                  " 5       U l        US:  a  [)        U5      O[
        R                  " 5       U l        [
        R                  " U40 UD6U l        [/        X-U4[
        R0                  US.UD6U l        U	(       a  [%        U4SU	0UD6O[
        R                  " 5       U l        US:  a  [)        U5      U l        g [
        R                  " 5       U l        g )NrI   rO   r   r3   r   r   rs   Fr,  )rN   rT  rU  dim_outrW  rX  r  r)   )r-  dropr   )rP   rt   ru   r   rV  r   	MaxPool2dpool1pool2r   projr  r.  r/  r  r0  r   r  r   ls1r
   
drop_path1r2  r   r  r3  ls2
drop_path2)ry   rR  rS  rT  rU  rV  rW  rX  r	  r
  rJ   rK   ra   
hidden_dimr|   s                 rD   ru   TransformerBlock.__init__  s    -q\
$??aA.DJaA.DJ		#Aq!F%F2FDIDJDJDI 4#44\\#,,
 
'
 
	 HU:cC}CCZ\ZeZeZg1:R(9-R[[]\\#,,
sUrwwYURTUGT:cC}CCZ\ZeZeZg1:R(9-R[[]rf   rb   c                 d   U R                   (       a  U R                  U R                  U5      5      nU R                  U5      nUR                  u  pEpgUR                  S5      R                  SS5      nU R                  U5      nU R                  X6U45      nU R                  U R                  U5      5      nUR                  SS5      R                  USXg5      nX R                  U5      -   nOUR                  u  pEpgUnUR                  S5      R                  SS5      nU R                  U5      nU R                  X6U45      nU R                  U R                  U5      5      nUR                  SS5      R                  USXg5      nX R                  U5      -   nUR                  u  pEpgUnUR                  S5      R                  SS5      nU R                  U R                  U R                  U5      5      5      nUR                  SS5      R                  XEXg5      nX R!                  U5      -   nU$ )Nr3   r   rL   )rV  r_  r]  r^  r`   r7  r   r0  r/  r`  r  r6   ra  rb  r3  r2  rc  )ry   rb   r&  r?  r;  r<  r=  r>  s           rD   r   TransformerBlock.forward  s   ??yyA/H**Q-CJA!++a.**1a0C**S/C..!f-C((499S>*C--1%--aQ:C??3//AJA!H))A,((A.C**S/C..!f-C((499S>*C--1%--aQ:C??3//A WW
aiil$$Q*hhtxx

301mmAq!))!5s++rf   )r  rV  ra  rc  r`  rb  r3  r0  r2  r]  r^  r/  r_  )	   r   Fr)   r)   r)   NNN)r   r   r   r   r   r   r   r(  r   ru   rQ   r   r   r   r   r   s   @rD   rQ  rQ    s    [ !#$!!!-1-S-S -S 	-S
 -S -S -S -S -S $E?-S 
-S -S^ %,,  rf   rQ  c                      ^  \ rS rSrSr  SS\SS4U 4S jjjrS\R                  S\	\\4   S\R                  4S	 jr
S
rU =r$ )r.  i  z Convolutional position encoding.Nr,  r1   c           	      x   > [        X#S9n[        TU ]	  5         [        R                  " X4SSSSUS.UD6U l        g )NrI   r   r   T)rF   strider   rs   r  )rP   rt   ru   r   r   r_  )ry   r,  rJ   rK   ra   r|   s        rD   ru   PosConv.__init__  s@     -IIhwaST[_hpwtvw	rf   rb   r"  c                     UR                   u  p4nUu  pgUR                  SS5      R                  X5Xg5      nU R                  U5      U-   nUR	                  S5      R                  SS5      $ )Nr   r3   )r`   r   rU   r_  r7  )	ry   rb   r"  r;  Nr<  r=  r>  cnn_feats	            rD   r   PosConv.forward  sa    ''a;;q!$))!5IIh(*yy|%%a++rf   )r_  rB  )r   r   r   r   r   r   ru   rQ   r   r   r   r   r   r   s   @rD   r.  r.    s]    *
 	xx
 
x x, ,U38_ , , ,rf   r.  c                   <  ^  \ rS rSrSr           S'S\S\S\\S4   S\\S4   S	\\S4   S
\S\S\	\   S\
SS4U 4S jjjrS(S\4S jjrS(S\R                  S\SS4S jjr\R"                  R$                  S\R                  4S j5       rS)S\S\	\
   SS4S jjr\R"                  R$                  S(S\SS4S jj5       rS\R,                  S\R,                  4S jr     S*S\R,                  S\	\\\\   4      S\S\S\
S\S\\\R,                     \\R,                  \\R,                     4   4   4S jjr   S+S\\\\   4   S \S!\S\\   4S" jjrS,S\R,                  S#\S\R,                  4S$ jjrS\R,                  S\R,                  4S% jrS&rU =r$ )-r   i  zCSATv2: Frequency-domain vision model with spatial attention.

A hybrid architecture that processes images in the DCT frequency domain
with ConvNeXt-style blocks and transformer attention.
Nnum_classesr,  dims.depthstransformer_depthsdrop_path_ratetransformer_drop_pathr
  global_poolr1   c                   > [        XS9n[        TU ]	  5         US:w  a  [        R                  " SU S35        Xl        X l        Xl        SU l        US   U l	        U R                  U l
        [        US   SS	S
9/U l        Sn[        U5       H8  u  nnUS:  a  US-  nU R                  R                  [        UUSU 3S
95        M:     U(       a  [        U5      O[        S [        XE5       5       5      n[!        ["        R$                  " SUU5      R'                  5       5      n/ n[        XE5       Hb  u  nnU[)        UU-
  5       Vs/ s H  n[+        U5      PM     sn-  nU[)        U5       Vs/ s H  nU(       a  [+        U5      OSPM     sn-  nMd     [-        SSUS   0UD6U l        [!        U5      n/ n[        [        X4U5      5       H  u  nu  nnnUS:  a!  [0        R2                  " X?S-
     U4SSS.UD6/O/ [)        UU-
  5       Vs/ s H  n[5        SU[+        U5      US.UD6PM     sn-   [)        U5       Vs/ s H  n[7        SUU[+        U5      US.UD6PM     sn-   U[9        U5      S-
  :  a  [;        U4SS0UD6/O/ -   nUR                  [0        R<                  " U6 5        M     [0        R<                  " U6 U l        [A        US   U4SU	0UD6U l!        U RE                  SS9  g s  snf s  snf s  snf s  snf )NrI   r   z5CSATv2 is designed for 3-channel RGB input. in_chans=z* may not work correctly with the DCT stem.FrL   r   rh  stem_dct)num_chs	reductionmoduler3   stages.c              3   .   #    U  H  u  pX-
  v   M     g 7fr   r   )r   rC   ts      rD   r   "CSATv2.__init__.<locals>.<genexpr>H  s     DwWvtqQUWvs   r)   r   r   )rF   rk  )rN   r	  r
  )rR  rS  r	  r
  r  r  	pool_typeneeds_reset)rh  r   )#rP   rt   ru   warningswarnrr  r,  rx  grad_checkpointingnum_featureshead_hidden_sizefeature_info	enumerater:   r   zipiterrQ   linspacer7   r8   nextr   rz  r   r   r  rQ  lenr   
Sequentialstagesr   headinit_weights)ry   rr  r,  rs  rt  ru  rv  rw  r
  rx  rJ   rK   kwargsra   r|  r@   rN   total_blocksdp_iterdp_ratesdeptht_depthr=   r  layersr|   s                            rD   ru   CSATv2.__init__!  s@    -q=MM$:%OQ ' &"' H $ 1 1 "$q'QzRS	oFAs1uQ	$$T#U\]^\_S`%ab & '<s6{DwWZ[aWvDwAwu~~aFMMOP!&=NE7ego0FG0F1g0FGGHQVW^Q_`Q_A*?gRGQ_``H > '@$q'@R@ x.(1#dDV2W(X$A$UG RSUVQV"))DQKL!ALLM\^ejkpszkze{|e{`a[3$w-}[XZ[e{|} z  @G  zH  I  zHtu!ocsd7m[holno  zH  I	I 893v;?7J+c2t2r23PRT  MM"--01 )Y mmV,)$r(K];]Z\]	 	e,3 H` } Is   
K62K;"L #Lr  c                 J    U R                  [        U R                  US95        g )Nr  )applyr   _init_weights)ry   r  s     rD   r  CSATv2.init_weightsg  s    

74--;GHrf   mc                 P   [        U[        R                  [        R                  45      (       aM  [	        UR
                  SS9  UR                  b+  [        R                  R                  UR                  S5        g g U(       a#  [        US5      (       a  UR                  5         g g g )Ng{Gz?)r   r   r   )
isinstancer   r   r  r	   weightrs   init	constant_hasattrr   )ry   r  r  s      rD   r  CSATv2._init_weightsj  st    a"))RYY/00!((-vv!!!!&&!, "WQ(:;;  <[rf   c                 .    U R                   R                  $ r   )r  fcr   s    rD   get_classifierCSATv2.get_classifierr  s    yy||rf   c                 T    Xl         Ub  X l        U R                  R                  XS9  g )N)r  )rr  rx  r  reset)ry   rr  rx  s      rD   reset_classifierCSATv2.reset_classifierv  s&    &"*		;rf   enablec                     Xl         g r   )r  )ry   r  s     rD   set_grad_checkpointingCSATv2.set_grad_checkpointing|  s    "(rf   rb   c                     U R                  U5      nU R                  (       a;  [        R                  R	                  5       (       d  [        U R                  U5      nU$ U R                  U5      nU$ r   )rz  r  rQ   jitis_scriptingr   r  r   s     rD   forward_featuresCSATv2.forward_features  sV    MM!""599+A+A+C+Ct{{A.A  AArf   indicesr  
stop_early
output_fmtintermediates_onlyc                 \   US:X  d   S5       e/ n[        [        U R                  5      S-   U5      u  pU R                  U5      nSU;   a  UR	                  U5        [
        R                  R                  5       (       d  U(       d  U R                  n
OU	S:  a  U R                  SU	 O/ n
[        U
5       Hi  u  pU R                  (       a/  [
        R                  R                  5       (       d  [        X5      nOU" U5      nUS-   U;   d  MX  UR	                  U5        Mk     U(       a  U$ X4$ )a  Forward pass returning intermediate features.

Args:
    x: Input image tensor.
    indices: Indices of features to return (0=stem_dct, 1-4=stages). None returns all.
    norm: Apply norm layer to final intermediate (unused, for API compat).
    stop_early: Stop iterating when last desired intermediate is reached.
    output_fmt: Output format, must be 'NCHW'.
    intermediates_only: Only return intermediate features.

Returns:
    List of intermediate features or tuple of (final features, intermediates).
NCHWzOutput format must be NCHW.r   r   N)r   r  r  rz  r:   rQ   r  r  r  r  r   )ry   rb   r  r  r  r  r  intermediatestake_indices	max_indexr  feat_idxstages                rD   forward_intermediatesCSATv2.forward_intermediates  s   , V#B%BB#"6s4;;7G!7KW"UMM!  #99!!##:[[F 1:AT[[),2F(0OH&&uyy/E/E/G/Gu(!H!||+$$Q'  1   rf   
prune_norm
prune_headc                 4   [        [        U R                  5      S-   U5      u  pEUS:  a  U R                  SU O[        R                  " 5       U l        U(       a$  [        R
                  " 5       U R                  l        U(       a  U R                  SS5        U$ )a'  Prune layers not required for specified intermediates.

Args:
    indices: Indices of intermediate layers to keep (0=stem_dct, 1-4=stages).
    prune_norm: Whether to prune the final norm layer.
    prune_head: Whether to prune the classifier head.

Returns:
    List of indices that were kept.
r   r   N )	r   r  r  r   r  r  r  r  r  )ry   r  r  r  r  r  s         rD   prune_intermediate_layers CSATv2.prune_intermediate_layers  sn    " #7s4;;7G!7KW"U1:Qdkk*9-BMMO[[]DIIN!!!R(rf   
pre_logitsc                      U R                  XS9$ )N)r  )r  )ry   rb   r  s      rD   forward_headCSATv2.forward_head  s    yyy22rf   c                 F    U R                  U5      nU R                  U5      $ r   )r  r  r   s     rD   r   CSATv2.forward  s#    !!!$  ##rf   )
r  rx  r  r  r  r,  rr  r  r  rz  )  r   )r   H      i  )r3   r3   rh     )r   r   r3   r3   r)   FNavgNN)Tr   )NFFr  F)r   FTF) r   r   r   r   r   r   r   r(  r   r   strru   r  r   Moduler  rQ   r  ignorer  r  r  r   r  r   r   r  r  r  r   r   r   r   s   @rD   r   r     s     $$6&22>$'*/-1$D-D- D- S/	D-
 #s(OD- !&c3hD- "D- $(D- $E?D- D- 
D- D-LI I!ryy !t !t ! YY		  <C <hsm <W[ < YY)T )T ) )%,, 5<<  8<$$',0 ||0  eCcN340  	0 
 0  0  !%0  
tELL!5tELL7I)I#JJ	K0 h ./$#	3S	>*  	
 
c83ell 3 3 3$ $%,, $ $rf   c                      U SSSSSSSS/ S	.
UE$ )
Nr  )r      r  )rh  rh  r   r   r!  g      ?zhead.fc)
urlrr  
input_size	pool_sizer   r   interpolationcrop_pct
classifier
first_convr   )r  r  s     rD   _cfgr    s0    =v%.C#r  rf   ztimm/)	hf_hub_id)r     r  bicubic)r  r  r  )
   r  )r  r  r  )zcsatv2.r512_in1kzcsatv2_21m.sw_r640_in1kzcsatv2_21m.sw_r512_in1k
state_dictmodelc                   ^ SU ;   a  U $ SSK nSSSS.mUR                  S5      nUR                  S5      nUR                  S	5      nUR                  S
5      nSUR                  S[        4U4S jjn0 nU R	                  5        GH  u  pUR                  SU	5      n	U	R                  SS5      R                  SS5      R                  SS5      n	UR                  Xy5      n	SU	;   a#  U	R                  SS5      U
R                  S5      pO(SU	;   a"  U	R                  SS5      U
R                  S5      pSU	;   a  U	R                  SS5      n	OJSU	;   a  U	R                  SS5      n	O1SU	;   a  U	R                  SS5      n	OS U	;   a  U	R                  S S!5      n	S"U	;   a3  U	R                  S#S$5      R                  S%S&5      R                  S"S&5      n	OS'U	;   a  U	R                  S'S(5      n	S)U	;   a  U	R                  S)S*5      n	OS+U	;   a  U	R                  S+S,5      n	S-U	;   a  S&U	;  a  U	R                  S-S.5      n	UR                  S/U	5      n	UR                  S0U	5      n	XU	'   GM     U$ )1zRemap original CSATv2 checkpoint to timm format.

Handles two key structural changes:
1) Stage naming: stages1/2/3/4 -> stages.0/1/2/3
2) Downsample position: moved from end of stage N to start of stage N+1
zstages.0.0.grn.weightr   Nr   	   )r   r3   r   z^dct\.z^stages([1-4])\.(\d+)\.(.*)$z^head\.z^norm\.r  r1   c                    > [        U R                  S5      5      [        U R                  S5      5      U R                  S5      p2nUT;   a  UTU   :X  a  SU SU 3$ US:X  a  SU SU 3$ SUS-
   SUS-    SU 3$ )Nr   r3   r   r~  z.0.z	stages.0.r   )r   group)r  r  idxrestdownsample_idxs       rD   remap_stage)checkpoint_filter_fn.<locals>.remap_stage  s    qwwqz?C
OQWWQZDN"snU.C'CUG3tf--A:se1TF++1S1WIQtf55rf   z	stem_dct.z.Y_Conv.z.conv_y.z	.Cb_Conv.z	.conv_cb.z	.Cr_Conv.z	.conv_cr.z	grn.gammaz
grn.weightrL   zgrn.betazgrn.biasz
.ff.net.0.z	.mlp.fc1.z
.ff.net.3.z	.mlp.fc2.z	.ff_norm.z.norm2.z.attn_norm.z.norm1.z.attention.attention.z!.attention.attention.attn.to_qkv.z.attn.attn.qkv.z.attention.attention.attn.z.attn.attn.z.attention.z.attn.z.attn.to_qkv.z
.attn.qkv.z.attn.to_out.0.z.attn.proj.z.attn.pos_embed.z.pos_embed.zhead.fc.z
head.norm.)recompileMatchr  itemssubreplacer6   )r  r  r  dct_restage_rehead_renorm_rer  outrd   rc   r  s              @rD   checkpoint_filter_fnr     s\    *, qQ'Nzz)$Fzz9:Hzz*%Gzz*%G6rxx 6C 6 C  "JJ{A&YYzJ/[1[1 	

 LL( !99[,72q1_99Z4aiimq 1		,4AQ		,4AA		+y1Aa		-3A #a'>@QRG8OG3O  a		-2A a		/<8A!#		+];A "}A'=		,m<A KK
A&KKa(Aa #d Jrf   variant
pretrainedc           	      x    UR                  SS5      n[        [        U U4[        [	        USS9[
        U    S.UD6$ )Nout_indices)r   r3   r   rO   T)r  flatten_sequential)pretrained_filter_fnfeature_cfgdefault_cfg)popr   r   r   rP   default_cfgs)r  r  r  r  s       rD   _create_csatv2r  L  sN    **]L9K 2[TJ )  rf   c                     [        SU 40 UD6$ )Nr   )r  )r  r  s     rD   r   r   Y  s    (J9&99rf   c           	      F    [        SSSS9n[        SU 40 [        U40 UD6D6$ )N)0   `      i  )r   r   r  rh  )r   r   rO   r   )rs  rt  ru  
csatv2_21m)rP   r  )r  r  
model_argss      rD   r  r  ^  s5     !)J ,
Qd:6P6PQQrf   rB  )r   )r  r  )Er   r   r  	functoolsr   r   typingr   r   r   r   numpyr4   rQ   torch.nnr   torch.nn.functional
functionalr   timm.layersr	   r
   r   r   r   r   r   r   timm.layers.grnr   timm.models._builderr   timm.models._featuresr   timm.models._manipulater   r   	_registryr   r   __all__r   r   r   rE   r   r   re   rj   r  rl   r   r   r   r   r  r*  r  rQ  r.  r   r  r
  rP   r   r  r  r   r  r   rf   rD   <module>r      sX     % / /      } } } . 5 6 > <X
	68c  c ( 	
 \\: 	YYY
 \\Y4BII 4,UBII U&C "Q3RYY Q3h #  #F',BII ',T0bii 0fryy 0Oryy Od,bii ,*A$RYY A$H %  $  
  $ & "OT O")) O Od
C 
T 
 
 :t :& : : R4 Rf R Rrf   