
    RЦi                        S r SSKrSSKJrJrJrJr  SSKrSSKJr  SSK	J
r
  SSKJr  SSKJr     SVS
\S\S\S\\R$                     4S jjr   SWS
\S\S\S\\R$                     S\R(                  4
S jjrSSSSS\R,                  4S\\   S\S\S\S\S\\R$                     S\R.                  S\R(                  4S jjrS\\   S\\   4S jrSSSSSSS	SSS S\R,                  4S\\   S!\\R(                     S
\S"\S\S\S#\S$\S%\\\      S&\S'\S\\R$                     S\R.                  S\\R(                     4S( jjr " S) S*\R8                  5      rS+ rS,\R(                  S\R(                  4S- jr SXS,\R(                  S.\R(                  S/\R(                  S0\S\R(                  4
S1 jjr  SXS,\\R(                     S.\R(                  S/\R(                  S0\S\\R(                     4
S2 jjr! SXS,\R(                  S3\R(                  S0\S\R(                  4S4 jjr" SXS,\R(                  S5\R(                  S6\R(                  S7\S\R(                  4
S8 jjr#SSSSSS	SSS S\R,                  4S\\   S!\\R(                     S\S"\S\S\S$\S%\\\      S&\S'\S\\R$                     S\R.                  4S9 jjr$ " S: S;\R8                  5      r% " S< S=\R8                  5      r&  SYS\R,                  S>.S?\S@\SA\S\SB\S\R(                  4SC jjjr'\RP                  RR                  \
S S\R,                  4SD\\   S'\S\\R$                     S\R.                  S\\R(                  \R(                  4   4
SE jj5       5       r*SF\R(                  SG\R(                  SH\R(                  S\R(                  4SI jr+ " SJ SK\R8                  5      r,\RP                  RR                  \
SLS SSM\R,                  4SN\SO\SP\S'\S&\S\R$                  S\R.                  S\R(                  4SQ jj5       5       r- " SR SS\R8                  5      r.   SZST\S\SA\S\R8                  4SU jjr/g)[zuSin-cos, fourier, rotary position embedding modules and functions

Hacked together by / Copyright 2022 Ross Wightman
    N)ListTupleOptionalUnion)nn   )register_notrace_function)ndgrid)_assertT	num_bandsmax_freqlinear_bandsdevicec           	         U(       a*  [         R                  " SUS-  U [         R                  US9nOAS[         R                  " S[        R                  " US5      S-
  U [         R                  US9-  nU[         R
                  -  $ )N      ?   dtyper   r   r   )torchlinspacefloat32mathlogpi)r   r   r   r   bandss        [/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/layers/pos_embed_sincos.pypixel_freq_bandsr      sh     sHqL)5==Y_`U^^Atxx!'<q'@)SXS`S`iopp588         @temperaturestepreturnc                     [         R                  " SX[         R                  US9R                  [         R                  5      U -  nSX-  -  nU$ )Nr   r   r   )r   arangeint64tor   )r   r    r!   r   expr   s         r   
freq_bandsr(      sF     ,,q)V
L
O
OPUP]P]
^aj
jC+$%ELr   @   F
feat_shapedimreverse_coordinterleave_sin_cosr   c                    US-  S:X  d   S5       eUS-  n[        XrSUS9nU(       a  U SSS2   n [        R                  " [        U  V	s/ s HC  n	[        R                  " X[        R
                  S9R                  [        R                  5      PME     sn	5      5      R                  S5      R                  SS5      n
U
R                  S5      UR                  S5      -  nU(       a  S	OSn[        R                  " [        R                  " U5      [        R                  " U5      /US
9R                  S5      nUR                  US9$ s  sn	f )z

Args:
    feat_shape:
    dim:
    temperature:
    reverse_coord: stack grid order W, H instead of H, W
    interleave_sin_cos: sin, cos, sin, cos stack instead of sin, sin, cos, cos
    dtype:
    device:

Returns:

   r   zHEmbed dimension must be divisible by 4 for sin-cos 2D position embeddingr   r    r!   r   Nr   r   r   r+   r   )r(   r   stackr
   r$   r%   r&   r   flatten	transpose	unsqueezesincos)r*   r+   r    r,   r-   r   r   pos_dimr   sgridpos2	stack_dimpos_embs                 r   build_sincos2d_pos_embedrA   '   s   . 7a<ccc<QhGwaOE"%
;;vA 	QU[[9<<U]]K   	
99Q? 	 >>" 22D (QIkk599T?EIIdO<)LTTUVWG::E:""s   	A
Eseqc                 V    [        U 5      S:  a  U $ U S   U S   /[        U SS  5      -   $ )Nr   r   r   )lenlist)rB   s    r   swap_shape_xyrF   P   s4    
3x!|
FCFd3qr7m++r              ijr   max_resinclude_grid	in_pixelsref_feat_shapegrid_offsetgrid_indexingc                    Uc*  U(       a  [        U[        U5      UUS9nO+[        UUSUS9nOUc  UR                  nUc  UR                  nU
S:X  a  [        U 5      n Ub  [        U5      nU(       a6  U  Vs/ s H(  n[        R                  " SSX[        R                  S9PM*     nnOSU  Vs/ s HF  n[        R                  " X[        R                  S9R                  [        R                  5      U	-   PMH     nnUb*  [        XU5       VVVs/ s H  u  nnnUU-  U-  PM     nnnn[        R                  " [        R                  " XS	9S
S9nUR                  S
5      nUU-  nUR!                  5       R                  US9UR#                  5       R                  US9nnU(       a  UUU/nU$ UU/nU$ s  snf s  snf s  snnnf )a  

Args:
    feat_shape: Feature shape for embedding.
    bands: Pre-calculated frequency bands.
    num_bands: Number of frequency bands (determines output dim).
    max_res: Maximum resolution for pixel based freq.
    temperature: Temperature for non-pixel freq.
    linear_bands: Linear band spacing for pixel based freq.
    include_grid: Include the spatial grid in output.
    in_pixels: Output in pixel freq.
    ref_feat_shape: Reference feature shape for resize / fine-tune.
    grid_offset: Constant offset to add to grid for non-pixel freq.
    grid_indexing: Indexing mode for meshgrid ('ij' or 'xy')
    dtype: Output dtype.
    device: Output device.

Returns:

)r   r   r   r0   xyg      r   )stepsr   r   r2   indexingr1   r3   r4   )r   floatr(   r   r   rF   r   r   r   r$   r%   r&   zipr5   meshgridr8   r9   r:   )r*   r   r   rJ   r    r   rK   rL   rM   rN   rO   r   r   r<   txfrr=   pospos_sinpos_cosouts                          r   build_fourier_pos_embedr`   V   s   F }$g)	E '	E >\\F=KKE":.
%*>:N  
 NN3!%--P 	
 
  
 LL=@@OR]] 	
 

 !&)!&HI&H71aQUQY&HI;;u~~a@bID>>"D
,Cwwy||%|0#''),,U,2KWG&24'
"CJ :A'8JCJ)


 Js   6/F3,AF8F=c                   h   ^  \ rS rSr      S
S\S\4U 4S jjjrSS jrSS jrSS jrS r	S	r
U =r$ )FourierEmbed   rJ   r   c           	         > [         TU ]  5         Xl        X l        X0l        X@l        U R                  S[        R                  " X%US9SS9  U R                  5         g )Nr   r2   F
persistent)
super__init__rJ   r   concat_gridkeep_spatialregister_bufferr   emptyreset_parameters)selfrJ   r   ri   rj   r   r   	__class__s          r   rh   FourierEmbed.__init__   sV     	"&(Wekk)RW&Xejk 	r   c                 $    U R                  5         gz"Initialize parameters and buffers.N_init_buffersrn   s    r   rm   FourierEmbed.reset_parameters       r   c                 v    U R                   R                  [        U R                  U R                  5      5        g.Compute and fill non-persistent buffer values.N)r   copy_r   r   rJ   ru   s    r   rt   FourierEmbed._init_buffers   s$    

)$..$,,GHr   c                 $    U R                  5         gz"Initialize non-persistent buffers.Nrs   ru   s    r   init_non_persistent_buffers(FourierEmbed.init_non_persistent_buffers   rw   r   c           	         UR                   S S u  p#UR                   SS  n[        UU R                  U R                  UR                  UR
                  S9n[        R                  " USS9nUR                  SS5      R                  [        U5      5      nU4SUR                  S-
  -  -   nU R                  (       aH  [        R                  " XR                  S5      R                  U5      R                  SS	SS5      /SS9nU$ [        R                  " UR                  SSS	S5      UR                  S5      R                  U5      /SS9nUR!                  X$R#                  5       S5      nU$ )
Nr   )rK   r   r   r1   r3   r1   r   r      )shaper`   r   ri   r   r   r   catr7   r6   rD   ndimrj   r8   expandpermutereshapenumel)rn   rY   BCr*   embbatch_expands          r   forwardFourierEmbed.forward   s=   wwr{WWQR[
%JJ))''88
 ii$mmB#++C
O<teqvvz22 		1mmA.55lCKKAqRSUVWX^_`A
  		199Q1a0#--2B2I2I,2WX^`aA		!--/4Ar   )ri   rj   rJ   r   )rG   r)   TFNNr"   N)__name__
__module____qualname____firstlineno__intrh   rm   rt   r   r   __static_attributes____classcell__ro   s   @r   rb   rb      sM           &I r   rb   c                     [         R                  " U SSS S24   * U SS S S24   /S5      R                  U R                  5      $ )N.r   r   r1   )r   r5   r   r   )rY   s    r   rotr      sE     ;;319qcc{3R8@@IIr   rY   c                 V    U R                  SSS9u  p[        R                  " U* U/SS9$ )Nr   r1   r3   )chunkr   r   )rY   x1x2s      r   rope_rotate_halfr      s/     WWQBWFB99rc2YB''r   sin_embcos_embhalfc                 \    U(       a  X-  [        U 5      U-  -   $ X-  [        U 5      U-  -   $ N)r   r   )rY   r   r   r   s       r   apply_rot_embedr      s9      {-a07:::
 {SVg---r   c                     [        U [        R                  5      (       a  U /n U(       a$  U  Vs/ s H  oDU-  [        U5      U-  -   PM     sn$ U  Vs/ s H  oDU-  [	        U5      U-  -   PM     sn$ s  snf s  snf r   )
isinstancer   Tensorr   r   )rY   r   r   r   rX   s        r   apply_rot_embed_listr     s|     !U\\""C FGGQG.q1G;;QGG
 9::1Gc!fw..:: H
 ;s   A2A7r   c                     UR                  SS5      u  p4U(       a  X-  [        U 5      U-  -   $ X-  [        U 5      U-  -   $ )Nr   r1   )r   r   r   )rY   r   r   r   r   s        r   apply_rot_embed_catr     sL    
 yyB'G {-a07:::
 {SVg---r   	pos_embedkeep_indicespos_embed_has_batchc                    U(       a  [        UR                  S:  S5        OY[        UR                  S:  S5        U R                  S   4SUR                  -  -   nUR                  S5      R	                  U5      nUR                  S   4SUR                  S-
  -  -   UR                  S   S4-   nUR                  U5      n[        UR                  5      nSUS	'   UR	                  U5      nUR                  S	U5      $ )
a  Apply keep indices to different ROPE shapes

Expected pos_embed shapes:
* [seq_len, pos_embed_dim] --> output [batch_size, seq_len, pos_embed_dim]
* [num_heads, seq_len, pos_embed_dim] --> output [batch_size, num_heads, seq_len, pos_embed_dim]
* [depth, num_heads, seq_len, pos_embed_dim] --> output [batch_size, depth, num_heads, seq_len, pos_embed_dim]

And all of the above with leading batch dimension already present if `pos_embed_has_batch == True`

r   zIncorrect number of dimensionsr   r   r   )r   r   r1   r   )r   r   r   r8   r   viewrE   gather)rY   r   r   r   expand_shape
keep_shapekeep_expands          r   apply_keep_indices_nlcr   ,  s      	!#%EF 		!#%EF
}uy~~'==''*11,?	 $$Q')DINNQ4F,GG<K]K]^_K`bcJddJ$$Z0L y'KKO&&{3LB--r   c                     [        U UUS-  UUUUUUU	U
US9u  pSnU  H  nX-  nM	     UR                  US5      R                  SS5      nUR                  US5      R                  SS5      nX4$ )a  

Args:
    feat_shape: Spatial shape of the target tensor for embedding.
    bands: Optional pre-generated frequency bands
    dim: Output dimension of embedding tensor.
    max_res: Maximum resolution for pixel mode.
    temperature: Temperature (inv freq) for non-pixel mode
    linear_bands: Linearly (instead of log) spaced bands for pixel mode
    in_pixels: Pixel vs language (inv freq) mode.
    ref_feat_shape: Reference feature shape for resize / fine-tune.
    grid_offset: Constant offset to add to grid for non-pixel freq.
    grid_indexing: Indexing mode for meshgrid ('ij' or 'xy')
    device: Output device.
    dtype: Output dtype.

Returns:

r/   )r   r   rJ   r    r   rL   rM   rN   rO   r   r   r   r1   r   )r`   r   repeat_interleave)r*   r   r+   rJ   r    r   rL   rM   rN   rO   r   r   r   r   num_spatial_dimrY   s                   r   build_rotary_pos_embedr   Q  s    B /(!%#G O ooor2DDQKGooor2DDQKGr   c                     ^  \ rS rSrSr          SS\S\\\      S\\\      S\	S\
4
U 4S	 jjjrSS
 jrSS jrSS jrS\R                   4S\\   4S jjrSS jrS\\   4S jrSS\\\      4S jjrS rSrU =r$ )RotaryEmbeddingi  a  Rotary position embedding

NOTE: This is my initial attempt at impl rotary embedding for spatial use, it has not
been well tested, and will likely change. It will be moved to its own file.

The following impl/resources were referenced for this impl:
* https://github.com/lucidrains/vit-pytorch/blob/6f3a5fcf0bca1c5ec33a35ef48d97213709df4ba/vit_pytorch/rvt.py
* https://blog.eleuther.ai/rotary-embeddings/
Nr   r*   rM   rN   rO   c           	        > [         TU ]  5         Xl        X l        X0l        XPl        X@l        X`l        Xpl        Xl	        Xl
        US LU l        Uc9  US-  4nU R                  S[        R                  " XUS9SS9  S U l        S U l        OaS U l        SnU H  nX-  nM	     X4nU R                  S[        R                  " XUS9SS9  U R                  S[        R                  " XUS9SS9  U R%                  5         g )	Nr/   r   r2   Fre   r   pos_embed_sinpos_embed_cos)rg   rh   r+   rJ   r    r   rL   r*   rM   rN   rO   _use_cached_embedrk   r   rl   r   r   r   rm   rn   r+   rJ   r    rL   r   r*   rM   rN   rO   r   r   bands_shapenum_posr<   	emb_shapero   s                   r   rh   RotaryEmbedding.__init__  s    	&("$,&* ",4!7!8+K  %++kX]*^kp q!%D!%D DJG   I  %++i^c2dqv w  %++i^c2dqv w 	r   c                 $    U R                  5         grr   rs   ru   s    r   rm    RotaryEmbedding.reset_parameters  rw   r   c                     U R                   (       d*  U R                  R                  U R                  5       5        gU R	                  U R
                  5      u  pU R                  R                  U5        U R                  R                  U5        gry   )r   r   r{   _compute_bands_get_pos_embed_valuesr*   r   r   )rn   emb_sinemb_coss      r   rt   RotaryEmbedding._init_buffers  se    %%JJT0023#99$//JG$$W-$$W-r   c                     U R                   (       a6  [        U R                  S-  [        U R                  5      U R
                  S9nO"[        U R                  S-  U R                  SS9nUR                  XS9$ zCompute frequency bands.r/   )r   r   )r    r!   r2   	rL   r   r+   rU   rJ   r   r(   r    r&   rn   r   r   r   s       r   r   RotaryEmbedding._compute_bands  i    >>$Adll#!..E A ,,E
 xxvx33r   c                     [        UU R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  UUS9u  pEXE4$ )Nr*   r+   rJ   r    r   rL   rM   rN   rO   r   r   )	r   r+   rJ   r    r   rL   rM   rN   rO   )rn   r*   r   r   r   r   s         r   r   %RotaryEmbedding._get_pos_embed_values  se    1!LL((**nn..((,,
 r   c                 $    U R                  5         gr~   rs   ru   s    r   r   +RotaryEmbedding.init_non_persistent_buffers  rw   r   c                    U R                   b{  XR                   :w  ak  U R                  c   eU R                  c   eU R                  UU R                  R                  U R                  R
                  S9u  U l        U l        Xl         g g g Nr2   )r*   r   r   r   r   r   rn   r*   s     r   update_feat_shape!RotaryEmbedding.update_feat_shape  s    ??&:+H%%111%%111595O5O))00((.. 6P 62D 2
 )O ,I&r   r   c           	         UbM  U R                   b@  [        UU R                   U R                  U R                  U R                  U R
                  S9$ U R                  b%  U R                  b  U R                  U R                  4$  S5       e)NrL   rM   rN   rO   zQget_embed() requires pre-computed pos embeds or valid shape w/ pre-computed bands)r   r   rL   rM   rN   rO   r   r   )rn   r   s     r   	get_embedRotaryEmbedding.get_embed   s    !7)

..#22 ,,"00  +0B0B0N%%t'9'999mmm5r   c                 Z    U R                  UR                  SS  5      u  p#[        XU5      $ Nr   )r   r   r   )rn   rY   r   r   s       r   r   RotaryEmbedding.forward  s*    >>!''!"+6q733r   )r   r   r+   r*   rO   rN   rL   r   rJ   r   r   rM   r    
rG   i'  TFNNrH   rI   NNr   NNr   )r   r   r   r   __doc__boolr   r   r   rU   strrh   rm   rt   r   r   r   r   r   r   r   r   r   r   r   s   @r   r   r     s     !&.226!#!%-  -  !c+-  %T#Y/-  -  -  - ^.4  CGemm  S	   
)DI 
)nxS	2 n 4 4r   r   c                   ~  ^  \ rS rSrSr          SS\S\S\S\S\S	\\	\      S
\\	\      S\S\
4U 4S jjjrSS jrSS jrSS jrS\R                   4S	\	\   4S jjrSS jrS	\	\   4S jrSS\\	\      4S jjr SS\	\\\4      S\\   S\\R.                  \	\R.                     4   4S jjrS rSrU =r$ ) RotaryEmbeddingCati  a  Rotary position embedding w/ concatenatd sin & cos

The following impl/resources were referenced for this impl:
* https://github.com/lucidrains/vit-pytorch/blob/6f3a5fcf0bca1c5ec33a35ef48d97213709df4ba/vit_pytorch/rvt.py
* https://blog.eleuther.ai/rotary-embeddings/
Nr+   rJ   r    rL   r   r*   rM   rN   rO   c           	        > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl	        Xl
        US LU l        Uc2  US-  4nU R                  S[        R                  " XUS9SS9  S U l        O@S U l        SnU H  nX-  nM	     XS-  4nU R                  S[        R                  " XUS9SS9  U R#                  5         g )	Nr/   r   r2   Fre   r   r   r   )rg   rh   r+   rJ   r    rL   r   r*   rM   rN   rO   r   rk   r   rl   r   r   rm   r   s                   r   rh   RotaryEmbeddingCat.__init__  s     	&"($,&* ",4!7!8+K  %++kX]*^kp q!DN DJG   '*I  ekk)Z_.`mr s 	r   r"   c                 $    U R                  5         grr   rs   ru   s    r   rm   #RotaryEmbeddingCat.reset_parametersK  rw   r   c                     U R                   (       d*  U R                  R                  U R                  5       5        gU R                  R                  U R                  U R                  5      5        gry   )r   r   r{   r   r   r   r*   ru   s    r   rt    RotaryEmbeddingCat._init_buffersO  sG    %%JJT0023NN  !;!;DOO!LMr   c                     U R                   (       a6  [        U R                  S-  [        U R                  5      U R
                  S9nO"[        U R                  S-  U R                  SS9nUR                  XS9$ r   r   r   s       r   r   !RotaryEmbeddingCat._compute_bandsV  r   r   c                     [        UU R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  UUS9n[        R                  " US5      $ )Nr   r1   )r   r+   rJ   r    r   rL   rM   rN   rO   r   r   )rn   r*   r   r   embedss        r   r   (RotaryEmbeddingCat._get_pos_embed_valuesf  sj    '!LL((**nn..((,,
 yy$$r   c                 $    U R                  5         gr~   rs   ru   s    r   r   .RotaryEmbeddingCat.init_non_persistent_buffersv  rw   r   c                     U R                   bd  XR                   :w  aT  U R                  c   eU R                  UU R                  R                  U R                  R                  S9U l        Xl         g g g r   )r*   r   r   r   r   r   s     r   r   $RotaryEmbeddingCat.update_feat_shapez  si    ??&:+H>>---!77~~,,nn** 8 DN
 )O ,I&r   r   c           	         Ubd  U R                   bW  [        UU R                   U R                  U R                  U R                  U R
                  S9n[        R                  " US5      $ U R                  b  U R                  $  S5       e)Nr   r1   zPget_embed() requires pre-computed pos embed or valid shape w/ pre-computed bands)	r   r   rL   rM   rN   rO   r   r   r   )rn   r   r   s      r   r   RotaryEmbeddingCat.get_embed  sy    !7+

..#22 ,,"00F 99VR((^^'>>!lll5r   shapesseq_lenc           	         U(       d  / $ U R                   c  [        S5      e[        S U 5       5      n[        S U 5       5      n[        X44U R                   U R                  U R
                  U R                  U R                  S9u  pV[        R                  " XV/SS9R                  X4S5      nUbz  [        R                  " [        U5      X'R                  S   5      R                  U5      n[        U5       H-  u  n	u  pX-  nUSU
2SU24   R!                  US5      XSU24'   M/     U$ U V
Vs/ s H"  u  pUSU
2SU24   R!                  X-  S5      PM$     nn
nU$ s  snn
f )a{  Generate ROPE embeddings for multiple grid shapes efficiently.

Computes embeddings for the maximum grid size once, then extracts
and flattens the relevant portions for each requested shape.

Args:
    shapes: List of (H, W) tuples representing different grid sizes

Returns:
    List of concatenated sin/cos embeddings for each shape,
    where each tensor has shape (H*W, dim)
NzMBatch embedding generation requires cached bands, not pre-computed embeddingsc              3   *   #    U  H	  u  pUv   M     g 7fr    .0hws      r   	<genexpr>6RotaryEmbeddingCat.get_batch_embeds.<locals>.<genexpr>       )&$!A&   c              3   *   #    U  H	  u  pUv   M     g 7fr   r  r  s      r   r  r    r	  r
  )r*   r   rL   rM   rN   rO   r1   r3   )r   RuntimeErrormaxr   rL   rM   rN   rO   r   r   r   zerosrD   r   type_as	enumerater   )rn   r   r   max_hmax_wr   r   rope_embed_2dflat_embedsir  r  src_lenflat_embeds_lists                 r   get_batch_embeds#RotaryEmbeddingCat.get_batch_embeds  sp   " I ::noo )&)))&)) 2~**nn..((,,
 		7"4"=BB5QST++c&k7<O<OPR<ST\\]deK&v.	6A%+8!RaR+@+H+HRT+UxxK( / U[\U[TQbqb"1"f 5 = =aeR HU[\##  ]s   1)Ec                 T    U R                  UR                  SS  5      n[        X5      $ r   r   r   r   rn   rY   r   s      r   r   RotaryEmbeddingCat.forward  %    NN17712;/	"100r   )r   r   r+   r*   rO   rN   rL   r   rJ   r   rM   r    r   r   r   r   )r   r   r   r   r   r   rU   r   r   r   r   rh   rm   rt   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   s   @r   r   r     sO    !&"!&.226!#!%+ +  +  	+ 
 +  +  !c++  %T#Y/+  +  +  + ZN4  CGemm %S	 % 	)DI 	)mxS	2 m( &*3$sCx)3$ c]3$ 
u||T%,,//	0	3$j1 1r   r   r2   head_dimdepth	num_headsrotatec          
         SU[         R                  " SU SXVS9U -  -  -  nUR                  S5      R                  S5      nU(       a+  [         R                  " XSXVS9S-  [         R                  -  nO[         R
                  " XSXVS9n[         R                  " U[         R                  " U5      -  U[         R                  " U[         R                  S-  -   5      -  /SS9n	[         R                  " U[         R                  " U5      -  U[         R                  " U[         R                  S-  -   5      -  /SS9n
[         R                  " X/SS9$ )	zVectorised 2D ROPE frequencies with random rotation for mixed mode ROPE.
Returns:
     Tensor (2, depth, num_heads, head_dim//2)
r   r   r/   r2   r   r   r1   r3   )
r   r$   r8   randr   r  r   r:   r9   r5   )r  r   r!  r    r"  r   r   maganglesfxfys              r   init_random_2d_freqsr)    s    a1V!Y\d!de
fC --

$
$Q
'C EaLqPSXS[S[[UqM 
C%))F++S599VehhQRl=R3S-STZ\	]B	C%))F++S599VehhQRl=R3S-STZ\	]B ;;xQ''r   r   c           	      j   US:X  a  [        U 5      n [        R                  " [        R                  " U S   U[        R                  S9[        R                  " U S   U[        R                  S9US9u  pEUR                  U5      R                  5       nUR                  U5      R                  5       nXg4$ )NrQ   r   r2   r   rS   )rF   r   rW   r$   r   r&   r6   )r   rO   r   r   x_posy_post_xt_ys           r   get_mixed_gridr/    s     e$>>U1XfEMMBU1XfEMMBLE
 ((5/
!
!
#C
((5/
!
!
#C8Or   freqsr-  r.  c                    U R                   nU R                  5       n UR                  S5      U S   R                  S5      -  nUR                  S5      U S   R                  S5      -  nXE-   n[        R                  " U5      R                  SS5      n[        R                  " U5      R                  SS5      n[        R                  " Xx/SS9n	U	R                  U5      $ )z&Compute mixed (learnable) frequencies.r1   r   r   r   r   r3   )	r   rU   r8   r   r9   r   r:   r   r&   )
r0  r-  r.  r   freqs_xfreqs_ycombinedr   r   rope_embedss
             r   get_mixed_freqsr6    s     KKEKKME}}R 58#5#5b#99G}}R 58#5#5b#99G Hii!33Ar:Gii!33Ar:G))W.B7K>>%  r   c                   `  ^  \ rS rSrSr     SS\S\S\S\S\\\      S\	4U 4S	 jjjr
SS jrSS jrS\\\      4S jrS\\\      4S jrSS jrSS\\\      S
\R"                  4S jjr SS\\\\4      S\\   S
\\R"                  \\R"                     4   4S jjrS rS rSrU =r$ )RotaryEmbeddingMixedi  aj  Rotary position embedding with depth-dependent learnable frequencies.

This implementation supports mixed (learnable) ROPE. In mixed mode,
each transformer block has its own set of learnable frequency parameters.

Based on 'Rotary Position Embedding for Vision: https://arxiv.org/abs/2403.13298)'
Compatible with original at https://github.com/naver-ai/rope-vit
r+   r   r!  r    r*   rO   c	           
        > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        X-  n	U	S-  S:X  d
   SU	 35       e[        U	UUUSUUS9n
[        R                  " U
5      U l        Ubh  SnU H  nX-  nM	     U R                  S[        R                  " XUS	9S
S9  U R                  S[        R                  " XUS	9S
S9  U R                  5         gS=U l        U l        g)aZ  Initialize rotary embeddings.

Args:
    dim: Embedding dimension (should be divisible by 4)
    depth: Number of transformer blocks
    num_heads: Number of attention heads
    temperature: Base for frequency computation
    feat_shape: Spatial dimensions [H, W] if known in advance
    grid_indexing: How to index grid positions ('xy' or 'ij')
r/   r   z%head_dim must be divisible by 4, got T)r    r"  r   r   Nr   r-  r2   Fre   r.  )rg   rh   r+   r   r!  r    r*   rO   r)  r   	Parameterr0  rk   r   rl   rt   r-  r.  )rn   r+   r   r!  r    r*   rO   r   r   r  r0  r   r<   ro   s                r   rh   RotaryEmbeddingMixed.__init__"  s   * 	
"&$*#!|q T$I("TT $#
 \\%(
!G    GRW(Xej k  GRW(Xej k "&&DHtxr   r"   c                     U R                   bT  U R                  U R                   5      u  pU R                  R                  U5        U R                  R                  U5        ggry   )r*   _get_grid_valuesr-  r{   r.  )rn   r-  r.  s      r   rt   "RotaryEmbeddingMixed._init_buffersY  sG    ??&,,T__=HCHHNN3HHNN3 'r   c                 $    U R                  5         grr   rs   ru   s    r   rm   %RotaryEmbeddingMixed.reset_parameters`  rw   r   c                 ^    [        UU R                  U R                  R                  S9u  p#X#4$ )NrO   r   )r/  rO   r0  r   rn   r*   r-  r.  s       r   r=  %RotaryEmbeddingMixed._get_grid_valuesd  s0    !,,::$$

 xr   c                    U R                   b  XR                   :w  a  U R                  c   eU R                  c   eU R                  U5      u  p#UR	                  U R                  R
                  U R                  R                  5      U l        UR	                  U R                  R
                  U R                  R                  5      U l        Xl         g g g r   )r*   r-  r.  r=  r&   r   r   rC  s       r   r   &RotaryEmbeddingMixed.update_feat_shapel  s    ??&:+H88'''88''',,Z8HCvvdhhootxx~~>DHvvdhhootxx~~>DH(O ,I&r   c                 $    U R                  5         gr~   rs   ru   s    r   r   0RotaryEmbeddingMixed.init_non_persistent_buffersu  rw   r   r   c                     Ub,  [        UU R                  U R                  R                  S9u  p#O9U R                  b%  U R
                  b  U R                  U R
                  p2O S5       e[        U R                  X#5      $ )zGenerate rotary embeddings for the given spatial shape.

Args:
    shape: Spatial dimensions [H, W]

Returns:
    Tensor of shape (depth, H*W, dim) containing concatenated sin/cos embeddings
rB  z8get_embed() requires pre-computed t_x/t_y or valid shape)r/  rO   r0  r   r-  r.  r6  )rn   r   r-  r.  s       r   r   RotaryEmbeddingMixed.get_embedy  sq     %"00zz((HC
 XX!dhh&:xxTTT5tzz344r   r   r   c           
         U(       d  / $ [        S U 5       5      n[        S U 5       5      n[        X4/U R                  U R                  R                  S9u  pV[        U R                  XV5      nUR                  u  ppUR                  XX4U5      nUb  [        U5      n[        R                  " XXXR                  R                  U R                  R                  S9n[        U5       HA  u  nu  nnUSS2SS2SU2SU24   R                  XUU-  U5      nUU-  nUXSS2SS2SU24'   MC     U$ / nU H<  u  nnUSS2SS2SU2SU24   R                  XUU-  U5      nUR                  U5        M>     U$ )a!  Generate ROPE embeddings for multiple grid shapes efficiently.

Computes embeddings for the maximum grid size once, then extracts
and flattens the relevant portions for each requested shape.

Args:
    shapes: List of (H, W) tuples representing different grid sizes
    seq_len: If provided, return padded tensor of this length. Otherwise return list.

Returns:
    If seq_len is provided: Padded tensor of shape (len(shapes), depth, num_heads, seq_len, dim)
    Otherwise: List of tensors with shape (depth, num_heads, H*W, dim) for each shape
c              3   *   #    U  H	  u  pUv   M     g 7fr   r  r  s      r   r  8RotaryEmbeddingMixed.get_batch_embeds.<locals>.<genexpr>  r	  r
  c              3   *   #    U  H	  u  pUv   M     g 7fr   r  r  s      r   r  rM    r	  r
  rB  Nr2   )r  r/  rO   r0  r   r6  r   r   rD   r   r  r   r  r   append)rn   r   r   r  r  r-  r.  	max_embedr   r!  _r+   max_embed_2dr   paddedr  r  r  embed_slice
actual_lenresultss                        r   r  %RotaryEmbeddingMixed.get_batch_embeds  s   $ I )&)))&)) "N,,::$$

 $DJJ9	 $-?? ! ~~ecJFA[[9s::K\K\dhdndndtdtuF&v.	6Aq*1a!RaR<8@@STWXSXZ]^U
/:!Q+,	 /
 M G1*1a!RaR<8@@STWXSXZ]^{+  Nr   c                 T    U R                  UR                  SS  5      n[        X5      $ r   r  r  s      r   r   RotaryEmbeddingMixed.forward  r  r   c                     S1$ )z/Exclude frequency parameters from weight decay.r0  r  ru   s    r   no_weight_decay$RotaryEmbeddingMixed.no_weight_decay  s
    yr   )	r   r+   r*   r0  rO   r!  r-  r.  r    )      $@NrQ   NNr   r   )r   r   r   r   r   r   rU   r   r   r   rh   rt   rm   r=  r   r   r   r   r   r   r   r  r   r[  r   r   r   s   @r   r8  r8    s    "&.2!%5'5' 5' 	5'
 5' !c+5' 5' 5'n 8DI+> )HT#Y,? )5xS	2 5ell 52 &*6sCx)6 c]6 
u||T%,,//	0	6p1
 r   r8  separatecpuheightwidthnormalize_coordsc                    [         R                  " SX[         R                  S9U-   n[         R                  " SX[         R                  S9U-   nUS:X  a  [        [	        X5      5      n	U	n
U	nOJUS:X  a  [        [        X5      5      n	U	n
U	nO+US:X  a  [        U 5      n
[        U5      nO[        SU 35      eXz-  nX-  nUR                  U5      nUR                  U5      nUS:X  a.  [         R                  " XSS9u  p[         R                  " X/S	S
9nO([         R                  " [         R                  " XxSS9S	S
9nUR                  SS5      nSU-  S-
  nU$ )zsMake coordinate grid matching offset and normalization of original.
Returns: coords with shape (HW, 2) in [-1, 1].
g      ?r2   r  minr^  zUnknown normalize_coords: rQ   rS   r1   r3   rI   r   r          @r   )r   r$   r   rU   r  rd  
ValueErrorr&   rW   r5   r6   )r`  ra  rb  rO   rN   r   r   coords_hcoords_wdenomh_denomw_denomgrid_wgrid_hcoordss                  r   make_coords_dinov3ro    sI    ||CemmL{ZH||CU]]KkYH 5 c&()	U	"c&()	Z	'-,56F5GHII !H!H{{5!H{{5!H TJf-26U^^HNTVW^^Aq!F6\CFMr   c                     ^  \ rS rSrSr             S$S\S\\   S\\   S\\   S\\\      S\	S	\S
\	S\
S\\   S\\   S\\   4U 4S jjjrS%S jrS%S jrS\R                  4S\R                   S\R"                  S\R$                  4S jjrS\R$                  S\R$                  4S jrS\R$                  S\\R$                  \R$                  4   4S jr S&S\\   S\
S\R$                  4S jjrS\\   4S jrS\\   4S jrS%S jrS'S\\\      S\R$                  4S  jjrS!\R$                  S\R$                  4S" jrS#rU =r$ )(RotaryEmbeddingDinoV3i  ak  RoPE for timm DinoV3 port, numerically matching original.

Math is aligned to original DinoV3 RopePositionEmbedding at https://github.com/facebookresearch/dinov3:
  - 0.5-centered coords normalized by H/W (or min/max), mapped to [-1,1]
  - training-time augmentations (shift/jitter/rescale)
  - periods schedule equals Rope's temperature (base) or min/max period
r+   r    
min_period
max_periodr*   rb  rN   rO   rotate_halfshift_coordsjitter_coordsrescale_coordsc           	      L  > [         TU ]  5         Xl        Xl        [	        U5      U l        X0l        X@l        X`l        Xl	        Xl
        Xl        [        U R                  U R                  U R                  4 Vs/ s H  oS LPM     sn5      U l        XPl        Xpl        Xl        US-  4nU R#                  S[$        R&                  " UXS9SS9  Ub7  US   US   -  nUUS-  4nU R#                  S	[$        R&                  " UXS9SS9  OS U l        U R+                  5         g s  snf )
Nr/   periodsr2   Fre   r   r   r   pos_embed_cached)rg   rh   r+   rt  rU   r    rr  rs  rb  ru  rv  rw  any
aug_activer*   rN   rO   rk   r   rl   rz  rm   )rn   r+   r    rr  rs  r*   rb  rN   rO   rt  ru  rv  rw  r   r   aperiods_shaper   r   ro   s                      r   rh   RotaryEmbeddingDinoV3.__init__  s/   " 	 & !-$$ !1(*,t7H7H$J\J\^b^q^q6rs6r}6rst %&* YM&(^kpq! mjm3G #'*I  !3U[[SY5gty z$(D! 	) ts   :D!r"   c                 $    U R                  5         grr   rs   ru   s    r   rm   &RotaryEmbeddingDinoV3.reset_parametersC  rw   r   c                     U R                   R                  U R                  5       5        U R                  bD  U R                  b6  U R                  U R                  SS9nU R                  R                  U5        ggg)rz   NTno_aug)ry  r{   _compute_periodsr*   rz  _create_embed)rn   
rope_embeds     r   rt   #RotaryEmbeddingDinoV3._init_buffersG  sh    40023??&4+@+@+L++DOOD+IJ!!''
3 ,M&r   r_  r   r   c                    U R                   S-  nU R                  b]  U R                  bP  [        R                  " SSUS[        R
                  S9nU R                  U R                  U R                  -  U-  -  nO^U R                  c  [        S5      eS[        R                  " US[        R
                  S9-  U R                   S-  -  nU R                  U-  nUR                  XS9$ )	z5Construct periods from either min/max or temperature.r/   r   r   r_  r2   z0Provide either min/max periods or `temperature`.re  r   )
r+   rr  rs  r   r   r   r    rf  r$   r&   )rn   r   r   r+   	exponentsry  s         r   r  &RotaryEmbeddingDinoV3._compute_periodsN  s    hh!m??&4??+Fq!SemmTIoo$//DOO*KPY)YZG' !STTell3uEMMRRVZV^V^bcVcdI&&)3G zzz55r   rn  c                 6   U R                   (       a  U R                  (       d  U$ UR                  nUR                  nU R                  bF  [        U R                  5      n[        R                  " SX#S9R                  U* U5      nXSSS24   -   nU R                  b{  [        U R                  5      nUS::  a  [        S5      e[        R                  " U5      n[        R                  " SX#S9R                  U* U5      R                  5       nXSSS24   -  nU R                  bt  [        U R                  5      n	U	S::  a  [        S5      e[        R                  " U	5      n
[        R                  " SX#S9R                  U
* U
5      R                  5       nX-  nU$ )z4Apply shift/jitter/rescale train time augmentations.Nr   r2   r   zAjitter_coords must be > 0 (interpreted as multiplicative factor).zBrescale_coords must be > 0 (interpreted as multiplicative factor).r   )trainingr|  r   r   ru  rU   r   rl   uniform_rv  rf  r   r   r'   rw  )rn   rn  r   r   shiftshift_hwjitter_factor
jitter_max	jitter_hwrescale_factorrescale_maxrescales               r   _apply_coord_augs'RotaryEmbeddingDinoV3._apply_coord_augs_  se   }}DOOM ($++,E{{1VAJJE6SXYHtQw//F )!$"4"45M! !dee-0JAfBKKZKYcdhhjIa00F *"4#6#67N" !eff((>2Kkk!F@II;,XcdhhjG%Fr   c                 &   U R                   S-  nU R                  R                  nU R                  R                  nU R                  R	                  5       U:X  d   eUSS2SS2S4   R                  X4S9nS[        R                  -  U-  U R                  SSSS24   -  nUR                  S5      nU R                  (       a  UR                  S5      nOUR                  SSS9n[        R                  " U5      n[        R                  " U5      nXg4$ )zEReturn sin/cos embeddings with either 'half' or 'interleaved' layout.r/   Nr2   r   r   r1   r3   )r+   ry  r   r   r   r&   r   r   r6   rt  tiler   r   r9   r:   )rn   rn  r+   r   r   r&  r9   r:   s           r   _get_pos_embed_from_coords0RotaryEmbeddingDinoV3._get_pos_embed_from_coords  s     hh!m$$""||!!#s*** 1d
#&&f&BTWWv%T4](CC"[[^F --aR-8Fiiiixr   r  c                     Uu  p4[        X4U R                  U R                  U R                  S9nU(       d  U R	                  U5      nU R                  U5      u  pg[        R                  " Xg/SS9nU$ )N)rb  rO   rN   r1   r3   )ro  rb  rO   rN   r  r  r   r   )	rn   r*   r  HWrn  r9   r:   r  s	            r   r  #RotaryEmbeddingDinoV3._create_embed  sr    
 #!22,,((	
 ++F3F226:YYzr2
r   c                 R    U R                  USS9nU R                  SUSS9  Xl        g )NTr  rz  Fre   )r  rk   r*   )rn   r*   r  s      r   _cache_embed"RotaryEmbeddingDinoV3._cache_embed  s2    ''
4'@
/N$r   c                 b    U R                   b"  XR                   :w  a  U R                  U5        g g g r   )r*   r  r   s     r   r   'RotaryEmbeddingDinoV3.update_feat_shape  s,    ??&:+Hj) ,I&r   c                 $    U R                  5         gr~   rs   ru   s    r   r   1RotaryEmbeddingDinoV3.init_non_persistent_buffers  rw   r   r   c                 @   Ub  U R                  U5      nU$ U R                  SL =(       d    U R                  =(       a    U R                  nU(       a1  U R                  c   S5       eU R                  U R                  5      nU$ U R                  c   eU R                  nU$ )zGenerate rope_embed matching DINOv3 RopePositionEmbedding numerics.

Returns: (HW, num_heads, 2 * head_dim) with last dim = [sin, cos] cat.
Nz&feature shape must be cached on create)r  rz  r  r|  r*   )rn   r   r  need_creates       r   r   RotaryEmbeddingDinoV3.get_embed  s    
 ++E2J  //47^DMM<]dooK2\4\\2!//@

  ,,888!22
r   rY   c                 f    U R                  UR                  SS 5      n[        XU R                  S9$ )z$Get and apply rotary embeddings to xr   N)r   )r   r   r   rt  r  s      r   r   RotaryEmbeddingDinoV3.forward  s/     NN17712;/	"1d6F6FGGr   )r|  r+   r*   rO   rN   rv  rs  rr  rb  rz  rw  rt  ru  r    )g      Y@NNNr^  rH   rI   TNNNNNr   Fr   )r   r   r   r   r   r   r   rU   r   r   r   rh   rm   rt   r   r   r   r   r   r  r  r   r  r  r  r   r   r   r   r   r   r   s   @r   rq  rq    s    ,1*.*..2$.!$!% $,0-1.25 5  "%5  !	5 
 !5  !c+5  "5  5  5  5  #5/5  $E?5  %UO5  5 n4 7<RWR_R_ 6u|| 6EKK 6didpdp 6"     D %V[VbVbHbBc 6 !S	  
	$%tCy %*DI *
xS	2 ell $H H%,, H Hr   rq  	rope_typec                    U S:X  a!  UR                  SS5        [        SSX-  0UD6$ U S:X  a!  UR                  SS5        [        SSX-  0UD6$ U S:X  a1  UR                  SS5        UR                  SS5        [        SXS	.UD6$ U S
:X  a3  UR                  SS5        UR                  SS5        [	        SSX-  0UD6$ [        SU  35      e)a  Factory function for creating rotary position embeddings.

Args:
    rope_type: Type of RoPE to create. Options:
        - 'base': Basic RotaryEmbedding
        - 'cat': RotaryEmbeddingCat (concatenated sin/cos)
        - 'mixed': RotaryEmbeddingMixed (learnable per-depth frequencies)
        - 'dinov3': RotaryEmbeddingDinoV3 (with coordinate transforms)
    dim: Total embedding dimension
    num_heads: Number of attention heads
    **kwargs: Additional arguments passed to the specific RoPE class

Returns:
    Rotary embedding module
basert  Nr+   r   mixedrL   rM   )r+   r!  dinov3zUnknown RoPE type: r  )popr   r   r8  rq  rf  )r  r+   r!  kwargss       r   create_rope_embedr    s    * F

=$'>3#3>v>>	e	

=$'!Ac&6A&AA	g	

;%

#T*#KKFKK	h	

;%

#T*$D)9DVDD.yk:;;r   )g      l@TN)r   r   Nr  )r]  T)r   i      )0r   r   typingr   r   r   r   r   r   _fxr	   r=   r
   trace_utilsr   r   rU   r   r   r   r   r(   r   r   rA   rF   r   r`   Modulerb   r   r   r   r   r   r   r   r   r   r)  r'  wrapr/  r6  r8  ro  rq  r  r  r   r   <module>r     s|    / /   *    !)-	


 
 &	
 $)-	  &	
 \\ ###()-"]]&#I&#&# &# 	&#
 !&# &&# {{&# \\&#R,tCy ,T#Y , )-#"".2!)-"]]RIR%R R 	R
 R R R R !c+R R R &R {{R 
%,,Rj6299 6rJ( ( ( 	.<<.. . 	.
 \\.. 	;;; ; 	;
 
%,,;0 .<<.\\. . \\	.. %*	".<<".<<". ll". "	".
 \\".N )-#".2!)-"]]5I5%5 5 	5
 5 5 5 !c+5 5 5 &5 {{5pJ4bii J4Zx1 x1~ "( mm((( ( 	(
 ( \\(D  ")-"]]	Cy & {{	
 5<<%&  $!||!\\! \\! \\	!$u299 up  !+!$"]]--- - 	-
 - - {{- \\-  -`MHBII MHb %<%<%< %<
 YY%<r   