
    RЦi/)                         S r SSKJrJrJr  SSKrSSKJr  SSKJs  J	r
  SSKJr   " S S\R                  5      r " S S	\R                  5      r " S
 S\R                  5      r " S S\R                  5      rg)aG  Non-Local Attention Pooling Layers

A collection of global pooling layers that go beyond simple avg/max pooling.

LSEPool - LogSumExp pooling, a smooth approximation between avg and max pooling
SimPool - Attention-based pooling from 'Keep It SimPool' (ICCV 2023)

Based on implementations from:
* LSE Pooling: custom implementation by Bill Psomas
* SimPool: https://arxiv.org/abs/2309.06891 - 'Keep It SimPool: Who Said Supervised Transformers
    Suffer from Attention Deficit?' by Bill Psomas et al.

Hacked together by / Copyright 2024 Ross Wightman, original code by Bill Psomas
    )OptionalTypeUnionN   )use_fused_attnc                      ^  \ rS rSrSr     SS\S\S\4U 4S jjjrS\R                  S\R                  4S	 jr
S
rU =r$ )	LsePlus2d   am  LogSumExp (LSE) Pooling for 2D inputs.

A smooth approximation to max pooling that provides a learnable interpolation between
average and max pooling. When r is large, LSE approaches max pooling; when r is small,
it approaches average pooling.

Implements: (1/r) * log((1/n) * sum(exp(r * (x - x_max)))) + x_max

The x_max subtraction provides numerical stability.
rr_learnableflattenc           	         > [         TU ]  5         U(       a/  [        R                  " [        R
                  " XUS95      U l        O%U R                  S[        R
                  " XUS95        X0l        g)z
Args:
    r: Initial value of the pooling parameter. Higher = closer to max pooling.
    r_learnable: If True, r is a learnable parameter.
    flatten: If True, flatten spatial dims in output.
devicedtyper   N)	super__init__nn	Parametertorchtensorr   register_bufferr   )selfr   r   r   r   r   	__class__s         U/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/layers/other_pool.pyr   LsePlus2d.__init__$   sN     	\\%,,qu"MNDF  ell15&QR    xreturnc                 4   [         R                  " US5      n[        R                  " U R                  X-
  -  5      nUR                  SSS9nUSU R                  -  [        R                  " U5      -  -   nU R                  (       a  UR                  S5      nU$ )Nr   )      Tdimkeepdim      ?)Fadaptive_max_pool2dr   expr   meanlogr   r   r   x_maxexp_xsum_expouts         r   forwardLsePlus2d.forward9   sx    %%a+		$&&AI./***6sTVV|uyy'999<<++a.C
r   )r   r   )      $@TTNN__name__
__module____qualname____firstlineno____doc__floatboolr   r   Tensorr1   __static_attributes____classcell__r   s   @r   r	   r	      s_    	  $   	 * %,,  r   r	   c                   ~   ^  \ rS rSrSr    S
S\S\4U 4S jjjrS\R                  S\R                  4S jr
S	rU =r$ )	LsePlus1dC   a   LogSumExp (LSE) Pooling for sequence (NLC) inputs.

A smooth approximation to max pooling that provides a learnable interpolation between
average and max pooling. When r is large, LSE approaches max pooling; when r is small,
it approaches average pooling.
r   r   c           	         > [         TU ]  5         U(       a/  [        R                  " [        R
                  " XUS95      U l        gU R                  S[        R
                  " XUS95        g)z
Args:
    r: Initial value of the pooling parameter. Higher = closer to max pooling.
    r_learnable: If True, r is a learnable parameter.
r   r   N)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   s        r   r   LsePlus1d.__init__K   sI     	\\%,,qu"MNDF  ell15&QRr   r   r   c                    UR                  SSS9R                  n[        R                  " U R                  X-
  -  5      nUR                  SSS9nUSU R                  -  [        R                  " U5      -  -   nUR                  S5      $ )Nr   Tr#   r&   )maxvaluesr   r)   r   r*   r+   squeezer,   s         r   r1   LsePlus1d.forward]   ss    !T*11		$&&AI./**D*1sTVV|uyy'999{{1~r   )r   )r3   TNNr4   r?   s   @r   rA   rA   C   sV      $SS S S$ %,,  r   rA   c                      ^  \ rS rSr% Sr\R                  R                  \   \	S'          SS\
S\
S\S\S\\   S	\\\R                        4U 4S
 jjjrS\R"                  S\R"                  4S jrSrU =r$ )	SimPool2df   aN  SimPool: Simple Attention-Based Pooling for 2D (NCHW) inputs.

From 'Keep It SimPool: Who Said Supervised Transformers Suffer from Attention Deficit?'
https://arxiv.org/abs/2309.06891

Uses GAP as query initialization and applies cross-attention between the GAP query
and spatial features to produce a weighted pooled representation.

fused_attnr$   	num_headsqkv_biasqk_normgamma
norm_layerc	                 V  > [         T
U ]  5         XxS.n	X-  S:X  d   S5       eX l        X-  U l        U R                  S-  U l        XPl        [        5       U l        U=(       d    [        R                  nU" U40 U	D6U l
        [        R                  " X4SU0U	D6U l        [        R                  " X4SU0U	D6U l        U(       a1  U" U R                  40 U	D6U l        U" U R                  40 U	D6U l        g[        R                   " 5       U l        [        R                   " 5       U l        g)a  
Args:
    dim: Input feature dimension (number of channels).
    num_heads: Number of attention heads.
    qkv_bias: If True, add bias to query and key projections.
    qk_norm: If True, apply normalization to queries and keys.
    gamma: If provided, apply power normalization to values with this exponent.
    norm_layer: Normalization layer for patches and optionally qk_norm.
    flatten: If True, flatten output to (B, C).
r   r   "dim must be divisible by num_heads      biasNr   r   rN   head_dimscalerQ   r   rM   r   	LayerNormnormLinearqkq_normk_normIdentityr   r$   rN   rO   rP   rQ   rR   r   r   ddr   s             r   r   SimPool2d.__init__q   s    * 	/!#I%II#"(]]d*

(*/2<<
s)b)	39(9b939(9b9$T]]9b9DK$T]]9b9DK++-DK++-DKr   r   r   c                    UR                   u  p#pEXE-  nUR                  S5      R                  SS5      nUR                  SSS9nU R	                  U5      nU R                  U5      R                  USU R                  U R                  5      R                  SS5      nU R                  U5      R                  X&U R                  U R                  5      R                  SS5      n	UR                  X&U R                  U R                  5      R                  SS5      n
U R                  U5      U R                  U	5      pU R                  b  U
R                  SSS9nX-
  S-   nU R                  (       a1  [        R                   " XyUR#                  U R                  5      5      nOMXpR$                  -  U	R                  SS5      -  nUR'                  SS9nXR#                  U R                  5      -  nUR#                  S	U R                  -  5      nO]U R                  (       a  [        R                   " XyU
5      nO4XpR$                  -  U	R                  SS5      -  nUR'                  SS9nX-  nUR                  SS5      R                  X#5      nU$ )
Nr!   r   Tr#   ư>r$   r&   )shaper   	transposer*   r[   r]   reshaperN   rX   r^   r_   r`   rQ   aminrM   r'   scaled_dot_product_attentionpowrY   softmax)r   r   BCHWNr]   x_normr^   vv_min	v_shiftedattn_outattnr0   s                   r   r1   SimPool2d.forward   s   WW
aE IIaL""1a( FFq$F' 1 FF1IaDNNDMMBLLQPQRFF6N""1GQQRSUVWNN1?II!QO{{1~t{{1~1::!FFr4F0E	D(I99!	djj@YZJJ!++b"*==|||+--

";;,,sTZZ/0C44Q1=JJ!++b"*==|||+h mmAq!))!/
r   
rM   rQ   rX   r^   r`   r[   rN   r]   r_   rY   r   FFNNNNr5   r6   r7   r8   r9   r   jitFinalr;   __annotations__intr   r:   r   r   Moduler   r<   r1   r=   r>   r?   s   @r   rK   rK   f   s     		%%
 "!%)48'('( '( 	'(
 '( E?'( !bii1'( '(R) )%,, ) )r   rK   c                      ^  \ rS rSr% Sr\R                  R                  \   \	S'          SS\
S\
S\S\S\\   S	\\\R                        4U 4S
 jjjrS\R"                  S\R"                  4S jrSrU =r$ )	SimPool1d   aR  SimPool: Simple Attention-Based Pooling for sequence (NLC) inputs.

From 'Keep It SimPool: Who Said Supervised Transformers Suffer from Attention Deficit?'
https://arxiv.org/abs/2309.06891

Uses GAP as query initialization and applies cross-attention between the GAP query
and sequence tokens to produce a weighted pooled representation.
rM   r$   rN   rO   rP   rQ   rR   c	                 V  > [         T
U ]  5         XxS.n	X-  S:X  d   S5       eX l        X-  U l        U R                  S-  U l        XPl        [        5       U l        U=(       d    [        R                  nU" U40 U	D6U l
        [        R                  " X4SU0U	D6U l        [        R                  " X4SU0U	D6U l        U(       a1  U" U R                  40 U	D6U l        U" U R                  40 U	D6U l        g[        R                   " 5       U l        [        R                   " 5       U l        g)ag  
Args:
    dim: Input feature dimension.
    num_heads: Number of attention heads.
    qkv_bias: If True, add bias to query and key projections.
    qk_norm: If True, apply normalization to queries and keys.
    gamma: If provided, apply power normalization to values with this exponent.
    norm_layer: Normalization layer for tokens and optionally qk_norm.
r   r   rT   rU   rV   NrW   rb   s             r   r   SimPool1d.__init__   s    ( 	/!#I%II#"(]]d*

(*/2<<
s)b)	39(9b939(9b9$T]]9b9DK$T]]9b9DK++-DK++-DKr   r   r   c                    UR                   u  p#nUR                  SSS9nU R                  U5      nU R                  U5      R	                  USU R
                  U R                  5      R                  SS5      nU R                  U5      R	                  X#U R
                  U R                  5      R                  SS5      nUR	                  X#U R
                  U R                  5      R                  SS5      nU R                  U5      U R                  U5      puU R                  b  UR                  SSS9n	X-
  S-   n
U R                  (       a1  [        R                  " XWU
R!                  U R                  5      5      nOMXPR"                  -  UR                  SS5      -  nUR%                  SS9nXR!                  U R                  5      -  nUR!                  S	U R                  -  5      nO]U R                  (       a  [        R                  " XWU5      nO4XPR"                  -  UR                  SS5      -  nUR%                  SS9nX-  nUR                  SS5      R	                  X$5      nU$ )
Nr   Tr#   r!   rf   rg   rh   ri   r&   )rj   r*   r[   r]   rl   rN   rX   rk   r^   r_   r`   rQ   rm   rM   r'   rn   ro   rY   rp   )r   r   rq   ru   rr   r]   rv   r^   rw   rx   ry   rz   r{   r0   s                 r   r1   SimPool1d.forward   s   ''a FFq$F' 1 FF1IaDNNDMMBLLQPQRFF6N""1GQQRSUVWNN1?II!QO{{1~t{{1~1::!FFr4F0E	D(I99!	djj@YZJJ!++b"*==|||+--

";;,,sTZZ/0C44Q1=JJ!++b"*==|||+h mmAq!))!/
r   r}   r~   r   r?   s   @r   r   r      s     		%%
 "!%)48&(&( &( 	&(
 &( E?&( !bii1&( &(P% %%,, % %r   r   )r9   typingr   r   r   r   torch.nnr   torch.nn.functional
functionalr'   configr   r   r	   rA   rK   r    r   r   <module>r      sh    ) (     "(		 (V 		  F]		 ]@X		 Xr   