
    RЦi/                         S r SSKJrJrJr  SSKrSSKJr  SSKJr  SSK	J
r
  SSKJr   " S	 S
\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      rg)a^  Coordinate Attention and Variants

Coordinate Attention decomposes channel attention into two 1D feature encoding processes
to capture long-range dependencies with precise positional information. This module includes
the original implementation along with simplified and other variants.

Papers / References:
- Coordinate Attention: `Coordinate Attention for Efficient Mobile Network Design` - https://arxiv.org/abs/2103.02907
- Efficient Local Attention: `Rethinking Local Perception in Lightweight Vision Transformer` - https://arxiv.org/abs/2403.01123

Hacked together by / Copyright 2025 Ross Wightman
    )OptionalTypeUnionN)nn   )create_act_layer)make_divisible)
GroupNorm1c                      ^  \ rS rSrSSSSS\R
                  \R                  SSSS4S\S	\S
\	\   S\S\S\
S\\R                     S\	\\R                        S\\\\R                     4   S\
4U 4S jjjrS rSrU =r$ )	CoordAttn   g      ?N   UUUUUU?Fsigmoidchannelsrd_ratiord_channels
rd_divisor	se_factorbias	act_layer
norm_layer
gate_layerhas_skipc                   > XS.n[         TU ]  5         Xl        U(       d  [        X-  U-  USS9n[        R
                  " X4SSSUS.UD6U l        Ub	  U" U40 UD6O[        R                  " 5       U l        U" 5       U l	        [        R
                  " X14SSSUS.UD6U l
        [        R
                  " X14SSSUS.UD6U l        [        U	5      U l        g)a5  Coordinate Attention module for spatial feature recalibration.

Introduced in "Coordinate Attention for Efficient Mobile Network Design" (CVPR 2021).
Decomposes channel attention into two 1D feature encoding processes along the height and
width axes to capture long-range dependencies with precise positional information.

Args:
    channels: Number of input channels.
    rd_ratio: Reduction ratio for bottleneck channel calculation.
    rd_channels: Explicit number of bottleneck channels, overrides rd_ratio if set.
    rd_divisor: Divisor for making bottleneck channels divisible.
    se_factor: Applied to rd_ratio for final channel count (keeps params similar to SE).
    bias: Whether to use bias in convolution layers.
    act_layer: Activation module class for bottleneck.
    norm_layer: Normalization module class, None for no normalization.
    gate_layer: Gate activation, either 'sigmoid', 'hardsigmoid', or a module class.
    has_skip: Whether to add residual skip connection to output.
    device: Device to place tensors on.
    dtype: Data type for tensors.
devicedtype        round_limitr   r   )kernel_sizestridepaddingr   N)super__init__r   r	   r   Conv2dconv1Identitybn1actconv_hconv_wr   gate)selfr   r   r   r   r   r   r   r   r   r   r   r   dd	__class__s                 U/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/layers/coord_attn.pyr&   CoordAttn.__init__   s    H / ()<y)H*bdeKYYxj!AWX_cjgij
4>4J:k0R0PRP[P[P];iik1QXY`dkhjkiik1QXY`dkhjk$Z0	    c                 4   UnUR                  5       u  p4pVUR                  SSS9nUR                  SSS9nUR                  SS5      n[        R                  " Xx/SS9n	U R                  U	5      n	U R                  U	5      n	U R                  U	5      n	[        R                  " XU/SS9u  pxUR                  SS5      nU R                  U R                  U5      5      n
U R                  U R                  U5      5      nX+-  U
-  nU R                  (       a  X-   nU$ )N   T)keepdim   dim)sizemean	transposetorchcatr(   r*   r+   splitr.   r,   r-   r   )r/   xidentityNCHWx_hx_wya_ha_wouts                r2   forwardCoordAttn.forwardJ   s    VVX
a ffQf%ffQf%mmB#IIsja(JJqMHHQKHHQK;;qa&a0mmB#iiC()iiC()ns"==.C
r4   )r+   r*   r(   r,   r-   r.   r   )__name__
__module____qualname____firstlineno__r   	HardswishBatchNorm2dintfloatr   boolr   Moduler   strr&   rO   __static_attributes____classcell__r1   s   @r2   r   r      s     &)-")+46NN6?"0101 01 "#	01
 01 01 01 BII01 !bii101 c4		?2301 01 01d r4   r   c                      ^  \ rS rSrSrSSSSS\R                  SS	SS4
S
\S\S\	\   S\S\S\
S\\R                     S\\\\R                     4   S\
4U 4S jjjrS rSrU =r$ )SimpleCoordAttne   a  Simplified Coordinate Attention variant.

Uses
 * linear layers instead of convolutions
 * no norm
 * additive pre-gating re-combination
for reduced complexity while maintaining the core coordinate attention mechanism
of separate height and width attention.
g      ?Nr   r   Tr   Fr   r   r   r   r   r   r   r   r   c                 P  > XS.n[         TU ]  5         Xl        U(       d  [        X-  U-  USS9n[        R
                  " X4SU0UD6U l        U" 5       U l        [        R
                  " X14SU0UD6U l        [        R
                  " X14SU0UD6U l	        [        U5      U l        g)a  
Args:
    channels: Number of input channels.
    rd_ratio: Reduction ratio for bottleneck channel calculation.
    rd_channels: Explicit number of bottleneck channels, overrides rd_ratio if set.
    rd_divisor: Divisor for making bottleneck channels divisible.
    se_factor: Applied to rd_ratio for final channel count (keeps param similar to SE)
    bias: Whether to use bias in linear layers.
    act_layer: Activation module class for bottleneck.
    gate_layer: Gate activation, either 'sigmoid', 'hardsigmoid', or a module class.
    has_skip: Whether to add residual skip connection to output.
    device: Device to place tensors on.
    dtype: Data type for tensors.
r   r   r    r   N)r%   r&   r   r	   r   Linearfc1r+   fc_hfc_wr   r.   )r/   r   r   r   r   r   r   r   r   r   r   r   r0   r1   s                r2   r&   SimpleCoordAttn.__init__p   s    8 / ()<y)H*bdeK99XDDD;IIkE$E"E	IIkE$E"E	$Z0	r4   c                    UnUR                  SS9nUR                  SS9nU R                  U R                  UR                  SS5      5      5      nU R                  U R                  UR                  SS5      5      5      nU R	                  U5      R                  SS5      R                  S5      nU R                  U5      R                  SS5      R                  S5      nX R                  XV-   5      -  nU R                  (       a  Xr-   nU$ )Nr6   r;   r8   r   r9   r:   )	r>   r+   rd   r?   re   	unsqueezerf   r.   r   r/   rC   rD   rI   rJ   rL   rM   rN   s           r2   rO   SimpleCoordAttn.forward   s     fffmfffm hhtxxa 345hhtxxa 345 iin&&q!,66r:iin&&q!,66r:39--==.C
r4   )r+   rd   re   rf   r.   r   )rQ   rR   rS   rT   __doc__r   SiLUrW   rX   r   rY   r   rZ   r   r[   r&   rO   r\   r]   r^   s   @r2   r`   r`   e   s     #)-$)+6?"(1(1 (1 "#	(1
 (1 (1 (1 BII(1 c4		?23(1 (1 (1T r4   r`   c                      ^  \ rS rSrSrSS\R                  S\SSS4S\S\S	\	S
\
\R                     S\\\
\R                     4   S\\
\R                        S\	4U 4S jjjrS rSrU =r$ )EfficientLocalAttn   zEfficient Local Attention.

Lightweight alternative to Coordinate Attention that preserves spatial
information without channel reduction. Uses 1D depthwise convolutions
and GroupNorm for better generalization.

Paper: https://arxiv.org/abs/2403.01123
   Fr   Nr   r"   r   r   r   r   r   c
           	        > XS.n
[         TU ]  5         Xpl        [        R                  " X4US4SUS-  S4UUS.U
D6U l        [        R                  " X4SU4SSUS-  4UUS.U
D6U l        Ub  U" U40 U
D6U l        U" U40 U
D6U l        O4[        R                  " 5       U l        [        R                  " 5       U l        U" 5       U l
        [        U5      U l        g)a  
Args:
    channels: Number of input channels.
    kernel_size: Kernel size for 1D depthwise convolutions.
    bias: Whether to use bias in convolution layers.
    act_layer: Activation module class applied after normalization.
    gate_layer: Gate activation, either 'sigmoid', 'hardsigmoid', or a module class.
    norm_layer: Normalization module class, None for no normalization.
    has_skip: Whether to add residual skip connection to output.
    device: Device to place tensors on.
    dtype: Data type for tensors.
r   r   r8   r   r"   r#   r$   groupsr   N)r%   r&   r   r   r'   r,   r-   norm_hnorm_wr)   r+   r   r.   )r/   r   r"   r   r   r   r   r   r   r   r0   r1   s              r2   r&   EfficientLocalAttn.__init__   s    0 / ii
$a( A%q)
 
 ii
K(q()
 
 !$X44DK$X44DK++-DK++-DK;$Z0	r4   c                    UnUR                  SSS9nUR                  SSS9nU R                  U R                  U R                  U5      5      5      nU R                  U R	                  U R                  U5      5      5      nU R                  U5      nU R                  U5      nX%-  U-  nU R                  (       a  Xr-   nU$ Nr6   T)r<   r7   r8   )r>   r+   ru   r,   rv   r-   r.   r   rj   s           r2   rO   EfficientLocalAttn.forward   s     ffDf)ffDf) hht{{4;;s#345hht{{4;;s#345 iiniinns"==.C
r4   )r+   r,   r-   r.   r   ru   rv   )rQ   rR   rS   rT   rl   r   rm   r
   rW   rY   r   rZ   r   r[   r   r&   rO   r\   r]   r^   s   @r2   ro   ro      s      !)+6?4>"5151 51 	51
 BII51 c4		?2351 !bii151 51 51n r4   ro   c                      ^  \ rS rSrSr       SS\S\S\S\S\\\	\
R                     4   S\4U 4S	 jjjrS
 rSrU =r$ )	StripAttni  znMinimal Strip Attention.

Lightweight spatial attention using strip pooling with optional learned refinement.
r   use_convr"   r   r   r   c	           	      ~  > XxS.n
[         TU ]  5         X`l        X l        U(       aS  [        R
                  " X4US4SUS-  S4UUS.U
D6U l        [        R
                  " X4SU4SSUS-  4UUS.U
D6U l        O4[        R                  " 5       U l        [        R                  " 5       U l        [        U5      U l
        g)a  
Args:
    channels: Number of input channels.
    use_conv: Whether to apply depthwise convolutions for learned spatial refinement.
    kernel_size: Kernel size for 1D depthwise convolutions when use_conv is True.
    bias: Whether to use bias in convolution layers.
    gate_layer: Gate activation, either 'sigmoid', 'hardsigmoid', or a module class.
    has_skip: Whether to add residual skip connection to output.
    device: Device to place tensors on.
    dtype: Data type for tensors.
r   r   r8   r   rs   N)r%   r&   r   r}   r   r'   r,   r-   r)   r   r.   )r/   r   r}   r"   r   r   r   r   r   _r0   r1   s              r2   r&   StripAttn.__init__  s    . /  ))(!,$)1- DK )),K1,- DK ++-DK++-DK$Z0	r4   c                     UnUR                  SSS9nUR                  SSS9nU R                  U5      nU R                  U5      nU R                  X4-   5      nX%-  nU R                  (       a  Xb-   nU$ ry   )r>   r,   r-   r.   r   )r/   rC   rD   rI   rJ   a_hwrN   s          r2   rO   StripAttn.forwardB  sw     ffDf)ffDf) kk#kk# yy#o==.C
r4   )r,   r-   r.   r   r}   )Tr6   Fr   FNN)rQ   rR   rS   rT   rl   rW   rY   r   r[   r   r   rZ   r&   rO   r\   r]   r^   s   @r2   r|   r|     s     " 6?"3131 31 	31
 31 c4		?2331 31 31j r4   r|   )rl   typingr   r   r   r@   r   
create_actr   helpersr	   normr
   rZ   r   r`   ro   r|    r4   r2   <module>r      sl    ) (   ( # K		 K\Hbii HVT TnM		 Mr4   