
    RЦi                         S r SSKJrJr  SSKrSSKJr  SSKJs  Jr  SSK	J
r
  SSKJrJr  SSKJr  SS	 jr " S
 S\R"                  5      rg)am  Lambda Layer

Paper: `LambdaNetworks: Modeling Long-Range Interactions Without Attention`
    - https://arxiv.org/abs/2102.08602

@misc{2102.08602,
Author = {Irwan Bello},
Title = {LambdaNetworks: Modeling Long-Range Interactions Without Attention},
Year = {2021},
}

Status:
This impl is a WIP. Code snippets in the paper were used as reference but
good chance some details are missing/wrong.

I've only implemented local lambda conv based pos embeddings.

For a PyTorch impl that includes other embedding options checkout
https://github.com/lucidrains/lambda-networks

Hacked together by / Copyright 2021 Ross Wightman
    )OptionalTupleN)nn   )ndgrid)	to_2tuplemake_divisible)trunc_normal_c                    [        U 5      n [        R                  " [        [        R                  " U S   U[        R
                  S9[        R                  " U S   U[        R
                  S95      5      R                  S5      nUS S 2S S S 24   US S 2S S 2S 4   -
  nUS==   U S   S-
  -  ss'   US==   U S   S-
  -  ss'   U$ )Nr   devicedtyper   )r   torchstackr   arangelongflatten)sizer   posrel_poss       W/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/layers/lambda_layer.pyrel_pos_indicesr   "   s    T?D
++fT!WV5::>T!WV5::>  wqz  !T1*oAq$J/GAJ$q'A+JAJ$q'A+JN    c                      ^  \ rS rSrSr          SS\S\\   S\\\\4      S\S\S\S	\S
\S\	4U 4S jjjr
SS jrSS jrS rSS jrSrU =r$ )LambdaLayer.   a  Lambda Layer

Paper: `LambdaNetworks: Modeling Long-Range Interactions Without Attention`
    - https://arxiv.org/abs/2102.08602

NOTE: intra-depth parameter 'u' is fixed at 1. It did not appear worth the complexity to add.

The internal dimensions of the lambda module are controlled via the interaction of several arguments.
  * the output dimension of the module is specified by dim_out, which falls back to input dim if not set
  * the value (v) dimension is set to dim_out // num_heads, the v projection determines the output dim
  * the query (q) and key (k) dimension are determined by
    * dim_head = (dim_out * attn_ratio // num_heads) if dim_head is None
    * q = num_heads * dim_head, k = dim_head
  * as seen above, attn_ratio determines the ratio of q and k relative to the output if dim_head not set

Args:
    dim: input dimension to the module
    dim_out: output dimension of the module, same as dim if not set
    feat_size: size of input feature_map for relative pos variant H, W
    stride: output stride of the module, avg pool used if stride == 2
    num_heads: parallel attention heads.
    dim_head: dimension of query and key heads, calculated from dim_out * attn_ratio // num_heads if not set
    r: local lambda convolution radius. Use lambda conv if set, else relative pos if not. (default: 9)
    qk_ratio: ratio of q and k dimensions to output dimension when dim_head not set. (default: 1.0)
    qkv_bias: add bias to q, k, and v projections
dimdim_out	feat_sizestride	num_headsdim_headrqk_ratioqkv_biasc           	        > XS.n[         TU ]  5         U=(       d    UnX%-  S:X  d   S5       eU=(       d    [        X(-  SS9U-  U l        XPl        X%-  U l        [        R                  " UXPR                  -  U R                  -   U R
                  -   4SU	S.UD6U l        [        R                  " XPR                  -  40 UD6U l
        [        R                  " U R
                  40 UD6U l        UbK  [        R                  " SU R                  XwS44SUS	-  US	-  S40UD6U l        S U l        S U l        S U l        OUc   e[#        U5      nX0l        U Vs/ s H  nS	U-  S-
  PM     nnUS   US   -  nS U l        [        R$                  " [&        R(                  " US   US   U R                  40 UD65      U l        U R+                  S
[&        R(                  " S	X4U
[&        R,                  S9SS9  US	:X  a  [        R.                  " S	S	5      O[        R0                  " 5       U l        U R5                  5         g s  snf )Nr   r   z should be divided by num_heads   )divisorr   )kernel_sizebiaspadding   r   F)
persistent)super__init__r	   dim_qkr!   dim_vr   Conv2dqkvBatchNorm2dnorm_qnorm_vConv3dconv_lambdapos_embr   r   r   	Parameterr   emptyregister_bufferr   	AvgPool2dIdentitypoolreset_parameters)selfr   r   r   r    r!   r"   r#   r$   r%   r   r   ddsrel_sizeM	__class__s                   r   r/   LambdaLayer.__init__I   s    /.S"a'J)JJ'\.1CQ"OS\"\")
99#dkk1DJJ>
 	

 
 nnY%<CCnnTZZ626=!yyDKK!fQRSVUVZ[U[]^L_fcefDDL#'D !DN (((!),I&N+459aA	9H5!y|+A#D<<HQK!dkk(`]_(`aDL  !QIfEJJG  !  +1A+BLLA&2;;=	 	 6s   H;c                 Z   [        U R                  R                  U R                  R                  R                  S   S-  S9  U R                  b+  [        U R                  R                  U R
                  S-  S9  U R                  b  [        U R                  SS9  U R                  5         g)z"Initialize parameters and buffers.r   g      )stdNg{Gz?)r
   r3   weightshaper8   r0   r9   _init_buffersrA   s    r   r@   LambdaLayer.reset_parameters   s{    dhhoo488??+@+@+Ct+KL'$**11t{{d7JK<<#$,,C0r   c                     U R                   bB  U R                   R                  [        U R                  U R                   R                  S95        gg)z.Compute and fill non-persistent buffer values.N)r   )r   copy_r   r   rM   s    r   rL   LambdaLayer._init_buffers   sA    +  &&t7K7K7R7RS ,r   c           	         UR                   u  p#pEXE-  nU R                  U5      n[        R                  " UU R                  U R
                  -  U R
                  U R                  /SS9u  pn
U R                  U5      R                  X R                  U R
                  U5      R                  SS5      nU R                  U
5      R                  X R                  U5      R                  SS5      n
[        R                  " U	R                  X R
                  U5      SS9n	X-  nXR                  S5      -  nU R                  ci  U R                  U
R                  USXEU R                  5      5      nUR                  USU R
                  XE-  U R                  5      R                  SS5      nOpU R                  U R                   S   U R                   S   4   R#                  USSS5      nUR                  SS5      U
R                  S5      -  R                  S5      nUR                  S5      U-  R%                  S5      nX-   R                  SS5      R                  X#XE5      nU R'                  U5      nU$ )Nr   )r   r,      r   )rK   r3   r   splitr!   r0   r1   r5   reshape	transposer6   Fsoftmax	unsqueezer9   r8   r   expandsqueezer?   )rA   xBCHWrE   r3   qkvcontent_lamcontent_outposition_lamr9   position_outouts                    r   forwardLambdaLayer.forward   s   WW
aEhhqk++cNNT[[($++tzz$CHIKaKKN""1nndkk1EOOPRTVWKKN""1jj!4>>r2FIIaii;;2;e//22<<++AIIaA$**,MNL'//1dkk15$**U__`acdeL ll4#7#7#:D<P<PQR<S#ST[[\]_acegijG#--b"5AFQQRSTLB,6??C)44R<DDQ1Piin
r   c                 $    U R                  5         g)z"Initialize non-persistent buffers.N)rL   rM   s    r   init_non_persistent_buffers'LambdaLayer.init_non_persistent_buffers   s    r   )r8   r0   r1   r   r5   r6   r!   r?   r9   r3   r   )
NNr         	   g      ?FNN)returnN)__name__
__module____qualname____firstlineno____doc__intr   r   floatboolr/   r@   rL   rk   rn   __static_attributes____classcell__)rF   s   @r   r   r   .   s    : &*37!"8 8  c]8   c3h0	8 
 8  8  8  8  8  8  8 t4 r   r   )N)rx   typingr   r   r   r   torch.nn.functional
functionalrY   gridr   helpersr   r	   weight_initr
   r   Moduler    r   r   <module>r      s;   , #      . &	A")) Ar   