
    Αi                        S SK Jr  S SKJr  S SKJs  Jr  \(       a  S SKJ	r	       S                 SS jjr
g)    )annotations)TYPE_CHECKINGN)Tensorc                    U(       a  Ub  [        S5      eU R                  SS5      UR                  SS5      UR                  SS5      p!n [        R                  " U UUUUUSSUUS5      nUR                  SS5      $ )a  
The equation is:

.. math::

    result=softmax(\frac{ Q * K^T }{\sqrt{d}}) * V

where : ``Q``, ``K``, and ``V`` represent the three input parameters of the attention module.
The dimensions of the three parameters are the same.
``d`` represents the size of the last dimension of the three parameters.


Warning:
    This API only verifies inputs with dtype float16 and bfloat16, other dtypes may fall back to math
    implementation, which is less optimized.

Note:
    This API differs from :ref:`api_paddle_nn_functional_scaled_dot_product_attention` in that:
    The QKV layout of this API is [batch_size, num_heads, seq_len, head_dim] or [num_heads, seq_len, head_dim].

Args:
    query(Tensor): The query tensor in the Attention module.
                    4-D tensor with shape:
                    [batch_size, num_heads, seq_len, head_dim].
                    3-D tensor with shape:
                    [num_heads, seq_len, head_dim].
                    The dtype can be float16 or bfloat16.
    key(Tensor): The key tensor in the Attention module.
                    4-D tensor with shape:
                    [batch_size, num_heads, seq_len, head_dim].
                    3-D tensor with shape:
                    [num_heads, seq_len, head_dim].
                    The dtype can be float16 or bfloat16.
    value(Tensor): The value tensor in the Attention module.
                    4-D tensor with shape:
                    [batch_size, num_heads, seq_len, head_dim].
                    3-D tensor with shape:
                    [num_heads, seq_len, head_dim].
                    The dtype can be float16 or bfloat16.
    attn_mask(Tensor, optional): The attention mask tensor. The shape should be broadcastable to
                    [batch_size, num_heads, seq_len_key, seq_len_query]. The dtype can be bool
                    or same type of query. The bool mask indicates the positions should take part
                    in attention. The non-bool mask will be added to attention score.

    is_causal(bool, optional): Whether enable causal mode. If True, the attention masking is a lower
                    triangular matrix when the mask is a square matrix. The attention masking has the
                    form of the upper left causal bias when the mask is a non-square matrix.
                    An error is thrown if both attn_mask and is_causal are set.
    scale(float, optional): The scaling factor used in the calculation of attention weights.
                    If None, scale = 1 / sqrt(head_dim).
    enable_gqa(bool, optional): Whether enable GQA mode. Default False.

Returns:
    out(Tensor): The attention tensor.
                4-D tensor with shape: [batch_size, num_heads, seq_len, head_dim].
                3-D tensor with shape: [num_heads, seq_len, head_dim].
                The dtype can be float16 or bfloat16.

Examples:
    .. code-block:: python

        >>> # doctest: +SKIP('bfloat need V100 compile')
        >>> import paddle
        >>> q = paddle.rand((1, 2, 128, 16), dtype=paddle.bfloat16)
        >>> output = paddle.compat.nn.functional.scaled_dot_product_attention(q, q, q, None, 0.9, False)
        >>> print(output)
        >>> # doctest: -SKIP
Nz8Explicit attn_mask should not be set when is_causal=TrueT)RuntimeErrorswapaxesFscaled_dot_product_attention)	querykeyvalue	attn_mask	dropout_p	is_causalscale
enable_gqaouts	            `/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/compat/nn/functional/sdpa.pyr   r      s    \ Y*F
 	

 	r2Rr2 E
 
(
(C <<B    )Ng        FNF)r   r   r   r   r   r   r   zTensor | Noner   floatr   boolr   zfloat | Noner   r   returnr   )
__future__r   typingr   paddle.nn.functionalnn
functionalr   paddler   r    r   r   <module>r"      s    #        $e e 	e  e  	e 
 e  e  e  e  e r   