
    x-j                    T    d dl mZ d dlmZ d dlmc mZ erd dlm	Z	 	 	 	 	 	 dddZ
dS )    )annotations)TYPE_CHECKINGN)Tensor        Fqueryr   keyvalue	attn_maskTensor | None	dropout_pfloat	is_causalboolscalefloat | None
enable_gqareturnc                   |r|t          d          |                     dd          |                    dd          |                    dd          }}} t          j        | |||||dd||d          }|                    dd          S )aw  
    The equation is:

    .. math::

        result=softmax(\frac{ Q * K^T }{\sqrt{d}}) * V

    where : ``Q``, ``K``, and ``V`` represent the three input parameters of the attention module.
    The dimensions of the three parameters are the same.
    ``d`` represents the size of the last dimension of the three parameters.


    Warning:
        This API only verifies inputs with dtype float16 and bfloat16, other dtypes may fall back to math
        implementation, which is less optimized.

    Note:
        This API differs from :ref:`api_paddle_nn_functional_scaled_dot_product_attention` in that:
        The QKV layout of this API is [batch_size, num_heads, seq_len, head_dim] or [num_heads, seq_len, head_dim].

    Args:
        query(Tensor): The query tensor in the Attention module.
                        4-D tensor with shape:
                        [batch_size, num_heads, seq_len, head_dim].
                        3-D tensor with shape:
                        [num_heads, seq_len, head_dim].
                        The dtype can be float16 or bfloat16.
        key(Tensor): The key tensor in the Attention module.
                        4-D tensor with shape:
                        [batch_size, num_heads, seq_len, head_dim].
                        3-D tensor with shape:
                        [num_heads, seq_len, head_dim].
                        The dtype can be float16 or bfloat16.
        value(Tensor): The value tensor in the Attention module.
                        4-D tensor with shape:
                        [batch_size, num_heads, seq_len, head_dim].
                        3-D tensor with shape:
                        [num_heads, seq_len, head_dim].
                        The dtype can be float16 or bfloat16.
        attn_mask(Tensor, optional): The attention mask tensor. The shape should be broadcastable to
                        [batch_size, num_heads, seq_len_key, seq_len_query]. The dtype can be bool
                        or same type of query. The bool mask indicates the positions should take part
                        in attention. The non-bool mask will be added to attention score.

        is_causal(bool, optional): Whether enable causal mode. If True, the attention masking is a lower
                        triangular matrix when the mask is a square matrix. The attention masking has the
                        form of the upper left causal bias when the mask is a non-square matrix.
                        An error is thrown if both attn_mask and is_causal are set.
        scale(float, optional): The scaling factor used in the calculation of attention weights.
                        If None, scale = 1 / sqrt(head_dim).
        enable_gqa(bool, optional): Whether enable GQA mode. Default False.

    Returns:
        out(Tensor): The attention tensor.
                    4-D tensor with shape: [batch_size, num_heads, seq_len, head_dim].
                    3-D tensor with shape: [num_heads, seq_len, head_dim].
                    The dtype can be float16 or bfloat16.

    Examples:
        .. code-block:: python

            >>> # doctest: +SKIP('bfloat need V100 compile')
            >>> import paddle
            >>> q = paddle.rand((1, 2, 128, 16), dtype=paddle.bfloat16)
            >>> output = paddle.compat.nn.functional.scaled_dot_product_attention(q, q, q, None, 0.9, False)
            >>> print(output)
            >>> # doctest: -SKIP
    Nz8Explicit attn_mask should not be set when is_causal=TrueT)RuntimeErrorswapaxesFscaled_dot_product_attention)	r   r   r	   r
   r   r   r   r   outs	            `/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/compat/nn/functional/sdpa.pyr   r      s    \  
Y*F
 
 	

 	r2Rr2 3E
 
( C <<B    )Nr   FNF)r   r   r   r   r	   r   r
   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   typingr   paddle.nn.functionalnn
functionalr   paddler   r    r   r   <module>r%      s    # " " " " "                                 $e  e  e  e  e  e  e r   