ó
    Ñ‘®i[  ã                  ó„   • S SK Jr  S SKJr  / rS SKJr  S SKJr  \(       a  S SKJ	r	     S               S	S jjr
g)
é    )Úannotations)ÚTYPE_CHECKING)Ú_C_ops)Úin_dynamic_or_pir_mode)ÚTensorNc                ó^   • [        5       (       d   S5       e[        R                  " XX#XE5      $ )aŽ  
Note:
    This API is only used from ``CUDA 11.8`` .

SparseCsrTensor is used to store the intermediate result of Attention matrix
in Transformer module, which can reduce memory usage and improve performance.
``sparse_mask`` express the sparse layout in CSR format.
The calculation equation is:

.. math::

    result = softmax(\frac{ Q * K^T }{\sqrt{d}}) * V

where : ``Q``, ``K``, and ``V`` represent the three input parameters of the attention module.
The shape of the three parameters are: `[batch_size, num_heads, seq_len, head_dim]`, and
``d`` represents ``head_dim`` .

Args:
    query (DenseTensor): `query` in the Attention module. 4D Tensor with float32 or float64.
    key (DenseTensor): `key` in the Attention module. 4D Tensor with float32 or float64.
    value (DenseTensor): `value` in the Attention module. 4D Tensor with float32 or float64.
    sparse_mask (SparseCsrTensor): The sparse layout in the Attention module. Its dense shape
        is `[batch_size*num_heads, seq_len, seq_len]`. `nnz` of each batch must be the same.
        dtype of `crows` and `cols` must be int64, dtype of `values` can be float32 or float64.
    key_padding_mask (DenseTensor|None, optional): The key padding mask tensor in the Attention module.
        2D tensor with shape: [batch_size, seq_len]. dtype can be float32 or float64. Default: None.
    attn_mask (DenseTensor|None, optional): The attention mask tensor in the Attention module.
        2D tensor with shape: [seq_len, seq_len]. dtype can be float32 or float64. Default: None.
    name (str|None, optional): The default value is None. Normally there is no need for user
        to set this property. For more information, please refer to :ref:`api_guide_Name`.

Returns:
    4D tensor with shape: [batch_size, num_heads, seq_len, head_dim]. dtype is same with input.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')

        >>> batch_size = 16
        >>> num_heads = 16
        >>> seq_len = 512
        >>> head_dim = 32

        >>> query = paddle.rand([batch_size, num_heads, seq_len, head_dim])
        >>> key = paddle.rand([batch_size, num_heads, seq_len, head_dim])
        >>> value = paddle.rand([batch_size, num_heads, seq_len, head_dim])

        >>> query.stop_gradient = False
        >>> key.stop_gradient = False
        >>> value.stop_gradient = False

        >>> mask = paddle.nn.functional.dropout(paddle.ones([seq_len, seq_len])).expand([batch_size, num_heads, seq_len, seq_len])
        >>> sp_mask = mask.reshape([-1, seq_len, seq_len]).to_sparse_csr()

        >>> kp_mask = paddle.randint(0, 2, [batch_size, seq_len]).astype(paddle.float32)
        >>> attn_mask = paddle.randint(0, 2, [seq_len, seq_len]).astype(paddle.float32)

        >>> output = paddle.sparse.nn.functional.attention(query, key, value, sp_mask, kp_mask, attn_mask)
        >>> output.backward()
z<Currently, Sparse API only support dynamic mode or pir mode.)r   r   Úsparse_fused_attention)ÚqueryÚkeyÚvalueÚsparse_maskÚkey_padding_maskÚ	attn_maskÚnames          Úg/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/sparse/nn/functional/transformer.pyÚ	attentionr      s;   € ôP "×#Ñ#ð ØFóÐ#ô ×(Ò(ØEÐ(8óð ó    )NNN)r
   r   r   r   r   r   r   r   r   úTensor | Noner   r   r   z
str | NoneÚreturnr   )Ú
__future__r   Útypingr   Ú__all__Úpaddler   Úpaddle.base.frameworkr   r   r   © r   r   Ú<module>r      sƒ   ðõ #å  à
€å Ý 8æÝð '+Ø#ØðMØðMà	ðMð ðMð ð	Mð
 $ðMð ðMð ðMð öMr   