ó
    Ñ‘®i¨  ã                  ó†   • S SK Jr  S SKJr  S SKJr  S SKJr  \(       a  S SKJr    S                   S	S jjr	g)
é    )Úannotations)ÚTYPE_CHECKING)Ú_C_ops)Úin_dynamic_or_pir_mode)ÚTensorNc	           
     ór   • [        5       (       a(  [        R                  " U UUUUUUU5      u  n	n
nnU	U
UU4$ g)a™  
Permute tokens for Mixture of Experts (MoE) computation in distributed training scenarios.

Note:
    This function reorganizes input tokens based on expert assignments to prepare for expert computation.
    It handles both bfloat16 and float8_e4m3fn data types with proper scaling for float8 inputs.

    1. This function is typically used in pair of moe_unpermute to provide complete MoE functionality.
    2. For float8 inputs, proper scaling must be provided via the scale parameter.
    3. The padding_alignment parameter affects memory efficiency but not correctness.
    4. Any output tokens can find an exact-match in the original input tokens.
    5. This permute function has overcomed the aadiff issue, is deterministic.

Args:
    hidden_states (Tensor): The input tensor containing tokens to be permuted, stored in row-major layout.
        Supported data types: bfloat16 or float8_e4m3fn.
        Shape: [sequence_length, token_dimension]
    scale (Tensor|None): Scaling factors required when hidden_states is of float8 type.
        For float8 inputs, this tensor provides the scaling factors for dequantization.
        Shape: [sequence_length, ceil(token_dimension / 128)]
        Data type: float32
    expert_routemap_topk (Tensor): Tensor indicating expert assignments for each token (top-k experts).
        Each value represents the expert index the token is assigned to (-1 indicates not assigned).
        Shape: [sequence_length, top_k_experts]
        Data type: int32
        Value range: [-1, num_experts)
    expert_prob_topk (Tensor): Tensor containing routing probabilities for top-k experts.
        Shape: [sequence_length, top_k_experts]
        Data type: float32
    num_experts (int): Total number of experts in the MoE layer, limited between 1 and 64.
    tokens_per_expert (list[int]): List where each element indicates the number of tokens
        assigned to the corresponding expert.
    padding_alignment (int): Tokens alignment requirement for expert buffers (in bytes).
        Must be a power of 2. Typical values are 16, 32 or 64 for optimal memory access.
    do_gather(bool): Decide whether do actual tokens gather operation or not, default is True.
    name (str|None, optional): Name prefix for the operation (optional).
        Default: None

Returns:
    tuple[Tensor, Tensor, Tensor, Tensor]:
        - hidden_states_unzipped (Tensor): The permuted and broadcasted input tensor.
            Shape: [total_tokens_after_broadcast, token_dimension]
            Data type: same as input hidden_states
        - zipped_expertwise_rowmap (Tensor): Mapping tensor used to restore original order (unpermute).
            Shape: [sequence_length, num_experts]
            Data type: int32
        - token_prob_unzipped (Tensor): Flattened expert probabilities aligned with permuted tokens.
            Shape: [total_tokens_after_broadcast, 1]
            Data type: float32
        - scale_unzipped (Tensor): Broadcasted scale tensor (only valid for float8 inputs).
            Shape: [total_tokens_after_broadcast, ceil(token_dimension / 128)]
            Data type: float32

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> # doctest: +SKIP('This is only support in cuda 12.0+')
        >>> import paddle
        >>> import numpy as np
        >>> import paddle.nn.functional as F
        >>> hidden_states = paddle.randn([3, 128], dtype='bfloat16')
        >>> expert_routemap_topk = paddle.to_tensor([[-1, 0, -1, -1, 2, -1, -1, -1],
        ...                                          [1, -1, -1, -1, -1, -1, -1, -1],
        ...                                          [-1, -1, -1, -1, -1, -1, 1, -1]],
        ...                                           dtype='int32')
        >>> expert_prob_topk= paddle.to_tensor([[0.0, 0.6, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0],
        ...                                     [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        ...                                     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]],
        ...                                          dtype='float32')
        >>> num_experts = 3
        >>> tokens_per_expert = [1, 2, 1]
        >>> padding_alignment = 2
        >>> hidden_states_unzipped, zipped_expertwise_rowmap, token_prob_unzipped, scale_unzipped = F.moe_permute(
        ...     hidden_states,
        ...     None,
        ...     expert_routemap_topk,
        ...     expert_prob_topk,
        ...     num_experts,
        ...     tokens_per_expert,
        ...     padding_alignment,
        ... )
        >>> # weighted by probs.
        >>> hidden_states_unzipped = (hidden_states_unzipped.astype("float32") * token_prob_unzipped.astype("float32").unsqueeze(-1)).astype("bfloat16")
        >>> zipped_tokens, zipped_probs = F.moe_unpermute(hidden_states_unzipped, zipped_expertwise_rowmap, expert_routemap_topk, token_prob_unzipped,3,3)
        >>> np.testing.assert_allclose(zipped_tokens.numpy(), hidden_states.numpy(), rtol=1e-05, atol=1e-06)
N)r   r   Úmoe_permute)Úhidden_statesÚscaleÚexpert_routemap_topkÚexpert_prob_topkÚnum_expertsÚtokens_per_expertÚpadding_alignmentÚ	do_gatherÚnameÚhidden_states_unzippedÚzipped_expertwise_rowmapÚtoken_prob_unzippedÚscale_unzippeds                Ú`/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/nn/functional/moe_permute.pyr	   r	      sd   € ôD ×Ñô ×ÒØØØ ØØØØØó	
ñ	
Ø"Ø$ØØð #Ø$ØØð	
ð 	
ð!  ó    )TN)r
   r   r   zTensor | Noner   r   r   r   r   Úintr   Úlistr   r   r   Úboolr   z
str | NoneÚreturnz%tuple[Tensor, Tensor, Tensor, Tensor])
Ú
__future__r   Útypingr   Úpaddler   Úpaddle.base.frameworkr   r   r	   © r   r   Ú<module>r"      s‘   ðõ #å  å Ý 8æÝð Øðw
Øðw
àðw
ð !ðw
ð ð	w
ð
 ðw
ð ðw
ð ðw
ð ðw
ð ðw
ð +öw
r   