
    Αi"                     V    S SK JrJr  S SKJr  S SKJrJrJr  S r	S r
S
S jrS rS rg	)    )_C_ops_legacy_C_ops)check_variable_and_dtype)LayerHelperin_dynamic_modein_dynamic_or_pir_modec                     [        5       (       a  [        R                  " X5      $ Sn[        U40 [	        5       D6nUR                  U R                  S9nUR                  USU 0SU0SU0S9  U$ )a  
calculate the expert count according to the gate index.
Args:
    numbers (Tensor): Tensor. The input gate index whose data type should be int32 or int64.
    upper_range (int): The number of the experts.
Returns:
    out (Tensor): The output expert count.
Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env: DISTRIBUTED)
        >>> import paddle
        >>> from paddle.distributed.models.moe import utils
        >>> numbers = [[0, 2], [0, 2]]
        >>> upper_range = 6
        >>> numbers = paddle.to_tensor(numbers, dtype="int64")
        >>> number_count = utils._number_count(numbers, upper_range)
        >>> print(number_count)
        Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
        [2, 0, 2, 0, 0, 0])
number_countdtypenumbersOutupper_rangetypeinputsoutputsattrs)r   r   r
   r   locals"create_variable_for_type_inferencer   	append_op)r   r   op_typehelperouts        c/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/models/moe/utils.py_number_countr      s    , ""788 W1177gmm7Lw'CL +.	 	 	
 
    c                     [        5       (       a  [        R                  " XUS   5      $ Sn[        U40 [	        5       D6nUR                  UR                  S9nUR                  UU /U/US   /S.SU/0S9  U$ )a?  
Assign pos decides which tokens should be fetched belong to
specially expert orderly.

Args:
    x (Tensor): Tensor. Every element in the list must be a Tensor whose data type
        should be float16, float32, float64, int32 or int64.
    cum_count (Tensor): The cumulative sum tokens of counters. Every element in the list must be a Tensor whose
        data type should be int64.

Returns:
    out (Tensor): Assemble numbers in the order of counters.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env: DISTRIBUTED)
        >>> import paddle
        >>> from paddle.distributed.models.moe import utils
        >>> number_count = [2, 0, 2, 0]
        >>> numbers = [[0, 2], [0, 2]]
        >>> number_count = paddle.to_tensor(number_count, dtype="int64")
        >>> numbers = paddle.to_tensor(numbers, dtype="int64")
        >>> num_cum = paddle.cumsum(number_count)
        >>> pos = utils._assign_pos(x=numbers, cum_count=num_cum)
        >>> print(pos)
        Tensor(shape=[4], dtype=int64, place=Place(gpu:0), stop_gradient=True,
        [2, 0, 3, 1])

assign_posr   )X	cum_counteff_num_lenr   )r   r   r   )r   r   r    r   r   r   r   r   )xr"   r   r   r   s        r   _assign_posr%   ?   s    <   y}==W1177ioo7NS'[ )"
 SEN 	 	
 
r   c                     US:X  a1  [        5       (       a  [        R                  " X!U 5      $ [        S5      e[        S5      e)a2  
random routing topk gate idx
```
    out = topk_idx
    for i in len(topk_idx):
        if topk * value[i][topk-1] < prob[i]:
            out[i][topk-1] = -1
```
Args:
    topk_idx: gate idx, shape=(N, topk)
    topk_value: values, shape = topk_idx.shape
    prob: random prob, shape=(topk_idx.shape[0],)
   z$Not supporting static graph mode nowzonly topk=2 is supported now)r   r   random_routingRuntimeError)topk_idx
topk_valueprobtopks       r   _random_routingr.   q   s?     qy //(KKEFF9::r   c                     [        5       (       a  [        R                  " XU5      $ Sn[        U40 [	        5       D6nUR                  U R                  S9nUR                  UXS.SU0SU0S9  U$ )a  
limit the expert count by capacity.
Args:
    expert_count (Tensor): Tensor. The input expert count whose data type should be int32 or int64.
    capacity (Tensor): Tensor. The input capacity whose data type should be int32 or int64 and the elements of capacity should be the same with expert_count.numel()/n_work.
    n_work (int): The number of the works.
Returns:
    out (Tensor): The output expert count limit by capacity.
Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env: DISTRIBUTED)
        >>> import paddle
        >>> from paddle.distributed.models.moe import utils
        >>> expert_count = [1, 2, 2, 8, 3, 6]
        >>> capacity = [5, 5, 5]
        >>> n_work = 2
        >>> expert_count = paddle.to_tensor(expert_count, dtype="int64")
        >>> capacity = paddle.to_tensor(capacity, dtype="int64")
        >>> out = utils._limit_by_capacity(expert_count, capacity, n_work)
        >>> print(out)
        Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
        [1, 2, 2, 4, 3, 3])
limit_by_capacityr   )expert_countcapacityr   n_workerr   )r   r   r0   r   r   r   r   r   )r1   r2   r3   r   r   r   s         r   _limit_by_capacityr4      s    2 ''II%W1177$$ 8 
 	$0GCLx(	 	 	
 
r   c                    [        5       (       a  [        R                  " XX#5      $ [        U SSS/S5        [        USSS/S5        [	        S0 [        5       D6nUR                  U R                  S9nUR                  SXS.S	U0X#S
.S9  U$ )a  
prune gate by capacity(only support CUDA)

Args:
    gate_idx (Tensor): Represents the gate_id sequence corresponding to the input data with type int32, int64.
    expert_count (Tensor): The quantity value counted on the gate_id sequence of the input data with type int32, int64.
    n_worker(int, optional): The number of workers on the trainer with type int64.

Returns:
    new_gate_idx (Tensor): The gate_id sequence corresponding to the new input data after passing through prune.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env: DISTRIBUTED)
        >>> import paddle
        >>> from paddle.distributed.models.moe import utils
        >>> gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int64')
        >>> expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int64')
        >>> n_worker = 1
        >>> n_expert = 8
        >>> new_gate_id = utils._prune_gate_by_capacity(
        ...     gate_idx, expert_count, n_expert, n_worker
        ... )
        >>> print(new_gate_id)
        Tensor(shape=[8], dtype=int64, place=Place(gpu:0), stop_gradient=True,
        [1, 3, 3, 3, -1, 2, 1, 1])
GateIdxint32int64z/paddle.distributed.utils.prune_gate_by_capacityExpertCountprune_gate_by_capacityr   )r6   r9   
NewGateIdx)n_expertr3   r   )r:   )	r   r   r:   r   r   r   r   r   r   )gate_idxr1   r<   r3   r   new_gate_idxs         r   _prune_gate_by_capacityr?      s    : ,,H
 	
 	!g=		
 	!g=		
 BB@@.. A 
 	)'E!<0'>	 	 	
 r   N)r'   )paddler   r   paddle.common_ops_importr   paddle.frameworkr   r   r   r   r%   r.   r4   r?    r   r   <module>rD      s3    ) = $N/d;.)X:r   