
    Αir                    
   S SK Jr  S SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
JrJr  S SKrS SKJr  S SKJr  SS	KJr  \
(       a  S SKJr  S S
KJr  SS/rSq\R4                  R7                  S\R8                  R:                  R=                  5       5      r\ " \R4                  R7                  SS 5      5      r!  S     SS jjr" S   SS jjr# " S S5      r$ " S S5      r% " S S5      r& " S S\&5      r' " S S5      r(g)    )annotationsN)reduce)product)TYPE_CHECKINGAnyLiteral)
NCCLConfig)check_nccl_version_for_p2p   )logger)GroupCommunicateTopologyHybridCommunicateGroupPADDLE_USE_FOUR_DIRECTIONS_P2P$FLAGS_pipeline_nccl_comm_init_optionc                4   [         R                  R                  R                  S:w  a  g [	        U [
        [        45      (       d  g SSKJn  SSK	J
n  [	        U [        5      (       a  U nOU" U SS9nSU;  a  Ub  XS'   UR                  R                  " S0 UD6$ )	Nncclr   )MessageToDict)coreT)preserving_proto_field_namecommName )paddledistributed
collective_default_backend
isinstanceNCCLConfig_Messagedictgoogle.protobuf.json_formatr   paddle.baser   r	   create)messagedefault_namer   r   ret_dicts        f/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/fleet/base/topology.pymessage2nccl_configr'   /   s     $$55?g 2D9::9 '4   dK!l&>+??!!-H--    c                    [        U S5      $ )a  

Function that creates nccl config.

Args:
    nccl_config (dict[str, int | str] | None): None or a dict containing the following keys:
        commName (str): name of the process group. ll_buffsize (int): buffer size of ll protocol.
        ll128_buffsize (int): buffer size of ll128 protocol. simple_buffsize (int): buffer size of
        simple protocol. buffsize_align (int): alignment unit of the total buffer size.
        nchannels (int): max number of channels. algoStr (str): communication algorithm.
        protoStr (str): communication protocol.

Returns:
    NCCLConfig (NCCLConfig | None): an object containing the information,
    which can be used as an argument of new_group().

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env: DISTRIBUTED)
        >>> import paddle
        >>> import paddle.distributed as dist
        >>> from typing import Union
        >>> dist.init_parallel_env()
        >>> nccl_config: dict[str, Union[int, str]] = {"commName":"tp_comm","ll_buffsize":0,"ll128_buffsize":0,"simple_buffsize":1024,"buffsize_align":1024,"nchannels":4,"algoStr":"Ring","protoStr":"Simple",}
        >>> ranks=[0,1,2,3,4,5,6,7]
        >>> nccl_config=dist.create_nccl_config(nccl_config)
        >>> pg=dist.new_group(ranks, nccl_config=nccl_config)
        >>> m, n = 4096, 8192
        >>> local_rank = dist.get_rank(pg)
        >>> num_local_ranks = dist.get_world_size(pg)
        >>> x = paddle.ones(shape=[m, n], dtype=paddle.float32) * (local_rank + 1)
        >>> dist.all_reduce(x, group=pg)

N)r'   nccl_configs    r&   create_nccl_configr,   E   s    L {D11r(   c                  ,    \ rS rSrSrSrSrSrSrSr	Sr
g	)
ParallelModen   a  

There are all the parallel modes currently supported:

    - DATA_PARALLEL: Distribute input data to different devices.
    - TENSOR_PARALLEL: Shards tensors in the network to different devices.
    - PIPELINE_PARALLEL: Place different layers of the network on different devices.
    - SHARDING_PARALLEL: Segment the model parameters, parameter gradients and optimizer states corresponding to the parameters to each device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env: DISTRIBUTED)
        >>> import paddle
        >>> parallel_mode = paddle.distributed.ParallelMode
        >>> print(parallel_mode.DATA_PARALLEL)
        0

r      r         r   N)__name__
__module____qualname____firstlineno____doc__DATA_PARALLELTENSOR_PARALLELPIPELINE_PARALLELSHARDING_PARALLELSEGMENT_PARALLEL__static_attributes__r   r(   r&   r.   r.   n   s%    ( MOr(   r.   c                      \ rS rSr/ SQ/ SQ4     SS jjrSS jrSS jrSS jrSS jrSS	 jr	SS
 jr
SS jrSS jrSS jrSS jrSrg)r      )datapipeshardingsepcontextmodel)r0   r0   r0   r0   r0   r0   c           
     4   Xl         X l        [        R                  " SU R                   5      U l        [        S U R                  S5      U l        U R                   Vs/ s H  n[        U5      PM     nn[        U6  Vs/ s H  oPR                  " U6 PM     nn[        [        U[        [        U5      5      5      5      U l        [        [        U R                  R                  5       U R                  R                  5       5      5      U l        g s  snf s  snf )N
Coordinatec                
    X-  $ Nr   )xys     r&   <lambda>.CommunicateTopology.__init__.<locals>.<lambda>   s    qur(   r0   )_parallel_names_dimscollections
namedtuple
coordinater   _world_sizeranger   r   ziplen_coord2rankvalueskeys_rank2coord)selfhybrid_group_namesdimsdrangesrJ   all_coordinates          r&   __init__CommunicateTopology.__init__   s      2
%00$..
 ""4djj!D$(JJ/Jq%(J/7>7GH7G!//1-7GHNE#n:M4N OP  '')4+;+;+@+@+BC
	 0Hs   D?Dc                    U R                   $ rI   )rN   r[   s    r&   get_hybrid_group_names*CommunicateTopology.get_hybrid_group_names       ###r(   c                R    U R                   U R                  R                  U5         $ rI   )rO   rN   indexr[   	axis_names     r&   get_dimCommunicateTopology.get_dim   s"    zz$..44Y?@@r(   c                    U R                   $ rI   )rS   rd   s    r&   
world_sizeCommunicateTopology.world_size       r(   c                    [        U5      [        U R                  5      :X  d   eU R                  " S0 UD6nX R                  R	                  5       ;   d   eU R                  U   $ Nr   )rV   rO   rR   rW   rY   )r[   argskeys      r&   get_rankCommunicateTopology.get_rank   sY    4yC

O+++oo%%&&++----$$r(   c                    XR                   :  d   eXR                  R                  5       ;   d   eU R                  U   $ rI   )rS   rZ   rY   )r[   ranks     r&   	get_coordCommunicateTopology.get_coord   s@    &&&&&'',,....%%r(   c                    U R                   R                  U5      nU R                  R                  5        Vs/ s H  nXC   U:X  d  M  U R                  U   PM     nnUR	                  5         U$ s  snf rI   )rN   ri   rW   rY   sort)r[   rk   ri   axiscoordrankss         r&   get_axis_list!CommunicateTopology.get_axis_list   su    ##)))4 ))..0
0{e# $DU#0 	 

 	


s   A/A/c                t    XR                   ;   d   eU R                  U R                   R                  U5         $ rI   )rN   rO   ri   rj   s     r&   get_dim_size CommunicateTopology.get_dim_size   s4    00000zz$..44Y?@@r(   c           
        [        [        U R                  5      R                  U5      5      n/ nU HC  nUR	                  [        U R                  U R                  R                  U5         5      5        ME     / nU HC  nUR	                  [        U R                  U R                  R                  U5         5      5        ME     / n[        U6  H  n0 n/ n	[        U5       H  u  pXX*   '   M     [        U6  HK  n[        U5       H  u  pXX   '   M     U	R	                  U R                  U R                  " S0 UD6   5        MM     UR	                  U	5        M     U$ rs   )listsetrN   
differenceappendrT   rO   ri   r   	enumeraterW   rR   )r[   
fused_axisnon_fused_axisnon_fused_rangesrk   fused_ranges	rank_listnon_fused_ranks
coord_dictr   inon_fused_rankfused_ranks
fused_ranks                 r&   get_fused_ranks#CommunicateTopology.get_fused_ranks   sG   c$"6"67BB:NO'I##djj!5!5!;!;I!FGH ( #Idjj!5!5!;!;I!FGH $
 	&(89OJE%.%?!0>>,- &@&5%.{%;MA0:z}- &<T--doo.K
.KLM  6 U#  : r(   c           
        XR                   ;   d   eU R                    Vs/ s H  o"U:w  d  M
  UPM     nn/ nU H.  nU R                  U5      nUR                  [        U5      5        M0     / n[	        U6  H  n0 nU H  n	XsR                  U	5         X'   M     / n
[        SU R                  U5      5       H5  nXU'   U
R                  U R                  U R                  " S0 UD6   5        M7     UR                  U
5        M     U$ s  snf )Nr   r   )rN   r   r   rT   r   ri   rW   rR   )r[   rk   nameother_axis_namesr_   dim_num
all_resultrJ   	key_coord
other_nameresultr   s               r&   get_comm_list!CommunicateTopology.get_comm_list   s   00000!11
1TY5FD1 	 
 $D''-GMM%.) % 
&!AI.
()*@*@*L(M	% / F1d//	:;'()$d..t/K/KLM < f% " +
s
   	DDc                    U R                  U5      nUR                  " S0 UD6R                  5       nU R                  " S0 UD6$ rs   )rz   _replace_asdictrv   )r[   global_rankkwargsr   tfs        r&   get_rank_from_stage'CommunicateTopology.get_rank_from_stage   s<    {+^^%f%--/}}"r""r(   )rW   rO   rN   rZ   rS   rR   N)r\   	list[str]r]   	list[int]returnNone)r   r   )rk   strr   intr   r   )rt   r   r   r   )ry   r   r   r   )rk   r   ri   r   r   r   )r   r   r   list[list[int]])rk   r   r   r   )r   r   r   r   r   r   )r3   r4   r5   r6   ra   re   rl   ro   rv   rz   r   r   r   r   r   r=   r   r(   r&   r   r      sa    )
 -
%
 
 

6$A %&
A62#r(   c                     \ rS rSr S/     S0S jjrS1S jrS2S jrS3S jr   S4       S5S jjr   S4       S5S jjr	S6S	 jr
S6S
 jrS3S jrS3S jrS7S jrS6S jrS6S jrS6S jrS6S jrS8S jrS6S jrS9S jrS6S jrS6S jrS8S jrS6S jrS6S jrS6S jrS6S jrS6S jrS6S jrS6S jrS8S jr S6S  jr!S8S! jr"S:S" jr#S6S# jr$S6S$ jr%S6S% jr&S8S& jr'S6S' jr(S;S<S( jjr)S=S) jr*S8S* jr+S8S+ jr,S6S, jr- S/     S>S- jjr.S.r/g)?r   i  Nc                n   [         R                  R                  5       U l        [         R                  R	                  5       U l        Xl        U R                  R                  S5      U l        U R                  R                  S5      U l	        U R                  R                  S5      U l
        U R                  R                  S5      U l        U R                  R                  S5      U l        U R                  5       U l        U R                  5       U l        U R#                  5       U l        U R'                  5       U l        U R+                  5       U l        U R/                  5       (       dU   SU R                   SU R                   SU R                   S	U R                   S
U R                   SU R                   35       eU R1                  SUb  [3        US   R4                  S5      OS S9u  U l        U l        [         R                  R;                  [         R<                  " S/SS9[         R                  R>                  R@                  U R8                  S9  Sn[         RB                  " U5      U   (       aY  U R8                  R                  S:  a?  U R8                  RD                  RG                  Ub  [3        US   RH                  S5      OS S9  U R1                  SUb  [3        US   RJ                  S5      OS S9u  U l&        U l'        U R1                  SUb  [3        US   RJ                  S5      OS S9u  U l(        U l)        U R1                  SUb  [3        US   RJ                  S5      OS S9u  U l*        U l+        S U l,        U R                  S:  a9  U R1                  SUb  [3        US   RJ                  S5      OS S9u  U l,        U l-        U R]                  SUb  [3        US   R^                  S5      OS S9u  U l0        U l1        U R                  S:  a9  U R]                  SUb  [3        US   R^                  S5      OS S9u  U l2        U l3        U R                  S:  av  U Ri                  SS/Ub  [3        US   RJ                  S5      OS S9u  U l5        U l6        U Ri                  SS/Ub  [3        US   RJ                  S 5      OS S9u  U l7        U l8        U R,                  S!:H  U l9        U R,                  U R                  S-
  :H  U l:        U R                  S:  ab  [         Rv                  Rx                  R{                  5       (       a
  [}        5         U R                  5         [        (       a  U R                  5         S"U R
                   S#U R                   S$U R                   S%U R                   S&U R                   S'U R                   3nUS(U RP                   S)U RT                   S*U R6                   S+U RL                   S,U RX                   S-U R`                   3-  n[        R                  " U5        U qDg ).Nr@   rE   rA   rB   rC   znranks: z
, mp_num: z, sharding_num: z
, pp_num: z
, dp_num: z, sep_num: 
pp_configspp_collr*   r0   int32dtypeopgroup$FLAGS_eager_communication_connectionpp_p2p
dp_configsdp
mp_configstpsharding_configssep_configsdp_checksharding_checkdp_sep_configsdp_seppp_tp_configspp_tpr   HybridParallelInfo: rank_id: , mp_degree: , sharding_degree: , pp_degree: , dp_degree: , sep_degree: , mp_group: ,  sharding_group: , pp_group: , dp_group: z, sep:group: , check/clip group: )Er   r   get_world_sizenranksrv   r   _toporl   
_dp_degree
_mp_degree
_pp_degree_sharding_degree_sep_degree_get_data_parallel_id_data_parallel_id_get_model_parallel_id_model_parallel_id_get_sharding_parallel_id_sharding_parallel_id_get_sep_parallel_id_sep_parallel_id_get_pipe_parallel_idstage_id_check_valid_topo_set_comm_groupr'   coll_nccl_config	_pp_group_pp_comm_group
all_reducezerosReduceOpSUM	get_flagsprocess_groupeager_connect_ring_exchangep2p_nccl_configr+   	_dp_group_dp_comm_group	_mp_group_mp_comm_group_sharding_group_sharding_comm_group
_sep_group_sep_comm_group_set_check_groupcheck_nccl_config_check_group_check_comm_groupsharding_check_groupsharding_check_comm_groupcreate_fuse_group_dp_sep_group_dp_sep_comm_group_pp_mp_group_pp_mp_comm_groupis_first_stageis_last_stage	frameworkr   is_compiled_with_ncclr
   _set_p2p_prev_next_use_four_directions_set_four_directions_p2p_groupr   info_HYBRID_PARALLEL_GROUP)r[   topologyhybrid_configsenv_name	debug_strs        r&   ra   HybridCommunicateGroup.__init__  s   
 ((779!--668
**,,V4**,,W5**,,V4 $

 2 2: >::--e4!%!;!;!="&"="="?%)%C%C%E" $ 9 9 ;224%%'' 	
t{{m:doo->>NtOdOdNeeoptpp  pA  AK  LP  L[  L[  K\  \g  hl  hx  hx  gy  z	
'
 /3.B.B
 "- $"<0AA9  /C 	/
++ 	%%LL!G,!!**..%% 	& 	
 :H%h/""))A-##11MM *5	 ,*<8HH$
 " N 	 /3.B.B
 "- $"<0<<d  /C 	/
++ /3.B.B
 "- $"<0<<d  /C 	/
++ ;?:N:N
 "- $"#56BBJ  ;O 	;
7d7 a484H4H
 &1 (&}5AA5  5I 	51DOT1 594I4I
 "- $"<0BBJ  5J 	5
141   1$ %% &1	 (&'9:LL(
  & 
). a &&
 &1 (&'78DDh  ' 	"' 9=8N8N!
 &1 (&7CCW  9O 	95Dt5 #mmq0!]]t/BC ??Q$$::<<*,##%##335 ,D,<,<+=]4??J[ \  $ 5 56mDOOCTTabfbqbqar  sA  BF  BR  BR  AST 	 	|DNN#33FtG[G[F\\himiwiwhx  yE  FJ  FT  FT  EU  Ub  cg  cr  cr  bs  sG  HL  HY  HY  GZ  [  	[	I "&r(   c                   U R                   S:X  aP  U R                  S:X  a@  U R                  S:X  a0  U R                  S:X  a   U R                  S:  a  [
        R                  $ U R                   S:X  a@  U R                  S:X  a0  U R                  S:X  a   U R                  S:  a  [
        R                  $ U R                   S:X  a0  U R                  S:X  a   U R                  S:  a  [
        R                  $ U R                   S:X  a   U R                  S:  a  [
        R                  $ U R                   S:  a  [
        R                  $ g Nr0   )r   r   r   r   r   r.   r8   r;   r<   r9   r:   rd   s    r&   get_parallel_mode(HybridCommunicateGroup.get_parallel_mode  s    OOq 1$  A%%%*!#---OOq 1$  A%%%)  111OOq 1$  1$  000__!doo&9  ///__q 111 !r(   c                    U R                   U R                  -  U R                  -  U R                  -  U R                  -  U R
                  :H  $ rI   )r   r   r   r   r   r   rd   s    r&   r   (HybridCommunicateGroup._check_valid_topo  sT    OOoooo ##$ 	
 {{	
r(   c                2    U R                   S:  d   S5       eg )Nr0   zsep not existr   rd   s    r&   _check_sep_exist'HybridCommunicateGroup._check_sep_exist  s    !#4_4#r(   c                `   / nS nUc  U R                   nUR                  U5      nUS:X  a  [        OSnU H8  n[        R                  R                  UUUS9n	U R                  U;   d  M4  UnU	nM:     [        U5      S:  d   eUc   e[        R                  " S[        U5       SU S35        XE4$ )NrA   r   r   nccl_comm_init_optionr+   Total  z# comm group(s) create successfully!)
r   r    g_pipeline_nccl_comm_init_optionr   r   	new_groupr   rV   r   r  )
r[   parallel_methodtopor+   parallel_groupparallel_comm_groupparallel_groupsgroup_nccl_comm_init_optionr   
comm_groups
             r&   r   &HybridCommunicateGroup._set_comm_group  s     "<::D,,_=  6) - 	$
 %E++55&A' 6 J
 5(!&&0# % >"Q&&&"...S)*!O+<<_`	
 22r(   c                <   / nS nUc  U R                   nUR                  U5      n[        U5       HQ  nU R                   R                  X5      n[        R
                  R                  XS9n	U R                  U;   d  MM  UnU	nMS     [        U5      S:  d   eUc   eXE4$ )Nr   r+   r   )	r   rl   rT   r   r   r   r&  r   rV   )
r[   r'  r(  r+   r)  r*  parallel_sizeidxr+  r-  s
             r&   r   'HybridCommunicateGroup._set_check_group  s     "<::D_5'C"jj66LO++55% 6 J ?2!0&0# ( >"Q&&&"...22r(   c                J    [        U S5      (       d   S5       eU R                  $ )N	next_rankznext_rank has not been inited)hasattrr5  rd   s    r&   _get_p2p_next_rank)HybridCommunicateGroup._get_p2p_next_rank,  %    t[))J+JJ)~~r(   c                J    [        U S5      (       d   S5       eU R                  $ )N	prev_rankzprev_rank has not been inited)r6  r;  rd   s    r&   _get_p2p_prev_rank)HybridCommunicateGroup._get_p2p_prev_rank0  r9  r(   c                :   U R                   R                  S5      nU Hz  n[        U5      U R                  :X  d   e[	        U5       HM  u  p4UnX#S-   U R                  -     nX#S-
  U R                  -     nU R
                  U:X  d  MA  X`l        Xpl        MO     M|     g )NrA   r0   )r   r   rV   r   r   r   r5  r;  )r[   
comm_lists
comm_ranksr2  ry   	curr_rankr5  r;  s           r&   r  )HybridCommunicateGroup._set_p2p_prev_next4  s    ZZ--f5
$Jz?doo555&z2	 	&a4??'BC	&a4??'BC	##y0%.N%.N 3 %r(   c                   U R                   R                  S5      nS U l        S U l        S U l        S U l        U H  n[        U5      U R                  :X  d   e[        U5       H  u  p4UnX#S-   U R                  -     nX#S-
  U R                  -     n[        R                  R                  XV/S9nU R                  U:X  a  Xl        OU R                  U:X  a  Xl        [        R                  R                  Xu/S9n	U R                  U:X  a  Xl        M  U R                  U:X  d  M  Xl        M     M     U R                  c   eU R                  c   eU R                  c   eU R
                  c   eg )NrA   r0   )r   )r   r   send_next_groupsend_prev_grouprecv_next_grouprecv_prev_grouprV   r   r   r   r   r&  r   )
r[   r?  r@  r2  ry   rA  r5  r;  
next_group
prev_groups
             r&   r  5HybridCommunicateGroup._set_four_directions_p2p_groupB  sv   ZZ--f5
####$Jz?doo555&z2	 	&a4??'BC	&a4??'BC	#//99$0 : 
 ##y0+5(%%2+5(#//99$0 : 
 ##y0+5(%%2+5() 3 %0 ##///##///##///##///r(   c                    U R                   $ rI   )r   rd   s    r&   r  HybridCommunicateGroup.topologyg  s    zzr(   c                    U R                   $ rI   )r   rd   s    r&   get_global_rank&HybridCommunicateGroup.get_global_rankj  rq   r(   c                `    U R                   R                  U R                  5      R                  $ rI   )r   rz   r   r@   rd   s    r&   r   ,HybridCommunicateGroup._get_data_parallel_idn  #    zz##D$4$45:::r(   c                    U R                   $ rI   )r   rd   s    r&   get_data_parallel_rank-HybridCommunicateGroup.get_data_parallel_rankq  s    %%%r(   c                    U R                   $ rI   )r   rd   s    r&   get_data_parallel_world_size3HybridCommunicateGroup.get_data_parallel_world_sizet      r(   c                    U R                   $ rI   )r   rd   s    r&   get_data_parallel_group.HybridCommunicateGroup.get_data_parallel_groupw      """r(   c                4    U R                   R                  S   $ Nr   )r   r   rd   s    r&    get_data_parallel_group_src_rank7HybridCommunicateGroup.get_data_parallel_group_src_rankz      ""((++r(   c                `    U R                   R                  U R                  5      R                  $ rI   )r   rz   r   rE   rd   s    r&   r   -HybridCommunicateGroup._get_model_parallel_id~  s#    zz##D$4$45;;;r(   c                    U R                   $ rI   )r   rd   s    r&   get_model_parallel_rank.HybridCommunicateGroup.get_model_parallel_rank  s    &&&r(   c                    U R                   $ rI   )r   rd   s    r&   get_model_parallel_world_size4HybridCommunicateGroup.get_model_parallel_world_size  rY  r(   c                    U R                   $ rI   )r   rd   s    r&   get_model_parallel_group/HybridCommunicateGroup.get_model_parallel_group  r]  r(   c                4    U R                   R                  S   $ r_  )r   r   rd   s    r&   !get_model_parallel_group_src_rank8HybridCommunicateGroup.get_model_parallel_group_src_rank  rb  r(   c                `    U R                   R                  U R                  5      R                  $ rI   )r   rz   r   rA   rd   s    r&   r   ,HybridCommunicateGroup._get_pipe_parallel_id  rR  r(   c                    U R                   $ rI   )r   rd   s    r&   get_stage_id#HybridCommunicateGroup.get_stage_id  s    }}r(   c                    U R                   $ rI   )r   rd   s    r&   get_pipe_parallel_world_size3HybridCommunicateGroup.get_pipe_parallel_world_size  rY  r(   c                `    U R                   R                  U R                  5      R                  $ rI   )r   rz   r   rC   rd   s    r&   r   +HybridCommunicateGroup._get_sep_parallel_id  s#    zz##D$4$45999r(   c                    U R                   $ rI   )r   rd   s    r&   get_sep_parallel_rank,HybridCommunicateGroup.get_sep_parallel_rank      $$$r(   c                    U R                   $ rI   r  rd   s    r&   get_sep_parallel_world_size2HybridCommunicateGroup.get_sep_parallel_world_size  rq   r(   c                :    U R                  5         U R                  $ rI   )r  r   rd   s    r&   get_sep_parallel_group-HybridCommunicateGroup.get_sep_parallel_group  s    ###r(   c                T    U R                  5         U R                  R                  S   $ r_  )r  r   r   rd   s    r&   get_sep_parallel_group_src_rank6HybridCommunicateGroup.get_sep_parallel_group_src_rank  s%    ##))!,,r(   c                    U R                   $ rI   )r   rd   s    r&   get_pipe_parallel_group.HybridCommunicateGroup.get_pipe_parallel_group  r]  r(   c                    [         (       d   S5       eU R                  U R                  U R                  U R                  4$ )NzrIf you want to use four directions p2p group, set the environment variable PADDLE_USE_FOUR_DIRECTIONS_P2P to True.)r  rD  rE  rF  rG  rd   s    r&   get_p2p_groups%HybridCommunicateGroup.get_p2p_groups  sK    ## 	
 A	
#         	
 	
r(   c                `    U R                   R                  U R                  5      R                  $ rI   )r   rz   r   rB   rd   s    r&   r   0HybridCommunicateGroup._get_sharding_parallel_id  s#    zz##D$4$45>>>r(   c                    U R                   $ rI   )r   rd   s    r&   get_sharding_parallel_rank1HybridCommunicateGroup.get_sharding_parallel_rank  s    )))r(   c                    U R                   $ rI   )r   rd   s    r&    get_sharding_parallel_world_size7HybridCommunicateGroup.get_sharding_parallel_world_size  r~  r(   c                    U R                   $ rI   )r   rd   s    r&   get_sharding_parallel_group2HybridCommunicateGroup.get_sharding_parallel_group      (((r(   c                4    U R                   R                  S   $ r_  )r   r   rd   s    r&   $get_sharding_parallel_group_src_rank;HybridCommunicateGroup.get_sharding_parallel_group_src_rank  s    ((..q11r(   c                @    U(       a  U R                   $ U R                  $ rI   )r  r   )r[   rB   s     r&   get_check_parallel_group/HybridCommunicateGroup.get_check_parallel_group  s    111)))r(   c                T    U R                   R                  " U R                  4SU0UD6$ )NrA   )r   r   r   )r[   r   r   s      r&   r   *HybridCommunicateGroup.get_rank_from_stage  s2    zz--
#+
/5
 	
r(   c                :    U R                  5         U R                  $ rI   )r  r  rd   s    r&   get_dp_sep_parallel_group0HybridCommunicateGroup.get_dp_sep_parallel_group  s    &&&r(   c                :    U R                  5         U R                  $ rI   )r  r  rd   s    r&   get_pp_mp_parallel_group/HybridCommunicateGroup.get_pp_mp_parallel_group  s    %%%r(   c                    gr_  r   rd   s    r&   $get_moe_sharding_parallel_world_size;HybridCommunicateGroup.get_moe_sharding_parallel_world_size  s    r(   c                   [        U5      S:  d   S5       e/ n/ nU R                  R                  U5      nUR                  5         U HT  n[        R
                  R                  XbS9nU R                  U;   d  M2  UR                  U5        UR                  U5        MV     [        U5      S:  d   e[        U5      S:  d   e[        R                  " S[        U5       SU S35        [        U5      S:  a  X44$ US   US   4$ )Nr   z9the length of fused_strategy_list must be greater than 0.r0  r#  z comm group(s) of fused z create successfully!r0   )rV   r   r   r}   r   r   r&  r   r   r   r  )r[   fused_strategy_listr+   r)  r*  r+  r   r-  s           r&   r  (HybridCommunicateGroup.create_fuse_group  s#   
 &'!+ 	
G	
+  **445HI$E++55 6 J 5(%%e,#**:6 % >"Q&&&&'!+++S)**BCVBWWlm	
 ~"!66!!$&9!&<<<r(   )'r   r   r   r   r   r   r  r  r   r   r   r   r   r   r   r  r  r   r   r   r   r   r   r   r   r   r   r  r	  r5  r   r;  rF  rG  rD  rE  r  r  r   rI   )r  r   r  NCCLConfig_Message | Noner   r   )r   zLiteral[0, 1, 2, 3, 4]r   boolr   r   )r@   NN)r'  r   r(  r   r+   NCCLConfig | Noner   ztuple[list[int], Group]r   )r   r   r   r   )r   r   )r   z!tuple[Group, Group, Group, Group]F)rB   r  r   r   )r   r   r   r   r   r   )r  r   r+   r  r   =tuple[list[list[int]], list[Group]] | tuple[list[int], Group])0r3   r4   r5   r6   ra   r  r   r  r   r   r7  r<  r  r  r  rN  r   rT  rW  r[  r`  r   rf  ri  rl  ro  r   rt  rw  r   r|  r  r  r  r  r  r   r  r  r  r  r  r   r  r  r  r  r=   r   r(   r&   r   r     s    59x&%x& 2x& 
	x&t$2L
5
  &$()-	!3!3 "!3 '	!3
 
!!3J  &$()-	33 "3 '	3
 
!32/#0J ;&#,<'#,;:% $-#	
?*%)2
*
'& *.=&= '= 
G	= =r(   c                  |   \ rS rSr/ SQ/ SQS4       S(S jjrS)S jrS*S jrS+S jrS	 rS
 r	S r
S+S jrS+S jr S+ S,S jjrS rS rS rS rS-S jrS rS-S jrS-S jrS.S jrS-S jrS.S jrS-S jrS.S jrS-S jrS-S jrS-S jrS.S jrS-S  jrS-S! jr S-S" jr!S.S# jr"S-S$ jr# S/ S-S% jjr$S/S-S& jjr%S'r&g)0EPHybridCommunicateGroupi  )rA   moe_shardingexpertr@   rB   rC   rD   rE   )r0   r0   r0   r0   r0   r0   r0   r0   Nc                4   [         R                  R                  5       U l        [         R                  R	                  5       U l        [        [        X5      5      nUR                  SS5      U l	        UR                  SS5      U l
        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        S	U;  a  SUS	'   UR                  S	S5      U l        / n/ n[        X5       H/  u  pxUS
;   d  M  UR%                  U5        UR%                  U5        M1     SU;   a  SU;   d   e['        XV5      U l        US   US   -  US'   U Vs/ s H  nUS;  d  M  UPM     n	nU	R+                  S5      n
UR+                  S5      UR+                  S5      :  a(  U	R-                  U
S-   S5        U	R-                  U
S5        O*U	R-                  U
S-   S5        U	R-                  U
S-   S5        U	 Vs/ s H  otU   PM	     nnU	R+                  S5      U	R+                  S5      :  d   S5       e['        X5      U l        US   US	   -  US'   / SQnU Vs/ s H  otU   PM	     nn['        X5      U l        X R(                  l        X R.                  l        X R0                  l        U R.                  U l        U R7                  U R.                  S5      U l        U R7                  U R.                  S5      U l        U R=                  5       U l        U R7                  U R.                  S5      U l         U R7                  U R0                  S	5      U l!        U R0                  RE                  S5      U l#        U R7                  U R(                  S5      U l$        U R7                  U R(                  S5      U l%        U R7                  U R(                  S5      U l&        U R                  U R                  :X  d"   SU R                   SU R                   S35       eU R4                  RN                  U R(                  RN                  :X  d6   SU R4                  RN                   SU R(                  RN                   S35       eU R                   S:X  a  U R                  S:X  d"   SU R                    SU R                   S35       eU RQ                  SU R(                  Ub  [S        US   RT                  S5      OS S9u  U l+        U l,        [         R                  R[                  [         R\                  " S/SS9[         R                  R^                  R`                  U RX                  S9  Sn[         Rb                  " U5      U   (       aY  U RX                  R                  S:  a?  U RX                  Rd                  Rg                  Ub  [S        US   Rh                  S 5      OS S9  U RQ                  SU R(                  Ub  [S        US!   Rj                  S"5      OS S9u  U l6        U l7        U RQ                  SU R(                  Ub  [S        US#   Rj                  S5      OS S9u  U l8        U l9        U RQ                  SU R.                  Ub  [S        US$   Rj                  S%5      OS S9u  U l:        U l;        U RQ                  SU R.                  Ub  [S        US&   Rj                  S5      OS S9u  U l<        U l=        U RQ                  SU R.                  Ub  [S        US'   Rj                  S(5      OS S9u  U l>        U l?        U R                  U R.                  Ub  [S        US)   Rj                  S5      OS S9u  U lA        U lB        U R                  U R.                  Ub  [S        US*   Rj                  S	5      OS S9u  U lD        U lE        S U lF        S U lG        U R"                  S:  aC  U R                  U R.                  Ub  [S        US+   Rj                  S,5      OS S9u  U lF        U lG        U R                  U R.                  Ub  [S        US-   Rj                  S5      OS S9u  U lJ        U lK        U R                  5       U lM        U R                  SU R.                  Ub  [S        US$   R                  S.5      OS S9u  U lP        U lQ        U R                  SU R(                  Ub  [S        US#   R                  S/5      OS S9u  U lR        U lS        U RH                  S0:H  U lT        U RH                  U R                  S-
  :H  U lU        U R                  S:  ab  [         R                  R                  R                  5       (       a
  [        5         U R                  5         [        (       a  U R                  5         S1U R
                   S2U R                   S3U R                   S4U R                   S5U R                   S6U R                    S7U R"                   S8U R                   S9U R                   3nUS:U R|                   S;U R                   S<U RV                   S=U Rt                   S>U Rx                   S?U R                   S@U R                   SAU R                   SBU R                   SCU Rl                   SDU Rp                   S3-  n[        R                  " U5        U q_g s  snf s  snf s  snf )ENr  r0   r  rA   r@   rE   rB   rC   rD   )rA   r  r  dense_sharding)r  rB   r  rD   r   z%moe_sharding must be before sharding.cp_sharding)r  rA   rD   rE   zMismatch moe_pp_degree:z, pp_degree:.zMismatch world_size:z, moe_world_size:zsep_degree z and dp_degree z must be 1 in MoE.r   r   r*   r   r   r   r   r   
ep_configsepmoe_sharding_configsr   r   r   r   r   r   
cp_configscp_mp_configscp_mpcp_sharding_configs
data_checkmoe_sharding_checkr   r   r   r   r   r   r   z, cp_degree: z, ep_degree: z, moe_sharding_degree: r   r   r   r   z, sep_group: z, cp_group: z, cp_sharding_group: z, cp_mp_group: r   z, ep_group: z, moe_sharding_group: )`r   r   r   r   rv   r   r   rU   get
_ep_degree_moe_sharding_degree_moe_pp_degreer   r   r   r   r   
_cp_degreer   r   	_moe_topori   insert_dense_topo_cp_topo_parent_hcgr   _get_parallel_idr   r   r   r   r   _cp_parallel_idrl   _cp_sharding_degreer   _expert_parallel_id_moe_sharding_parallel_idrS   r   r'   r   r   r   r   r   r   r   r   r   r   r   r+   	_ep_group_ep_comm_group_moe_sharding_group_moe_sharding_comm_groupr   r   r   r   r   r   build_sharding_groupr   r   build_context_group	_cp_group_cp_comm_group_cp_mp_group_cp_mp_comm_groupbuild_cp_mp_fuse_groupbuild_context_sharding_group_cp_sharding_group_cp_sharding_comm_group_get_cp_sharding_parallel_id_cp_sharding_parallel_idr   r   r   r   r  r  r  r	  r
  r   r  r
   r  r  r  r   r  r  )r[   r\   r]   r  dim_dictmoe_hybrid_group_namesmoe_dimsr   dimdense_group_namespipe_idx
dense_dimscp_group_namescp_dimsr  r  s                   r&   ra   !EPHybridCommunicateGroup.__init__  sU    ((779!--668.56",,x3$,LL$C!&ll615",,vq1",,w2",,vq1 (Z ;#<<q1H$"#HY",,y!4!#/6ID99&--d3$ 7
 4422	
3 --CN Z H^$<< 	!"
 +
*LL * 	 

 %**62##F+.@.F.F/
 
 $$X\3CD$$X~>$$X\>B$$X\3CD1BC1Btn1B
C &&
##$456 	
 4	
 6 //@M"*:"6(9:M"M
 /==ndD>n=+ND%)"'+$$(!%%
!%!6!6t7G7G!P"&"7"7g#
 &*%C%C%E" $ 5 5d6F6F N#44T]]IN#'==#8#8#G --dnnfE#'#8#8NNH$
  *.)>)>NNN*
& ""doo5 	
%d&9&9%:,tFWWXY	
5 zz%%)C)CC 	
"4::#9#9"::KDNNLfLfKgghi	
C 1$A)= 	
$**+?4??:KK]^	
= /3.B.BNN
 "- $"<0AA9  /C 
/
++ 	%%LL!G,!!**..%% 	& 	

 :H%h/""))A-##11MM *5	 ,*<8HH$
 " N 	 /3.B.BNN
 "- $"<0<<d  /C 
/
++    &1	 (&'=>JJ&
  !  	@ $"?  /3.B.B
 "- $"<0<<d  /C 
/
++ 150D0D
 "- $"=1==u  1E 
1
-- /3.B.B
 "- $"<0<<d  /C 
/
++ %%   &1	 (&'9:FF"
  & 
 	8d7 /3.F.F
 "- $"<0<<i  /G 	/
++ !!%??Q++$$
 *5 ,*?;GG " , 	 6Dt5 --   &1	 (&'<=II%
  . 
 	>!= )-(I(I(K% 594I4I
 "- $"<0BBL  5J 
5
141 !! &1 (&2++,	  "  	B!4#A$ #mmq0!]]t/BC ??Q$$::<<*,##%##335 ,D,<,<+=]4??J[ \  $ 5 56mDOOCTTabfbqbqar  sA  BF  BR  BR  AS S//* +//**A$B[B[A\^ 	 	|DNN#33FtG[G[F\\himiwiwhx  yE  FJ  FT  FT  EU  Ub  cg  cr  cr  bs  s  @D  @N  @N  O  Od  ei  e|  e|  d}  }L  MQ  M^  M^  L_  _s  tx  tE  tE  sF  FR  SW  Sa  Sa  Rb  bx  y}  yQ  yQ  xR  RS  T  	T	I "&i
 D  >s   '
l5ll)lc                    U R                   U R                  -  U R                  -  U R                  -  U R                  -  U R
                  :H  =(       a%    U R                  S:H  =(       d    U R                  S:H  $ r  )r   r   r   r   r   r   r  rd   s    r&   r   *EPHybridCommunicateGroup._check_valid_topoG  sw    OOoooo ##$ 	
 {{> !#<t'7'71'<	>r(   c                2    U R                   S:  d   S5       eg )Nr0   zcp not existr  rd   s    r&   _check_cp_exist(EPHybridCommunicateGroup._check_cp_existQ  s    "2N2"r(   c                (   / nS nU R                  USS5      nSnU H8  n[        R                  R                  UUUS9nU R                  U;   d  M4  UnUnM:     [        U5      S:  d   eUc   e[        R                  " S[        U5       S35        X44$ )Nr  r  r   r!  r#  z, sharding comm group(s) create successfully!)merge_inner_comm_listr   r   r&  r   rV   r   r  )	r[   r(  r+   r)  r*  r+  r,  r   r-  s	            r&   r  -EPHybridCommunicateGroup.build_sharding_groupT  s    "44."2
 '(#$E++55&A' 6 J
 5(!&&0# % >"Q&&&"...S)**VW	
 22r(   c                j   U R                  USS5      n/ nU H  n[        U5      U R                  -  U R                  :X  d    S[        U5       SU R                   35       e[	        U R                  5       H5  nUXPR                  -  US-   U R                  -   nUR                  U5        M7     M     U$ )Nr  r  sharding comm list ) size must divided by cp_sharding_degree r0   r  rV   r  r  rT   r   r[   r(  sharding_comm_listcontext_comm_listr   r   	sub_rankss          r&   split_context_comm_list0EPHybridCommunicateGroup.split_context_comm_listp  s    !77."2
 'Eu:!9!99T__L %c%j\1Z[_[s[sZtuL 4334!'1q5DOO*C	 "((3	 5	 ( ! r(   c                J   U R                  USS5      n/ nU H  n[        U5      U R                  -  U R                  :X  d    S[        U5       SU R                   35       e[	        U R                  5       H%  nXES U R                  2   nUR                  U5        M'     M     U$ )Nr  r  r  r  r  r  s          r&    split_context_sharding_comm_list9EPHybridCommunicateGroup.split_context_sharding_comm_list  s    !77."2
 'Eu:!9!99T__L %c%j\1Z[_[s[sZtuL 4??+!"6t"67	!((3 ,	 ( ! r(   c                   UR                  S5      nU R                  U5      n " S S5      nU" 5       nX2-    HI  n[        U5      S:  d  M  US   n[        S[        U5      5       H  nUR	                  XvU   5        M     MK     UR                  5       n	U	 H  n
U
R                  5         M     U	R                  S S9  U	$ )NrE   c                  ,    \ rS rSrS rS rS rS rSrg)REPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list.<locals>.UnionFindi  c                     0 U l         0 U l        g rI   parentry   rd   s    r&   ra   [EPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list.<locals>.UnionFind.__init__  s     	r(   c                    XR                   ;  a  XR                   U'   SU R                  U'   U$ U R                   U   U:w  a+  U R                  U R                   U   5      U R                   U'   U R                   U   $ r_  )r  ry   find)r[   rJ   s     r&   r  WEPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list.<locals>.UnionFind.find  se    KK'%&KKN#$DIIaLH;;q>Q&%)YYt{{1~%>DKKN{{1~%r(   c                "   U R                  U5      U R                  U5      pCX4:X  a  g U R                  U   U R                  U   :  a  XCpCX0R                  U'   U R                  U   U R                  U   :X  a  U R                  U==   S-  ss'   g g r  )r  ry   r  )r[   rJ   rK   pxpys        r&   unionXEPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list.<locals>.UnionFind.union  sx    1tyy|B899R=499R=0"$B99R=DIIbM1IIbMQ&M 2r(   c                    0 nU R                    H0  nU R                  U5      nX1;  a  / X'   X   R                  U5        M2     [        UR	                  5       5      $ rI   )r  r  r   r   rX   )r[   
componentsnoderoots       r&   get_componentsaEPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list.<locals>.UnionFind.get_components  sW    
 KKD99T?D-+-
($++D1	 (
 J--/00r(   r
  N)	r3   r4   r5   r6   ra   r  r  r  r=   r   r(   r&   	UnionFindr    s    &
'1r(   r  r0   r   c                    U S   $ r_  r   )rJ   s    r&   rL   QEPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list.<locals>.<lambda>  s    1Q4r(   )ru   )r   r  rV   rT   r  r  r}   )r[   r(  mp_comm_listcp_comm_listr  ufr   firstr   cp_tp_comm_list	components              r&   &fuse_context_tensor_parallel_comm_list?EPHybridCommunicateGroup.fuse_context_tensor_parallel_comm_list  s    ))'233D9"	1 "	1H [!0E5zA~aq#e*-AHHU!H- . 1 ++-(INN )0r(   c                   SnU R                  U5      nU H8  n[        R                  R                  UUUS9nU R                  U;   d  M4  UnUnM:     [        W5      S:  d   eWc   e[        R                  " SU R                   S35        Xx4$ )Nr   r!  r#  z4 context parallel comm group(s) create successfully!)	r  r   r   r&  r   rV   r   r  r  	r[   r(  r+   r,  r+  r   r-  r)  r*  s	            r&   r  ,EPHybridCommunicateGroup.build_context_group  s    &'#66t<$E++55&A' 6 J
 5(!&&0# % >"Q&&&"...T__%%YZ	
 22r(   c                   SnU R                  U5      nU H8  n[        R                  R                  UUUS9nU R                  U;   d  M4  UnUnM:     [        W5      S:  d   eWc   e[        R                  " SU R                   S35        Xx4$ )Nr   r!  r#  z= context sharding parallel comm group(s) create successfully!)	r  r   r   r&  r   rV   r   r  r  r'  s	            r&   r  5EPHybridCommunicateGroup.build_context_sharding_group  s    &'#??E$E++55&A' 6 J
 5(!&&0# % >"Q&&&"...T--..kl	
 22r(   c                    SnU R                  U5      nU H8  n[        R                  R                  UUUS9nU R                  U;   d  M4  UnUnM:     [
        R                  " S5        WW4$ )Nr   r!  z9Fused context & model parallel group create successfully!)r$  r   r   r&  r   r   r  r'  s	            r&   r  /EPHybridCommunicateGroup.build_cp_mp_fuse_group  s~     '(#EEdK$E++55&A' 6 J
 5(!&&0# % 	OP222r(   c                V   UR                   R                  U5      nUR                   R                  U5      nUR                  U5      n[        U5      UR                  U   -  n[
        R                  " UR                  US-   S 5      UR                  U   -  nUS:  a  US:  d   e/ n	[        U5       Hq  n
/ n[        UR                  U   5       H?  nXU-  -   [        U5      :  d   SU
 SU SU S[        U5       35       eXXU-  -      -  nMA     U	R                  U5        Ms     U	$ )z
merge all inner communication list whose rank-id are in
the same outer communication list. E.g.:
  outer_comm_list: [[0, 4], [1, 5]]
  inner_comm_list: [[0, 2], [1, 3], [4, 6], [5, 7]]
  => merged_inner_comm_list: [[0, 2, 4, 6], [1, 3, 5, 7]]
r0   Nr   z+Unexpected error in merge_inner_comm_list, z, )	rN   ri   r   rV   rO   mathprodrT   r   )r[   r(  
outer_name
inner_name
inner_axis
outer_axisinner_comm_listnum_merged_groupsintervalmerged_comm_listr   commjs                r&   r  .EPHybridCommunicateGroup.merge_inner_comm_list  sE    ))//
;
))//
;
,,Z80DJJz4JJIIdjj*q.!345J9OO 	 !1$A55()AD4::j12x<'#o*>> A!BqcH:UWX[\kXlWmn> L(899	 3
 ##D) *  r(   c                    [        U5      n[        US   5      nUS-
  nSnUS:  a0  Xd:  a+  X   U   nXr:X  a  U$ Xr:  a  US-  nOUS-  nUS:  a  Xd:  a  M+  g )Nr   r0   )rV   )r[   	comm_listr   rowscolsrccurrents           r&   find_col_idx%EPHybridCommunicateGroup.find_col_idx  sr    9~9Q< 1H1fl1oG%&QQ 1f r(   c                h    UR                  U5      nU R                  X0R                  5      nUc   eU$ rI   )r   rB  r   )r[   r(  parallel_typer<  parallel_ids        r&   r  )EPHybridCommunicateGroup._get_parallel_id0  s:    &&}5	''	3C3CD&&&r(   c                    U R                  U R                  SS5      nU R                  XR                  5      nUc   eU$ )Nr  r  )r  r  rB  r   )r[   r  rF  s      r&   r   2EPHybridCommunicateGroup._get_sharding_parallel_id6  sL    !77n.>
 ''(:<L<LM&&&r(   c                L    U R                   R                  U R                  5      $ rI   )r  ri   r   rd   s    r&   _get_context_parallel_id1EPHybridCommunicateGroup._get_context_parallel_id>  s    ~~##D$4$455r(   c                L    U R                   R                  U R                  5      $ rI   )r  ri   r   rd   s    r&   r  5EPHybridCommunicateGroup._get_cp_sharding_parallel_idA  s    &&,,T-=-=>>r(   c                    U R                   $ rI   )r  rd   s    r&   get_context_parallel_rank2EPHybridCommunicateGroup.get_context_parallel_rankD  rg   r(   c                    U R                   $ rI   r  rd   s    r&   get_context_parallel_world_size8EPHybridCommunicateGroup.get_context_parallel_world_sizeG  rY  r(   c                :    U R                  5         U R                  $ rI   )r  r  rd   s    r&   get_context_parallel_group3EPHybridCommunicateGroup.get_context_parallel_groupJ  s    """r(   c                T    U R                  5         U R                  R                  S   $ r_  )r  r  r   rd   s    r&   #get_context_parallel_group_src_rank<EPHybridCommunicateGroup.get_context_parallel_group_src_rankN  s%    ""((++r(   c                :    U R                  5         U R                  $ rI   )r  r  rd   s    r&   get_cp_sharding_parallel_group7EPHybridCommunicateGroup.get_cp_sharding_parallel_groupR  s    +++r(   c                T    U R                  5         U R                  R                  S   $ r_  )r  r  r   rd   s    r&   'get_cp_sharding_parallel_group_src_rank@EPHybridCommunicateGroup.get_cp_sharding_parallel_group_src_rankV  s%    ++11!44r(   c                :    U R                  5         U R                  $ rI   )r  r  rd   s    r&   get_cp_mp_parallel_group1EPHybridCommunicateGroup.get_cp_mp_parallel_groupZ  s    %%%r(   c                T    U R                  5         U R                  R                  S   $ r_  )r  r  r   rd   s    r&   !get_cp_mp_parallel_group_src_rank:EPHybridCommunicateGroup.get_cp_mp_parallel_group_src_rank^  s%    %%++A..r(   c                    U R                   $ rI   )r  rd   s    r&   get_expert_parallel_rank1EPHybridCommunicateGroup.get_expert_parallel_rankb  s    '''r(   c                    U R                   $ rI   )r  rd   s    r&   get_expert_parallel_world_size7EPHybridCommunicateGroup.get_expert_parallel_world_sizee  rY  r(   c                    U R                   $ rI   )r  rd   s    r&   get_expert_parallel_group2EPHybridCommunicateGroup.get_expert_parallel_grouph  r]  r(   c                4    U R                   R                  S   $ r_  )r  r   rd   s    r&   "get_expert_parallel_group_src_rank;EPHybridCommunicateGroup.get_expert_parallel_group_src_rankk  rb  r(   c                    U R                   $ rI   )r  rd   s    r&   get_moe_sharding_parallel_rank7EPHybridCommunicateGroup.get_moe_sharding_parallel_rankn  s    ---r(   c                    U R                   $ rI   )r  rd   s    r&   r  =EPHybridCommunicateGroup.get_moe_sharding_parallel_world_sizeq  r  r(   c                    U R                   $ rI   )r  rd   s    r&   get_moe_sharding_parallel_group8EPHybridCommunicateGroup.get_moe_sharding_parallel_groupt  s    ,,,r(   c                4    U R                   R                  S   $ r_  )r  r   rd   s    r&   (get_moe_sharding_parallel_group_src_rankAEPHybridCommunicateGroup.get_moe_sharding_parallel_group_src_rankw  s    ,,22155r(   c                @    U(       a  U R                   $ U R                  $ rI   )r  r   r[   with_context_parallels     r&   r  9EPHybridCommunicateGroup.get_sharding_parallel_world_sizez  s     !+++(((r(   c                @    U(       a  U R                   $ U R                  $ rI   )r  r   r  s     r&   r  3EPHybridCommunicateGroup.get_sharding_parallel_rank  s     000---r(   )3r   r   r  r  r  r  r  r  r  r  r  r  r  r   r  r   r   r   r  r  r  r  r   r  r  r  r  r  r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r	  r   r  r  r   )r\   r   r]   r   r  r  r   r   r  r  rI   )r   r  r   r  r  )'r3   r4   r5   r6   ra   r   r  r  r  r  r$  r  r  r  r  rB  r  r   rK  r  rP  rS  rV  rY  r\  r_  rb  re  rh  rk  rn  rq  rt  r  ry  r|  r  r  r=   r   r(   r&   r  r    s   	)
 348C&%C& C& 2C& 
C&J
>338! !5n3*3, !%3	F3$ <"6?$#,,5&/(#,.)-6 %*)	). .r(   r  c                  <    \ rS rSrSrS rS rS rS rS r	S r
S	rg
)_CommunicateGroupi  ztmp for staticc                    U q 0 U l        g rI   )r  groupsrd   s    r&   ra   _CommunicateGroup.__init__  s    !%r(   c                t    [         R                  R                  R                  X$U5      nX`R                  U'   g rI   )r   r   r   r   r  )r[   
group_name
group_rank
group_sizering_idgroup_ranksr   s          r&   set_comm_group _CommunicateGroup.set_comm_group  s3     ""--33
 #(Jr(   c                B    XR                   ;   d   eU R                   U   $ rI   r  )r[   r  s     r&   	get_group_CommunicateGroup.get_group  s!    [[((({{:&&r(   c                $    U R                  S5      $ NrE   )r  rd   s    r&   rl  *_CommunicateGroup.get_model_parallel_group  s    ~~g&&r(   c                8    U R                  S5      R                  $ r  )r  r   rd   s    r&   ri  /_CommunicateGroup.get_model_parallel_world_size  s    ~~g&---r(   c                8    U R                  S5      R                  $ r  )r  ry   rd   s    r&   rf  )_CommunicateGroup.get_model_parallel_rank  s    ~~g&+++r(   r  N)r3   r4   r5   r6   r7   ra   r  r  rl  ri  rf  r=   r   r(   r&   r  r    s#    
(''.,r(   r  )NN)r#   z0NCCLConfig_Message | dict[str, int | str] | Noner$   z
str | Noner   r	   rI   )r+   zdict[str, int | str] | Noner   r  ))
__future__r   rP   r.  os	functoolsr   	itertoolsr   typingr   r   r   r   7paddle.distributed.fleet.proto.distributed_strategy_pb2r	   r   #paddle.distributed.utils.nccl_utilsr
   utils.log_utilr   paddle.base.libpaddlepaddle.distributed.collectiver   __all__r  environr  baser   is_compiled_with_xpur  r   r%  r'   r,   r.   r   r   r  r  r   r(   r&   <module>r     s   #   	   . .  K #03 ":
; zz~~$fkk&6&6&K&K&M  $'JJNN91=$   AE#.=.. .. 04&2,&2&2R 8u# u#p|= |=~E
.5 E
.P, ,r(   