
    Αi                       S r SSKJr  SSKrSSKrSSKrSSKrSSKJrJ	r	  SSK
JrJrJrJr  SSKrSSKrSSKJr  SSKJr  SS	KJr  \(       a  SSKJ
r  / r " S
 S5      r " S S5      r " S S5      r " S S\5      r " S S\5      rg)zDefinition of Role Makers.    )annotationsN)ManagerProcess)TYPE_CHECKINGAnyClassVarLiteral)core)wait_server_ready   )getenv_or_backupc                  \    \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   SrS\S'   Sr	S\S'   Sr
g)Role)      zClassVar[Literal[1]]WORKER   zClassVar[Literal[2]]SERVERr   zClassVar[Literal[3]]HETER_WORKER   zClassVar[Literal[4]]ALL   zClassVar[Literal[5]]COORDINATOR N)__name__
__module____qualname____firstlineno__r   __annotations__r   r   r   r   __static_attributes__r       h/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/fleet/base/role_maker.pyr   r   )   s9    #$F $#$F $)*L&* !C	!()K%)r!   r   c                      \ rS rSrSr " S S5      rS r  SS jrS rS	 r	S
 r
S rS rS rS rS rSS jrSS jrSrg)Gloo1   zD
Gloo is a universal class for barrier and collective communication
c                       \ rS rSrSrSrSrSrg)Gloo.RENDEZVOUS6   r   r   r   r   N)r   r   r   r   HDFSFILEHTTPr    r   r!   r"   
RENDEZVOUSr'   6   s    r!   r,   c                   S U l         S U l        S U l        / SQU l        SU l        SU l        SU R                   3U l        SU l        SU l        SU l	        S U l
        S U l        S U l        SU l        SU l        SU l        SU l        g )	N)workerserverallz?gloo is not initialized, will not communicator with other nodesz.gloo initialized error, please check argumentsz#argument error, comm_world must in Fi  i )_worker_comm_server_comm_nodes_comm_comm_world	_err_init	_err_type
_err_world_is_initialized_init_timeout_seconds_run_timeout_seconds_rendezvous_role_iface_role_id_worker_num_server_num_need_init_allselfs    r"   __init__Gloo.__init__;   s      6M 	 J1$2B2B1CD 	  %%)"$+!
#r!   Nc                b   Xl         X l        X0l        X@l        XPl        X`l        SU l        UR                  SS5      U l        S nU R                   [        R                  R                  :X  a~  UR                  SS5      n	UR                  SS5      n
UR                  SS5      nU	(       a  U
(       a  U(       d  [        U R                  5      eU R                  XXR                  5        GO8U R                   [        R                  R                  :X  aJ  UR                  SS5      nU(       d  [        U R                  5      eU R!                  XR                  5        OU R                   [        R                  R"                  :X  a  UR                  SS5      nUR                  SS5      nUR                  SS	5      nUR                  S
5      nU(       a  U(       d  [        U R                  5      eU R%                  XU R                  UU5      nO[        U R                  5      eSU l        Xl        g )N store.prefixdfs.namedfs.ugidfs.path	http.host	http.portstart_http_serverFhttp_server_dT)r<   r=   r?   r@   rA   rB   r>   get_prefixr$   r,   r)   
ValueErrorr7   	_init_dfsr*   _init_fsr+   
_init_httpr9   _http_server)rD   
rendezvousrolerole_id
worker_num
server_numneed_init_allkwargshttp_serverdfs_namedfs_ugidfs_pathfs_pathipportrO   rP   s                    r"   init	Gloo.initV   s    &
%%+zz."5t333zz*b1HjjB/Gzz*b1H7( 00NN8hE!5!55jjR0G 00MM'<<0!5!55K,B::k2.D &

+> F"JJ7MT 00//$,,(9=K T^^,,#'r!   c                  ^ ^^ UUU 4S jnT R                   [        R                  :X  a2  T R                  [        R                  5      u  pEU" XES5      nUT l        O1T R                  [        R
                  5      u  pEU" XES5      nUT l        T R                  (       a2  T R                  [        R                  5      u  pEU" XES5      nUT l	        g g )Nc                  > [         R                  " 5       nUR                  U 5        UR                  U5        UR	                  T5        UR                  TR                  5        UR                  TR                  TR                  5        UR                  [        R                  R                  TU5      SS5        UR                  5         U$ )NrH   r
   r$   set_rankset_size
set_prefix	set_ifacer>   set_timeout_secondsr:   r;   set_hdfs_storeospathjoinrf   )ranknodesrY   gloorc   prefixrD   s       r"   rf   Gloo._init_fs.<locals>.init   s    99;DMM$MM% OOF#NN4;;'$$**D,E,E Wd ;RDIIKKr!   r   r   r   
r=   r   r   _get_rank_nodesr2   r   r3   rB   r   r4   )rD   rc   rw   rf   rt   ru   rv   s   ```    r"   rU   Gloo._init_fs   s    	 ::$..t{{;KDX.D $D..t{{;KDX.D $D..txx8KDU+D#D r!   c                  ^ ^^^^ UUUUU 4S jnT R                   [        R                  :X  a2  T R                  [        R                  5      u  pgU" XgS5      nUT l        O1T R                  [        R
                  5      u  pgU" XgS5      nUT l        T R                  (       a2  T R                  [        R                  5      u  pgU" XgS5      nUT l	        g g )Nc                  > [         R                  " 5       nUR                  U 5        UR                  U5        UR	                  T5        UR                  TR                  5        UR                  TR                  TR                  5        UR                  [        R                  R                  TU5      TT5        UR                  5         U$ Nrj   )	rt   ru   rY   rv   r`   rb   ra   rw   rD   s	       r"   rf   Gloo._init_dfs.<locals>.init   s    99;DMM$MM% OOF#NN4;;'$$**D,E,E Xt <hPIIKKr!   r   r   r   ry   )	rD   r`   ra   rb   rw   rf   rt   ru   rv   s	   `````    r"   rT   Gloo._init_dfs   s    	 	 ::$..t{{;KDX.D $D..t{{;KDX.D $D..txx8KDU+D#D r!   c                ^  ^ ^^^^ U4S jmUUU 4S jnUUUU 4S jn[        T5      mU(       a  [        S5        U" U5      nT R                  [        R                  :X  a1  T R                  [        R                  5      u  pU" XS5      nUT l        U(       a  SUS'   WR                  5         g g )Nc                n  > [        ST SU 35        SSKJn  U" TU5      nUR                  5         SnU R	                  SS5      (       d  UR                  5       (       dF  [        R                  " U5        U R	                  SS5      (       a  M/  UR                  5       (       d  MF  UR                  5         g )Nzstart http_server: z, r   )KVServerr   runningF)	print*paddle.distributed.fleet.utils.http_serverr   startrQ   should_stoptimesleepstop)rP   size_dr   r_   wait_secondsre   s        r"   __start_kv_server*Gloo._init_http.<locals>.__start_kv_server   s    'vRx89K"40KL!!)U33"..00

<( !!)U33"..00 r!   c                   > TS-   S-   nUTR                   0n[        SU SU 35        SU S'   [        TX4S9nSUl        UR	                  5         U$ )N_r.   zworker_key:z, size: Tr   )targetargs)r@   r   r   daemonr   )rP   
worker_keyr   rW   _Gloo__start_kv_serverrw   rD   s       r"   init_kv_server'Gloo._init_http.<locals>.init_kv_server   st    #0JD,,F K
|8F8<='+M)$"(/FL #'L  r!   c                  > [         R                  " 5       nUR                  U 5        UR                  U5        UR	                  T5        UR                  TR                  5        UR                  TR                  TR                  5        UR                  TTS5        SR                  T[        T5      /5      n[        U/5        UR                  5         U$ )Nr.   :)r
   r$   rk   rl   rm   rn   r>   ro   r:   r;   set_http_storers   strr   rf   )	rt   ru   rY   rv   eprd   re   rw   rD   s	        r"   rf   Gloo._init_http.<locals>.init   s    99;DMM$MM% OOF#NN4;;'$$**D,E,E D(32s4y/*Brd#IIKKr!   zto start http_serverr   Fr   )intr   r=   r   r   rz   r2   rs   )rD   rd   re   rw   rO   rP   r   rf   r_   rt   ru   rv   r   s   ````        @r"   rV   Gloo._init_http   s    		 $	 	 4y()(7K::$..t{{;KDX.D $D ',M)$ r!   c                   SnSnU[         R                  :X  a  U R                  nU R                  nX24$ U[         R                  :X  a  U R
                  nU R                  nX24$ U[         R                  :X  ac  U R                  U R
                  -   nU R                  [         R                  :X  a  U R                  nX24$ U R                  U R                  -   n X24$ [        U R                  5        X24$ )Nr   r1   )
r   r   r@   r?   r   rA   r   r=   rS   r7   )rD   rY   ru   rt   s       r"   rz   Gloo._get_rank_nodes  s    4;;$$E==D { T[[ $$E==D { TXX$$t'7'77EzzT[[(}} {	 ''$--7 { t~~&{r!   c                V    U R                  5       nU R                  5       nUS:X  a  U$ U$ ) 
get default physical interface
lo)%_Gloo__get_default_iface_from_gateway(_Gloo__get_default_iface_from_interfaces)rD   default1default2s      r"   __get_default_ifaceGloo.__get_default_iface  s2     88:;;=#t+x99r!   c                   [         R                  " S5      R                  5       R                  5       R	                  S5      nSnSnU H  nUR	                  5       nSU;   a*  SU;   a$  UR                  S5      nUR                  S5      nMC  Uc  MH  Uc  MM  Sn[        U5      U:  a  XB   nU(       d  Mk  US:w  d  Ms  US:w  d  M{  [        U5      U:  d  M  XC   s  $    g)	r   zroute -A inet
NGatewayIface*z0.0.0.0r   )rq   popenreadstripsplitindexlen)rD   resgateway_idx	iface_idxitemgateways         r"    __get_default_iface_from_gateway%Gloo.__get_default_iface_from_gateway%  s     hh',,.446<<TB	D::<DD W_"jj3 JJw/	(Y-Bt9{*"/GG39,D	I-?*   r!   c                    [         R                  " S5      R                  5       R                  5       R	                  S5      nU H-  nSU;   d  M  UR	                  S5      S   R                  5       s  $    g)r   zip -f inet addr | awk NR%3==1r   	BROADCASTr   r   r   )rq   r   r   r   r   )rD   r   r   s      r"   #__get_default_iface_from_interfaces(Gloo.__get_default_iface_from_interfaces?  sh    
 HH45::<BBDJJ4P 	 Dd"zz#q)//11  r!   c                h   U R                   (       d!  [        R                  " U R                  5        gXR                  ;  a  [        U R                  5      eUS:X  a  U R                  R                  5         gUS:X  a  U R                  R                  5         gU R                  R                  5         g)z
dummy barrier, do nothing
Nr.   r/   )r9   warningswarnr6   r5   rS   r8   r2   barrierr3   r4   rD   
comm_worlds     r"   r   Gloo.barrierK  s     ##MM$..)---T__--!%%'8#%%'$$&r!   c                `   U R                   (       d"  [        R                  " U R                  5        U$ X0R                  ;  a  [        U R                  5      e[        R                  " U5      nUR                  nUR                  S5      R                  5       nU R                  U5        US:X  a  U R                  R                  XR5      nO=US:X  a  U R                  R                  XR5      nOU R                   R                  XR5      n[        R                  " U5      R                  U5      nU$ )Nr1   r.   r/   )r9   r   r   r6   r5   rS   r8   nparrayshapereshapetolistr   r2   
all_reducer3   r4   )rD   inputmoder   input_shape
input_listansoutputs           r"   r   Gloo.all_reduce]  s    ##MM$..)L---T__--kk]]2&--/
Z !##..z@C8###..z@C""--j?C#&&{3r!   c                v   U R                   (       d"  [        R                  " U R                  5        U$ X R                  ;  a  [        U R                  5      eUS:X  a  U R                  R                  U5      nU$ US:X  a  U R                  R                  U5      nU$ U R                  R                  U5      nU$ )zG
dummy all gather, do nothing
Args:
    obj(any): obj to do all gather
r.   r/   )r9   r   r   r6   r5   rS   r8   r2   
all_gatherr3   r4   )rD   r   r   r   s       r"   r   Gloo.all_gatheru  s     ##MM$..)L---T__--!&&11%8F  8#&&11%8F  %%007Fr!   )r5   r6   r7   r8   rW   r>   r:   r9   rB   r4   rR   r<   r=   r?   r;   r3   rA   r2   r@   )FNsumr.   r.   )r   r   r   r   __doc__r,   rE   rf   rU   rT   rV   rz   _Gloo__get_default_ifacer   r   r   r   r   r    r   r!   r"   r$   r$   1   sY     
$D 3(j$8$8BH,:4
'$0r!   r$   c                      \ rS rSrSrS rS rS rS rS r	S r
S	 rS
 rS rS rS rS rS rSS jrSS jrS rSrg)RoleMakerBasei  z
RoleMakerBase is a base class for assigning a role to current process
in distributed training.
A paddle developer can implement RoleMakerBase to design a role maker
for worker or pserver assignment.
c                X    / U l         / U l        SU l        SU l        S U l        SU l        g )NrH   Fr1   )_worker_endpoints_server_endpoints_cur_endpoint_role_is_generatedr=   _current_idrC   s    r"   rE   RoleMakerBase.__init__  s1    !#!#"'
r!   c                    [        S5      e)z'
return is_worker() of current process
+Please implement this method in child classNotImplementedErrorrC   s    r"   
_is_workerRoleMakerBase._is_worker       ""OPPr!   c                    [        S5      e)z'
return is_server() of current process
r   r   rC   s    r"   
_is_serverRoleMakerBase._is_server  r   r!   c                    [        S5      e)z
Check whether the node is the first instance of worker.
Returns:
    bool: True if this is the first node of worker,
          False if not.
r   r   rC   s    r"   _is_first_workerRoleMakerBase._is_first_worker       ""OPPr!   c                    [        S5      e)zC
Get current total worker number.

Returns:
    int: worker number
r   r   rC   s    r"   r@   RoleMakerBase._worker_num  r   r!   c                    [        S5      e)zC
Get current total server number.

Returns:
    int: server number
r   r   rC   s    r"   rA   RoleMakerBase._server_num  r   r!   c                    [        S5      e)z3
Get current worker id.

Returns:
    int: node id
r   r   rC   s    r"   _worker_indexRoleMakerBase._worker_index  r   r!   c                    [        S5      e)z3
Get current server id.

Returns:
    int: node id
r   r   rC   s    r"   _server_indexRoleMakerBase._server_index  r   r!   c                    [        S5      e)z,
Get current id.

Returns:
    int: node id
r   r   rC   s    r"   r?   RoleMakerBase._role_id  r   r!   c                    [        S5      e)z9
Get the training node number
Returns:
    int: node num
r   r   rC   s    r"   	_node_numRoleMakerBase._node_num  s     ""OPPr!   c                    U R                   $ )z
return trainer endpoints
)r   rC   s    r"   _get_trainer_endpoints$RoleMakerBase._get_trainer_endpoints       %%%r!   c                    U R                   $ )z
return pserver endpoints
)r   rC   s    r"   _get_pserver_endpoints$RoleMakerBase._get_pserver_endpoints  r  r!   c                n    SU R                    SU R                   SU R                   SU R                   3$ )Nzrole: z, current_id: z, worker_endpoints: z, server_endpoints: )r=   r   r   r   rC   s    r"   	to_stringRoleMakerBase.to_string  sX    

|>$2B2B1CCWX\XnXnWo  pD  EI  E[  E[  D\  ]  	]r!   c                    [        S5        g )Nz7warning: RoleMakerBase does not have all gather worker.r   rD   r   r   s      r"   _all_gatherRoleMakerBase._all_gather  s    GHr!   c                    [        S5        g)z
Args:
    input(list/numpy.array): array of one dim
    output(list/numpy.array): array of one dim
    mode(str): "sum" or "min" or "max"
z7warning: RoleMakerBase does not have all reduce worker.Nr  rD   r   r   r   s       r"   _all_reduceRoleMakerBase._all_reduce  s     	GHr!   c                    [        S5        g)z5
barrier between trainers if current role is TRAINER
z4warning: RoleMakerBase does not have barrier worker.Nr  r   s     r"   _barrierRoleMakerBase._barrier  s     	DEr!   )r   r   r=   r   r   r   Nr   r   )r   r   r   r   r   rE   r   r   r   r@   rA   r   r   r?   r  r  r
  r  r  r  r  r    r   r!   r"   r   r     sd    QQQQQQQQQ&&]IIFr!   r   c                    ^  \ rS rSrSrS*S+U 4S jjjrS,S jr S-     S.S jjr S/       S0S jjrS1S jr	S1S jr
S2S	 jrS3S
 jrS2S jrS4S jrS4S jrS4S jrS4S jrS2S jrS2S jrS2S jrS2S jrS2S jrS2S jrS2S jrS5S jrS5S jrS5S jrS6S jrS1S jrS6S jrS1S jrS6S jr S6S jr!S  r"S6S! jr#S4S" jr$S2S# jr%S4S$ jr&S7S% jr'S7S& jr(S7S' jr)S7S( jr*S)r+U =r,$ )8PaddleCloudRoleMakeri%  a  
PaddleCloudRoleMaker is an interface for distributed configuration initialization based on obtaining distributed related information from environment variables.

Examples:
    .. code-block:: python

        >>> import os
        >>> import paddle.distributed.fleet as fleet

        >>> os.environ["PADDLE_PSERVER_NUMS"] = "2"
        >>> os.environ["PADDLE_TRAINERS_NUM"] = "2"

        >>> os.environ["POD_IP"] = "127.0.0.1"
        >>> os.environ["PADDLE_PORT"] = "36001"
        >>> os.environ["TRAINING_ROLE"] = "PSERVER"
        >>> os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001,127.0.0.2:36001"

        >>> os.environ["PADDLE_TRAINER_ID"] = "0"

        >>> fleet.PaddleCloudRoleMaker(is_collective=False)

c                *  > [         TU ]  5         Xl        SU l        X l        SU l        SU l        SU l        / U l        / U l	        / U l
        SU l        SU l        SU l        / U l        / U l        / U l        S U l        SU l        ['        5       U l        g )NFr   cpu)superrE   _is_collective_non_distributed_kwargsr   	_stage_id
_stage_num_next_heter_trainer_endpoints!_previous_heter_trainer_endpoints_heter_trainer_endpoints_heter_trainer_device_heter_trainer_device_type_is_heter_parameter_server_mode_stage_trainersr   r   _coordinator_endpoints_with_coordinatorr$   _gloo)rD   is_collectiver^   	__class__s      r"   rE   PaddleCloudRoleMaker.__init__=  s    + %"' -/*13.(*%%*"*/'/4,!!#!#&*#!&V
r!   c                :    U R                   R                  U5        g r~   )r.  r   r   s     r"   r  PaddleCloudRoleMaker._barrierW  s    

:&r!   c                8    U R                   R                  X5      $ r~   )r.  r   r  s      r"   r   PaddleCloudRoleMaker._all_gatherZ  s     zz$$U77r!   c                :    U R                   R                  XU5      $ r~   )r.  r   r  s       r"   r   PaddleCloudRoleMaker._all_reduce_  s     zz$$U*==r!   c                \    U R                   (       d  U R                  5         U R                  $ )z<
return the heter device that current heter worker is using
)r   _generate_roler(  rC   s    r"   _heter_device"PaddleCloudRoleMaker._heter_deviced  s%     &&!)))r!   c                \    U R                   (       d  U R                  5         U R                  $ )zA
return the heter device type that current heter worker is using
)r   r9  r)  rC   s    r"   _heter_device_type'PaddleCloudRoleMaker._heter_device_typel  s%     &&!...r!   c                \    U R                   (       d  U R                  5         U R                  $ )z)
return stage id of current heter worker
)r   r9  r#  rC   s    r"   _get_stage_id"PaddleCloudRoleMaker._get_stage_idt  s#     &&!~~r!   c                \    U R                   (       d  U R                  5         U R                  $ )z"
return trainer num of all stages
)r   r9  r+  rC   s    r"   _get_stage_trainers(PaddleCloudRoleMaker._get_stage_trainers|  s%     &&!###r!   c                \    U R                   (       d  U R                  5         U R                  $ )z
return stage num
)r   r9  r$  rC   s    r"   _get_num_stage#PaddleCloudRoleMaker._get_num_stage  #     &&!r!   c                ~    U R                   (       d  U R                  5         U R                  [        R                  :H  $ )z#
whether current process is worker
)r   r9  r=   r   r   rC   s    r"   r   PaddleCloudRoleMaker._is_worker  ,     &&!zzT[[((r!   c                ~    U R                   (       d  U R                  5         U R                  [        R                  :H  $ )z#
whether current process is server
)r   r9  r=   r   r   rC   s    r"   r   PaddleCloudRoleMaker._is_server  rK  r!   c                ~    U R                   (       d  U R                  5         U R                  [        R                  :H  $ r~   )r   r9  r=   r   r   rC   s    r"   _is_coordinator$PaddleCloudRoleMaker._is_coordinator  s,    &&!zzT----r!   c                    U R                   (       d  U R                  5         U R                  [        R                  :H  =(       a    U R
                  S:H  $ )z-
whether current process is worker of rank 0
r   )r   r9  r=   r   r   r   rC   s    r"   r   %PaddleCloudRoleMaker._is_first_worker  s<     &&!zzT[[(BT-=-=-BBr!   c                \    U R                   (       d  U R                  5         U R                  $ )z
get index of current worker
r   r9  r   rC   s    r"   r   "PaddleCloudRoleMaker._worker_index  %     &&!r!   c                \    U R                   (       d  U R                  5         U R                  $ )z
get index of current server
rT  rC   s    r"   r   "PaddleCloudRoleMaker._server_index  rV  r!   c                \    U R                   (       d  U R                  5         U R                  $ )z
get index of current node
rT  rC   s    r"   r?   PaddleCloudRoleMaker._role_id  rV  r!   c                \    U R                   (       d  U R                  5         U R                  $ )z%
return the current number of worker
)r   r9  _trainers_numrC   s    r"   r@    PaddleCloudRoleMaker._worker_num  s%     &&!!!!r!   c                    U R                   (       d  U R                  5         U R                  5       b  [        U R                  5       5      $ S$ )z%
return the current number of server
r   )r   r9  r
  r   rC   s    r"   rA    PaddleCloudRoleMaker._server_num  sL     &&! **,8 ++-.	
 	
r!   c                \    U R                   (       d  U R                  5         U R                  $ z!
return the training node number
r   r9  
_nodes_numrC   s    r"   r  PaddleCloudRoleMaker._node_num  rH  r!   c                \    U R                   (       d  U R                  5         U R                  $ ra  rb  rC   s    r"   _get_node_num"PaddleCloudRoleMaker._get_node_num  rH  r!   c                \    U R                   (       d  U R                  5         U R                  $ r~   )r   r9  _local_rankrC   s    r"   _get_local_rank$PaddleCloudRoleMaker._get_local_rank  s#    &&!r!   c                \    U R                   (       d  U R                  5         U R                  $ r~   )r   r9  _local_device_idsrC   s    r"   _get_local_device_ids*PaddleCloudRoleMaker._get_local_device_ids  #    &&!%%%r!   c                \    U R                   (       d  U R                  5         U R                  $ r~   )r   r9  _world_device_idsrC   s    r"   _get_world_device_ids*PaddleCloudRoleMaker._get_world_device_ids  rp  r!   c                \    U R                   (       d  U R                  5         U R                  $ )z
get endpoint of all trainers
)r   r9  r   rC   s    r"   r  +PaddleCloudRoleMaker._get_trainer_endpoints  %     &&!%%%r!   c                    U R                   (       d  U R                  5         U R                  [        R                  :X  d   S5       eU R
                  $ )Nz0get_trainer_endpoint should be called by trainer)r   r9  r=   r   r   r   rC   s    r"   _get_trainer_endpoint*PaddleCloudRoleMaker._get_trainer_endpoint  sE    &&!zzT[[( 	
>	
( !!!r!   c                    U R                   (       d  U R                  5         U R                  / :w  d   S5       eU R                  $ )z3
Returns:
    string: all heter_trainers'endpoints
z&Heter Worker Endpoints Not initialized)r   r9  r'  rC   s    r"   _get_heter_worker_endpoints0PaddleCloudRoleMaker._get_heter_worker_endpoints  sE    
 &&!,,2 	
4	
2 ,,,r!   c                    U R                   (       d  U R                  5         U R                  [        R                  :X  d   S5       eU R
                  $ )z:
Returns:
    str: corresponding heter_trainer's endpoint
z<_get_heter_worker_endpoint should be invoked by heter worker)r   r9  r=   r   r   r   rC   s    r"   _get_heter_worker_endpoint/PaddleCloudRoleMaker._get_heter_worker_endpoint  sI    
 &&!zzT... 	
J	
. !!!r!   c                \    U R                   (       d  U R                  5         U R                  $ )z
get endpoint of all pservers
)r   r9  r   rC   s    r"   r
  +PaddleCloudRoleMaker._get_pserver_endpoints  rw  r!   c                \    U R                   (       d  U R                  5         U R                  $ r~   )r   r9  r,  rC   s    r"   _get_coordinator_endpoints/PaddleCloudRoleMaker._get_coordinator_endpoints$  s#    &&!***r!   c                    U R                   (       d  U R                  5         U R                  [        R                  [        R
                  4;   d   S5       eU R                  $ )
invoked by heter worker
zC_get_previous_trainers should be invoked by trainer or heter worker)r   r9  r=   r   r   r   r&  rC   s    r"   _get_previous_trainers+PaddleCloudRoleMaker._get_previous_trainers)  s_     &&!zzKK
 
 	Q Q	Q 
 555r!   c                    U R                   (       d  U R                  5         U R                  [        R                  [        R
                  4;   d   S5       eU R                  $ )r  z?_get_next_trainers should be invoked by trainer or heter worker)r   r9  r=   r   r   r   r%  rC   s    r"   _get_next_trainers'PaddleCloudRoleMaker._get_next_trainers5  s_     &&!zzKK
 
 	M M	M 
 111r!   c                \    U R                   (       d  U R                  5         U R                  $ )zs
Return True if indispensable environment for fleetrun is not found
(use python-run to launch fleet-code directly)
)r   r9  r!  rC   s    r"   _is_non_distributed(PaddleCloudRoleMaker._is_non_distributedA  s%    
 &&!$$$r!   c                \    U R                   (       d  U R                  5         U R                  $ )z
get heter worker nums
)r   r9  _heter_trainers_numrC   s    r"   _heter_worker_num&PaddleCloudRoleMaker._heter_worker_numJ  s%     &&!'''r!   c                ~    U R                   (       d  U R                  5         U R                  [        R                  :H  $ )z)
whether current process is heter worker
)r   r9  r=   r   r   rC   s    r"   _is_heter_worker%PaddleCloudRoleMaker._is_heter_workerR  s.     &&!zzT....r!   c                   [         R                  " SS 5      U l        U R                  cG  SU l        SU l        [        R
                  U l        SU l        SU l        SU l	        S U l
        SU l        g U R                  R                  S5      U l        [        SS 5      U l        U R                  b!  U R                  R                  S5      U l        O/ U l        [         R                  " SS5      U l        U R                  S:X  a  [!        S	5        O'SU l        U R                  R                  S5      U l        [         R                  " S
S 5      nUc  [%        S5      e['        U5      n[         R                  " SS 5      nUc  [%        S5      eUS;  a  [%        SU S35      e[         R                  " SS5      n[         R                  " SS5      n[         R                  " SS5      nUS:w  a  UR                  S5      U l
        SU l        [+        U R                  5      U l	        US:X  a  US;   d   S5       eO UR                  S5      U l        US:X  a  US;   d   S5       eO& UR                  S5      U l        OSU l        SU l	        US:X  Ga  [        R
                  n[         R                  " SS 5      nUc  [%        S5      e['        U5      nU R(                  (       Ga  [         R                  " SS 5      U l        U R0                  c  [%        S5      e['        U R0                  5      U l        [         R                  " S S 5      U l        U R2                  c  [%        S!5      e['        U R2                  5      U l        [         R                  " S"S 5      U l        U R4                  c  [%        S#5      e[7        [8        R:                  " S$U R4                  5       Vs/ s H  n['        U5      PM     sn5      U l        [         R                  " S%S 5      n	U	c  [%        S&5      e[         R                  " S'S 5      n
U
c  [%        S(5      eS)R=                  X/5      nXl        GOtUS*:X  a=  [!        S+5        [        R@                  n['        [         R                  " SS,5      5      nGO1US-:X  a  [        RB                  n[         R                  " S%S 5      n	U	c  [%        S&5      e[         R                  " S'S 5      n
U
c  [%        S(5      eS)R=                  X/5      nXl        U R                  RE                  U R>                  5      nGOUS.:X  Ga  [        RF                  n[         R                  " SS 5      U l        U R0                  c  [%        S5      e['        U R0                  5      U l        [         R                  " S S 5      U l        U R2                  c  [%        S!5      e['        U R2                  5      U l        [         R                  " S"S 5      U l        U R4                  c  [%        S#5      e[7        [8        R:                  " S$U R4                  5       Vs/ s H  n['        U5      PM     sn5      U l        [         R                  " S/S 5      U l$        U RH                  c  [%        S05      eU RH                  S1;   d   S25       eU RH                  S3:X  a9  [         R                  " S4S,5      nS)R=                  U RH                  U45      U l%        U RJ                  S5:X  a9  [         R                  " S6S,5      nS)R=                  U RH                  U45      U l%        [         R                  " S%S 5      n	U	c  [%        S&5      e[         R                  " S'S 5      n
U
c  [%        S(5      eS)R=                  X/5      nXl        UR                  S5      RE                  U5      U-   nXl        WU l        WU l        [+        U R                   Vs1 s H  oR                  S)5      S   iM     sn5      U l        g !   [%        S5      e= f!   [%        S5      e= fs  snf s  snf s  snf )7NPADDLE_PSERVERS_IP_PORT_LISTrH   r   r   T,PADDLE_TRAINER_ENDPOINTSPADDLE_COORDINATOR_ENDPOINTSz$fl-ps > coordinator address is null!PADDLE_TRAINERS_NUMz@Can not find PADDLE_TRAINERS_NUM, please check your environment.TRAINING_ROLEz:Can not find TRAINING_ROLE, please check your environment.)TRAINERPSERVERHETER_TRAINERr   zRTRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER or COORDINATOR, but get z , please check your environment.&PADDLE_NEXT_HETER_TRAINER_IP_PORT_LIST*PADDLE_PREVIOUS_HETER_TRAINER_IP_PORT_LIST%PADDLE_ALL_HETER_TRAINER_IP_PORT_LIST)r  r  z*training_role should be trainer or pserverzCan not Find PADDLE_PREVIOUS_HETER_TRAINER_IP_PORT_LIST in env or its format doesn't match the requirement: 'IP:PORT,IP:PORT' .)r  r  z0training_role should be heter trainer or pserverz{Can not Find PADDLE_NEXT_HETER_TRAINER_IP_PORT_LIST in env or its format doesn't match the requirement: 'IP:PORT,IP:PORT' .Fr  PADDLE_TRAINER_IDz>Can not find PADDLE_TRAINER_ID, please check your environment.STAGE_IDz5Can not find STAGE_ID, please check your environment.	STAGE_NUMz6Can not find STAGE_NUM, please check your environment.PADDLE_STAGE_TRAINERS_NUMzFCan not find PADDLE_STAGE_TRAINERS_NUM, please check your environment.z\d+PADDLE_PORTz8Can not find PADDLE_PORT, please check your environment.POD_IPz3Can not find POD_IP, please check your environment.r   r   z>>> curr node is coordinator!0r  r  HETER_DEVICE_TYPEz>Can not find HETER_DEVICE_TYPE, please check your environment.)r  gpuxpuz*HETER_DEVICE_TYPE should be cpu,gpu or xpur  FLAGS_selected_gpusr  FLAGS_selected_xpus)&rq   getenvr   r\  r   r   r=   r   rc  r  r'  r!  r   r   r   r,  r   r-  rS   r   r*  r   r&  r%  r#  r$  r+  tuplerefindallrs   r   r   r   r   r   r)  r(  )rD   trainers_numtraining_rolenext_heter_trainer_eplistprevious_heter_trainer_eplistall_heter_trainer_eplistrY   
current_idxcur_portcur_ipcurr_endpointheter_device_ids                r"   _ps_envPaddleCloudRoleMaker._ps_envZ  s    "$+I4!P!!)%'D"!"DDJ DDO'(D$,0D)$(D!!%!7!7!=!=c!B!1&"
 !!-%)%;%;%A%A#%FD"%'D"&(ii*B'
# &&",89%)D"*.*E*E*K*KC*PD'yy!6=R  <(		/48 L   !
 
 derds  tT  U 
 %'II4b%
! )+		8")
% $&993R$
  $r),D,J,J3,OD)37D0'*4+H+H'ID$,2$ )  @ @@ 
5;;C@ : )B.$ )  F FF 
177< 6 49D0'(D$I%;;D#6=J! T  ZJ333!#:t!<>>)$O  "%T^^!4"$))K">??*$P  #&doo"6')yy/($ ''/$`  (-%'ZZ8L8L%MN%MSV%MN($ yy5H N  YYx.F~ I   HHf%78M!.m+12##DRYY':C@AJi';;Dyy5H N  YYx.F~ I   HHf%78M!.//55d6H6HIJo-$$DYYz48DN~~% K  !0DN iiT:DO& L  "$//2DO#%99-H$#OD ##+ \  $)!#FD4H4H!IJ!IAQ!IJ$D  /1ii#T/D+ ..6 T  22 7  < <	< 
 ..%7"$)),A3"G-0XX44oF.* ))U2"$)),A3"G-0XX44oF.* yy5H N  YYx.F~ I   HHf%78M!.(..s399-H 
 *
%8N8NO8N1wws|A8NOPs$ Z $ V J Ol K\ Ps*   7`" "`2 aa8a"`/2`?c                   [        [        R                  " SS5      5      U l        [        R                  " SS5      U l        U R                  S:X  d   e[
        R                  U l        [        S5      U l	        [        R                  " S5      U l
        U R                  c  SU l	        U R                  U l
        SU l        U R                  R                  S	5      U l	        [        U R                  5      U l        [        R                  " S
S 5      nUb'  [        R                  " SS 5      n[        U5      U l        [        U R                   Vs1 s H  o3R                  S5      S   iM     sn5      U l        [        R                  " S5      U l        [        R                  " S5      U l        [        R                  " S5      U l        g s  snf )Nr  r  PADDLE_TRAINING_ROLEr  r  PADDLE_CURRENT_ENDPOINTz127.0.0.1:6170Tr  PADDLE_AUTO_PARALLEL_CONFIGr  r   r   PADDLE_RANK_IN_NODEPADDLE_LOCAL_DEVICE_IDSPADDLE_WORLD_DEVICE_IDS)r   rq   r  r   _training_roler   r   r=   r   r   r   r!  r   r   r\  rc  ri  rm  rr  )rD   
auto_tunerr  r  s       r"   _collective_env$PaddleCloudRoleMaker._collective_envJ  sb   ryy)<cBC ii(>	J""i///[[
!12L!MYY'@A!!)%5D"!%!7!7D$(D!!%!7!7!=!=c!B !7!78YY<dC
!99%:DAL!$\!2D8N8NO8N1wws|A8NOP99%:;!#+D!E!#+D!E Ps   G	c           
     
   [        [        R                  " SS5      5      nUS;  a  g [        [        R                  " SS5      5      n[        R                  " SS5      nU[        R                  R
                  [        R                  R                  [        R                  R                  4;  a  [        U R                  R                  5      eUS:X  a  SOS	nU[        R                  R
                  :X  aM  [        R                  " S
S5      n[        R                  " SS5      n[        R                  " SS5      nUUUUS.nOU[        R                  R                  :X  a  S	n	[        5       n
U
R                  5       nS	US'   U R                  (       a'  U R                  S   nU R                  5       (       a  Sn	OB[        R                  " SS5      nU R!                  5       (       a  U R#                  5       S:X  a  Sn	UR%                  S5      u  pUUUU	US.nO[        R                  " SS5      nUUS.nU[        R                  R
                  :X  a  SnO#U[        R                  R                  :X  a  SnOSn['        SU SU SU 35        U R                  R)                  UU R*                  U R-                  5       U R/                  5       U R1                  5       UUS9  U[        R                  R                  :X  a  S	WS'   g g )NPADDLE_WITH_GLOOr  )r   r   PADDLE_GLOO_RENDEZVOUS
SYS_JOB_IDrH   r   TFPADDLE_GLOO_FS_NAMEPADDLE_GLOO_FS_UGIPADDLE_GLOO_FS_PATH)rJ   rK   rL   rI   r   r   PADDLE_GLOO_HTTP_ENDPOINTr   )rM   rN   rI   rO   rP   )rL   rI   r)   r+   r*   zGloo init with z: need_init_all: z, args: )rX   rY   rZ   r[   r\   r]   r^   )r   rq   r  r$   r,   r)   r+   r*   rS   r.  r7   r   dictr   r   r   r   r   r   r   rf   r=   r?   r@   rA   )rD   use_gloorendezvous_typerw   r]   r`   ra   rb   r^   rO   managerrP   	ep_rank_0rd   re   types                   r"   
_gloo_initPaddleCloudRoleMaker._gloo_inita  s   ryy!3S9:6! bii(@#FG<,OO  OO  OO  #
 

 TZZ1122 (A5doo222yy!6;Hii 4b9Gyy!6;H$"$ &	F  4 44 %iG#LLNM',M)$"" 2215	((**(,%II&A2F	??$$););)=)B(,% s+HB! &%6!.F yy!6;H$ &F
 doo222D 4 44DDdV#4]O8F8T	
 	

&MMO'')'')' 	 	
 doo222',M)$ 3r!   c                    U R                   (       de  U R                  (       d  U R                  5         OU R                  5         SU l         [        R
                  " 5       (       d  U R                  5         gggz
generate role for role maker
TN)r   r   r  r  paddlein_dynamic_moder  rC   s    r"   r9  #PaddleCloudRoleMaker._generate_role  sV     &&&&$$&&*D#))++! , 'r!   )r,  r   r   r.  r(  r)  r'  r  r   r*  r"  rm  ri  r%  rc  r!  r&  r=   r   r   r#  r$  r+  r\  r  r-  r   rr  )F)r/  boolr^   r   returnNone)r   r   r  r  r   )r   r   r   r   r  zlist[float]r   )r   r   r   r   r   r   r  znpt.NDArray[Any])r  r   )r  r   )r  z	list[int])r  r  )r  z
str | None)r  z	list[str]r  r  )-r   r   r   r   r   rE   r  r  r  r:  r=  r@  rC  rF  r   r   rO  r   r   r   r?   r@   rA   r  rf  rj  rn  rs  r  ry  r|  r  r
  r  r  r  r  r  r  r  r  r  r9  r    __classcell__r0  s   @r"   r  r  %  s   . 4' -588&)8	8 @H>> #>9<>	>
*/$)).
C   "

 
&
&
&"
-
"&+

6
2%(/nQ`F.M-^" "r!   r  c                  d   ^  \ rS rSrSr  S       S	U 4S jjjrS
S jrS
S jrS
S jrSr	U =r
$ )UserDefinedRoleMakeri  a  
UserDefinedRoleMaker is an interface for distributed configuration initialization based on obtaining distributed related information from user-defined parameters.

Examples:
    .. code-block:: python

        >>> import paddle.distributed.fleet as fleet
        >>> from paddle.distributed.fleet.base.role_maker import Role

        >>> fleet.UserDefinedRoleMaker(
        ...     current_id=0,
        ...     role=Role.SERVER,
        ...     worker_num=2,
        ...     server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
c                6   > [         TU ]  " SXS.UD6  X l        g )N)r/  	init_gloor   )r  rE   
_init_gloo)rD   r/  r  r^   r0  s       r"   rE   UserDefinedRoleMaker.__init__  s)     	 	
'	
@F	
 $r!   c                   U R                   R                  S5      U l        U R                   R                  S/ 5      U l        U R                   R                  SS5      U l        U R                  S:X  a5  [        U R                  5      S:  d   e[        U R                  5      U l        U R                   R                  S5      U l        U R                   R                  S5      U l        U R                  [        R                  :X  aB  [        U R                  5      U R                  :  a  U R                  U R                     U l
        O<U R                  [        R                  :X  a  U R                  U R                     U l
        [        U R                   Vs1 s H  oR                  S5      S   iM     sn5      U l        g s  snf )Nserver_endpointsworker_endpointsr[   r   rY   r  r   )r"  rQ   r   r   r\  r   r=   r   r   r   r   r   r   rc  rD   r  s     r"   _user_defined_ps_env)UserDefinedRoleMaker._user_defined_ps_env  sQ   !%!1!12D!E!%!1!12Db!I!\\--lA>"t--.222!$T%;%;!<D\\%%f-
<<++L9 JJ$++%D**+d.>.>>!%!7!78H8H!IDZZ4;;&!%!7!78H8H!ID8N8NO8N1wws|A8NOPOs   Gc                d   U R                   R                  S5      U l        U R                   R                  S5      U l        [	        U R                  5      U l        [        R                  U l        [	        U R                   Vs1 s H  oR                  S5      S   iM     sn5      U l
        g s  snf )Nr  r  r   r   )r"  rQ   r   r   r   r\  r   r   r  r   rc  r  s     r"   _user_defined_collective_env1UserDefinedRoleMaker._user_defined_collective_env  s    !%!1!12D!E<<++L9 !7!78"kk8N8NO8N1wws|A8NOPOs   B-c                    U R                   (       d:  U R                  (       d  U R                  5         OU R                  5         SU l         ggr  )r   r   r  r  rC   s    r"   r9  #UserDefinedRoleMaker._generate_role  s;     &&&&))+113&*D# 'r!   )
r   r   r  rc  r=   r   r   r\  r  r   )FF)r/  r  r  r  r^   r   r  r  r  )r   r   r   r   r   rE   r  r  r9  r    r  r  s   @r"   r  r    sV    $ $	$	$ 	$ 		$
 
	$ 	$Q*Q	+ 	+r!   r  ) r   
__future__r   rq   r  r   r   multiprocessingr   r   typingr   r   r   r	   numpyr   r  paddle.baser
   5paddle.distributed.fleet.base.private_helper_functionr   
backup_envr   numpy.typingnpt__all__r   r$   r   r  r  r   r!   r"   <module>r     s    ! " 	 	   , 8 8    +
* *X Xv
yF yFrV
"= V
"rA+/ A+r!   