
    x-j                    ,   d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZmZmZmZ ddlZddlZddlmZ ddlmZ dd	lmZ erddlm
Z g Z G d
 d          Z G d d          Z G d d          Z G d de          Z G d de          ZdS )zDefinition of Role Makers.    )annotationsN)ManagerProcess)TYPE_CHECKINGAnyClassVarLiteral)core)wait_server_ready   )getenv_or_backupc                  V    e Zd ZU dZded<   dZded<   dZded	<   d
Zded<   dZded<   dS )Role   zClassVar[Literal[1]]WORKER   zClassVar[Literal[2]]SERVERr   zClassVar[Literal[3]]HETER_WORKER   zClassVar[Literal[4]]ALL   zClassVar[Literal[5]]COORDINATORN)	__name__
__module____qualname__r   __annotations__r   r   r   r        h/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/fleet/base/role_maker.pyr   r   )   sg         #$F$$$$#$F$$$$)*L**** !C!!!!()K))))))r   r   c                  ~    e Zd ZdZ G d d          Zd Z	 	 ddZd Zd	 Zd
 Z	d Z
d Zd Zd Zd ZddZddZdS )GloozL
    Gloo is a universal class for barrier and collective communication
    c                      e Zd ZdZdZdZdS )Gloo.RENDEZVOUSr   r   r   N)r   r   r   HDFSFILEHTTPr   r   r   
RENDEZVOUSr#   6   s        r   r'   c                   d | _         d | _        d | _        g d| _        d| _        d| _        d| j         | _        d| _        d| _        d| _	        d | _
        d | _        d | _        d| _        d| _        d| _        d| _        d S )	N)workerserverallz?gloo is not initialized, will not communicator with other nodesz.gloo initialized error, please check argumentsz#argument error, comm_world must in Fi  i )_worker_comm_server_comm_nodes_comm_comm_world	_err_init	_err_type
_err_world_is_initialized_init_timeout_seconds_run_timeout_seconds_rendezvous_role_iface_role_id_worker_num_server_num_need_init_allselfs    r   __init__zGloo.__init__;   s      666M 	 JD$2BDD 	  %%)"$+!
#r   FNc                $   || _         || _        || _        || _        || _        || _        d| _        |                    dd          | _        d }| j         t          j
        j        k    r{|                    dd          }	|                    dd          }
|                    dd          }|	r|
r|st          | j                  |                     |	|
|| j                   n| j         t          j
        j        k    rH|                    dd          }|st          | j                  |                     || j                   n| j         t          j
        j        k    r|                    dd          }|                    dd          }|                    dd	          }|                    d
          }|r|st          | j                  |                     ||| j        ||          }nt          | j                  d| _        || _        d S )N store.prefixdfs.namedfs.ugidfs.path	http.host	http.portstart_http_serverFhttp_server_dT)r7   r8   r:   r;   r<   r=   r9   get_prefixr!   r'   r$   
ValueErrorr2   	_init_dfsr%   _init_fsr&   
_init_httpr4   _http_server)r?   
rendezvousrolerole_id
worker_num
server_numneed_init_allkwargshttp_serverdfs_namedfs_ugidfs_pathfs_pathipportrI   rJ   s                    r   initz	Gloo.initV   s    &
%%+zz."55t333zz*b11HjjB//Gzz*b11H 17 1( 1 000NN8WhEEEE!555jjR00G 1 000MM'4<0000!555K,,B::k2..D &

+> F F"JJ77M 1T 1 000//D$,(9= KK T^,,,#'r   c                     fd} j         t          j        k    r7                     t          j                  \  }} |||d          }| _        n6                     t          j                  \  }} |||d          }| _         j        r8                     t          j                  \  }} |||d          }| _	        d S d S )Nc                   t          j                    }|                    |            |                    |           |                               |                    j                   |                    j        j	                   |
                    t          j                            |          dd           |                                 |S )NrB   r
   r!   set_rankset_size
set_prefix	set_ifacer9   set_timeout_secondsr5   r6   set_hdfs_storeospathjoinr`   )ranknodesrS   gloor]   prefixr?   s       r   r`   zGloo._init_fs.<locals>.init   s    9;;DMM$MM%   OOF###NN4;'''$$*D,E   Wd ; ;RDDDIIKKKKr   r   r   r   
r8   r   r   _get_rank_nodesr-   r   r.   r=   r   r/   )r?   r]   rp   r`   rm   rn   ro   s   ```    r   rO   zGloo._init_fs   s    	 	 	 	 	 	 	 :$$..t{;;KD%4eX..D $D..t{;;KD%4eX..D $D 	$..tx88KD%4eU++D#D	$ 	$r   c                     fd} j         t          j        k    r7                     t          j                  \  }} |||d          }| _        n6                     t          j                  \  }} |||d          }| _         j        r8                     t          j                  \  }} |||d          }| _	        d S d S )Nc                   t          j                    }|                    |            |                    |           |                               |                    j                   |                    j        j	                   |
                    t          j                            |                     |                                 |S Nrc   )	rm   rn   rS   ro   rZ   r\   r[   rp   r?   s	       r   r`   zGloo._init_dfs.<locals>.init   s    9;;DMM$MM%   OOF###NN4;'''$$*D,E   Xt < <hPPPIIKKKKr   r   r   r   rq   )	r?   rZ   r[   r\   rp   r`   rm   rn   ro   s	   `````    r   rN   zGloo._init_dfs   s    	 	 	 	 	 	 	 	 	 :$$..t{;;KD%4eX..D $D..t{;;KD%4eX..D $D 	$..tx88KD%4eU++D#D	$ 	$r   c                ^    fd fd} fd}t                    |rt          d            ||          } j        t          j        k    r6                     t          j                  \  }	}
 ||	|
d          }| _        |rd|d<   |                                 d S d S )Nc                z   t          d d|            ddlm}  ||          }|                                 d}|                     dd          s|                                s>t          j        |           |                     dd          *|                                >|                                 d S )Nzstart http_server: z, r   )KVServerr   runningF)	print*paddle.distributed.fleet.utils.http_serverrx   startrK   should_stoptimesleepstop)rJ   size_drx   rY   wait_secondsr_   s        r   __start_kv_serverz*Gloo._init_http.<locals>.__start_kv_server   s    88888999KKKKKK"(400KL!!)U33)"..00) 
<((( !!)U33)"..00)
 r   c                    dz   dz   }|j         i}t          d| d|            d| d<   t          | |f          }d|_        |                                 |S )N_r)   zworker_key:z, size: Try   )targetargs)r;   rz   r   daemonr|   )rJ   
worker_keyr   rQ   _Gloo__start_kv_serverrp   r?   s       r   init_kv_serverz'Gloo._init_http.<locals>.init_kv_server   s    #0JD,F <
<<F<<==='+M)$"(v/F  L #'L    r   c                   t          j                    }|                    |            |                    |           |                               |                    j                   |                    j        j	                   |
                    d           d                    t                    g          }t          |g           |                                 |S )Nr)   :)r
   r!   rd   re   rf   rg   r9   rh   r5   r6   set_http_storerl   strr   r`   )	rm   rn   rS   ro   epr^   r_   rp   r?   s	        r   r`   zGloo._init_http.<locals>.init   s    9;;DMM$MM%   OOF###NN4;'''$$*D,E   D(3332s4yy/**Brd###IIKKKKr   zto start http_serverr   Fry   )intrz   r8   r   r   rr   r-   rl   )r?   r^   r_   rp   rI   rJ   r   r`   rY   rm   rn   ro   r   s   ````        @r   rP   zGloo._init_http   s   	 	 	 	 		  	  	  	  	  	  	 $	 	 	 	 	 	 	 	 4yy 	8()))(.77K:$$..t{;;KD%4eX..D $D  	',M)$	 	r   c                N   d}d}|t           j        k    r| j        }| j        }n|t           j        k    r| j        }| j        }n`|t           j        k    r<| j        | j        z   }| j        t           j        k    r| j        }n$| j        | j        z   }nt          | j	                   ||fS )Nr   r,   )
r   r   r;   r:   r   r<   r   r8   rM   r2   )r?   rS   rn   rm   s       r   rr   zGloo._get_rank_nodes  s    4;$E=DDT[  $E=DDTX$t'77EzT[((}'$-7t~&&&U{r   c                f    |                                  }|                                 }|dk    r|n|S )0
        get default physical interface
        lo)%_Gloo__get_default_iface_from_gateway(_Gloo__get_default_iface_from_interfaces)r?   default1default2s      r   __get_default_ifacezGloo.__get_default_iface  s:     88::;;==#t++xx9r   c                   t          j        d                                                                                              d          }d}d}|D ]}|                                }d|v r/d|v r+|                    d          }|                    d          }I|J|Hd}t          |          |k    r||         }|r)|dk    r#|dk    rt          |          |k    r
||         c S dS )	r   zroute -A inet
NGatewayIface*z0.0.0.0r   )rj   popenreadstripsplitindexlen)r?   resgateway_idx	iface_idxitemgateways         r    __get_default_iface_from_gatewayz%Gloo.__get_default_iface_from_gateway%  s    h'',,..4466<<TBB	 	+ 	+D::<<DD  W__"jj33 JJw//		(Y-Bt99{**";/G+39,,D		I--	?***tr   c                   t          j        d                                                                                              d          }|D ]5}d|v r/|                    d          d                                         c S 6dS )r   zip -f inet addr | awk NR%3==1r   	BROADCASTr   r   r   )rj   r   r   r   r   )r?   r   r   s      r   #__get_default_iface_from_interfacesz(Gloo.__get_default_iface_from_interfaces?  s    
 H455::<<BBDDJJ4PP 	  	2 	2Dd""zz#q)//11111 #tr   c                :   | j         st          j        | j                   dS || j        vrt          | j                  |dk    r| j                                         dS |dk    r| j	                                         dS | j
                                         dS )z+
        dummy barrier, do nothing
        Nr)   r*   )r4   warningswarnr1   r0   rM   r3   r-   barrierr.   r/   r?   
comm_worlds     r   r   zGloo.barrierK  s     # 	M$.)))FT---T_---!!%%'''''8##%%'''''$$&&&&&r   sumr)   c                >   | j         st          j        | j                   |S || j        vrt          | j                  t          j        |          }|j	        }|
                    d                                          }|                     |           |dk    r| j                            ||          }n=|dk    r| j                            ||          }n| j                            ||          }t          j        |          
                    |          }|S )Nr,   r)   r*   )r4   r   r   r1   r0   rM   r3   nparrayshapereshapetolistr   r-   
all_reducer.   r/   )r?   inputmoder   input_shape
input_listansoutputs           r   r   zGloo.all_reduce]  s   # 	M$.)))LT---T_---k]]2&&--//
Z   !!#..z4@@CC8###..z4@@CC"--j$??C#&&{33r   c                <   | j         st          j        | j                   |S || j        vrt          | j                  |dk    r| j                            |          }n;|dk    r| j	                            |          }n| j
                            |          }|S )zg
        dummy all gather, do nothing
        Args:
            obj(any): obj to do all gather
        r)   r*   )r4   r   r   r1   r0   rM   r3   r-   
all_gatherr.   r/   )r?   r   r   r   s       r   r   zGloo.all_gatheru  s     # 	M$.)))LT---T_---!!&11%88FF8##&11%88FF%0077Fr   )FNr   r)   r)   )r   r   r   __doc__r'   r@   r`   rO   rN   rP   rr   _Gloo__get_default_ifacer   r   r   r   r   r   r   r   r!   r!   1   s               
$ $ $D 3( 3( 3( 3(j$ $ $8$ $ $8B B BH  ,: : :  4
 
 
' ' '$   0     r   r!   c                  v    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd ZddZddZd ZdS )RoleMakerBasez
    RoleMakerBase is a base class for assigning a role to current process
    in distributed training.
    A paddle developer can implement RoleMakerBase to design a role maker
    for worker or pserver assignment.
    c                Z    g | _         g | _        d| _        d| _        d | _        d| _        d S )NrB   Fr,   )_worker_endpoints_server_endpoints_cur_endpoint_role_is_generatedr8   _current_idr>   s    r   r@   zRoleMakerBase.__init__  s7    !#!#"'
r   c                     t          d          )z7
        return is_worker() of current process
        +Please implement this method in child classNotImplementedErrorr>   s    r   
_is_workerzRoleMakerBase._is_worker       ""OPPPr   c                     t          d          )z7
        return is_server() of current process
        r   r   r>   s    r   
_is_serverzRoleMakerBase._is_server  r   r   c                     t          d          )z
        Check whether the node is the first instance of worker.
        Returns:
            bool: True if this is the first node of worker,
                  False if not.
        r   r   r>   s    r   _is_first_workerzRoleMakerBase._is_first_worker       ""OPPPr   c                     t          d          )zc
        Get current total worker number.

        Returns:
            int: worker number
        r   r   r>   s    r   r;   zRoleMakerBase._worker_num  r   r   c                     t          d          )zc
        Get current total server number.

        Returns:
            int: server number
        r   r   r>   s    r   r<   zRoleMakerBase._server_num  r   r   c                     t          d          )zS
        Get current worker id.

        Returns:
            int: node id
        r   r   r>   s    r   _worker_indexzRoleMakerBase._worker_index  r   r   c                     t          d          )zS
        Get current server id.

        Returns:
            int: node id
        r   r   r>   s    r   _server_indexzRoleMakerBase._server_index  r   r   c                     t          d          )zL
        Get current id.

        Returns:
            int: node id
        r   r   r>   s    r   r:   zRoleMakerBase._role_id  r   r   c                     t          d          )zY
        Get the training node number
        Returns:
            int: node num
        r   r   r>   s    r   	_node_numzRoleMakerBase._node_num  s     ""OPPPr   c                    | j         S )z*
        return trainer endpoints
        )r   r>   s    r   _get_trainer_endpointsz$RoleMakerBase._get_trainer_endpoints       %%r   c                    | j         S )z*
        return pserver endpoints
        )r   r>   s    r   _get_pserver_endpointsz$RoleMakerBase._get_pserver_endpoints  r   r   c                F    d| j          d| j         d| j         d| j         S )Nzrole: z, current_id: z, worker_endpoints: z, server_endpoints: )r8   r   r   r   r>   s    r   	to_stringzRoleMakerBase.to_string  s[     ]
  ]  ]$2B  ]  ]X\Xn  ]  ]  EI  E[  ]  ]  	]r   r)   c                $    t          d           d S )Nz7warning: RoleMakerBase does not have all gather worker.rz   r?   r   r   s      r   _all_gatherzRoleMakerBase._all_gather  s    GHHHHHr   r   c                $    t          d           dS )z
        Args:
            input(list/numpy.array): array of one dim
            output(list/numpy.array): array of one dim
            mode(str): "sum" or "min" or "max"
        z7warning: RoleMakerBase does not have all reduce worker.Nr   r?   r   r   r   s       r   _all_reducezRoleMakerBase._all_reduce  s     	GHHHHHr   c                $    t          d           dS )zE
        barrier between trainers if current role is TRAINER
        z4warning: RoleMakerBase does not have barrier worker.Nr   r   s     r   _barrierzRoleMakerBase._barrier  s     	DEEEEEr   Nr   r   )r   r   r   r   r@   r   r   r   r;   r<   r   r   r:   r   r   r   r   r   r   r   r   r   r   r   r     sC          Q Q QQ Q QQ Q QQ Q QQ Q QQ Q QQ Q QQ Q QQ Q Q& & && & &] ] ]I I I II I I IF F F F Fr   r   c                  T    e Zd ZdZd;d< fd	Zd=dZ	 d>d?dZ	 d@dAdZdBdZdBdZ	dCdZ
dDdZdCdZdEdZdEdZdEdZdEdZdCd ZdCd!ZdCd"ZdCd#ZdCd$ZdCd%ZdCd&ZdFd(ZdFd)ZdFd*ZdGd,ZdBd-ZdGd.ZdBd/ZdGd0ZdGd1Z d2 Z!dGd3Z"dEd4Z#dCd5Z$dEd6Z%dHd7Z&dHd8Z'dHd9Z(dHd:Z) xZ*S )IPaddleCloudRoleMakera  
    PaddleCloudRoleMaker is an interface for distributed configuration initialization based on obtaining distributed related information from environment variables.

    Examples:
        .. code-block:: python

            >>> import os
            >>> import paddle.distributed.fleet as fleet

            >>> os.environ["PADDLE_PSERVER_NUMS"] = "2"
            >>> os.environ["PADDLE_TRAINERS_NUM"] = "2"

            >>> os.environ["POD_IP"] = "127.0.0.1"
            >>> os.environ["PADDLE_PORT"] = "36001"
            >>> os.environ["TRAINING_ROLE"] = "PSERVER"
            >>> os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001,127.0.0.2:36001"

            >>> os.environ["PADDLE_TRAINER_ID"] = "0"

            >>> fleet.PaddleCloudRoleMaker(is_collective=False)

    Fis_collectiveboolrX   r   returnNonec                \   t                                                       || _        d| _        || _        d| _        d| _        d| _        g | _        g | _	        g | _
        d| _        d| _        d| _        g | _        g | _        g | _        d | _        d| _        t'                      | _        d S )NFr   cpu)superr@   _is_collective_non_distributed_kwargsr   	_stage_id
_stage_num_next_heter_trainer_endpoints!_previous_heter_trainer_endpoints_heter_trainer_endpoints_heter_trainer_device_heter_trainer_device_type_is_heter_parameter_server_mode_stage_trainersr   r   _coordinator_endpoints_with_coordinatorr!   _gloo)r?   r   rX   	__class__s      r   r@   zPaddleCloudRoleMaker.__init__=  s    + %"' -/*13.(*%%*"*/'/4,!!#!#&*#!&VV


r   r   r   c                :    | j                             |           d S ru   )r  r   r   s     r   r   zPaddleCloudRoleMaker._barrierW  s    
:&&&&&r   r)   r   list[float]c                8    | j                             ||          S ru   )r  r   r   s      r   r   z PaddleCloudRoleMaker._all_gatherZ  s     z$$UJ777r   r   r   npt.NDArray[Any]c                :    | j                             |||          S ru   )r  r   r   s       r   r   z PaddleCloudRoleMaker._all_reduce_  s     z$$UD*===r   c                F    | j         s|                                  | j        S )zL
        return the heter device that current heter worker is using
        )r   _generate_roler  r>   s    r   _heter_devicez"PaddleCloudRoleMaker._heter_deviced  s*     & 	"!!!))r   c                F    | j         s|                                  | j        S )zQ
        return the heter device type that current heter worker is using
        )r   r  r  r>   s    r   _heter_device_typez'PaddleCloudRoleMaker._heter_device_typel  s*     & 	"!!!..r   r   c                F    | j         s|                                  | j        S )z9
        return stage id of current heter worker
        )r   r  r  r>   s    r   _get_stage_idz"PaddleCloudRoleMaker._get_stage_idt  s)     & 	"!!!~r   	list[int]c                F    | j         s|                                  | j        S )z2
        return trainer num of all stages
        )r   r  r	  r>   s    r   _get_stage_trainersz(PaddleCloudRoleMaker._get_stage_trainers|  s*     & 	"!!!##r   c                F    | j         s|                                  | j        S )z"
        return stage num
        )r   r  r  r>   s    r   _get_num_stagez#PaddleCloudRoleMaker._get_num_stage  )     & 	"!!!r   c                b    | j         s|                                  | j        t          j        k    S )z3
        whether current process is worker
        )r   r  r8   r   r   r>   s    r   r   zPaddleCloudRoleMaker._is_worker  0     & 	"!!!zT[((r   c                b    | j         s|                                  | j        t          j        k    S )z3
        whether current process is server
        )r   r  r8   r   r   r>   s    r   r   zPaddleCloudRoleMaker._is_server  r!  r   c                b    | j         s|                                  | j        t          j        k    S ru   )r   r  r8   r   r   r>   s    r   _is_coordinatorz$PaddleCloudRoleMaker._is_coordinator  s/    & 	"!!!zT---r   c                x    | j         s|                                  | j        t          j        k    o
| j        dk    S )z=
        whether current process is worker of rank 0
        r   )r   r  r8   r   r   r   r>   s    r   r   z%PaddleCloudRoleMaker._is_first_worker  s=     & 	"!!!zT[(BT-=-BBr   c                F    | j         s|                                  | j        S )z-
        get index of current worker
        r   r  r   r>   s    r   r   z"PaddleCloudRoleMaker._worker_index  *     & 	"!!!r   c                F    | j         s|                                  | j        S )z-
        get index of current server
        r'  r>   s    r   r   z"PaddleCloudRoleMaker._server_index  r(  r   c                F    | j         s|                                  | j        S )z+
        get index of current node
        r'  r>   s    r   r:   zPaddleCloudRoleMaker._role_id  r(  r   c                F    | j         s|                                  | j        S )z5
        return the current number of worker
        )r   r  _trainers_numr>   s    r   r;   z PaddleCloudRoleMaker._worker_num  s*     & 	"!!!!!r   c                    | j         s|                                  |                                 !t          |                                           ndS )z5
        return the current number of server
        Nr   )r   r  r   r   r>   s    r   r<   z PaddleCloudRoleMaker._server_num  sY     & 	"!!! **,,8 ++--...	
r   c                F    | j         s|                                  | j        S z1
        return the training node number
        r   r  
_nodes_numr>   s    r   r   zPaddleCloudRoleMaker._node_num  r  r   c                F    | j         s|                                  | j        S r/  r0  r>   s    r   _get_node_numz"PaddleCloudRoleMaker._get_node_num  r  r   
str | Nonec                F    | j         s|                                  | j        S ru   )r   r  _local_rankr>   s    r   _get_local_rankz$PaddleCloudRoleMaker._get_local_rank  s(    & 	"!!!r   c                F    | j         s|                                  | j        S ru   )r   r  _local_device_idsr>   s    r   _get_local_device_idsz*PaddleCloudRoleMaker._get_local_device_ids  (    & 	"!!!%%r   c                F    | j         s|                                  | j        S ru   )r   r  _world_device_idsr>   s    r   _get_world_device_idsz*PaddleCloudRoleMaker._get_world_device_ids  r;  r   	list[str]c                F    | j         s|                                  | j        S )z.
        get endpoint of all trainers
        )r   r  r   r>   s    r   r   z+PaddleCloudRoleMaker._get_trainer_endpoints  *     & 	"!!!%%r   c                    | j         s|                                  | j        t          j        k    s
J d            | j        S )Nz0get_trainer_endpoint should be called by trainer)r   r  r8   r   r   r   r>   s    r   _get_trainer_endpointz*PaddleCloudRoleMaker._get_trainer_endpoint  sJ    & 	"!!!zT[(((> )(( !!r   c                p    | j         s|                                  | j        g k    s
J d            | j        S )zK
        Returns:
            string: all heter_trainers'endpoints
        z&Heter Worker Endpoints Not initialized)r   r  r  r>   s    r   _get_heter_worker_endpointsz0PaddleCloudRoleMaker._get_heter_worker_endpoints  sK    
 & 	"!!!,2224 322 ,,r   c                    | j         s|                                  | j        t          j        k    s
J d            | j        S )zR
        Returns:
            str: corresponding heter_trainer's endpoint
        z<_get_heter_worker_endpoint should be invoked by heter worker)r   r  r8   r   r   r   r>   s    r   _get_heter_worker_endpointz/PaddleCloudRoleMaker._get_heter_worker_endpoint  sM    
 & 	"!!!zT....J /.. !!r   c                F    | j         s|                                  | j        S )z.
        get endpoint of all pservers
        )r   r  r   r>   s    r   r   z+PaddleCloudRoleMaker._get_pserver_endpoints  rA  r   c                F    | j         s|                                  | j        S ru   )r   r  r
  r>   s    r   _get_coordinator_endpointsz/PaddleCloudRoleMaker._get_coordinator_endpoints$  s(    & 	"!!!**r   c                    | j         s|                                  | j        t          j        t          j        fv s
J d            | j        S ))
        invoked by heter worker
        zC_get_previous_trainers should be invoked by trainer or heter worker)r   r  r8   r   r   r   r  r>   s    r   _get_previous_trainersz+PaddleCloudRoleMaker._get_previous_trainers)  se     & 	"!!!zK
 
 
 
 Q
 
 
 55r   c                    | j         s|                                  | j        t          j        t          j        fv s
J d            | j        S )rL  z?_get_next_trainers should be invoked by trainer or heter worker)r   r  r8   r   r   r   r  r>   s    r   _get_next_trainersz'PaddleCloudRoleMaker._get_next_trainers5  se     & 	"!!!zK
 
 
 
 M
 
 
 11r   c                F    | j         s|                                  | j        S )z
        Return True if indispensable environment for fleetrun is not found
        (use python-run to launch fleet-code directly)
        )r   r  r   r>   s    r   _is_non_distributedz(PaddleCloudRoleMaker._is_non_distributedA  s*    
 & 	"!!!$$r   c                F    | j         s|                                  | j        S )z'
        get heter worker nums
        )r   r  _heter_trainers_numr>   s    r   _heter_worker_numz&PaddleCloudRoleMaker._heter_worker_numJ  s*     & 	"!!!''r   c                b    | j         s|                                  | j        t          j        k    S )z9
        whether current process is heter worker
        )r   r  r8   r   r   r>   s    r   _is_heter_workerz%PaddleCloudRoleMaker._is_heter_workerR  s1     & 	"!!!zT...r   c                   t          j        dd           | _        | j        Dd| _        d| _        t          j        | _        d| _        d| _        d| _	        d | _
        d| _        d S | j                            d          | _        t          dd           | _        | j         | j                            d          | _        ng | _        t          j        dd          | _        | j        dk    rt!          d	           n&d| _        | j                            d          | _        t          j        d
d           }|t%          d          t'          |          }t          j        dd           }|t%          d          |dvrt%          d| d          t          j        dd          }t          j        dd          }t          j        dd          }|dk    r|                    d          | _
        d| _        t+          | j
                  | _	        |dk    r|dv s
J d            n0	 |                    d          | _        n#  t%          d          xY w|dk    r|dv s
J d            n>	 |                    d          | _        n"#  t%          d          xY wd| _        d| _	        |dk    rt          j        }t          j        dd           }|t%          d          t'          |          }| j        rt          j        dd           | _        | j        t%          d          t'          | j                  | _        t          j        d d           | _        | j        t%          d!          t'          | j                  | _        t          j        d"d           | _        | j        t%          d#          t7          d$ t9          j        d%| j                  D                       | _        t          j        d&d           }|t%          d'          t          j        d(d           }	|	t%          d)          d*                    |	|g          }
|
| _        nG|d+k    r?t!          d,           t          j         }t'          t          j        dd-                    }n|d.k    rt          j!        }t          j        d&d           }|t%          d'          t          j        d(d           }	|	t%          d)          d*                    |	|g          }
|
| _        | j        "                    | j                  }ne|d/k    r^t          j#        }t          j        dd           | _        | j        t%          d          t'          | j                  | _        t          j        d d           | _        | j        t%          d!          t'          | j                  | _        t          j        d"d           | _        | j        t%          d#          t7          d0 t9          j        d%| j                  D                       | _        t          j        d1d           | _$        | j$        t%          d2          | j$        d3v s
J d4            | j$        d5k    r6t          j        d6d-          }d*                    | j$        |f          | _%        | j%        d7k    r6t          j        d8d-          }d*                    | j$        |f          | _%        t          j        d&d           }|t%          d'          t          j        d(d           }	|	t%          d)          d*                    |	|g          }
|
| _        |                    d          "                    |
          |z   }|| _        || _        || _        t+          d9 | j        D                       | _        d S ):NPADDLE_PSERVERS_IP_PORT_LISTrB   r   r   T,PADDLE_TRAINER_ENDPOINTSPADDLE_COORDINATOR_ENDPOINTSz$fl-ps > coordinator address is null!PADDLE_TRAINERS_NUMz@Can not find PADDLE_TRAINERS_NUM, please check your environment.TRAINING_ROLEz:Can not find TRAINING_ROLE, please check your environment.)TRAINERPSERVERHETER_TRAINERr   zRTRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER or COORDINATOR, but get z , please check your environment.&PADDLE_NEXT_HETER_TRAINER_IP_PORT_LIST*PADDLE_PREVIOUS_HETER_TRAINER_IP_PORT_LIST%PADDLE_ALL_HETER_TRAINER_IP_PORT_LIST)r^  r_  z*training_role should be trainer or pserverzCan not Find PADDLE_PREVIOUS_HETER_TRAINER_IP_PORT_LIST in env or its format doesn't match the requirement: 'IP:PORT,IP:PORT' .)r`  r_  z0training_role should be heter trainer or pserverz{Can not Find PADDLE_NEXT_HETER_TRAINER_IP_PORT_LIST in env or its format doesn't match the requirement: 'IP:PORT,IP:PORT' .Fr^  PADDLE_TRAINER_IDz>Can not find PADDLE_TRAINER_ID, please check your environment.STAGE_IDz5Can not find STAGE_ID, please check your environment.	STAGE_NUMz6Can not find STAGE_NUM, please check your environment.PADDLE_STAGE_TRAINERS_NUMzFCan not find PADDLE_STAGE_TRAINERS_NUM, please check your environment.c                ,    g | ]}t          |          S r   r   .0xs     r   
<listcomp>z0PaddleCloudRoleMaker._ps_env.<locals>.<listcomp>  s    NNNSVVNNNr   z\d+PADDLE_PORTz8Can not find PADDLE_PORT, please check your environment.POD_IPz3Can not find POD_IP, please check your environment.r   r   z>>> curr node is coordinator!0r_  r`  c                ,    g | ]}t          |          S r   ri  rj  s     r   rm  z0PaddleCloudRoleMaker._ps_env.<locals>.<listcomp>  s    JJJAQJJJr   HETER_DEVICE_TYPEz>Can not find HETER_DEVICE_TYPE, please check your environment.)r   gpuxpuz*HETER_DEVICE_TYPE should be cpu,gpu or xpurs  FLAGS_selected_gpusrt  FLAGS_selected_xpusc                D    h | ]}|                     d           d         S r   r   r   rj  s     r   	<setcomp>z/PaddleCloudRoleMaker._ps_env.<locals>.<setcomp>H  &    OOO1qwws||AOOOr   )&rj   getenvr   r,  r   r   r8   r   r1  rS  r  r   r   r   r   r
  rz   r  rM   r   r  r   r  r  r  r  r	  tuplerefindallrl   r   r   r   r   r   r  r  )r?   trainers_numtraining_rolenext_heter_trainer_eplistprevious_heter_trainer_eplistall_heter_trainer_eplistrS   
current_idcur_portcur_ipcurr_endpointheter_device_ids               r   _ps_envzPaddleCloudRoleMaker._ps_envZ  s    "$+I4!P!P!)%'D"!"DDJ DDO'(D$,0D)$(D!F!%!7!=!=c!B!B!1&"
 "
 !-%)%;%A%A#%F%FD""%'D"&(i*B'
 '
# &",,89999%)D"*.*E*K*KC*P*PD'y!6==R   <((	/488 L    !
 
 
  Uer  U  U  U  
 %'I4b%
 %
! )+	8")
 )
% $&93R$
 $
  $r)),D,J,J3,O,OD)37D0'*4+H'I'ID$,22$ )    @   
5;;C@@ ::$ Z   )B..$ )    F   
177<< 66$ V  
 49D0'(D$I%%;D#6==J! T   ZJ3 !#:t!<!<>)$O   "%T^!4!4"$)K">">?*$P   #&do"6"6')y/( ($ '/$`   (-NNRZ8L%M%MNNN( ($ y55H N   Yx..F~ I    HHfh%788M!.Dm++1222#DRY':C@@AAJJi'';Dy55H N   Yx..F~ I    HHfh%788M!.D/55d6HIIJJo--$DYz488DN~% K   !00DN iT::DO& L   "$/22DO#%9-H$#O#OD #+ \   $)JJFD4H!I!IJJJ$ $D  /1i#T/ /D+ .6 T   2 7    <	  
 .%77"$),A3"G"G-0XX4oF. .* )U22"$),A3"G"G-0XX4oF. .* y55H N   Yx..F~ I    HHfh%788M!.D(..s3399-HH 
 *
%OO8NOOOPPs   *I I/J
 
Jc                ,   t          t          j        dd                    | _        t          j        dd          | _        | j        dk    sJ t
          j        | _        t          d          | _	        t          j        d          | _
        | j	        d| _	        | j	        | _
        d| _        | j	                            d	          | _	        t          | j	                  | _        t          j        d
d           }|)t          j        dd           }t          |          | _        t          d | j	        D                       | _        t          j        d          | _        t          j        d          | _        t          j        d          | _        d S )Nrd  rp  PADDLE_TRAINING_ROLEr^  rZ  PADDLE_CURRENT_ENDPOINTz127.0.0.1:6170TrY  PADDLE_AUTO_PARALLEL_CONFIGr\  c                D    h | ]}|                     d           d         S rx  ry  rj  s     r   rz  z7PaddleCloudRoleMaker._collective_env.<locals>.<setcomp>\  r{  r   PADDLE_RANK_IN_NODEPADDLE_LOCAL_DEVICE_IDSPADDLE_WORLD_DEVICE_IDS)r   rj   r|  r   _training_roler   r   r8   r   r   r   r   r   r   r,  r1  r6  r9  r=  )r?   
auto_tunerr  s      r   _collective_envz$PaddleCloudRoleMaker._collective_envJ  s_   ry)<cBBCC i(>	JJ"i////[
!12L!M!MY'@AA!)%5D"!%!7D$(D!!%!7!=!=c!B!B !788Y<dCC
!9%:DAAL!$\!2!2DOO8NOOOPP9%:;;!#+D!E!E!#+D!E!Er   c           	     |   t          t          j        dd                    }|dvrd S t          t          j        dd                    }t          j        dd          }|t          j        j        t          j        j        t          j        j        fvrt          | j	        j
                  |dk    rdnd	}|t          j        j        k    rGt          j        d
d          }t          j        dd          }t          j        dd          }||||d}n|t          j        j        k    rd	}	t                      }
|
                                }d	|d<   | j        r$| j        d         }|                                 rd}	nCt          j        dd          }|                                 r|                                 dk    rd}	|                    d          \  }}||||	|d}nt          j        dd          }||d}|t          j        j        k    rd}n|t          j        j        k    rd}nd}t'          d| d| d|            | j	                            || j        |                                 |                                 |                                 ||           |t          j        j        k    rd	|d<   d S d S )NPADDLE_WITH_GLOOrp  )r   r   PADDLE_GLOO_RENDEZVOUS
SYS_JOB_IDrB   r   TFPADDLE_GLOO_FS_NAMEPADDLE_GLOO_FS_UGIPADDLE_GLOO_FS_PATH)rD   rE   rF   rC   ry   r   PADDLE_GLOO_HTTP_ENDPOINTr   )rG   rH   rC   rI   rJ   )rF   rC   r$   r&   r%   zGloo init with z: need_init_all: z, args: )rR   rS   rT   rU   rV   rW   rX   )r   rj   r|  r!   r'   r$   r&   r%   rM   r  r2   r   dictr   r   r   r   r   r   rz   r`   r8   r:   r;   r<   )r?   use_gloorendezvous_typerp   rW   rZ   r[   r\   rX   rI   managerrJ   	ep_rank_0r^   r_   types                   r   
_gloo_initzPaddleCloudRoleMaker._gloo_inita  s   ry!3S99::6!!F bi(@#FFGG<,,O O O #
 
 

 TZ1222 (A5do222y!6;;Hi 4b99Gy!6;;H$"$ &	 FF  444 %iiG#LLNNM',M)$" - 215	((** -(,%I&A2FF	??$$ -););)=)=)B)B(,% s++HB! &%6!. FF y!6;;H$ & F
 do222DD 444DDDTdTT]TTFTT	
 	
 	
 	
&MMOO''))''))' 	 	
 	
 	
 do222',M)$$$ 32r   c                    | j         s`| j        s|                                  n|                                  d| _         t	          j                    s|                                  dS dS dS z.
        generate role for role maker
        TN)r   r   r  r  paddlein_dynamic_moder  r>   s    r   r  z#PaddleCloudRoleMaker._generate_role  s~     & 	"& '$$&&&&*D#)++ "!!!!!	" 	"" "r   )F)r   r   rX   r   r   r   )r   r   r   r   r   )r   r   r   r   r   r  r   )r   r   r   r   r   r   r   r  )r   r   )r   r   )r   r  )r   r   )r   r4  )r   r?  r   r   )+r   r   r   r   r@   r   r   r   r  r  r  r  r  r   r   r$  r   r   r   r:   r;   r<   r   r3  r7  r:  r>  r   rC  rE  rG  r   rJ  rM  rO  rQ  rT  rV  r  r  r  r  __classcell__r  s   @r   r   r   %  sW        .      4' ' ' ' -58 8 8 8 8 @H> > > > >
* * * */ / / /   $ $ $ $   ) ) ) )) ) ) ). . . .
C C C C                     " " " "

 

 

 

             
& & & &
& & & &
& & & &" " " "
- 
- 
- 
-
" 
" 
" 
"& & & &+ + + +

6 
6 
6
2 
2 
2 
2% % % %( ( ( (/ / / /nQ nQ nQ nQ`F F F F.M- M- M- M-^" " " " " " " "r   r   c                  B     e Zd ZdZ	 	 dd fd
ZddZddZddZ xZS )UserDefinedRoleMakeraB  
    UserDefinedRoleMaker is an interface for distributed configuration initialization based on obtaining distributed related information from user-defined parameters.

    Examples:
        .. code-block:: python

            >>> import paddle.distributed.fleet as fleet
            >>> from paddle.distributed.fleet.base.role_maker import Role

            >>> fleet.UserDefinedRoleMaker(
            ...     current_id=0,
            ...     role=Role.SERVER,
            ...     worker_num=2,
            ...     server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
    Fr   r   	init_gloorX   r   r   r   c                N     t                      j        d||d| || _        d S )N)r   r  r   )r   r@   
_init_gloo)r?   r   r  rX   r  s       r   r@   zUserDefinedRoleMaker.__init__  sD     	 	
'9	
 	
@F	
 	
 	
 $r   c                   | j                             d          | _        | j                             dg           | _        | j                             dd          | _        | j        dk    r3t          | j                  dk    sJ t          | j                  | _        | j                             d          | _        | j                             d          | _        | j        t          j	        k    r5t          | j                  | j        k    r| j        | j                 | _
        n,| j        t          j        k    r| j        | j                 | _
        t          d | j        D                       | _        d S )Nserver_endpointsworker_endpointsrU   r   rS   r  c                D    h | ]}|                     d           d         S rx  ry  rj  s     r   rz  z<UserDefinedRoleMaker._user_defined_ps_env.<locals>.<setcomp>  r{  r   )r   rK   r   r   r,  r   r8   r   r   r   r   r   r1  r>   s    r   _user_defined_ps_envz)UserDefinedRoleMaker._user_defined_ps_env  sC   !%!1!12D!E!E!%!1!12Db!I!I!\--lA>>""t-..2222!$T%;!<!<D\%%f--
<++L99 J$+%%D*++d.>>>!%!78H!IDZ4;&&!%!78H!IDOO8NOOOPPr   c                   | j                             d          | _        | j                             d          | _        t	          | j                  | _        t          j        | _        t	          d | j        D                       | _	        d S )Nr  r  c                D    h | ]}|                     d           d         S rx  ry  rj  s     r   rz  zDUserDefinedRoleMaker._user_defined_collective_env.<locals>.<setcomp>  r{  r   )
r   rK   r   r   r   r,  r   r   r  r1  r>   s    r   _user_defined_collective_envz1UserDefinedRoleMaker._user_defined_collective_env  su    !%!1!12D!E!E<++L99 !788"kOO8NOOOPPr   c                    | j         s9| j        s|                                  n|                                  d| _         dS dS r  )r   r   r  r  r>   s    r   r  z#UserDefinedRoleMaker._generate_role  sY     & 	+& 4))++++11333&*D###	+ 	+r   )FF)r   r   r  r   rX   r   r   r   r  )	r   r   r   r   r@   r  r  r  r  r  s   @r   r  r    s         $ $	$ 	$ 	$ 	$ 	$ 	$ 	$Q Q Q Q*Q Q Q Q	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+r   r  ) r   
__future__r   rj   r~  r~   r   multiprocessingr   r   typingr   r   r   r	   numpyr   r  paddle.baser
   5paddle.distributed.fleet.base.private_helper_functionr   
backup_envr   numpy.typingnpt__all__r   r!   r   r   r  r   r   r   <module>r     s   !   " " " " " " 				 				   , , , , , , , , 8 8 8 8 8 8 8 8 8 8 8 8                 + * * * * * 
* * * * * * * *X X X X X X X Xv
yF yF yF yF yF yF yF yFrV
" V
" V
" V
" V
"= V
" V
" V
"rA+ A+ A+ A+ A+/ A+ A+ A+ A+ A+r   