
    Αi-p                        S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKJ	r	J
r
  SSKJr  SSKJrJr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!  / r"S r#S	 r$S
 r%S r&S r'S r(S r)S r*S r+S r,S r-\.S:X  a  \-" 5         gg)a	  
fleetrun is a module that spawns multiple distributed
process on each training node for gpu training and cpu training.
Usage:
    In both of single node training or multiple node training, this module
launch a process on each of the given gpu card or cpu machine.
    GPU training:
    1. for single node training with all visible gpu cards:
       fleetrun your_training_py (arg1 arg2 and all others)
    2. for single node training with [0,4) cards
       fleetrun --gpus="0,1,2,3" your_training_py (arg1 arg2 and all others)
    3. for multiple node training such as two node:192.168.0.16, 192.168.0.17
        on 192.168.0.16:
            fleetrun --ips="192.168.0.16,192.168.0.17" \
                your_training_py (arg1 arg2 and all others)
        on 192.168.0.17:
            fleetrun --ips="192.168.0.16,192.168.0.17" \
                your_training_py (arg1 arg2 and all others)
    CPU training:
    1. for single node training with multi servers and workers:
        fleetrun --server_num=2 --worker_num=2 your_training_py (arg1 arg2 and all others)
    2. for multiple node training such as two node:192.168.0.16, 192.168.0.17 \
        with 2 servers and 4 workers.
        on 192.168.0.16:
            fleetrun --servers="192.168.0.16:6170,192.168.0.17:6170" \
                --workers="192.168.0.16,192.168.0.17,192.168.0.16,192.168.0.17" \
                your_training_py (arg1 arg2 and all others)
        on 192.168.0.17:
            fleetrun --servers="192.168.0.16:6170,192.168.0.17:6171" \
                --workers="192.168.0.16,192.168.0.17,192.168.0.16,192.168.0.17" \
                your_training_py (arg1 arg2 and all others)
    3. use gloo backend for multiple node training such as two node:192.168.0.16, 192.168.0.17 \
        with 2 servers and 4 workers. (workers should set port)
        on 192.168.0.16:
            fleetrun --servers="192.168.0.16:6170,192.168.0.17:6170" \
                --workers="192.168.0.16:6171,192.168.0.17:6171,192.168.0.16:6172,192.168.0.17:6172" \
                your_training_py (arg1 arg2 and all others)
        on 192.168.0.17:
            fleetrun --servers="192.168.0.16:6170,192.168.0.17:6170" \
                --workers="192.168.0.16:6171,192.168.0.17:6171,192.168.0.16:6172,192.168.0.17:6172" \
                your_training_py (arg1 arg2 and all others)
    N)	REMAINDERArgumentParser)	framework)cloud_utilslaunch_utils)enable_elasticlaunch_elastic)
DeviceModeDistributeModeParameterServerLauncherblock_windows_and_macoscheck_backenddirect_startfind_free_portsget_clusterget_host_name_ip
get_loggerloggerstart_local_trainersterminate_local_procswatch_local_trainersc                     [        S5        [        [        U 5      R                  5       5       H  u  p[        U SU 35        M     [        S5        g )Nz0-----------  Configuration Arguments -----------z: z0------------------------------------------------)printsortedvarsitems)argsargvalues      _/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/fleet/launch.py_print_argumentsr!   Y   sE    	
<=T$Z--/0
Rw  1	
<=    c                     [        SS9n U R                  S5      nUR                  S[        SSS9  UR                  S[        [        R
                  R                  S	S
5      SS9  UR                  S[        SSS9  UR                  S[        SSS9  [        R                  R                  5       (       a&  UR                  S[        SSS9  UR                  SSS9  [        R                  R                  5       (       a&  UR                  S[        SSS9  UR                  SSS9  UR                  S[        SS9  UR                  S[        S9  U R                  S5      nUR                  S [        S!S"S9  UR                  S#[        SS$S9  UR                  S%[        SS&S9  UR                  S'[        S(S)S9  U R                  S*5      nUR                  S+[        S,S-S9  UR                  S.[        S,S/S9  UR                  S0[        S,S1S9  UR                  S2[        S,S3S9  UR                  S4[        S,S5S9  UR                  S6[        S7S9  UR                  S8[        S9S9  UR                  S:[        S;S9  UR                  S<[        S=S9  UR                  S>[        S?S9  U R                  S@5      nUR                  SA[        SBS9  UR                  SC[        SDS9  UR                  SE[        SFS9  UR                  SG[        SHS9  UR                  SI[        SJSKS9  UR                  SL[        SMS9  UR                  SN[        S(SOS9  U R                  5       u  pVU$ )PzI
Helper function parsing the command line options
@retval ArgumentParser
zstart paddle training using multi-process mode.
see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/training/cluster_howto.html#permalink-8--nccl2-
)descriptionzBase Parametersz	--log_dirlogz7The path for each process's log. Default --log_dir=log/)typedefaulthelpz	--backendPADDLE_DISTRI_BACKENDautozhSpecify the backend, can be gloo|nccl|bkcl|auto|heter. Default value is auto which prefers nccl or bkcl.z--nproc_per_nodeNzThe number of processes to launch on a node.In gpu training, it should be less or equal to the gpus number of you system(or you set by --gpus). And so each process can bound to one or average number of gpus.z
--run_modez.run mode of job, can be:collective/ps/ps-heterz--gpuszmIt's for gpu training.For example:--gpus="0,1,2,3" will launch four training processes each bound to one gpu.z--selected_gpusgpus)destz--xpuszoIt's for xpu training. For example: --xpus="0,1,2,3" will launch four training processes each bound to one xpu.z--selected_xpusxpustraining_scriptzThe full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script)r&   r(   training_script_args)nargszCollective Parameters--ipsz	127.0.0.1z=Paddle cluster nodes ips, such as 192.168.0.16,192.168.0.17..z--cluster_topo_pathz|A json format file will be stored in this path which is usedto represent the cluster topology information for auto parallel.z--rank_mapping_pathzkA json format file will be stored in this path which is usedto map processes to machines for auto parallel.z--enable_auto_mappingFz>Set true to enable the lazy launch for auto-parallel scenario.zParameter-Server Parameters	--servers zUser defined servers ip:port	--workerszUser defined workers ip:port--coordinatorsz!User defined coordinators ip:port--heter_workersz<User defined heter workers in each stage ip1:port1;ip2:port2--heter_devicesz4User defined heter devices in each stage cpu;gpu;cpu--worker_numznumber of workers--coordinator_numznumber of coordinators--server_numznumber of servers--heter_worker_numz+number of heter_workers in each stage 1;2;3--http_portzGloo http PortzElastic Parametersz--elastic_serverzetcd server host:portz--elastic_pre_hookzelastic pre_hook shell cmdz--job_idzjob unique idz--npzjob pod/node numberz--scaler   zscale npz--hostz bind host, default to POD_IP envz--forcezupdate np force)r   add_argument_groupadd_argumentstrosenvirongetintr   coreis_compiled_with_cudais_compiled_with_xpur   boolparse_known_args)parser
base_groupcollective_groupps_groupelastic_group
known_args_s          r    _parse_argsrP   `   s5   
 F
 **+<=JF	   

6?<	   3	   =	   ~~++--Z	 	  	
 	 1?~~**,,Z	 	  	
 	 1?   2)D 001HI!!L	 "  !!K	 "  !!:	 "  !!M	 "  (()FGH#r0N   #r0N   0	   K	   C	   .s9LM#,D   .s9LM:  
 -c8HI --.BCM+B   3-I   z/JvC6KLysAJOs!C   e2C   ++-MJr"   c           	         U R                   R                  S5       Vs/ s H  o3R                  5       PM     nn[        U5      S:X  a  US   nO*U R                  (       a  U R                  nO[        5       u  peXT;   d   SU SU S35       eUR                  U5      n[        R                  " SU SU S	U 35        S n[        R                  " 5       (       dk  [        U5      S::  a\  [        R                  R                  S
5      c<  [        [        U5      5      nUb$  [        U5      n[        R                   " SU 35        OjSn	[        R                  R                  S
5      b(  [#        [        R                  R                  S
5      5      n	[        [%        X[        U5      -   5      5      n/ n
U H)  nU
R'                  U Vs/ s H	  o SU 3PM     sn5        M+     [)        XEXU5      $ s  snf s  snf )N,   r   zCan't find your local ip {z} in node_ips: {}zparsed from args: node_ips:z	 node_ip:z node_rank:FLAGS_START_PORTzfind free ports:i  :)ipssplitstriplenhostr   indexr   debugr   use_paddlecloudr@   rA   rB   r   listinforC   rangeappendr   )r   device_modedevices_per_procxnode_ipsnode_iprO   	node_rank
free_ports
start_porttrainer_endpointsipports                r    get_cluster_from_argsrn     s   #'88>>##67#6a	#6H7
8}1+99iiG)+JA 
%gY.@
"M w'I
LL
%hZy	YKX J''))MQJJNN-.6$S)9%:;
!j)JKK*:,78
::>>,-9RZZ^^,>?@J%
=M9N,NOP
  Z!HZTD$.Z!HI ,;K M 8J "Is   G*G/
c                 0   U R                   (       aA  [        U R                   R                  S5      5      S:  a  [        SU R                    S35      eU R                  (       a  U R                  S:X  d   S5       eU R
                  (       a  [        S5      eg)	NrR   rS   zHCPUONLY launch only support single trainer, that is len(ips)=1, but got .cpuonlyz/CPUONLY launch only support run mode is CPUONLYz1CPUONLY launch can't have --servers as arguments.T)rW   rZ   rX   RuntimeErrorrun_modeserversr   s    r    cpuonly_checkrv   1  s    xxCs+,q0VW[W_W_V``ab
 	
 }}}}	) 	
=	
) ||NOOr"   c                    U R                   S:X  a  [        U 5        U R                  (       a  [        R                  / p!O[
        R                  " U 5      u  p[        R                  " 5       n[        R                  " SU SU SU 35        [        R                  " S5      nS nS nSn[        R                  R                  S5      b  [        R                  R                  S5      nU R                  (       GaH  U R                  c   S5       eU R                   =(       d    [        R                  " S	5      nU(       Gd=  [#        S
5      [        R                  S'   [#        [%        X5      5      [        R                  S'   [&        R(                  " 5       R+                  5       n	[        R,                  R/                  U	S5      n[#        U5      [        R                  S	'   [0        R2                  SS  n
SR/                  U
5      [        R                  S'   [#        U R                  5      [        R                  S'   [#        U R                  5      [        R                  S'   [
        R4                  " X5      u  nnXV4$ [#        S5      [        R                  S'   [#        [%        X5      5      [        R                  S'   [#        U R                  5      [        R                  S'   [#        U5      [        R                  S	'   [#        U R                  5      [        R                  S'   [
        R6                  " X5      u  nn XV4$ [        R8                  " 5       (       aF  US:w  a@  [        R:                  " U R<                  XU5      u  pV[        R                  " SU 35        XV4$ [?        XU5      u  pV[        R                  " SU 35        XV4$ )Ngloozparsed from args trainers_num:z mode:z	 devices:CUDA_VISIBLE_DEVICESi  rU   zAThe cluster topology must be provided when enabling auto mapping.PADDLE_RANK_MAPPING_PATHTPADDLE_NEED_RANK_MAPPINGPADDLE_ENABLE_ELASTICzauto_parallel_rank_mapping.jsonrS    PADDLE_ORIGINAL_CMD_ARGSPADDLE_CLUSTER_TOPO_PATHPADDLE_ENABLE_AUTO_MAPPINGFzget cluster from cloud:zget cluster from args:) backendrv   enable_auto_mappingr
   GPUr   get_device_proc_infor   get_trainers_numr   r]   r@   getenvrA   rB   cluster_topo_pathrank_mapping_pathr?   r   pathlibPathcwdpathjoinsysargv1get_mapped_cluster_from_args_without_rank_mapping.get_mapped_cluster_from_args_with_rank_mappingr^   get_cloud_clusterrW   rn   )r   rc   rd   trainers_numcuda_visible_devicesclusterpodrj   r   r   original_argss              r    get_cluster_infor   ?  s5   ||vd+5>>2&*6*K*K+
' //1L
LL
(f[MScRde 99%;<G
CJ	zz~~()5ZZ^^$67
%%1 	
O	
1 !22 
bii&7
 !58YBJJ1225t13BJJ./ ,,.$$&C "6! 699J5KBJJ12HHQRLM58XXm5LBJJ12589O9O5PBJJ127:((8BJJ34 NN@ <7 69ZBJJ1225t13BJJ./ 699O9O5PBJJ12589J5KBJJ127:((8BJJ34 KK < 
	$	$	&	&<1+<"44HHkZ
 	.wi89 <	 -/
 	-gY78<r"   c                     [         R                   " [        R                  R                  5       5      n[        [        R                  " SS5      5      US'   SUS'   XS'   U R
                  US'   U$ )NPADDLE_WITH_GLOO03PADDLE_GLOO_RENDEZVOUSPADDLE_GLOO_FS_PATHr)   )copyr@   rA   r?   r   r   )r   tmp_dirglobal_envss      r    get_global_envsr     s`    ))BJJOO-.K&)"))4F*L&MK"#,/K())0%&+/<<K'(r"   c           	         [         R                  " 5       n[        U 5      u  p#[        X5      n[	        UUU R
                  U R                  U R                  US9n[        U5       H*  u  pg[        SUR                  R                   SU 35        M,       [        XRR                  5       5      nU(       d0  [        R                  " S5        [        R                   " SU 35        O["        R$                  " S5        Mi  [.        R0                  R3                  U5      (       a  [4        R6                  " U5        g g !   [        R&                  " S5        [)        U5        [*        R,                  " S5         Ny= f)	N)r.   r/   log_direnvszlaunch proc_id:z idx:zLocal processes completed.z	POD info:   zTerminating... exitrS   )tempfilemkdtempr   r   r   r.   r/   r   	enumerater   procpidr   trainers_nranksr   r`   r]   timesleepwarningr   r   exitr@   r   existsshutilrmtree)	r   r   r   r   r   procsidxr   alives	            r    launch_collectiver     s"    G#D)LG!$0K ,,!66E u%			eC59: & 	(0G0G0IJE89y./JJqM   
ww~~gg 	NN01!%(HHQKs   AD9 %D9 99E4c                     [         R                  " 5       nU(       a   U[        R                  :X  a  [	        U 5        g [        X5      nUR                  5         g )N)r   r^   r   PSr   r   start_ps)r   distribute_mode
cloud_flagps_launchers       r    	launch_psr     sH    ,,.J o):)::T *$@K
r"   c                     U R                   S:w  a  g [        R                  R                  5       (       a  SU l         g [        R                  R	                  5       (       a  SU l         g SU l         g )Nr*   ncclbkclrx   )r   r   rD   rE   rF   ru   s    r    infer_backendr     sL    ||v~~++--		,	,	.	.r"   c           	         [        U 5        U R                  b  U R                  S;   d   eU R                  S:X  a  [        R                  $ U R                  S:X  a  [        R                  $ U R                  S:X  a  [        R
                  $ / SQnS/n/ SQnSS	/nU Vs/ s H,  oUS
R                  [        R                  SS 5      ;   d  M*  UPM.     nnU Vs/ s H-  nUS
R                  [        R                  SS 5      ;   d  M+  UPM/     nn[        U5      S:  a  [        U5      S:  a  [        S5      e[        R                  R                  5       (       a  [        R                  R                  5       n	OD[        R                  R                  5       (       a  [        R                  R!                  5       n	OSn	[        U5      S:  a  ["        R$                  " SU SU	 35        ['        [)        U5      [)        U5      -  5      n
['        [)        U5      [)        U5      -  5      n[        U
5      S:  a  [        R
                  $ [        R                  $ [        U5      S:  a,  ["        R$                  " SU SU	 35        [        R                  $ [        R                  R                  5       (       dj  [        R                  R                  5       (       dG  U R*                  (       a&  ["        R,                  " S5        [        R                  $ [        R                  $ ["        R,                  " S5        [        R                  $ s  snf s  snf )N)
collectivepsps-heterr   r   r   )r8   r:   r;   r2   r4   r6   r7   r<   r1   )r;   r6   r7   r9   r5   r}   rS   z}Only one mode(Collective or Parameter-Server) can be selected at the same time, but more than one configuration was received.r   z,Run parameter-sever mode. pserver arguments:z, accelerators count:z#Run collective mode. gpu arguments:z, cuda count:zuNot found distinct arguments and not compiled with cuda or xpu. But found args.servers not empty, default use ps modezWNot found distinct arguments and compiled with cuda or xpu. Default use collective mode)r   rs   r   
COLLECTIVEr   PS_HETERr   r   r   rZ   
ValueErrorr   rD   rE   get_cuda_device_countrF   get_xpu_device_countr   r`   r_   setrt   r   )r   ps_argscollective_argsps_heter_argscoordinator_argsps_arghas_ps_argsco_arghas_collective_argsacceleratorshas_ps_heter_argshas_coordinator_argss               r    which_distributed_moder     s   $}} }} @@@@}}$(((	$	   	*	$&&&	G iOPM+-=> %$6#((388Ab>2J(JW  
 &%FSXXchhqn-- 	%   ;!$7 81 < L
 	
 ~~++-- ~~;;=		,	,	.	. ~~::<
;!:;-G\]i\jk	
 ![!1C4F!FG#C$4s;K7L$LM !A%!***!$$$	 	!A	%12E1FmT`Sab	
 ((( 4466NN7799||L &(((%000NN. ",,,ms   )MM*M	M	c                     [        5       n [        5       n[        U 5        U R                  S:X  a  [	        U 5      nOIU R
                  S:X  d  U R
                  b   S5       e[        U R                  5        [        R                  nU R                  S:X  a  UR                  " S5        [        U R                  5        [        X5      (       a  [        X5        gU[        R                  :X  a  [        U 5        g[        X5        g)aM  
Paddle distribution training entry ``python -m paddle.distributed.launch``.

Usage:
    .. code-block:: bash
        :name: code-block-bash1

        python -m paddle.distributed.launch [-h] [--log_dir LOG_DIR] [--nproc_per_node NPROC_PER_NODE] [--run_mode RUN_MODE] [--gpus GPUS]
                         [--selected_gpus GPUS] [--ips IPS] [--servers SERVERS] [--workers WORKERS] [--heter_workers HETER_WORKERS]
                         [--worker_num WORKER_NUM] [--server_num SERVER_NUM] [--heter_worker_num HETER_WORKER_NUM]
                         [--http_port HTTP_PORT] [--elastic_server ELASTIC_SERVER] [--job_id JOB_ID] [--np NP] [--scale SCALE]
                         [--host HOST] [--force FORCE]
                         training_script ...


Base Parameters:
    - ``--log_dir``: The path for each process's log. e.g., ``--log_dir=output_dir``. Default ``--log_dir=log``.

    - ``--nproc_per_node``: The number of processes to launch on a node. In gpu training, it should be less or equal to the gpus number of you system(or you set by --gpus).  e.g., ``--nproc_per_node=8``

    - ``--run_mode``: run mode of job, can be:collective/ps/ps-heter. e.g., ``--run_mode=ps``. Default ``--run_mode=collective``.

    - ``--gpus``: It's for gpu training. e.g., ``--gpus=0,1,2,3`` will launch four training processes each bound to one gpu.

    - ``--selected_gpus``: gpus aliases, recommend to use ``--gpus``.

    - ``--xpus``: It's for xpu training if xpu is available. e.g., ``--xpus=0,1,2,3``.

    - ``--selected_xpus``: xpus aliases, recommend to use ``--xpus``.

    - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``

    - ``training_script_args``: The args of training_script. e.g., ``--lr=0.1``

Collective Parameters:
    - ``--ips``: Paddle cluster nodes ips, e.g., ``--ips=192.168.0.16,192.168.0.17``. Default ``--ips=127.0.0.1``.

Parameter-Server Parameters:
    - ``--servers``: User defined servers ip:port, e.g., ``--servers="192.168.0.16:6170,192.168.0.17:6170"``

    - ``--workers``: User defined workers ip:port, e.g., ``--workers="192.168.0.16:6171,192.168.0.16:6172,192.168.0.17:6171,192.168.0.17:6172"``

    - ``--heter_workers``: User defined heter workers ip1:port1;ip2:port2, e.g., ``--heter_workers="192.168.0.16:6172;192.168.0.17:6172"``

    - ``--worker_num``: Number of workers (It recommend to set when in the emulated distributed environment using single node)

    - ``--server_num``: Number of servers (It recommend to set when in the emulated distributed environment using single node)

    - ``--heter_worker_num``: Number of heter_workers in each stage (It recommend to set when in the emulated distributed environment using single node)

    - ``--heter_devices``: Type of heter_device in each stage

    - ``--http_port``: Gloo http Port

Elastic Parameters:
    - ``--elastic_server``: etcd server host:port, e.g., ``--elastic_server=127.0.0.1:2379``

    - ``--job_id``: job unique id, e.g., ``--job_id=job1``

    - ``--np``: job pod/node number, e.g., ``--np=2``

    - ``--host``: bind host, default to POD_IP env.


Returns:
    ``None``

Examples 1 (collective, single node):
    .. code-block:: bash
        :name: code-block-example-bash1

        # For training on single node using 4 gpus.

        python -m paddle.distributed.launch --gpus=0,1,2,3 train.py --lr=0.01

Examples 2 (collective, multi node):
    .. code-block:: bash
        :name: code-block-example-bash2

        # The parameters of --gpus and --ips must be consistent in each node.

        # For training on multiple nodes, e.g., 192.168.0.16, 192.168.0.17

        # On 192.168.0.16:

        python -m paddle.distributed.launch --gpus=0,1,2,3 --ips=192.168.0.16,192.168.0.17 train.py --lr=0.01

        # On 192.168.0.17:
        python -m paddle.distributed.launch --gpus=0,1,2,3 --ips=192.168.0.16,192.168.0.17 train.py --lr=0.01

Examples 3 (ps, cpu, single node):
    .. code-block:: bash
        :name: code-block-example-bash3

        # To simulate distributed environment using single node, e.g., 2 servers and 4 workers.

        python -m paddle.distributed.launch --server_num=2 --worker_num=4 train.py --lr=0.01

Examples 4 (ps, cpu, multi node):
    .. code-block:: bash
        :name: code-block-example-bash4

        # For training on multiple nodes, e.g., 192.168.0.16, 192.168.0.17 where each node with 1 server and 2 workers.

        # On 192.168.0.16:

        python -m paddle.distributed.launch --servers="192.168.0.16:6170,192.168.0.17:6170" --workers="192.168.0.16:6171,192.168.0.16:6172,192.168.0.17:6171,192.168.0.17:6172" train.py --lr=0.01

        # On 192.168.0.17:

        python -m paddle.distributed.launch --servers="192.168.0.16:6170,192.168.0.17:6170" --workers="192.168.0.16:6171,192.168.0.16:6172,192.168.0.17:6171,192.168.0.17:6172" train.py --lr=0.01

Examples 5 (ps, gpu, single node):
    .. code-block:: bash
        :name: code-block-example-bash5

       # To simulate distributed environment using single node, e.g., 2 servers and 4 workers, each worker use single gpu.

        export CUDA_VISIBLE_DEVICES=0,1,2,3
        python -m paddle.distributed.launch --server_num=2 --worker_num=4 train.py --lr=0.01

Examples 6 (ps, gpu, multi node):
    .. code-block:: bash
        :name: code-block-example-bash6

        # For training on multiple nodes, e.g., 192.168.0.16, 192.168.0.17 where each node with 1 server and 2 workers.

        # On 192.168.0.16:

        export CUDA_VISIBLE_DEVICES=0,1
        python -m paddle.distributed.launch --servers="192.168.0.16:6170,192.168.0.17:6170" --workers="192.168.0.16:6171,192.168.0.16:6172,192.168.0.17:6171,192.168.0.17:6172" train.py --lr=0.01

        # On 192.168.0.17:

        export CUDA_VISIBLE_DEVICES=0,1
        python -m paddle.distributed.launch --servers="192.168.0.16:6170,192.168.0.17:6170" --workers="192.168.0.16:6171,192.168.0.16:6172,192.168.0.17:6171,192.168.0.17:6172" train.py --lr=0.01

Examples 7 (ps-heter, cpu + gpu, single node):
    .. code-block:: bash
        :name: code-block-example-bash7

        # To simulate distributed environment using single node, e.g., 2 servers and 4 workers, two workers use gpu, two workers use cpu.

        export CUDA_VISIBLE_DEVICES=0,1
        python -m paddle.distributed.launch --server_num=2 --worker_num=2 --heter_worker_num=2 train.py --lr=0.01

Examples 8 (ps-heter, cpu + gpu, multi node):
    .. code-block:: bash
        :name: code-block-example-bash8

        # For training on multiple nodes, e.g., 192.168.0.16, 192.168.0.17 where each node with 1 server, 1 gpu worker, 1 cpu worker.

        # On 192.168.0.16:

        export CUDA_VISIBLE_DEVICES=0
        python -m paddle.distributed.launch --servers="192.168.0.16:6170,192.168.0.17:6170" --workers="192.168.0.16:6171,192.168.0.17:6171" --heter_workers="192.168.0.16:6172,192.168.0.17:6172" train.py --lr=0.01

        # On 192.168.0.17:

        export CUDA_VISIBLE_DEVICES=0
        python -m paddle.distributed.launch --servers="192.168.0.16:6170,192.168.0.17:6170" --workers="192.168.0.16:6171,192.168.0.17:6171" --heter_workers="192.168.0.16:6172,192.168.0.17:6172" train.py --lr=0.01

Examples 9 (elastic):
    .. code-block:: bash
        :name: code-block-example-bash9

        python -m paddle.distributed.launch --elastic_server=127.0.0.1:2379 --np=2 --job_id=job1  --gpus=0,1,2,3 train.py

r*   r   Nz7When backend is not 'auto', run mode must be collectiverx   zlaunch start with CPUONLY mode)rP   r   r!   r   r   rs   r   r   r   r   r   r   r	   r   r   )r   r   r   s      r    launchr   5  s    V =D\FT||v0
 }},0E 	
E	
E 	dll#(33 ||v78 d,,t-.333$$(r"   __main__)/__doc__r   r@   r   r   r   r   r   argparser   r   paddler   paddle.distributed.fleetr   r    paddle.distributed.fleet.elasticr   r	   %paddle.distributed.fleet.launch_utilsr
   r   r   r   r   r   r   r   r   r   r   r   r   r   __all__r!   rP   rn   rv   r   r   r   r   r   r   r   __name__ r"   r    <module>r      s   )V  	   
   .  > K   " >bJ)XRj"J$R-jJ)Z z
H r"   