
    ΑiY`                       S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJrJ	r	J
r
Jr  S SKJr  S SKJr  S SKJrJr  S SKJr  S SKJr  S S	KJrJrJr  S S
KJrJrJr  S SKJ r   \(       a  S SK!J"r"J#r#  S SK$J%r%J&r&   " S S\5      r'/ r( " S S5      r)S r*S r+S r,S r-S r.S r/S r0S r1 " S S5      r2    S             SS jjr3g)    )annotationsN)TYPE_CHECKINGAnyLiteral	TypedDict)core)
get_device)_get_trainers_numget_cluster_and_pod)use_paddlecloud)get_cluster_from_args)
DeviceModeblock_windows_and_macoscheck_backend)_prepare_trainer_env_print_argumentsget_host_name_ip)	set_flags)CallableIterable)NotRequiredUnpackc                  >    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	rg
)_SpawnOptions3   z3NotRequired[Literal['spawn', 'fork', 'forkserver']]start_methodzNotRequired[str | None]gpusxpuszNotRequired[str]ips N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r        X/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/spawn.pyr   r   3   s    II%%%%r'   r   c                      \ rS rSrS rSrg)ParallelEnvArgs=   c                X    S U l         S U l        S U l        S U l        SU l        S U l        g )NT)cluster_node_ipsnode_ipr   started_portprint_configselected_devices)selfs    r(   __init__ParallelEnvArgs.__init__>   s;     $   $ ! ! !%r'   )r-   r.   r0   r1   r/   r   N)r!   r"   r#   r$   r3   r&   r    r'   r(   r*   r*   =   s    %r'   r*   c                    / SQn/ SQnU  H=  nX1;  d  M
  X2;   a!  [         R                  " SU S3[        5        M0  [        SU S35      e   g )N)r   r   r   r   r0   backend)r1   r/   r-   r.   r   zThe config option (z|) of `paddle.distributed.spawn` is deprecated. Please use the latest config options stated in the `spawn` API documentation.z1) of `paddle.distributed.spawn` is not supported.)warningswarnDeprecationWarning
ValueError)optionssupported_optionsdeprecated_optionskeys       r(   _options_valid_checkr?   V   sh     '()# /d d& !)#._`  r'   c                 X   [        5       n U [        R                  " 5       ;   a(  [        R                  " U R	                  S5      S   5      $ SU ;   a  [        R
                  " 5       $ SU ;   a  [        R                  " 5       $ SU ;   a  [        R                  " 5       $ [        SU  S35      e)N:r   gpuxpucpuI`paddle.distributed.spawn` does not support parallel training on device `` now.)
r	   r   get_available_custom_deviceget_custom_device_countsplitget_cuda_device_countget_xpu_device_countmultiprocessing	cpu_countRuntimeErrordevices    r(   _get_default_nprocsrQ   u   s    \F1133++FLL,=a,@AA	&))++	&((**	&((**WX^W__ef
 	
r'   c                     [        5       n U [        R                  " 5       ;   a  gSU ;   a  gSU ;   a  gSU ;   a  g[        SU  S	35      e)
NxcclrB   ncclrC   bkclrD   gloorE   rF   )r	   r   rG   rN   rO   s    r(   _get_default_backendrW      sW    \F1133	&	&	&WX^W__ef
 	
r'   c                    S nU R                  S5       Vs/ s H  o"R                  5       PM     nn[        U5      S:X  a  US   nU$ [        5       u  pAU$ s  snf )N,   r   )rI   striplenr   )r   r.   xnode_ips_s        r(   _get_node_ipr`      sY    G#&99S>2>a	>H2
8}1+ N &'
N 3s   Ac           	        SU;  d	  US   S:X  a  [        5       US'   [        US   5        [        US   5        / n[        5       nUR	                  SS 5      Ul        UR
                  c+  UR	                  SS 5      Ul        UR
                  c  SUl        US   S:X  Ga  UR	                  SS 5      Ul        UR                  c  UR	                  SS 5      Ul        [        R                  " S	S 5      nUb  US
:X  a8  [        [        R                  " 5       5       Vs/ s H  n[        U5      PM     nnOUR                  S5      nUR                  ch  [        U5      U :  a  [        S[        U5       SU  S35      eSR!                  [        SU 5       Vs/ s H  n[        Xe   5      PM     sn5      Ul        GOUR                  R                  S5      n[        U5      U :w  a  [#        S[        U5       SU  S35      eU H2  nX;  d  M
  [#        SR%                  USR!                  U5      5      5      e   GO>US   S:X  Ga  UR	                  SS 5      Ul        UR                  c  UR	                  SS 5      Ul        [        R                  " SS 5      nUb  US
:X  a8  [        [        R&                  " 5       5       Vs/ s H  n[        U5      PM     nnOUR                  S5      nUR                  ch  [        U5      U :  a  [        S[        U5       SU  S35      eSR!                  [        SU 5       Vs/ s H  n[        Xe   5      PM     sn5      Ul        GOUR                  R                  S5      n[        U5      U :w  a  [#        S[        U5       SU  S35      eU H2  nX;  d  M
  [#        SR%                  USR!                  U5      5      5      e   GOUS   S:X  a  [(        R*                  " S5        SUl        S Ul        UR
                  Ul        UR	                  SS 5      b   S5       e[        UR
                  R                  S5      5      S::  d   S 5       e[1        5       S:X  d   S!5       eOUS   S":X  a  S Ul        [        R2                  " 5       S   n	[        R                  " S#U	 S$3S 5      nUb  US
:X  a9  [        [        R4                  " U	5      5       Vs/ s H  n[        U5      PM     nnOUR                  S5      n[        U5      U :  a  [        S[        U5       SU  S%U	 S&35      eSR!                  [        SU 5       Vs/ s H  n[        Xe   5      PM     sn5      Ul        UR	                  S'S 5      Ul        UR6                  c  [9        UR
                  5      Ul        UR	                  S(S 5      Ul        UR	                  SS 5      Ul        UR<                  c  [=        5       Ul        US   S:X  a3  [?        [        SU 5      5      n
[A        U[B        RD                  U
5      u  pO[G        U5      u  pURH                   H!  nURK                  [M        XUS   5      5        M#     UR	                  S)S*5      Ul'        URN                  (       a  [Q        U5        U$ s  snf s  snf s  snf s  snf s  snf s  snf )+Nr6   autor   r-   z	127.0.0.1rT   r   r1   CUDA_VISIBLE_DEVICES rY   zthe number of visible devices(z-) is less than the number of spawn processes(z), please ensure that the correct `nprocs` argument is passed or the environment variable `CUDA_VISIBLE_DEVICES` is correctly configured.r   zThe number of selected devices(z0) is not equal to the number of spawn processes(zK), please ensure that the correct `nprocs` and `gpus` arguments are passed.zCThe selected gpu card {} cannot found in CUDA_VISIBLE_DEVICES ({}).rU   r   XPU_VISIBLE_DEVICESz), please ensure that the correct `nprocs` argument is passed or the environment variable `XPU_VISIBLE_DEVICES` is correctly configured.zK), please ensure that the correct `nprocs` and `xpus` arguments are passed.zBThe selected xpu card {} cannot found in XPU_VISIBLE_DEVICES ({}).rV   zYour model will be trained under CPUONLY mode by using GLOO,because CPUPlace is specified manually or your installed PaddlePaddle only support CPU Device.Tr   z.CPUONLY spawn doesn't support use paddle cloudrZ   zJCPUONLY spawn only support single trainer, that is len(ips)=1, but got %s.z+CPUONLY spawn doesn't support multi-trainerrS   FLAGS_selected_szj), please ensure that the correct `nprocs` argument is passed or the environment variable `FLAGS_selected_zs` is correctly configured.r.   r/   r0   F))rW   r   r   r*   getr-   r1   osgetenvranger   rJ   strrI   r\   rN   joinr:   formatrK   r7   r8   paddle_cpuonlyr   r
   get_all_custom_device_typerH   r.   r`   r/   r   listr   r   CPUr   trainersappendr   r0   r   )nprocsr;   processes_env_listargsenv_devicesr]   env_devices_listselected_device_listcard_idcustom_device_namedevices_per_procclusterpodtrainers                 r(   _get_subprocess_env_listr      s    79#5#?13	')$%GI./  D $KKt4D$ ',> E  ($/D! yV# 'FD 9  ($+KK0BD$ID!ii 6=+"3 %d&@&@&B C  C1A C     +005  (#$v-"4S9I5J4K L**0 2FF  %(HH38F3CD3Ca%()3CD%D! $(#8#8#>#>s#C '(F2 5c:N6O5P Q55;H =HH 
 02$55;V#SXX.>%?6  0 
	v	% 'FD 9  ($+KK0BD$ID!ii 5t<+"3 %d&?&?&A B  B1A B     +005  (#$v-"4S9I5J4K L**0 2EE  %(HH38F3CD3Ca%()3CD%D! $(#8#8#>#>s#C '(F2 5c:N6O5P Q55;H =HH 
 02$44:F#SXX.>%?5  0 
	v	%m	
 # $(({{,d3; 	
<	
; 4((..s349 	
X	
9 !"a' 	
9	
' 
	v	% $!<<>qAii/2D1EQ GN+"3 t;;<NOP PA AP   
  +005 6)05E1F0G H&&,X .##5"66QS  !$/4Q/?@/?!S!$%/?@!

 ;;y$/DL||#D$9$9:ND9D";;'8$?D#.0 yV#a 01,*.."2
 +40 <<!! 793EF	
    NE:D_  E2  ER  As$   Z+Z0;Z54Z:"Z?[c                     [         R                  R                  SS 5        [         R                  R                  SS 5        g )N
http_proxyhttps_proxy)ri   environpopr    r'   r(   _remove_risky_envr   W  s(     JJNN<&JJNN=$'r'   c                    US:X  a  [        SU S   05        OUS:X  a  [        SU S   05        O U  H  nX   [        R                  U'   M     g )NrT   FLAGS_selected_gpusrU   FLAGS_selected_xpus)r   ri   r   )env_dictr6   var_names      r(   _set_trainer_envr   ^  s\     &((3H*IJK	F	((3H*IJK
 	'1

8 r'   c                    [        5         [        XE5        U " U6 nUR                  U5        g ! [         a     g [         a<    SS KnUR                  UR                  5       5        [        R                  " S5         g f = f)Nr   rZ   )	r   r   putKeyboardInterrupt	Exception	traceback
format_excsysexit)funcrw   error_queuereturn_queuer   r6   resultr   s           r(   _func_wrapperr   v  si    +t   	,,./	s   +. 
A?AA?>A?c                  *    \ rS rSrS rSS jrS rSrg)MultiprocessContexti  c                    X l         X0l        Xl        [        U5       VVs0 s H  u  pEUR                  U_M     snnU l        g s  snnf N)error_queuesreturn_queues	processes	enumeratesentinel	sentinels)r2   r   r   r   indexprocesss         r(   r3   MultiprocessContext.__init__  sH    ( +":CI:N
:NGe#:N
 
s   ANc                >   [        U R                  5      S:X  a  g[        R                  R	                  U R                  R                  5       US9nS nU HQ  nU R                  R                  U5      nU R                  U   nUR                  5         UR                  S:w  d  MO  Un  O   Uc  [        U R                  5      S:H  $ U R                   H8  nUR                  5       (       a  UR                  5         UR                  5         M:     U R                  U5        g )Nr   T)timeout)r\   r   rL   
connectionwaitkeysr   r   rm   exitcodeis_alive	terminate_throw_exception)r2   r   readyerror_indexr   r   r   s          r(   rm   MultiprocessContext.join  s    t~~!#**//NN!7 0 
 HNN&&x0EnnU+GLLN1$#  t~~&!++~~G!!!!#LLN &
 	k*r'   c                r   U R                   U   R                  5       (       ad  U R                  U   R                  nUS:  a3  [        R
                  " U* 5      R                  n[        SU SU S35      e[        SU SU S35      eU R                   U   R                  5       nSU S3nXT-  n[        U5      e)Nr   zProcess z terminated with signal .z terminated with exit code z9

----------------------------------------------
Process zV terminated with the following error:
----------------------------------------------

)	r   emptyr   r   signalSignalsnamer   rh   )r2   r   r   r   original_tracemsgs         r(   r   $MultiprocessContext._throw_exception  s    [)//11~~k2;;H!|~~xi055{m+CD6K   {m+FxjPQR  **;7;;="m $AA 	
 	nr'   )r   r   r   r   r   )r!   r"   r#   r$   r3   rm   r   r&   r    r'   r(   r   r     s    
+6r'   r   c                p   [        U5        US:X  a
  [        5       n[        X%5      nUR                  SS5      nUc  Sn[        R
                  " U5      n/ n	/ n
/ n[        U5       H  nUR                  5       nUR                  5       nUR                  [        U UUUXl   US   4S9nXOl
        UR                  5         U	R                  U5        U
R                  U5        UR                  U5        M     [        XU
5      nU(       d  U$ UR                  5       (       d   UR                  5       (       d  M  U$ )a  
Start multiple processes with ``spawn`` method for parallel training.

.. note::
    ``spawn`` now only supports GPU or XPU collective mode. The collective mode
    of GPU and XPU cannot be started at the same time, so the option `gpus` and
    `xpus` cannot be configured at the same time.

Args:
    func (function): The target function is called by spawned process.
        This function need to be able to pickled, so it must be defined
        at the top level of a module.
    args (list|tuple, optional): Arguments passed to ``func``.
    nprocs (int, optional): Number of processed to start. Default: -1.
        when nprocs is -1, the available device will be obtained from
        the environment variable when the model is executed: If use GPU,
        the currently available device ID is obtained from the environment
        variable CUDA_VISIBLE_DEVICES; If use XPU, the currently available
        device ID is obtained from the environment variable XPU_VISIBLE_DEVICES.
    join (bool, optional): Perform a blocking join on all spawned processes.
        Default: True.
    daemon (bool, optional): The spawned processes' daemon flag. Default: False.
    **options(dict, optional): Other initial parallel execution environment
        configuration options. The following options are currently supported:
        (1) start_method (string): the way to start a process.
        The start method can be ``spawn`` , ``fork`` , ``forkserver`` .
        Because the CUDA runtime does not support the ``fork`` start method,
        when use CUDA in subprocesses, we should start process by ``spawn``
        or ``forkserver`` method. Default: "spawn" ;
        (2) gpus (string): The training process will run on the
        selected gpus, such as "0,1,2,3". Default: None;
        (3) xpus (string): The training process will run on the
        selected xpus, such as "0,1,2,3". Default: None;
        (5) ips (string): Paddle cluster nodes ips, such as
        "192.168.0.16,192.168.0.17". Default: "127.0.0.1" .

Returns:
    ``MultiprocessContext`` object, it hold the spawned processes.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> import paddle
        >>> import paddle.nn as nn
        >>> import paddle.optimizer as opt
        >>> import paddle.distributed as dist

        >>> class LinearNet(nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear1 = nn.Linear(10, 10)
        ...         self._linear2 = nn.Linear(10, 1)
        ...     def forward(self, x):
        ...         return self._linear2(self._linear1(x))

        >>> def train(print_result=False):
        ...     # 1. initialize parallel environment
        ...     group = dist.init_parallel_env()
        ...     process_group = group.process_group if group else None
        ...     # 2. create data parallel layer & optimizer
        ...     layer = LinearNet()
        ...     dp_layer = paddle.DataParallel(layer, group = process_group)  # type: ignore[arg-type]
        ...     loss_fn = nn.MSELoss()
        ...     adam = opt.Adam(
        ...         learning_rate=0.001, parameters=dp_layer.parameters())
        ...     # 3. run layer
        ...     inputs = paddle.randn([10, 10], 'float32')
        ...     outputs = dp_layer(inputs)
        ...     labels = paddle.randn([10, 1], 'float32')
        ...     loss = loss_fn(outputs, labels)
        ...     if print_result is True:
        ...         print("loss:", loss.numpy())
        ...     loss.backward()
        ...     adam.step()
        ...     adam.clear_grad()

        >>> # Usage 1: only pass function.
        >>> # If your training method no need any argument, and
        >>> # use all visible devices for parallel training.
        >>> if __name__ == '__main__':
        ...     dist.spawn(train)

        >>> # Usage 2: pass function and arguments.
        >>> # If your training method need some arguments, and
        >>> # use all visible devices for parallel training.
        >>> if __name__ == '__main__':
        ...     dist.spawn(train, args=(True,))

        >>> # Usage 3: pass function, arguments and nprocs.
        >>> # If your training method need some arguments, and
        >>> # only use part of visible devices for parallel training.
        >>> # If your machine hold 8 cards {0,1,2,3,4,5,6,7},
        >>> # this case will use cards {0,1}; If you set
        >>> # CUDA_VISIBLE_DEVICES=4,5,6,7, this case will use
        >>> # cards {4,5}
        >>> if __name__ == '__main__':
        ...     dist.spawn(train, args=(True,), nprocs=2)

        >>> # Usage 4: pass function, arguments, nprocs and gpus.
        >>> # If your training method need some arguments, and
        >>> # only use part of visible devices for parallel training,
        >>> # but you can't set your machine's environment variable
        >>> # CUDA_VISIBLE_DEVICES, such as it is None or all cards
        >>> # {0,1,2,3,4,5,6,7}, you can pass `gpus` to
        >>> # select the GPU cards you want to use. For example,
        >>> # this case will use cards {4,5} if your machine hold 8 cards.
        >>> if __name__ == '__main__':
        ...     dist.spawn(train, args=(True,), nprocs=2, gpus='4,5')

r   Nspawnr6   )targetrw   )r?   rQ   r   rh   rL   get_contextrk   SimpleQueueProcessr   daemonstartrt   r   rm   )r   rw   ru   rm   r   r;   procs_env_listr   mpr   r   r   ir   r   r   contexts                    r(   r   r     s5   r ! |$& .f>N ;;~t4L		$	$\	2BLMI6]nn&~~'** !	"  

  K(\*!% ( ")=IG llnn llnn Nr'   )r    r   TF)r   zCallable[..., None]rw   zIterable[Any]ru   intrm   boolr   r   r;   zUnpack[_SpawnOptions]returnr   )4
__future__r   rL   ri   r   r   r7   typingr   r   r   r   paddle.baser   paddle.devicer	   paddle.distributed.cloud_utilsr
   r   $paddle.distributed.fleet.cloud_utilsr   paddle.distributed.fleet.launchr   %paddle.distributed.fleet.launch_utilsr   r   r   %paddle.distributed.utils.launch_utilsr   r   r   paddle.frameworkr   collections.abcr   r   typing_extensionsr   r   r   __all__r*   r?   rQ   rW   r`   r   r   r   r   r   r   r    r'   r(   <module>r      s   #  	  
  9 9  $ A A 
 
 '25	  % %2>
 
 up(20$= =D o
o
o o 	o
 o %o or'   