
    Αi$                       S r SSKrSSKrSSKrSSKrSSKrSSKJr  SSKr	SSK
Jr  SSKJr  SSKJrJrJr  SSKJrJr  SSKJr  SS	KJrJrJr  SS
KJr  SS/rSS/rSrSr \RB                  RE                  5       r#\RB                  RI                  5       =r%r&\RB                  RN                  RP                  r)\RB                  RN                  RT                  r+\RB                  RN                  RX                  r-\RB                  RN                  R\                  r/Sq0 " S S5      r1S r2 " S S5      r3S r4S r5 " S S5      r6 " S S5      r7 " S S5      r8g)a  
Steps to transpile trainer:
1. split variable to multiple blocks, aligned by product(dim[1:]) (width).
2. rename split grad variables to add trainer_id suffix ".trainer_%d".
3. modify trainer program add split_op to each grad variable.
4. append send_op to send split variables to server and
5. add recv_op to fetch params(split blocks or origin param) from server.
6. append concat_op to merge split blocks to update local weights.

Steps to transpile pserver:
1. create new program for parameter server.
2. create params and grad variables that assigned to current server instance.
3. create a sub-block in the server side program
4. append ops that should run on current server instance.
5. add listen_and_serv op
    N)reduce)	framework)grad_var_name)BlockProgramcore)PSDispatcher
RoundRobin)Constant)	Parameterdefault_main_programdefault_startup_program)unique_namelookup_tablelookup_table_v2lookup_table_gradlookup_table_v2_gradop_namescopez@CLIPFc                   $    \ rS rSrSrSrSrSrSrg)DistributedModeG   r             N)	__name__
__module____qualname____firstlineno__SYNCASYNC
HALF_ASYNCGEO__static_attributes__r       s/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/transpiler/distribute_transpiler.pyr   r   G   s    DEJ
Cr%   r   c                  2    [         (       a  [        U 5        g g N)	PRINT_LOGprint)argss    r&   logr,   N   s    yd r%   c                        \ rS rSrS rS rSrg)VarBlockS   c                 (    Xl         X l        X0l        g r(   varnameoffsetsize)selfr2   r3   r4   s       r&   __init__VarBlock.__init__T   s    	r%   c                 R    U R                    SU R                   SU R                   3$ )N:r1   r5   s    r&   __str__VarBlock.__str__Z   s%    ,,qQtyyk::r%   )r3   r4   r2   N)r   r   r   r   r6   r;   r$   r   r%   r&   r.   r.   S   s    ;r%   r.   c                 @    X:H  =(       d    U R                  US-   5      $ )N.block)
startswith)p_namevar_names     r&   same_or_split_varrB   ^   s     G!2!28h3F!GGr%   c           	         / nU  GHE  nUn[        S UR                  S5      n[        [        R                  " U[        U5      -  5      5      nUS:X  a  SnXq:  a  Un[        [        R                  " U[        U5      -  5      5      n[        UR                  5      S:  a,  [        S UR                  SS S5      n	X-  n
U
S:w  a  XU
-
  -  n[        [        R                  " U[        U5      -  5      5      n[        U5       HC  n[        XX-  -
  5      n[        UR                  X5      nUR                  [        U5      5        ME     GMH     U$ )a  
We may need to split dense tensor to one or more blocks and put
them equally onto parameter server. One block is a sub-tensor
aligned by dim[0] of the tensor.

We need to have a minimal block size so that the calculations in
the parameter server side can gain better performance. By default
minimum block size 8K elements (maybe 16bit or 32bit or 64bit).

Args:
    var_list (list): List of variables.
    slice_count (int): Numel of count that variables will be sliced, which
        could be the pserver services' count.
    min_block_size (int): Minimum split block size.
Returns:
    blocks (list[(varname, block_id, current_block_size)]): A list
        of VarBlocks. Each VarBlock specifies a shard of the var.
c                 
    X-  $ r(   r   xys     r&   <lambda> slice_variable.<locals>.<lambda>x   s    r%   r   r   r   c                 
    X-  $ r(   r   rE   s     r&   rH   rI      s    qur%   N)r   shapeintmathfloorfloatceillenrangeminr.   nameappendstr)var_listslice_countmin_block_sizeblocksvarsplit_count	var_numelmax_pserver_count
block_sizedim1remainsblock_idcurr_block_sizeblocks                 r&   slice_variablere   b   s-   & F!-syy!<	

9u^7L+L MN! !*+K9u[/A#ABC
syy>Q,ciimQ?D 'G!|Wn,
$))Ij0A$ABCk*H!()@AO SXXxAEMM#e*% +% 0 Mr%   c                       \ rS rSrSrSrSrSrSrSr	Sr
SrSrSrSrSrSrSrS	rSrS
rSrS r\S 5       r\R2                  S 5       r\S 5       r\R2                  S 5       rSrg)DistributeTranspilerConfig   a  
    :api_attr: Static Graph

A configuration class that provide support for transpiler distributed jobs.
Some important parameters are explained as follows:


.. py:attribute:: slice_var_up (bool)

      Whether to do Tensor slice for parameter servers, default is True.

.. py:attribute:: split_method (PSDispatcher)

      Methods of dispatching parameters for server,
      `RoundRobin` or
      `HashName` (both from `paddle.incubate.distributed.fleet.parameter_server.ir.ps_dispatcher`) can be used and default is RoundRobin.
      Try to choose the best method to balance loads for parameter servers.

.. py:attribute:: min_block_size (int)

      Minimum number of split elements in block, default is 8192.

      According to : https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
      We can use bandwidth efficiently when data size is larger than 2MB.If you
      want to change it, please be sure you have read the slice_variable function. You can find
      the definition of slice_variable in
      https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/transpiler/distribute_transpiler.py
      .

Examples:
    .. code-block:: python

        >>> from paddle.distributed.transpiler.distribute_transpiler import RoundRobin
        >>> import paddle.distributed.transpiler as transpiler

        >>> config = transpiler.DistributeTranspilerConfig()
        >>> config.slice_var_up = True
        >>> config.split_method = RoundRobin
        >>> config.min_block_size = 81920

TN    Fpserverd   r   r   c                     g r(   r   r:   s    r&   r6   #DistributeTranspilerConfig.__init__   s    r%   c                     U R                   $ r(   )4_DistributeTranspilerConfig__runtime_split_send_recvr:   s    r&   runtime_split_send_recv2DistributeTranspilerConfig.runtime_split_send_recv   s    ---r%   c                 r    Uc  [        S5      eU(       a  U R                  (       a  [        S5      eXl        g )Nz%runtime_split_send_recv can't be Nonezeif you want to set runtime_split_send_recv to be true, make ensure config.sync_mode is false at first)
ValueError&_DistributeTranspilerConfig__sync_modero   r5   values     r&   rp   rq      s6    =DEET%%w  */&r%   c                     U R                   $ r(   )rt   r:   s    r&   	sync_mode$DistributeTranspilerConfig.sync_mode   s    r%   c                 r    Uc  [        S5      eU(       a  U R                  (       a  [        S5      eXl        g )Nzsync_mode can't be Nonezeif you want to set sync_mode to be true, make ensure config.runtime_split_send_recv is false at first)rs   ro   rt   ru   s     r&   rx   ry      s6    =677T33w  !r%   )__runtime_split_send_recv__sync_mode)r   r   r   r   __doc__slice_var_upsplit_methodrY   enable_dc_asgdmode	print_log	wait_portro   rt   
half_asynccompletely_not_asyncgeo_sgd_modegeo_sgd_need_push_numsnccl_comm_numuse_hierarchical_allreduce#hierarchical_allreduce_inter_nrankscollective_moder6   propertyrp   setterrx   r$   r   r%   r&   rg   rg      s    (T LLNNDII %K J  L M "'*+' O . . ##/ $/     ! !r%   rg   c                       \ rS rSrS rSrg)ServerRuntimeConfig   c                     [        [        R                  " SS5      5      U l        [        [        R                  " SS5      5      U l        [        [        R                  " SS5      5      U l        g )NFLAGS_rpc_send_thread_num12FLAGS_rpc_get_thread_numFLAGS_rpc_prefetch_thread_num)rL   osgetenv_rpc_send_thread_num_rpc_get_thread_num_rpc_prefetch_thread_numr:   s    r&   r6   ServerRuntimeConfig.__init__   sZ    $'II148%
! $'II0$7$
  ),II5t<)
%r%   )r   r   r   N)r   r   r   r   r6   r$   r   r%   r&   r   r      s    	
r%   r   c                      \ rS rSrSrS6S jrS6S jr  S7S jr   S8S jrS r	S	 r
S
 r      S9S jrS rS rS rS:S jrS rS rS r S;S jrS rS rS rS rS rS rS rS rS r S<S jrS:S jr\ S 5       r!S  r"S! r#S" r$S# r%S$ r&S% r'S& r(S' r)S( r*S) r+S* r,S+ r-S, r.S- r/S. r0S/ r1S0 r2S1 r3S2 r4S3 r5S4 r6S5r7g)=DistributeTranspileri  a  
    :api_attr: Static Graph

**DistributeTranspiler**

Convert the base program to distributed data-parallelism programs.
Supports two modes: parameter server(pserver) mode and nccl2 mode.

In pserver mode, the main_program will be transformed to use a remote
parameter server to do parameter optimization. And the optimization
graph will be put into a parameter server program.

In nccl2 mode, the transpiler will append a NCCL_ID broadcasting
op in startup_program to share the NCCL_ID across the job nodes.
After transpile_nccl2 called, you ***must*** pass trainer_id and
num_trainers argument to ParallelExecutor to enable NCCL2 distributed
mode.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> import paddle
        >>> import paddle.base as base
        >>> import paddle.distributed.transpiler as transpiler

        >>> paddle.enable_static()

        >>> x = paddle.static.data(name='x', shape=[1,13], dtype='float32')
        >>> y = paddle.static.data(name='y', shape=[1], dtype='float32')
        >>> y_predict = paddle.static.nn.fc(x, size=1, activation=None)

        >>> cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
        >>> avg_loss = paddle.mean(cost)

        >>> sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
        >>> sgd_optimizer.minimize(avg_loss)

        >>> # for pserver mode
        >>> pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> current_endpoint = "192.168.0.1:6174"
        >>> trainer_id = 0
        >>> trainers = 4
        >>> role = "PSERVER"

        >>> t = transpiler.DistributeTranspiler()
        >>> t.transpile(
        ...         trainer_id, pservers=pserver_endpoints, trainers=trainers)

        >>> if role == "PSERVER":
        ...         pserver_program = t.get_pserver_program(current_endpoint)
        ...         pserver_startup_program = t.get_startup_program(current_endpoint,
        ...                                                     pserver_program)
        ... elif role == "TRAINER":
        ...         trainer_program = t.get_trainer_program()

        >>> # for nccl2 mode
        >>> trainer_num = 2
        >>> trainer_id = 0
        >>> config = transpiler.DistributeTranspilerConfig()
        >>> config.mode = "nccl2"
        >>> trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> t = transpiler.DistributeTranspiler(config=config)
        >>> t.transpile(trainer_id=trainer_id, trainers=trainer_endpoints, current_endpoint="192.168.0.1:6174")
        >>> exe = paddle.static.ParallelExecutor(
        ...     use_cuda=True,
        ...     loss_name=avg_loss.name,
        ...     num_trainers=trainer_num,
        ...     trainer_id=trainer_id
        ... )

Nc                    Ub  Xl         O[        5       U l         U R                  5         U R                   R                  c  [        U R                   l        U R                   R
                  (       d  U R                   R                  (       a  [        R                  U l	        OFU R                   R                  (       a  [        R                  U l	        O[        R                  U l	        U R                   R                  (       a  SqU R                   R                  S:  d   eU R                   R                  R                   S   ["        :X  d   eS U l        g )NTri   r   )configrg   _set_server_configr   r
   rx   r   r   r    distributed_moderp   r!   r"   r   r)   rY   	__bases__r	   counter_var)r5   r   s     r&   r6   DistributeTranspiler.__init__S  s     K46DK!;;##+'1DKK$;;  DKK$D$D$3$8$8D![[00$3$9$9D!$3$>$>D! ;;  I{{))T111{{''11!4DDDr%   c                 v    Uc  [        5       U l        g [        U[         5      (       a  Xl        g [        S5      e)NzQIn DistributeTranspiler, server_config must be an instance of ServerRuntimeConfig)r   server_config
isinstance	TypeError)r5   r   s     r&   r   'DistributeTranspiler._set_server_configk  s7     !4!6D':;;!.c r%   c                    SSK Jn  U(       d
  [        5       nUS:  Ga"  UR                  S5      nUR	                  U5        US:X  a  U(       a  U" U5        UR                  5       R                  SS[        R                  R                  R                  S9n[        SU R                  R                  5       HG  n	UR                  5       R                  SU	 3S[        R                  R                  R                  S9  MI     U R                  R                  (       a  [        SU R                  R                  5       H  n	UR                  5       R                  S	U	 3S[        R                  R                  R                  S9  UR                  5       R                  S
U	 3S[        R                  R                  R                  S9  M     UR                  5       R                  S0 SU0UR                  S5      UU R                  R                  U R                  R                  U R                  R                   S.S9  U$ [#        S5      e)Nr   wait_server_ready,NCCLIDTrT   persistabletyper   NCCLID_Hierarchical_inter_NCCLID_Hierarchical_exter_NCCLID_gen_nccl_id)trainers
trainer_idr   r   r   r   inputsoutputsattrszmust set trainer_id > 0)5paddle.distributed.fleet.base.private_helper_functionr   r   splitremoveglobal_block
create_varr   VarDescVarTypeRAWrR   r   r   r   	append_opr   rs   )
r5   r   r   current_endpointstartup_programr   r   worker_endpointsnccl_id_varis
             r&   _transpile_nccl2%DistributeTranspiler._transpile_nccl2u  s   	
 57O?'~~c2##$45Q9!"23)668CC4dll6J6J6N6N D K 1dkk778,,.99"1# $--11 :  9 {{55q$++";";<A#002==9!=$(!\\1155 > 
 $002==9!=$(!\\1155 >  = ((*44"!;/ (s 3",%)[[%>%>26++2X2X;?;;;j;j	 5  677r%   c           	      b   SSK Jn  [        U[        5      (       a  UR	                  S5      n	O5[        U[
        5      (       a  Un	OUS:w  a  [        S[        U5      -   5      e[        W	5      S:X  a  US:w  a  [        S5      eUc
  [        5       nUc
  [        5       nS n
US:X  a&  UR                  U R                  R                  5      n
OQUS	:X  a&  UR                  U R                  R                  5      n
O%US:X  a  UR                  5       n
O[        S
U 35      eU
R                  UUUU	UUS9  g )Nr   )
collectiver   single_process_multi_threadzinvalid trainers config: r   z(invalid trainer number in distributed: 1grad_allreduce	local_sgdzinvalid collective_mode: )r   main_programrank	endpointsr   r   )paddle.distributed.transpilerr   r   rV   r   listrs   rQ   r   r   GradAllReducer   r   LocalSGDSingleProcessMultiThread	transpile)r5   r   r   r   r   r   r   r   r   r   
transpilers              r&   _transpile_collective*DistributeTranspiler._transpile_collective  s+    	=h$$ s+I$'' I ==83x=HII 	Na#@@GHH"57O/1L
..#11$++2K2KLJ+#,,T[[-F-FGJ ==#<<>J88IJKK+%- 	 	
r%   c                     / n/ SQnUR                  5       R                   H<  nUR                  U;   d  M  UR                  S5      SL d  M+  UR	                  U5        M>     U$ )N)r   ncer   remote_prefetchT)r   opsr   attrrU   )r5   r   sparse_update_opssparse_update_op_typesops        r&    _get_all_remote_sparse_update_op5DistributeTranspiler._get_all_remote_sparse_update_op  s\    !K++-11B11GG-.$6!((, 2 ! r%   c                    UR                  5        GH  u  p4U R                  U   nUS   nUS   n/ nSn	/ n
[        U R                  5       H  u  pX<R                  ;   a6  U	S:X  a0  UR
                  n	UR                  U5        U
R                  U5        MJ  X<R                  ;   d  M[  XR
                  :X  d  Ml  UR                  U5        U
R                  U5        M     U	[        ;   d  M  UR                  5       R                  nU Vs/ s H  oR                  U5      PM     nnU Vs/ s H2  nUR                  5       R                  UR                  S5      S      PM4     nnUR                  5       R                  US   R                  S5      S      nUS   R                  S5      nU Vs/ s H2  nUR                  5       R                  UR                  S5      S      PM4     nnUS S S2    H"  nUR                  5       R                  U5        M$     S/[!        U5      -  nS/[!        U5      -  n[        UR                  5       R                  5       H  u  p[#        S[!        UR$                  5      5       HM  nUR                  UR$                  U   5      n[        U5       H  u  nnUR&                  U;   d  M  UUU'   M     MO     [#        S[!        UR(                  5      5       HM  nUR                  UR(                  U   5      n[        U5       H  u  nnUR&                  U;   d  M  UUU'   M     MO     M     [+        U5      [-        U5      -
  S:  aF  [-        U5      S-   nUR                  5       R/                  US	UUS
.SU0UUUUU R0                  U	S.S9  O[3        S5      eU
S S S2    H  nU R                  R5                  U5        M      GM     g s  snf s  snf s  snf )Nr   r    IdsWpadding_idxOutdistributed_lookup_tabler   r   Outputs)table_namesheight_sectionsr   r   r   lookup_table_versionindexr   r   r   r   zIsomething wrong with distribute_transpiler, submit a issue is recommended)itemssparse_param_to_height_sections	enumerater   input_arg_namesr   rU   LOOKUP_TABLE_TYPEr   r   r   varsinputr   output
_remove_oprQ   rR   output_namesrT   input_namesrS   max
_insert_opr   rs   pop)r5   programneed_sparse_update_paramsparam_varnamer   r   r   r   r   op_typeused_opsidxr   all_opsop_idxsr   wr   r   inputs_idxsoutputs_idxsr   outsin_idin_varinsout_idout_vardistributed_idxs                                r&   _update_remote_sparse_update_op4DistributeTranspiler._update_remote_sparse_update_op  s    %>$C$C$E M"BBO aI(KCGH$T%;%;< $6$667b= ggGJJrNOOC("&8&88W=OJJrNOOC( = ++!..0447:;s==,s; "! ((*//0BC!   ((*//AS0A!0DE!!fkk-8 "! ((*//		%0@0CD!  
 #4R4=C((*55c: )  "dS[0 "tc'l2()=)=)?)C)CDGC"1c"//&:;!yy);<-6v->ME6%{{d258E 2 .? <
 #1c"..&9: hhr~~a'89/8/AOFG&||s27:V 4 0B ;  E |$s;'771<&)+&6&:O((*55-7'-A6!*G 4+6/>)2+6*.//4; 6  %c  $DbD>C**..s3 *W %F. <s   O'9O ,9O%c                 L    U R                    H  nXR                  ;   d  M    g   gNTF)r   r   )r5   
param_namer   s      r&   $_is_input_of_remote_sparse_update_op9DistributeTranspiler._is_input_of_remote_sparse_update_op@  s&    ((B/// ) r%   c                    SSK Jn  SSKJn	Jn
  Sn[        U[        R                  S9  Uc
  [        5       nUc
  [        5       nX l
        X`l        U R                  R                  5       U l        U R                  R                  S:X  Ga  [!        U["        5      (       d   eUR%                  S5      U R                  l        U R                  R(                  U R                  l        U R                  R,                  U R                  l        U R                  R,                  (       Ga  [1        U R                  R&                  5      nU R                  R2                  S	::  a$  [4        R6                  " 5       U R                  l        UU R                  R2                  :  d!   S
U SU R                  R2                   35       eUU R                  R2                  -  S:X  d"   S
U SU R                  R2                   S35       e[9        U R                  R2                  5      U R                  l        U R=                  UUUUU R                  R>                  S9  gU R                  R                  S:X  a>  U RA                  U R                  RB                  UUUUUU R                  R>                  S9  gX@l"        XPl#        Xl$        UR%                  S5      nXl%        U	" 5       U l&        U RO                  5       u  U l(        U l)        U R                  RU                  U RJ                  5      nU" U R                  5      U l+        U RV                  SLU l,        0 U l-        0 U l.        U RR                   HL  u  nnUR^                  U RZ                  UR^                  '   UR^                  U R\                  UR^                  '   MN     U Ra                  U R                  5      U l1        0 U l2        / U l3        SU R                  l4        U RJ                  U R                  l5        XpR                  l6        U RH                  S:H  U R                  l7        U RV                  (       a  U RV                  OSU R                  l8        U Rs                  5         URu                  5         / n[w        U Rx                  R{                  5       5      nU R                  R|                  (       dR  [~        R                  R                  U R                  R                  5        [~        R                  R                  U5        0 U lD        U GH  u  nnUR                  U5      nU R                  R|                  (       d  [1        U5      S	:X  d   eUn[1        U5      S	:X  a&  US   R^                  nU
" UR                  5       USS9nO[1        U5      S	:  ah  UR                  5       R                  U   nU
" UR                  5       USS9nU R                  R                  (       d  U R                  UUUU5        US	-  nO[        SU5        US   R                  [4        R                  R                  R                  :X  aR  U R\                  U   nU R                  U5      (       a-  U Vs/ s H  nUR                  S   PM     snU Rd                  U'   UR                  5       R                  [        R                  " 5       S9nUU R                  U'   U R                  R                  (       a  UR                  5       R                  U   /nU R                  U5      nU R                  R                  (       a<  U RD                  S	:  a,  U Vs/ s H  nUR^                   SU RH                   3PM      nnO#U Vs/ s H  nUR^                  PM     nnOUn/ n/ nUR                  5       R                  WS	-   SSU0SU0SUSUSU[        [        [        U R\                  U   U/0S9  [        U5       H  u  n nUR                  U5        M     GM     UR                  5       R                  [        R                  " 5       S9n!U RX                  (       aG  UR                  5       R                  [        R                  " 5       S9U R                  U RV                  '   [w        U R                  R                  5       5      n"U RF                  (       Gd  U R                  5       n#[1        U#5      S:  a  U R                  (       a  UR                  5       R                  [        R                  " 5       S9n$U R                  R                  (       a  U R                  R^                  /nO/ n/ nUR                  5       R                  SSU R                  0SU$0SUSUSUSSSS[        [        [        U R                  R^                  U R                  R^                  /0S 9  U"R                  U$5        U RF                  (       a[  / n%UR                  5       R                  S!S[w        U"5      0SU!0S"US#U RH                  S$S[        [        0S 9  U%R                  U!5        O}U R                  R                  (       ab  U R                  R                  (       aG  UR                  5       R                  S!S[w        U"5      0SU!0S"US#U RH                  S$S[        [        0S 9  / n&[        U5       H$  u  n nU&R                  U R                  U   5        M&     URu                  5         UR                  U&5      n[        U5       H}  u  n'n(U R                  U(   S%   R                  U&U'   5        U R                  U(   S&   R                  UU'   5        U RL                  R                  U&U'   R^                  5      n)U(U)ld        M     0 n*/ n+U R                  R{                  5        GH  u  n,n/ n-/ n.U Hh  nU& V/s/ s H  n/U/R^                  PM     sn/R                  UR^                  5      nU-R                  UU   5        U.R                  UR^                  5        Mj     U RF                  (       a  U!n0OU R                  U RZ                  U,      n0U RZ                  U,   n1U1n2U Rx                  U1   n3[1        U35      S	:X  a  U3S   R^                  n2U,U Rd                  ;   a5  U. H%  n4U RL                  R                  U45      n)S'U)lg        M'     U-U.4U*U,'   GM,  / n5U R                  R                  (       a<  UR                  5       R                  U,   n6U Vs/ s H  nUR^                  PM     n5nU6/nU+R                  U5        UR                  5       R                  S(SU0/0SU0SU-S)U5S#U RH                  [        [        [        U,U2/0S 9  GM     U R                  UU*5        U RF                  (       a<  UR                  5       R                  S*SW%0SU+0S"US#U RH                  [        [        0S 9  U R                  R{                  5        H  u  n,n[1        U5      S	::  a  M  UR                  5       R                  U,   n6U,U Rd                  ;  d  MF  U R                  R                  (       a  Mc  UR                  5       R                  S+SU0SU6/0S,S[        [        0S 9  M     U R                  U&US-9  U RX                  (       a"  U R                  X-5        U R                  X-5        U R                  5         U RL                  U R                  lo        gs  snf s  snf s  snf s  sn/f s  snf ).ag  
Transpile the input program to distributed programs with config and arguments.

Args:
    trainer_id (int): id for current trainer worker, if you have
        n workers, the id may range from 0 ~ n-1
    program (Program|None): program to transpile,
        default is paddle.static.default_main_program().
    startup_program (Program|None): startup_program to transpile,
        default is paddle.static.default_startup_program().
    pservers (str): comma separated ip:port string for the pserver
        list.
    trainers (int|str): in pserver mode this is the number of
        trainers, in nccl2 mode this is a string of trainer
        endpoints.
    sync_mode (bool): Do sync training or not, default is True.
    startup_program (Program|None): startup_program to transpile,
        default is paddle.static.default_main_program().
    current_endpoint (str): need pass current endpoint when
        transpile as nccl2 distributed mode. In pserver mode
        this argument is not used.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> t = paddle.distributed.transpiler.DistributeTranspiler()
        >>> t.transpile(
        ...     trainer_id=0,
        ...     pservers="127.0.0.1:7000,127.0.0.1:7001",
        ...     trainers=2,
        ...     sync_mode=False,
        ...     current_endpoint="127.0.0.1:7000")

r   )find_distributed_lookup_table)VarsDistributedfind_op_by_output_argz

API is deprecated since 2.0.0 Please use FleetAPI instead.
WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler

        )fileNnccl2r   r   ztrainers_num:z' < hierarchical_allreduce_inter_nranks:z) mod hierarchical_allreduce_inter_nranks:z != 0)r   r   r   )r   r   r   r   r   r   r   T)reversez6Can not insert the send op by original variable name :rT   	.trainer_sendXr   epmapsectionssend_varnamesr   	merge_adduse_send_handlerFr   send_barrierr   r   r   paramsgradsRemotePrefetchrecvrecv_varnamesfetch_barrierconcataxis)	recv_varseplist)p*paddle.distributed.distribute_lookup_tabler  %paddle.distributed.transpiler.detailsr  r   r*   sysstderrr   r   origin_programr   cloneorigin_startup_programr   r   r   rV   r   _trainers_endpointsr   _nccl_comm_numr   _use_hierarchical_allreducerQ   r   r   get_cuda_device_countrL   $_hierarchical_allreduce_inter_nranksr   r   r   r   trainer_numrx   r   pserver_endpointsvars_overview_get_optimize_passoptimize_opsparams_gradsr   
table_namehas_distributed_lookup_tableparam_name_to_grad_namegrad_name_to_param_namerT   r   r   r   need_delete_optimize_vars_is_distributed
_endpoints_ps_endpoint	_is_chief_distributed_lookup_table_init_splited_varsresetr   grad_var_mappingr   r~   nprandomseedrandom_seedshufflegrad_name_to_send_dummy_outdispatchr   r   rp   _insert_split_opAssertionErrorr   r   r   SELECTED_ROWSr  rK   r   r   generate_control_dev_var_name_get_splited_var_sectionsr   r  RPC_OP_ROLE_ATTR_NAMERPC_OP_ROLE_ATTR_VALUEOP_ROLE_VAR_ATTR_NAMEr   rU   values_get_lr_opsr   r   r   grad_param_mappingparam_grad_ep_mappingget_distributed_var_by_sliceendpointparam_var_mappingr   vtypeextendr  DIST_OP_ROLE_ATTR_VALUE_get_trainer_startup_program&_replace_lookup_table_op_with_prefetch#_split_table_grad_and_add_send_vars_get_distributed_optimizer_vars_parameters_on_pservers)7r5   r   r  pserversr   rx   r   r   r  r  r   err_msgtrainers_numrE  ps_dispatcher	param_vargrad_var	send_varsgrad_var_mapping_itemsgrad_varnamesplited_varsr7  splited_grad_varnamer   orig_varsparse_param_namesplited_vardummy_outputsend_input_varsr)  r[   r*  _send_barrier_out
input_depslr_opsdecay_dummy_outputfetch_barrier_inputr6  r   epdistributed_varr  all_recv_outputsr  epsr   vrecv_dep_inorig_grad_namerecv_op_role_var_namesplited_trainer_gradrJ  r2  
orig_params7                                                          r&   r   DistributeTranspiler.transpileF  sn   Z	
	

 	gCJJ'?*,G"57O%.&*&:&:&@&@&B#;;w&h,,,,6>nnS6ID3151J1JD.66 ; {{555"4#6#6#J#JK;;BBaG224 KKC
 !kkEEF $L>1XY]YdYd  ZI  ZI  YJ  K	F !kkEEF
 $L>1Z[_[f[f  \K  \K  [L  LQ  R LOKKCCL##H !!  /++// "  ;;|+&& $ ; ;%!!1 /$++// '  #"$$NN3/!2,./3/F/F/H,4,001G1GH78K8KL,0OO4,G)')$')$#'#4#4Ix;C==D((8:C..D((7 $5
 "&!F!F"
 02,)+& /3+)-)?)?&+;((,1(<%#DOOD 	5 	! 		 "&d&;&;&A&A&C!D{{''IINN4..::;II45+-(*@&L,"++L9F;;++<(A---#/ < A%'3A';';$-((*,@$ \"Q&"//1667KL-((*,@$ {{::))5, QJEL(
 A##t||';';'I'II$($@$@$N!<<=NOO@LO@L))!,OD889JK #//1<<<<> = L >JD,,\:{{22((*//0DE#  99,G;;338H8H18L $0%#/C 88*Idoo->?#/ " %M
 :F$F#SXXM$FM". "   "--ai_--V#])+A)44\B,,	 .   $L13  % 2a +Af #//1<<88: = 
 ,,$$&11"@@B 2  ,,T__=
 $::AACD
~~~%%'F6{Q4#3#3%,%9%9%;%F%F"@@B &G &" ;;66%)%5%5%:%:$;M$&M$$&00!1!12"$67!2"H'#T*E-/E- ,,11 ,,110	 1 " !!"45>>"$  ",,#T*-. 01!2 $// %)+A		 - 
  &&'78{{22t{{7M7M$$&00'j!12"$45#%6$doo$d-/E		 1 
 		*FAsT44S9: +''	2v&EAr&&r*84;;IaLI&&r*73::9Q<H"00MM!!!O (*O$ ' %'! *.*@*@*F*F*H&M;CK")23A399#((C

6%=)""388, # ~~. #>>00? "99-HN$2!#'#8#8#H '(A-(<Q(?(D(D% D DD"-J**GG& $
 -=O) #. =@;M)-8 ";;66!(!5!5!7!<!<]!KJ9D$E#SXXM$E#-,K ''4$$&00+/"K0'$doo-/E-)10		 1 U +It 	,,W6OP>>  ",,$01 01!2 $//)+A	 - 	 +/*@*@*F*F*H&M;;1$ --/44]CJD$H$HH{{:::((*44% #[1!& 5"A13J	 5  +I  	))If)M,,77 44WP,,.6:6H6H3aO %
 %Gz 4D %Fs   }	$%}};}}c                    SS/n/ nU R                   R                  5       R                   H  nUR                  U;   a7  UR	                  S5      SL a#  UR                  UR                  S5      S   5        UR                  S:X  d  M\  UR                  UR                  S5      S   5        M     U R                  (       a  UR                  U R                  5        [        [        U5      5      $ )Nr   r   	is_sparseTr   r   r   )r<  r   r   r   r   rU   r   rK  rJ  r   set)r5   r   sparse_table_namesr   s       r&   _get_sparse_table_names,DistributeTranspiler._get_sparse_table_names  s    "0%!8%%22488B11GGK(D0"))"((3-*:;ww44"))"((3-*:; 9 ,,%%doo6C*+,,r%   c           
      (   SSK Jn  U GH  nU R                  R                  5       R                  U   n/ nU R                  R                  5       R
                   H%  nX6R                  ;   d  M  UR                  U5        M'     [        U5      nUS:w  a  [        S[        U5      -   5      eUS   nU R                  R                  5       R                  S0 SU0SUR                  S5      0S9  U" U R                  R                  5       U5        GM     g )	Nr   
delete_opsr   z&table init op num should be 1, now is 	fake_initr   rK   r   )r9  r  r   r   r   r   output_arg_namesrU   rQ   rs   rV   r   r   )	r5   r  r  rJ  	table_vartable_param_init_opr   init_op_numtable_init_ops	            r&   _fake_init_sparsetable+DistributeTranspiler._fake_init_sparsetable  s   D,J,,99;@@LI"$**779==!4!44'..r2 > 12Ka <s;?OO  02M  --/99 	* 2 27 ;<	 :  t++88:<OP% -r%   c                 x   SSK Jn  / n/ n/ nU R                   H>  nUR                  UR                  5        UR                  UR                  S5      5        M@     [        [        U5      5      n[        [        U5      5      nU H  nXt;  d  M
  UR                  U5        M     [        [        U5      5      nU(       a  / n	U Hc  n/ n
U R                  R                  5       R                   H%  nXvR                  ;   d  M  U
R                  U5        M'     U	R                  U
5        Me     U" U R                  R                  5       U	5        U H\  nU R                  R                  5       R                  U5      (       d  M3  U R                  R                  5       R                  U5        M^     g U" U R                  R                  5       U R                  5        U H\  nU R                  R                  5       R                  U5      (       d  M3  U R                  R                  5       R                  U5        M^     g )Nr   r  op_role_var)r9  r  rH  rn  r   r   r   r  rU   r   r   r   r  has_var_remove_varr<  )r5   
is_startupr  optimize_varsoptimize_op_role_varsoptimize_need_delete_varsr   r[   rN  init_opsparam_init_ops              r&   _delete_trainer_optimizer.DistributeTranspiler._delete_trainer_optimizer7  s   D "$&!##B  !3!34!(()?@ $ S/0 $S)>%? @ C/)005 ! %)-F)G$H!H0 "..;;=AAB111%,,R0 B . 1 t++88:HE0''446>>sCC((557CCCH 1 t**7794;L;LM0&&335==cBB''446BB3G 1r%   c                    SSK Jn  SSKJn  U R	                  SS9  U R                  5       nU R                  U5        U R                  5       nU" U R                  R                  5       U5        U R	                  SS9  U R                  R                  5         U R                  R                  5         U(       a  U" U R                  5        U R                  $ )a  
Get transpiled trainer side program. The program on trainer side compared with origin program
has following difference:

    - Delete optimizer related op, because parameter updated on Pserver
    - After the op which computed gradient of each parameter, add ``Send_op`` and ``Recv_op``

Args:
    wait_port(bool): Whether to wait for the parameter server to be ready before returning to program,
    default is True

Returns:
    Program: trainer side program.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> import paddle.distributed.transpiler as transpiler
        >>> # this is an example, find available endpoints in your case
        >>> pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> trainer_id = 0
        >>> trainers = 4

        >>> t = transpiler.DistributeTranspiler()
        >>> t.transpile(trainer_id, trainers=trainers, pservers=pserver_endpoints)
        >>> trainer_program = t.get_trainer_program()

r   r   r  T)r  F)r   r   r9  r  r  r  r  rg  r<  r   r;   r   rE  )r5   r   r   r  r  r  s         r&   get_trainer_program(DistributeTranspiler.get_trainer_program]  s    @	
 	E&&$&7!99;##$67!!#4&&335v>&&%&8##%$$&d445"""r%   c                    U R                   nU R                  5       nU R                  R                  5        GH)  u  pVXT;   a  M  / nU HK  nU V	s/ s H  oR                  PM     sn	R                  UR                  5      n
UR                  X*   5        MM     U H  nUR                  5       R                  UR                  5      (       a  M3  UR                  5       R                  UR                  SUR                  UR                  UR                  UR                  S9  M     UR                  5       R                  SS/ 0SU0SUSU R                  [         ["        0S9nGM,     UR                  5       R                  [$        R&                  " 5       S	9nUR                  5       R                  S
0 SU0SU R(                  SU R                  [         ["        0S9  U R                  R                  5        H  u  pVXT;   a  M  [+        U5      S::  a  M  XSR                  5       R,                  ;   a  UR                  5       R,                  U   nOpU R.                  R                  5       R,                  U   nUR                  5       R                  UUR0                  UR                  UR                  UR                  S9nUR                  5       R                  SSU0SU/0SS0S9  M     U$ s  sn	f )z
Get transpiled trainer side startup program.

Args:
    recv_vars (list): Variable list to recv for current trainer_id
    eplist (list): A list of strings indicating

Returns:
    Program: trainer side startup program.
F)rT   r   r   dtyperK   	lod_levelr1  r'  r   r(  r   r   r$  r3  r   r   rT   r   r   r  rK   r4  r5  r   )r   r  rl  r   rT   r   rU   r   r  r   r   r  rK   r  r   r   rc  rd  r   ra  rE  rQ   r   r<  r   )r5   r6  r7  r   r  r2   r  r  r[   r  r   r   fetch_barrier_outr  origin_param_vars                  r&   rp  1DistributeTranspiler._get_trainer_startup_program  s    .. "99;
 %)$:$:$@$@$B G,C")23A399#((C

6=) # #"//199#((CC,,.99 %))))!mm : 	 # !--/99Ry,S $//)+A	 : 	B- %CB ,88:EE88: F 
 	$$&00 -.T33doo%'=	 	1 		
 %)$:$:$@$@$B G,;1$668===,99;@@I
#'#6#6#C#C#E#J#J$  -99;FF  0 < <)..*00*00 G 
 ((*44[)-qk	 5 ) %C6 I 4s   K(c                 N  ^ ^^*^+ [         R                  R                  S5        [        5       nT R                  R
                  Ul        UR                  T R                  5        / nT R                  T   S    H#  nT R                  UR                  5       U5        M%     T R                  T   S    GHA  nUR                  R                  S5      nUS:  a  UR                  SU nOUR                  nUR                  5       R                  USUR                  UR                  UR                  S9nT R                   (       d+  T R"                  R$                  (       a  T R&                  S	:  at  [)        T R&                  5       HX  nUR                  5       R                  U SU 3S
UR                  UR                  UR                  S9n	UR+                  U	5        MZ     GM0  UR+                  U5        GMD     T R-                  T R.                  5      n
/ n[1        T R.                  5       HG  u  pT R3                  U5      (       d  M  T R5                  TU5      (       d  M6  UR+                  U5        MI     T R"                  R6                  SL a  T R                   S
L d   e/ T l        T R                  T   S    H  n[)        T R&                  5       Hp  nUR                   SU S3nUR                  5       R                  UUR                  UR                  UR                  S9nT R8                  R+                  UU45        Mr     M     / n/ m+UU U+4S jnU*U 4S jm*T R;                  5       n/ nSn[=        U5      S:  ak  UR?                  UR@                  S	-
  5      nUR+                  U5        [1        U5       H!  u  pT RC                  UU5      nT*" UUU5        M#     URD                  n/ nUR@                  S	-
  n[1        U5       GH:  u  nnUR?                  U5      nUR+                  U5        URG                  [H        5      S   nSn[1        T R.                  5       Hc  u  pURG                  [H        5      S	   n URG                  [H        5      S   U:X  d  M;  T RK                  UU TUT R                  5      nU(       d  Mc    O   U(       d  M  [1        T R.                  5       HY  u  pURG                  [H        5      S   U:X  d  M#  UU;  d  M+  [M        SUR                  URN                  U5        U" UUUUU5        M[     GM=     [Q        [S        U5      5      nU(       aD  UR?                  UR@                  S	-
  5      n!UR+                  U!5        U H  n"U" U"U!USU5        M     / n#T RT                  (       a  T RV                  RY                  T5      n$T R[                  U$UUU5      n%UR+                  U%5        T R]                  U$UU%5      n&T R_                  UU%RD                  5      n'T R`                  Ul1        U#Re                  U&5        [=        U5      S:X  aV  [f        Rh                  " S[k        T5      -   S-   5        UR@                  S	-
  nUR?                  U5      n(UR+                  U(5        UTT RV                  RY                  T5      T R&                  T Rl                  UT+UT Rn                  Rp                  T Rn                  Rr                  T Rn                  Rt                  S.n)T RT                  (       a  W'U)S'   T R"                  R6                  (       a  SU)S'   [=        U#5      S:  a  U#U)S'   UR                  5       Rw                  SSU00 U)S9  URy                  5         UT l=        U$ )aK  
Get parameter server side program.The program on pserver side compared with origin program
has following difference:

    - Only the following op is included: optimize-related op and communication-related op
    - NO.0 block only has variable definitions and ``listen_and_serv_op``
    - Every variable which need to be updated has a unique block

Args:
    endpoint (str): current parameter server endpoint.

Returns:
    Program: the program for current parameter server to run.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> import paddle.distributed.transpiler as transpiler
        >>> # this is an example, find available endpoints in your case
        >>> pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> current_endpoint = "192.168.0.1:6174"
        >>> trainer_id = 0
        >>> trainers = 4

        >>> t = transpiler.DistributeTranspiler()
        >>> t.transpile(
        ...     trainer_id, pservers=pserver_endpoints, trainers=trainers)

        >>> pserver_program = t.get_pserver_program(current_endpoint)

zsget_pserver_program() is deprecated, call get_pserver_programs() to get pserver main and startup in a single call.
r.  r/  r%  r   NTr  r   F_bakrT   r   rK   r  c           	         > TR                  U 5      (       a"  TR                  UU TUTR                  UT5        g X;  a  TR                  X5        g g r(   )_is_optimizer_op_append_pserver_opsr<  _append_pserver_non_opt_ops)r   rd   grad_to_block_id
merged_varr  rk  r5   sparse_grad_to_params        r&   __append_optimize_op__HDistributeTranspiler.get_pserver_program.<locals>.__append_optimize_op__r  sZ     $$R(((($''( !00; "r%   c                   > U R                  S5      (       d  g U R                  S5      nT
R                  R                  UR                  5      n[        U[        5      (       d   eUR                  UR                  5      nUR                   H  nUR                  U5        M     UR                   H  nT
R                  XU5      nT	" XU5        M      U R                  SU5        g )N	sub_block)has_attrr   r<  rd   idr   r   _create_blockr	  r   _clone_variabler   _clone_lr_op	_set_attr)r   r  lr_blockorigin_block_descorigin_blocknew_sub_blockr[   	origin_op	cloned_op__clone_lr_op_sub_block__r5   s            r&   r  KDistributeTranspiler.get_pserver_program.<locals>.__clone_lr_op_sub_block__  s    ;;{++ " 4..445F5I5IJLlE2222 $11(,,?M $((--c2 ) *--	 --giP	))mL . LLm4r%   r   zappend opt op: z	pserver [z] has no optimize block!!)optimize_blocksrk  
pserver_idFaninr   r  r  lr_decay_block_idrpc_get_thread_numrpc_send_thread_numrpc_prefetch_thread_numcheckpoint_block_iddc_asgdprefetch_var_name_to_block_idlisten_and_servr'  r   )>r:  r;  writer   r<  rZ  _copy_dist_param_info_fromri  
_clone_varr   rT   findr   r   r  rK   rx   r   r   rD  rR   rU   _create_ufindrH  r   r  _is_opt_op_on_pserverr   param_bak_listrg  rQ   r  
num_blocksr  r	  r   re  _append_pserver_grad_merge_opsr,   r   r   r  rK  rE  r   _create_table_optimize_block_create_prefetch_block_create_checkpoint_save_blockrJ  rS  rn  loggingwarningrV   r   r   r   r   r   r   _sync_with_cpppserver_program),r5   rk  r  recv_inputsr  suff_idxorig_var_namesingle_trainer_varr   r[   ufindopt_op_on_pserverr  r   pr   param_bak_nametmpvar
global_opsr  r  r  r  lr_decay_blockr  r  pre_block_idxr	  opt_opper_opt_blockoptimize_target_param_namer  grad_varname_for_blockopt_state_blockglb_opr  pserver_indextable_opt_block!lookup_table_var_name_to_block_idr  empty_blockr   r  r  s,   ``                                        @@r&   get_pserver_program(DistributeTranspiler.get_pserver_program  sR   J 	

 C	
 ")&*&9&9&E&E#2243F3FG ++H5h?AOOO88:A> @++H5g>A
 vv{{;/H1} !y 1 ! "1!=!=!?!J!J" VVgggg "K " ;;33$$q("'(8(8"9J)668CC -i
|D$)VVgggg D C  &&s+ #: ""#56E ?P ""4#4#45 t001EA$$R((T-G-G". . "((,	 2 ;;%%->>U***"$D//9(Ct//0A()xy4%@N,99;FF+VVgggg G F ''..6{; 1 D$ 
  "	< 	52 !!#v;?,::**Q.N "">2"6* <<^RP	) + !/ 2 2 '22Q6$%67KC+99-HM""=1)/5J)KA)N& J"4#4#45 *,1F)G)J&GG121512 "&!D!D%. (++"J "z! 6" z&t'8'89EA  56q956j0-GG..&	 /),&" :3 8^  $4 56-;;**Q.O ""?3$&O-=tV % )+%,, 2288BM"???OO ""?3040K0K1- #'"D"D!4!4# 9=O5)001 1$OOc(m+.II ,66:M)77FK"";/
  / 0066x@%% $ 5 5 0$8!2"&"4"4"H"H#'#5#5#J#J'+'9'9'R'R
 ,,+>E'(;;%%#E),-1- 12
 	$$&00"%	 	1 	
 	&&(.r%   c                 H    U R                  U5      nU R                  XS9nX#4$ )a  
Get pserver side main program and startup program for distributed training.
The ``main_program`` returned by this function is consistent with the
return value of the function ``get_pserver_program`` .

Args:
    endpoint (str): current pserver endpoint.

Returns:
    tuple: (main_program, startup_program), of type "Program"

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> import paddle.distributed.transpiler as transpiler
        >>> # this is an example, find available endpoints in your case
        >>> pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> current_endpoint = "192.168.0.1:6174"
        >>> trainer_id = 0
        >>> trainers = 4

        >>> t = transpiler.DistributeTranspiler()
        >>> t.transpile(
        ...     trainer_id, pservers=pserver_endpoints, trainers=trainers)
        >>> pserver_program, pserver_startup_program = t.get_pserver_programs(current_endpoint)

)r  )r
  get_startup_program)r5   rk  pserver_progpserver_startups       r&   get_pserver_programs)DistributeTranspiler.get_pserver_programs.  s7    : //922 3 
 ,,r%   c                 H  ^ [        5       nU R                  nUR                  Ul        U R                  U   S   mU4S jnUR	                  5       R
                  n[        R                  " 5       nUR                  5        H2  u  pUR	                  5       R                  U
5      nXU
R                  '   M4     UR	                  5       R                   GH+  n[        R                  " 5       nSnUR                  S;  av  UR                   Hf  nU" UR                  U5      S   5      u  nn	U(       a  SnUU   X'   M2  UR                  U5      S   U;   d  ML  SnX|R                  U5      S      X'   Mh     U(       d  M  U R                  X|5      nUR                  S;   a(  UR!                  S[#        US	   R$                  5      5        UR	                  5       R'                  UR                  UUUR)                  5       S
9  GM.     U R*                  R,                  (       a  U R.                   Hw  u  nnUR	                  5       R
                  UR                     nUR	                  5       R
                  UR                     nUR	                  5       R'                  SSU0S	U0S9  My     U$ )a~  
**Deprecated**

Get startup program for current parameter server.
Modify operator input variables if there are variables that
were split to several blocks.

Args:
    endpoint (str): current pserver endpoint.
    pserver_program (Program): deprecated, call get_pserver_program first.
    startup_program (Program): deprecated, should pass startup_program
        when initializing

Returns:
    Program: parameter server side startup program.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:DISTRIBUTED)
        >>> pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
        >>> current_endpoint = "192.168.0.1:6174"
        >>> trainer_id = 0
        >>> trainers = 4

        >>> t = paddle.distributed.transpiler.DistributeTranspiler()
        >>> t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        >>> pserver_program = t.get_pserver_program(current_endpoint)
        >>> pserver_startup_program = t.get_startup_program(current_endpoint,
        ...                                                 pserver_program)

r.  c                    > [        T5       H7  u  pUR                  n[        X05      (       d  M#  X:w  d  M*  X2R                  4s  $    S/ 4$ )Nr   )r   rT   rB   rK   )r2   r	  splited_parampnamer.  s       r&   _get_splited_name_and_shapeMDistributeTranspiler.get_startup_program.<locals>._get_splited_name_and_shapez  sJ    &/&7"%**$U449I "5"555 '8 r6Mr%   F)r1  r3  r4  r   T)gaussian_randomfill_constantuniform_randomtruncated_gaussian_randomrK   r   r   assignr'  r   r   r   )r   r   rZ  ri  r   r   collectionsOrderedDictr   r  rT   r   r   r   r   _get_input_map_from_opr  r   rK   r   	all_attrsr   r   r  )r5   rk  r  r   s_progorig_s_progr  pserver_varscreated_var_mapr  r[   r  r   new_outputsop_on_pserverkeynewname
new_inputsr  p_bakstartup_param_varstartup_tmpvarr.  s                         @r&   r  (DistributeTranspiler.get_startup_programQ  sY   H **(44++H5h?	 '335::%113"((*FA((*::3?F(.CHH% +
 **,00B%113K!MwwAA??C!<RYYs^A=N!OJGQ(,+:7+C(3*l:(,+7		#q8I+J( + }!88J
77   LL${5/A/G/G*HI##%//%',,.	 0 5 1@ ;;%% //5$*$7$7$9$>$>qvv$F!!'!4!4!6!;!;EJJ!G##%//!!23"N3 0 	 0 r%   c                    SnSnSnSnU R                  UR                  5      u  pgnU(       d  XSU4$ [        UR                  U5      S   5      nSn	U R                  U   n
Sn[        U
S   R                  5      S:  a  [        S U
S   R                  SS  5      nU
S U  H  nXR                  S   -  n	M     X-  nSnXSU4$ )Nrd   r   Fr   r   c                 
    X-  $ r(   r   rE   s     r&   rH   :DistributeTranspiler._get_slice_var_info.<locals>.<lambda>  s    QUr%   T)_get_varname_partsrT   rL   r   rl  rQ   rK   r   )r5   	slice_varblock_suffix	block_idxr3   is_slicer  
block_namer  	skip_dim0
slice_varsorig_dim1_flattens               r&   _get_slice_var_info(DistributeTranspiler._get_slice_var_info  s    	'+'>'>y~~'N$1..
((6q9:		++M:
z!}""#q( &"JqM$7$7$;! $JY/I++I 0 .F**r%   c                 H   ^  U 4S jnT R                    H  nU" U5        M     g )Nc                   > SSK Jn  / n[        TR                  5       HF  u  p4TR	                  U5      (       d  M  TR                  X5      (       d  M5  UR                  U5        MH     U GH  nS nUR                   H:  nUS:X  d  M  UR                  U5      S   nTR                  R                  X5      n  O   UR                   GH5  nUS;   a  M  TR                  R                  5       R                  UR                  U5      S      n	TR                  UR                  XyR                   UR"                  R                   5      n
XR"                  R                   :X  a}  U" U	R$                  U
U	R&                  U	R                  U	R(                  U	R*                  S9nTR                  R-                  U	UUR.                  UR0                  UR2                  SU S9  GM  TR                  R-                  U	U	SSSSU S9  GM8     GM     g )	Nr   )	VarStructParamr@  GradLearningRateBeta1TensorBeta2TensorrT   rK   r  r   r  r   	Optimizer)
origin_varr3  r6  rb   r3   rm  rk  F)r9  r?  r   rH  r  r  rU   r   r   rF  $get_distributed_var_by_origin_and_epr<  r   r   _get_optimizer_input_shaper   rK   slicerT   r  r  r   add_distributed_varr6  rb   r3   )rk  r?  r  r  r   r   dist_varr(  r  rH  	new_shaper  r5   s               r&   _get_distributed_optimizer_var\DistributeTranspiler._get_distributed_optimizer_vars.<locals>._get_distributed_optimizer_var  s   G ""4#4#45((,,1K1K2 2 &,,R0	 6 ,!--Cg~%+\\#%6q%9
#'#5#5#Z#Z&$  . "--C   !!%!4!4!A!A!C!H!HS)!,"J !% ? ?S*:*:HNN<P<P!I !NN$8$88&/!+"+","2"2!+&0&:&:(2(>(>' **>>'1&1%-%6%6%-%6%6#+??"-%- ?  **>>'1&0%*%&#$"-%- ? K . ,r%   )rE  )r5   rO  r  s   `  r&   rs  4DistributeTranspiler._get_distributed_optimizer_vars  s$    @	D ((B*2. )r%   c           
        ^  T R                   nT R                  (       Ga  U Vs/ s H   oUR                  T R                  :w  d  M  UPM"     nnU Vs/ s H*  nUR                  [	        T R                  5      :w  d  M(  UPM,     nn[        U 4S jU 5       5      T l        T R                  S   nT R                  (       a  [        [        T R                  5      5       Vs/ s H]  nUR                  5       R                  UR                   ST R                   SU 3UR                  UR                  UR                   S9PM_     snT l        X4$ [        [        T R                  5      5       Vs/ s HP  nUR                  5       R                  UR                   SU 3UR                  UR                  UR                   S9PMR     snT l        X4$ s  snf s  snf s  snf s  snf )Nc              3   f   >#    U  H&  nUS    R                   TR                  :X  d  M"  Uv   M(     g7f)r   N)rT   rJ  ).0
param_gradr5   s     r&   	<genexpr>FDistributeTranspiler._update_dist_lookup_table_vars.<locals>.<genexpr>)  s0      )".Ja=%%8 
".s   !1	1r   r%  	.pserver_r  )r<  rK  rT   rJ  r   nexttable_param_gradrx   rR   rQ   rE  r   r   r   r   rK   r  trainer_side_table_grad_list)	r5   
param_list	grad_listrI  r  paramgradtable_grad_varr   s	   `        r&   _update_dist_lookup_table_vars3DistributeTranspiler._update_dist_lookup_table_vars  s   
 %%,,,#-#-%t1N:  
 &%D99doo >> %  
 %) )".) %D!
 "2215N~~ "'s4+A+A'B!C5 "D ((*55 . 3 34Idoo=NiX]W^_+00,22,22	 6  "D51& $$ "'s4+A+A'B!C5 "D ((*55 . 3 34IeWE+00,22,22	 6  "D51 $$E55s%   GG'G<GA$G-AGc                    / n/ n[        5       nU R                   H  u  pE[        U5      [        :X  a  UR                  SL a  M)  UR
                  U;  a,  UR                  U5        UR                  UR
                  5        UR
                  U;  d  Mw  UR                  U5        UR                  UR
                  5        M     U R                  XU R                  5      u  pU R                  R                  (       ai  [        U[        U R                  5      U R                  R                  5      n[        U[        U R                  5      U R                  R                  5      nOB[        USU R                  R                  5      n[        USU R                  R                  5      n[        U5      [        U5      :X  d   eU R                  U R                   U5      U l        U R"                  R%                  5        Hi  u  pU R                   R'                  5       R)                  U5      n
U	 H5  nU R+                  U5      u  pnU R,                  R/                  U
UUUUSS9  M7     Mk     U R                  U R                   UU R0                  S:  S9U l        [4        R6                  " 5       U l        [;        Xg5       Hp  u  pTUR=                  S5      u  nnnUR=                  S5      u  nnnU R"                  U   [?        U5         U R8                  U R2                  U   [?        U5         '   Mr     [4        R6                  " 5       U l         U R                   Vs/ s H#  nU R@                  RC                  U/ / S.05      PM%       ng s  snf )NFr   r@  )rH  r3  rb   r3   r6  rm  )add_trainer_suffixr9   )r.  r/  )"r  rI  r   r   	trainablerT   rU   addra  r   r~   re   rQ   rE  rY   _create_vars_from_blocklistr<  rl  r   r   r[   r;  rF  rL  rD  rV  r  r  rh  zipr   rL   ri  update)r5   r\  r]  param_grad_setr  ggrad_blocksparam_blocks	orig_namer~  r  r  r6  rb   r3   g_nameg_bidr  r@   p_bidr  s                        r&   rT  'DistributeTranspiler._init_splited_varsE  s4    
	%%DAAw)#u(<vv^+!!!$""166*vv^+  #""166* & !% C C4#4#4!

 ;;## )D**+**K
 *D**+**L )1dkk88K *At{{99L ;3|#4444 "&!A!A"
 (,'='='C'C'E#I**779==iHH+-1-E-E.*F ""66')%!%! 7   , (F$ !% @ @#//!3 !A !
 #."9"9";2DA wws|FE1 wws|FE1 &&v.s5z: ##%%f-c%j9 3 &1%<%<%>" ,,	
, &&--rb23N.OP,	
 	
s   *Nc           	      N   SSK Jn  / U l        / U l        / U l        / U l        SnSnU(       Ga_  SnUR                  5       R                  nU GH2  nUR                  [        :X  d  M  U R                  UR                  S5      S   :X  d  M>  UR                  S5      (       d  [        S5      eSnUS:w  a  UO[        U5      R                  U5      nUR                  S	5      nUR!                  S
5      n	UR                  5       R"                  US      n
U R                  R%                  U
5        UR                  5       R"                  U	S      nU R
                  R%                  U5        U" UR                  5       U/5          O   U(       a  GM_  ['        [)        U R*                  5      5       GH.  nUR                  5       R-                  [/        S[/        U5      -   5      U R                  S   R                  U R                  S   R0                  U R                  S   R2                  S9nU R                  R%                  U5        UR                  5       R-                  [/        S[/        U5      -   5      U R
                  S   R                  U R
                  S   R0                  U R
                  S   R2                  S9nU R                  R%                  U5        GM1     UR                  5       R5                  USS	U R                  0S
U R                  0S9  UR                  5       R5                  US-   SSU R                  0S
U R                  0SU0S9  UR                  5       R5                  US-   SU R                  U R                  U R                  S.S
U R
                  0S9  g )Nr   r  r   TFr   is_distributedz[lookup_table_op that lookup an distributed embedding tableshould set is_distributed to truer   r   prefetch_compress_in_tmp_r  prefetch_compress_out_tmp_	split_ids)r   r   r   r   r   prefetchr'  r(  r   r   	merge_ids)r   Rowsr'  )r9  r  all_in_ids_varsall_prefetch_input_varsall_prefetch_output_varsall_out_emb_varsr   r   r   r   rJ  r   r   RuntimeErrorr   r   r   r   rU   rR   rQ   rE  r   rV   rK   r  r  )r5   r  rE  r  lookup_table_op_indexcontinue_search_lookup_table_opr
  r   ids_nameout_nameids_varr  r   r  s                 r&   rq  ;DistributeTranspiler._replace_lookup_table_op_with_prefetch  s    	E  "')$(*% " "*.'-.3+**,00GGG00288C=+;;77#344*@  7;3 1B6 .!']004 *
  "xxH!yy/H%22499(1+FG((//8%22499(1+FG))009 w335t<;  .-D 3t5567E))+664s5zAB))!,11**1-33**1-33	 7 F ((//7**,775E
BC**1-22++A.44++A.44	 8 G ))009 8$ 	))'4//0D889	 	* 	
 	))'!+556D99:* 	* 	
 	))'!+++4422
 D112 	* 		
r%   c                    UR                  5       R                  n[        U R                  5      nU GH  nXER                  ;   d  M  [        U5      R                  U5      nUR                  5       R                  US-   SSUR                  5       R                  U   /0SU R                  0[        [        0S9  UR                  5       R                  US-   SSU R                  0SU R                  (       a  U R                  U R                     /O/ 0S	US
U R                  [        [        [         U R"                  U   U/0S9    g    g )Nr   rw  r   r   r   r   r&  r'  r(  r   )r   r   r   rJ  r  r   r   r  r   r[  rc  ro  rx   r\  r   rd  re  rM  )r5   r  rE  r
  table_grad_namer   op_indexs          r&   rr  8DistributeTranspiler._split_table_grad_and_add_send_vars  sJ    &&(,,'8B"5"55=..r2$$&11"Q,$ 4 4 6 ; ;O LM #D$E$EF02IJ 2  $$&11"Q,!B!BC#~~ "==dooNO!#	  !2$doo-/E- 88I+0	 2 * E r%   c           	         UR                  5       R                  U R                     n/ nUR                  UR                  5      nU R
                  U   nUR                  5       R                  UR                  UR                  UR                  UR                  S9nU R                  U   n	UR                  5       R                  U	R                  U	R                  U	R                  U	R                  S9n
UR                  SXS.SU
0SSSS.S9  UR                  UR                  S	-   [        UR                  5      -   5        U$ )
Nr  lookup_sparse_tabler   r   Tr   )r  rt  r   r   r9   )r   r   rJ  r  r	  r|  r   rT   r   rK   r  r}  r   rU   rV   )r5   r  r  optimize_blockr  r  prefetch_blocktrainer_idspserver_idstrainer_outpserver_outs              r&   r  +DistributeTranspiler._create_prefetch_block/  sO    $00277H	(*%(66~7I7IJ22=A%224??!!!!####	 @ 
 33MB%224??!!!!####	 @ 
 	  &&7K(!"&!	 	! 		
 	&,,s"S););%<<	
 -,r%   c           
      R  ^  UR                  U5      n[        U 4S jT R                   5       5      nT R                  R	                  5       R
                  T R                     n[        [        R                  " UR                  S   [        [        T R                  5      5      -  5      5      n[        UR                  5      n	XS'   UR	                  5       R                  UR                   U	UR"                  [$        R&                  R(                  R*                  SS9n
U
R,                  R/                  [$        R&                  R(                  R*                  5        UR	                  5       R1                  T R                  R	                  5       R
                  [3        T R                  5         5      nUR	                  5       R1                  T R                  R	                  5       R
                  UR5                  S5      S      5      nT R6                  (       a  T R8                  S   n[;        T R<                  5       Vs/ s HS  nUR	                  5       R                  UR                    SU SU 3UR>                  UR                  UR"                  S	9PMU     nnURA                  S
SU0SU/00 S9  OyUR                   nT RB                  U   R                   nURE                  U5      (       d  [G        SU-   S-   UR                   -   5      eUR	                  5       RI                  UU5      nU
/U/U/S.nSU
/0n[J        RL                  " SUR>                  -   5        URA                  SUUS9  URO                  UR                   S-   [Q        URR                  5      -   5        U$ s  snf )Nc              3      >#    U  H=  nS UR                   ;   d  M  UR                  S 5      S   TR                  :X  d  M9  Uv   M?     g7f)r@  r   N)r   r   rJ  )rT  r   r5   s     r&   rV  DDistributeTranspiler._create_table_optimize_block.<locals>.<genexpr>Z  sG      
'"..(  !!$7 B's   A A	Ar   T)rT   rK   r  r   r   rC  r   r%  rX  r  sumr'  r   r   zorigin_grad_var: z
 grad_var:)r@  rB  rC  ParamOutz\distribute lookup table only support sgd optimizer, change it's optimizer to sgd instead of sgdr  r9   )*r  rY  rH  r<  r   r   rJ  rL   rM   rP   rK   rO   rQ   rE  r   r   rT   r  r   r   r   r`  descset_typer  r   r   rx   rZ  rR   rD  r   r   r[  r?   rs   _rename_varr  r  rU   rV   r	  )r5   r  r  r  r  r  table_opt_opr  zero_dimtable_shapery  rz  lr_varr`  r   pserver_side_table_grad_listorigin_grad_namesplited_grad_namer   r   s   `                   r&   r  1DistributeTranspiler._create_table_optimize_blockS  s    *77F  
''
 
  ..;;=BBOO
 II &&q)E#d6L6L2M,NN

 +112!A#002==!&&"((%%33 > 
	 	 4 4 B BC"//1AA,,.33doo.
 !--/??,,.33"">215
 >>!2215N #4#3#34, 5E  ,,.99*//0	%	-Y',,(..(..	 :  5 ) , %%9:
+	 &   (}} $ A A!d  %//0@AA ''("# mm$  '335AA "3H
  [J#H

 	{+j 	
 	!!uVW!M 	 3c/:M:M6N NOe,s   ?AN$c                     UR                  5       R                  SS[        R                  R                  R
                  S9  UR                  U5      nUR                  SSU R                  /00 SS0S9  UR                  $ )	z/
create a new block to handle save checkpoint.
kLookupTablePathTr   saver'  	file_pathnoner   )
r   r   r   r   r   r   r  r   rJ  r	  )r5   r  r  checkpoint_save_blocks       r&   r  2DistributeTranspiler._create_checkpoint_save_block  s    
 	$$&11#%%)) 	2 	
 !0 = =m L''$//*+'	 	( 	
 %(((r%   c           
         [         R                  " 5       n[         R                  " 5       nU HG  nUR                  S5      u  pxn	Xt;  a  / XG'   XG   R                  [	        U5      [	        U	5      45        MI     UR                  5        GH  u  pzUR                  5       R                  U5      n[        U
5      S:X  a  U R                  (       ad  U(       a]  UR                   SU R                   3nUR                  5       R                  X|5        UR                  5       R                  U5      /XW'   O,UR                  5       R                  UR                  5      /XW'   M  / XW'   UR                  nSn[        U5      S:  a  [        S USS S5      n[        U
5       H  u  nnUS   n	X-  nU/n[        U5      S:  a  UR!                  USS 5        SnU R                  (       a  U(       a  U SU SU R                   3nOU SU 3nUR                  5       R#                  US	UR$                  UR&                  US
9nXW   R                  U5        M     UR                  5       R)                  5         GM     U$ )a  
Create vars for each split.
NOTE: only grads need to be named for different trainers, use
      add_trainer_suffix to rename the grad vars.
Args:
    program (ProgramDesc): ProgramDesc which gradients belong.
    block_list (list[(varname, block_id, block_size)]): List of gradient blocks.
    add_trainer_suffix (Bool): Add trainer suffix to new variable's name if set True.
Returns:
    var_mapping (collections.OrderedDict(varname->[new_varname_variable])):A dict mapping
        from original var name to each var split.
r9   r   r%  r   c                 
    X-  $ r(   r   rE   s     r&   rH   BDistributeTranspiler._create_vars_from_blocklist.<locals>.<lambda>  s    r%   Nr   r>   F)rT   r   r  r   rK   )r  r  r   rU   rL   r   r   r[   rQ   rx   rT   r   r  rK   r   r   rn  r   r  r   r  )r5   r  
block_listrd  	block_mapvar_mapping	block_strr2   r3   r4   r   r  new_var_name
orig_shaper:  r   rd   rowssplited_shaper[   s                       r&   rg  0DistributeTranspiler._create_vars_from_blocklist  sY   "  ++-	!--/#I$-OOC$8!GT'%'	"%%s6{CI&>?	 $ (oo/NG++-11':H5zQ>>&8&.mm_Idoo=N#OL((*66wM,,.22<@,K(
  ,,.228==A,K( #%K !J !:!#$*&
12%! &e,5Qx0!%z?a'!((AB8!>>&8")6!Idoo5FG ! '.YfQC#8L**,77% %"..!' 8  $++C0) -*   "113W 0X r%   c           	          UR                  UR                  UR                  UR                  UR                  UR
                  US9$ )NrF  )r   rT   rK   r  r   r  )r5   rd   r[   r   s       r&   r  DistributeTranspiler._clone_var	  s@    ))))mm#   
 	
r%   c                 X    / nU  H!  nUR                  UR                  S   5        M#     U$ )Nr   )rU   rK   )r~  r   r  s      r&   rb  .DistributeTranspiler._get_splited_var_sections"	  s-    A""1771:. r%   c           
      T   U R                  U5      nUR                  [        R                  R                  R
                  :X  as  U R                  UR                     nU R                  U5      (       a  UU R                  U'   UR                  5       R                  US-   SSU0SU0SU[        [        0S9  g UR                  [        R                  R                  R                  :X  a5  UR                  5       R                  US-   SSU0SU0SU[        [        0S9  g [        S	5        g )
Nr   split_selected_rowsr'  r   r   r   split_byrefr)  z<Variable type should be in set [DENSE_TENSOR, SELECTED_ROWS])rb  r   r   r   r   r`  rM  rT   r  r   r   r  rc  ro  DENSE_TENSORr_  )r5   r  r  r   r~  r   r  s          r&   r^  %DistributeTranspiler._insert_split_op)	  s    88F==DLL00>>> $ < <X]] K889JKK# 445FG   "--ai*X-%)+B . 	 ]]dll22???  "--ai"X-)+B . 	 Nr%   c                    US:X  a  US;   a  U$  U$ US:X  a  US:X  a  U$  U$ US:X  a  US;   a  U$  U$ US;   a  US:X  a  U$  U$ US	:X  a  US
;   a  U$  U$ US:X  a  US:X  a  U$  U$ US:X  a  US;   a  U$  U$ US:X  a   U$ [        SU 35      e)zs
Returns the shape for optimizer inputs that need to be reshaped when
Param and Grad is split to multiple servers.
adam)Moment1Moment2adagradMomentadamax)r  InfNorm)momentumlars_momentumVelocityrmsprop)r  
MeanSquaredecayed_adagradftrl)SquaredAccumulatorLinearAccumulatorr  z2Not supported optimizer for distributed training: )rs   )r5   r  varkeyr  param_shapes        r&   rJ  /DistributeTranspiler._get_optimizer_input_shapeL	  s:    f//"" 04 1 	!!"" ". +  .."" /( % 55#"" $"  	!11"" 2  ))!"" "  DD"" E  
  DWIN r%   c                     SnSnSnUR                  S5      nUS:  a  XS-   S  nO[        U5      nUR                  S5      nUS:  a  XS-   U nO[        U5      nUS[        Xe5       nX$U4$ )Nr   r%  r   r   r>   )r  rQ   rS   )r5   r2   r  trainer_part
block_parttrainer_idxblock_indexs          r&   r2  'DistributeTranspiler._get_varname_partsr	  s    
ll;/!"?#45Lg,Kll8,! q;?Jg,KC$AB,66r%   c                 0    U R                  U5      u  n  nU$ r(   )r2  )r5   r2   origr  s       r&   _orig_varname"DistributeTranspiler._orig_varname	  s    ,,W5
ar%   c           
      h   UR                   nUR                  5       nS nU R                  U   S    H5  n	U R                  U	R                  5      U R                  U5      :X  d  M3  U	n  O   U(       d  g U R                  UR                  5      u  pnU(       a  SR                  X/5      nOU
nUR                  U   nUR                  UR                  S-   [        UR                  5      -   5        U R                  (       d+  U R                  R                  (       a  U R                  S:  a  / n[        U R                  5       H(  nU SU 3nUR                  UR                  U   5        M*     UR!                  SSU0SU00 S	9  UR!                  S
SU0SU0S
S[#        U R                  5      -  0S	9  U$ )Nr/  .r9   r   r%  r  r'  r   r   scaleg      ?)r  r   ri  r  rT   r2  joinr   rU   rV   r	  rx   r   r   rD  rR   r   rO   )r5   r  r  rk  r  r<  r  pserver_block
grad_blockrk  orig_varnamer7  trainer_namemerged_var_namer  
vars2merger   per_trainer_names                     r&   r  3DistributeTranspiler._append_pserver_grad_merge_ops	  s    !((,,.
++H5g>A!!!&&)T-?-?&.  
 ?  151H1HOO2
., !hh'ABO*O"''8

# 5N<N<N8O OPNN{{//  1$J4++,&5%6is#C !!-"4"45E"FG - $$Z(
+	 %  $$Z(
+eD,<,<&= =>	 %  r%   c                   ^^ TR                  TR                   S3TR                  TR                  TR                  SS9nTR                  S[
        R                  R                  R                  [
        R                  R                  R                  S/SS9n/ nU R                   H2  u  pxUR                  TR                  :X  d  M!  UR                  U5        M4     TR                  SXeS.S	U0S
9  UU4S jn	U	" 5       n
TR                  STUS.S	U
0S
9  U	" 5       nTR                  SXS.S	U0S
9  U	" 5       nTR                  SXS.S	U0S
9  U	" 5       nTR                  SX<S.S	U0S
9  U$ )Nz
.local_bakFrT   rK   r   r  r   z@TRAINER_ID@r   )rT   r   r  rK   r   ref_by_trainer_id)r'  	TrainerIdr   r  c                     > T R                  [        R                  " S5      TR                  TR                  TR
                  SS9$ )Ntmp_dc_outputFr  )r   r   generaterK   r   r  )rd   ry  s   r&   __create_temp_var__EDistributeTranspiler._append_dc_asgd_ops.<locals>.__create_temp_var__	  sA    ## ))/:oo^^oo! $  r%   elementwise_sub)r'  Yelementwise_mulelementwise_add)r   rT   rK   r   r  r   r   r   r  INT64r  rU   r   )r5   rd   ry  rz  local_param_baktrainer_id_var
ref_inputsr  r+  r  o1o2o3o4s    ``           r&   _append_dc_asgd_ops(DistributeTranspiler._append_dc_asgd_ops	  s   **NN#:.//// + 
 ))%%22,,&&,,# * 
 
++HAvv'!!%( , 	$#AO, 	 	
	 !"""9BK 	 	

 !""+BK 	 	

 !""+BK 	 	
 !""!+BK 	 	

 	r%   c                 N  ^ ^ UR                   nUR                  5       n	[        R                  " 5       n
UU 4S jnT R                  R
                  (       a  U" U5      nT R                  XU5      nUR                   GH  nUS:X  a  T R                  R
                  (       a  WX'   M+  UR                  U5      S   n[        R                  " 5       U;   a-  U	R                  U5      (       a  U	R                  U5      nUX'   M  XjU'   M  US:X  aG  U" U5      nU(       d    g U	R                  UR                  SUR                  UR                   S9nUX'   M  US:X  d  M  UR                  U5      S   nUU	R"                  ;   a&  U	R"                  UR                  U5      S      X'   GM*  UR                  5       R"                  U   nU	R                  UR                  UR$                  UR                  UR                   S9nUX'   GM     UR                   H  nS nUS;   a  M  T R&                  R                  5       R"                  UR                  U5      S      nU
S   nT R)                  UR*                  UUR                   UR                   5      nU	R                  UR                  UR$                  UR                  US9nUX'   M     T R-                  T R&                  R                  5       R"                  U5      nU
S   US	'   UR/                  UR*                  U
UUR1                  5       S
9  U
S   R*                  [        R2                  R4                  R6                  :X  aD  UR9                  [;        U
S   R                  5      S-   [;        U
S   R                  5      -   5        g g )Nc                    > S nTR                   T   S    H5  n[        UR                  U R                  S5      S   5      (       d  M2  Un  U$    U$ )Nr.  r@  r   )ri  rB   rT   r   )r   param_blockr  rk  r5   s      r&   _get_param_blockBDistributeTranspiler._append_pserver_ops.<locals>._get_param_block
  sX    K//9(C$QVVV\\'-B1-EFF"#K	 D r%   rB  r   r@  T)rT   r   r  rK   rC  rA  r  r   r9   )r  r   r  r  r   r   r  r   r   r   kNewGradSuffixr  r[   r   rT   r  rK   r   r   r<  rJ  r   _get_output_map_from_opr   r!  r   r   r`  rU   rV   )r5   r  r   rk  r  r<  r  r  r  r  r*  r  ry  dcr(  r  new_gradr  r  
lr_varnamerH  rN  r[   r   s   `  `                    r&   r  (DistributeTranspiler._append_pserver_ops	  sk    !((,,. ,,.
	 ;;%%(0I)).ZPB%%Cf};;--&(JO (.||C'8';$++-1AA)112BCC#0#4#45E#F*2
*43.v6"&11$)) $%++%++	 2  #)
& $\\#.q1
!3!33&3&8&8c9J19M&NJO!/!<!<!>!C!CJ!OJ*55'__$.$:$:(..(..	 6 F '-JOS &V %%CI   %%22499&,,s:KA:NOC"7+I77S#))Y__I #--XXOOii	 . F %JO- &2 ..,,.33V
 )1
  ""$	 	! 	
 f""dll&:&:&H&HH ''Jv&++,j)../0 Ir%   c                    SnUR                  5        H  u  pEU R                  UR                  5      U R                  UR                  5      :X  d  M?  UR                  R                  S5      S:X  d  M`  U R                  UR                  5      U R                  ;   d+  U R                  UR                  5      U R
                  ;   d  M  Un  U$    U$ )a  
Return pserver side grad/param variable, return None
if the variable is not grad/param, e.g.

    a@GRAD -> a@GRAD.block0
    a@GRAD -> a@GRAD (a is not split)
    fc_0.w_0 -> fc_0.w_0.block_0
    fc_0.w_0 -> fc_0.w_0 (weight is not split)
    _generated_var_123 -> None
Nr%  r   )r   r  rT   r  rM  rL  )r5   r[   var_dictr  r  rk  s         r&   _get_pserver_grad_param_var0DistributeTranspiler._get_pserver_grad_param_varq
  s     
NN$DA!!!&&)T-?-?-II66;;{+r1 **1662778--aff5778 &'
 % r%   c                    U R                  U R                  R                  5       R                  U5      nUR	                  5        HV  u  pV[        U[        5      (       d  U/nU H3  nXqR                  5       R                  ;  d  M"  UR                  U5        M5     MX     U R                  U R                  R                  5       R                  U5      nUR	                  5        HV  u  pV[        U[        5      (       d  U/nU H3  nXqR                  5       R                  ;  d  M"  UR                  U5        M5     MX     UR                  UR                  XHUR                  5       S9$ Nr   )r   r<  r   r   r   r   r   r  r  r   r   r!  )	r5   r  rd   r   r   r(  varlistr[   r   s	            r&   r  !DistributeTranspiler._clone_lr_op
  s+   ,,,,.33R
 #LLNLCgt,,")224999))#.  + ..,,.33R
 $MMOLCgt,,")224999))#.  ,   
 	
r%   c                 >   UR                   nU R                  U R                  R                  5       R                  U5      nUR                  5        H  u  pV[        U[        5      (       d  U/n[        [        U5      5       H  nXg   nU R                  XR                  5       R                  5      n	U	(       a  XU'   M=  UR                  UR                  5       R                  ;  a%  UR                  5       R                  U5      n
XU'   M  UR                  5       R                  UR                     Xg'   M     XdU'   M     U R                  U R                  R                  5       R                  U5      nUR                  5        H  u  pV[        U[        5      (       d  U/n[        [        U5      5       H  nXg   nU R                  XR                  5       R                  5      n	U	(       a  XU'   M=  UR                  UR                  5       R                  ;  a%  UR                  5       R                  U5      n
XU'   M  UR                  5       R                  UR                     Xg'   M     XkU'   M     UR                  UR                  UUUR!                  5       S9$ r  )r  r   r<  r   r   r   r   r   rR   rQ   r  rT   r  r  r   r   r!  )r5   r  r   r  r   r(  r  r   r[   r  r  r   s               r&   r  0DistributeTranspiler._append_pserver_non_opt_ops
  s>    ((,,,,.33V
 #LLNLCgt,,")3w<(j "==--/44
 !+AJXXW%9%9%;%@%@@$113CCCHF!'AJ!(!5!5!7!<!<SXX!FGJ ) "3K# +& ..,,.33V
 $MMOLCgt,,")3w<(j!==--/44
 !+AJXXW%9%9%;%@%@@$113CCCHF!'AJ!(!5!5!7!<!<SXX!FGJ ) #CL ," ''""$	 ( 
 	
r%   c                 6   [        UR                  R                  5       5      [        UR                  R                  5       5      -  (       dL  [        UR                  R                  5       5      [        UR                  R                  5       5      -  (       a  ggr  )r  r  r  r   )r5   op1op2s      r&   _is_op_connected%DistributeTranspiler._is_op_connected
  sg     sxx((*+cHH$$&/
 
))+,s3883L3L3N/OOr%   c                     SSK Jn  U" U5      n[        [        U5      5       HP  n[        U[        U5      5       H4  nX   nX   nU R	                  Xg5      (       d  M#  UR                  Xg5        M6     MR     U$ )Nr   	UnionFind)r9  r  rR   rQ   r  union)r5   rH  r  r  r   jr  r  s           r&   r  "DistributeTranspiler._create_ufind
  sk    C,'s<()A1c,/0"o"o((22KK)	 1 * r%   c                 F    SUR                   ;   a  SUR                   ;   a  gg)Nr@  rC  TF)r   )r5   r   s     r&   r  %DistributeTranspiler._is_optimizer_op
  s    bnn$2>>)Ir%   c                    U R                   U   S    Vs/ s H  o3R                  PM     nnUR                  S5      S   U;   a  gU H0  nUR                  S5      S   n[        XV5      (       d  M)  XV:w  d  M0    g   gs  snf )Nr.  r@  r   TF)ri  rT   r   rB   )r5   rk  r   r  param_namesnr^  s          r&   r  *DistributeTranspiler._is_opt_op_on_pserver
  s     66x@J
JqFFJ 	 
 88GQ;. )!,$Q..1: ! 
s   A<c                     [         R                  " 5       nUR                   HL  n/ nUR                  U5       H  nUR	                  X   5        M     [        U5      S:X  a	  US   X4'   MH  XSU'   MN     U$ )z8Returns a dict from op input name to the vars in varmap.r   r   )r  r  r   r   rU   rQ   r5   varmapr   iomapr(  r   r2   s          r&   r   +DistributeTranspiler._get_input_map_from_op  sg    '')>>CD88C=FO, )4yA~!!W
!c
 " r%   c                     [         R                  " 5       nUR                   HL  n/ nUR                  U5       H  nUR	                  X   5        M     [        U5      S:X  a	  US   X4'   MH  XSU'   MN     U$ )z9Returns a dict from op output name to the vars in varmap.r   r   )r  r  r   r   rU   rQ   r(  s          r&   r  ,DistributeTranspiler._get_output_map_from_op  sg    '')??CD99S>FO, *4yA~!!W
!c
 # r%   c                 b   / nU R                   R                  5       n[        UR                  5       GHs  u  p4[	        UR                  [        5      5      nU[	        [        5      :X  d%  U[	        [        5      [	        [        5      -  :X  d  M\  U R                  SL Ga  UR                  S:X  Ga  U R                  U R                   R                  5       R                  U5      nU R                  U R                   R                  5       R                  U5      nU H  nXx   n	M	     [        U R                  5       V
s/ s He  n
U R                   R                  5       R!                  W	R"                   SU
 3U	R                  U	R$                  U	R&                  U	R(                  S9PMg     nn
[        U R*                  R                  5       R                  5       H  u  pUR                  S:X  d  M  UR,                   H  n[/        UR1                  U5      5      S:X  d  M#  UR1                  U5      S   W	R"                  :X  d  MG  U R*                  R                  5       R                  U   R3                  S[5        S	U R                  -
  5      5        M     M     U H  nUR"                  W	R"                   SU R6                   3:X  a  Xl        U R*                  R                  5       R!                  UR"                  UR                  UR&                  UR$                  UR(                  [;        S5      S
9  M     [<        R>                  RA                  5       nURC                  U5        URE                  USSU0UU[        0S9nURG                  U5        [I        SUR                  5        GMv     U$ s  sn
f )NF	incrementr%  )rT   r   rK   r  r   r  r   r   rv   g        )rT   r   r  rK   r   initializerr  r'  r   zappend lr op: )%r<  r   r   r   rL   r   rc  LR_SCHED_OP_ROLE_ATTR_VALUEOPT_OP_ROLE_ATTR_VALUErx   r   r   r   r  rR   rD  r   rT   rK   r  r   r   r   rQ   r   r  rO   r   r   r   r   op_proto_and_checker_makerkOpRoleAttrNamer   r  rU   r,   )r5   r  rd   r   r   role_idr   r   r(  r   id_all_trainer_counter_inputsr   r[   op_role_attr_names                  r&   rg   DistributeTranspiler._get_lr_ops  sB   ##002"599-IE"''"789G#9::g+J*+J, ?, >>U*rww+/E!88++88:??F #::++88:??G  '&-l  ' $))9)9#:	2 $;C ++88:EE$/$4$4#5Yse!D!,!1!1"-"3"3"-"3"3(3(?(? F  $; / 	2 "+,,99;??" 77o5')$'		#$71$<(*		#q(9[=M=M(M$($8$8$E$E$G$K$K()%&&/i(/sT=M=M7M1N'& (7	"  :HH"-"2"2!39T__<MNO 03,,,99;FF!$!$"%))"%))(+(0 G   : 77GGI & $$U+))" #%?@ '02MN * B b!$bgg.A .B g	2s   ;A,N,c                 @   SSK Jn  / n[        5       nU R                   H>  nU R	                  U5      (       d  M  UR                  UR                  S5      S   5        M@     / nU R                  R                  5       nUR                   H3  n[        UR                  5      U-  (       d  M"  UR                  U5        M5     U" UR                  5      nUR                   Hv  nUR                   Hc  n	X:w  d  M
  U R                  X5      (       d  M"  U R	                  U5      (       a  M:  U R	                  U	5      (       a  MR  UR                  X5        Me     Mx     UR                   H6  nU H-  n	UR                  X5      (       d  M  UR                  U5          M4     M8     U$ )Nr   r  rC  )r9  r  r  rH  r  rf  r   r<  r   r   r  rU   r  r  is_connected)
r5   r  r  lr_varsr   find_opsrd   r  r  r  s
             r&   _get_lr_ops_deprecated+DistributeTranspiler._get_lr_ops_deprecatedb  sJ   C%##B$$R((BHH^4Q78 $ ##002))B2&&''11#  %))$99Cyy J--c77 11#66 11#66KK) !  99C%%c//MM#&	    r%   c                    [         R                  n[         R                  R                  R                  nUR	                  5       UR
                  ;   a8  [        UR                  5       UR	                  5          5      [        U5      :X  a  ggr  )r   r3  OpRoleOptimizer4  
attr_namesrL   r!  )r5   r   op_makeroptimize_roles       r&   _is_opt_role_op$DistributeTranspiler._is_opt_role_op  sn     2277>>GG##%63LLN83356<
<  r%   c                    U R                   R                  5       n/ n/ n[        5       nU R                   R                  5       R                  nUR                   GH`  nU R                  U5      (       GaE  [        UR                  5       ;   a  [        UR                  [        5      ;   as  U R                  R                  S:w  aY  U R                  R                  S:w  a?  UR                  S[        [        R                  R                   R"                  5      5        M  UR%                  U5        UR                  [&        5      (       ao  UR                  [&        5      S   nUR                  [&        5      S   nXt;  a7  UR)                  U5        [+        SXx5        UR%                  XW   XX   /5        GMZ  GM]  GM`  GMc     U R-                  5       n	U	(       a  X9-   nX#4$ )z
Get optimizer operators, parameters and gradients from origin_program
Returns:
    opt_ops (list): optimize operators.
    params_grads (dict): parameter->gradient.
r"  r   op_roler   r   zadding param_grad pair: )r<  r   r  r   r   rF  OP_NAME_SCOPEr!  CLIP_OP_NAME_SCOPEr   r   r   r  rL   r   r3  rA  BackwardrU   re  rf  r,   _get_distribute_update_vars)
r5   rd   opt_opsrI  optimize_paramsorigin_var_dictr   r  	grad_namespecial_distribute_update_varss
             r&   rG  'DistributeTranspiler._get_optimize_pass  s    ##002%--::<AA))B##B'' "R\\^3*bggm.DD((G3((L8LL!D;;BBKKL r"77011!#)>!?!BJ "(= >q AI!8'++J76
N$++ / ; / : 9 2 ; @ *.)I)I)K&)'HL$$r%   c                    U R                   R                  5       nU R                   R                  5       R                  n/ nUR                   Hl  nSnXTR	                  5       ;   d  M  UR                  U5      (       d  M2  UR                  U5      R                  S5       H  nUR                  X&   5        M     Mn     [        [        U5      5      n/ nU H  n	UR                  X/5        M     U$ )a  
This Function is used for a special model, like PyramidDnn which has pyramid hash op.
Some Parameters don't use optimizing op to update its value, but updated in its BP process.
In these cases, Transpilse can't find these special vars by optimizing op information.
So we add this function and add attr "distribute_update_vars" to tell transpiler these Parameter
need to be updated in distribute training.
We assume these special var send and receive the same var_name.
distribute_update_varsr   )
r<  r   r   r   r!  r   r   rU   r   r  )
r5   rd   rP  r.  r   special_attrr  unique_paramsrI  r[   s
             r&   rM  0DistributeTranspiler._get_distribute_update_vars  s     ##002--::<AA))B3L||~-77<((&(ggl&;&A&A#&F
o&AB 'G	  S[) C
+ !r%   )"r{  r~  r|  r}  r   r   r   rM  r\  rh  rV  rK  rN  rH  r<  r>  r  ri  rL  rl  rI  rE  r  r   r   r   r   rx   rJ  rZ  r   rD  r[  rF  r(   )NT)NNT)N127.0.0.1:6174r   TNrY  )T)NN)F)8r   r   r   r   r}   r6   r   r   r   r   r  r  r   r  r  r  r  rp  r
  r  r  r;  rs  ra  rT  rq  rr  r  r  r  rg  r  staticmethodrb  r^  rJ  r2  r  r  r  r  r  r  r  r  r  r  r  r   r  rg  r>  rF  rG  rM  r$   r   r%   r&   r   r     si   HT 0 ;8F 0
d	!O4b !)EIN-$Q0$HL3#j^@zx	!-H ?CcL+:D/L)%V[
|`
D(T"-HeN). 7<FP
  !F$L7$4l?Bpd6
22
h
DL%N	1%fr%   r   )9r}   r  r  rM   r   r:  	functoolsr   numpyrW  paddler   paddle.base.frameworkr   paddle.frameworkr   r   r   Cpaddle.incubate.distributed.fleet.parameter_server.ir.ps_dispatcherr	   r
   paddle.nn.initializerr   paddle.staticr   r   r   paddle.utilsr   r   LOOKUP_TABLE_GRAD_TYPErJ  rK  r3  kOpRoleVarAttrNamere  r4  rc  r8  rA  rB  r2  RPCrd  Distro  LRSchedr1  r)   r   r,   r.   rB   re   rg   r   r   r   r%   r&   <module>ri     sJ  "    	 
    / 1 1 + 
 %#%67 -/EF  77JJL ##335 ) 88??HH 88??CC 99@@EE "==DDLL 	 
; ;H,^g! g!T

 

V+ V+r%   