
    Αi                     F   S SK r S SKJr  S SKrS SKJr  S SKJrJr  S SK	J
r
  S SKJrJr  SSK7  \" S	5       " S
 S\5      5       r\" S5       " S S\5      5       r\" S5       " S S\5      5       r\" S5       " S S\5      5       r\" S5       " S S\5      5       r\" S5       " S S\5      5       r\" S5       " S S\5      5       r\" S5       " S S \5      5       r\" S!5       " S" S#\5      5       r\" S$5       " S% S&\5      5       r\" S'5       " S( S)\5      5       rg)*    N)defaultdict)	framework)PassBaseregister_pass)core)	ParameterProgram   )*append_send_ops_passc                   F   ^  \ rS rSrU 4S jrS rS rS rS rS r	Sr
U =r$ )	AppendSendOpsPass   c                 "   > [         TU ]  5         g Nsuper__init__self	__class__s    i/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/passes/ps_trainer_pass.pyr   AppendSendOpsPass.__init__           c                     gNT r   s    r   _check_selfAppendSendOpsPass._check_self        r   c                     gr   r   r   
other_passs     r   _check_conflict!AppendSendOpsPass._check_conflict#   r"   r   c                    U[         :X  a  / nO-U Vs/ s H   nUR                  5       R                  U   PM"     nn/ n	U[        R                  [        R
                  4;   a0  UR                  5       R                  [        R                  " 5       S9n	UR                  5       R                  SSU0SU	0SU/SUSU[        [        0S9  U	$ s  snf )	N)namesendXOutsend_varnames	is_sparsetable_idtypeinputsoutputsattrs)STEP_COUNTERglobal_blockvarsDistributedModeSYNC
HALF_ASYNC
create_varr   generate_control_dev_var_name	append_opRPC_OP_ROLE_ATTR_NAMERPC_OP_ROLE_ATTR_VALUE)
r   program
union_varsqueuer.   r/   ps_modesend_input_vars	union_vardummy_outputs
             r   _append_send_op!AppendSendOpsPass._append_send_op&   s     L  O ",!+I $$&++I6!+  
 ++_-G-GHH"//1<<<<> = L 	(()L)%YH%'=		 	) 
	
 -s   'Cc                 h    UR                  5       R                  SSU0S/ 0SUSS[        [        0S9  g )Nsend_barrierr+   r,   
trainer_id
half_asyncTr0   )r6   r=   r>   r?   )r   r@   dummysrK   s       r   _append_barrier_op$AppendSendOpsPass._append_barrier_opD   sD    ((=BKjd%'=	 	) 		
r   c                 >   UR                   nUS   n[        XDS   S9n/ nUR                  5        GH$  u  pU	R                  5       (       a  U[        R
                  :w  a  M1  U	R                  5       (       d  U[        R
                  :X  a  M\  U	R                  5       [        US   R                  R                  5      :w  a  M  [        U	R                  5       5      S:  a  M  U	R                  5       (       a  SOSn
U	R                  5       (       a  SOU
n
UR                  U R                  UU	R                  5       UU
U	R!                  5       U5      5        GM'     U[        R"                  [        R$                  4;   a!  ['        US   5      nU R)                  XU5        g g )	NrC   is_heter_ps_modesplit_dense_tablelossr      r
   
role_maker)_attrsget_the_one_send_contextitemsr.   r8   GEO
program_ididblockr@   lenremote_sparse_idsis_distributedappendrG   origin_varnamesr/   r9   r:   get_role_idrN   )r   main_programstartup_programpass_ctxr4   rC   send_ctxrM   merged_namer*   r.   rK   s               r   _apply_single_impl$AppendSendOpsPass._apply_single_implP   sb   	" ,+=%>
 !)!1K~~G/B/B$BNN$$'_5H5H*H BuV}':':'B'B$CC4))+,q0!^^--1I!0022	IMM$$ ((*MMO	 "2* ++_-G-GHH$U<%89J##L*E Ir   r   )__name__
__module____qualname____firstlineno__r   r    r&   rG   rN   ri   __static_attributes____classcell__r   s   @r   r   r      s(    <

!F !Fr   r   distributed_ops_passc                   L   ^  \ rS rSrU 4S jrS rS rS rS rS r	S r
S	rU =r$ )
DistributedOpsPasst   c                 >   > [         TU ]  5         0 U l        0 U l        g r   )r   r   w_2_table_idemb_sizer   s    r   r   DistributedOpsPass.__init__v   s    r   c                     gr   r   r   s    r   r    DistributedOpsPass._check_self{   r"   r   c                     gr   r   r$   s     r   r&   "DistributedOpsPass._check_conflict~   r"   r   c                 *   US   (       a  g [        U5      S:X  a  g S nS nSnUR                  5        H
  u  pU	S   n
  O   W
R                  S5      (       a  U
R                  S5      nUR	                  S5      n[        U5      S:X  a  US   S:X  a  US   nUS	   nXR                  5       R                  ;   aZ  XR                  5       R                  ;   a=  UR                  5       R                  U   nUR                  5       R                  U   nS
nO[        R                  S5        U(       d  [        S5        UR                  5       R                  S[        R                  R                  R                  SS
S9nUR                  5       R                  SS0 SU0S/UR                   SS.S9  UR                  5       R                  S[        R                  R                  R                  SS
S9nUR                  5       R                  SS0 SU0S/UR                   SS.S9  UR                  5        GH  u  pUR                  5       R"                  nU	 Vs/ s H  oR%                  U5      PM     nnU	 Vs/ s H1  oR                  5       R                  UR'                  S5      S      PM3     nnUR                  5       R                  U	S   R)                  S5      S      nU R*                  U   nU	S   R                  S5      nU	S   R                  S5      nU	S   R,                  nU	 Vs/ s H  oR                  S5      PM     nn[        SU5        U	 Vs/ s H2  nUR                  5       R                  UR'                  S5      S      PM4     nnUS S S2    H"  nUR                  5       R/                  U5        M$     UR                  5       R1                  SUUUUUS.SU0UUUU R2                  U   UUS.S 9  GM     g s  snf s  snf s  snf s  snf )!N
use_ps_gpur   Fentry:   show_click_entryrU   r
   TzGShowClickEntry configured, but cannot find show/click var, will not usez+ShowClickEntry not configured, will not useshow)r)   dtypepersistablestop_gradientfill_constantr,   )shaper   valueindexr1   r2   r3   r4   clkIdsW@GRADpadding_idxr`   slotzdebug zcb slots: zOut@GRADdistributed_push_sparse)r   WOutputsShowsClicksr   )r`   r   r/   size
use_cvm_opslotsr0   )r^   rY   has_attrattrsplitr6   r7   warningswarnprintr;   r   VarDescVarTypeFP32
_insert_opr   opsr   inputoutputrw   r1   
_remove_opr=   rx   )r   _programpush_sparse_opsr4   r   r   r   	use_entryparamr   op_firstr   show_var_nameclick_var_nameall_opsopop_idxsr2   wr/   r   r`   op_typer   r3   idxs                             r   _push_sparse_fuse$DistributedOpsPass._push_sparse_fuse   s   1$	)//1JE1vH 2 W%%MM'*EKK$E5zQ58/A#A %a!&q!%:%:%<%A%AA&*?*?*A*F*FF#00277FD"//166~FC $IMMa ?@((*55ll**//!"	 6 D !!#..$S!ZZ / 
 '')44ll**//!"	 5 C !!#..$S YY / 
 *//1JE++-11G3673R}}R(3G7KNKNR%%',,RXXe_Q-?@3   %%',,SV]]8-DQ-GHA((/Ha&++m4K V[[)9:N!fkkG/23sWWV_sE3%u- B %%',,RXXj-A!-DE  
 tt}%%'2237 % !!#--.!&!! #G,&4#. ( MM%0"," . / 27 4s   P48P*P9Pc                 B   S nUS   (       a0  / n/ n/ n/ n	/ n
[        UR                  5       R                  5      S-   nUR                  5        GH  u  pUR                  5       R                  nSnUS   (       a  US   R	                  S5      nU Vs/ s H2  nUR                  5       R
                  UR                  S5      S      PM4     nnUR                  5       R
                  US   R                  S	5      S      nUR                  S   U R                  U'   US
   UR                     nSnUR                  5        H,  u  nnUUR                  5       ;   d  M  UR                  5       nM.     US:X  a  [        S5      eUU R                  U'   US   R	                  S5      nUS   R	                  S5      nUS   R                  nU Vs/ s H2  nUR                  5       R
                  UR                  S5      S      PM4     nnU" UUU5        U Vs/ s H  nUR!                  U5      PM     nnUS S S2    H"  nUR                  5       R#                  U5        M$     S/[        U5      -  n[        UR                  5       R                  5      S-   /[        U5      -  n[%        UR                  5       R                  5       GH  u  nn['        S[        UR(                  5      5       HZ  nUR                  UR(                  U   5      n [%        U5       H*  u  n!n"U"R                  U ;   d  M  [+        UUU!   5      UU!'   M,     M\     ['        S[        UR,                  5      5       HZ  nUR                  UR,                  U   5      n#[%        U5       H*  u  n$n%U%R                  U#;   d  M  [/        UUU$   5      UU$'   M,     M\     GM     US   (       a  WR1                  U5        WR1                  U5        WR1                  U5        W	R1                  U5        W
R1                  UR                  S   /[        U5      -  5        [/        / UQWP76 nGM  [/        U5      [+        U5      -
  S:  aX  [+        U5      S:X  a  [/        U5      n&O[+        U5      S-   n&UR                  5       R3                  U&SUUS.SU0UUUUUS.S9  GMw  ['        [        U5      5       H<  nUU   n&UR                  5       R3                  U&SUU   /US.SUU   /0UUUUUS.S9  M>     GM     US   (       a  [        W5      S:  a  [+        W5      S:  a  [        S5      eUR                  5       R3                  WSSU0SW	0W
SSS.S9  [4        R6                  " 5       n' U V(s/ s H  n([9        U(R                  5      PM     n)n(U'R;                  U)5        U
 V*s/ s H  n*U*S-
  PM
     n+n*U'R=                  U+5        g g g s  snf s  snf s  snf s  sn(f ! [         a    [        S5      ef = fs  sn*f )Nc                 x   U R                  5       n[        UR                  5      nSnS/[        UR                  5      -  nS/[        UR                  5      -  n[        UR                  5       GH  u  p[	        S[        U	R
                  5      5       Ha  n
Xh   S:X  a    OXU	R                  U	R
                  U
   5      n[        U5       H'  u  pUR                  U;   d  M  SXh'   [        XX5      n  M_     Mc     [	        S[        U	R                  5      5       Hb  n
Xx   S:X  a    M  U	R                  U	R                  U
   5      n[        U5       H'  u  nnUR                  U;   d  M  SXx'   [        XH5      nM)     Md     GM     [	        [        UR                  5      5       H,  n
Xj   S:X  d  M  Xz   S:X  d  M  [        R                  S5          g    XE:  Ga  / n[	        US-   [        U5      5       H-  n
Xj   S:X  d  M  UR                  UR                  U
   U
45        M/     [        U5       GH  u  p/ n[        5       nUR                  U	S   5        UR!                  U	S   5        SnU[        U5      :  Ga  UU   nUR                  U   n	/ n[	        S[        U	R                  5      5       H2  nU	R                  U	R                  U   5      nUR                  U5        M4     [	        US-
  US-
  S5       H  nUR                  U   nUU;   a  M  Sn[	        S[        UR
                  5      5       Hd  nUR                  UR
                  U   5      n[	        [        U5      5       H"  nUU    H  nUU;   d  M  Sn  O   U(       d  M"    O   U(       d  Md    O   U(       d  M  UU   (       a  [        R                  S5            g UR                  U5        UR!                  UR                  U   5        GM      US-   nU[        U5      :  a  GM  UR#                  5         U H  nUR$                  R'                  U5      nUR)                  UR                  U   R$                  5        UR$                  R+                  US-   US-   5        UUR                  U   l        UR                  R-                  U5      nUR-                  U5      n UR-                  U5      n!UR                  R/                  UU5        UR/                  UU 5        UR/                  UU!5        US-   nGM     GM     UR$                  R1                  5       [        UR                  5      :X  d   e[	        [        UR                  5      5       H;  n
UR$                  R3                  U
5      UR                  U
   R$                  :X  a  M;   e   g g )	Nr   r   rU   zunable to re-arrange dags order to combine distributed embedding ops because a op both needs embedding table's output as input and produces ids as the same embedding table's inputFTzDunable to re-arrange dags order to combine distributed embedding opsr
   )r6   r^   r   	enumeraterangeoutput_namesr   r)   maxinput_namesr   minr   r   ra   setaddsortdescr   	copy_fromr   popinsertop_sizer   )"r@   r2   r3   r6   min_output_indexmax_input_indexinput_indexesoutput_indexesr   r   ioutsin_idin_varinsout_idout_varmove_opsrB   visitedstartpos	op_inputskjop1foundtyr   r   	insert_opinput_stateoutput_states"                                     r   dag_check_up_and_reorderFDistributedOpsPass._pull_sparse_fuse.<locals>.dag_check_up_and_reorder   s   "//1L"<#3#34 OC#l&6&6"77MS3|'7'7#88N$\%5%56q#boo"67A$)Q.99R__Q%78D)26):!;;$.12M..1/.GO!	 *;	 8 q#bnn"56A%*a/((2>>!#45C+4W+="<<3.23N//23C/I, ,>	 7 7( 3|//01 #q(^->!-CMM N  2  1/!3S5GHA$'1, )9)9!)<a(@A I 'x0EAE!eGLLA'KK1&E#e*,#El)--c2$&	!&q#bnn*=!>A"$((2>>!+<"=C%,,S1 "? "'sQw0@10Db!IA"."2"21"5C"g~ ($)E%*1c#2B2B.C%D'*zz#2B2B12E'F).s9~)>A-6q\+,948E,1 .: (-u(- *? $)5$) &E  %u#1!#4$,MM(n%& %+ %Q 'L,<,<Q,? @/ "J0 !&	?  #e*,B JJL!&+00;;<LM|'7'7'>'C'CD$))44UQY	J7;((/4$0$4$4$8$8$?	&3&7&7&>'5'9'9%'@$((//0@)L%,,-={K&--.>M+;a+?( "'Q 1j $((002c,:J:J6KKKKs<#3#345A',,//2l6F6Fq6I6N6NNNN 6w 2r   r   rU    rQ   r   	op_devicer   r   param_name_to_grad_namer   z0can not find suitable sparse table, please checkr   r`   r,   distributed_lookup_table)r   r   r   )r`   r   r/   lookup_table_versionr   r   z,There can't be ops before embedding in gpupspull_gpups_sparseT)r   r`   r.   z<The slot name in gpups Should be able to convert to integer.r   )r^   r6   r   rY   r   r7   r   r   rx   r)   rb   r/   
ValueErrorrw   r1   r   r   r   r   r   r   r   r   r   extendr   r   PSGPUintset_slot_vectorset_slot_dim_vector),r   r   pull_sparse_opsr4   rg   r   gpups_inputs_idxsgpups_outputs_idxsgpups_inputsgpups_outputsgpups_w_sizegpups_min_distributed_idxr   r   r   r   r   r2   r   	grad_namer/   r)   ctxr   r`   r   r3   r   r   inputs_idxsoutputs_idxsr   r   r   r   r   r   r   distributed_idxr   vargpu_slotxgpu_mf_sizess,                                               r   _pull_sparse_fuse$DistributedOpsPass._pull_sparse_fuse   s   ]	O~  "!#LML(+H,A,A,C,G,G(H1(L%)//1JE++-11GI'(FKK4	KNKNR%%',,RXXe_Q-?@3   %%',,SV\\#->q-ABA#$771:DMM% 78@IH%^^-	c 3 3 55"||~H . 2~ F  (0De$a&++m4K V[[)9:N!fkkG MPLOb%%',,RYYu-=a-@AC   %Xvw?3673Rw}}R(3G7tt}%%'2237 % $V,K 5 5 7 ; ;<q@ACLPL$X%:%:%<%@%@ARq#boo"67A99R__Q%78D)26):v!;;$.14S+e:L1MK. *; 8
 q#bnn"56A((2>>!#45C+4W+="<<3.36 #\&%94L0 ,> 7 B \"!((5")),7##F+$$W-##QWWQZL3v;$>?,/ --7-) < 3{#33q8{#r)&)'lO&)+&6&:O%%'22)3#)2&0*8'2$,07%. 3  s;/0A&-ajO))+66-7(.q	{;!*WQZL 9.<+6(04;)2 7  1q 2R 3|#4q#8$%) !OPP!!#../(< .(&*!% /  JJLE5AB\cCM\B
 !!(++78<aAE<L8%%l35 $9I0 8r C  R 
 9s6   9W/
9W4W9X W>3X 
X>X Xc                    0 n0 n0 n0 nSnUR                  5       R                   GH  nUR                  [        R	                  5       ;   a  UR                  S5      SL a  UR                  [        UR                     5      S   n	US   (       a#  US   (       d  XR                  S5      S   S   -  n	XS   ;   a  M  UR                  U	/ 5      nUR                  U5        XcU	'   UR                  U	/ 5      n
U
R                  UR                  S5      S   5        XU	'   UR                  S	:X  d  GM	  SnGM     UR                  5       R                   H  nUR                  [        R	                  5       ;   d  M'  UR                  [        UR                     5      S   n	X;   d  MS  UR                  S5      S   XI   ;   d  Mo  UR                  U	/ 5      nUR                  U5        XeU	'   M     X5U4$ )
NFremote_prefetchTr   rQ   is_fl_ps_moder   local_sparsecvm)
r6   r   r1   SPARSE_OP_TYPE_DICTkeysr   r   getra   SPARSE_GRAD_OP_TYPE_DICT)r   r   r4   r   pull_sparse_idsr   r   r   r   
param_nameidss              r   _get_pull_sparse_ops'DistributedOpsPass._get_pull_sparse_ops  s   
'')--B.3355GG-.$6XX&9"''&BCAF
+,U?5K((5/!"4Q"77J~!66%))*b9

2.1
+%))*b9

288E?1-..1
+ww%!
% .( '')--Bww27799XX&>rww&GHK
1*o.II)--j"=CJJrN25J/ . ;;r   c                     UR                   nU R                  X5      u  nnn[        SR                  US   5      5        [	        XDS   S9nU R                  XXH5        U R                  XXG5        g )Nz,is_heter_ps_mode in distributed_ops_pass {}?rQ   rR   )rW   r  r   formatrX   r   r   )	r   rd   re   rf   r4   r   r   r   rg   s	            r   ri   %DistributedOpsPass._apply_single_impl  s    
 %%l:		
:AA()	

 ,+=%>
 	|eN|ePr   )rx   rw   )rk   rl   rm   rn   r   r    r&   r   r   r  ri   ro   rp   rq   s   @r   rt   rt   t   s2    
l\k4Z%<NQ Qr   rt   delete_optimizer_passc                   F   ^  \ rS rSrU 4S jrS rS rS rS rS r	Sr
U =r$ )	DeleteOptimizesPassi  c                 "   > [         TU ]  5         g r   r   r   s    r   r   DeleteOptimizesPass.__init__  r   r   c                     gr   r   r   s    r   r    DeleteOptimizesPass._check_self  r"   r   c                     gr   r   r$   s     r   r&   #DeleteOptimizesPass._check_conflict  r"   r   c                    / n/ n/ n/ nU H  nUR                  UR                  5        M      U H>  nUR                  UR                  5        UR                  UR                  S5      5        M@     [        [	        U5      5      n[        [	        U5      5      n[        SU SU SU 35        U H"  n	X;   a  M
  X;  d  M  UR                  U	5        M$     [        [	        U5      5      n
[        UR                  5       U5        U
 HH  n	UR                  5       R                  U	5      (       d  M)  UR                  5       R                  U	5        MJ     g )Nop_role_varzremote_optimize_vars: z , remote_optimize_op_role_vars: z, local_optimize_vars: )r   input_arg_namesr   listr   r   ra   
delete_opsr6   has_var_remove_var)r   r   remote_optimize_opslocal_optimize_opslocal_optimize_varsremote_optimize_varsremote_optimize_op_role_varsoptimize_need_delete_varsr   r   need_delete_optimize_varss              r   _delete_optimizer_op_and_vars1DeleteOptimizesPass._delete_optimizer_op_and_vars!  s_    !!')$$&!$B&&r'9'9: % &B ''(:(:;(//0FG &  $$% 
 (,,-(
$ 	$%9$::Z[wZx  yP  Qd  Pe  f	
 (C)6)005	 (
 %)-F)G$H!8((*,?@,C$$&..s33%%'33C8 -r   c           	          US   R                  5       R                  S   nUR                  5       R                  UR                  UR                  UR
                  UR                  UR                  SS9  g )Norigin_main_programlearning_rate_0T)r)   r   r   r1   	lod_levelr   )r6   r7   r;   r)   r   r   r1   r.  )r   rd   r4   lr_vars       r   _add_lr_varDeleteOptimizesPass._add_lr_varE  sm     '(557<<=NO 	 	!!#..,,,,&& 	/ 	
r   c                 6   UR                   n[        U5      n[        XS   5      n[        U5      nUR                  U5        [	        [        U5      [        U5      -
  5      nU R                  XU5        [        US   S5      (       a  U R                  X5        g g )Nremote_sparser,  lr_scheduler)	rW   get_optimize_ops
get_lr_opsr   r  r   r)  hasattrr0  )	r   rd   re   rf   r4   all_optimize_opsr"  lr_opsr#  s	            r   ri   &DeleteOptimizesPass._apply_single_implS  s    +L9.0
 L)""6*! !C(;$<<
 	**/A	
 5./@@\1 Ar   r   )rk   rl   rm   rn   r   r    r&   r)  r0  ri   ro   rp   rq   s   @r   r  r    s'    "9H
2 2r   r  delete_extra_optimizer_passc                   :   ^  \ rS rSrU 4S jrS rS rS rSrU =r	$ )DeleteExtraOptimizerPassif  c                 "   > [         TU ]  5         g r   r   r   s    r   r   !DeleteExtraOptimizerPass.__init__h  r   r   c                     gr   r   r   s    r   r    $DeleteExtraOptimizerPass._check_selfk  r"   r   c                     gr   r   r$   s     r   r&   (DeleteExtraOptimizerPass._check_conflictn  r"   r   c                    UR                   n/ n/ n/ n[        U5      n[        XS   5      n	[        [        U5      [        U	5      -
  5      n
/ nU
 H  nUR	                  UR
                  5        M      U	 H>  nUR	                  UR
                  5        UR	                  UR                  S5      5        M@     [        [        U5      5      n[        [        U5      5      nU H*  nX;   a  M
  SU:X  a  M  X;  d  M  UR                  U5        M,     [        [        U5      5      n/ nU HY  n/ nUR                  5       R                   H%  nXR                  ;   d  M  UR                  U5        M'     UR	                  U5        M[     [        UR                  5       U5        U HH  nUR                  5       R                  U5      (       d  M)  UR                  5       R                  U5        MJ     g )Nr3  r  r-  )rW   r5  r  r   r   r  r   ra   r6   r   output_arg_namesr  r   r!  )r   rd   re   rf   r4   r%  r&  r'  r8  r"  r#  r$  r   r   r(  init_opsparam_init_ops                    r   ri   +DeleteExtraOptimizerPass._apply_single_implq  s   !')$$&!+L9.0
 " !C(;$<<
 !$B&&r'9'9: % &B ''(:(:;(//0FG &  $C(<$=>'+C0L,M'N$'C) C'6)005 ( %)-F)G$H!,CM%22488---!((, 9 OOM* - 	?//18<,C++-55c::,,.::3? -r   r   
rk   rl   rm   rn   r   r    r&   ri   ro   rp   rq   s   @r   r=  r=  f  s    +@ +@r   r=  fake_init_ops_passc                   F   ^  \ rS rSrU 4S jrS rS rS rS rS r	Sr
U =r$ )	FakeInitOpsPassi  c                 "   > [         TU ]  5         g r   r   r   s    r   r   FakeInitOpsPass.__init__  r   r   c                     gr   r   r   s    r   r    FakeInitOpsPass._check_self  r"   r   c                     gr   r   r$   s     r   r&   FakeInitOpsPass._check_conflict  r"   r   c                 j    [        US   S5      n[        US   S5      n[        [        X#-   5      5      $ )Norigin_main_programsTF)get_sparse_tablenamesr  r   )r   r4   dist_varnamessparse_varnamess       r   _get_sparse_table_names'FakeInitOpsPass._get_sparse_table_names  sC    -()4
 0()5
 C7899r   c           
      \   U GH%  nUR                  5       R                  U   n[        U5      R                  S5      S   R	                  5       R                  5       S   US   ;   a  Mf  / nUR                  5       R
                   H%  nXGR                  ;   d  M  UR                  U5        M'     [        U5      nUS:w  a  [        S[        U5      -   5      eUS   n	UR                  5       R                  S0 SU0S	U	R                  S	5      0S
9  [        UR                  5       U5        GM(     g )Nr   r   r   r  rU   z&table init op num should be 1, now is 	fake_initr,   r   r0   )r6   r7   strr   stripr   rE  ra   r^   r   r=   r   r  )
r   re   sparse_table_namesr4   
table_name	table_vartable_param_init_opr   init_op_numtable_init_ops
             r   _fake_init_sparsetable&FakeInitOpsPass._fake_init_sparsetable  s.    -J'446;;JGII$$S)!,224::<R@() "$%22488!4!44'..r2 9 12Ka <s;?OO  02M((*44 	* 2 27 ;<	 5  3357JK/ -r   c                 b    UR                   nU R                  U5      nU R                  X%U5        g r   )rW   rX  rd  )r   rd   re   rf   r4   sparse_tabless         r   ri   "FakeInitOpsPass._apply_single_impl  s+    44U;##OEJr   r   )rk   rl   rm   rn   r   r    r&   rX  rd  ri   ro   rp   rq   s   @r   rL  rL    s)    :L:K Kr   rL  ps_gpu_passc                   L   ^  \ rS rSrU 4S jrS rS rS rS rS r	S r
S	rU =r$ )
	PsGpuPassi  c                 "   > [         TU ]  5         g r   r   r   s    r   r   PsGpuPass.__init__  r   r   c                     gr   r   r   s    r   r    PsGpuPass._check_self  r"   r   c                     gr   r   r$   s     r   r&   PsGpuPass._check_conflict  r"   r   c                 .   Sn[        [        UR                  5       R                  5      5       H  u  p4UR                  S:X  d  M  UnM     UR                  5       R                   GH+  nUR                  S:w  a  UR                  S:w  a  M&  [
        R                  " UR                  [        5       / 5      u  pVU H  nUR                  5       R                  R                  US-   5      nUR                  U5        UR                  [        [        5        [        R                  R!                  UR                  5       U5      n	UR                  5       R                  R#                  US-   U	5        UR                  5       R%                  5         M     GM.     g )Nr   lookup_table_gradpull_box_sparser   rU   )r  r   r6   r   r1   r   get_grad_op_descr   r   r   r   	_set_attrop_role_attr_namebackwardpaddlestaticOperatorr   _sync_with_cpp)
r   r@   insert_indexr   r   grad_op_descop_grad_to_varop_descnew_op_descnew_ops
             r   _add_push_box_sparse_op!PsGpuPass._add_push_box_sparse_op  sG   Ig&:&:&<&@&@ABGCww--" C &&(,,Bww++;N0N+/+@+@,(L (%22499DD 1$ %%g.%%&7B//((*K $$&**11,2BFK$$&557 ( -r   c                 ~   0 n[        [        UR                  5       R                  5      5       H3  u  p4UR                  S:X  d  M  UR                  S5       H  nSX%'   M	     M5     / n/ n/ n[        U5       H  nSUR                  ;  a  M  UR                  S5       Hn  nXR;   d  M
  UR                  UR                  S5      5        UR                   H4  n	U	S:X  a  M  UR                  U	5       H  n
UR                  U
5        M     M6     Mp     M     [        [        U5      5      n[        [        U5      5      nU H  n
X;  d  M
  UR                  U
5        M     [        [        U5      5      nU HH  nUR                  5       R                  U5      (       d  M)  UR                  5       R                  U5        MJ     g )Nrs  r   rU   Paramr  LearningRate)r  r   r6   r   r1   r   r5  r   r   r   ra   r   r   r!  )r   r@   embedding_wr   r   r)   optimize_varsoptimize_op_role_varsr'  key_namer   r(  s               r   _remove_optimizer_varPsGpuPass._remove_optimizer_var  s}   Ig&:&:&<&@&@ABGCww--HHSMD()K% * C
  "$&!"7+Bbnn,)&)001GH$&NN#~5$#%88H#5C)005 $6 %3 *	 , S/0 $S)>%? @ C/)005 ! %)-F)G$H!-D##%--d33$$&2248 .r   c                 n   0 n/ n/ n[        [        UR                  5       R                  5      5       Hq  u  pVUR                  S:X  d  M  UR                  S5       H)  nSX''   UR                  U5        UR                  U5        M+     UR                  S5       H  nSX''   M	     Ms     [        [        UR                  5       R                  5      5       Hk  u  pVUR                  S:X  d  UR                  S:X  a  M'  UR                   H4  nUR                  U5       H  n	X;   d  M
  UR                  U5          M2     M6     Mm     [        [        U5      5      nUR                  SS9  U H"  nUR                  5       R                  U5        M$     U H"  nUR                  5       R                  U5        M$     g )	Nrs  r   rU   r   rt  r   T)reverse)r  r   r6   r   r1   r   ra   r   r   r   r   r   r!  )
r   r@   lookup_table_grad_varremove_op_index
remove_varr   r   r)   r  r   s
             r   $_remove_lookup_table_grad_op_and_var.PsGpuPass._remove_lookup_table_grad_op_and_var  sz    "
Ig&:&:&<&@&@ABGCww--IIh/D23)/#**3/%%d+ 0 HHSMD23)/ * C Ig&:&:&<&@&@ABGCww++rww:M/MNN88H-C3'..s3 . + C s?34T*"C  "--c2 #D  "..t4 r   c                     UR                   nU R                  U5        U R                  U5        U R                  U5        g r   )rW   r  r  r  )r   rd   re   rf   r4   s        r   ri   PsGpuPass._apply_single_impl8  s5    $$\2""<011,?r   r   )rk   rl   rm   rn   r   r    r&   r  r  r  ri   ro   rp   rq   s   @r   rk  rk    s.    8.!9F5:@ @r   rk  ps_transpile_passc                   :   ^  \ rS rSrU 4S jrS rS rS rSrU =r	$ )PsTranspilePassi?  c                 "   > [         TU ]  5         g r   r   r   s    r   r   PsTranspilePass.__init__A  r   r   c                     gr   r   r   s    r   r    PsTranspilePass._check_selfD  r"   r   c                     gr   r   r$   s     r   r&   PsTranspilePass._check_conflictG  r"   r   c           	          UR                   nUS   S:X  a  SSKJn  U" 5       n[        S5        OSSKJn  U" 5       n[        S5        UR                   n[        5       nUR                  UUUS   US	   US
   SS9  g )Nuse_gpu_graphr   r
   )MultiThreadz8ps_transpile_pass use MultiThread for non_gpu_graph mode)SingleProcessMultiThreadzAps_transpile_pass use SingleProcessMultiThread for gpu_graph moderK   trainer_endpointscurrent_endpointF)re   rd   rank	endpointsr  	wait_port)rW   transpiler.collectiver  r   r  get_dist_env	transpile)	r   rd   re   rf   r4   r  r   r  envs	            r   ri   "PsTranspilePass._apply_single_implJ  s    !Q&;ALMH(*AS n	+%\"-. !34 	 	
r   r   rI  rq   s   @r   r  r  ?      
 
r   r  split_heter_worker_ops_passc                   @   ^  \ rS rSrU 4S jrS rS rS rS rSr	U =r
$ )SplitHeterWorkerOpsPassie  c                 "   > [         TU ]  5         g r   r   r   s    r   r    SplitHeterWorkerOpsPass.__init__g  r   r   c                     gr   r   r   s    r   r    #SplitHeterWorkerOpsPass._check_selfj  r"   r   c                     gr   r   r$   s     r   r&   'SplitHeterWorkerOpsPass._check_conflictm  r"   r   c                 l   / n/ n/ n	UR                   S-
  n
US   nUR                  5       R                  5       n[        UR	                  5       5      nXMS-
     S   nXMS-
     S   nUR                  U
5      nUR                  U5        [        U5       H  u  nn[        X1UU5        M     XmS-
     S   S   n[        UXU5        XmS-
     S   S   n[        UXU5        [        UR                  5      nU[        U5      :  a  UR                  U
5      nUR                  U5        [        U5       H  u  nn[        X1UU5        M     XmS-
     S   S   n[        UXU5        XmS-
     S   S   n[        UXU5        [        XUSS9nUR                  US   S	-   [        UR                  5      -   5        OX[        U5       H  u  nn[        X1UU5        M     XmS-
     S   S   n[        UXU5        XmS-
     S   S   n[        UXU5        Un[        XUSS9nUR                  US   S	-   [        UR                  5      -   5        [        UR                  5      nU[        U5      S-
  ::  a  [        UUUUUUU5      n[        UUUUUUUS
5      n[!        UUXmS-
     S   S   5      n	/ n/ n[#        U5      n SUSUS[%        U5      S[        ['        U5      5      S[)        U5      SUS   S[        [*        R,                  " SS5      5      [.        [0        0nUR3                  5       R5                  SS/ 00 US9  g )NrU   rV   forwardrx  entranceexitr1   block_input_var_namer   Fpersistablesmessage_to_block_idoptimize_blocksendpointfanin
pserver_iddistributed_moderC   rpc_exec_thread_numCPU_NUM    heter_listen_and_servr+   r0   )
num_blocks_heter_device_typelowerr   _get_stage_id_create_blockra   r   block_append_opadd_vars_by_var_listr^   r   get_communicate_var_infor\  r   insert_communicate_opadd_send_opget_ps_endpointsget_heter_worker_endpointget_previous_stage_trainersrc   osgetenvr>   r?   r6   r=   )!r   r@   r4   heter_programprogram_block_ops_list	heter_opsblock_var_detailoptimizer_blockgrad_to_block_idsend_grad_var_listpre_block_idxrV   current_devicestage_idheter_block_ops_forwardheter_block_ops_backwardheter_block_r   entrance_vars	exit_varsfirst_op_index_fpheter_block_bpbp_entrance_varsbp_exit_varsbackward_comm_infoforward_comm_infofirst_op_index_bp
static_varstatic_var_bprD   rF   pserver_endpointss!                                    r   _create_heter_program-SplitHeterWorkerOpsPass._create_heter_programp  s   2 %0014<(
#668>>@z//12"8A"F#
 $:Q,#G$
  $11-@{+67EArMKD 8 )A6yA*M]GKP$\29=fE	YL0c011*88GN"">2"#;<2K =  01=jI  ! '. ,qL9*EfML gn ":#3*" ##"#9:n(()* ##;<2RH =  01=jI  ! '+ ,qL9*EfML gk )N4}9
 	45+//"#	
   2 23s+,q00.! J .	
 )\*:6~F
 ,Z8!#31*=S4Z@A+j1i 0!3ryyB'?#@!#9

 	""$..(9	 	/ 	
r   c                 $   UR                   nSn[        X5      u  pgp[        U5      S:X  a  [        R	                  S5        Ung[        U	5      n	[        XiU5      n
[        R                  R                  5       nU R                  UUUU	UU
5        Ung)z
split heter worker program from origin-program
1. find heter op (located on different device)
2. find input&output of every heter-block
3. create heter worker program, add listen&serv op
cpur   zuCurrently running in Heter Parameter Server mode, but no OP running on heterogeneous devices, Please check your code.N)rW   find_heter_opsr^   r   r   union_forward_gradient_opfind_block_jointsry  r   r	   r  )r   rd   re   rf   r4   default_devicer@   r  r  program_block_opsblock_vars_detailr  s               r   ri   *SplitHeterWorkerOpsPass._apply_single_impl  s     3A4
0A y>QMM H #L56GH-	
 ((002""	
 %r   r   )rk   rl   rm   rn   r   r    r&   r  ri   ro   rp   rq   s   @r   r  r  e  s#    ^
B %  %r   r  split_trainer_ops_passc                   R   ^  \ rS rSrU 4S jrS rS rS rS rS r	S r
S	 rS
rU =r$ )SplitTrainerOpsPassi4  c                 "   > [         TU ]  5         g r   r   r   s    r   r   SplitTrainerOpsPass.__init__6  r   r   c                     gr   r   r   s    r   r    SplitTrainerOpsPass._check_self9  r"   r   c                     gr   r   r$   s     r   r&   #SplitTrainerOpsPass._check_conflict<  r"   r   c                 ,   UR                  5       R                  nUS   nSnU H.  n	[        U	5      [        U5      :X  d  M  UR                  U	5      n  O   US:w  d   e[	        UR                  5       U5        / n
US   nUS:X  a  [        U5      nXS   S   S   n
[        XS-   U
5      nUR                  5       R                  USSUR                  5       R                  U
S      0S	/ 0S
SS/ U
QSPS/ SUS   SUS/ S[        U5      [        [        0S9  U
$ )Nr   r   rV   rU   r  r  send_and_recvr+   r,   modesend_var_namemicrobatch_idrecv_var_namemessage_namer  next_endpointsprevious_endpointsrK   r   )r6   r   r\  r   delete_same_opsget_next_stage_trainersr  r   r7   rc   r>   r?   )r   r@   r4   heter_block_indexops_listr  all_opstart_opfirst_op_idxr   entrance_varrV   next_heter_worker_endpoints	comm_infos                 r   _replace_ops_by_communicate_op2SplitTrainerOpsPass._replace_ops_by_communicate_op?  sY    %%'++A;B2w#h-'%||B/  r!!!,,.9<(
!*A**M'+>yIL 1Q.I   "--"$W11388aIJI#%E|%E_%E#R"I.D$E$&A(" +j"9)+A	 . " r   c                    [        US   5       H1  u  p4US   nUS   nUR                  R                  U:X  d  M+  US   U	 M3     [        US   5       H1  u  p4US   nUS   nUR                  R                  U:X  d  M+  US   U	   g    [        US   5       H1  u  p4US   nUS   nUR                  R                  U:X  d  M+  US   U	   g    g )Nmerged_variables_pairsr   rU   merged_dense_pairsmerged_sparse_pairs)r   
merged_varr)   )r   var_namer4   r   pairr   var_grads          r   _remove_var_pair_by_grad,SplitTrainerOpsPass._remove_var_pair_by_gradk  s    $U+C%DEKEq'CAwH""''8323E:	 F %U+?%@AKEq'CAwH""''83./6 B %U+@%ABKEq'CAwH""''83/07 Cr   c                    XC   S   S   XC   S   S   -   n/ n/ n[        U5       He  n[        XR                  5       U5      u  pU	 H@  nUR                  S5      S   nX;   d  M  UR	                  U5        UR	                  U5        MB     Mg     [        [        U5      5      n[        UR                  5       U5        U H  nU R                  X5        M     g )Nr  r  rx  z@GRADr   )	find_send_opfind_op_input_outputr6   r   ra   r  r   r  r#  )r   r@   r4   r  r  r  need_remove_send_opneed_remove_grad_varr   
input_listr  r   origin_var_namegrad_var_names                 r   _remove_trainer_send_op+SplitTrainerOpsPass._remove_trainer_send_op  s     /	:>J1*=nMN 	 !!w'B0--/MJ '"*.."9!"<"2'..r2(//9	 '	 ( #3':#;<7'')+>?1M))-? 2r   c                    / n[        S[        U5      5       H9  nXG   S   XG   S   -   nX`R                  XXxU5      -  nU R                  XXu5        M;     / n	/ n
US   S   n[	        UR                  5       U5        [        X5        [        XX5      nUS   S   S   n[        USUSS9nU
R                  US   S-   [        UR                  5      -   5        U	R                  U5        US	   nS
U
SU	S[        U5      SSS[        U5      SUS   S[        [        R                   " SS5      5      ["        [$        0nUR                  5       R'                  SSS/ 00 US9  g )NrU   r  rx  r   r  r  r  r   rV   r  r  r  r  r  r  rC   r  r  r  r  r+   r   )r   r^   r  r-  r  r6   delete_trainer_useless_varcreate_backward_blockr  ra   r\  r   get_trainer_endpointrc   r   r  r  r>   r?   r   )r   r@   origin_programr4   r  r  r  r  r  r  r  bp_ops_listbackward_blockr  r  rV   s                   r   _create_trainer_program+SplitTrainerOpsPass._create_trainer_program  s   $ 
!&q#.D*E!F&9)D(;JGH  == 1=M J (( 1 "G ,Q/
;,,.<"77.[
 ,A.z::F5A/j
 	56.$$%&	

 	~.<(
!#3, Q+j1i 0!3ryyB'?#@!#9
 	))(9 	* 	
r   c                     UR                   nSn[        X5      u  pgp[        U	5      n	[        XiU5      n
UR	                  5       nU R                  UUUU	U
5        Ung)z
split cpu-trainer program from origin-program
1. find heter op (located on different device)
2. find input&output of every heter-block
3. create cpu-trainer program, add send&recv op
r  N)rW   r  r  r  cloner6  )r   rd   re   rf   r4   default_device_r@   r  default_opsr  r  trainer_programs               r   ri   &SplitTrainerOpsPass._apply_single_impl  ss     =K>
:K 66GH-	
 "--/$$	
 'r   r   )rk   rl   rm   rn   r   r    r&   r  r#  r-  r6  ri   ro   rp   rq   s   @r   r  r  4  s4    *X*@4I
\' 'r   r  set_heter_pipeline_opt_passc                   :   ^  \ rS rSrU 4S jrS rS rS rSrU =r	$ )SetHeterPipelineOptPassi  c                 "   > [         TU ]  5         g r   r   r   s    r   r    SetHeterPipelineOptPass.__init__  r   r   c                     gr   r   r   s    r   r    #SetHeterPipelineOptPass._check_self	  r"   r   c                     gr   r   r$   s     r   r&   'SetHeterPipelineOptPass._check_conflict  r"   r   c                    UR                   nUS   nUS   R                  S   nU[        UR                  5       5      S-
  UR	                  5       SS.Ul        SSUR                  5       [        UR                  5       5      [        UR                  5       5      S-
  [        UR                  5       5      UUUR	                  5       SS.
Ul        g )	NrV   user_defined_strategyaccumulate_stepsrU   )re   pipeline_stageheter_place
is_fl_modeHeterPipelineTrainerHeterSection)
trainerdevice_workertrainersrK   rJ  num_pipeline_stagessection_programnum_microbatchesrK  rL  )	rW   pipeline_configsr   r  _heter_device_heter_pipeline_opt_get_stage_trainers_role_id_get_num_stage)r   rd   re   rf   r4   rV   rT  s          r   ri   *SetHeterPipelineOptPass._apply_single_impl  s    <(
 !89JJ

  /!*":":"<=A%335	/
+ .+"668j1134!*":":"<=A#&z'@'@'B#C+ 0%335,
(r   r   rI  rq   s   @r   r@  r@    r  r   r@  split_fl_ops_passc                      ^  \ rS rSrU 4S jrS rS rS rS rS r	S r
S	 rS
 rS rS rS rS rS rS rS rS rS rSrU =r$ )SplitFlOpsPassi*  c                 Z   > [         TU ]  5         SU l        SU l        SU l        SU l        g )Nzgpu:0zgpu:2zgpu:1zgpu:3)r   r   PART_A_DEVICE_FlAGPART_A_JOINT_OP_DEVICE_FlAGPART_B_DEVICE_FlAGPART_B_JOINT_OP_DEVICE_FlAGr   s    r   r   SplitFlOpsPass.__init__,  s.    ")+2(")+2(r   c                     gr   r   r   s    r   r    SplitFlOpsPass._check_self3  r"   r   c                     gr   r   r$   s     r   r&   SplitFlOpsPass._check_conflict6  r"   r   c                     g r   r   r   s    r   _insert_encrypt_op!SplitFlOpsPass._insert_encrypt_op9      r   c                     g r   r   r   s    r   _insert_decrypt_op!SplitFlOpsPass._insert_decrypt_op<  rl  r   c                     UR                    HK  nUR                   H8  nUR                  [        5      nUS:w  a  UR	                  [        S5        M6  S   M:     MM     g )Nr   )blocksr   r   OP_DEVICE_KEYrv  )r   r@   r]   r   devices        r   _clear_op_device_flag$SplitFlOpsPass._clear_op_device_flag?  sB    ^^Eii/39R<]B/T   $r   c                    / U l         / U l        [        [        5      nU R                  R                  S5      nUR                   H  nUR                  [        5      nX@R                  :X  d  US:X  d  X@R                  :X  a!  US   nU R                   R                  U5        O>X@R                  :X  d  X@R                  :X  a   US   nU R                  R                  U5        UR                  nWR                  5       R                  R!                  5       nUR#                  U5        UR%                  [        U5        M     S H  nX   nUR'                  5         M     U$ )Nr   r   ab)rw  rx  )	partA_ops	partB_opsr   r	   ori_main_programr]   r   r   rr  r`  ra  ra   rb  rc  r   r6   r=   r   rv  r|  )	r   party_program_mapr]   r   rs  r@   r  ap_opkeys	            r   _split_fl_program SplitFlOpsPass._split_fl_programE  s$   '0%%++A.))BWW]+F111R<===+C0%%b)111===+C0%%b)ggG((*//99;EOOG$OOM62% ( C',G""$  ! r   c                    SS SS S3nUR                  USSU R                  0S/ 0S	S
S/ U R                  QSPS/ SUS[        U R                  5      S[        U R                  5      S[        U R                  5      [        [        0S9  g )Nforward_joint_rU   r  r
   @fl_psr  r+   r,   r  r  r	  r
  r  r  r  r  rK   r0   )	r   partA_to_partB_tensorpartA_to_partB_tensor_namer  rV   r  rc   r>   r?   r   r]   r   r  s       r   _insert_partA_communicate_op+SplitFlOpsPass._insert_partA_communicate_opd  s    $QCq62	 334BK	 "44"#"  	 "9OO# %&AOO' k$//:%'= 	 	
r   c                    SS SS S3nUR                  USSU R                  0S/ 0S	S
S/ U R                  QSPS/ SUS[        U R                  5      S[        U R                  5      S[        U R                  5      [        [        0S9  g )Nbackward_joint_r
   r  rU   r  r  r+   r,   r  rx  r	  r
  r  r  r  r  rK   r0   )	r   partB_to_partA_gradpartB_to_partA_grad_namer  rV   r  rc   r>   r?   r  s       r   _insert_partB_communicate_op+SplitFlOpsPass._insert_partB_communicate_op~  s    %aS!F3	 112BK
 "22"#"  	 "9OO# %&AOO' k$//:%'= 	 	
r   c                 X   U GH#  nUR                  [        U5      5      (       a  M%  U R                  R                  [        U5      5      n[	        U[
        5      (       a}  UR                  UR                  UR                  UR                  UR                  UR                  UR                  UR                  UR                  UR                  UR                   S9
nOUR#                  US5      nUR                  Ul        [%        US5      (       d  GM  UR&                  Ul        GM&     g )N)
r)   r   r   r1   r.  r   	trainableoptimize_attrregularizer
error_clipFr`   )_find_var_recursiver\  ori_main_block_var_recursive
isinstancer   create_parameterr)   r   r   r1   r.  r   r  r  r  r  _clone_variabler7  r`   )r   r7   r]   r   
source_vardest_vars         r   _create_var_for_block$SplitFlOpsPass._create_var_for_block  s    C((S22,,;;CHEJ#y)) 11#$**$**#(22",":":(22",":": * 6 6)44 2  !00UC%/%=%=H"z#344*4*C*C'+ r   c                    U[        UR                  5      :  a  UR                  U5      nOUR                  5       n[	        U5       H  u  pVUR
                  R                  5       nUR                  UR
                  5        UR                  [        UR                  [        5      5        UR
                  R                  5       UR
                  R                  5       -   nU R                  X5        M     UR                  5         U$ r   )r^   rq  r]   r  r   r   r=   r   rv  rr  r   r  rE  r  r|  )	r   op_listr@   	block_idx	new_blockr  r   r}  r7   s	            r   _get_block_by_idx SplitFlOpsPass._get_block_by_idx  s    s7>>**i0I--/Iw'EANN,,.EOOBGG$OOM277=+AB77**,rww/G/G/IID&&t7 ( 	  "r   c                     SnUR                    H5  n[        U5      (       a  UR                  [        5      U:X  a  Us  $ US-  nM7     U$ Nr   rU   )r   is_forward_opr   rr  r   r]   flagop_idxr   s        r   _find_joint_forward_op%SplitFlOpsPass._find_joint_forward_op  sE    ))BR  RWW]%;t%C!	 
 r   c                     SnUR                    H5  n[        U5      (       a  UR                  [        5      U:X  a  Us  $ US-  nM7     U$ r  )r   is_backward_opr   rr  r  s        r   _find_joint_backward_op&SplitFlOpsPass._find_joint_backward_op  sE    ))Bb!!bggm&<&D!	 
 r   c                    U R                  X5      nUR                  U   nUR                  R                  5       nU R	                  X5      nUR                  U   nUR                  R                  5       n[        [        U5      [        U5      -
  5      U l        / U l	        U R                   H7  nU R                  R                  U R                  R                  U5      5        M9     g r   )r  r   r   r  r  rE  r  r   r  r  ra   r  r   )r   r]   r  r  r   vars1vars2r   s           r   _get_partB_to_partA_grad'SplitFlOpsPass._get_partB_to_partA_grad  s    --e:YYv''),,U9YYv((*(,SZ#e*-D(E%#% 55H$$++D,?,?,C,CH,MN 6r   c                 b    U R                   n[        X!5      u  p4[        X#5      [        X$5      -   $ r   )r{  find_ops_list_input_outputscreen_persistables)r   
bp_op_listr@   bp_op_inputbp_op_outputs        r   _find_dense_grad_vars$SplitFlOpsPass._find_dense_grad_vars  s<    ''$>%
! #78;N<
 
 	
r   c                 ,   U R                  XR                  5      n/ n[        [        UR                  5      5       H  nUR                  U   nUR                  U5        XB:X  d  M*  UR                  R                  5       S   nUR                  R                  5       U l        U R                  R                  U5      U l          O   U R                  X0R                  S5      nU R                  XrS-   5        [        U5      n[!        U5      n	U R                  X-   U R                  S5      n
SS SS S3nUS-   [#        U
R$                  5      -   nSU/S	U
/S
['        U R(                  5      SSS[+        U R(                  5      SU R,                  S[/        [0        R2                  " SS5      5      [4        [6        0nU
R9                  SSS/ 00 US9  [;        U R<                  5      n[?        X5        U RA                  U5      n[C        U R<                  X5        g )Nr   rU   r  r
   r  r  r   r  r  r  r  r  r  r  r  r  r  r+   r   )"r  ra  r   r^   r   ra   r   rE  r  r  r   r  r  partA_programr  get_bp_op_list#get_distributed_push_sparse_op_listr\  r   r2  rV   rc   rC   r   r  r  r>   r?   r   r&  r{  r  r  r  )r   r]   r  r  r   r   out_namefirst_blockr  push_sparse_op_listsecond_blockblock_input_flagr  r4   send_opsdense_grad_varss                   r   _get_partA_program!SplitFlOpsPass._get_partA_program  s    ,,33
 s599~&A1BNN2{77335a824''2J2J2L/-1-@-@-D-DX-N* ' ,,W6H6H!L))+zB $E*
A%H--,d.@.@!
 -QCq6:+c1C8H8H4II!$4#5~,T__=Q+doo6!3ryyB'?#@!#9	
 	(9 	  	
   5 56(44Z@D))<Ir   c                    U R                  XR                  5      nU R                  XR                  5      nSn/ n/ n/ nUR                   HG  nXB:  a  UR	                  U5        O(XC::  a  UR	                  U5        OUR	                  U5        US-  nMI     U R                  XPR                  S5      n	U R                  X`R                  S5      n
U R                  U
[        U5      5        U R                  XpR                  S5      n
[        U
5      nU R                  U5      n[        U R                  X5        SS SS S3nUS-   [        U
R                  5      -   nSU/S	U
/S
[        U R                   5      S[        [#        U R                   5      5      SSSU R$                  S['        [(        R*                  " SS5      5      [,        [.        0nU	R1                  [        U5      SS/ 00 US9  g )Nr   rU   r  r  r
   r  r   r  r  r  r  r  r  r  r  r  r  r+   r   )r  rc  r  r   ra   r  partB_programr  r^   r  r  r  r{  r\  r   r  rV   r  rC   r   r  r  r>   r?   r   )r   r]   op_idx1op_idx2op_cntop_list1op_list2op_list3r   r  r  r  r  r  r  r4   s                   r   _get_partB_program!SplitFlOpsPass._get_partB_program  s   --33
 ..33
 ))B#"##aKF  ,,X7I7I1M --h8J8JAN)),HF--h8J8JAN#L1
44Z@D))<I ,A3as&9+c1C8H8H4II!$4#5~1$//BS4T__EF!!3ryyB'?#@!#9	
 	h-(9 	 	
r   c                    UR                   nUS   U l        US   U l        US   U l        Xl        UR                  S5      U l        U R                  5       nUS   n[        S-   n[        Xv5        U R                  UR                  5       U R                  5        US   n[        S-   n[        Xx5        U R                  (       d  [        R                  R                  5       U l        U R#                  UR                  5       5        U R                   UR                   S	'   U R%                  U R                   5        ['        U R                   5        g [        R                  R                  5       U l        U R+                  UR                  5       5        U R(                  UR                   S
'   U R%                  U R(                  5        ['        U R(                  5        g )NrV   rC   is_heter_workerr   rw  z6_fl_A_main_program.prototxtrx  z6_fl_B_main_program.prototxtpart_a_main_programpart_b_main_program)rW   rV   rC   	is_part_br{  r]   r  r  ps_log_root_dirdebug_programr  r6   ra  ry  r   r	   r  r  rt  check_programr  r  )	r   rd   re   rf   r4   r|  prog_a
_main_fileprog_bs	            r   ri   !SplitFlOpsPass._apply_single_implT  s   -Y'01 ,*003 224"3'$'EE
j)%%!4#C#C	
 #3'$'EE
j)~~!'!1!1!9!9!;D##F$7$7$9:595G5GHOO12&&t'9'9:$,,-!'!1!1!9!9!;D##F$7$7$9:595G5GHOO12&&t'9'9:$,,-r   )r`  ra  rb  rc  r  r  r{  ry  r  r  r  rz  r  r  r  rC   rV   )rk   rl   rm   rn   r   r    r&   rj  rn  rt  r  r  r  r  r  r  r  r  r  r  r  ri   ro   rp   rq   s   @r   r^  r^  *  sh    3J!>
4
4D0
O
2Jj5
t .  .r   r^  )r  _collectionsr   ry  paddle.baser   #paddle.distributed.passes.pass_baser   r   paddle.frameworkr   paddle.staticr   r	   ps.utils.publicr   rt   r  r=  rL  rk  r  r  r  r@  r^  r   r   r   <module>r     s   
 $  ! G ! ,  %&UF UF 'UFp %&^Q ^Q '^QB &'L2( L2 (L2^ ,-5@x 5@ .5@p #$3Kh 3K %3Kl }e@ e@ e@P "#"
h "
 $"
J ,-K%h K% .K%\ '(L'( L' )L'^ ,-"
h "
 ."
J "#I.X I. $I.r   