
    x-j                        d dl Z d dlmZ d dlZd dlmZ d dlmZmZ d dl	m
Z
 d dlmZmZ ddlT  ed	           G d
 de                      Z ed           G d de                      Z ed           G d de                      Z ed           G d de                      Z ed           G d de                      Z ed           G d de                      Z ed           G d de                      Z ed           G d d e                      Z ed!           G d" d#e                      Z ed$           G d% d&e                      Z ed'           G d( d)e                      ZdS )*    N)defaultdict)	framework)PassBaseregister_pass)core)	ParameterProgram   )*append_send_ops_passc                   <     e Zd Z fdZd Zd Zd Zd Zd Z xZ	S )AppendSendOpsPassc                 H    t                                                       d S Nsuper__init__self	__class__s    i/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/passes/ps_trainer_pass.pyr   zAppendSendOpsPass.__init__           c                     dS NT r   s    r   _check_selfzAppendSendOpsPass._check_self        tr   c                     dS r   r   r   
other_passs     r   _check_conflictz!AppendSendOpsPass._check_conflict#   r   r   c                 n   |t           k    rg }nfd|D             }g }|t          j        t          j        fv r9                                                    t          j                              }                                                    dd|id|id|gd|d|t          t          i	           |S )
Nc                 N    g | ]!}                                 j        |         "S r   )global_blockvars).0	union_varprograms     r   
<listcomp>z5AppendSendOpsPass._append_send_op.<locals>.<listcomp>,   s=        $$&&+I6  r   )namesendXOutsend_varnames	is_sparsetable_idtypeinputsoutputsattrs)STEP_COUNTERDistributedModeSYNC
HALF_ASYNCr&   
create_varr   generate_control_dev_var_name	append_opRPC_OP_ROLE_ATTR_NAMERPC_OP_ROLE_ATTR_VALUE)	r   r*   
union_varsqueuer1   r2   ps_modesend_input_varsdummy_outputs	    `       r   _append_send_opz!AppendSendOpsPass._append_send_op&   s     L   OO   !+  O
 +_-GHHH"//11<<<>> =  L 	(()L)%YH%'=		 	) 
	
 
	
 
	
 r   c                     |                                                     dd|idg id|ddt          t          i           d S )Nsend_barrierr.   r/   
trainer_id
half_asyncTr3   )r&   r>   r?   r@   )r   r*   dummysrI   s       r   _append_barrier_opz$AppendSendOpsPass._append_barrier_opD   s[    ((=BKjd%'=	 	) 		
 		
 		
 		
 		
r   c                 l   |j         }|d         }t          ||d                   }g }|                                D ]-\  }}	|	                                r|t          j        k    r+|	                                s|t          j        k    rP|	                                t          |d         j        j	                  k    rt          |	                                          dk    r|	                                rdnd}
|	                                rdn|
}
|                    |                     ||	                                ||
|	                                |                     /|t          j        t          j        fv r.t'          |d                   }|                     |||           d S d S )	NrC   is_heter_ps_modesplit_dense_tablelossr      r
   
role_maker)_attrsget_the_one_send_contextitemsr1   r9   GEO
program_ididblockr*   lenremote_sparse_idsis_distributedappendrF   origin_varnamesr2   r:   r;   get_role_idrL   )r   main_programstartup_programpass_ctxr7   rC   send_ctxrK   merged_namer-   r1   rI   s               r   _apply_single_implz$AppendSendOpsPass._apply_single_implP   s   	" ,U+=%>
 
 
 !)!1!1 	 	K~~ G/B$B$BNN$$ '_5H*H*H  BuV}':'B$C$CCC4))++,,q00!^^--41I!0022A	IMM$$ ((**MMOO 	 	 	 	 +_-GHHH$U<%899J##L&*EEEEE IHr   )
__name__
__module____qualname__r   r   r#   rF   rL   rf   __classcell__r   s   @r   r   r      s                  <

 

 

!F !F !F !F !F !F !Fr   r   distributed_ops_passc                   B     e Zd Z fdZd Zd Zd Zd Zd Zd Z	 xZ
S )DistributedOpsPassc                 d    t                                                       i | _        i | _        d S r   )r   r   w_2_table_idemb_sizer   s    r   r   zDistributedOpsPass.__init__v   s,    r   c                     dS r   r   r   s    r   r   zDistributedOpsPass._check_self{   r   r   c                     dS r   r   r!   s     r   r#   z"DistributedOpsPass._check_conflict~   r   r   c                    |d         rd S t          |          dk    rd S d }d }d}|                                D ]\  }}	|	d         }
 |
                    d          r|
                    d          }|                    d          }t          |          dk    r|d         dk    r|d         }|d	         }|                                j        v r\|                                j        v rA                                j        |         }                                j        |         }d
}nt                              d           |st          d                                           
                    dt          j        j        j        dd
          }                                                    ddi d|idg|j        dd                                           
                    dt          j        j        j        dd
          }                                                    ddi d|idg|j        dd           |                                D ]m\  }}	                                j        fd|	D             }fd|	D             }                                j        |	d                             d          d                  }| j        |         }|	d                             d          }|	d                             d          }|	d         j        }d |	D             }t          d|           fd|	D             }|d d d         D ])}                                                    |           *                                                    d|||||dd|i|||| j        |         ||d !           od S )"N
use_ps_gpur   Fentry:   show_click_entryrR   r
   TzGShowClickEntry configured, but cannot find show/click var, will not usez+ShowClickEntry not configured, will not useshow)r,   dtypepersistablestop_gradientfill_constantr/   )shaper{   valueindexr4   r5   r6   r7   clkc                 :    g | ]}                     |          S r   r   r(   opall_opss     r   r+   z8DistributedOpsPass._push_sparse_fuse.<locals>.<listcomp>   %    777Rw}}R((777r   c                     g | ]:}                                 j        |                    d           d                  ;S Idsr   r&   r'   inputr(   r   _programs     r   r+   z8DistributedOpsPass._push_sparse_fuse.<locals>.<listcomp>   I       EG%%'',RXXe__Q-?@  r   W@GRADpadding_idxr]   c                 8    g | ]}|                     d           S )slot)attr)r(   r   s     r   r+   z8DistributedOpsPass._push_sparse_fuse.<locals>.<listcomp>   s"    333RWWV__333r   zdebug zcb slots: c                     g | ]:}                                 j        |                    d           d                  ;S )zOut@GRADr   r   r   s     r   r+   z8DistributedOpsPass._push_sparse_fuse.<locals>.<listcomp>   sN        %%'',RXXj-A-A!-DE  r   distributed_push_sparse)r   WOutputsShowsClicksr   )r]   r   r2   size
use_cvm_opslotsr3   )r[   rV   has_attrr   splitr&   r'   warningswarnprintr<   r   VarDescVarTypeFP32
_insert_opr{   opsoutputrp   r4   
_remove_opr>   rq   )r   r   push_sparse_opsr7   r   rz   r   	use_entryparamr   op_firstrv   show_var_nameclick_var_nameop_idxsr5   wr2   r   r]   op_typer   r6   idxr   s    `                      @r   _push_sparse_fusez$DistributedOpsPass._push_sparse_fuse   s}    	F1$$F	)//11 	 	JE31vHW%% 	MM'**EKK$$E5zzQ58/A#A#A %a!&q!X%:%:%<%<%AAA&(*?*?*A*A*FFF#00227FD"//116~FC $IIMMa    $	?@@@((**55l*/!"	 6  D !!##..$S!Z  / 
 
 
 ''))44l*/!"	 5  C !!##..$S Y  / 
 
 
 *//11 )	 )	JE3++--1G77773777G   KN  F %%'',SV]]8-D-DQ-GHA(/Ha&++m44K V[[)9::N!fkG33s333E%u---     G
 ttt} 8 8%%''2237777!!##--.!&!!  #G,&4#. ( M%0","  .    /)	 )	r   c                   ) d }|d         r3g }g }g }g }	g }
t                                          j                  dz   }|                                D ]\  }}                                j        )d}|d         r|d                             d          }fd|D             }                                j        |d                             d	          d                  }|j        d         | j        |<   |d
         |j	                 }d}|                                D ]/\  }}||
                                v r|                                }0|dk    rt          d          || j        |<   |d                             d          }|d                             d          }|d         j        }fd|D             } |||           )fd|D             }|d d d         D ])}                                                    |           *dgt          |          z  }t                                          j                  dz   gt          |          z  }t!                                          j                  D ]\  }}t#          dt          |j                            D ]Y}|                    |j        |                   }t!          |          D ]'\  } }!|!j	        |v rt)          |||                    || <   (Zt#          dt          |j                            D ]Y}|                    |j        |                   }"t!          |          D ]'\  }#}$|$j	        |"v rt-          |||#                   ||#<   (Z|d         r|                    |           |                    |           |                    |           |	                    |           |
                    |j        d         gt          |          z             t-          g ||R  }t-          |          t)          |          z
  dk    rnt)          |          dk    rt-          |          }%nt)          |          dz   }%                                                    |%d||dd|i|||||d           ]t#          t          |                    D ]O}||         }%                                                    |%d||         g|dd||         gi|||||d           P|d         rt          |          dk    rt)          |          dk    rt          d                                                              |dd|id|	i|
ddd           t3          j                    }&	 d |D             }'n# t          $ r t          d          w xY w|&                    |'           d |
D             }(|&                    |(           d S d S d S )Nc                    |                                  }t          |j                  }d}dgt          |j                  z  }dgt          |j                  z  }t          |j                  D ]\  }}	t	          dt          |	j                            D ]e}
||         dk    r nV|	                    |	j        |
                   }t          |          D ]%\  }}|j        |v rd||<   t          ||          } n&ft	          dt          |	j	                            D ]c}
||         dk    r nT|	
                    |	j	        |
                   }t          |          D ]#\  }}|j        |v rd||<   t          ||          }$dt	          t          |j                            D ]7}
||
         dk    r)||
         dk    rt                              d            d S 8||k     rg }t	          |dz   t          |                    D ]0}
||
         dk    r"|                    |j        |
         |
f           1t          |          D ]\  }
}	g }t                      }|                    |	d                    |                    |	d                    d}|t          |          k     r||         }|j        |         }	g }t	          dt          |	j	                            D ]7}|	
                    |	j	        |                   }|                    |           8t	          |dz
  |dz
  d          D ]}|j        |         }||v rd}t	          dt          |j                            D ]\}|                    |j        |                   }t	          t          |                    D ]}||         D ]
}||v rd} n|r n|r n]|r[||         rt                              d             d S |                    |           |                    |j        |                    |dz   }|t          |          k     |                                 |D ]}|j                            |          }|                    |j        |         j                   |j                            |dz   |dz              ||j        |         _        |j                            |          }|                    |          } |                    |          }!|j                            ||           |                    ||            |                    ||!           |dz   }|j                                        t          |j                  k    sJ t	          t          |j                            D ]4}
|j                            |
          |j        |
         j        k    sJ 3d S d S )	Nr   r   rR   zunable to re-arrange dags order to combine distributed embedding ops because a op both needs embedding table's output as input and produces ids as the same embedding table's inputFTzDunable to re-arrange dags order to combine distributed embedding opsr
   )r&   r[   r   	enumeraterangeoutput_namesr   r,   maxinput_namesr   minr   r   r^   setaddsortdescr   	copy_fromr   popinsertop_sizer   )"r*   r5   r6   r&   min_output_indexmax_input_indexinput_indexesoutput_indexesr   r   ioutsin_idin_varinsout_idout_varmove_opsrB   visitedstartpos	op_inputskjop1foundtyr   r   	insert_opinput_stateoutput_states"                                     r   dag_check_up_and_reorderzFDistributedOpsPass._pull_sparse_fuse.<locals>.dag_check_up_and_reorder   s   "//11L"<#344 OC#l&6"7"77MS3|'7#8#88N$\%566 J JRq#bo"6"677 " "A$S)Q..99R_Q%788D)26):): " "v!;$..12M#..1/3.G.GO!E /
 q#bn"5"566 J JA%c*a//((2>!#455C+4W+=+= J J"<3..23N3//23CS/I/I,J
 3|/0011   #q((^A->!-C-CMM N   FF/11/!3S5G5GHH B BA$Q'1,, )9!)<a(@AAA&x00 3@ 3@EArE!eeGLLA'''KK1&&&E#e**,,#El)-c2$&	!&q#bn*=*=!>!> 2 2A"$((2>!+<"="=C%,,S1111!&sQw0@10Db!I!I A AA"."21"5C"g~~ ($)E%*1c#2B.C.C%D%D 
* 
*'*zz#2B12E'F'F).s9~~)>)> !. !.A-6q\ %2 %2+,9948E,1E ,5 (- %.(-%.#( !*$)E!*$ A#1!#4 !+$,MM(n%& %& %& %+FFF %Q 'L,<Q,? @ @ @ %	?  #e**,,B JJLLL!& @ @+0;;<LMM|'7'>'CDDD$)44UQY	JJJ7;(/4$0$4$8$8$?$?	&3&7&7&>&>'5'9'9%'@'@$(//0@)LLL%,,-={KKK&--.>MMM+;a+?((@ $(0022c,:J6K6KKKKKs<#34455 O OA',//22l6Fq6I6NNNNNNy 21vO Or   ru   rR    rN   r   	op_devicec                     g | ]:}                                 j        |                    d           d                  ;S r   r   r   s     r   r+   z8DistributedOpsPass._pull_sparse_fuse.<locals>.<listcomp>\  r   r   r   param_name_to_grad_namer   z0can not find suitable sparse table, please checkr   r]   c                     g | ]:}                                 j        |                    d           d                  ;S )r/   r   )r&   r'   r   r   s     r   r+   z8DistributedOpsPass._pull_sparse_fuse.<locals>.<listcomp>t  sK       FH%%'',RYYu-=-=a-@A  r   c                 :    g | ]}                     |          S r   r   r   s     r   r+   z8DistributedOpsPass._pull_sparse_fuse.<locals>.<listcomp>z  r   r   distributed_lookup_table)r   r   r   )r]   r   r2   lookup_table_versionr   r   z,There can't be ops before embedding in gpupspull_gpups_sparser   r/   T)r   r]   r1   c                 6    g | ]}t          |j                  S r   )intr,   )r(   vars     r   r+   z8DistributedOpsPass._pull_sparse_fuse.<locals>.<listcomp>  s     BBBcCMMBBBr   z<The slot name in gpups Should be able to convert to integer.c                     g | ]}|d z
  S )rx   r   )r(   xs     r   r+   z8DistributedOpsPass._pull_sparse_fuse.<locals>.<listcomp>  s    888aAE888r   )r[   r&   r   rV   r   r'   r   r   rq   r,   r_   r2   
ValueErrorrp   r4   r   r   r   r   r   r   r   r   extendr   r   PSGPUset_slot_vectorset_slot_dim_vector)*r   r   pull_sparse_opsr7   rd   r   gpups_inputs_idxsgpups_outputs_idxsgpups_inputsgpups_outputsgpups_w_sizegpups_min_distributed_idxr   r   r   r5   r   	grad_namer2   r,   ctxr   r]   r   r6   r   r   inputs_idxsoutputs_idxsr   r   r   r   r   r   r   r   distributed_idxr   gpu_slotgpu_mf_sizesr   s*    `                                       @r   _pull_sparse_fusez$DistributedOpsPass._pull_sparse_fuse   s?   ]	O ]	O ]	O~  	M "!#LML(+H,A,A,C,C,G(H(H1(L%)//11 g	 g	JE3++--1GI'( 5FKK44	   KN  F %%'',SV\\#->->q-ABA#$71:DM% 78@IH%^^-- . .	c 3 3 5 555"||~~H2~~ F   (0De$a&++m44K V[[)9::N!fkG   LO  G %$Xvw???77773777Gttt} 8 8%%''2237777$V,K 5 5 7 7 ;<<q@ACLLPL$X%:%:%<%<%@AA  Rq#bo"6"677 N NA99R_Q%788D)26):): N Nv!;$..14S+e:L1M1MK.N q#bn"5"566  A((2>!#455C+4W+=+=  "<3..36 #\&%94 4L0 \" 	!((555")),777##F+++$$W---##QWQZL3v;;$>???,/ --7- - -) <  3{#3#33q88{##r))&)'llOO&)+&6&6&:O%%''22)3#)22&0*8'2$,07%.  3     s;//00  A&-ajO))++66-7(.q	{;;!*WQZL 9.<+6(04;)2  7    "  	43|#4#4q#8#8$%%)) !OPPP!!##../(< .(&*!%  /    JLLEBB\BBB    R   !!(+++88<888L%%l333335	4 	4#8#8s   %V2 2Wc                 &   i }i }i }i }d}|                                 j        D ]*}|j        t                                          v r|                    d          du r|                    t          |j                           d         }	|d         r,|d         s$|	|                    d          d         d         z  }	|	|d         v r|                    |	g           }|                    |           |||	<   |                    |	g           }
|
                    |                    d          d                    |
||	<   |j        d	k    rd},|                                 j        D ]}|j        t                                          v r|                    t          |j                           d         }	|	|v rS|                    d          d         ||	         v r0|                    |	g           }|                    |           |||	<   |||fS )
NFremote_prefetchTr   rN   is_fl_ps_moder   local_sparsecvm)
r&   r   r4   SPARSE_OP_TYPE_DICTkeysr   r   getr^   SPARSE_GRAD_OP_TYPE_DICT)r   r   r7   r   pull_sparse_idsr   r   r   r   
param_nameidss              r   _get_pull_sparse_opsz'DistributedOpsPass._get_pull_sparse_ops  s   
''))- 	" 	"B.335555GG-..$66XX&9"'&BCCAF
+, 8U?5K 8"((5//!"4Q"77J~!666%))*b99

2.1
+%))*b99

288E??1-....1
+w%!
''))- 		6 		6Bw2779999XX&>rw&GHHK
/11*oj.III)--j"==CJJrNNN25OJ/;;r   c                 &   |j         }|                     ||          \  }}}t          d                    |d                              t	          ||d                   }|                     ||||           |                     ||||           d S )Nz,is_heter_ps_mode in distributed_ops_pass {}?rN   rO   )rT   r  r   formatrU   r  r   )	r   ra   rb   rc   r7   r   r   r   rd   s	            r   rf   z%DistributedOpsPass._apply_single_impl  s    
 %%lE::		
:AA() 	
 	
 	

 ,U+=%>
 
 
 	|_eXNNN|_eZPPPPPr   )rg   rh   ri   r   r   r#   r   r  r  rf   rj   rk   s   @r   rn   rn   t   s            
    l l l\k4 k4 k4Z%< %< %<NQ Q Q Q Q Q Qr   rn   delete_optimizer_passc                   <     e Zd Z fdZd Zd Zd Zd Zd Z xZ	S )DeleteOptimizesPassc                 H    t                                                       d S r   r   r   s    r   r   zDeleteOptimizesPass.__init__  r   r   c                     dS r   r   r   s    r   r   zDeleteOptimizesPass._check_self  r   r   c                     dS r   r   r!   s     r   r#   z#DeleteOptimizesPass._check_conflict  r   r   c                    g }g }g }g }|D ]}|                     |j                   |D ]D}|                     |j                   |                     |                    d                     Et          t	          |                    }t          t	          |                    }t          d| d| d|            |D ] }	|	|v r|	|vr|                    |	           !t          t	          |                    }
t          |                                |           |
D ]P}	|                                	                    |	          r'|                                
                    |	           Qd S )Nop_role_varzremote_optimize_vars: z , remote_optimize_op_role_vars: z, local_optimize_vars: )r   input_arg_namesr   listr   r   r^   
delete_opsr&   has_var_remove_var)r   r   remote_optimize_opslocal_optimize_opslocal_optimize_varsremote_optimize_varsremote_optimize_op_role_varsoptimize_need_delete_varsr   r   need_delete_optimize_varss              r   _delete_optimizer_op_and_varsz1DeleteOptimizesPass._delete_optimizer_op_and_vars!  s    !!')$$&!$ 	; 	;B&&r'9::::% 	H 	HB ''(:;;;(//0F0FGGGG#$%% 
  
 (,,--(
 (
$ 	 f%9  f  f[w  f  f  Qd  f  f	
 	
 	
 ( 	6 	6C)))666)00555$(-F)G)G$H$H!8((**,?@@@, 	9 	9C$$&&..s33 9%%''33C888	9 	9r   c                     |d                                          j        d         }|                                                     |j        |j        |j        |j        |j        d           d S )Norigin_main_programlearning_rate_0T)r,   r   r{   r4   	lod_levelr|   )r&   r'   r<   r,   r   r{   r4   r+  )r   ra   r7   lr_vars       r   _add_lr_varzDeleteOptimizesPass._add_lr_varE  sy     '(5577<=NO 	 	!!##..,,& 	/ 	
 	
 	
 	
 	
r   c                    |j         }t          |          }t          ||d                   }t          |          }|                    |           t	          t          |          t          |          z
            }|                     |||           t          |d         d          r|                     ||           d S d S )Nremote_sparser)  lr_scheduler)	rT   get_optimize_ops
get_lr_opsr   r  r   r'  hasattrr-  )	r   ra   rb   rc   r7   all_optimize_opsr   lr_opsr!  s	            r   rf   z&DeleteOptimizesPass._apply_single_implS  s    +L99.%0
 
 L))""6***! !!C(;$<$<<
 
 	**-/A	
 	
 	
 5./@@ 	2\511111	2 	2r   )
rg   rh   ri   r   r   r#   r'  r-  rf   rj   rk   s   @r   r  r    s                "9 "9 "9H
 
 
2 2 2 2 2 2 2r   r  delete_extra_optimizer_passc                   0     e Zd Z fdZd Zd Zd Z xZS )DeleteExtraOptimizerPassc                 H    t                                                       d S r   r   r   s    r   r   z!DeleteExtraOptimizerPass.__init__h  r   r   c                     dS r   r   r   s    r   r   z$DeleteExtraOptimizerPass._check_selfk  r   r   c                     dS r   r   r!   s     r   r#   z(DeleteExtraOptimizerPass._check_conflictn  r   r   c                 (   |j         }g }g }g }t          |          }t          ||d                   }	t          t          |          t          |	          z
            }
g }|
D ]}|                    |j                   |	D ]D}|                    |j                   |                    |                    d                     Et          t          |                    }t          t          |                    }|D ]'}||v rd|k    r||vr|                    |           (t          t          |                    }g }|D ]S}g }|                                j	        D ] }||j
        v r|                    |           !|                    |           Tt          |                                |           |D ]P}|                                                    |          r'|                                                    |           Qd S )Nr/  r  r*  )rT   r1  r  r   r   r  r   r^   r&   r   output_arg_namesr  r  r  )r   ra   rb   rc   r7   r#  r$  r%  r4  r   r!  r"  r   r   r&  init_opsparam_init_ops                    r   rf   z+DeleteExtraOptimizerPass._apply_single_implq  si   !')$$&!+L99.%0
 
 " !!C(;$<$<<
 
 !$ 	; 	;B&&r'9::::% 	H 	HB ''(:;;;(//0F0FGGGG#C(<$=$=>>'+C0L,M,M'N'N$' 	6 	6C))) C''666)00555$(-F)G)G$H$H!, 	+ 	+CM%22448 - -"---!((,,,OOM****?//118<<<, 	@ 	@C++--55c:: @,,..::3???	@ 	@r   rg   rh   ri   r   r   r#   rf   rj   rk   s   @r   r8  r8  f  sl                +@ +@ +@ +@ +@ +@ +@r   r8  fake_init_ops_passc                   <     e Zd Z fdZd Zd Zd Zd Zd Z xZ	S )FakeInitOpsPassc                 H    t                                                       d S r   r   r   s    r   r   zFakeInitOpsPass.__init__  r   r   c                     dS r   r   r   s    r   r   zFakeInitOpsPass._check_self  r   r   c                     dS r   r   r!   s     r   r#   zFakeInitOpsPass._check_conflict  r   r   c                     t          |d         d          }t          |d         d          }t          t          ||z                       S )Norigin_main_programsTF)get_sparse_tablenamesr  r   )r   r7   dist_varnamessparse_varnamess       r   _get_sparse_table_namesz'FakeInitOpsPass._get_sparse_table_names  sU    -()4
 
 0()5
 
 C788999r   c           
         |D ]Y}|                                 j        |         }t          |                              d          d                                                                         d         |d         v r}g }|                                 j        D ] }||j        v r|                    |           !t          |          }|dk    rt          dt          |          z             |d         }	|                                 
                    di d|id	|	                    d	          i
           t          |                                 |           [d S )Nrw   r   r   r  rR   z&table init op num should be 1, now is 	fake_initr/   r   r3   )r&   r'   strr   stripr   r=  r^   r[   r   r>   r   r  )
r   rb   sparse_table_namesr7   
table_name	table_vartable_param_init_opr   init_op_numtable_init_ops
             r   _fake_init_sparsetablez&FakeInitOpsPass._fake_init_sparsetable  s    - 	L 	LJ'4466;JGII$$S))!,2244::<<R@() ) "$%22448 3 3!444'..r222122Ka <s;?O?OO   02M((**44 	* 2 27 ; ;<	 5    33557JKKKK/	L 	Lr   c                 l    |j         }|                     |          }|                     |||           d S r   )rT   rL  rW  )r   ra   rb   rc   r7   sparse_tabless         r   rf   z"FakeInitOpsPass._apply_single_impl  s:    44U;;##O]EJJJJJr   )
rg   rh   ri   r   r   r#   rL  rW  rf   rj   rk   s   @r   rC  rC    s                : : :L L L:K K K K K K Kr   rC  ps_gpu_passc                   B     e Zd Z fdZd Zd Zd Zd Zd Zd Z	 xZ
S )	PsGpuPassc                 H    t                                                       d S r   r   r   s    r   r   zPsGpuPass.__init__  r   r   c                     dS r   r   r   s    r   r   zPsGpuPass._check_self  r   r   c                     dS r   r   r!   s     r   r#   zPsGpuPass._check_conflict  r   r   c                 6   d}t          t          |                                j                            D ]\  }}|j        dk    r|}|                                j        D ]5}|j        dk    r|j        dk    rt          j        |j        t                      g           \  }}|D ]}|                                j        	                    |dz             }|
                    |           |                    t          t                     t          j                            |                                |          }	|                                j                            |dz   |	           |                                                                 7d S )Nr   lookup_table_gradpull_box_sparser   rR   )r  r   r&   r   r4   r   get_grad_op_descr   r   r   r   	_set_attrop_role_attr_namebackwardpaddlestaticOperatorr   _sync_with_cpp)
r   r*   insert_indexr   r   grad_op_descop_grad_to_varop_descnew_op_descnew_ops
             r   _add_push_box_sparse_opz!PsGpuPass._add_push_box_sparse_op  s   Ig&:&:&<&<&@AABB 	# 	#GCw---"&&((, 	8 	8Bw+++;N0N0N+/+@, ,(L. ( 
8 
8%22449DD 1$  %%g...%%&7BBB//((**K  $$&&*11,2BFKKK$$&&557777
8	8 	8r   c                    i }t          t          |                                j                            D ]-\  }}|j        dk    r|                    d          D ]}d||<   .g }g }g }t          |          D ]}d|j        vr|                    d          D ]l}||v rf|                    |	                    d                     |j        D ]6}	|	dk    r	|                    |	          D ]}
|
                    |
           7mt          t          |                    }t          t          |                    }|D ]}
|
|vr|
                    |
           t          t          |                    }|D ]P}|                                                    |          r'|                                                    |           Qd S )Nra  r   rR   Paramr  LearningRate)r  r   r&   r   r4   r   r1  r   r   r   r^   r   r  r  )r   r*   embedding_wr   r   r,   optimize_varsoptimize_op_role_varsr%  key_namer   r&  s               r   _remove_optimizer_varzPsGpuPass._remove_optimizer_var  s   Ig&:&:&<&<&@AABB 	* 	*GCw---HHSMM * *D()K%% "$&!"7++ 	6 	6Bbn,,)) 6 6;&&)001G1GHHH$&N 6 6#~55$#%88H#5#5 6 6C)00555566 S//00 $S)>%?%? @ @  	6 	6C///)00555$(-F)G)G$H$H!- 	9 	9D##%%--d33 9$$&&224888	9 	9r   c                    i }g }g }t          t          |                                j                            D ]t\  }}|j        dk    rd|                    d          D ]1}d||<   |                    |           |                    |           2|                    d          D ]}d||<   ut          t          |                                j                            D ]Y\  }}|j        dk    s|j        dk    r|j        D ]5}|                    |          D ]}	|	|v r|                    |            n6Zt          t          |                    }|
                    d           |D ])}|                                                    |           *|D ])}|                                                    |           *d S )	Nra  r   rR   r   rb  r   T)reverse)r  r   r&   r   r4   r   r^   r   r   r   r   r   r  )
r   r*   lookup_table_grad_varremove_op_index
remove_varr   r   r,   rx  r   s
             r   $_remove_lookup_table_grad_op_and_varz.PsGpuPass._remove_lookup_table_grad_op_and_var  s    "
Ig&:&:&<&<&@AABB 	4 	4GCw---IIh// , ,D23)$/#**3///%%d++++HHSMM 4 4D23)$//Ig&:&:&<&<&@AABB 	 	GCw+++rw:M/M/MN  88H--  C333'..s333 4 s?3344T***" 	3 	3C  ""--c2222 	5 	5D  ""..t4444	5 	5r   c                     |j         }|                     |           |                     |           |                     |           d S r   )rT   rq  ry  r  )r   ra   rb   rc   r7   s        r   rf   zPsGpuPass._apply_single_impl8  sK    $$\222""<00011,?????r   )rg   rh   ri   r   r   r#   rq  ry  r  rf   rj   rk   s   @r   r\  r\    s                8 8 8.!9 !9 !9F5 5 5:@ @ @ @ @ @ @r   r\  ps_transpile_passc                   0     e Zd Z fdZd Zd Zd Z xZS )PsTranspilePassc                 H    t                                                       d S r   r   r   s    r   r   zPsTranspilePass.__init__A  r   r   c                     dS r   r   r   s    r   r   zPsTranspilePass._check_selfD  r   r   c                     dS r   r   r!   s     r   r#   zPsTranspilePass._check_conflictG  r   r   c                 .   |j         }|d         dk    r ddlm}  |            }t          d           nddlm}  |            }t          d           |j         }t                      }|                    |||d         |d	         |d
         d           d S )Nuse_gpu_graphr   r
   )MultiThreadz8ps_transpile_pass use MultiThread for non_gpu_graph mode)SingleProcessMultiThreadzAps_transpile_pass use SingleProcessMultiThread for gpu_graph moderI   trainer_endpointscurrent_endpointF)rb   ra   rank	endpointsr  	wait_port)rT   transpiler.collectiver  r   r  get_dist_env	transpile)	r   ra   rb   rc   r7   r  r   r  envs	            r   rf   z"PsTranspilePass._apply_single_implJ  s    !Q&&;;;;;;ALMMMMHHHHHH((**AS   nn	+%\"-. !34 	 	
 	
 	
 	
 	
r   r@  rk   s   @r   r  r  ?  e                
 
 
 
 
 
 
r   r  split_heter_worker_ops_passc                   6     e Zd Z fdZd Zd Zd Zd Z xZS )SplitHeterWorkerOpsPassc                 H    t                                                       d S r   r   r   s    r   r   z SplitHeterWorkerOpsPass.__init__g  r   r   c                     dS r   r   r   s    r   r   z#SplitHeterWorkerOpsPass._check_selfj  r   r   c                     dS r   r   r!   s     r   r#   z'SplitHeterWorkerOpsPass._check_conflictm  r   r   c                 T   g }g }g }	|j         dz
  }
|d         }|                                                                }t          |                                          }||dz
           d         }||dz
           d         }|                    |
          }|                    |           t          |          D ]\  }}t          ||||           ||dz
           d         d         }t          ||||           ||dz
           d         d         }t          ||||           t          |j                  }|t          |          k     r|                    |
          }|                    |           t          |          D ]\  }}t          ||||           ||dz
           d         d         }t          ||||           ||dz
           d         d         }t          ||||           t          |||d          }|                    |d         d	z   t          |j                  z              n{t          |          D ]\  }}t          ||||           ||dz
           d         d         }t          ||||           ||dz
           d         d         }t          ||||           |}t          |||d          }|                    |d         d	z   t          |j                  z              t          |j                  }|t          |          dz
  k    rt          |||||||          }t          |||||||d
          }t!          ||||dz
           d         d                   }	g }g }t#          |          } d|d|dt%          |          dt          t'          |                    dt)          |          d|d         dt          t+          j        dd                    t.          t0          i}|                                                    ddg ii |           d S )NrR   rS   forwardrf  entranceexitr4   block_input_var_namerw   Fpersistablesmessage_to_block_idoptimize_blocksendpointfanin
pserver_iddistributed_moderC   rpc_exec_thread_numCPU_NUM    heter_listen_and_servr.   r3   )
num_blocks_heter_device_typelowerr   _get_stage_id_create_blockr^   r   block_append_opadd_vars_by_var_listr[   r   get_communicate_var_inforO  r   insert_communicate_opadd_send_opget_ps_endpointsget_heter_worker_endpointget_previous_stage_trainersr`   osgetenvr?   r@   r&   r>   )!r   r*   r7   heter_programprogram_block_ops_list	heter_opsblock_var_detailoptimizer_blockgrad_to_block_idsend_grad_var_listpre_block_idxrS   current_devicestage_idheter_block_ops_forwardheter_block_ops_backwardheter_block_r   entrance_vars	exit_varsfirst_op_index_fpheter_block_bpbp_entrance_varsbp_exit_varsbackward_comm_infoforward_comm_infofirst_op_index_bp
static_varstatic_var_bprD   rE   pserver_endpointss!                                    r   _create_heter_programz-SplitHeterWorkerOpsPass._create_heter_programp  s   2 %014<(
#6688>>@@z//1122"8A"F#
 $:(Q,#G$
  $11-@@{+++677 	E 	EEArM7KDDDD(A6yA*M]G]KPPP$X\29=fE	YLLL00c01111*88GGN"">222"#;<< L L2wKKKK/1=jI  ! '=.   ,HqL9*EfML g}n   ":#3*" " " ##"#9:n())*    ##;<< I I2wRHHHH/1=jI  ! '=+   ,HqL9*EfML g}k   )N4X}9
 
 
 	45+/""#	
 	
 	
   233s+,,q000.!  J .	
 	
 )X\*:6~F
 
 ,Z88!#31*==S4Z@@AA+j11i 0!3ryB'?'?#@#@!#9

 	""$$..(9	 	/ 	
 	
 	
 	
 	
r   c                 X   |j         }d}t          ||          \  }}}}	t          |          dk    rt                              d           |}dS t          |	          }	t          ||	|          }
t          j        	                                }| 
                    ||||	||
           |}dS )z
        split heter worker program from origin-program
        1. find heter op (located on different device)
        2. find input&output of every heter-block
        3. create heter worker program, add listen&serv op
        cpur   zuCurrently running in Heter Parameter Server mode, but no OP running on heterogeneous devices, Please check your code.N)rT   find_heter_opsr[   r   r   union_forward_gradient_opfind_block_jointsrg  r   r	   r  )r   ra   rb   rc   r7   default_devicer*   r  r  program_block_opsblock_vars_detailr  s               r   rf   z*SplitHeterWorkerOpsPass._apply_single_impl  s     3A.4
 4
0A0 y>>QMM H   #LF56GHH-&	
 
 (0022""	
 	
 	
 %r   )	rg   rh   ri   r   r   r#   r  rf   rj   rk   s   @r   r  r  e  sx                ^
 ^
 ^
B %  %  %  %  %  %  %r   r  split_trainer_ops_passc                   H     e Zd Z fdZd Zd Zd Zd Zd Zd Z	d Z
 xZS )	SplitTrainerOpsPassc                 H    t                                                       d S r   r   r   s    r   r   zSplitTrainerOpsPass.__init__6  r   r   c                     dS r   r   r   s    r   r   zSplitTrainerOpsPass._check_self9  r   r   c                     dS r   r   r!   s     r   r#   z#SplitTrainerOpsPass._check_conflict<  r   r   c                    |                                 j        }|d         }d}|D ]9}	t          |	          t          |          k    r|                    |	          } n:|dk    sJ t	          |                                 |           g }
|d         }|dk    rt          |          }||         d         d         }
t          ||dz   |
          }|                                                     |dd|                                 j        |
d                  id	g id
ddg |
ddg d|d         d|dg dt          |          t          t          i           |
S )Nr   r   rS   rR   r  r  send_and_recvr.   r/   modesend_var_namemicrobatch_idrecv_var_namemessage_namer  next_endpointsprevious_endpointsrI   r   )r&   r   rO  r   delete_same_opsget_next_stage_trainersr  r   r'   r`   r?   r@   )r   r*   r7   heter_block_indexops_listr  all_opstart_opfirst_op_idxr   entrance_varrS   next_heter_worker_endpoints	comm_infos                 r   _replace_ops_by_communicate_opz2SplitTrainerOpsPass._replace_ops_by_communicate_op?  s    %%''+A; 	 	B2ww#h--''%||B// ( r!!!!,,..999<(
!!*A**M*M'+,=>yIL 1*Q. I   ""--"$W11338aIJI#%E|%E_%E#R"I.D$E$&A(" +j"9"9)+A	 .   " r   c                    t          |d                   D ].\  }}|d         }|d         }|j        j        |k    r	|d         |= /t          |d                   D ]1\  }}|d         }|d         }|j        j        |k    r|d         |=  d S 2t          |d                   D ]1\  }}|d         }|d         }|j        j        |k    r|d         |=  d S 2d S )Nmerged_variables_pairsr   rR   merged_dense_pairsmerged_sparse_pairs)r   
merged_varr,   )r   var_namer7   r   pairr   var_grads          r   _remove_var_pair_by_gradz,SplitTrainerOpsPass._remove_var_pair_by_gradk  s   $U+C%DEE 	; 	;KE4q'CAwH"'83323E:$U+?%@AA 	 	KE4q'CAwH"'833./6 4 %U+@%ABB 	 	KE4q'CAwH"'833/07 4	 	r   c                    ||         d         d         ||         d         d         z   }g }g }t          |          D ]v}t          ||                                |          \  }	}
|	D ]K}|                    d          d         }||v r*|                    |           |                    |           Lwt          t          |                    }t          |                                |           |D ]}|                     ||           d S )Nr  r  rf  z@GRADr   )	find_send_opfind_op_input_outputr&   r   r^   r  r   r  r   )r   r*   r7   r  r  r  need_remove_send_opneed_remove_grad_varr   
input_listr  r  origin_var_namegrad_var_names                 r   _remove_trainer_send_opz+SplitTrainerOpsPass._remove_trainer_send_op  sL    ./	:>J01*=nMN 	 !!w'' 	: 	:B0--// MJ ' : :"*.."9"9!"<"l22'..r222(//999	:
 #3':#;#;<<7''))+>???1 	@ 	@M))-????	@ 	@r   c                    g }t          dt          |                    D ]S}||         d         ||         d         z   }||                     |||||          z  }|                     ||||           Tg }	g }
|d         d         }t	          |                                |           t          ||           t          ||||          }|d         d         d         }t          |d|d          }|
	                    |d         dz   t          |j                  z              |		                    |           |d	         }d
|
d|	dt          |          dddt          |          d|d         dt          t          j        dd                    t"          t$          i}|                                                    dddg ii |           d S )NrR   r  rf  r   r  r  r  rw   rS   r  r  r  r  r  r  rC   r  r  r  r  r.   r   )r   r[   r  r	  r  r&   delete_trainer_useless_varcreate_backward_blockr  r^   rO  r   get_trainer_endpointr`   r   r  r  r?   r@   r   )r   r*   origin_programr7   r  r  r  r  r  r  r  bp_ops_listbackward_blockr  r  rS   s                   r   _create_trainer_programz+SplitTrainerOpsPass._create_trainer_program  sG   $ 
!&q#.D*E*E!F!F 
	 
	&'89)D():;JGH  $== 18=M  J (( 13C    ,Q/
;,,..<<<"7J777.^[2B
 
 ,A.z::F5A/j
 
 
 	56.$%%&	
 	
 	

 	~...<(
!#3,  Q+j11i 0!3ryB'?'?#@#@!#9
 	))(9 	* 	
 	
 	
 	
 	
r   c                     |j         }d}t          ||          \  }}}}	t          |	          }	t          ||	|          }
|                                }|                     ||||	|
           |}dS )z
        split cpu-trainer program from origin-program
        1. find heter op (located on different device)
        2. find input&output of every heter-block
        3. create cpu-trainer program, add send&recv op
        r  N)rT   r  r  r  cloner  )r   ra   rb   rc   r7   default_device_r*   r  default_opsr  r  trainer_programs               r   rf   z&SplitTrainerOpsPass._apply_single_impl  s     =K/>
 >
:K): 66GHH-&	
 
 "--//$$	
 	
 	
 'r   )rg   rh   ri   r   r   r#   r  r   r	  r  rf   rj   rk   s   @r   r  r  4  s                * * *X  *@ @ @4I
 I
 I
\' ' ' ' ' ' 'r   r  set_heter_pipeline_opt_passc                   0     e Zd Z fdZd Zd Zd Z xZS )SetHeterPipelineOptPassc                 H    t                                                       d S r   r   r   s    r   r   z SetHeterPipelineOptPass.__init__  r   r   c                     dS r   r   r   s    r   r   z#SetHeterPipelineOptPass._check_self	  r   r   c                     dS r   r   r!   s     r   r#   z'SetHeterPipelineOptPass._check_conflict  r   r   c                    |j         }|d         }|d         j        d         }|t          |                                          dz
  |                                dd|_        dd|                                t          |                                          t          |                                          dz
  t          |                                          |||                                dd
|_        d S )	NrS   user_defined_strategyaccumulate_stepsrR   )rb   pipeline_stageheter_place
is_fl_modeHeterPipelineTrainerHeterSection)
trainerdevice_workertrainersrI   r   num_pipeline_stagessection_programnum_microbatchesr!  r"  )	rT   pipeline_configsr   r  _heter_device_heter_pipeline_opt_get_stage_trainers_role_id_get_num_stage)r   ra   rb   rc   r7   rS   r*  s          r   rf   z*SetHeterPipelineOptPass._apply_single_impl  s    <(
 !89J

  /!*":":"<"<==A%3355	/
 /
+ .+"6688j113344!*":":"<"<==A#&z'@'@'B'B#C#C+ 0%3355,
 ,
(((r   r@  rk   s   @r   r  r    r  r   r  split_fl_ops_passc                        e Zd Z fdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Z xZS )SplitFlOpsPassc                     t                                                       d| _        d| _        d| _        d| _        d S )Nzgpu:0zgpu:2zgpu:1zgpu:3)r   r   PART_A_DEVICE_FlAGPART_A_JOINT_OP_DEVICE_FlAGPART_B_DEVICE_FlAGPART_B_JOINT_OP_DEVICE_FlAGr   s    r   r   zSplitFlOpsPass.__init__,  s?    ")+2(")+2(((r   c                     dS r   r   r   s    r   r   zSplitFlOpsPass._check_self3  r   r   c                     dS r   r   r!   s     r   r#   zSplitFlOpsPass._check_conflict6  r   r   c                     d S r   r   r   s    r   _insert_encrypt_opz!SplitFlOpsPass._insert_encrypt_op9      r   c                     d S r   r   r   s    r   _insert_decrypt_opz!SplitFlOpsPass._insert_decrypt_op<  r=  r   c                     |j         D ]I}|j        D ]?}|                    t                    }|dk    r|                    t          d          nd  @Jd S )Nr   )blocksr   r   OP_DEVICE_KEYrd  )r   r*   rZ   r   devices        r   _clear_op_device_flagz$SplitFlOpsPass._clear_op_device_flag?  sk    ^ 	J 	JEi J J//39R<<]B///TJ	J 	Jr   c                    g | _         g | _        t          t                    }| j                            d          }|j        D ]}|                    t                    }|| j	        k    s|dk    s|| j
        k    r#|d         }| j                             |           n8|| j        k    s|| j        k    r"|d         }| j                            |           |j        }|                                j                                        }|                    |           |                    t          |           dD ]}||         }|                                 |S )Nr   r   ab)rF  rG  )	partA_ops	partB_opsr   r	   ori_main_programrZ   r   r   rB  r5  r6  r^   r7  r8  r   r&   r>   r   rd  rj  )	r   party_program_maprZ   r   rC  r*   rn  ap_opkeys	            r   _split_fl_programz SplitFlOpsPass._split_fl_programE  s^   '00%++A..) 	3 	3BWW]++F$111R<<T===+C0%%b))))$111T===+C0%%b)))gG((**/99;;EOOG$$$OOM62222 	% 	%C',G""$$$$  r   c                    dd dd d}|                     |dd| j        idg id	d
dg | j        ddg d|dt          | j                  dt          | j                  dt          | j                  t          t          i           d S )Nforward_joint_rR   r  r
   @fl_psr  r.   r/   r  r  r  r  r  r  r  r  rI   r3   )	r   partA_to_partB_tensorpartA_to_partB_tensor_namer  rS   r  r`   r?   r@   r   rZ   r   r  s       r   _insert_partA_communicate_opz+SplitFlOpsPass._insert_partA_communicate_opd  s    2Q22222	 34BK	 "4"#"  	 "9O# # %&AO' ' k$/::%'= 	 	
 	
 	
 	
 	
r   c                    dd dd d}|                     |dd| j        idg id	d
dg | j        ddg d|dt          | j                  dt          | j                  dt          | j                  t          t          i           d S )Nbackward_joint_r
   r  rR   rQ  r  r.   r/   r  rf  r  r  r  r  r  r  rI   r3   )	r   partB_to_partA_gradpartB_to_partA_grad_namer  rS   r  r`   r?   r@   rT  s       r   _insert_partB_communicate_opz+SplitFlOpsPass._insert_partB_communicate_op~  s    3a33!333	 12BK
 "2"#"  	 "9O# # %&AO' ' k$/::%'= 	 	
 	
 	
 	
 	
r   c                    |D ]}|                     t          |                    r%| j                            t          |                    }t	          |t
                    rR|                    |j        |j        |j	        |j
        |j        |j        |j        |j        |j        |j        
  
        }n|                    |d          }|j        |_        t%          |d          r|j        |_        d S )N)
r,   r   r{   r4   r+  r}   	trainableoptimize_attrregularizer
error_clipFr]   )_find_var_recursiverO  ori_main_block_var_recursive
isinstancer   create_parameterr,   r   r{   r4   r+  r}   r\  r]  r^  r_  _clone_variabler3  r]   )r   r'   rZ   r   
source_vardest_vars         r   _create_var_for_blockz$SplitFlOpsPass._create_var_for_block  s    	D 	DC((S22 ,;;CHHEEJ#y)) D 11#$*$*#(2",":(2",": * 6)4 2   !00UCC%/%=H"z#344 D*4*C'+	D 	Dr   c                 :   |t          |j                  k     r|                    |          }n|                                }t	          |          D ]\  }}|j                                        }|                    |j                   |                    t          |
                    t                               |j                                        |j                                        z   }|                     ||           |                                 |S r   )r[   rA  rZ   r  r   r   r>   r   rd  rB  r   r  r=  rh  rj  )	r   op_listr*   	block_idx	new_blockr  r   rL  r'   s	            r   _get_block_by_idxz SplitFlOpsPass._get_block_by_idx  s    s7>****i00II--//Iw'' 	8 	8EArN,,..EOOBG$$$OOM277=+A+ABBB7**,,rw/G/G/I/IID&&tY7777  """r   c                     d}|j         D ]8}t          |          r"|                    t                    |k    r|c S |dz  }9|S Nr   rR   )r   is_forward_opr   rB  r   rZ   flagop_idxr   s        r   _find_joint_forward_opz%SplitFlOpsPass._find_joint_forward_op  sW    ) 	 	BR   RWW]%;%;t%C%C!r   c                     d}|j         D ]8}t          |          r"|                    t                    |k    r|c S |dz  }9|S ro  )r   is_backward_opr   rB  rq  s        r   _find_joint_backward_opz&SplitFlOpsPass._find_joint_backward_op  sW    ) 	 	Bb!! bggm&<&<&D&D!r   c                    |                      ||          }|j        |         }|j                                        }|                     ||          }|j        |         }|j                                        }t          t          |          t          |          z
            | _        g | _	        | j        D ]4}| j	        
                    | j                            |                     5d S r   )rw  r   r   r  rt  r=  r  r   rY  rX  r^   ra  r   )r   rZ   rr  rs  r   vars1vars2r  s           r   _get_partB_to_partA_gradz'SplitFlOpsPass._get_partB_to_partA_grad  s    --eT::Yv'')),,UD99Yv((**(,SZZ#e**-D(E(E%#% 5 	O 	OH$++D,?,C,CH,M,MNNNN	O 	Or   c                 x    | j         }t          ||          \  }}t          ||          t          ||          z   S r   )rJ  find_ops_list_input_outputscreen_persistables)r   
bp_op_listr*   bp_op_inputbp_op_outputs        r   _find_dense_grad_varsz$SplitFlOpsPass._find_dense_grad_vars  sN    '$>Z%
 %
!\ #7K88;N\<
 <
 
 	
r   c                 b   |                      || j                  }g }t          t          |j                            D ]}|j        |         }|                    |           ||k    r^|j                                        d         }|j                                        | _        | j	        
                    |          | _         n|                     || j        d          }|                     ||dz              t          |          }t!          |          }	|                     ||	z   | j        d          }
dd dd d}|dz   t#          |
j                  z   }d|gd	|
gd
t'          | j                  dddt+          | j                  d| j        dt/          t1          j        dd                    t4          t6          i}|
                    dddg ii |           t;          | j                  }t?          ||           |                      |          }tC          | j        |
|           d S )Nr   rR   rW  r
   r  rQ  rw   r  r  r  r  r  r  r  r  r  r  r.   r   )"rt  r6  r   r[   r   r^   r   r=  rS  ra  r   rR  rm  partA_programrU  get_bp_op_list#get_distributed_push_sparse_op_listrO  r   r  rS   r`   rC   r   r  r  r?   r@   r   r  rJ  r  r  r  )r   rZ   rs  rj  r   r   out_namefirst_blockr  push_sparse_op_listsecond_blockblock_input_flagr  r7   send_opsdense_grad_varss                   r   _get_partA_programz!SplitFlOpsPass._get_partA_program  sO    ,,43
 
 s59~~&& 	 	A1BNN2F{{73355a824'2J2J2L2L/-1-@-D-DX-N-N*	 
 ,,Wd6H!LL))+vzBBB $E**
A%HH--,,d.@!
 
 ;Q:::::+c1C8H4I4II!$4#5~,T_==Q+do66!3ryB'?'?#@#@!#9	
 	(9 	  	
 	
 	
   566x(((44Z@@D)<IIIIIr   c                    |                      || j                  }|                     || j                  }d}g }g }g }|j        D ]T}||k     r|                    |           n1||k    r|                    |           n|                    |           |dz  }U|                     || j        d          }	|                     || j        d          }
|                     |
t          |                     |                     || j        d          }
t          |
          }| 
                    |          }t          | j        |
|           dd dd d}|dz   t          |
j                  z   }d|gd	|
gd
t          | j                  dt          t#          | j                            ddd| j        dt'          t)          j        dd                    t,          t.          i}|	                    t          |          ddg ii |           d S )Nr   rR   rP  r  r
   rQ  rw   r  r  r  r  r  r  r  r  r  r  r.   r   )rt  r8  rw  r   r^   rm  partB_programrZ  r[   r  r  r  rJ  rO  r   r  rS   r  rC   r   r  r  r?   r@   r   )r   rZ   op_idx1op_idx2op_cntop_list1op_list2op_list3r   r  r  r  r  r  r  r7   s                   r   _get_partB_programz!SplitFlOpsPass._get_partB_program  sG   --43
 
 ..43
 
 ) 	 	B####7""#######aKFF ,,Xt7I1MM --h8JANN)),HFFF--h8JANN#L11
44Z@@D)<III :A99999+c1C8H4I4II!$4#5~1$/BBS4T_EEFF!!3ryB'?'?#@#@!#9	
 	h--(9 	 	
 	
 	
 	
 	
r   c                    |j         }|d         | _        |d         | _        |d         | _        || _        |                    d          | _        |                                 }|d         }t          dz   }t          ||           | 
                    |                                | j                   |d         }t          dz   }t          ||           | j        st          j                                        | _        |                     |                                           | j        |j         d	<   |                     | j                   t'          | j                   d S t          j                                        | _        |                     |                                           | j        |j         d
<   |                     | j                   t'          | j                   d S )NrS   rC   is_heter_workerr   rF  z6_fl_A_main_program.prototxtrG  z6_fl_B_main_program.prototxtpart_a_main_programpart_b_main_program)rT   rS   rC   	is_part_brJ  rZ   ra  rN  ps_log_root_dirdebug_programr{  r&   r6  rg  r   r	   r  r  rD  check_programr  r  )	r   ra   rb   rc   r7   rK  prog_a
_main_fileprog_bs	            r   rf   z!SplitFlOpsPass._apply_single_implT  s   -Y'01 ,*0033 2244"3'$'EE
j&)))%%!!4#C	
 	
 	
 #3'$'EE
j&)))~ 	.!'!1!9!9!;!;D##F$7$7$9$9:::595GHO12&&t'9:::$,-----!'!1!9!9!;!;D##F$7$7$9$9:::595GHO12&&t'9:::$,-----r   )rg   rh   ri   r   r   r#   r<  r?  rD  rN  rU  rZ  rh  rm  rt  rw  r{  r  r  r  rf   rj   rk   s   @r   r3  r3  *  sE       3 3 3 3 3        J J J! ! !>
 
 
4
 
 
4D D D0      
O 
O 
O
 
 
2J 2J 2Jj5
 5
 5
t .  .  .  .  .  .  .r   r3  )r  _collectionsr   rg  paddle.baser   #paddle.distributed.passes.pass_baser   r   paddle.frameworkr   paddle.staticr   r	   ps.utils.publicr   rn   r  r8  rC  r\  r  r  r  r  r3  r   r   r   <module>r     s   
			 $ $ $ $ $ $  ! ! ! ! ! ! G G G G G G G G ! ! ! ! ! ! , , , , , , , ,     %&&UF UF UF UF UF UF UF '&UFp %&&^Q ^Q ^Q ^Q ^Q ^Q ^Q '&^QB &''L2 L2 L2 L2 L2( L2 L2 ('L2^ ,--5@ 5@ 5@ 5@ 5@x 5@ 5@ .-5@p #$$3K 3K 3K 3K 3Kh 3K 3K %$3Kl }e@ e@ e@ e@ e@ e@ e@ e@P "##"
 "
 "
 "
 "
h "
 "
 $#"
J ,--K% K% K% K% K%h K% K% .-K%\ '((L' L' L' L' L'( L' L' )(L'^ ,--"
 "
 "
 "
 "
h "
 "
 .-"
J "##I. I. I. I. I.X I. I. $#I. I. I.r   