
    Αi                    l   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJrJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  SSKJrJr  S	S
KJrJr  S	SKJr  S	SKJrJrJr  S	SKJr  S	SK J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)  \" \*\RV                  SS9r,/ SQr-Sr.Sr/S r0S r1S r2S r3S r4S r5S r6S r7 " S S5      r8g)    N)contains_spmd_ruleget_phi_spmd_rule)Operator)
get_logger)OpRole)core   )ProcessMeshcompute_compatible_process_mesh   )OperatorDistAttrTensorDistAttr)_node_id)_gradient_sync_by_partial_ops*find_compatible_distributed_operator_impls(find_distributed_operator_impl_containerget_world_process_group)	__no_shape_var_type___g_gradient_clip_opsget_pp_degreeis_gradient_clip_opis_loss_grad_op
is_loss_opis_naive_data_parallel6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_var_dist_attrz&%(asctime)s-%(levelname)s: %(message)s)fmt)create_py_readercreate_double_buffer_readerwhileread Auto_Parallel_Completion_Skippedi  c                     U R                  S5      (       a  U R                  S5      OSnU R                  SU[        -   5        g )Nop_namescope/)has_attrattr	_set_attr_skip_propagation_prefix)opprefixs     r/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/static/completion.py$mark_as_sharding_propagation_skip_opr.   C   s4    (*N(C(CRWW^$FLL*B!BC    c                 0   [        U [        R                  R                  R                  5      (       a  U nO1[        U [
        5      (       a  U R                  nO[        SU  S35      eUR                  S5      =(       a    [        UR                  S5      ;   $ )Nz*static mode operator is expected but got []r%   )
isinstancepaddlebase	libpaddleOpDescr   descRuntimeErrorr'   r*   r(   )r+   op_descs     r-   is_sharding_propagation_skip_opr:   H   s}    "fkk++2233	B	!	!''Gt1MNN C
"gll>&B
BCr/   c                 X    U (       d  gS nSnU  H  nU" X#5      u  pBU(       a  M    g   U$ )z?Compute the compatible dim mapping given a list of dim mapping.Nc                 >    U S:X  a  SU4$ US:X  a  SU 4$ X:X  a  SU 4$ g)NT)FN )dm1dm2s     r-   &_compute_compatible_dim_mapping_of_twoNcompute_compatible_dim_mapping.<locals>._compute_compatible_dim_mapping_of_twoY   s6    "99"99:9r/   r=   r>   )dim_mapping_listrA   compatible_resultmapping
compatibles        r-   compute_compatible_dim_mappingrG   T   sA     #(N)
%
 z $ r/   c                     U (       d  g[        U S   5      nU  H  nUc    g[        U5      U:w  d  M    g   / n[        U 6  H-  n[        [        U5      5      nUc    gUR	                  U5        M/     U$ )zgCompute the compatible dims mapping given a list of dims mapping.
Each of dims mapping is also a list.
Nr   )lenziprG   listappend)dims_mapping_listlengthdims_mappingrD   dim_mappingscompatible_dim_mappings         r-   compute_compatible_dims_mappingrR   l   s     "1%&F)|&	 *
 ./!?"
 ")  !78 0 r/   c                     [        5       n[        5       nU c  Uc  g U b  [        U R                  5      nUb  [        UR                  5      nUR                  U5      n[        [	        U5      5      nU$ N)setprocess_idsunionr
   rK   )pm1pm2process_set1process_set2merged_process_setmerged_process_meshs         r-   merge_process_mesh_twor^      sj    5L5L
{s{
3??+
3??+%++L9%d+=&>?r/   c                    U c  g[        [        U 5      5       H(  nX   S:  d  X   [        UR                  5      :  d  M(    g   [        [        UR                  5      5       H  nU R                  U5      S:  d  M    g   g)NFr=   r   T)rangerI   shapecount)rO   process_meshis      r-   _validate_dims_mappingre      sw    3|$%?R<?c,:L:L6M#M & 3|))*+a 1$ , r/   c                 p   [         R                  " SS5      n[        U[        5      (       a  UR	                  5       nUS:X  a  SOSn[        U5      n/ SQn[         R                  " S5      nUS:X  a  UR                  S5        U R                  R                  nU=(       a    [        U5      =(       a    XB;   $ )NFLAGS_infer_spmd_enableTtrueF)fused_rotary_position_embedding	matmul_v2elementwise_div!fused_softmax_mask_upper_triangleelementwise_addelementwise_mulassignscaledropout
reduce_sum
layer_normlookup_table_v2reshape2
transpose2split
unsqueeze2siluconcatexpand_as_v2swiglutilefused_rms_normstrided_slicestack	gather_ndPARALLEL_CROSS_ENTROPYsoftmax_with_cross_entropy)
osgetenvr2   strlowerboolrL   	serial_optyper   )dist_openable__adapted_ops__parallel_ceop_types        r-   _can_apply_infer_spmd_ruler      s    YY0$7F&#6)u&\FO6 ))45Kf;<$$GP(1Pg6PPr/   c           	         [        U 5      n[        R                  SU R                  R                   SUR                   S35        UR                  U 5      nU=(       d    UnUR                  X5      n[        R                  SU R                  R                   SU R                  R                   SU R                  R                   S35        U=(       a    U(       + $ )NzUpdate Op [z] using DistOpContainer [z].Op [z] use dist op impl [z] idx [)
r   _loggerdebugr   r   update_dims_mappingmapping_to_dist_operator_impl	dist_attr	impl_typeimpl_idx)r   original_op_dist_attrchangeddist_op_containerupdatedreverteds         r-   ,_update_op_dims_mapping_and_distoperatorimplr      s     AIMM
g'',,--FGXG]G]F^^`a  33G<G G >>H MM
w  %%&&:7;L;L;V;V:WW^_f_p_p_y_y^zz|} %H~%r/   c                       \ rS rSrS rSS jrSS jrS rS rS r	S r
S	 rS
 rS rS rSS jrS rS rS rS rSS jrSS jrSS jrS rSS jrS rSrg)	Completer   c                 (    Uc   eXl         SU l        g )NF)_dist_context_has_prepared)selfdist_contexts     r-   __init__Completer.__init__   s    ''')"r/   c                 |   SnUR                  5       (       a  UR                  5       c  gUR                  5       nUR                  5       [        ;   a  gU R                  R                  U5      nUc   eUR                  S5      (       a  gUR                  nU(       Ga@  / nUR                   H  nUR                  5       c  M  UR                  5       R                  5       S:X  dD  UR                  5       R                  5       S:X  d"  UR                  5       R                  5       S:X  a  M~  U R                  R                  U5      n	U	R                  UR                  :X  d  M  U	R                  UR                  5       5      n
UR                  U
5        M     UR                  U5        [        U5      n[!        XR                  5      (       d  gUb  X:w  a  Xl        SnU$ / nUR"                   GH  nUR                  5       c  M  UR                  5       R                  5       S:X  db  UR                  5       R                  5       S:X  d@  UR                  5       R                  5       S:X  d  [%        UR                  5       5      (       a  M  U R                  R                  U5      n	U	R                  UR                  :X  d  M  U	R'                  UR                  5       5      n
UR                  U
5        GM     UR                  U5        [        U5      n[!        XR                  5      (       d  gUb  X:w  a  Xl        SnU$ )NFrO   r   r    r"   T)is_varvarr   r   r   get_tensor_dist_attr_for_graphis_annotatedrO   inputsr+   get_op_dist_attr_for_graphrc   get_output_dims_mappingnamerL   rR   re   outputsr:   get_input_dims_mapping)r   tensor_nodefwdr   tensor_desctensor_dist_attrtensor_dims_mappingrM   pred_op_nodeop_dist_attrop_dims_mappingcompatible_dims_mappingsucc_op_nodes                r-    _update_tensor_node_dims_mapping*Completer._update_tensor_node_dims_mapping   s   ""$$+//*;*C!oo'!66--LL
  +++((88.;; " + 2 2??$0$)..04FF'??,11389'??,113v= !**EE( ! %11+889 +7*N*N',,.+ *00A- !3. $$%89&E!'# *')F)F  '3'>0G-N K !# + 3 3??$0$)..04FF'??,11389'??,113v=:<??;LMM **EE( ! %11+889 +7*M*M',,.+ *00A- !4. $$%89&E!'# *')F)F  '3'>0G-r/   c                    SnUR                  5       nUR                  5       (       a  UR                  5       c  gUR                  5       [        ;   d  [	        UR                  5       5      (       a  gU R
                  R                  U5      nUR                  n[        R                  " U5      nU(       a  UR                  nOUR                  nU GH  n	U	R                  5       (       a  U	R                  5       c  M,  U	R                  5       R                  5       [        R                  R                   R"                  :X  a  Mr  U	R                  5       n
U(       a   UR%                  U
R'                  5       5      nOUR)                  U
R'                  5       5      nU(       a  M  U R
                  R+                  U	5      nUR,                  UR,                  :X  d  GM	  UR.                  nU(       a   UR1                  U
R'                  5       5      nOUR3                  U
R'                  5       5      n[5        X/5      n[7        XR,                  5      (       d  GM  Uc  GM  X:w  d  GM  U(       a!  UR9                  U
R'                  5       U5        O UR;                  U
R'                  5       U5        SnGM     [=        U5      (       a9  [>        RA                  SURB                  R                   S35        [E        XWU5      $ [>        RA                  SURB                  R                   S35        [G        XRS9nUb  Sn[        R                  " U5      nUnU Hw  nURI                  U5      nU(       a  SnURK                  U5      (       a;  URM                  5       (       a&  UR                  Ul'        URP                  Ul)        Sn  OUUl        UnMy     U(       a  Xul        SnU$ Xul        SnU$ )NFTr   z/] update dims mapping using New InferSPMD Rule.z1] update dims mapping using Original DistOp Rule.r   )*r+   is_opr   __skip_dims_mapping_op__r:   r   get_dist_op_for_graphr   copydeepcopyr   r   r   r   r   VarDescVarTypeREADERis_annotated_input_dims_mappingr    is_annotated_output_dims_mappingr   rc   rO   r   r   rR   re   set_input_dims_mappingset_output_dims_mappingr   r   r   r   r   r   r   is_auto_compatiblevalidate_dist_attrr   idxr   )r   op_noder   r   r9   r   r   r   	node_listr   r   	annotatedr   r   r   r   op_dist_implsnot_compatiblebackup_op_dist_attrbackup_changedop_dist_impldim_changeds                         r-   _update_op_node_dims_mapping&Completer._update_op_node_dims_mapping?  sz   **, WZZ\%9 LLN66.wzz|<<$$::7C(( $l ; II$K%%'';??+<+D %%'4<<+?+?+F+FF%//+K(HH$$&	 )II$$&	  ""AA+N  ((,<,I,II&6&C&C#&2&I&I#((*'O '3&J&J#((*'O +J$:+' .+-F-F  +7+>$;;',,.0G %<<',,.0G #Ge %j &g..MMw((--..]^ @  MMw((--.._` GM (!%&*mmL&A#!($1L"."B"B7"KK""&$77@@#66881=1B1B.0<0@0@-).,?)"0 %2 "(=%#G
 N %:!Nr/   c                    SnU R                    H  u  p#U R                  R                  U5      nU R                  R                  U5      nUR                  UR                  :w  a  MW  UR                  nUR                  n[        Xg/5      n[        XR                  5      (       d    gUb  X:w  a  Xl        SnUc  M  X:w  d  M  Xl        SnM     U$ )NFT)_node_pairs_between_graphsr   get_dist_attr_for_graphrc   rO   rR   re   )	r   r   parent_node
child_nodeparent_node_dist_attrchild_node_dist_attrparent_node_dims_mappingchild_node_dims_mappingr   s	            r-   #_update_dims_mapping_between_graphs-Completer._update_dims_mapping_between_graphs  s    '+'F'F#K$($6$6$N$N%! $(#5#5#M#M$  &22'445 '<'I'I$&:&G&G#&E)C'# *')K)K  '3'C5L2'3'B4K1= (G> r/   c                    U R                   R                  n/ SQnU GHC  nUR                  5       b$  UR                  5       R                  5       U;   a  M9  U R                   R	                  U5      nUR
                   H  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R                  5       [        R                  R                  R                  :X  a  Ms  UR                  5       nU R                   R                  U5      nUR                  UR                  :X  d  M  UR                  UR                  5       5      nXl        M     GMF     g )N)r   r    r"   )r   _serial_ordered_op_nodesr+   r   r   r   r   r   r   r   r   r   r   rc   r   r   rO   )	r   op_nodesrelated_reader_opsr   r   r   r   r   r   s	            r-    _update_dims_mapping_for_special*Completer._update_dims_mapping_for_special  s    %%>>

  G

(JJL%%'+==--EEgNL&%%''KOO,=,I"(--/4<<3G3G3N3NN "-//"3K**II' % %11+889 +7*N*N',,.+ 9H5#  /  r/   c                    SnSnU(       Gd  U[         :  Ga  SnS H  nU(       a  U R                  R                  O[        U R                  R                  5      nU H  nUR	                  5       (       a)  UR                  5       b  U R                  XdS9nU(       a  SnUR                  5       (       d  MX  UR                  5       c  Mk  U R                  XdS9nU(       d  M  SnM     U R                  5       n	U	(       d  M  SnM     U(       a  SnOSnUS-  nU(       d  U[         :  a  GM  U[         :  a  [        R                  S5        U R                  5         g )Nr   F)TFr   Tr   znSharding Propagation reach the Max Step and is NOT Converge! The Sharding Propagation Iteration is Terminated.)_max_propagation_stepr   serial_ordered_nodesreversedr   r   r   r   r+   r   r   r   r   r   )
r   stepreach_fix_pointr   is_fwd	all_nodesnodetensor_changed
op_changedgraph_changeds
             r-   _update_dims_mappingCompleter._update_dims_mapping  s@   "0E)EG'  &&;;!$"4"4"I"IJ 
 &D{{}})?)-)N)N  *O * *&*Gzz||	(=%)%F%F  &G &
 &:&*G & !% H H J ="G+ (. "'"&AID; #0E)E@ ((MM A 	--/r/   c                    U R                   R                  U5      nUR                  S5      (       dM  UR                  nU R                   R                  U5      nUR                  n[	        XF/5      nUb  XG:w  a  Xsl        UR
                  S:X  a  g UR                   H  nUR                  5       (       d  M  UR                  5       c  M-  U R                   R                  U5      n	U	R                  S5      (       a  M`  [        UR                  5      S:w  a  M{  [	        U	R                  UR                  /5      nUc  M  U	R                  U:w  d  M  Xyl        M     UR                   H  nUR                  5       (       d  M  UR                  5       c  M-  U R                   R                  U5      n	U	R                  S5      (       a  M`  [	        U	R                  UR                  /5      nUc  M  U	R                  U:w  d  M  Xyl        M     g )Nrc   r!   r   )r   r   r   rc   r   r   r   r   r   r   rI   r   )
r   r   nearest_op_noder   rc   nearest_op_dis_attrnearest_process_meshcompatible_process_meshr   r   s
             r-   _update_process_mesh_by_nearest)Completer._update_process_mesh_by_nearest(  s   ))AA'J((88'44L"&"4"4"L"L# $7#C#C &E4'# (3 ;,C)7*">>K!!##(9(E&&EE# !
 $00@@{))*a/*I%22L4M4MN+' ,7(559PP4K1' ** #??K!!##(9(E&&EE# !
 $00@@*I%22L4M4MN+' ,7(559PP4K1! +r/   c                    S nS nS nS nU R                   R                  5        GH  u  pVUR                  5       R                  S5      nU R                  R
                  R                  U5      n[        UR                  5       5      n	U R                  R                  U5      n
U
R                  nUR                  nU	 H  nUR                  5       (       a  UR                  5       c*  UR                  5       (       d  M@  UR                  5       c  MS  U R                  R                  U5      n[!        XR                  5      nM     Xl        U" U5        / nUR                  5       R#                  S5      S   nS nUR$                   Hd  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R'                  5       U:X  d  MQ  UnUR)                  U5          O   UR+                  U" U5      5        S n[-        U	5       Hn  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R'                  5       U:X  d  MQ  [/        UR0                  5      S:X  d  Ml  Un  O   UR+                  U" U5      5        UR                  5       R3                  S5      S   nS nUR0                   HS  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R'                  5       U:X  d  MQ  UnMU     UR)                  U5        U H-  nU R                  R                  U5      nUUl        U" U5        M/     UR4                  nUR7                  5        H  u  nnU" U R                  R8                  UU5      nU R                  R                  U5      nUR                  Ul        UR$                   HS  nUR                  5       R'                  5       U:X  d  M'  U R                  R                  U5      nUR                  Ul        MU     M     UR:                  nUR7                  5        H  u  nnU" U R                  R8                  UU5      nUc  U" U R                  R8                  UU5      nU R                  R                  U5      nUR                  Ul        UR0                   HS  nUR                  5       R'                  5       U:X  d  M'  U R                  R                  U5      nUR                  Ul        MU     M     GM	     U R<                  R                  5        Hp  nS nU H3  nU R                  R                  U5      n[!        XR                  5      nM5     U H,  nU R                  R                  U5      nXl        U" U5        M.     Mr     g )	Nc                     [        U S U 5       HS  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R                  5       U:X  d  MQ  Us  $    g rT   )r   r   r   r   nodesr   var_namer   s       r-    _find_nearest_tensor_node_beforeUCompleter._update_process_mesh_for_specials.<locals>._find_nearest_tensor_node_beforee  sH     t-KKMM
.
)X5K .r/   c                     XS-   S   HS  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R                  5       U:X  d  MQ  Us  $    g )Nr   )r   r   r   r  s       r-   _find_nearest_tensor_node_afterTCompleter._update_process_mesh_for_specials.<locals>._find_nearest_tensor_node_aftern  sF    Agi(KKMM
.
)X5K )r/   c                 p   / n[        5       n/ nUR                  U 5        [        U5      S:w  Ga  US   nUSS  n[        U5      U;   a  M+  UR                  UR
                  -   nU GH  nUR                  5       (       a  UR                  5       b  UR                  5       R                  5       [        R                  R                  R                  :w  aM  [        UR                  5       R                  5       5      S:X  a"  UR                  U5        UR                  U5        UR                  5       (       d  M  UR                  5       c  M  SnUR                  5       R                  5       S:X  dD  UR                  5       R                  5       S:X  d"  UR                  5       R                  5       S:X  a  SnUR                   H  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R                  5       [         ;   d-  [        UR                  5       R                  5       5      S:w  d  M  Sn  O   UR
                   H  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R                  5       [         ;   d-  [        UR                  5       R                  5       5      S:w  d  M  Sn  O   U(       d  GM}  UR                  U5        UR                  U5        GM     UR#                  [        U5      5        [        U5      S:w  a  GM  U$ )Nr   r   Tr   r    r"   F)rU   rL   rI   r   r   r   r   r   r   r   r   r   r   ra   r   r+   r   add)	source_noderelated_nodesvisitedfrontiercur	neighborsr   flagr   s	            r-   _find_nodes_related_to_condPCompleter._update_process_mesh_for_specials.<locals>._find_nodes_related_to_condw  sc   MeGHOOK(h-1$qk#AB<C=G+JJ4	%D{{}})? HHJOO-1E1E1L1LL #DHHJ$4$4$6 71 <$OOD1)006zz||	(=# GGINN,0BB#wwy~~/3PP#wwy~~/69#(D+/;;K + 2 2 4 4$/OO$5$A %0OO$5$:$:$<'<%='*;??+<+B+B+D'E'J+0D$) ,7 ,0<<K + 2 2 4 4$/OO$5$A %0OO$5$:$:$<'<%='*;??+<+B+B+D'E'J+0D$) ,8  4$OOD1)006U &V HSM*e h-1$f ! r/   c                 0   [        U [        5      (       a-  [        U R                  5       H  u  pSU R                  U'   M     [        U [        5      (       a  U R
                  R                  5        HA  n/ nU R                  U5      nU H  nUR                  S5        M     U R                  X45        MC     U R                  R                  5        HA  n/ nU R                  U5      nU H  nUR                  S5        M     U R                  X45        MC     g g )Nr=   )r2   r   	enumeraterO   r   inputs_dist_attrskeysr   rL   r   outputs_dist_attrsr   r   )r   rd   _arg_namenew_dims_mappingrO   s         r-   _make_dims_mapping_replicateQCompleter._update_process_mesh_for_specials.<locals>._make_dims_mapping_replicate  s    )^44%i&<&<=DA02I**1- >)%566 ) ; ; @ @ BH')$#,#C#CH#ML)(//3 *44XP !C !* < < A A CH')$#,#D#DX#NL)(//3 *55  !D 7r/   	sub_block	Conditionr   
StepScopes)_while_op_nodesvaluesr+   _block_attr_idr   serial_graphget_sub_graphrK   r   r   r   rc   r   r   r   r   r^   inputr   r   rL   extendr   rI   r   outputr  itemsr   r  _array_nodes)r   r  r  r  r   while_op_nodewhile_op_node_idxsub_graph_id	sub_graphsub_graph_nodeswhile_dist_opwhile_op_dist_attrr]   r   r   cond_tensor_related_nodescond_tensor_namecond_tensor_nodestepscopes_tensor_namestepscopes_tensor_nodeoutput_noder   while_op_inputs_dist_attrstensor_namenearest_tensor_nodenearest_tensor_dist_attrnode_dist_attrwhile_op_outputs_dist_attrsarray_node_list
array_nodes                                 r-   !_update_process_mesh_for_specials+Completer._update_process_mesh_for_specialsd  sQ   	 	 9	!v	* 150D0D0K0K0M,M(++-<<[IL**77EEI #9#6#6#89O ..DDM "/!8!8 #5"A"A'KKMMdhhj&<JJLLTWWY%: $ 2 2 J J4 PI*@+-C-C+' ( /B+();< )+%,//177DQG#%,,KKMM
.
)-=='+$-445EF - &,,+,<=
  $ 1KKMM
.
)-==DLL)Q.'+$ 2 &,,+,<= &3%5%5%7%>%>|%LQ%O"%)",44&&((#)5#)..04JJ-8*  5 &,,-CD1#'#5#5#M#M$  1D -,-=> 2 *<)M)M& ,113 &F&&;;%'# &&>>+ ) -99 !- *00Dxxz(K7 ..FFtL ' 5AA '3 1 40 +=*O*O' -224 &F&&;;%'#
 '.*I**??)#+' &&>>+ ) -99 !- *11Dxxz(K7 ..FFtL ' 5AA '3 2) 5] 1NX  $00779O"&-
 ..FF	 '=')?)?'#	 . .
 ..FF	 *=&,Y7 .  :r/   c                 j   U R                    H  u  pU R                  R                  U5      nU R                  R                  U5      nUR                  Ul        [	        UR                  UR                  /5      nUb  UR                  U:w  a  XSl        Uc  M  UR                  U:w  d  M  XTl        M     g rT   )r   r   r   rc   r   )r   r   r   r   r   r   s         r-   #_update_process_mesh_between_graphs-Completer._update_process_mesh_between_graphsc  s    '+'F'F#K$($6$6$N$N%! $(#5#5#M#M$  %11 ". 'F)66(55'# (3)66*+ 6M2'3(559PP4K15 (Gr/   c                    U R                   R                  nU R                   R                  nU GH  nU R                   R                  U5      nUR	                  S5      (       d  M7  S nU H[  nUR                  5       R                  5       S:X  a  M'  UR                   H  n[        U5      [        U5      :X  d  M  Un  O   Uc  M[    O   Uc  M  U R                   R                  U5      nUc  M  UR	                  S5      (       a  M  [        UR                  UR                  /5      n	U	c  M  UR                  U	:w  d  GM  Xl        GM     Sn
[        U5       HJ  u  pU R                   R                  U5      nUR                  c  M/  U
S:X  d  M7  Un
U R                  Xf5        ML     U
S-   [        U5      :  a  g [        XS-   S  5       Hi  u  pX-   S-   nXS-
     nU R                   R                  U5      nU R                   R                  U5      nUR                  c   eU R                  Xm5        Mk     UU
   nUS U
  H  nU R                  Xm5        M     U R                  5         U R!                  5         g )Nrc   r!   r=   r   )r   r   _serial_ordered_tensor_nodesr   r   r+   r   r   r   r   r   rc   r  r  rI   rD  rG  )r   ordered_op_nodesordered_tensor_nodesr   r   first_op_noder   input_tensor_noder   r   %idx_of_first_op_node_has_process_meshr   original_idxr   nearest_op_dist_attrs                  r-   _update_process_meshCompleter._update_process_mesh  s   --FF  $11NN/K""AA+N  $00@@ M+ ::<$$&'1)0%,9J0KK(/ *8 !, , $--EEL '0I0I1 1 +J%22L4M4MN+' ,7$115LL0G-C 0J 13-%&67LC--EEgNL))59R?8;544WF 8 114s;K7LL%QFHI
LC AFJL.a/?@O#'#5#5#M#M$   --EEgNL'44@@@00J
 +1
 ((N)NOG00J P 	..0 	002r/   c                    U R                   (       a  g 0 U l        0 U l        / U l        U R                  R
                  n[        U5       GH  u  p#UR                  5       (       Ga  UR                  5       R                  5       S:X  a  X24U R                  [        U5      '   UR                  5       R                  5       S:X  a  UR                  5       R                  S5      S   nU R                  R                  US 5      c  / U R                  U'   U R                  U   R                  U5        U R                  U   R                  UR                  S   5        UR                  5       R                  5       S:X  a  UR                  5       R                  S5      S   nU R                  R                  US 5      c  / U R                  U'   U R                  U   R                  U5        U R                  U   R                  UR                   S   5        UR#                  5       (       d  GM  UR%                  5       c  GM  UR&                  R)                  5       S:w  d  GM  U R                  R*                  UR&                  R)                  5       S-
     R                  UR%                  5       R-                  5       S 5      nUc  GM|  [/        US S	9nU H!  u  pxU R                  R                  X45        M#     GM     S
U l         g )Nr!   read_from_arrayXr   write_to_arrayOutr   c                     U S   $ Nr   r>   )xs    r-   <lambda>$Completer._prepare.<locals>.<lambda>  s    !r/   )keyT)r   r%  r.  r   r   r   r  r   r+   r   r   r*  getrL   r   r,  r   r   r   r   graph_id_tensor_nodes_with_same_namer   sorted)	r   r   r   r   array_var_nameparent_nodessorted_parent_nodesr  r   s	            r-   _prepareCompleter._prepare  sc   !*,'&&;;	"9-ICzz||779>>#w.<@;D(($8779>>#'88%)WWY__S%9!%<N((,,^TBJ<>)).9%%n5<<TB%%n5<<T[[^L779>>#'77%)WWY%5%5e%<Q%?N((,,^TBJ<>)).9%%n5<<TB%%n5<<T\\!_M{{}}!799%%'1,**GG II..014#dhhjoo/6 !
 $/.4(n/+ /BNA ;;BB!, 3 /B9 .B "r/   Nc                    Uc  U R                   R                  nOXR                   l        U R                  U5      u  p#[	        U R                   5      (       dd  U R                   R                  SS9  U R                  5         U R                  5         U R                  5         U R                   R                  5         O>[        R                  S5        U R                   R                  SS9  U R                  5         U R                  XU5        U R                  U5        U R                  U5        U R                   R!                  5         U R                   R#                  5         U$ )zComplete annotation for the partial annotated serial_main_program.
Arguments:
    serial_main_program: partial annotated serial_main_program.
Returns:
    serial_main_program: completed annotated serial_main_program.
T)
with_graphz+Default distributed attributed will be set.F)r   serial_main_program_serial_main_program*_get_tensor_names_and_ops_with_global_meshr   
initializerf  rR  r   $copy_dist_attr_from_graph_to_programr   info_update_dist_attr_for_dp_complete_with_global_mesh$_complete_high_order_grad_annotation_complete_chunk_idamend_dist_attr_for_programvalidate_dist_attr_for_program)r   rj  tensor_namesopss       r-   complete_forward_annotation%Completer.complete_forward_annotation  s$    &"&"4"4"H"H6I3 KK
 &d&8&899))T):MMO%%'%%'CCELLFG))U);))+''(;3O112EF 3466899;""r/   c                    U R                   R                  (       a/  U R                   R                  R                  R                  (       d  / / 4$ [        R
                  R                  R                  5       nUc  [        R                  S5        / / 4$ UR                  n/ nUR                  5       nUR                  R                  5        Hx  nU R                   R                  U5      nUR                  R                   nUc  M9  [#        UR$                  5      [#        U5      :X  d  M]  UR'                  UR(                  5        Mz     [+        U5      S:X  a  / / 4$ 0 n	UR,                  n
U
 H  nUR.                  nU H  nXU'   M	     M     / n[1        5       n[2        R4                  " 5       nU H  nUR7                  U5        M     UR9                  5         UR;                  5       (       d  UR=                  5       nX;   a  M,  UR?                  U5        UR'                  U5        X   nUR'                  U5        UR@                  nU H  nUR7                  U5        M     UR;                  5       (       d  M  XN4$ )Nzglobal_mesh is not set, tensor annotation with global mesh may be not work, please use paddle.distributed.auto_parallel.set_mesh(mesh) firstly.r   )!r   strategypipeliner   r3   distributedauto_parallelget_meshr   warning_process_idsglobal_blockvarsr&  get_dist_tensor_for_programr   rc   rb  rV   rL   r   rI   rw  output_arg_namesrU   queueQueueputclearemptyr_  r  input_arg_names)r   rj  global_meshglobal_mesh_process_idstensor_names_with_global_meshblockr   dist_varmeshtensor_name_to_oprw  r+   output_tensor_namesr=  ops_with_global_meshhas_visitedtensor_name_queuer  
input_names                      r-   rl  4Completer._get_tensor_names_and_ops_with_global_mesh  s/   ""++%%..77>>r6M ((66??AOO b r6M"-":":(*%#002::$$&C))EEcJH%%22DF4+;+;$<'A % .44SXX> ' ,-2r6M iiB"$"5"5213+.  3 
  "e!KKM8K!!+. 9%++-#))+++//1K)OOK()00="/B ''+ 00O-
!%%j1 . $))++ -BBr/   c                    [        U5      S:X  a  g UR                  5       nUR                  US   5      nU R                  R	                  U5      nUR
                  R                  nU H.  nU R                  R                  U5      n	XyR
                  l        M0     U H?  n
UR                  U
5      nU R                  R	                  U5      nXvR
                  l        MA     g rZ  )rI   r  _var_recursiver   r  r   rc   get_dist_op_for_program)r   rj  rv  rw  r  tensordist_tensorr  r+   r   r=  s              r-   rq  $Completer._complete_with_global_meshY  s     |!#002%%l1o6((DDVL!++88 B((@@DG-8*  (K))+6F,,HHPK1<!!. (r/   c                 "  ^  U 4S jnU 4S jnT R                   R                  (       a/  T R                   R                  R                  R                  (       d  g [	        T R                   5      u  pET R                   R                  R                  R
                  nT R                   R                  R                  R                  nT R                   R                  R                  R                  nUS:  a  US:  a  [        S5      eUS:  a  U(       a  US;  a  [        S5      eUS:  a  g UR                  5       n	U	R                  n
[        R                  " 5       n[        R                  " 5       n[        R                  " U[        R                  5      nSn[!        U
5       H,  u  nnUR#                  UR$                  5      nU(       d  M*  Un  O   ['        U
5      nUS-
  n[)        [+        U5      5       H+  nUR#                  X   R$                  5      nU(       d  M)  Un  O   [+        UUS-   5       GHt  nX   R$                  nUR#                  U5      nU(       d  X   R,                  S	:X  ak  S
X   R.                  S   ;   aV  [+        US-   U5       H*  nUR#                  U
U   R$                  5      nU(       d  M*    O   U(       d   eU
W   R$                  nO[        SX    S35      eUUR1                  S5      S  R3                  S5      S   nT R                   R5                  X   5      nUU;  a"  U/UU'   UR6                  R8                  UU'   GM#  UU   S   S-   U:X  d   S5       eUU   nUUR6                  R8                  :X  d   S5       eUU   R;                  U/5        GMw     XF-  n['        U5      U-  S:X  d   SU S['        U5       SU S35       eSnS/nUR=                  5        H:  nUU;   d   eUR?                  U5      nUS   U:  a  Sn  OURA                  U5        M<     U(       d  [B        RE                  S5        O[B        RE                  S5        / n[G        [+        U5      5      n[+        U5       H,  nUR;                  U5        US:X  d  M  URI                  5         M.     [+        U5       Vs/ s H  oU-  PM	     nn['        U5      U-  n / n!S/US-   -  n"Su  n#n$/ nURK                  5        HU  u  n%n&URA                  U%5        U#S-  n#U#U :X  a%  U&S   S-   U"U$'   SU$S-   n$n#U!RA                  U5        / n['        U
5      U"U'   MW     0 n'0 n([+        ['        U"5      S-
  5       GH  n)U"U)   n*U"U)S-      n+UU)   nUU)   n,UU   n-U!U)   n./ n/U. H  n%U/R;                  UU%   5        M     [B        RE                  SU SU, SU. S35        [B        RE                  SU
U*   R,                   S U
U*   RL                   S!U
U*   R.                   S35        [B        RE                  S"U
U+S-
     R,                   S U
U+S-
     RL                   S!U
U+S-
     R.                   S35        [+        U*U+5       H  n0U
U0   nURO                  S#5      (       a  URQ                  S#5      RR                  n1URT                  U1   n2U(       a  U0U/;   a  U" U	UU-U(5        U" U	UU,U'5        U2R                   H&  n3U(       a  U0U/;   a  U" U2U3U-U(5        U" U2U3U,U'5        M(     M  U(       a  U0U/;   a  U" U	UU-U(5        U" U	UU,U'5        M     GM     US:X  a>  US-  S:X  a4  [+        UU5       H#  nU" XU   US-
  U'5        U" XU   US   U(5        M%     g g g s  snf )$Nc                   > TR                   R                  U5      nX$R                  l        UR                  UR
                  -    H  nSU;   a  M  XS;  d  M  U R                  U5      nTR                   R                  U5      nUR                  R                  UR                  R                  :X  d  Mn  X'R                  l        X#UR                  '   M     g )Nlod_tensor_blocking_queue)
r   r  r   chunk_idr  r  _find_var_recursiver  rc   r   )	r  r+   r  var_to_chunk_idr   r   r   r  r   s	           r-   set_chunk_id2Completer._complete_chunk_id.<locals>.set_chunk_ido  s    ((@@DG)1&**R-@-@@.$6.33D9C**FFsK    ))66&00==> :B--64<1 Ar/   c                 L  > TR                   R                  U5      nUR                   H  nXS;  d  M
  U R                  U5      nTR                   R	                  U5      nUR
                  R                  UR
                  R                  :X  d  Mf  X'R
                  l        X#UR                  '   M     UR                   HT  nXS;  d  M
  U R                  U5      nTR                   R	                  U5      nX'R
                  l        X#UR                  '   MV     X$R
                  l        g rT   )	r   r  r  r  r  r   rc   r   r  )	r  r+   rc   var_to_process_meshr   r   r   r  r   s	           r-   set_process_mesh6Completer._complete_chunk_id.<locals>.set_process_mesh  s   ((@@DG**233D9C**FFsK    ))66&00==> >J--:8DCHH5 + ++233D9C**FFsK   :F))64@1 , .:*r/   r	   r   z3VPP schedule mode only can be set in pipeline mode.)VPPZBVPPzDPlease set right schedule_mode and vpp_seg_method for VPP and ZBVPP.r   ro   reshard_apizThe op z" should only be created by reshardr&   r=   z'The segment's ops should be continuous.z0The segment's ops should have same process_mesh.zThe number of layers[z] (z$) should be divided by part number (z).TFzCannot Use Auto VPPzUsing Auto VPPr  )r   r   zstage=[z], chunk_id=[z], layer_name=[r1   zstart op: [z]: [z] [z	end op: [r"  )+r   r{  r|  r   r   
vpp_degreevpp_seg_methodschedule_mode
ValueErrorr  rw  collectionsOrderedDictrecompile
IGNORECASEr  searchstruct_namerI   r   r`   r   r  startrw   r  r   rc   r+  r&  indexrL   r   ro  rK   reverser-  r  r'   r(   idblocks)4r   rj  r  r  	pp_degreesub_process_meshesr  
seg_methodr  r  rw  seg_op_depsseg_op_meshregexstart_op_indexrd   r+   mtotal_op_numend_op_indexr  jr   pre_mesh
num_chunksnon_decreasingseg_pp_stagesseg_pmpp_stageseg_pp_stager  seg_chunk_ids	part_sizesegment_struct_namessegment_partsmemory_counterseg_idxr   idxsr  r  seg_id	start_idxend_idxr  rc   struct_names
seg_op_idxr   block_idr"  sub_ops4   `                                                   r-   rs  Completer._complete_chunk_idn  s   	=$	:2 ""++%%..77>>(5d6H6H(I%	''0099DD
''0099HH
**33<<JJq=Z!^E  >m3CCV  >#002ii
 "--/!--/

:r}}5s^EArR^^,Aq!"	 $ 3x#a'%-.ASV//0Aq 	 / ~|a'78A&,,K[)A FKK8+%)@)@)CC #1q5,7!LLQ););<1! 8 H1"%a&"4"4K$!#&)KL  &aggajl399#>qAK((@@HG+-,-3K(+2+<+<+I+IK(";/3a71< =< '{37#4#4#A#AA FA K(//4E 9H +
;*,1 	
#J<s3{3C2DDhishttvw	
1 !((*F////)//7HR 8+!&  * + LL./LL)* E),-z"A  .'$$& #
 27z1BC1BAi1BC$
2	!zA~."&%++-JD$t$aN*)-bAg&*+Wq[$++K8 (+CM*% .  C.23F%f-I#FQJ/G$V,H$V,H-h7L/7LJ$!!+d"34 % LL(=
/,WXY LLc)n112$s9~7U7U6VVYZ]^gZhZyZyYzz{| LLC!,112$s7Q;7G7W7W6XX[\_`gjk`k\l\}\}[~~  A Y0X;;{++!ww{366H 3 : :8 DI%#*;(!2|5H !HoF"+--)cZ.?, ) & , 3	 %%vx #0 &#*;(!2|5H !HoF7 1+ 4h G#
Q!(;<6UFJNOL q6#5a#8:M 7 )<#O Ds    ^c                    [        5       R                  n[        U5      nU R                  R                  nUR                  5        H  nX$R                  l        M     U R                  R                  nUR                  5        GHk  nUR                  nUR                  nX(l        [        R                  " U5      n	UR                  S:X  a  MJ  UR                   H  n
UR                  U
5      nUR                  (       a  M'  U R                  R!                  U5      nUR                  nUR                  R                  Ul        UR#                  XR                  R$                  5        M     ['        USS9nUb  Sn[        R                  " U5      nU Hk  nUR)                  U5        UR+                  U5      (       a;  UR-                  5       (       a&  UR                  Ul        UR0                  Ul        Sn  O
Xl        Mm     U(       a  Xl        OXl        UR4                   H  n
UR                  nUR7                  U
5      nUR                  S;   a^  UR9                  U
5      n[;        U5      S:  a>  S/[=        [;        U5      S-
  5       Vs/ s H  nSPM     sn-   nUR?                  U
U5        U R                  R!                  U5      nUR9                  U
5      UR                  l        M     GMn     g s  snf )	Nr   Tr   F)fill_constantr   r   r=   ) r   ranksr
   r   _dist_tensors_for_programr&  r   rc   _dist_ops_for_programr   r   r   r   r  get_serial_inputis_parameterr  r   rO   r   r   r   r   r   r   r   r  get_serial_outputr   rI   r`   r   )r   r  rc   dist_tensorsr  dist_opsr   r   r   r   r  serial_tensorr   r   r   r   old_dims_mappingr  r  s                      r-   rp  "Completer._update_dist_attr_for_dp\  s   ')//"5)))CC'..0K1=!!. 1 %%;;(G))I",,L(4%$(MM,$?!~~!33%55 ' 8 8 B$111**FF)  
 $+#4#4L#--:: !- !77 "7"7"D"D 6  GTM (!%&*mmL&A#$1L 44W=$77@@#66881=1B1B.0<0@0@-).,?) %2 "(=%$9!%66&00 ' 9 9( C>>%66'3'K'K ($ +,q0,-3(-c2B.Ca.G(H2(H1B(H2 ,( %<<$&6 #00LL! !88B %%2# 7a )p2s   K4
c                    Uc  U R                   R                  nOXR                   l        U R                   R                  5         U R	                  5         [        5       nU R                   R                  nU GH2  nUR                  5       (       d  M  UR                  5       R                  5       S;   a  M?  U R                   R                  U5      nUR                  nUR                   GH  nUR                  5       (       d  M  UR                  5       c  M.  [        UR                  5      S:w  a  MI  UR                  5       nUR!                  5       n	UR#                  U	5      n
X;   a  M  U R                   R%                  U5      nUR&                  Ul        U
R(                  (       a  UR+                  U	5      O UR-                  5        Vs/ s H  nSPM     snUl        UR1                  U	5        GM	     UR2                   H  nUR                  5       (       d  M  UR                  5       c  M-  UR                  5       R!                  5       n	X;   a  MR  U R                   R%                  U5      nUR&                  Ul        UR5                  U	5      Ul        UR1                  U	5        M     GM5     U R7                  5         U R9                  5         U R;                  5         U R=                  5         U R                   R?                  5         U R                   RA                  5         U R                   RC                  5         g s  snf )N)r!   r   r=   )"r   rj  rk  rm  rf  rU   r   r   r+   r   r   r   r   r   r   rI   r   r  r   rc   r  r   ra   rO   r  r   r   rD  rG  r   r   rn  rt  ru  )r   rj  has_set_dist_attrr   r   r   r   r   r   r=  r  r   rd   s                r-    _complete_tensor_dist_attr_by_op*Completer._complete_tensor_dist_attr_by_op  s   &"&"4"4"H"H6I3%%'E&&;;	Dzz||779>>#y0,,BB4H&00#';;K"))++0A0M{112a7$&1oo&7&1&6&6&8!(!9!9+!F&;$ ..MM + ) )55 )5
  &22 )??L.9.?.?.A!B.A".A!B )5
 *--k:1 $/2 $(<<K"))++0A0M&1oo&7&<&<&>&;$ ..MM + ) )55 )5 )@@M )5 *--k:! $0? b 	..0002--/002 	??A 	66899;G "Cs   
L=c                    Uc  U R                   R                  nOXR                   l        S nS n[        UR	                  5       R
                  5      nUR	                  5       R                  nU R                   R                  nUR                  n[        U5      S:  a  gSn[        S[        U5      5       GH  n	XI   n
[        U
R                  S5      5      [        [        R                  R                  R                   5      :X  a  MS  [        U
R                  S5      5      [        [        R                  R                  R"                  5      :X  aS  [        XIS-
     R                  S5      5      [        [        R                  R                  R                   5      :X  a  US-  n[        U
R                  S5      5      [        [        [        R                  R                  R"                  5      [        [        R                  R                  R$                  5      -  5      :X  a  U
R&                  S:X  d   e  gXI   nUR(                  R+                  5       UR,                  ;   Ga  U" UUR,                  UR(                  R+                  5          5      nUc   eU R                   R/                  U5      nUR0                  n[3        5       nXl        UR4                   H  nUUR4                  ;  a\  UUR6                  ;  aL  UXx   ;   a  Xx   U   nUR9                  U5      nO^UU   nU R                   R;                  U5      R<                  nO3UUR4                  ;   a  UR?                  U5      nOUR9                  U5      nUc   S	U S
35       eURA                  UU5        M     UR6                   Hp  nUXx   ;   d   eXx   U   nUR?                  U5      nUU   n[C        5       nUUl        UUl        U R                   RE                  UU5        URG                  UU5        Mr     U R                   RI                  X5        GM  UR&                  S:X  Ga	  [K        [M        X+R4                  5      5      (       d   eUR6                  S   nUXx   ;   d   SU S35       eXx   U   nUU   nU R                   R;                  U5      nUR<                  nUR0                  n[C        5       nUUl        UUl        UU   nU R                   RE                  UU5        [3        5       nUUl        UR4                   H  nURA                  UU5        M     URG                  UU5        GOUR&                  S:X  a  UR4                  S   nUU   nU R                   R;                  U5      nUR<                  nUR0                  n [C        5       nUUl        U Ul        UR6                  S   n!UU!   nU R                   RE                  UU5        [3        5       nU Ul        URA                  UU5        URG                  U!U5        O,UR&                  S;   a  GM  [O        SUR&                   S35      eU R                   RI                  X5        GM     g)z
NOTE:
    [HighOrderGrad] Complete the annotation of vars and ops only for high order gradient.
    This function is temporary to support high order gradient, and will be removed in the future.
Nc                     SU ;   a  ggN@GRADTFr>   r   s    r-   _is_grad_var_nameICompleter._complete_high_order_grad_annotation.<locals>._is_grad_var_name      $r/   c                 Z    U  H%  nUR                   R                  5       U:X  d  M#  Us  $    g rT   r7   original_idrw  r  r+   s      r-   _get_op_by_idECompleter._complete_high_order_grad_annotation.<locals>._get_op_by_id
  +    77&&(B.I  r/   r	   r   op_roler   r  [] 's dims mapping is NONEsumsum op's output '' has no corresponding varfill_any_like)ra   r  got unexpected op [r1   )(r   rj  rk  rK   r  rw  r  dist_op_contextgrad_var_to_varrI   r`   intr(   r   op_proto_and_checker_makerr   ForwardBackwardLossr   r7   r  grad_op_id_to_op_idget_op_dist_attr_for_programrc   r   r  r  r    get_tensor_dist_attr_for_programrO   r   r   r    set_tensor_dist_attr_for_programr   set_op_dist_attr_for_programallmapr  )"r   rj  r  r  rw  r  r  r  appended_grad_timesr   r+   grad_op
forward_opfwd_op_dist_attrfwd_op_process_meshgrad_op_dist_attrr  fwd_nameref_dims_mapping	input_varoutput_name
output_varr   ref_fwd_var_nameref_fwd_varref_fwd_dist_attrref_fwd_dims_mappingref_fwd_process_meshr  ref_var_nameref_varref_dist_attrref_process_meshoutput_var_names"                                     r-   rr  .Completer._complete_high_order_grad_annotation  s    &"&"4"4"H"H6I3	
	 &33599:"//166,,<<)99!#CH%CB2779%&#//66>>+  2779%&#//66??+ c'l''	23s//66>>8  $q(#2779%&#D33::CCDd55<<AABC+  ww/111 hG((*"667 +#77002
 "--- &&CCJO ! '7&C&C#$4$6!1D.")"9"9J"**D*DD&j.I.II%)MM'6'K *(H !1 H H$,!" - )-Z(8I/3/A/A/b/b )0*l - &)C)CC 0 G G$.!" - !1 H H$.!" -
 ,7 J<'@A7 &<<"$4G #:N $+#;#;K&/*NNNN.CKPH'7'N'N ($ "&k!2J'5'7$4D$14G$1&&GG"$4 &==#%5 $<& ""?? <<5(s#46M6MNOOOO")":":1"=K#'KK ,K=8RSK (7'K#($ #''7"8K**KK' &
 ,=+I+I(+<+I+I('5'7$4H$14H$1!%k!2J&&GG"$4 )9(:%5I%2$+$;$;)@@$&: %< &==#%9 \\_4#*#:#:1#=L"<0G**KK# "
 (5'A'A$'4'A'A$'5'7$4D$14D$1&-&>&>q&AO!%o!6J&&GG"$4 )9(:%5E%2%<<$&6 &==')9 \\%?? %':7<<.%JKK""??k &r/   c           	      R  ^ ^-^. Uc  T R                   R                  nOUT R                   l        S m-U-4S jnS nU.U 4S jnSnSn[        UR	                  5       R
                  5       H=  u  px[        U5      (       a  Un[        U5      (       d  M)  UR                  S:X  d   eUn  O   US:  a  Uc   S	5       e[        UR	                  5       R
                  5      n	UR	                  5       R                  n
T R                   R                  nUR                  [        UR                  5         m.[        U[        U	5      5       GH  nX   nXv:X  Ga  UR                  S:X  d   e[        UR                  5      S:X  d   S
[        UR                  5       S35       e[        UR                   5      S:X  d   S
[        UR                   5       S35       eXR                   S      nXR                   S      nUR"                  S-   UR"                  :X  d   eT R                   R%                  U5      nT R                   R'                  U5      n[)        T R                   UUR*                  R,                  UR*                  R.                  UR*                  R0                  S9  [3        UUR*                  R.                  UR*                  R5                  UR"                  5      T R                   UR*                  R0                  S9  GM  UR6                  R9                  5       UR:                  ;   Ga-  U" U	SU UR:                  UR6                  R9                  5          5      nUc   eUR=                  S5      (       a  UR=                  S5      (       a  U" UX5        UR?                  S5      R@                  nUR?                  S5      R@                  nURB                  U   nURB                  U   nUR
                   HO  nU" UR
                  UR:                  UR6                  R9                  5          5      nU" UUUR                  5        MQ     GM  U" UX5        GM  UR                  S;   Ga  [E        [G        T-UR                  5      5      (       d   eUR                   S   nUT.;   d   SU S35       eT.U   nU
U   nT R                   RI                  U5      nUR,                  nUR.                  nUR0                  nU
U   n[)        T R                   UUUUS9  [K        5       n UR                   H  n!U RM                  U!U5        M     U RO                  UU5        Sn"Sn#[        US-
  US-   S5       H  n$U	U$   n%[        [Q        U%R                   5      [Q        UR                  5      -  5      n&[        U&5      S:  d  MM  T R                   RS                  U%5      n'U'R0                  n"U'R.                  n#  O   U"b  U#c   eU#U l        U"U l        T R                   RU                  UU 5        GM  UR                  S:X  a  UR                  S   n(U
U(   n)T R                   RI                  U)5      n*U*R,                  n+U*R.                  n#U*R0                  n"UR                   S   n,U
U,   n[)        T R                   UU+U#U"S9  [K        5       n U#U l        U"U l        U RM                  U(U+5        U RO                  U,U+5        T R                   RU                  UU 5        GM  [W        SUR                   S35      e   g)zSComplete the annotation of vars and ops in the backward phase for parallel program.Nc                     SU ;   a  ggr  r>   r  s    r-   r  ACompleter.complete_backward_annotation.<locals>._is_grad_var_name  r  r/   c                 \   > T" U 5      (       d   SU  S35       eU S U R                  S5       $ )Nr   z] is not a grad var name.r  )find)grad_var_namer  s    r-   &_get_forward_varname_from_grad_varnameVCompleter.complete_backward_annotation.<locals>._get_forward_varname_from_grad_varname  sC    $]33 M?";<3 !!>=#5#5g#>??r/   c                 Z    U  H%  nUR                   R                  5       U:X  d  M#  Us  $    g rT   r  r  s      r-   r  =Completer.complete_backward_annotation.<locals>._get_op_by_id  r  r/   c           	      
  > TR                   R                  U 5      n[        5       nUR                  nUR                  nUR
                  S:X  a  U R
                  S:X  a  U R                  S5      S   nUR                  U5      nX!R                  R                  S5      S      n	[        TR                   U	UUUS9  UR                   H  n
UR                  X5        M     UR                  U	R                  U5        GOUR                   H  n
XR                  ;  aV  XR                  ;  aG  U
T;   a  TU
   nUR!                  U5      nO\X*   nTR                   R#                  U5      R$                  nO2XR                  ;   a  UR                  U
5      nOUR!                  U
5      nUc   SU
 S35       eUR                  X5        M     UR                   H  nUS	:X  a\  X-   n	['        [)        U	R*                  5      5       Vs/ s H  nS
PM     nn[        TR                   U	UUUS9  UR                  X5        Me  UT;   d   eTU   nUR                  U5      nX-   n	[        TR                   U	UUUS9  UR                  X5        M     XTl        Xdl        UR,                  Ul        UR.                  Ul        UR                  Ul        S nUR
                  [0        ;   a	  U" X!U5        TR                   R3                  X5        g s  snf )Nrz   rw   rV  r   rX  r  r   r  z@EMPTY@r=   c           	         / nS n/ nUR                   S:X  a  [        UR                  S5      5      S:  a  UR                  S5      S   nUR	                  UR                  S5      5        [        X   R
                  5      n[        XR                  S5      S      R
                  5      nUS::  a  US:  a  [        [        US-
  5      5      nGOXg:  a  [        [        Xg-
  5      5      nGOUR                   S:X  a  UR                  S5      S   nUR	                  UR                  S5      5        UR                  S5      S   n[        [        [        X   R
                  5      [        X   R
                  5      -
  5      5      nGOUR                   S	:X  a  UR                  S5      S   nUR	                  UR                  S
5      5        UR	                  UR                  S5      5        [        UR                  S5      5      n	[        [        U	5      5      nOUR                   S:X  aa  UR                  S5      S   nUR	                  UR                  S5      5        [        [        [        X   R
                  5      S-
  5      5      nO[        SU 35      e[        U5      S:  aU  UR                  U5      n
U H=  nX   S:w  d  M  X   nU H&  nUR                  U5      nUR                  U/5        M(     M?     g g )Nmatmul_v2_gradzY@GRADr   zOut@GRADr	   r   elementwise_add_gradYlayer_norm_gradz	Bias@GRADz
Scale@GRADbegin_norm_axislookup_table_v2_gradzW@GRADz$Backward Partial is not adapted for r=   )r   rI   r,  r*  r+  ra   rK   r`   r	  r(   NotImplementedErrorr   get_output_dist_attr_set_partial_dims)r  r  r  param_gradsactivation_gradbroadcast_axis_indiesact_ndim
param_ndim	param_varr<  activation_grad_dims_mappingaxispartial_dimp_grad_namep_grad_dist_attrs                  r-    infer_backward_op_partial_status{Completer.complete_backward_annotation.<locals>._complete_grad_op_with_forward_op.<locals>.infer_backward_op_partial_status]  s   
 !"&(*%LL$44GNN8459&-mmJ&?&BO&&w~~h'?@"4#8#>#>?H!$T..*B1*E%F%L%L!MJ "Q#a<48x!|9L4M1!.04!("781- \\%;;&-mmJ&?&BO&&w~~h'?@ 'c 21 5I,0 5 ; ;<!$/"7"789-) \\%66&-mmH&=a&@O&&w~~k'BC&&w~~l'CD&)',,7H*I&JO,01G,H)\\%;;&-mmJ&?&BO&&w~~h'?@,0c$"7"="=>BC-) .>wiH  {#a')@@+ 1
 !67=C*F*LK/:$5$J$J(3%& !1
 !1 B B%0M!" 0; !6 (r/   )r   r  r   rc   r  r   r*  r   r7   r,  r   r  r   r   r   r  r   r  rO   r`   rI   ra   r   r   r   r  )r  r  r  r  r  r(  ref_chunk_idsplit_input_var_namer  r  r  r  r  r  r  rL  r  r   s                   r-   !_complete_grad_op_with_forward_opQCompleter.complete_backward_annotation.<locals>._complete_grad_op_with_forward_op  sT   #11NN  !1 2/<<+44L||x'JOOw,F'1'7'7'<Q'?$#3#J#J($  ",,"5"5e"<Q"?@
!&&$$) #*"9"9J%<<" #: "99OO%5
 #*"9"9J"*D*DD&.I.II%8'6z'BH 0 H H$,!" - )-(8I/3/A/A/b/b )0*l - &)C)CC 0 G G$.!" - !1 H H$.!" -
 ,7 J<'@A7 &<<"C #:L $+#;#;K"i/%)%6
(-c*2B2B.C(D,(D1B(D ) , * ..&,,%1 *AA' !&/999.{;H'7'N'N ($ "&!2J%**"((!- &==#C $<J .>*)5&*:*D*D')9)B)B&)9)B)B&H"T ||<<0#4 ;;q,s   ?L r=   r  r   z,No backward procedure found in this program.z7first backward op should has only ONE output, but got [r1   r   r  r6  r"  )r  grad_addr  r  r  r  ),r   rj  rk  r  r  rw  r   r   r   rK   r  r  r  rI   r`   r  r  r   r  r  r   r   rO   rc   r  r   r   r7   r  r  r'   r(   r  r  r  r  r  r   r   r   rU   r  r  r  )/r   rj  r1  r  rP  loss_opfirst_backward_op_idxr   r+   rw  r  r  r  loss_varloss_grad_vardist_loss_vardist_loss_opr  grad_sub_block_idforward_sub_block_idgrad_sub_blockforward_sub_blocksub_grad_opsub_forward_opr  r   r!  r"  r#  r$  ref_fwd_chunk_idr  r  r  rN  r(  pre_idxpre_grad_opinter_arg_namepre_op_dist_attrr%  r&  r'  r  r)  r  r  s/   `                                            @@r-   complete_backward_annotation&Completer.complete_backward_annotation  sl    &"&"4"4"H"H6ID3	
	@	C	J  " !4!A!A!C!G!GHGC"~~r""ww/111(+% I %)g.A 	
:	
A &33599:"//166,,<<)99//0
 .C9ChG+||6667223q8 McRYRiRiNjMkklm8 73349 McRYRjRjNkMllmn9   8 8 ;< $%=%=a%@ A}}w.-2D2DDDD $ 2 2 N N!  $11II  "&&!!++88!++88*44== G **77 **BB  &&)33<< 
 ((*"667 +../#77002
 "---##K00Z5H5H6 6 6j'P(/[(A(D(D%+5??;+G+J+J(%8%?%?)&N )<(B(B,)% (6'9'9)6-11+?? + 0 0 < < >* :*K9L9L (: 6j'P <<#66s#4g6M6MNOOOO")":":1"=K&/9 +K=8RS9 (7{'C$"&'7"8K**KK' &
 ,=+I+I(+<+I+I('8'A'A$ "&k!2J%**",,!1 )9(:%$+$;$;)@@$&: %< &==#%9 $(L'+$#(a!6!:B$ '*'l)- < <=!'"9"9:;* ~.2 $ 2 2 O O$/!" -
 ,<+D+DL/?/L/L,!!$$ %0,89 6F%21=%.&&CC!2 \\_4#*#:#:1#=L"<0G**KK# "
 (5'A'A$'4'A'A$#0#9#9L&-&>&>q&AO!%o!6J%**"((!- )9(:%5E%21=%.%<<$&6 &==')9 &&CC!2 %':7<<.%JKKs :r/   c           	         SSK Jn  U" 5       R                  n[        UR	                  5       R
                  5      nUR	                  5       R                  nSn[        [        U5      5       GH  nXG   n[        UR                  S5      5      [        [        R                  5      :X  d  M?  [        U5      (       Gap  UR                  [        ;   a  [!        5       n	[#        U5      U	l        UR&                   H=  n
XZ   nU R(                  R+                  U5      nU	R-                  XR.                  5        M?     UR0                   Hy  nX]   n[3        5       n[#        U5      Ul        UR4                   Vs/ s H  nSPM     snUl        U R(                  R7                  X5        U	R9                  XR.                  5        M{     GOOXXR;                  S5      S      nU R(                  R+                  U5      nUc   eUR$                  nUR.                  nUR<                  nUR                  S:X  a]  XGS	-      R                  S
:X  aH  XTUS	-      R;                  S5      S      nU R(                  R+                  U5      nUc   eUR$                  nXXR?                  S5      S      nU R(                  R+                  U5      nU(       d  [3        5       nUUl        UUl        UR4                  UR4                  :X  a  UUl        OP[        UR4                  5      S	:X  a  UR4                  S   S	:X  d   eUR4                   Vs/ s H  nSPM     snUl        U R(                  R7                  X5        [!        5       n	UU	l        UR&                   H=  n
XZ   nU R(                  R+                  U5      nU	R-                  XR.                  5        M?     UR0                   H=  nX]   nU R(                  R+                  U5      nU	R9                  XR.                  5        M?     U R(                  RA                  X5        SURB                  ;   d  GM  SXG   RB                  ;   d  GM  [        UR;                  S5      5      S	:X  d   S5       e[        UR;                  S5      5      S	:X  d   S5       eXXR;                  S5      S      nXXR;                  S5      S      nU R(                  R+                  U5      nUc   eU R(                  R+                  U5      R$                  nUc   eU R(                  R+                  U5      R.                  nUc   e[!        5       n	UU	l        U	R-                  URD                  U5        U	R-                  URD                  U5        U	R9                  URD                  U5        XXR;                  S5      S      nU	R-                  URD                  UR4                   Vs/ s H  nSPM     sn5        U	R9                  URD                  UR4                   Vs/ s H  nSPM     sn5        U(       dZ  Sn[3        5       n[#        U5      Ul        UR4                   Vs/ s H  nSPM     snUl        U R(                  R7                  UU5        URF                  RC                  5        GH~  nUS;   a  M  [        URF                  R;                  U5      5      S:X  a  M6  [        URF                  R;                  U5      5      S	:X  d   eXXRF                  R;                  U5      S      n[3        5       nSU;   d  SU;   d  SU;   aw  S/Ul        U	R-                  URD                  UR4                   Vs/ s H  nSPM     sn5        U	R9                  URD                  UR4                   Vs/ s H  nSPM     sn5        O?UUl        U	R-                  URD                  U5        U	R9                  URD                  U5        SU;  d  GM[  UUl        U R(                  R7                  UU5        GM     U R(                  RA                  X5        GM     gs  snf s  snf s  snf s  snf s  snf s  snf s  snf )zQComplete the annotation of vars and ops in the update phase for parallel program.r   r   Fr  r=   rV  Ncastr   rn   rX  GradParamzOnly support one-to-one now.LearningRateT)ri  rh  rj  Beta1TensorBeta2TensorEpsilonTensorBeta1PowBeta2Pow
SkipUpdate)$5paddle.distributed.auto_parallel.static.process_groupr   r  rK   r  rw  r  r`   rI   r	  r(   r   Optimizer   r   r   r   r
   rc   r  r   r  r   rO   r  r   ra   r  r   r*  r  r,  r  input_namesr   r7   )r   rj  r   world_ranksrw  r  learning_rate_completedr   r+   r   in_namein_varin_dist_attrout_nameout_varout_dist_attrr  r(  r  rN  r&  r'  paramgrad_varparam_dist_attrlearning_varvar_dist_attrr  r  input_var_attrs                                 r-   complete_update_annotation$Completer.complete_update_annotation  s   	
 ./55 &33599:"//166"'S?C B2779%&#foo*>>&r**ww"66'7'94?4L1')'9'9G%)]F+/+=+=+^+^ &,L )?? ')B)B (: )+(;(;H&*nG,:,<M9D +:M6 -4MM:,9qM:M6 !..OO ' )@@ (*D*D )<" "&hhsmA&6!7 ..OO & %
  ,777+7+D+D(+7+D+D('3'<'< GGv- #!G 1 15F F&*sQw<+=+=c+B1+E&FG,0,>,>,_,_ '-M $1#<<#</</I/I, #'yy'7':"; ..OO ' &
  -,:,<M9IM65AM2&}}<=M : %($6!$;(/a(8A(=!"%> 18>"0=1B>" : !..OO '
 (8'94D1')'9'9G%)]F+/+=+=+^+^ &,L )?? ')B)B (: )+(;(;H&*nG,0,>,>,_,_ '-M )@@ (*D*D )< &&CC R^^+38;O;O0Orxx01Q6 66 rxx/0A5 65 !'!21!56E#HHV$4Q$78H **KK! $
 +666**KK!&, %
 ,777**KK!&, %
 ,777#3#5L0@L- 77 '7 !77

$4 !88

$4 $((@(C#DL 77$))8J8J+K8J1B8J+K !88$))8J8J+K8J1B8J+K 326/(6(85@5M2(4(:(:6(:1B(:62 **KK(- ')gg&9&9&;
% *  %rww}}Z89Q>$"277==#<=BBB$(z)B1)E$F	)7)9 '*4)Z7+z9;=$N7(?? )Y__0M__0M )@@ )Y__0M__0M ;KN7(?? )0@ )@@ )0@ (z9:JN7 ..OO )>O '<V &&CC g #2:f>"P ,L ,L6> 1N 1Ns*   1a6a
aaa)a a#c                 6   Uc  U R                   R                  nOXR                   l        SU R                   l        U R                   R	                  5         U R                  5         U R                   R                  5         U R                   R                  5         g)z
fill default data parallel annotation for program with primitive operators.

Arguments:
    serial_main_program: partial annotated serial_main_program.
Returns:
    serial_main_program: completed annotated serial_main_program.
NT)r   rj  rk  _is_initialized_init_dist_attr_for_program_init_global_mesh_for_programrt  ru  )r   rj  s     r-   complete_prim_annotation"Completer.complete_prim_annotationg	  sx     &"&"4"4"H"H6I3-1*668**,66899;r/   c                    SSK Jn  U" 5       R                  nU R                  R                  R
                   GHJ  nUR                  R                  5        H=  nU R                  R                  U5      nUc   e[        U5      UR                  l        M?     UR                   H  nU R                  R                  U5      nUc   e[        U5      UR                  l        [        USS9nUc  ML  [        R                   " UR                  5      n	U Hi  n
U
R#                  U5      nU
R%                  U5      (       a9  U
R&                  UR                  l        U
R*                  UR                  l          M  Xl
        Mk     M     GMM     g )Nr   r   Tr   )rq  r   r  r   rk  r  r  r&  r  r
   r   rc   rw  r  r   r   r   r   r   r   r   r   r   )r   r   rt  r  r  r  r+   r   r   r   r   r   s               r-   r  'Completer._init_global_mesh_for_program|	  sX   	
 ./55''<<CCE**++-"00LL #...5@5M%%2 . ii,,DDRH***1<[1I!!. !K! !,*.--8I8I*J'(5&2&F&Fw&O'::7CC ;G:K:KG--79E9I9IG--6!0C- )6   Dr/   )r.  r   r   r   r%  )TrT   )__name__
__module____qualname____firstlineno__r   r   r   r   r   r   r  rD  rG  rR  rf  rx  rl  rq  rs  rp  r  rr  rd  r  r  r  __static_attributes__r>   r/   r-   r   r      s    #
\|un!F!HF*0X:Lx}8~L:M3^("T$#L8Ct=*l\M^L<\WrlL\eN<*(Dr/   r   )9r  r   loggingr   r  r  r3   paddle.base.corer   r   paddle.base.frameworkr   paddle.base.log_helperr   /paddle.distributed.fleet.meta_optimizers.commonr   paddle.frameworkr   rc   r
   r   dist_attributer   r   r   r   operators.commonr   r   r   process_groupr   utilsr   r   r   r   r   r   r   r   r   r  INFOr   r   r*   r   r.   r:   rG   rR   r^   re   r   r   r   r>   r/   r-   <module>r     s       	  	  + - B ! G < " 
 3
 
 
 gll H  >  D
	C0.	'QT&(I#D I#Dr/   