
    x-j                    v   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ ddlmZmZ d	d
lmZmZ d	dlmZ d	dlmZmZmZ d	dlmZ d	dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)  ee*ej+        d          Z,g dZ-dZ.dZ/d Z0d Z1d Z2d Z3d Z4d Z5d Z6d Z7 G d d          Z8dS )    N)contains_spmd_ruleget_phi_spmd_rule)Operator)
get_logger)OpRole)core   )ProcessMeshcompute_compatible_process_mesh   )OperatorDistAttrTensorDistAttr)_node_id)_gradient_sync_by_partial_ops*find_compatible_distributed_operator_impls(find_distributed_operator_impl_containerget_world_process_group)	__no_shape_var_type___g_gradient_clip_opsget_pp_degreeis_gradient_clip_opis_loss_grad_op
is_loss_opis_naive_data_parallel6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_var_dist_attrz&%(asctime)s-%(levelname)s: %(message)s)fmt)create_py_readercreate_double_buffer_readerwhileread Auto_Parallel_Completion_Skippedi  c                     |                      d          r|                     d          nd}|                     d|t          z              d S )Nop_namescope/)has_attrattr	_set_attr_skip_propagation_prefix)opprefixs     r/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/auto_parallel/static/completion.py$mark_as_sharding_propagation_skip_opr.   C   sI    (*N(C(CLRWW^$$$FLL*B!BCCCCC    c                    t          | t          j        j        j                  r| }n0t          | t
                    r| j        }nt          d|  d          |                    d          ot          |
                    d          v S )Nz*static mode operator is expected but got []r%   )
isinstancepaddlebase	libpaddleOpDescr   descRuntimeErrorr'   r*   r(   )r+   op_descs     r-   is_sharding_propagation_skip_opr:   H   s    "fk+233 O	B	!	! O'MMMMNNN  C
"gll>&B&B
BCr/   c                 J    | sdS d }d}| D ]} |||          \  }}|s dS |S )z?Compute the compatible dim mapping given a list of dim mapping.Nc                 B    | dk    rd|fS |dk    rd| fS | |k    rd| fS dS )NT)FN )dm1dm2s     r-   &_compute_compatible_dim_mapping_of_twozNcompute_compatible_dim_mapping.<locals>._compute_compatible_dim_mapping_of_twoY   s@    "999"999#::9{r/   r=   r>   )dim_mapping_listrA   compatible_resultmapping
compatibles        r-   compute_compatible_dim_mappingrF   T   sn     t   #  (N(Nw)
 )
%
%  	44	r/   c                     | sdS t          | d                   }| D ]}| dS t          |          |k    r dS g }t          |  D ]8}t          t          |                    }| dS |                    |           9|S )zoCompute the compatible dims mapping given a list of dims mapping.
    Each of dims mapping is also a list.
    Nr   )lenziprF   listappend)dims_mapping_listlengthdims_mappingrC   dim_mappingscompatible_dim_mappings         r-   compute_compatible_dims_mappingrQ   l   s      t"1%&&F)  44|&&44 './ 9 9!?"
 "
 ")44  !78888r/   c                    t                      }t                      }| |d S | t          | j                  }|t          |j                  }|                    |          }t          t	          |                    }|S N)setprocess_idsunionr
   rJ   )pm1pm2process_set1process_set2merged_process_setmerged_process_meshs         r-   merge_process_mesh_twor]      s{    55L55L
{s{t
3?++
3?++%++L99%d+=&>&>??r/   c                 &   | dS t          t          |                     D ]/}| |         dk     s| |         t          |j                  k    r dS 0t          t          |j                            D ]}|                     |          dk    r dS dS )NFr=   r   T)rangerH   shapecount)rN   process_meshis      r-   _validate_dims_mappingrd      s    u3|$$%%  ?R<?c,:L6M6M#M#M55 $N3|)**++  a  1$$55 %4r/   c                 X   t          j        dd          }t          |t                    r|                                }|dk    rdnd}t          |          }g d}t          j        d          }|dk    r|                    d           | j        j        }|ot          |          o||v S )NFLAGS_infer_spmd_enableTtrueF)fused_rotary_position_embedding	matmul_v2elementwise_div!fused_softmax_mask_upper_triangleelementwise_addelementwise_mulassignscaledropout
reduce_sum
layer_normlookup_table_v2reshape2
transpose2split
unsqueeze2siluconcatexpand_as_v2swiglutilefused_rms_normstrided_slicestack	gather_ndPARALLEL_CROSS_ENTROPYsoftmax_with_cross_entropy)
osgetenvr2   strlowerboolrK   	serial_optyper   )dist_openable__adapted_ops__parallel_ceop_types        r-   _can_apply_infer_spmd_ruler      s    Y0$77F&# 56))u&\\F  O6 )455Kf;<<<$GP(11Pg6PPr/   c           	      l   t          |           }t                              d| j        j         d|j         d           |                    |           }|p|}|                    | |          }t                              d| j        j         d| j        j         d| j        j	         d           |o| S )NzUpdate Op [z] using DistOpContainer [z].Op [z] use dist op impl [z] idx [)
r   _loggerdebugr   r   update_dims_mappingmapping_to_dist_operator_impl	dist_attr	impl_typeimpl_idx)r   original_op_dist_attrchangeddist_op_containerupdatedreverteds         r-   ,_update_op_dims_mapping_and_distoperatorimplr      s     AIIMMag',aaGXG]aaa    33G<<G G >>& H MM}w %}}7;L;V}}_f_p_y}}}   %H~%r/   c                       e Zd Zd ZddZddZd Zd Zd Zd Z	d	 Z
d
 Zd Zd ZddZd Zd Zd Zd ZddZddZddZd ZddZd ZdS )	Completerc                 *    |J || _         d| _        d S )NF)_dist_context_has_prepared)selfdist_contexts     r-   __init__zCompleter.__init__   s$    ''')"r/   Tc                    d}|                                 r|                                dS |                                }|                                t          v rdS | j                            |          }|J |                    d          rdS |j        }|rSg }|j        D ]}|	                                |	                                                                dk    sT|	                                                                dk    s*|	                                                                dk    r| j        
                    |          }	|	j        |j        k    r<|	                    |                                          }
|                    |
           |                    |           t          |          }t!          ||j                  sdS |||k    r	||_        d}nug }|j        D ]}|	                                |	                                                                dk    su|	                                                                dk    sK|	                                                                dk    s!t%          |	                                          r| j        
                    |          }	|	j        |j        k    r<|	                    |                                          }
|                    |
            |                    |           t          |          }t!          ||j                  sdS |||k    r	||_        d}|S )NFrN   r   r    r"   T)is_varvarr   r   r   get_tensor_dist_attr_for_graphis_annotatedrN   inputsr+   get_op_dist_attr_for_graphrb   get_output_dims_mappingnamerK   rQ   rd   outputsr:   get_input_dims_mapping)r   tensor_nodefwdr   tensor_desctensor_dist_attrtensor_dims_mappingrL   pred_op_nodeop_dist_attrop_dims_mappingcompatible_dims_mappingsucc_op_nodes                r-    _update_tensor_node_dims_mappingz*Completer._update_tensor_node_dims_mapping   s   ""$$ 	+//*;*;*C5!oo''!6665-LL
 
  +++((88 	5.; K	 " + 2 B B??$$0$))..004FFF'??,,113389 9'??,,1133v== !*EE(  ! %1+89 9 +7*N*N',,..+ + *00AAA$$%8999&E!' '# *')9)F   u'3'+>>>0G - " + 3 B B??$$0$))..004FFF'??,,113389 9'??,,1133v==:<??;L;LMM > !*EE(  ! %1+89 9 +7*M*M',,..+ + *00AAA$$%8999&E!' '# *')9)F   u'3'+>>>0G -r/   c                    d}|                                 }|                                r|                                 dS |                                t          v s!t	          |                                           rdS | j                            |          }|j        }t          j	        |          }|r|j
        }n|j        }|D ]}	|	                                r|	                                ,|	                                                                t          j        j        j        k    rk|	                                }
|r(|                    |
                                          }n'|                    |
                                          }|r| j                            |	          }|j        |j        k    r|j        }|r(|                    |
                                          }n'|                    |
                                          }t5          ||g          }t7          ||j                  s}|[||k    rU|r)|                    |
                                |           n(|                    |
                                |           d}t=          |          r9t>                               d|j!        j         d           tE          |||          S t>                               d|j!        j         d           tG          ||          }|d}t          j	        |          }|}|D ]i}|$                    |          }|rd}|%                    |          r0|&                                r|j        |_'        |j(        |_)        d} n
||_        |}j|r	||_        d}n	||_        d}|S )NFTr   z/] update dims mapping using New InferSPMD Rule.z1] update dims mapping using Original DistOp Rule.r   )*r+   is_opr   __skip_dims_mapping_op__r:   r   get_dist_op_for_graphr   copydeepcopyr   r   r   r   r   VarDescVarTypeREADERis_annotated_input_dims_mappingr    is_annotated_output_dims_mappingr   rb   rN   r   r   rQ   rd   set_input_dims_mappingset_output_dims_mappingr   r   r   r   r   r   r   is_auto_compatiblevalidate_dist_attrr   idxr   )r   op_noder   r   r9   r   r   r   	node_listr   r   	annotatedr   r   r   r   op_dist_implsnot_compatiblebackup_op_dist_attrbackup_changedop_dist_impldim_changeds                         r-   _update_op_node_dims_mappingz&Completer._update_op_node_dims_mapping?  s@   **,,  	WZZ\\%95 LLNN666.wzz||<< 7 5$::7CC( $l ; ;  	(III$ 2	# 2	#K%%'' ;??+<+<+D  %%''4<+?+FFF%//++K (HH$$&& 		 )II$$&& 	   "AA+NN  (,<,III&6&C# &2&I&I#((**' 'OO '3&J&J#((**' 'O +J$&9:+ +' .+\-F   +7+>> $;;',,..0G    %<<',,..0G   #G &g.. )	MM^w(-^^^   @.   MM`w(-```   GS  M (!%&*mL&A&A#!($1 1 1L"."B"B7"K"KK" '"&$77@@
1#6688
1 2>1B.0<0@-).,?)"0! $(=G%#G$9!Nr/   c                 `   d}| j         D ]\  }}| j                            |          }| j                            |          }|j        |j        k    rJ|j        }|j        }t          ||g          }t          ||j                  s dS |||k    r	||_        d}|||k    r	||_        d}|S )NFT)_node_pairs_between_graphsr   get_dist_attr_for_graphrb   rN   rQ   rd   )	r   r   parent_node
child_nodeparent_node_dist_attrchild_node_dist_attrparent_node_dims_mappingchild_node_dims_mappingr   s	            r-   #_update_dims_mapping_between_graphsz-Completer._update_dims_mapping_between_graphs  s   '+'F 	 	#K$($6$N$N% %! $(#5#M#M$ $  &2'45 5 '<'I$&:&G#&E)+BC' '# *')>)K   uu'3'+CCC5L%2'3'+BBB4K$1r/   c                    | j         j        }g d}|D ]7}|                                )|                                                                |v r@| j                             |          }|j        D ]}|                                r|                                |                                                                t          j	        j
        j        k    ri|                                }| j                             |          }|j        |j        k    r.|                    |                                          }||_        ֐9d S )N)r   r    r"   )r   _serial_ordered_op_nodesr+   r   r   r   r   r   r   r   r   r   r   rb   r   r   rN   )	r   op_nodesrelated_reader_opsr   r   r   r   r   r   s	            r-    _update_dims_mapping_for_specialz*Completer._update_dims_mapping_for_special  sf   %>
 
 

   	H 	HG

(JJLL%%''+===-EEgNNL& H H%%'' HKOO,=,=,I"((--//4<3G3NNN "-//"3"3K*II'  % %1+89 9 +7*N*N',,..+ + 9H(5#H	H 	Hr/   c                 n   d}d}|s|t           k     rd}dD ]}|r| j        j        nt          | j        j                  }|D ]}|                                r/|                                |                     ||          }|rd}|                                r/|                                | 	                    ||          }|rd}| 
                                }	|	rd}|rd}nd}|dz  }|s|t           k     |t           k    rt                              d           |                                  d S )Nr   F)TFr   Tr   znSharding Propagation reach the Max Step and is NOT Converge! The Sharding Propagation Iteration is Terminated.)_max_propagation_stepr   serial_ordered_nodesreversedr   r   r   r   r+   r   r   r   r   r   )
r   stepreach_fix_pointr   is_fwd	all_nodesnodetensor_changed
op_changedgraph_changeds
             r-   _update_dims_mappingzCompleter._update_dims_mapping  s   " 	0E)E)EG' # # KD&;;!$"4"IJJ 
 & + +D{{}} +)?)-)N)N f *O * * * +&*Gzz|| +		(=%)%F%F f &G & &
 & +&*G $ H H J J  #"G '"'"&AID; # 	0E)E)E@ (((MM A   	--/////r/   c                    | j                             |          }|                    d          sH|j        }| j                             |          }|j        }t	          ||g          }|||k    r||_        |j        dk    rd S |j        D ]}|                                r|                                x| j         	                    |          }	|	                    d          rZt          |j                  dk    rst	          |	j        |j        g          }||	j        |k    r||	_        |j        D ]}|                                rs|                                _| j         	                    |          }	|	                    d          rZt	          |	j        |j        g          }||	j        |k    r||	_        d S )Nrb   r!   r   )r   r   r   rb   r   r   r   r   r   r   rH   r   )
r   r   nearest_op_noder   rb   nearest_op_dis_attrnearest_process_meshcompatible_process_meshr   r   s
             r-   _update_process_mesh_by_nearestz)Completer._update_process_mesh_by_nearest(  s)   )AA'JJ((88 	D'4L"&"4"L"L# # $7#C &E34' '# (3 $;;;,C)7**F"> 	L 	LK!!## L(9(9(E&EE#  !
 $00@@ {)**a//*I%2L4MN+ +' ,7(59PPP4K$1"? 	L 	LK!!## L(9(9(E&EE#  !
 $00@@ *I%2L4MN+ +' ,7(59PPP4K$1!	L 	Lr/   c                 0   d }d }d }d }| j                                         D ]\  }}|                                                    d          }| j        j                            |          }t          |                                          }	| j        	                    |          }
|
j
        }|j        }|	D ]}|                                r|                                (|                                rC|                                /| j                            |          }t!          ||j                  }||_         ||           g }|                                                    d          d         }d }|j        D ]m}|                                rW|                                C|                                                                |k    r|}|                    |            nn|                     ||                     d }t-          |	          D ]p}|                                rZ|                                F|                                                                |k    rt/          |j                  dk    r|} nq|                     ||                     |                                                    d          d         }d }|j        D ]V}|                                r@|                                ,|                                                                |k    r|}W|                    |           |D ].}| j                            |          }||_         ||           /|j        }|                                D ]\  }} || j        j        ||          }| j                            |          }|j        |_        |j        D ]R}|                                                                |k    r&| j                            |          }|j        |_        S|j        }|                                D ]\  }} || j        j        ||          }| || j        j        ||          }| j                            |          }|j        |_        |j        D ]R}|                                                                |k    r&| j                            |          }|j        |_        S| j                                        D ]i}d }|D ]1}| j                            |          }t!          ||j                  }2|D ].}| j                            |          }||_         ||           /jd S )	Nc                     t          | d |                   D ]X}|                                rB|                                .|                                                                |k    r|c S Yd S rS   )r   r   r   r   nodesr   var_namer   s       r-    _find_nearest_tensor_node_beforezUCompleter._update_process_mesh_for_specials.<locals>._find_nearest_tensor_node_beforee  sp     tt--    KKMM 

.

))X55KKK   r/   c                     | |dz   d          D ]X}|                                 rB|                                .|                                                                |k    r|c S Yd S )Nr   )r   r   r   r   s       r-   _find_nearest_tensor_node_afterzTCompleter._update_process_mesh_for_specials.<locals>._find_nearest_tensor_node_aftern  sm    cAgii(    KKMM 

.

))X55KKK   r/   c                    g }t                      }g }|                    |            t          |          dk    rC|d         }|dd          }t          |          |v r8|j        |j        z   }|D ]}|                                r|                                |                                                                t          j
        j        j        k    rat          |                                                                          dk    r*|                    |           |                    |           |                                r|                                d}|                                                                dk    sT|                                                                dk    s*|                                                                dk    rd}|j        D ]}|                                r||                                h|                                                                t           v s7t          |                                                                          dk    rd} n|j        D ]}|                                r||                                h|                                                                t           v s7t          |                                                                          dk    rd} n|r*|                    |           |                    |           |                    t          |                     t          |          dk    C|S )Nr   r   Tr   r    r"   F)rT   rK   rH   r   r   r   r   r   r   r   r   r   r   r`   r   r+   r   add)	source_noderelated_nodesvisitedfrontiercur	neighborsr   flagr   s	            r-   _find_nodes_related_to_condzPCompleter._update_process_mesh_for_specials.<locals>._find_nodes_related_to_condw  s   MeeGHOOK(((h--1$$qk#ABB<C==G++J4	% *7 *7D{{}} 7)? HHJJOO--1E1LLL #DHHJJ$4$4$6$6 7 71 < <$OOD111)00666zz|| "7		(=# GGIINN,,0BBB#wwyy~~//3PPP#wwyy~~//699#(D+/; * *K + 2 2 4 4
*$/OO$5$5$A %0OO$5$5$:$:$<$<'<%= %='*;??+<+<+B+B+D+D'E'E'J'J+0D$)E+/< * *K + 2 2 4 4
*$/OO$5$5$A %0OO$5$5$:$:$<$<'<%= %='*;??+<+<+B+B+D+D'E'E'J'J+0D$)E 7$OOD111)00666HSMM***e h--1$$f ! r/   c                 2   t          | t                    r$t          | j                  D ]\  }}d| j        |<   t          | t                    r| j                                        D ]I}g }|                     |          }|D ]}|                    d           | 	                    ||           J| j
                                        D ]K}g }|                     |          }|D ]}|                    d           |                     ||           Jd S d S )Nr=   )r2   r   	enumeraterN   r   inputs_dist_attrskeysr   rK   r   outputs_dist_attrsr   r   )r   rc   _arg_namenew_dims_mappingrN   s         r-   _make_dims_mapping_replicatezQCompleter._update_process_mesh_for_specials.<locals>._make_dims_mapping_replicate  sb   )^44 3%i&<== 3 3DAq02I*1--)%566  ) ; @ @ B B Q QH')$#,#C#CH#M#ML) 4 4(//333344X?OPPPP ) < A A C C  H')$#,#D#DX#N#NL) 4 4(//333355 "2     r/   	sub_block	Conditionr   
StepScopes)_while_op_nodesvaluesr+   _block_attr_idr   serial_graphget_sub_graphrJ   r   r   r   rb   r   r   r   r   r]   inputr   r   rK   extendr   rH   r   outputr  itemsr   r  _array_nodes)r   r   r  r  r  while_op_nodewhile_op_node_idxsub_graph_id	sub_graphsub_graph_nodeswhile_dist_opwhile_op_dist_attrr\   r   r   cond_tensor_related_nodescond_tensor_namecond_tensor_nodestepscopes_tensor_namestepscopes_tensor_nodeoutput_noder   while_op_inputs_dist_attrstensor_namenearest_tensor_nodenearest_tensor_dist_attrnode_dist_attrwhile_op_outputs_dist_attrsarray_node_list
array_nodes                                 r-   !_update_process_mesh_for_specialsz+Completer._update_process_mesh_for_specialsd  s   	  	  	 	  	  	 9	! 9	! 9	!v	 	 	* 150D0K0K0M0M I	 I	,M,(++--<<[IIL*7EE I #9#6#6#8#899O .DD M "/!8 #5"A'  KKMM dhhjj&<JJLL '=%)WWYY%: $ 2 J J4 P PI*@+Y-C+ +' /B+(();<<< )+%,//1177DDQG#%,  KKMM

.

))-==='+$-445EFFFE%,,++,<==  
  $ 11  KKMM

.

))-===DL))Q..'+$E%,,++,<==   &3%5%5%7%7%>%>|%L%LQ%O"%)",4 9 9&&((9#))5#))..004JJJ-8*%,,-CDDD1 ? ?#'#5#M#M$ $  1D -,,-=>>>> *<)M& ,1133   &F&F&;%' '# &>>+  ) -9 !- *0  Dxxzz((K77 .FFtLL ' 5A '3 +=*O' -2244   &F&F&;%' '#
 '.*I*I*?)#+ +' &>>+  ) -9 !- *1  Dxxzz((K77 .FFtLL ' 5A '3/B  $07799 	8 	8O"&-  
 .FF 	 '=')?' '## . 8 8
 .FF 	 *=	&,,Y77778	8 	8r/   c                 &   | j         D ]\  }}| j                            |          }| j                            |          }|j        |_        t	          |j        |j        g          }||j        |k    r||_        ||j        |k    r||_        d S rS   )r   r   r   rb   r   )r   r   r   r   r   r   s         r-   #_update_process_mesh_between_graphsz-Completer._update_process_mesh_between_graphsc  s    '+'F 	L 	L#K$($6$N$N% %! $(#5#M#M$ $  %1 ". 'F)6(5' '# (3)6*+ + 6M%2'3(59PPP4K$15	L 	Lr/   c                    | j         j        }| j         j        }|D ]}| j                             |          }|                    d          s2d }|D ]_}|                                                                dk    r-|j        D ]&}t          |          t          |          k    r|} n'| n`|| j         	                    |          }|D|                    d          s/t          |j        |j        g          }	|	|j        |	k    r|	|_        d}
t          |          D ]D\  }}| j         	                    |          }|j        |
dk    r|}
|                     ||           E|
dz   t          |          k    rd S t          ||
dz   d                    D ]k\  }}|
|z   dz   }||dz
           }| j         	                    |          }| j         	                    |          }|j        J |                     ||           l||
         }|d |
         D ]}|                     ||           |                                  |                                  d S )Nrb   r!   r=   r   )r   r   _serial_ordered_tensor_nodesr   r   r+   r   r   r   r   r   rb   r  r   rH   r7  r9  )r   ordered_op_nodesordered_tensor_nodesr   r   first_op_noder   input_tensor_noder   r   %idx_of_first_op_node_has_process_meshr   original_idxr   nearest_op_dist_attrs                  r-   _update_process_meshzCompleter._update_process_mesh  s   -F  $1N/ !	H !	HK"AA+NN  $00@@  M+ 
 
 ::<<$$&&'11)0  %,,9J0K0KKK(/ L !,E -$-EE L '0I0I1 1' +J%2L4MN+ +' ,7$15LLL0GL- 13-%&677 	G 	GLC-EEgNNL)59R??8;544WgFFF014s;K7L7LLL4%BQFHHI
 
 
	K 
	KLC A3FJL.|a/?@O#'#5#M#M$ $   -EEgNNL'4@@@00/JJJJ*1
 ((N)N(NO 	K 	KG00/JJJJ 	..000 	0022222r/   c                 $   | j         rd S i | _        i | _        g | _        | j        j        }t          |          D ]\  }}|                                r|                                	                                dk    r||f| j        t          |          <   |                                	                                dk    r|                                                    d          d         }| j                            |d           
g | j        |<   | j        |                             |           | j        |                             |j        d                    |                                	                                dk    r|                                                    d          d         }| j                            |d           
g | j        |<   | j        |                             |           | j        |                             |j        d                    |                                r|                                |j                                        dk    r| j        j        |j                                        dz
                               |                                                                d           }|6t/          |d 	          }|D ]!\  }}| j                            ||f           "d
| _         d S )Nr!   read_from_arrayXr   write_to_arrayOutr   c                     | d         S Nr   r>   )xs    r-   <lambda>z$Completer._prepare.<locals>.<lambda>  s
    ! r/   )keyT)r   r  r!  r   r   r   r  r   r+   r   r   r  getrK   r   r  r   r   r   r   graph_id_tensor_nodes_with_same_namer   sorted)	r   r   r   r   array_var_nameparent_nodessorted_parent_nodesr  r   s	            r-   _preparezCompleter._prepare  s    	F!*,'&;	"9-- 	 	ICzz|| N7799>>##w..<@#;D($87799>>##'888%)WWYY__S%9%9!%<N(,,^TBBJ<>).9%n5<<TBBB%n5<<T[^LLL7799>>##'777%)WWYY%5%5e%<%<Q%?N(,,^TBBJ<>).9%n5<<TBBB%n5<<T\!_MMM{{}} !79%%''1,,*G I..0014#dhhjjoo//66 !
 $/.4(nn/ / /+ /B  NA{ ;BB!,d 3    "r/   Nc                    || j         j        }n|| j         _        |                     |          \  }}t	          | j                   sq| j                             d           |                                  |                                  |                                  | j         	                                 nIt                              d           | j                             d           |                                  |                     |||           |                     |           |                     |           | j                                          | j                                          |S )a  Complete annotation for the partial annotated serial_main_program.
        Arguments:
            serial_main_program: partial annotated serial_main_program.
        Returns:
            serial_main_program: completed annotated serial_main_program.
        NT)
with_graphz+Default distributed attributed will be set.F)r   serial_main_program_serial_main_program*_get_tensor_names_and_ops_with_global_meshr   
initializerU  rC  r   $copy_dist_attr_from_graph_to_programr   info_update_dist_attr_for_dp_complete_with_global_mesh$_complete_high_order_grad_annotation_complete_chunk_idamend_dist_attr_for_programvalidate_dist_attr_for_program)r   rX  tensor_namesopss       r-   complete_forward_annotationz%Completer.complete_forward_annotation  sk    &"&"4"H6ID3 KK
 
c &d&899 	,))T):::MMOOO%%'''%%'''CCEEEELLFGGG))U);;;))+++''(;\3OOO112EFFF 34446688899;;;""r/   c                 |   | j         j        r| j         j        j        j        sg g fS t          j        j                                        }|t          	                    d           g g fS |j
        }g }|                                }|j                                        D ]i}| j                             |          }|j        j        }|?t#          |j                  t#          |          k    r|                    |j                   jt+          |          dk    rg g fS i }	|j        }
|
D ]}|j        }|D ]}||	|<   g }t1                      }t3          j                    }|D ]}|                    |           |                                 |                                s|                                }||v r-|                    |           |                    |           |	|         }|                    |           |j         }|D ]}|                    |           |                                ||fS )Nzglobal_mesh is not set, tensor annotation with global mesh may be not work, please use paddle.distributed.auto_parallel.set_mesh(mesh) firstly.r   )!r   strategypipeliner   r3   distributedauto_parallelget_meshr   warning_process_idsglobal_blockvarsr  get_dist_tensor_for_programr   rb   rQ  rU   rK   r   rH   re  output_arg_namesrT   queueQueueputclearemptyrN  r  input_arg_names)r   rX  global_meshglobal_mesh_process_idstensor_names_with_global_meshblockr   dist_varmeshtensor_name_to_opre  r+   output_tensor_namesr0  ops_with_global_meshhas_visitedtensor_name_queuerx  
input_names                      r-   rZ  z4Completer._get_tensor_names_and_ops_with_global_mesh  s   "+	%.7>	 r6M (6??AAOO b   r6M"-":(*%#0022:$$&& 	? 	?C)EEcJJH%2DF4+;$<$<'A A % % .44SX>>> ,--22r6M i 	4 	4B"$"52 4 413!+..4  "ee!KMM8 	/ 	/K!!+....%++---#))++ 	2+//11Kk))OOK((()00===";/B ''+++ 0O- 2 2
!%%j1111 $))++ 	2 -.BBBr/   c                    t          |          dk    rd S |                                }|                    |d                   }| j                            |          }|j        j        }|D ](}| j                            |          }	||	j        _        )|D ]=}
|                    |
          }| j                            |          }||j        _        >d S rJ  )rH   ro  _var_recursiver   rq  r   rb   get_dist_op_for_program)r   rX  rd  re  r|  tensordist_tensorry  r+   r   r0  s              r-   r_  z$Completer._complete_with_global_meshY  s     |!!F#0022%%l1o66(DDVLL!+8  	9 	9B(@@DDG-8G**' 	= 	=K))+66F,HHPPK1<K!..	= 	=r/   c                    3  fd} fd} j         j        r j         j        j        j        sd S t	           j                   \  3} j         j        j        j        } j         j        j        j        } j         j        j        j        }3dk     r|dk    rt          d          |dk    r|r|dvrt          d          |dk     rd S |	                                }|j
        }	t          j                    }
t          j                    }t          j        |t          j                  }d}t!          |	          D ]%\  }}|                    |j                  }|r|} n&t'          |	          }|dz
  }t)          t+          |                    D ](}|                    |	|         j                  }|r|} n)t+          ||dz             D ]}|	|         j        }|                    |          }|s|	|         j        d	k    rad
|	|         j        d         v rLt+          |dz   |          D ]&}|                    |	|         j                  }|r n'|sJ |	|         j        }nt          d|	|          d          ||                    d          d                              d          d         } j                             |	|                   }||
vr|g|
|<   |j        j        ||<   #|
|         d         dz   |k    s
J d            ||         }||j        j        k    s
J d            |
|                             |g           3|z  }t'          |
          |z  dk    s!J d| dt'          |
           d| d            d}dg}|                                D ]B}||v sJ |                    |          }|d         |k    rd} n|                     |           C|stB          "                    d           ntB          "                    d           g }tG          t+          3                    }t+          |          D ]1}|                    |           |dk    r|$                                 23fdt+          |          D             }t'          |
          |z  }g } dg|dz   z  }!d\  }"}#g }|
%                                D ]c\  }$}%|                     |$           |"dz  }"|"|k    r,|%d         dz   |!|#<   d|#dz   }#}"|                      |           g }t'          |	          |!|<   di }&i }'t+          t'          |!          dz
            D ]}(|!|(         })|!|(dz            }*||(         }||(         }+||         },| |(         }-g }.|-D ]}$|.                    |
|$                    tB          "                    d| d|+ d|- d           tB          "                    d |	|)         j         d!|	|)         j&         d"|	|)         j         d           tB          "                    d#|	|*dz
           j         d!|	|*dz
           j&         d"|	|*dz
           j         d           t+          |)|*          D ]}/|	|/         }|'                    d$          rv|(                    d$          j)        }0|j*        |0         }1|r|/|.v r ||||,|'            ||||+|&           |1j
        D ]$}2|r|/|.v r ||1|2|,|'            ||1|2|+|&           %|r|/|.v r ||||,|'            ||||+|&           |dk    rM|dz  dk    rFt+          ||          D ]7} |||	|         |dz
  |&            |||	|         |d         |'           4d S d S d S )%Nc                 H   j                             |          }||j        _        |j        |j        z   D ]j}d|v r||vr_|                     |          }j                             |          }|j        j        |j        j        k    r||j        _        |||j	        <   kd S )Nlod_tensor_blocking_queue)
r   r  r   chunk_idrx  rr  _find_var_recursiverq  rb   r   )	r|  r+   r  var_to_chunk_idr   r   r   r  r   s	           r-   set_chunk_idz2Completer._complete_chunk_id.<locals>.set_chunk_ido  s    (@@DDG)1G&*R-@@ = =.$66..33D99C*FFsKK    )6&0=> > :B-64<1= =r/   c                    j                             |          }|j        D ]e}||vr_|                     |          }j                             |          }|j        j        |j        j        k    r||j        _        |||j        <   f|j        D ]K}||vrE|                     |          }j                             |          }||j        _        |||j        <   L||j        _        d S rS   )	r   r  rx  r  rq  r   rb   r   rr  )	r|  r+   rb   var_to_process_meshr   r   r   r  r   s	           r-   set_process_meshz6Completer._complete_chunk_id.<locals>.set_process_mesh  s   (@@DDG* E E22233D99C*FFsKK    )6&0=> > >J-:8D+CH5+ A A22233D99C*FFsKK   :FK)64@'1-9G***r/   r	   r   z3VPP schedule mode only can be set in pipeline mode.)VPPZBVPPzDPlease set right schedule_mode and vpp_seg_method for VPP and ZBVPP.r   rn   reshard_apizThe op z" should only be created by reshardr&   r=   z'The segment's ops should be continuous.z0The segment's ops should have same process_mesh.zThe number of layers[z] (z$) should be divided by part number (z).TFzCannot Use Auto VPPzUsing Auto VPPr  c                     g | ]}|z  S r>   r>   ).0rc   	pp_degrees     r-   
<listcomp>z0Completer._complete_chunk_id.<locals>.<listcomp>  s    CCCAiCCCr/   )r   r   zstage=[z], chunk_id=[z], layer_name=[r1   zstart op: [z]: [z] [z	end op: [r  )+r   rh  ri  r   r   
vpp_degreevpp_seg_methodschedule_mode
ValueErrorro  re  collectionsOrderedDictrecompile
IGNORECASEr  searchstruct_namerH   r   r_   r   rr  startrv   r  r   rb   r  r  indexrK   r   r]  rJ   reverser   rx  r'   r(   idblocks)4r   rX  r  r  sub_process_meshesr  
seg_methodr  r|  re  seg_op_depsseg_op_meshregexstart_op_indexrc   r+   mtotal_op_numend_op_indexr  jr   pre_mesh
num_chunksnon_decreasingseg_pp_stagesseg_pmpp_stageseg_pp_stager  seg_chunk_ids	part_sizesegment_struct_namessegment_partsmemory_counterseg_idxr   idxsr  r  seg_id	start_idxend_idxr  rb   struct_names
seg_op_idxr   block_idr  sub_opr  s4   `                                                  @r-   ra  zCompleter._complete_chunk_idn  s	   	= 	= 	= 	= 	=$	: 	: 	: 	: 	:2 "+	%.7>	 F(5d6H(I(I%	%'09D
'09H
*3<Jq==Z!^^E   >> +3CCCV   >>F#0022i
 "-//!-//
:r}55s^^ 	 	EArR^,,A !" 3xx#a'%--.. 	 	ASV/00A  
 ~|a'788 "	5 "	5Aa&,K[))A  FK8++%Q)@)CCC #1q5,77 " "!LLQ);<< "!E"HH1"%a&"4KK$L#a&LLL   &aggajjll399#>>qAK(@@QHHG+--,-3K(+2+<+IK((";/3a71<<<= =<< '{37#4#AAAAF BAA K(//4444+
;*,111wJww3{3C3Cwwiswww 211 !((** 	+ 	+F/////)//77HR 8++!&  **** 	+LL.////LL)*** E),,--z"" 	' 	'A  ...''$$&&&CCCCz1B1BCCC$$
2	!zA~."&%++-- 	1 	1JD$t$$$aN**)-bAg&*+Wq[$++K888 (+CM*%%  C..233 0	G 0	GF%f-I#FQJ/G$V,H$V,H-h7L/7LJ$ 5 5!!+d"34444LLY(YYYY,YYY   LL|c)n1||s9~7U||Z]^gZhZy|||   LL AC!,1  A  As7Q;7G7W  A  A\_`gjk`k\l\}  A  A  A   Y00 G GX;;{++ G!ww{336H 3 :8 DI% #*;*;((!2|5H   !LHoFFF"+- 
 
) cZ.?.?,, ) & , 3	   %%vx   
 & #*;*;((!2|5H   !LHoFFFF7G> G##
Q!(;(;<66  UCFJNOLLL  3q6#5a#8:M    $#(;(; r/   c           	      2   t                      j        }t          |          }| j        j        }|                                D ]}||j        _        | j        j        }|                                D ]#}|j	        }|j        }||_        t          j        |          }	|j        dk    r8|j        D ]p}
|                    |
          }|j        sR| j                            |          }|j        }|j        j        |_        |                    |
|j        j                   qt'          |d          }|d}t          j        |          }|D ]c}|                    |           |                    |          r0|                                r|j        |_        |j        |_        d} n||_        d|r|	|_        n|	|_        |j        D ]}
|j        }|                    |
          }|j        dv rk|                    |
          }t;          |          dk    rCdgd t=          t;          |          dz
            D             z   }|                    |
|           | j                            |          }|                    |
          |j        _        ̐%d S )	Nr   Tr   F)fill_constantr   c                     g | ]}d S r=   r>   r  r  s     r-   r  z6Completer._update_dist_attr_for_dp.<locals>.<listcomp>  s%     2 2 2#$B2 2 2r/   r   ) r   ranksr
   r   _dist_tensors_for_programr  r   rb   _dist_ops_for_programr   r   r   r   rx  get_serial_inputis_parameterrq  r   rN   r   r   r   r   r   r   r   rr  get_serial_outputr   rH   r_   r   )r   r  rb   dist_tensorsr  dist_opsr   r   r   r   r  serial_tensorr   r   r   r   old_dims_mappingr  s                     r-   r^  z"Completer._update_dist_attr_for_dp\  s(   '))/"5)))C'..00 	> 	>K1=K!..%;(( C	 C	G)I",L(4L%$(M,$?$?!~!333%5   ' 8 8 B B$1 *FF)   
 $+#4L#-: !- !77 +"7"D   GT  M (!%&*mL&A&A#$1 @ @L 44W===$77@@	@#6688	@ 2>1B.0<0@-).,?))! >(=G%$9!%6  &0 ' 9 9( C C>%666'3'K'K ( ($ +,,q00,-3 2 2(-c2B.C.Ca.G(H(H2 2 2 ,( %<<$&6   #0LL!  !88BB %22#aC	 C	r/   c                 B   || j         j        }n|| j         _        | j                                          |                                  t                      }| j         j        }|D ]}|                                r|                                	                                dv rA| j         
                    |          }|j        }|j        D ] }|                                r|                                t          |j                  dk    rD|                                }|                                }	|                    |	          }
|	|v r| j                             |          }|j        |_        |
j        r|                    |	          nd |                                D             |_        |                    |	           |j        D ]}|                                r|                                |                                                                }	|	|v rU| j                             |          }|j        |_        |                    |	          |_        |                    |	           |                                  |                                  |                                  |                                  | j                                          | j                                           | j         !                                 d S )N)r!   r   c                     g | ]}d S r  r>   )r  rc   s     r-   r  z>Completer._complete_tensor_dist_attr_by_op.<locals>.<listcomp>  s    !B!B!B"!B!B!Br/   )"r   rX  rY  r[  rU  rT   r   r   r+   r   r   r   r   r   r   rH   r   r  r   rb   r  r   r`   rN   r  r   r   r7  r9  r   r   r\  rb  rc  )r   rX  has_set_dist_attrr   r   r   r   r   r   r0  r  r   s               r-    _complete_tensor_dist_attr_by_opz*Completer._complete_tensor_dist_attr_by_op  s    &"&"4"H6ID3%%'''EE&;	 /	; /	;Dzz|| .;7799>>##y00,BB4HH&0#'; ; ;K"))++ ;0A0A0M{122a77$&1oo&7&7&1&6&6&8&8!(!9!9+!F!F&*;;;$ .MM +  ) )5 )5
  &2CL??LLL!B!Bk.?.?.A.A!B!B!B )5
 *--k:::#'< ; ;K"))++ ;0A0A0M&1oo&7&7&<&<&>&>&*;;;$ .MM +  ) )5 )5 )@@MM )5 *--k:::..00000222--///00222 	??AAA 	6688899;;;;;r/   c                    || j         j        }n|| j         _        d }d }t          |                                j                  }|                                j        }| j         j        }|j        }t          |          dk     rdS d}t          dt          |                    D ]}	||	         }
t          |
                    d                    t          t          j        j        j                  k    rSt          |
                    d                    t          t          j        j        j                  k    rUt          ||	dz
                               d                    t          t          j        j        j                  k    r|dz  }t          |
                    d                    t          t          t          j        j        j                  t          t          j        j        j                  z            k    r|
j        dk    sJ  dS ||	         }|j                                        |j        v r |||j        |j                                                           }|J | j                             |          }|j        }t3                      }||_        |j        D ]}||j        vr_||j        vrV|||         v r$||         |         }|                    |          }n\||         }| j                             |          j        }n4||j        v r|                    |          }n|                    |          }|J d	| d
            |                     ||           |j        D ]}|||         v sJ ||         |         }|                    |          }||         }tC                      }||_        ||_        | j         "                    ||           |#                    ||           | j         $                    ||           |j        dk    rtK          tM          ||j                            sJ |j        d         }|||         v sJ d| d            ||         |         }||         }| j                             |          }|j        }|j        }tC                      }||_        ||_        ||         }| j         "                    ||           t3                      }||_        |j        D ]}|                     ||           |#                    ||           n|j        dk    r|j        d         }||         }| j                             |          }|j        }|j        } tC                      }||_        | |_        |j        d         }!||!         }| j         "                    ||           t3                      }| |_        |                     ||           |#                    |!|           n#|j        dv r~tO          d|j         d          | j         $                    ||           dS )z
        NOTE:
            [HighOrderGrad] Complete the annotation of vars and ops only for high order gradient.
            This function is temporary to support high order gradient, and will be removed in the future.
        Nc                     d| v rdS dS N@GRADTFr>   r   s    r-   _is_grad_var_namezICompleter._complete_high_order_grad_annotation.<locals>._is_grad_var_name      $t5r/   c                 R    | D ]#}|j                                         |k    r|c S $d S rS   r7   original_idre  r  r+   s      r-   _get_op_by_idzECompleter._complete_high_order_grad_annotation.<locals>._get_op_by_id
  =      7&&((B..III /4r/   r	   r   op_roler   r  [] 's dims mapping is NONEsumsum op's output '' has no corresponding varfill_any_like)r`   r  got unexpected op [r1   )(r   rX  rY  rJ   ro  re  rp  dist_op_contextgrad_var_to_varrH   r_   intr(   r   op_proto_and_checker_makerr   ForwardBackwardLossr   r7   r  grad_op_id_to_op_idget_op_dist_attr_for_programrb   r   rx  rr  r    get_tensor_dist_attr_for_programrN   r   r   r    set_tensor_dist_attr_for_programr   set_op_dist_attr_for_programallmapr  )"r   rX  r  r  re  rp  r  r  appended_grad_timesr   r+   grad_op
forward_opfwd_op_dist_attrfwd_op_process_meshgrad_op_dist_attrr  fwd_nameref_dims_mapping	input_varoutput_name
output_varr   ref_fwd_var_nameref_fwd_varref_fwd_dist_attrref_fwd_dims_mappingref_fwd_process_meshr   ref_var_nameref_varref_dist_attrref_process_meshoutput_var_names"                                     r-   r`  z.Completer._complete_high_order_grad_annotation  s    &"&"4"H6ID3	 	 	
	 	 	 &33559::"//116,<)9!##FCHH%% w	 w	CSB2779%%&&#/6>+ +   2779%%&&#/6?+ +  c#'l''	2233s/6>8 8   $q(#2779%%&&#D3:CDDd5<ABBC+ +   w/1111 #hG((**"67 7 +]#70022 
 "--- &CCJOO ! '7&C#$4$6$6!1D!.")"9 % %J"**DDD&j.III%9L)MMM'67J'K *(H !1 H H$,!" !" -, )-Z(8I/3/A/b/b )0 0* -, &)CCC 0 G G$.!" !" -, !1 H H$.!" !" -
 ,77AJAAA 877 &<<"$4    $+#;  K&/:M*NNNNN./BCKPH'7'N'N ( ($ "&k!2J'5'7'7$4D$14G$1&GG"$4   &==#%5    "??.    <5((s#4g6MNNOOOOO")":1"=K#7J'KKKKSKSSS LKK (77J'K#($ #''7"8K*KK'  &
 ,=+I(+<+I('5'7'7$4H$14H$1!%k!2J&GG"$4   )9(:(:%5I%2$+$;  )@@$&:    &==#%9    \_44#*#:1#=L"<0G*KK#  "
 (5'A$'4'A$'5'7'7$4D$14D$1&-&>q&AO!%o!6J&GG"$4   )9(:(:%5E%2%<<$&6   &==')9    \%??? %%J7<%J%J%JKKK"??.   kw	 w	r/   c                 D   -. | j         j        }n| j         _        d --fd}d }. fd}d}d}t          |                                j                  D ]6\  }}t          |          r|}t          |          r|j        dk    sJ |} n7|dk    r|
J d	            t          |                                j                  }	|                                j
        }
 j         j        }|j        t          |j                           .t          |t          |	                    D ]t}|	|         }||k    ro|j        dk    sJ t          |j                  dk    s J d
t          |j                   d            t          |j                  dk    s J d
t          |j                   d            |
|j        d                  }|
|j        d                  }|j        dz   |j        k    sJ  j                             |          } j                             |          }t)           j         ||j        j        |j        j        |j        j                   t3          ||j        j        |j                            |j                   j         |j        j                   |j                                        |j        v r ||	d|         |j        |j                                                           }|J |                    d          r|                    d          r ||||
           |                    d          j         }|                    d          j         }|j!        |         }|j!        |         }|j        D ]G} ||j        |j        |j                                                           } ||||j
                   H ||||
           |j        dv rtE          tG          -|j                            sJ |j        d         }|.v sJ d| d            .|         }|
|         } j         $                    |          }|j        }|j        }|j        }|
|         }t)           j         ||||           tK                      } |j        D ]}!| &                    |!|           | '                    ||           d}"d}#t          |dz
  |dz   d          D ]}}$|	|$         }%t          tQ          |%j                  tQ          |j                  z            }&t          |&          dk    r* j         )                    |%          }'|'j        }"|'j        }# n~|"|#J |#| _        |"| _         j         *                    ||            ||j        dk    r|j        d         }(|
|(         }) j         $                    |)          }*|*j        }+|*j        }#|*j        }"|j        d         },|
|,         }t)           j         ||+|#|"           tK                      } |#| _        |"| _        | &                    |(|+           | '                    |,|+            j         *                    ||            ^tW          d|j         d          dS )zSComplete the annotation of vars and ops in the backward phase for parallel program.Nc                     d| v rdS dS r  r>   r  s    r-   r  zACompleter.complete_backward_annotation.<locals>._is_grad_var_name  r  r/   c                 p     |           sJ d|  d            | d |                      d                   S )Nr  z] is not a grad var name.r  )find)grad_var_namer  s    r-   &_get_forward_varname_from_grad_varnamezVCompleter.complete_backward_annotation.<locals>._get_forward_varname_from_grad_varname  sW    $$]33  <M<<< 3 !!>=#5#5g#>#>!>??r/   c                 R    | D ]#}|j                                         |k    r|c S $d S rS   r  r  s      r-   r  z=Completer.complete_backward_annotation.<locals>._get_op_by_id  r  r/   c                    j                             |           }t                      }|j        }|j        }|j        dk    r| j        dk    r|                     d          d         }|                    |          }||j        	                    d          d                  }	t          j         |	|||           |j        D ]}
|                    |
|           |                    |	j        |           n|j        D ]}
|
| j        vrS|
| j        vrJ|
v r|
         }|                    |          }n\||
         }j                             |          j        }n4|
| j        v r|                    |
          }n|                    |
          }|J d|
 d            |                    |
|           |j        D ]}|d	k    rc||         }	d
 t'          t)          |	j                            D             }t          j         |	|||           |                    ||           k|v sJ |         }|                    |          }||         }	t          j         |	|||           |                    ||           ||_        ||_        |j        |_        |j        |_        |j        |_        d }|j        t0          v r ||||           j                             ||           d S )Nry   rv   rF  r   rH  r  r  r  z@EMPTY@c                     g | ]}d S r  r>   r  s     r-   r  zeCompleter.complete_backward_annotation.<locals>._complete_grad_op_with_forward_op.<locals>.<listcomp>4  s%     , , ,#$B, , ,r/   c           	      2   g }d }g }|j         dk    r
t          |                    d                    dk    r|                    d          d         }|                    |                    d                     t          | |         j                  }t          | |                    d          d                  j                  }|dk    r'|dk    rt          t          |dz
                      }n.||k    rt          t          ||z
                      }n|j         dk    r|                    d          d         }|                    |                    d                     |                    d          d         }t          t          t          | |         j                  t          | |         j                  z
                      }nM|j         d	k    r|                    d          d         }|                    |                    d
                     |                    |                    d                     t          |	                    d                    }	t          t          |	                    }n|j         dk    r{|                    d          d         }|                    |                    d                     t          t          t          | |         j                  dz
                      }nt          d|           t          |          dk    r^|                    |          }
|D ]H}|
|         dk    r8|
|         }|D ]-}|                    |          }|                    |g           .Gd S d S )Nmatmul_v2_gradzY@GRADr   zOut@GRADr	   r   elementwise_add_gradYlayer_norm_gradz	Bias@GRADz
Scale@GRADbegin_norm_axislookup_table_v2_gradzW@GRADz$Backward Partial is not adapted for r=   )r   rH   r  r  r  r`   rJ   r_   r  r(   NotImplementedErrorr   get_output_dist_attr_set_partial_dims)rp  r  r  param_gradsactivation_gradbroadcast_axis_indiesact_ndim
param_ndim	param_varr$  activation_grad_dims_mappingaxispartial_dimp_grad_namep_grad_dist_attrs                  r-    infer_backward_op_partial_statusz{Completer.complete_backward_annotation.<locals>._complete_grad_op_with_forward_op.<locals>.infer_backward_op_partial_status]  s   
 !"&(*%L$444GNN8445599&-mmJ&?&?&BO&&w~~h'?'?@@@"4#8#>??H!$T'..*B*B1*E%F%L!M!MJ "Q#a<<48x!|9L9L4M4M1!J..04!(Z"7881 1- \%;;;&-mmJ&?&?&BO&&w~~h'?'?@@@ 'c 2 21 5I,0_ 5 ;<<!$y/"7889 - -)) \%666&-mmH&=&=a&@O&&w~~k'B'BCCC&&w~~l'C'CDDD&)',,7H*I*I&J&JO,01G1G,H,H))\%;;;&-mmJ&?&?&BO&&w~~h'?'?@@@,0c$"7"=>>BCC- -)) .HwHH   {##a'')@@+  1
 !6 " "7=CC*Ft*LK/: " "$5$J$J(3%& %& !1
 !1 B B%0M!" !" !" !" ('" "r/   )r   r  r   rb   r  r   r  r   r7   r  r   rx  r   r   r   rr  r   r  rN   r_   rH   r`   r   r   r   r  )r  r  rp  r  r  r  ref_chunk_idsplit_input_var_namer  r
  r  r  r  r	  r4  r  r   s                  r-   !_complete_grad_op_with_forward_opzQCompleter.complete_backward_annotation.<locals>._complete_grad_op_with_forward_op  s6   #1NN    !1 2 2/<+4L|x''JOw,F,F'1'7'7'<'<Q'?$#3#J#J($ $  "',"5"5e"<"<Q"?@
!&$$)    #*"9  J%<<"$4    "99O%5   
 #*"9 # #J"**DDD&j.III%88'6z'BH 0 H H$,!" !" -, )-Z(8I/3/A/b/b )0 0* -, &)CCC 0 G G$.!" !" -, !1 H H$.!" !" -
 ,77AJAAA 877 &<<"$4   
 $+#; # #K"i//%)+%6
, ,(-c*2B.C.C(D(D, , ,( * .&,,%1    *AA')9   !&/9999.{;H'7'N'N ( ($ "&k!2J%*"((!-    &==#%5    .>*)5&*:*D')9)B&)9)B&H" H" H"T |<<<00'#4   ;;*    r/   r=   r  r   z,No backward procedure found in this program.z7first backward op should has only ONE output, but got [r1   r   r  r  r  )r  grad_addr  r  r  r  ),r   rX  rY  r  ro  re  r   r   r   rJ   rp  r  r  rH   r_   rx  rr  r   rq  r  r   r   rN   rb   r  r   r   r7   r  r  r'   r(   r  r  r  r  r  r   r   r   rT   r  r  r  )/r   rX  r  r  r7  loss_opfirst_backward_op_idxr   r+   re  rp  r  r  loss_varloss_grad_vardist_loss_vardist_loss_opr  grad_sub_block_idforward_sub_block_idgrad_sub_blockforward_sub_blocksub_grad_opsub_forward_opr	  r  r  r  r  r  ref_fwd_chunk_idr
  r  r   r5  r  pre_idxpre_grad_opinter_arg_namepre_op_dist_attrr  r  r  r  r  r  r  s/   `                                            @@r-   complete_backward_annotationz&Completer.complete_backward_annotation  s    &"&"4"H6ID3	 	 	
	@ 	@ 	@ 	@ 	@	 	 	C	 C	 C	 C	 C	 C	J  " !4!A!A!C!C!GHH 	 	GC"~~ r"" w/1111(+%
 %))g.A.A: /B.AA &33559::"//116,<)9/00
 .C99 y	L y	LC#hG+++|66667233q888mcRYRiNjNjmmm 988 7344999ncRYRjNkNknnn :99   8 ;< $W%=a%@ A}w.-2DDDDD $ 2 N N! !  $1II    "&!!+8!+8*4=    G *7 *BB   &)3<    
 ((**"67 7 +].../#70022 
 "---##K00 QZ5H5H6 6 Q 65j'4PPP(/[(A(A(D%+5??;+G+G+J(%8%?)&N )<(B,)% (6'9 	 	)6-1+? + 0 < < > >* * :9*K9L   	 65j'4PPPP <#666s#4g6MNNOOOOO")":1"=K&/999SKSSS :99 (7{'C$"&'7"8K*KK'  &
 ,=+I(+<+I('8'A$ "&k!2J%*",,!1    )9(:(:%$+$;  )@@$&:    &==#%9   $(L'+$#(a!6!:B$ $ " " '*'l)- <==!'"9::;* * ~..22 $ 2 O O$/!" !" -
 ,<+DL/?/L,!E 3 %0,889 6F%21=%.&CC!2    \_44#*#:1#=L"<0G*KK#  "
 (5'A$'4'A$#0#9L&-&>q&AO!%o!6J%*"((!-    )9(:(:%5E%21=%.%<<$&6   &==')9   &CC!2    %%J7<%J%J%JKKKsy	L y	Lr/   c                    ddl m}  |            j        }t          |                                j                  }|                                j        }d}t          t          |                    D ]}||         }t          |
                    d                    t          t          j                  k    rAt          |          rF|j        t          v rt!                      }	t#          |          |	_        |j        D ]?}
||
         }| j                            |          }|	                    |
|j                   @|j        D ]x}||         }t3                      }t#          |          |_        d |j        D             |_        | j                            ||           |	                    ||j                   yn2||                    d          d                  }| j                            |          }|J |j        }|j        }|j        }|j        dk    rc||d	z            j        d
k    rO|||d	z                                d          d                  }| j                            |          }|J |j        }||                    d          d                  }| j                            |          }|st3                      }||_        ||_        |j        |j        k    r||_        nAt          |j                  d	k    r|j        d         d	k    sJ d |j        D             |_        | j                            ||           t!                      }	||	_        |j        D ]?}
||
         }| j                            |          }|	                    |
|j                   @|j        D ]?}||         }| j                            |          }|	                    ||j                   @| j                             ||	           d|j!        v rd||         j!        v rt          |                    d                    d	k    s
J d            t          |                    d                    d	k    s
J d            ||                    d          d                  }||                    d          d                  }| j                            |          }|J | j                            |          j        }|J | j                            |          j        }|J t!                      }	||	_        |	                    |j"        |           |	                    |j"        |           |	                    |j"        |           ||                    d          d                  }|	                    |j"        d |j        D                        |	                    |j"        d |j        D                        |sUd}t3                      }t#          |          |_        d |j        D             |_        | j                            ||           |j#        !                                D ]a}|dv rt          |j#                            |                    dk    r4t          |j#                            |                    d	k    sJ ||j#                            |          d                  }t3                      }d|v sd|v sd|v r]dg|_        |	                    |j"        d |j        D                        |	                    |j"        d |j        D                        n=||_        |	                    |j"        |           |	                    |j"        |           d|vr"||_        | j                            ||           c| j                             ||	           dS )zQComplete the annotation of vars and ops in the update phase for parallel program.r   r   Fr  c                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>  s%     : : :'(: : :r/   rF  Ncastr   rm   rH  c                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>  s%     >" >" >"+,B>" >" >"r/   GradParamzOnly support one-to-one now.LearningRatec                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>&	      +K+K+K1B+K+K+Kr/   c                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>)	  rS  r/   Tc                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>0	  s%     6 6 6#$B6 6 6r/   )rP  rO  rQ  Beta1TensorBeta2TensorEpsilonTensorBeta1PowBeta2Pow
SkipUpdater=   c                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>O	      0M0M0M0M0M0Mr/   c                     g | ]}d S r  r>   r  s     r-   r  z8Completer.complete_update_annotation.<locals>.<listcomp>R	  r]  r/   )$5paddle.distributed.auto_parallel.static.process_groupr   r  rJ   ro  re  rp  r_   rH   r  r(   r   Optimizer   r   r   r   r
   rb   rx  r   r  r   rN   rr  r   r`   r  r   r  r  r  r  input_namesr   r7   )r   rX  r   world_ranksre  rp  learning_rate_completedr   r+   r   in_namein_varin_dist_attrout_nameout_varout_dist_attrr  r  r5  r  r  paramgrad_varparam_dist_attrlearning_varvar_dist_attrr  r  input_var_attrs                                r-   complete_update_annotationz$Completer.complete_update_annotation  si	   	
 	
 	
 	
 	
 	
 .-//5 &33559::"//116"'S?? S	 S	C SB2779%%&&#fo*>*>>>&r** dw"666'7'9'94?4L4L1')'9  G%)']F+/+=+^+^ &, ,L )?? ')B    )+(;  H&*8nG,:,<,<M9D +: :M6: :,3M: : :M6 !.OO '   )@@ (-*D   " "&bhhsmmA&6!7 .OO &  %
  ,777+7+D(+7+D('3'< Gv-- #C!G 15F F F&*3sQw<+=+=c+B+B1+E&FG,0,>,_,_ '- -M $1#<#<#</</I, #'ryy'7'7':"; .OO '  &
  - ,:,<,<M9IM65AM2&}<<=M : : %($6$6!$;$;(/a(8A(=(=(=%>>" >"07>" >" >" : !.OO '  
 (8'9'94D1')'9  G%)']F+/+=+^+^ &, ,L )?? ')B    )+(;  H&*8nG,0,>,_,_ '- -M )@@ (-*D    &CCL   R^++3s8;O0O0Orxx0011Q6666 766 rxx//00A5556 655 !'!2!21!56E#BHHV$4$4Q$78H *KK!  $
 +666*KK! & %
 ,777*KK! & %
 ,777#3#5#5L0@L- 77 '7   !77
$4   !88
$4   $((@(@(C#DL 77$)+K+K8J+K+K+K   !88$)+K+K8J+K+K+K   3 	26/(6(8(85@5M5M26 6(4(:6 6 62 *KK(-   ')g&9&9&;&; ) )
% *   %rw}}Z8899Q>>$"27==#<#<==BBBB$(z)B)B1)E$F	)7)9)9 '*44)Z77+z99;=$N7(?? )0M0MY_0M0M0M   )@@ )0M0MY_0M0M0M    ;KN7(?? )0@   )@@ )0@   (z99:JN7 .OO )>   &CCL   gS	 S	r/   c                    || j         j        }n|| j         _        d| j         _        | j                                          |                                  | j                                          | j                                          dS )a  
        fill default data parallel annotation for program with primitive operators.

        Arguments:
            serial_main_program: partial annotated serial_main_program.
        Returns:
            serial_main_program: completed annotated serial_main_program.
        NT)r   rX  rY  _is_initialized_init_dist_attr_for_program_init_global_mesh_for_programrb  rc  )r   rX  s     r-   complete_prim_annotationz"Completer.complete_prim_annotationg	  s     &"&"4"H6ID3-1*66888**,,,6688899;;;;;r/   c                    ddl m}  |            j        }| j        j        j        D ]}|j                                        D ]9}| j                            |          }|J t          |          |j
        _        :|j        D ]}| j                            |          }|J t          |          |j
        _        t          |d          }|st          j        |j
                  }	|D ]W}
|
                    |          }|
                    |          r$|
j        |j
        _        |
j        |j
        _         n|	|_
        Xd S )Nr   r   Tr   )r_  r   r  r   rY  r  rp  r  rq  r
   r   rb   re  r  r   r   r   r   r   r   r   r   r   )r   r   rb  r|  r  r  r+   r   r   r   r   r   s               r-   rt  z'Completer._init_global_mesh_for_program|	  s   	
 	
 	
 	
 	
 	
 .-//5'<C 	D 	DE*++-- N N"0LL  #...5@5M5M%22i D D,DDRHH***1<[1I1I!. !K! ! ! !,*.-8I*J*J'(5 D D&2&F&Fw&O&O'::7CC 	D ;G:KG-79E9IG-6!E0CG--/D	D 	Dr/   )TrS   )__name__
__module____qualname__r   r   r   r   r   r   r   r7  r9  rC  rU  rf  rZ  r_  ra  r^  r  r`  rJ  rp  ru  rt  r>   r/   r-   r   r      s       # # #
\ \ \ \|u u u un! ! !F!H !H !HF*0 *0 *0X:L :L :Lx}8 }8 }8~L L L:M3 M3 M3^(" (" ("T$# $# $# $#L8C 8C 8Ct= = =*l l l\M M M^L< L< L< L<\W W W WrlL lL lL lL\e e eN< < < <*(D (D (D (D (Dr/   r   )9r  r   loggingr   rs  r  r3   paddle.base.corer   r   paddle.base.frameworkr   paddle.base.log_helperr   /paddle.distributed.fleet.meta_optimizers.commonr   paddle.frameworkr   rb   r
   r   dist_attributer   r   r   r   operators.commonr   r   r   process_groupr   utilsr   r   r   r   r   r   r   r   r   rw  INFOr   r   r*   r   r.   r:   rF   rQ   r]   rd   r   r   r   r>   r/   r-   <module>r     s         				  				         + * * * * * - - - - - - B B B B B B ! ! ! ! ! ! G G G G G G G G < < < < < < < < " " " " " "         
 3 2 2 2 2 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 *gl H      >  D D D
	C 	C 	C  0  .  	 	 	'Q 'Q 'QT& & &(I#D I#D I#D I#D I#D I#D I#D I#D I#D I#Dr/   