
    Αi                       S SK r S SKrS SKJr  S SKJr  S SKrS SKJr  S SK	J
r
  S SKJrJrJrJr  S SKJr  SS	KJrJrJrJrJrJrJr  SS
KJr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$J%r%  SS/q&SS/r'S r( " S S5      r) " S S5      r* " S S5      r+ " S S5      r, " S S5      r- " S S5      r. " S S5      r/ " S  S!5      r0 " S" S#5      r1 " S$ S%5      r2g)&    N)OrderedDict)reduce)OpRole)ExecutionStreamType)LayerHelperOpProtoHolderProgramcore)unique_name   )AllgatherOpCostCommContextConcatOpCost
SendOpCostSliceOpCostSplitOpCostbuild_comm_desc)DistributedContext)new_process_group)_g_gradient_clip_opsis_gradient_clip_opis_optimize_opis_reshard_op*naive_set_dist_op_attr_for_program_by_mesh6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_var_dist_attrcheck_finite_and_unscaleupdate_loss_scalingwhileconditional_blockc                     SnXR                   ;   a  UR                   U    nOUR                  U 5      nUc   UR                   S35       eU$ )z=Get var in the parent block if not found in the current blockNz is not found)vars_var_recursivename)var_nameblockprogramvars       o/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/static/reshard.pyget_var_with_recursionr*   5   sN    
C::jj"""8,?6sxxj66?J    c                   4    \ rS rSrSrS r\S 5       rS rSr	g)	EndOpDescA   z
Describe to end reshard parse process.
It is supposed to contain a list of variables which are the outputs of one reshard process.

Args:
    vars (list): a list of variables.
c                     Xl         g N_vars)selfr"   s     r)   __init__EndOpDesc.__init__J   s    
r+   c                     U R                   $ r0   r1   r3   s    r)   r"   EndOpDesc.varsM       zzr+   c                 "    SU R                    S3$ )NzEnd vars : .r1   r7   s    r)   __repr__EndOpDesc.__repr__Q   s    TZZL**r+   r1   N)
__name__
__module____qualname____firstlineno____doc__r4   propertyr"   r<   __static_attributes__ r+   r)   r-   r-   A   s%      +r+   r-   c                   x    \ rS rSrSrSS jr\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       rS	 rS
rg)AllGatherOpDescU   z
Describe the allgather op in the reshard phase.

Args:
    group (list): Process group.
    shape (list): The tensor shape.
    is_bool (bool): Whether allgather bool data. Default: False.
c                 B    Xl         SU l        X l        X0l        X@l        g )N
all_gather)_group_desc_shape_is_bool_need_split)r3   groupshapeis_bool
need_splits        r)   r4   AllGatherOpDesc.__init___   s    !
%r+   c                     U R                   $ r0   rN   r7   s    r)   rR   AllGatherOpDesc.is_boolf       }}r+   c                     U R                   $ r0   rK   r7   s    r)   rP   AllGatherOpDesc.groupj       {{r+   c                     U R                   $ r0   rL   r7   s    r)   descAllGatherOpDesc.descn   r9   r+   c                     U R                   $ r0   rM   r7   s    r)   rQ   AllGatherOpDesc.shaper   r\   r+   c                     U R                   $ r0   )rO   r7   s    r)   rS   AllGatherOpDesc.need_splitv       r+   c                     SU R                    SU R                   SU R                   SU R                   SU R                   S3$ )Nop: 	, group: 	, shape: , is_bool: z, need_split: r;   )rL   rK   rM   rN   rO   r7   s    r)   r<   AllGatherOpDesc.__repr__z   s]    djj\4;;-y[Y]YfYfXgguvz  wG  wG  vH  HI  J  	Jr+   )rL   rK   rN   rO   rM   N)FT)r>   r?   r@   rA   rB   r4   rC   rR   rP   r_   rQ   rS   r<   rD   rE   r+   r)   rG   rG   U   sv    &            Jr+   rG   c                   h    \ rS rSrSrSS jr\S 5       r\S 5       r\S 5       r	\S 5       r
S rS	rg
)AllGatherConcatOpDesc~   z
Describe the c_concat op in the reshard phase.

Args:
    group (list): Process group.
    shape (list): The tensor shape.
    is_bool (bool): Whether c_concat bool data. Default: False.
c                 6    Xl         SU l        X l        X0l        g )Nc_concat)rK   rL   rM   rN   )r3   rP   rQ   rR   s       r)   r4   AllGatherConcatOpDesc.__init__   s    
r+   c                     U R                   $ r0   rV   r7   s    r)   rR   AllGatherConcatOpDesc.is_bool   rX   r+   c                     U R                   $ r0   rZ   r7   s    r)   rP   AllGatherConcatOpDesc.group   r\   r+   c                     U R                   $ r0   r^   r7   s    r)   r_   AllGatherConcatOpDesc.desc   r9   r+   c                     U R                   $ r0   rb   r7   s    r)   rQ   AllGatherConcatOpDesc.shape   r\   r+   c           	      p    SU R                    SU R                   SU R                   SU R                   S3	$ )Nrh   ri   rj   rk   r;   )rL   rK   rM   rN   r7   s    r)   r<   AllGatherConcatOpDesc.__repr__   s8    djj\4;;-y[Y]YfYfXgghiir+   )rL   rK   rN   rM   NF)r>   r?   r@   rA   rB   r4   rC   rR   rP   r_   rQ   r<   rD   rE   r+   r)   rn   rn   ~   sb             jr+   rn   c                       \ rS rSrSrSS jr\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       rS
 rSrg)
SendOpDesc   a  
Describe the send op in the reshard phase.

Args:
    partition_index (list): The index of partition in complete tensor.
    src (int): The source process to send.
    dst (int): The destination process to receive.
    is_bool (bool): Whether send bool data. Default: False.
c                 P    X0l         Xl        SU l        / U l        X@l        X l        g )Nsend)_dst_partition_indexrL   rM   rN   _srcr3   partition_indexsrcdstrR   s        r)   r4   SendOpDesc.__init__   %    	 /
	r+   c                     U R                   $ r0   r   r7   s    r)   r   SendOpDesc.src       yyr+   c                     U R                   $ r0   rV   r7   s    r)   rR   SendOpDesc.is_bool   rX   r+   c                     U R                   $ r0   r   r7   s    r)   r   SendOpDesc.partition_index       $$$r+   c                     U R                   $ r0   r   r7   s    r)   r   SendOpDesc.dst   r   r+   c                     U R                   $ r0   r^   r7   s    r)   r_   SendOpDesc.desc   r9   r+   c                     U R                   (       d7  U R                   H'  nU R                   R                  US   US   -
  5        M)     U R                   $ Nr   r   rM   r   appendr3   items     r)   rQ   SendOpDesc.shape   B    {{,,""47T!W#45 -{{r+   c                     SU R                    SU R                   SU R                   SU R                   SU R                   S3$ Nrh   z, partition_index: z, dst: rj   rk   r;   rL   r   r   rM   rN   r7   s    r)   r<   SendOpDesc.__repr__   j    djj\!4T5J5J4K7SWS\S\R]]fgkgrgrfss~  @D  @M  @M  N  NO  P  	Pr+   rL   r   rN   r   rM   r   Nr}   )r>   r?   r@   rA   rB   r4   rC   r   rR   r   r   r_   rQ   r<   rD   rE   r+   r)   r   r               % %      Pr+   r   c                       \ rS rSrSrSS jr\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       rS
 rSrg)
RecvOpDesc   a  
Describe the recv op in the reshard op.

Args:
    partition_index (list): The index of partition in complete tensor.
    src (int): The source process to send.
    dst (int): The destination process to receive.
    is_bool (bool): Whether receive bool data. Default: False.
c                 P    X l         Xl        SU l        / U l        X@l        X0l        g )Nrecv)r   r   rL   rM   rN   r   r   s        r)   r4   RecvOpDesc.__init__   r   r+   c                     U R                   $ r0   r   r7   s    r)   r   RecvOpDesc.dst   r   r+   c                     U R                   $ r0   rV   r7   s    r)   rR   RecvOpDesc.is_bool   rX   r+   c                     U R                   $ r0   r   r7   s    r)   r   RecvOpDesc.partition_index   r   r+   c                     U R                   $ r0   r   r7   s    r)   r   RecvOpDesc.src   r   r+   c                     U R                   $ r0   r^   r7   s    r)   r_   RecvOpDesc.desc   r9   r+   c                     U R                   (       d7  U R                   H'  nU R                   R                  US   US   -
  5        M)     U R                   $ r   r   r   s     r)   rQ   RecvOpDesc.shape   r   r+   c                     SU R                    SU R                   SU R                   SU R                   SU R                   S3$ r   r   r7   s    r)   r<   RecvOpDesc.__repr__  r   r+   r   Nr}   )r>   r?   r@   rA   rB   r4   rC   r   rR   r   r   r_   rQ   r<   rD   rE   r+   r)   r   r      r   r+   r   c                   x    \ rS rSrSrSS jr\S 5       r\S 5       r\S 5       r	\S 5       r
\S	 5       rS
 rSrg)SliceOpDesci  aG  
Describe the slice op in the reshard phase.

Args:
    starts (list): It represents start indices of corresponding axis in ``axes``.
    ends (list):  It represents end indices of corresponding axis in ``axes``.
    axes (list):  Axes that `starts` and `ends` apply to.
    shape (list): The shape of the tensor to be sliced.
Nc                 B    Xl         X l        X0l        SU l        X@l        g )Nslice)_starts_ends_axesrL   rM   )r3   startsendsaxesrQ   s        r)   r4   SliceOpDesc.__init__  s    


r+   c                     U R                   $ r0   )r   r7   s    r)   r   SliceOpDesc.starts  s    ||r+   c                     U R                   $ r0   )r   r7   s    r)   r   SliceOpDesc.ends  r9   r+   c                     U R                   $ r0   )r   r7   s    r)   r   SliceOpDesc.axes   r9   r+   c                     U R                   $ r0   r^   r7   s    r)   r_   SliceOpDesc.desc$  r9   r+   c                     U R                   $ r0   rb   r7   s    r)   rQ   SliceOpDesc.shape(  r\   r+   c                    U R                   bD  SU R                   SU R                   SU R                   SU R                   SU R                    S3$ SU R                   SU R                   SU R                   SU R                   S3	$ )Nrh   z
, starts: z, ends: z, axes: rj   r;   )rM   rL   r   r   r   r7   s    r)   r<   SliceOpDesc.__repr__,  s    ;;"$**Z~Xdjj\QYZ^ZdZdYeenosozozn{{|}}$**Z~Xdjj\QYZ^ZdZdYeefggr+   )r   rL   r   rM   r   r0   )r>   r?   r@   rA   rB   r4   rC   r   r   r   r_   rQ   r<   rD   rE   r+   r)   r   r     sv              hr+   r   c                   D    \ rS rSrSrS r\S 5       r\S 5       rS r	Sr
g)	ConcatOpDesci3  z}
Describe the concat op in the reshard phase.

Args:
    partition_index_list (list): The list contains all partition index.
c                     Xl         SU l        g )Nconcat)_partition_index_listrL   )r3   partition_index_lists     r)   r4   ConcatOpDesc.__init__;  s    %9"
r+   c                     U R                   $ r0   )r   r7   s    r)   r   !ConcatOpDesc.partition_index_list?  s    )))r+   c                     U R                   $ r0   r^   r7   s    r)   r_   ConcatOpDesc.descC  r9   r+   c                 <    SU R                    SU R                   S3$ )Nrh   z, partition_index_list: r;   rL   r   r7   s    r)   r<   ConcatOpDesc.__repr__G  s$    djj\!9$:T:T9UUVWWr+   r   N)r>   r?   r@   rA   rB   r4   rC   r   r_   r<   rD   rE   r+   r)   r   r   3  s:     * *  Xr+   r   c                       \ rS rSrSr\SS j5       r\SS j5       r\SS j5       r\SS j5       r	\SS j5       r
\ SS j5       r\ SS	 j5       r\SS
 j5       r\ SS j5       r\SS j5       r\ SS j5       rSrg)InserteriK  z*Insert op required in the reshard process.c           
      x   [         R                  R                  R                  SR	                  SS/5      5      nU R                  UUUR                  UR                  S9nU(       a  U R                  OU R                  nU" USSU/0SU/0UR                  UR                  US.S	9n	U	R                  S
S5        U$ )Nr;   zcast@RESHARDtmpr$   dtypetype	lod_levelcastXOutin_dtype	out_dtypeop_roler   inputsoutputsattrsop_namescope/auto_parallel/reshard)paddleutilsr   generate_with_ignorable_keyjoin
create_varr   r   
_insert_op_insert_op_without_syncr   	_set_attr)
r&   idxtensorr   tensor_typesyncnew_var_nameoutinsert_operationcast_ops
             r)   insert_cast_opInserter.insert_cast_opN  s     ||//KKHHne,-
 &&	  
 !%E%*G*G 	 #&?SEN"LL YY"

 	.*BC
r+   c                     SnU(       a  U R                   OU R                  n[        X4/SS9n	U" UUSU/0U	R                  U	R                  R                  U5      SUSS.S9n
U
R                  SS	5        g
)z-Insert send op into block at the given index.send_v2p2p
group_typer   T)ring_idpeeruse_calc_streamr   dynamic_shape)r   r   r   r   r   N)r   r   r   idranksindexr   )r&   r   r  r   r   r   r  op_typer  process_groupsend_ops              r)   insert_send_opInserter.insert_send_opl  s      $E%*G*G 	 *3*G"&?(++%++11#6#'"!%	
 	.*BCr+   c                    SnU(       a  U R                   OU R                  n[        X4/SS9n	U" UUSU/0SU/0U	R                  U	R                  R                  U5      UR                  UR                  SUSS.S9n
U
R                  S	S
5        g)z-Insert recv op into block at the given index.recv_v2r  r  r   r   T)r  r  	out_shaper   r  r   r  r   r   r   N)	r   r   r   r  r  r  rQ   r   r   )r&   r   r  r   r   r   r  r  r  r  recv_ops              r)   insert_recv_opInserter.insert_recv_op  s      $E%*G*G 	 *3*G"&?VH%(++%++11#6#\\#'"!%
 	.*BCr+   c                 r   [         R                  R                  R                  SR	                  SS/5      5      nU(       a  U R
                  OU R                  nU R                  UUR                  UR                  UR                  UR                  S9nU" USX#S.SU0SU0S	9n	U	R                  S
S5        U$ )z2Insert reset_lod op into block at the given index.r;   zreset_lod@RESHARDr   r$   rQ   r   r   r   	lod_resetr   Yr   r   r   r   r   )r   r   r   r   r   r   r   r   rQ   r   r   r   r   )
r&   r   r   r%  r   r  r  r  reset_lod_outreset_ops
             r)   insert_reset_lod_opInserter.insert_reset_lod_op  s     ||//KKHH)512
 !%E%*G*G 	 ((''''kk ) 
 $#M*g&
 	>+CDr+   c                 F   SU0n0 nX7S'   XGS'   U(       a  U R                   OU R                  n[        S0 [        5       D6n	[        R
                  R                  U R                  5         U R                  [        R                  R                  R                  SR                  U	R                  S/5      5      US   R                  SUS   R                  US   R                   SSS	9n
SSS5        U" US
USW
/0US9nUR#                  SS5        U
$ ! , (       d  f       N/= f)z/Insert concat op into block at the given block.r   axisr   r;   r   r   NFr$   r   rQ   r   r   persistablestop_gradientr   r   r   r   r   )zconcat@RESHARD)r   r   r   localsr   staticprogram_guardr'   r   r   r   r   r   r$   r   r   r   r   )r&   r   tensorsr+  r   r  r   r   r  helperr  	concat_ops               r)   insert_concat_opInserter.insert_concat_op  s    wf"i !%E%*G*G 	 ::]]((7""\\--IIHHfkk512 aj&&!!*..QZ__!# # 
C 8 %SEN
	 	N,DE
) 87s   *A?D
D c	                    UR                   n	[        [        U5      5       V
s/ s H  oU
   X:   -
  PM     nn
/ n[        U5       H   u  pXU   :w  d  M  UR	                  U5        M"     U(       a  U R
                  OU R                  n[        U5      S:X  a^  U R                  UUR                  UR                  UUR                  S9nSU/0nSU/0nSUS.nU" USUUUS9nUR                  S	S
5        U$ [        U5      S:X  GaQ  US   nU	U   UU   -  nUnUU   UU   -  nU	nSU0nUUUS.n/ n[        UR                   5       H2  u  pUU:w  a  UR	                  U5        M  UR	                  UU-  5        M4     [        R                  R                  U R                  5         [        U5       V
s/ s Hn  n
U R                  [        R                   R"                  R%                  SR'                  SS/5      5      UR                  SUR                  SUR                  SS9PMp     nn
UU   nSSS5        U" USUSW0US9nUR                  S	S
5        W$ SU0n[        [        U5      5       V
s/ s H  n
SPM     nn
UUUUUS.nU R                  UUR                  UR                  UR                  S9nU" USUSU/0US9nUR                  S	S
5        U$ s  sn
f s  sn
f ! , (       d  f       N= fs  sn
f )z.Insert slice op into block at the given block.r   )r$   r   r   rQ   r   r   r   F)in_placer   assignr   r   r   r   numr+  r   r;   split@RESHARDr   N)r$   r   rQ   r   r-  r   r.  splitInput)r   r   r   infer_flagsr   r   r   )rQ   rangelen	enumerater   r   r   r   r   r   r   r   r   r0  r1  r'   r   r   r   r   )r&   r   r  r   r   r   r  r   r  global_shapeislice_shape	diff_dimsr  r   r  r  r   r   r   	assign_opdiff_dimnum_or_sectionsr+  cur_idxinput_shape	new_shapeoutssplit_opr?  slice_ops                                  r)   insert_slice_opInserter.insert_slice_op  sW    ||49#f+4FG4FqAw*4FG	$[1KEE**  ' 2 !%E%*G*G 	
 y>Q""!ll[[! ** # C F8_FsenG!&7;E((67%I 0HIJ ^q  |H*84H8MMODX&+h*??G&K6]F+TgNEI(6D=$$T*$$T_%<=	  7
 ,,U]]; #?3 4 $$#\\55QQHHou%=> %ll"#[[$)"("2"2&+ % 
 4   7m <  (H ~/GHJ v&F&+CI&67&61&6K7 *"E ""!ll[[ **	 # C (H ~/GHJ{ HV <;: 8s*   K$K!2A5K'K!0K2K!!
K/c                    [        S0 [        5       D6nUR                  nSU0n	X5US.n
U(       a  U R                  OU R                  n/ n[        UR                  5       H0  u  pX:w  a  UR                  U5        M  UR                  X-  5        M2     [        R                  R                  U R                  5         [        U5       Vs/ s Hx  nU R                  [        R                  R                  R                  SR!                  UR"                  S/5      5      UR$                  SUR&                  UR(                  SSS9PMz     nnSSS5        U" USU	S	W0U
S
9nUR+                  SS5        U$ s  snf ! , (       d  f       N3= f)z.Insert split op into block at the given index.r   r:  r;   r   NFr,  r=  r   r   r   r   )r<  )r   r/  rQ   r   r   rB  r   r   r0  r1  r'   r@  r   r   r   r   r   r$   r   r   r   r   )r&   r   r  rI  r   r+  r  r3  rK  r   r   r  rL  r  r   rD  rM  rN  s                     r)   insert_split_opInserter.insert_split_opF  so   
 99llv''J $E%*G*G 	 	$V\\2KE}  &  !89	 3
 ]]((7 / 0A   11MM&++u!56 !,,$.. %"' ! 
 0   8 $gfudm5
 	>+CD% 87s   <E8
A?E3	E83E88
Fc           
         [        S0 [        5       D6n[        R                  R	                  U R
                  5         U R                  [        R                  R                  R                  SR                  UR                  S/5      5      [        R                  S[        R                  R                  R                   SSS9nSSS5        0 nSS0n[#        [%        S5      5      US'   [%        S5      US	'   WR&                  US
'   X(S'   [        R                  R)                  XxUSS9  U(       a  U R*                  OU R,                  n	U	" USUSU/0US9n
SUl        U
R1                  SS5        U$ ! , (       d  f       N= f)z6Insert fill constant op into block at the given index.r;   r   NF)r$   r   rQ   r   r-  r.  	force_cpu1	str_valuevaluer   r   fill_constant)r   r   rQ   r  r   r   Tr   r   )zfill_constant@RESHARD)r   r/  r   r0  r1  r'   r   r   r   r   r   r$   int64r
   VarDescVarTypeDENSE_TENSORstrintr   get_shape_tensor_inputsr   r   r.  r   )r&   r   r   rQ   r  r3  r  r   r   r  fillconstant_ops              r)   insert_fill_constant_op Inserter.insert_fill_constant_opn  s]    AA]]((7""\\--IIHHfkk512 ll\\))66!# # 	C 8 e$ S]kSgg"i,,e_ 	- 	

 !%E%*G*G 	 + SEN
 !!!.2JK
C 87s   BE00
E>c                    / n[        U5      nSn	Sn
[        U
S-   40 [        5       D6nU(       a  U R                  OU R                  n[
        R                  R                  U R                  5         U R                  [
        R                  R                  R                  SR                  UR                  S/5      5      UR                  SUR                   UR"                  SSS9nSSS5        U" X-   U
S	U/0S
W/0UR$                  UR&                  US.S9nUR)                  SS5        [*        R,                  R.                  UR0                  l        U	S-  n	U(       a=  [4        R7                  U X-   UUR&                  UUS9nU	S-  n	UR9                  U5        Xy4$ UR9                  U/5        Xy4$ ! , (       d  f       N= f)z2Insert allgather op into block at the given index.r   rJ   @RESHARDr;   r   NFr,  xr  )r  nranksr   r   r   r   r   r  )r   r   r/  r   r   r   r0  r1  r'   r   r   r   r   r   r$   r   r   r   r  rh  r   r   DefaultStreamrY  	dist_attrexecution_streamr   rS  extend)r&   r   r  r  r   rS   r  tensor_listrP   
idx_offsetr  r3  r  allgather_outallgather_op	split_outs                   r)   insert_allgather_opInserter.insert_allgather_op  s   
 !%(
 Wz1>VX> $E%*G*G 	 ]]((7!,,\\--IIHHfkk512 ll **[[!# - 
M 8 (&?]O, 88,,"

 	~/GH--33 	/ 	a
  00  1 I !OJy) && /&&W 87s   1A6F22
G c                    [        U5      nSnU(       a  U R                  OU R                  nSn	[        U	S-   40 [	        5       D6n
[
        R                  R                  U R                  5         U R                  [
        R                  R                  R                  SR                  U
R                  S/5      5      UR                  SUR                   UR"                  SSS9nSSS5        [
        R$                  R'                  5       nU" X-   U	S	U/0S
W/0UR(                  SSUR*                  UX;   a  UR,                  R/                  U5      OSS.S9nUR1                  SS5        U$ ! , (       d  f       N= f)z1Insert c_concat op into block at the given index.r   rq   rf  r;   r   NFr,  r   r   T)r  r  use_model_parallelrh  r   rankr   r   r   )r   r   r   r   r/  r   r0  r1  r'   r   r   r   r   r   r$   r   r   r   distributedget_rankr  rh  r  r  r   )r&   r   r  r  r   r  rP   ro  r  r  r3  c_concat_outcur_rankc_concat_ops                 r)   insert_c_concat_opInserter.insert_c_concat_op  sZ    "%(
 $E%*G*G 	
 Wz1>VX>]]((7 ++\\--IIHHfkk512 ll **[[!# , 
L 8 %%..0&&?\N+ 88#'&*,,"7?7H))(3a
 	n.FG9 87s   /A6E,,
E:c           
         U (       d  U R                  X45        gSnSnU[        U 5      :  a  [        R                  X   S   U5      u  n	n
nU	S:w  a  SnU
S:X  a"  [        R                  UUS   X   S   U/U	UUS9O![        R                  UUS   XU   S   /U	UUS9nU R                  U5        US==   S-  ss'   [        R                  U UUUUUUS9  OUS-  nU[        U 5      :  a  M  U(       d  U R                  X45        gg)z(Concat the tensors and insert concat op.r   Fr   Tri  N)r   rA  	Reshardercompute_concat_infor   r5  popconcat_partitions_with_op)partition_tensor_listr  r   r&   r   r   r  rD  
has_concatconcat_axisfirst_ordernew_partition_s                r)   r  "Inserter.concat_partitions_with_op   si    %!((&)BCAJc/00
 11),Q/	! "$!%J '!+ !11!F25a8&A'#!% 2  &66!F#1%=a%@A'#!% 7  & *--a0FaKF66-%! 7  QS c/00T %,,f-FG r+   rE   NT)r   T)r>   r?   r@   rA   rB   staticmethodr  r  r  r(  r5  rP  rS  rc  rs  r}  r  rD   rE   r+   r)   r   r   K  s    4 : D D, D D2  8    D LPf fP CG% %N & &P =A;' ;'z ( (T  :H :Hr+   r   c                   X    \ rS rSrSr\S 5       r\S 5       r\S 5       r\S 5       r	Sr
g)	Removeri>  z)Remove var and op in the reshard process.c           
         / SQn/ n[         R                   H  nUR                  U5        M     [        U R                  5       H  u  pVXT;  d  M  UR                  U5        M     U GHt  n/ nU R                  U   nUR
                  nUR                  n	[        U5       GH  u  pUR                  S:X  ax  / nUR                   H(  nUR                  [        XU 5      R                  5        M*     [        U
SS5       H+  nX   R                  S:X  d  M  X   R                  SU5          O   M  UR                  S:X  Ga  / nUR                   HK  nUR                  [        XU 5      5      R                   nUUR"                  ;   d  M:  UR                  U5        MM     U(       d  UR                  U
5        GM  [$        R&                  " 5       R)                  UR                  5      nUR*                  R-                  UR.                  S   R0                  U5        UR*                  R3                  UR4                  S   R0                  U5        GM  UR7                  U5      nUc  GM  UR                   nUUR"                  ;  d  GM  UR                  U;  d  GM  UR                  U
5        GM     USSS2    H  n
UR9                  U
S	S
9  M     UR;                  5         GMw     g)z&Remove no need ops in the main program)create_py_readercreate_double_buffer_readerreadr  r  r  shape_concatc_sync_comm_streamr   NFri  )r  while_block_infor   rB  blocksopsr"   r   output_arg_namesrm  r*   rQ   r@  r   input_arg_names get_tensor_dist_attr_for_programprocess_meshprocess_idsr   instanceget_op_protor_   	set_inputr   r$   
set_outputr   get_op_dist_attr_for_program
_remove_op_sync_with_cpp)auto_parallel_main_progdist_contextrank_idnot_remove_op_refremove_block_order	block_idxr&   remove_op_idxr  r"   r   opdim_listr%   rD  	need_saver  protoop_dist_attrop_process_meshs                       r)   remove_no_need_opsRemover.remove_no_need_opsA  s   
  "33I%%i0 4 !**A*H*H II2")))4 !J
 ,IM+229=E))C::D$S>77f$!H$&$7$7 2 (1H#e %8 #3B/6;;*<<F,,^XF! 0  7722 "I$&$6$6(II 6$,5L!" +l % #l&>&>>%,,X6 %7 %%,,S1 )224AA"''JEGG%%ell1o&:&:IFGG&&u}}Q'7'<'<iH  ,HHL+&2&?&?O'B'BBGG+<<%,,S1[ *^ %TrT*  5 1 +  "m ,r+   c                    [        U R                  5       GHG  u  p4[        5       nUR                  nUR                  n[        5       nU HY  n	U	R
                   H  n
X;   d  M
  UR                  U
5        M     U	R                   H  n
X;   d  M
  UR                  U
5        M     M[     U H  nX;  d  M
  UR                  U5        M     US:X  Ga^  0 nU H  n	[        U	R                  S5      5      [        [        R                  5      :X  d  M:  SU	R                  ;   d  ML  SU	R                  ;   d  M^  U	R                  S5      S   nU	R                  S5      S   nXU'   M     / n[        U5       H:  u  nnUS   R                  UR                  5       ;  d  M)  UR!                  U5        M<     USSS2    H  nUR#                  U5        M     SnU[%        U5      :  aP  UU   S   R                  nUU   S   R                  nXU   :w  a  X}   X|U      4UU'   US-  nU[%        U5      :  a  MP  U H  nX;   a  M
  UR'                  USS	9  M     UR)                  5         GMJ     g)
z'Remove no need vars in the main programr   r   ParamGradNr  r   Fri  )rB  r  setr  r"   r  addr  r`  attrr   Optimizeinput_namesinputr$   keysr   r  rA  _remove_varr  )r  dist_params_gradsfeed_var_namesr  r&   remove_varsr  r"   	need_varsr  r%   r(   param_grad_map
param_name	grad_nameneed_remove_idxr   r   s                     r)   remove_no_need_varsRemover.remove_no_need_vars  sH   
 !**A*H*H II%K))C::DI " 2 2H'!h/ !3 !# 3 3H'!h/ !4	  'OOC( 
 A~!#B2779-.#foo2FF#r~~5 &".. 8)+'):1)=J(*(8(;I9B:6  #%!*+<!=ICAw||>+>+>+@@'..s3 "> +4R40C%))#. 1 C 122!23!7!:!?!?J 1# 6q 9 > >I :$>> , 
!;<2)#. 1HC C 122 #(!!#E!2 #   "i !Jr+   c                 F   [         R                  XU5        [        R                  X5        / n[	        [
        R                  [        UR                  R                  5       5      / 5       H  nUR                  UR                  5        M      [         R                  XU5        g)z0Remove no need vars and ops in the main program.N)r  r  r   change_while_op_input_and_outputr   operatoriaddlistserial_feed_varsvaluesr   r$   r  )r  r  r  r  r  r(   s         r)   remove_no_need_in_mainRemover.remove_no_need_in_main  s    
 	""#7	
 	22#	
 MM4 = = D D FG
C !!#((+
 	###	
r+   c                    [        5       nU R                  5       R                  nU H'  nUR                   H  nUR	                  U5        M     M)     UR                  5       n[        5       nUR                  nU H9  nUR
                  S:X  a  M  UR                   H  nUR	                  U5        M     M;     [        5       n	U H  nXR;   d  M
  U	R	                  U5        M     UR                  n[        5       n
[        U5       H  u  pSnUR
                  S:X  a  M  UR                   H  nXY;   d  M
  Sn  O   U(       d  M>  UR                   H  nU
R	                  U5        M     UR                   H  nU
R	                  U5        M     M     [        5       nUR                   H  nXZ;  d  M
  UR	                  U5        M     U H  nUR                  USS9  M     UR                  5         / nUR                  n[        UR                  5       GH-  u  pSnUR
                  S:X  a  / nUR                   H  nUU;   d  M  UR                  U5        M     U(       d  UR                  U5        O[        R                  " 5       R                  UR
                  5      nUR                  R!                  UR"                  S   R$                  U5        UR                  R'                  UR(                  S   R$                  U5        M  UR                   H  nUU;  d  M  Sn  O   U(       d  GM  UR                  U5        GM0     USSS2    H  nUR+                  USS9  M     UR                  5         g)z3Remove no need vars and ops in the startup program.r  FTri  r   Nr  )r  global_blockr  r  r  r   r  rB  r"   r  r  r   r   r  r  r_   r  r   r$   r  r   r  )r  auto_parallel_startup_progmain_input_varsmain_opsr  r%   startup_blockstartup_output_varsstartup_opsr  actual_need_varsr   
is_need_opr  r(   r  r"   is_no_need_op	var_namesr  s                       r)   remove_no_need_in_startup!Remover.remove_no_need_in_startup  s   
 %*779==B..##H- /  3??A!e#''Bww..//#''1 0	  E	+H*h' , $''5 -GCJww..//(!%J 0 z " 3 3H$((2 !4 " 2 2H$((2 !3 . e%**H/) + C%%c%6 $$&!! !2!23GC!Mww..	 " 2 2H4'!((2 !3 !!((-)224AA"''JEGG%%ell1o&:&:IFGG&&u}}Q'7'<'<iH//4'$(M 0 }$$S)) 4* !2&C$$Su$5 '$$&r+   rE   N)r>   r?   r@   rA   rB   r  r  r  r  r  rD   rE   r+   r)   r  r  >  sY    3H# H#T 8# 8#t 
 
( J' J'r+   r  c                      \ rS rSrSr0 r S,S jr\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       r\S
 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rS rS rS rS rS-S jrS r  S.S jr S/S jr S r!S r"S r#S  r$S! r%S" r&S# r'S$ r(S% r)S& r*S' r+S( r,S) r-S* r.S+r/g)0r  i*  a  
Reshard tensor in the program according to its distributed attribute and corresponding op distributed attribute.

Args:
    auto_parallel_main_prog (Program): An auto parallel main program.
    auto_parallel_startup_prog (Program): An auto parallel startup program.
    rank_id (int): The process id.
    dist_context (DistributedContext): The distributed context of this rank.
    dist_params_grads (list): The list contains the tuple of param and grad.
    batch_size (int): The batch size. Default: None.
Nc                 *   [        U[        5      (       d   S[        U5       S35       eUb)  [        U[        5      (       d   S[        U5       S35       e[        U[        5      (       d   S[        U5       S35       e[        U[        5      (       d   S[        U5       S35       eUb)  [        U[        5      (       d   S[        U5       S35       eXl        X l        X0l        X@l        XPl	        X`l
        0 U l        0 U l        0 U l        0 U l        g )Nz?The type of auto_parallel_main_prog should be Program, but got r;   zJThe type of auto_parallel_startup_prog should be Program or None, but got z+The type of rank_id should be int, but got z?The type of dist_context should be DistributedContext, but got z.The type of batch_size should be int, but got )
isinstancer	   r   r`  r   _auto_parallel_main_prog_auto_parallel_startup_prog_rank_id_dist_context_dist_params_grads_batch_size	_has_sent	_has_recv_has_allgather_has_resharded)r3   r  r  r  r  r  
batch_sizes          r)   r4   Resharder.__init__9  sO    17;; 	
345Q8	
; &15w??  :;<A?? '3'' 	
9$w-J	
' ,(:;; 	
L)*!-	
;
 !j#.. 
+,A/.
 )@%+E()"3%  r+   c                     U R                   $ r0   )r  r7   s    r)   r  !Resharder.auto_parallel_main_proge  s    ,,,r+   c                     U R                   $ r0   )r  r7   s    r)   r  $Resharder.auto_parallel_startup_progi  s    ///r+   c                     U R                   $ r0   )r  r7   s    r)   r  Resharder.rank_idm  rX   r+   c                     U R                   $ r0   )r  r7   s    r)   r  Resharder.dist_contextq  s    !!!r+   c                     U R                   $ r0   )r  r7   s    r)   r  Resharder.dist_params_gradsu  s    &&&r+   c                     U R                   $ r0   )r  r7   s    r)   r  Resharder.batch_sizey  rf   r+   c                     U R                   $ r0   )r  r7   s    r)   has_sentResharder.has_sent}      ~~r+   c                     U R                   $ r0   )r  r7   s    r)   has_recvResharder.has_recv  r  r+   c                     U R                   $ r0   )r  r7   s    r)   has_allgatherResharder.has_allgather  s    """r+   c                     / n[        U 5       H8  u  pEX   S:X  a  UR                  U5        M   UR                  XRX      -  5        M:     U$ )zCompute the shape of partition.r  rB  r   )complete_shapedims_mappingprocess_shapepartition_shaper   r   s         r)   compute_partition_shape!Resharder.compute_partition_shape  sT     ">2IC B&&&t,&&t\=N/O'OP	 3 r+   c                     UR                  U 5      n/ n[        S US5      n[        [        U5      5       H.  nX5X&   -  -  nXRU   -  nX3U-  U-  -
  nUR	                  U5        M0     U$ )z@Compute the index of process_shape corresponding to the process.c                 
    X-  $ r0   rE   )rg  ys     r)   <lambda>1Resharder.compute_process_index.<locals>.<lambda>  s    aer+   r   )r  r   r@  rA  r   )processr  r	  relative_processprocess_indexproductrD  r   s           r)   compute_process_indexResharder.compute_process_index  s     )..w7+]A>s=)*A"-2B'BCCq!11G w#>#HH    % + r+   c                 *   [         R                  XU5      n[         R                  XU5      n/ n[        [	        U5      5       HL  nX(   S:X  a  UR                  SXX   /5        M"  UR                  XbU      XX   -  XbU      S-   XX   -  /5        MN     U$ )z/Compute the partition index in complete tensor.r  r   r   )r  r  r  r@  rA  r   )	r  r  r  r	  r  r
  r  r   rD  s	            r)   compute_partition_index!Resharder.compute_partition_index  s    
 $;;-
 "77M
 s>*+A"$&&?+='>?&&%1o69KK&A7!;),-	 , r+   c                    SnSnSn/ n[        U 5       H  u  pgXqU   :w  a  US-  nUS   X   S   :X  a-  US   X   S   :  a  UnUR                  US   X   S   /5        MM  US   X   S   :X  a1  US   X   S   :  a!  SnUnUR                  X   S   US   /5        M  M  M  UR                  U5        M     US:X  a  X4U4$ SXE4$ )zYJudge whether two partition can be concatenated and compute concatenated partition index.r   r  r   r  )partition_index_xpartition_index_ydiffer_countr  r  r  r   r   s           r)   r  Resharder.compute_concat_info  s    "#45IC--!G05a88Q"3"8";;"%K!(($q'3D3I!3L)MNG05a88Q"3"8";;"#K"%K!((*;*@*CT!W)MN	 < 9 $$T*# 6& 1]::{11r+   c                     / n[        U 5       H8  u  pEX$   S:X  a  UR                  U5        M   UR                  XQX$      -  5        M:     U$ )zVcompute the complete shape of the slice tensor  with its process mesh and dims mappingr  r  )rE  r	  r  r  r   r   s         r)   compute_complete_shape Resharder.compute_complete_shape  sT     ";/IC B&%%d+%%d<;L-M&MN	 0
 r+   c                 N   U (       d  U R                  U5        gSnSnU[        U 5      :  a`  [        R                  X   U5      u  pEnUS:w  a)  SnU R	                  U5        [        R                  X5        OUS-  nU[        U 5      :  a  M`  U(       d  U R                  U5        gg)z8Concat the given partitions without inserting concat op.r   Fr  Tr   N)r   rA  r  r  r  concat_partitions)r   r   rD  r  r  r  r  s          r)   r$  Resharder.concat_partitions  s     $ ''8AJc.//090M0M(+_1- "$!%J(,,Q///, Q c.// $++O< r+   c                    [         R                   GH  nU R                  U   n[         R                  U   S   nU R                  UR                     n[	        5       n/ nUR
                   H  nUR                  U5      n	U	(       dI  UR                  S:X  a  U	(       a2  UR                  S:X  a  U	(       a  UR                  S:X  d  M[  U	(       a  Md  UR                   H  n
X;  d  M
  UR                  U
5        M     UR                   H  n
UR                  U
5        M     M     SnUR
                   H7  nUR                  R                  5       U:X  d  M#  UR                  S:X  d  M5  Un  O   Uc  GM`  [        R                  " 5       R!                  UR                  5      n/ nUR#                  S5       H  n
X;   d  M
  UR                  U
5        M     U(       d   eUR%                  5         UR                  R'                  UR(                  S   R*                  U5        / nUR-                  S	5       Ha  n
USSS
2    HR  nUR/                  U
5      S
:w  d  M  [1        U
5      [1        U5      :X  d  SU;   d  M:  X;  d  MA  UR                  U5        MT     Mc     U(       d   eUR                  R3                  UR4                  S   R*                  U5        GM     g)zNChange while op input and output after the corresponding sub block ops removedop_idr   r=  r9  Nr   r   r   r   r  rf  )r  r  r  
parent_idxr  r  get_dist_op_for_programr   r  r   r  r  r_   r  r   r  r  r  sortr  r   r$   outputfindrA  r  r   )r  r  sub_block_idx	sub_blockparent_while_op_idparent_blocksub_block_op_inputssub_block_op_outputsr  dist_opr%   while_opr  new_Xnew_Outoutput_names                   r)   r  *Resharder.change_while_op_input_and_output  sO    '77M/66}EI!*!;!;M!J" 399):N:NOL"%%#% mm&>>rB7*77*78+GG$&$7$7#?077A %8 %'$6$6+//9 %7 $  H"&&77::<#55"''W:L!H '
  "**,99(--HEE$NN3/2LL* 0 L5JJLMM##ELLO$8$8%@G$OOE2#7"#=K"''1R7H[)99%4&5#NN;7 $> 3 N7MM$$U]]1%5%:%:GDm 8r+   c                     SnUS   US   s=::  a	  US   :  d  O  US   US   s=::  a	  US   :  a  O  OSnUSS/:X  a
  USS/:X  a  SnU$ )zBJudge whether two partitions intersect on the specified dimension.Fr   r   TrE   )r3   shape_xshape_y
overlappeds       r)   is_overlappedResharder.is_overlapped>  s`    
AJ'!*1wqz1AJ'!*1wqz1Jq!fQF!2Jr+   c                 &    U H  nUS:w  d  M    g   g)Nr  FTrE   )r3   r  dims      r)   
is_unshardResharder.is_unshardI  s    Cby   r+   c                 x    UR                   [        ;   a  g[        U5      (       a  UR                   [        ;   a  gg)NTF)r   _g_special_opsr   r   )r3   r  s     r)   is_special_opResharder.is_special_opO  s.    77n$r""rww2F'Fr+   c                    U R                   R                  UR                  S5      R                     nUR                  S:X  a  UR                  S5      nO!UR                  S:X  a  UR                  S5      nW H^  n[        XBU R                   5      nU R                  R                  U5      nUR                  nUR                  nU H  n	U	S:w  d  M      g   M`     g)	Nr.  r   	Conditionr    Condr  FT)r  r  r  r  r   r  r*   r  get_dist_tensor_for_programrk  r  )
r3   r  r.  
input_condr%   r(   dist_tensortensor_dist_attrvar_dims_mappingr@  s
             r)   is_condition_replicative"Resharder.is_condition_replicativeW  s    00778L8O8OP	77g+.JWW++&)J #H(T%A%AC ++GGLK*44/<<'"9  ( # r+   c                 <   SnUR                   nUR                  nUR                  nUS   n	U(       a  US   n
[        S UUU
U	4 5       5      (       a  Xz:w  a6  UU R                  R
                  ;  a  U H  nUS:w  d  M  [        S5      e   SnX:w  a  SnX:w  aI  [        UR                  5      [        U	R                  5      :X  a  UR                  R                  (       a  SnU$ US   n[        S UUUU	4 5       5      (       a  X|:w  a  [        S	5      eX:w  a  SnU$ )
z/Judge the tensor whether needs to be resharded.Fr   r   c              3   &   #    U  H  nUv   M	     g 7fr0   rE   .0rg  s     r)   	<genexpr>)Resharder.need_reshard.<locals>.<genexpr>y        A    r  z7The dim must be -1 when tensor process mesh is a union.Tc              3   &   #    U  H  nUv   M	     g 7fr0   rE   rS  s     r)   rU  rV    rW  rX  zVIt is not supported that tensor dims mapping is different from op output dims mapping.)rk  r  r  allr  process_meshes
ValueErrorrA  r  serial_tensoris_data)r3   rL  rk  op_inputr3  
is_reshardrM  tensor_dims_mappingtensor_process_meshr  op_input_dims_mappingr   op_output_dims_mappings                r)   need_reshardResharder.need_reshardm  sk   
&00.;;.;; $A,$-aL!  (')#	   '?+#00??@ %8D#rz&0$]'" !" %8
 "&J '9!%J (:/;;<?6678#1199!&J& # &/q\"  ('*#	   '@$p  '9!%Jr+   c                    / nU R                   R                  U5      nUR                  R                  nU R                   R                   Ht  n[        UR                  5      [        UR                  5      -  (       d  M5  [        UR                  5      [        UR                  5      :  d  Mc  UR                  U5        Mv     U(       d  UR                  U5        U$ )zEGet sub process meshes of the given op if op process mesh is a union.)	r  r)  rk  r  r[  r  r  rA  r   )r3   r  r[  r3  r  r  s         r)   get_op_process_meshesResharder.get_op_process_meshes  s    ##;;B?!++88 --<<L<++,O//0 l../#++3  %%l3 = !!/2r+   c                 J   UR                   nUR                  nUR                  nUR                  nUR                  n	UR
                  n
US   nUS   nUR                  nUR
                  nU(       ag  [        [        U5      5      S:X  a!  [        [        [        U5      5      5      S:X  d   e[        U5      R                  [        U	5      5      (       a  Un	Un
UR
                  S   S:  aT  UR
                  S   S:X  d   e[        UR
                  5      nU R                  US'   UR                  R                  U5        U(       d   [        R!                  UR
                  X5      OUR
                  n[#        5       n[        U5      R                  [        U	5      5      (       a+  [        U5      R%                  [        U	5      5      (       a   U$ X:w  Ga(  / nU	 H  n[        R'                  UUUU
U	5      nU(       d  UR)                  UU/S//5        M;  U Vs/ s H  nUS   PM
     nnU Vs/ s H  nUS   PM
     nnU Vs/ s H  nUS   PM
     nnUR+                  U5      S:X  a;  UR-                  U5      nUU   R)                  U5        UU   R)                  S5        M  UR)                  UU/S//5        M     U GH7  n/ n[        R'                  UUUUU5      n/ n/ nU	 GHK  n[        R'                  UUUU
U	5      nSn[/        S [        [1        U R2                  UU5      5       5       5      (       d  MV  UU;  d  M^  U Vs/ s H  nUS   PM
     snR-                  U5      n U Vs/ s H  nUS   PM
     snU    nU Vs/ s H  nUS   PM
     snU    nSn!U![        U5      :  a+  UU!   (       d  UU!   nSUU!'   OU!S-  n!U![        U5      :  a  M+  U![        U5      :X  a  U V"s/ s H  n"SPM     nn"US   nSUS'   Uc   S	5       eUUR5                  5       ;  a  / UU'   UUR5                  5       ;  a  / UU'   UR)                  U5        UR                  R6                  [8        R:                  :H  n#[=        UUUU#S
9n$[?        UUUU#S
9n%UU   R)                  U$5        UU   R)                  U%5        UR)                  U5        [        RA                  UU5        US   n&[C        U&5      [C        [D        RF                  5      :X  d  GM%  U RH                  RJ                  RM                  UU5        GMN     UU   R)                  [O        U5      5        / n'/ n(/ n)US   n*/ n+[Q        U*5       Hk  u  n nU'R)                  UU    S   US   -
  5        U(R)                  UU    S   US   -
  5        U)R)                  U 5        U+R)                  US   US   -
  5        Mm     UU   R)                  [S        U'U(U)U+S95        GM:     U$ / n/ n/ n,U	 Ho  n[        R'                  UUUU
U	5      nUU;  a'  UR)                  U5        U,R)                  U/U/5        MI  U,UR-                  U5         S   R)                  U5        Mq     [U        [        U,S   S   5      5       GH  n!/ n-[U        [        U,5      5       H<  n.U-R)                  U,U.   S   U!   5        U!S:X  d  M%  UR)                  U,U.   S   5        M>     U- GH  n/[V        RX                  " U-5      n0[V        RX                  " U5      n1[        R'                  U/UUUU5      nU- Hu  n2[        R'                  U2UUU
U	5      n[/        S [        [1        U R2                  UU5      5       5       5      (       a  MS  U0R[                  U25        U1R[                  U5        Mw     / n3U1 H  n4[        RA                  U3U45        M     U3S   n*/ n'/ n(/ n)/ n+[Q        U*5       HQ  u  n nU'R)                  UU    S   US   -
  5        U(R)                  UU    S   US   -
  5        U)R)                  U 5        MS     U+R)                  WS   US   -
  5        [S        U'U(U)U+S9n5U(       d  SOUR]                  U/S9n6UR+                  S5      [        U5      :X  a>  USS R+                  S5      [        USS 5      :X  a  US   S:w  a  [_        U-U6S9/UU/'   GM  USS USS :X  aJ  US   S:X  aA  US   S:w  a8  [a        U0U6UR6                  [8        R:                  :H  SS9[c        S5      /UU/'   GMB  [        U05      S:  a0  [a        U0U6UR6                  [8        R:                  :H  S9[O        U1S9U5/OU5/UU/'   GM     GM     U$ s  snf s  snf s  snf s  snf s  snf s  snf s  sn"f )aE  
Find the op description sequence to reshard the source tensor for matching the op requirement.

Args:
    dist_tensor (DistributedTensor): A distributed tensor.
    dist_attr (list): A list contains process_mesh and dims_mapping such as [process_mesh, dims_mapping].
    serial (bool): If serial is true, the dist tensor and dist op come from serial program. Otherwise, they come from auto program.

Returns:
    Dict, the dict represents the required op description sequence corresponding to process, The key of dict is
    process and value is a list containing op description.
r   r   r  F   Nc              3   &   #    U  H  nUv   M	     g 7fr0   rE   rT  r  s     r)   rU  -Resharder.find_op_desc_seq.<locals>.<genexpr>A  s      	& &rX  Tz Failed to find the send process.)rR   )rQ   c              3   &   #    U  H  nUv   M	     g 7fr0   rE   rm  s     r)   rU  rn    s      	#& &rX  )r   r   r   rQ   )rw  )rP   rQ   )rP   rQ   rR   rS   )rP   rQ   rR   )r   )2rk  r]  r  r  r  rQ   rA  r  nextiterintersectionr  r  r_   	set_shaper  r!  r   
differencer  r   countr  rZ  mapr=  r  r   r   boolr   r   r$  r`  r   Forwardr  up_down_streamsadd_pair_streamr   rB  r   r@  copydeepcopyremovelocal_sizesrn   rG   r-   )7r3   rL  rk  serialis_union_process_mesh_tensorrM  source_tensorsource_dims_mappingsource_process_meshsource_process_groupsource_process_shapetarget_process_meshtarget_dims_mappingtarget_process_grouptarget_process_shaperL  r  op_desc_seqpartition_process_mapping_listsource_processsource_partition_indexr   partition_listprocess_listhas_usedr  target_processr  target_partition_indexr   all_partition_index_listto_send_processr   rD  rg  rR   send_op_descrecv_op_descr   slice_starts
slice_endsslices_axesconcatenated_partition_indexto_slice_tensor_shaper  rP   jr  min_comm_groupall_partition_index_list_copied_process!concatenated_partition_index_listr   slice_op_descallgather_shapes7                                                          r)   find_op_desc_seqResharder.find_op_desc_seq  s.   & '00#11.;;.;;2>>288'l'l2>>288 (C+,-2c"5678B>? '(55()  (<$';$q!A% &&q)R///]001I??IaL((3  ,,##%9 $$ 	 "m #$11$%
 
&'2237K3LMMx	 u	 "9-/*"6)2)J)J""'((*& 6299/.1AE7K
 -K&,JDQ,J # & -K$,JDQ,J ! $ -K ,JDQ,J    &++,BCqH . 4 45K L$U+22>B ..u56==3n5EwO= #7D #7)2)J)J""'((*& (*$+-(&:N-6-N-N&&+,,.* '+O 	%) #$($6$6$:$:!"&	 	 	 3(B 1O0NDG0N% 67  1O$0NDG0N$$ 1O(0NDG0N(( #h-/#+A;2>q/.2 %FA  #h-/ H-7?'@x!xH'@.:1oO*.HQK.: >: ++2B2B2DD;=K8)1A1A1CC:<K70778NO #.";";"A"AV[["P'12+*$+	( (22+*$+	( $O4;;LI#N3::<H (>?!3302H #,B-w<3v~~+>> --==MM /a ';j N+22 !9:
  "
 /CA/F,(*%!*+G!HIC ''.s3A6a@ %%&<S&A!&DtAw&NO&&s+)00a471BC "I N+22$"#3	i #7l m $& ')$M"6)2)J)J""'((*& *1EE(//0FG!(( !/ 3	 ",223IJ/+ #7. 3}Q/234s=12ALLq!1!!4Q!78Av077a8H8KL 3  %G%)]]5%9N6:mm073 .7-N-N&+,,.* %*%== ( . 3 4 4 /  # 	#%) #$($6$6$:$:!"&	# 	  	  +11(;;BB 6+ %*2 9;5+J!33= ,K :!< 1 $&L!#J"$K,.)%./K%L	T$++237:T!WD #))237:T!WD $**3/ &M *00a471BC$/+'(3	%M  & (44'4B $ ,11"5234/4::2>23B789/3r9 2&+?0G, ,AB/3Fqr3JJ/2b8/2b8 ,&4&5)6)<)<)K+0	 &dO0G,0  #>2Q6 !0*8*9(5(;(;v{{(J	!" !-9X!" !. #0 $G,I  % 5x S	&$ ^$( (As*   f.fff,ff)f c                    U H  nX(   n	U	 H  n
[        U
[        [        45      (       a  [        U
R                  5        M5  [        U
[
        5      (       a"  [        U
R                  U
R                  /SS9  Ml  [        U
[        5      (       d  M  [        U
R                  U
R                  /SS9  M     M     / n/ nU R                  UR                  5       ;  a  gX R                     n	Sn[        [        UR                  5      5       H7  u  pUR                  R                  UR                  R                  :X  d  M5  Un  O   Uc   SU R                   S35       eUR                   nS n[#        U5      (       a  U" U5      (       a  [$        R&                  OUR)                  S5      nSnU	 GH+  n
[        U
[        5      (       Ga  UU R*                  R                  5       ;  a  / U R*                  U'   U R*                  U   (       a2  U
R                  U R*                  U    Vs/ s H  nUS   PM
     sn;  Ga"  U
R,                  (       a  [.        R1                  UUUU[2        R4                  US	9n[.        R7                  UUS
-   UU
R                  UU
R8                  US9u  nnUU-  n/ nU HI  n[.        R1                  UUUU[2        R:                  US	9nUR=                  UR                   5        US
-  nMK     U R*                  U   R=                  U
R                  U/5        GO[.        R7                  UUUU
R                  UU
R8                  US9u  nn[?        U5       H  nUR                  UU-      nUR@                   HT  nURB                  U   n[E        U RF                  US/[I        URJ                  5      -  URL                  URN                  S9  MV     [Q        UURL                  U RF                  URN                  S9  M     US
:X  a  UnUU-  nU Vs/ s H  nUR                   PM     nnU R*                  U   R=                  U
R                  U/5        OUU R*                  U    HB  nU
R                  US   :X  d  M  US
    Vs/ s H  n[S        UUU RT                  5      PM     nn  O   U(       d   S5       eGM  [        U
[
        5      (       GaZ  UU RV                  R                  5       ;  a  / U RV                  U'   U
R                  U RV                  U   ;  Ga  U
R,                  (       a\  [.        R1                  UUUU[2        R4                  US	9n[.        RY                  UUS
-   UU
R                  U
R                  UUS	9  US-  nOt[.        RY                  UUUU
R                  U
R                  UUS	9  [[        UR                  U   URL                  UR\                  U RF                  URN                  S9  US
-  nU RV                  U   R=                  U
R                  5        GM  GM  [        U
[        5      (       Gay  UU R^                  R                  5       ;  a  0 U R^                  U'   U
R                  U R^                  U   R                  5       ;  Ga  U
R`                  n/ nU H  nUR=                  US
   US   -
  5        M     U
R,                  (       a  URc                  [d        Rf                  " US-   5      UURh                  [2        R4                  URj                  S9n [.        Rm                  UUU U
R                  U
R                  UUS	9  [.        R1                  UUS
-   U U[2        R:                  US	9nUR=                  U5        US-  nUU R^                  U   U
R                  '   GM  URc                  [d        Rf                  " US-   5      UURh                  URn                  URj                  S9n [.        Rm                  UUU U
R                  U
R                  UUS	9  [E        U RF                  U US
   US   US   S9  [[        UR                  U   US   US
   U RF                  US   S9  U Rh                  S:w  a  Sn!U RT                  Rp                   H  n"U"RB                   H  n#U"RB                  U#   n$U$Rr                  (       d  M%  U$Rh                  U Rh                  :X  d  MA  [.        Ru                  UUS
-   U U$UUS	9n%UR=                  U%5        US-  nU%U R^                  U   U
R                  '   Sn!  O   U!(       d  M    O   U!SL d   eGM  UR=                  U 5        US
-  nU U R^                  U   U
R                  '   GM  UR=                  U R^                  U   U
R                     5        GM  [        U
[v        5      (       a  U
Rx                  n&Un'U/n([        U5       H"  u  nn)[.        R{                  UU)U&U   UU(UUS	9  M$     U(S   nUn*[?        U'U*5       H  n+UR                  U+   nUR@                   HM  nURB                  U   n[E        U RF                  US/[I        URJ                  5      -  US   URN                  S9  MO     [Q        UUS   U RF                  URN                  S9  M     G	M  [        U
[|        [        [~        45      (       d  G	M:  Sn,[        U
[|        5      (       a  [I        U5      S
:X  d	  U(       a   e[I        U5      S
:X  a  US   S   OUn-[d        Rf                  " US-   5      n.[.        R                  UUU-U
R                  U
R                  U
R                  U.UUS9	n,Oe[        U
[        5      (       a#  [.        R                  UUUU
R                  UUS	9n,O-[        U
[~        5      (       d   e[I        U5      S
:X  d   eUS   n,[        U
[~        5      (       dR  U,c   e[E        U RF                  U,US
   US   US   S9  [[        UR                  U   US   US
   U RF                  US   S9  URj                  S:X  Ga-  S[        R                  WR)                  S5      R                     R                  5       ;  a/  0 [        R                  UR)                  S5      R                     S'   U[        R                  UR)                  S5      R                     S   R                  5       ;  a2  / [        R                  UR)                  S5      R                     S   U'   [        R                  UR)                  S5      R                     S   U   R=                  UU,R                   /5        UR                  US  GH8  n[        U5      (       a  M  / n/UR                   GH  n0U RF                  R                  U5      n1U1c   eU0U:X  d  M,  U1R                  U5      n2U0n3U,R                   n.U3U.:w  d   eUR                  R                  5       UR                  R                  5       :X  a  UR                  R                  U0U.5        U1R                  U.U25        U RF                  R                  UU15        U RF                  R                  U,U25        URj                  S:X  a  U/R=                  U.5        GM  U1RL                  n4U1R                  U5      n5U4US   :X  d  GM<  U5US
   :X  d  GMH  UR                  R                  U0U.5        U1R                  U.U25        U RF                  R                  UU15        GM     U/(       d  GM  [        R                  " 5       R                  URj                  5      n6UR                  R                  U6R                  S   R                   UR                  S5      U/-   5        GM;     GM.     gs  snf s  snf s  snf )z
Parse op desc sequence and insert op in the block

src_tensor_attr(TensorDistAttr): tensor's dist_attr
dst_input_attr(list): input_var's dist_attrs of the op
r  r  Nz/The op for reshard cannot be found in the rank z	 program.c                 $    U R                  S5      $ )NGRAD)endswith)r$   s    r)   is_grad(Resharder.parse_op_desc.<locals>.is_grady  s    ==((r+   r   r   ri  r   )rS   r  r  chunk_idz6The result of parsing allgather op should not be None.rk  @recvr$   rQ   r   r   r   FTrf  )r   r   r   r  r   r  r   var_reshard_mappingr.  r   )Vr  rG   rn   r   rP   r   r   r   r   r  r  r  rB  r  r_   r  r$   r   r   Backwardr  r  rR   r   r  r   r[  rs  rS   rw  r   r@  r  r"   r   r  rA  rQ   r  r  r   r*   r  r  r  r   r  r   r   r   r   generater   r   r  r   r  r^  r(  r   r   r  r   r-   rP  r   r   r   r}  r  r  r   r  r  get_input_dist_attr_rename_inputset_input_dist_attrset_op_dist_attr_for_program set_tensor_dist_attr_for_programget_input_dims_mappingr   r  r  r  r   r  )7r3   r&   r  
src_tensor
reshard_opsrc_tensor_attrdst_input_attrr  r  op_desc_listop_descrn  r  r   r  r  src_namer  r   end_varsrg  out_castro  tensor_name_listr(   offsetout_nameout_varr   r%   r   rQ   recv_tensorset_lod	tmp_blocktmp_var_nametmp_varr&  r   pre_idxidx_listr  rJ  rD  target_tensorto_slice_tensornew_namewhile_op_X_appendr$   r  op_input_dist_attrold_namer  rc  r  s7                                                          r)   parse_op_descResharder.parse_op_descC  st   * #G&/L'o/DE  &gmm444% gkk2u  44% gkk2u ( #   "<<{//11"<<0i		23IEwwzzZ__/// 4  	
=dll^9U	
 ??	) j))gh.?.? OO+ 	 #G'?334#5#5#:#:#<<35D&&x0))(3w}}"&"4"4X">M">QAaD">M 8 #+#:#:!&#"LL!% $; $ 3;2N2N!!G$#MM#'.'9'9!% 3O 3/Z z)+-(#.C'/'>'> % # # ' &%) (? (H -33HMMB1HC $/ **84;;$]],<= 3;2N2N!&#MM#'.'9'9!% 3O 3/Z ',J&7F!&3<!8B,.,?,?*/**X*> 1$($5$5$+%'D3w}}+=$=$3$@$@-<-E-E!" -@ G " / < < $ 1 1)8)A)A	 '8$ &?'2Hz)@K+LCHH(+L**84;;$]],<= !% 2 28 <"==DG3 15Q+ 18H !7$,$)$($@$@!"
 18 ( + " != # L{ GZ004==#5#5#77.0DMM(+;;dmmH&==#+#:#:!&#"LL!% $; $ !//!!G$#KK#KK#!% 0  q //!&#KK#KK#!% 0  O!IIcN+88+88 --%4%=%= qMM(+227;;?M >P GZ004==#5#5#77.0DMM(+;;dmmH&=&B&B&DD&-&=&=OE!0U1Xa%89 "1&+&6&6!,!5!5h6H!I"'&0&:&:"(,,!+ '7 ' !//!'#KK#KK#!% 0  $,#:#:!!G'#"KK!% $; $ $**84q?Gh/<&+&6&6!,!5!5h6H!I"'&0&:&:","2"2!+ '7 ' !//!'#KK#KK#!% 0  * --'*1-*1-%3A%6 O!IIcN*1-*1- --%3A%6 '00A5&+G "&!=!=!D!D !*4=NNL.7nn\.JG(/,3,=,=+6+@+@-A -5,H,H0503a0;070759 -I -. )6 )4(:(:=(I(+q,9 )-h(?(L 37(-/ 5C0 $+7$)5 "E6 $+d?2?'..{;1HCCNDMM(3GKK@&&t}}X'>w{{'KLG\22'.'C'C$5%.{%;ME666-,U3 ! 7  &< qkw0A1B$&$7$7"'**X"6) --#D3w}}#55*1-%4%=%= %8 ?&q)))!0!9!9	 1$ +'<iH  !%g{3312a745
 45: .a03' $
  +33Hz4IJH$,$<$<'&~~$\\$\\%- '! %= 
%M  )>??$,$?$?"! %@ %M &gy9999x=A---$,QKM!'955(444%))%&q)&q)!/!2 K		#&q)&q)))!/!2 ??g- .(99GGK033 $&!  "22277;3G3J3JK1 !(99GGK033 / 1158 ') "22277;3G3J3JK1"$ ..rww{/C/F/FG-  &8J8J'K L  ))CD/B$R(( (*% " 2 2 --JJ2N %  ,7778+ , @ @ J / (,H'4'9'9H#+x#77#7!wwzz|z/A/A/CC " 5 5dH E , @ @$,.@!" !% 1 1 N N$&!" !% 1 1 R R$13E!" $&77g#5$5$<$<X$F (.:.G.GO , C CH M 2
 !0>!3D D$9^A=N$N " 5 5dH E , @ @$,.@!" !% 1 1 N N$&!"S !3\ )( - 6 6 8 E Ebgg N))!LLO00HHSM,==i *Y $ML ,M+s   #"	!'c           
      Z   UR                   [        ;   d   eU R                  R                  UR	                  S5      R
                     nUR                  n/ nU H  nU R                  R                  U5      nU(       d  M'  UR                  nUR                   H  nX:X  d  M
  UR                  n	UR                  U5      n
UR                  nSnU H#  nXS   :X  d  M  XS   :X  d  M  XS   :X  d  M!  Sn  O   U(       a  Mg  UR                  U	U
UUR	                  S5      /5        M     M     U$ Nr.  Fr   r   rk  Tr   )r   _g_subblock_opsr  r  r  r  r  r  r)  rk  r  r  r  r  r   )r3   r  r%   r.  r  input_attrsr3  rk  r$   r  input_dims_mappingr  	has_exist
input_attrs                 r)   _get_subblock_input_attrs#Resharder._get_subblock_input_attrs*	  s&   ww/)))00778L8O8OP	mmB''??CG))I**##,#9#9L)2)I)I *&  )11H %I&1
(qM9 2m C (qM 9(,I! '2 %9#** , 2 ( "	 2	# + < r+   c           
      n   UR                   [        ;   d   eU R                  R                  UR	                  S5      R
                     nUR                  n/ nU H  nU R                  R                  U5      nU(       d  M'  UR                  nUR                   H  nX:X  d  M
  UR                  n	UR                  U5      n
UR                  R                  nSnU H#  nXS   :X  d  M  XS   :X  d  M  XS   :X  d  M!  Sn  O   U(       a  Mq  UR                  U	U
UUR	                  S5      /5        M     M     U$ r  )r   r  r  r  r  r  r  r  r)  rk  r  r  get_output_dims_mappingr  r   )r3   r  r%   r.  r  output_attrsr3  rk  r$   r  output_dims_mappingr  r  output_attrs                 r)   _get_subblock_output_attrs$Resharder._get_subblock_output_attrsQ	  s,   ww/)))00778L8O8OP	mmB''??CG))I++##,#9#9L*3*K*K +'  '0099H %I'3(N: 31~ E (N :(,I! (4 %9$++ , 3 ( "	 2	# , < r+   c           	      P   / nU R                   R                  U5      nUR                  nUR                  nU R                   R                   Ht  n[        UR                  5      [        UR                  5      -  (       d  M5  [        UR                  5      [        UR                  5      :  d  Mc  UR                  U5        Mv     U(       d  UR                  U5        UR                  U5      nUR                  n	/ n
U H%  nU
R                  XxXR                  S5      /5        M'     U
$ )Nr   )r  r)  rk  r  r[  r  r  rA  r   r  r  r  )r3   r  r%   r[  r3  rk  r  r  r  r  r  s              r)   _get_common_op_input_attrs$Resharder._get_common_op_input_attrsx	  s   ##;;B?%%	#00 --<<L<++,O//0 l../#++3  %%l3 = !!/2&==hG%%*L8WWY=OP +
 r+   c                     / nUR                   [        ;   a*  U R                  X5      nU(       d  U R                  X5      nOU R	                  X5      nU(       d   SUR
                   SU S35       eU$ )NzThe input 'z	' of op 'z+' has no distributed attributes in subblock)r   r  r  r  r  r$   )r3   r  r%   op_input_attrss       r)   get_op_input_attrsResharder.get_op_input_attrs	  sw    77o%!;;BIN! "&!@!@!N!<<RJN 	
"'')H:5`a	
~ r+   c                 t   [        5       n[        U R                  R                  5      nUS:  Ga  / nSnU R                  R                   H'  nUR                   H  nUR                  U5        M     M)     [        U R                  R                  5       H`  u  pu[        [        UR                  5      5      [        U5      :X  a  UR                  U5        MC  [        UR                  5      U:  d  M^  SnMb     U(       a8  [        U5       H(  nU R                  R                  R                  U5        M*     ggg)z;Remove global process mesh from dist_context.process_meshesr   FTN)
r  rA  r  r[  r  r  rB  r   reversedr  )r3   r  process_mesh_countglobal_process_mesh_idxhas_sub_process_meshr  
process_idr   s           r)   _remove_global_process_mesh%Resharder._remove_global_process_mesh	  s	   e !2!2!A!AB!&(##(  $ 1 1 @ @".":":JOOJ/ #; !A &/!!00&! s<3345[9II+2237112[@+/(& $#$;<C%%4488= = $ "r+   c                 p   S[         R                  U   ;   Ga  [         R                  U   S   nUR                   GHv  nUR                   H  nXS;   d  M
  U R                  R                  U5      nUR                  nS nX5    H=  n	UR                  U	S   S   :X  d  M  UR                  U5      U	S   S   :X  d  M8  U	S   n  O   U(       d  M  UR                  R                  XX5        UR                  U5      n
UR                  X5        M     UR                   H  nXS;   d  M
  [        X5   5      S:  a  [        S5      eX5   S   S   nUR                  R!                  XX5        U R                  R                  U5      nUR                  nUR#                  U5      nUR%                  X5        M     GMy     g g )Nr  r   r   zpThe scene is not supported that the output is inplaced and the tensor has been resharded multiply when as input.)r  r  r  r  r  r)  rk  r  r  r_   r  r  r  r  rA  r\  _rename_outputget_output_dist_attrset_output_dist_attr)r3   r  r&   r  r  r%   r3  rk  target_namer   r  r  op_output_dist_attrs                r)   $_change_subblock_op_input_and_output.Resharder._change_subblock_op_input_and_output	  s    I$>$>y$II"+"<"<Y"G%# ii " 2 2H6"&"3"3"K"KB"O$+$5$5	&*$7$AD ) 6 6$q'!* D$-$D$DX$N#'71:%. /31g % %B ';GG11(H1:1N1N (2. &99 +) !32 !# 3 3H62<=A", !S#  ':&CA&Fq&I..xE"&"3"3"K"KB"O'.'8'8.:.O.O$/+ %99' !45  	 Jr+   c           
      l   SnU[        UR                  5      :  Ga  [        UR                  5      nUR                  U   nU R                  U5      (       a  US-  nM[  U R                  R	                  U5      nUGb  UR
                  [        ;   a  U R                  U5      (       d  [        S5      eUR                  S5      R                  [        R                  ;  a,  0 [        R                  UR                  S5      R                  '   UR                  R                  5       [        R                  UR                  S5      R                     S'   UR
                  S:X  a  UR                  S5      nO.UR
                  S:X  a  UR                  S	5      nOUR                  nUR!                  5         SnU GH  nS
U;   a  M  [#        XU R$                  5      n	U R                  R'                  U	5      n
SnU
R(                  R*                  U R                  R,                  ;  ae  U R                  R,                  (       aJ  SnU
R(                  R.                  R1                  S5      [        U
R(                  R.                  5      :X  d   eU R3                  XH5      nU H  nU(       ab  [5        US   R6                  5      [5        U
R(                  R*                  R6                  5      ::  a  US   U
R(                  R.                  :X  a  Ml  U
c  Mq  U R9                  X5      (       d  M  U R;                  U
UUS9nU R=                  UUU	UU
R(                  U5        [        UR                  5      nX-   U-
  nUnM     GM     X'-   S-   nOUS-  nU[        UR                  5      :  a  GM  UR?                  5         g )Nr   r   zFPlease check the condition due to the dims mapping is not replicative.r.  r'  r   r   r    r>  lod_tensor_blocking_queueFTr  )r  ) rA  r  rE  r  r)  r   r  rO  r\  r  r  r  r  r_   r  r  r*  r*   r  rJ  rk  r  r[  r  ru  r  r  r  re  r  r  r  )r3   r&   r   pre_op_countr  r3  input_var_namesro  r%   r(   rL  r  r  r  reshard_op_desccur_op_counts                   r)   _reshard_inputResharder._reshard_input	  sk   C		N"uyy>L3B!!"%%q''??CG"77o-88<<(d  ,//(99: OQ	22277;3G3J3JK 

 ..rww{/C/F/FG 77g%&(hhsmOWW 33&(hhw&7O&(&8&8O$$&
 /H2h> 0 )E)EC #'"3"3"O"O#K
 490#--::#00??@ --<<7;4*44AAGG  !6!6!C!CD E E  E &*%<%<R%JN&4
7
 !$JqM$=$= >#&$/$9$9$F$F$R$R$"!" %/qM#.#8#8#E#E%F !)&2t7H7H'8 8 /3.C.C + *=Y /D /O
 !.. % / # " + 5 5 * ,/uyy>L * 9L H ' ,8LI '51 !0z &*qE C		N"F 	r+   c	                    U R                   U:X  Ga  UR                  [        R                  :X  Ga  UR	                  [
        R                  " UR                  S-   5      UR                  UR                  [        R                  UR                  S9n	[        R                  UUS-   U	UUUR                  S5      5        S n
UR                  S:w  Ga  SnU R                  R                    H  nUR"                   H  nUR"                  U   nUR$                  (       d  M%  UR                  UR                  :X  d  MA  UR	                  [
        R                  " UR                  S-   5      U	R                  U	R                  U	R                  U	R                  S9n
US-  nUR'                  US	XS
.SU
0SUR                  S5      0S9  Sn  O   U(       d  M    O   USL d   eUR'                  US-   SSU
c  U	/OU
/0SU/0U	R                  UR                  UR                  S5      S.S9nUR)                  SS5        g UR                  S:w  Ga9  UR	                  [
        R                  " UR                  S-   5      UR                  UR                  UR                  UR                  S9n[        R                  UUS-   UUUUR                  S5      5        SnU R                  R                    H  nUR"                   Hq  nUR"                  U   nUR$                  (       d  M%  UR                  UR                  :X  d  MA  US-  nUR'                  US	UUS
.SU0SUR                  S5      0S9  Sn  O   U(       d  M    O   USL d   eg [        R                  UUS-   UUUUR                  S5      5        [+        UR,                  US-      UR.                  UR0                  U R2                  UR4                  S9  g g )Nr  r  r   r   r   Fz	@RESETLODr"  r#  r$  r   r   Trk  r   r   r   r   r   r  )r  r   r   rw  r   r   r  r$   rQ   r   r[  r   r   r  r  r  r  r"   r^  r   r   r   r  r  r  r  r  )r3   r&   r   r(   r  	send_rank	recv_ranksrc_output_attrdst_tensor_attrrecv_cast_outr&  r  r  r  r  r  recv_outs                    r)   _handle_recvResharder._handle_recvU
  s    <<9$yyFKK' % 0 0$--chh.@A))!mm ,, !1 ! ''!G!GGI& !%==A%#G%)%A%A%H%H	,5NNL&/nn\&BG '$+$5$5$F050@0@)4)=)=(+;(>*& +8*=*=)6););*7*=*=.;.E.E 1A 1" !$q % 0 0$')41>+M-2M,B+4bggi6H*I !1 !" +/ %1 -;2 #7!7 &I8 #d?*?  **!G,4 +O"/	 #SEN$1$7$7%(YY#%779#5 + " !!.2JK==A%$//(11#((W2DE!ii"%--!ii XX  0  H ++a !!	* $G%)%A%A%H%H	,5NNL&/nn\&BG '$+$5$5$F #q % 0 0$')419+H-2CL+4bggi6H*I !1 !" +/ % -;  #7!% &I& #d?*?++a!!	* K		#'*'44'44))!0!9!9k %r+   c	           
         UR                   [        R                  :X  ad  [        R	                  XS-   X4R                  S5      [        R                  5      n	[        R                  UUS-   U	UUUR                  S5      5        g [        R                  XS-   X5XdR                  S5      5        [        UR                  US-      US   US   U R                  US   S9  g )Nr   r   rk  r   r  )r   r   rw  r   r  r  r[  r  r   r  r  )
r3   r&   r   r(   r  r  r  r  r  cast_outs
             r)   _handle_sendResharder._handle_send
  s     99#..QwWWY%7H ##a	" ##Qw	779;M C		#'"""!!(+r+   c                 p	   SnS nU[        UR                  5      :  Ga  [        UR                  5      nUR                  U   nU R                  R                  U5      nUGb2  U" U5      (       Gd$  SnUR                   GH	  n[        XU R                  5      n	U R                  R                  U	5      n
U
R                  R                  nUR                  R                  UR                  R                  U5      UR                  R                  UR                  S5      /nU
c  M  U R                  XS5      (       d  M  [        UR                  5      [        UR                  5      [        US   R                  5      -  -
  nU(       d  GM  [        U5      [        US   R                  5      :w  Ga  U
R                  R                   R#                  S5      [        U
R                  R                   5      :w  d$  US   R#                  S5      [        US   5      :w  a  [%        S5      e['        U5       GH;  u  pUnUnU[        US   R                  5      :  a4  U[        US   R                  5      -
  [        US   R                  5      -  nUS   R                  U   nUU:X  a  Mt  U	R(                  S   S:X  a?  [+        U	R(                  5      nU R,                  US'   U	R.                  R1                  U5        U R2                  U:X  a$  U R5                  UUU	UUUUU
R                  5        M  U R2                  U:X  a%  U R7                  UUU	UUUUU
R                  5        GM/  [9        UU/SS	9  GM>     O['        U5       H  u  pUnUS   R                  U   nUU:X  a  M!  U	R(                  S   S:X  a?  [+        U	R(                  5      nU R,                  US'   U	R.                  R1                  U5        U R2                  U:X  a$  U R5                  UUU	UUUUU
R                  5        M  U R2                  U:X  a$  U R7                  UUU	UUUUU
R                  5        M  [9        UU/SS	9  M     [        UR                  5      nUU-   U-
  nUnGM     X'-   S-   nOUS-  nU[        UR                  5      :  a  GM  g g )
Nr   c                 t    / SQnU[         -  nU[        -  nU R                  U;   a  g[        U 5      (       a  gg)N)r  r  r  write_to_arrayread_from_arraynopdependTF)rD  r  r   r   )r  skip_opss     r)   _is_special_op1Resharder._reshard_output.<locals>._is_special_op  s?    H &H'Hww("R  r+   r   Fr  r   zThe dims_mapping must be -1r  r  )rA  r  r  r)  r  r*   r  rJ  rk  r  r  r  r  re  r  r  r  ru  r\  rB  rQ   r  r  r_   rs  r  r  r
  r   )r3   r&   r   r  r  r  r3  ro  r%   r(   rL  rb  r  tensor_processesr  tensor_processr  actual_indexr   rL  r   s                        r)   _reshard_outputResharder._reshard_output   s   	& C		N"uyy>L3B''??CG">"+=+=
 " 3 3H0 )E)EC #'"3"3"O"O#K +6*?*?*L*L'))66))AA(K))22	*	#K #.43D3D#%4 4 ,//;;,   3 ? ?@!+a."<"<=>,( ,+"#34 +A : :9   $/#8#8#E#E#K#K$&$"%($/$9$9$F$F&"$" &1^%9%9"%=$/NB" &" +5(E+& %& BK(8B&(= 5C	7<+0C,7N,F,F5* ,* 16254?N4N4N3212 14KN4N4N0O<PL 0;1~/I/I,80* ,5+<,4+.99Q<2+=8<SYYI;???IaL,/HH,>,>y,I+/<<4+?,0,=,=0503030204090;0;0E0E	-. .2\\Y-F,0,=,=0503030204090;0;0E0E	-. ->15y0A;@-.gB&p >G$4>"$9E 1?I+6q>+E+Ee+LD'0D'8(0'*yy|r'948O	7;	!(+(:(:9(E'+||t';(,(9(9,1,/,/,.,0,5,7,7,A,A	)* *.)B(,(9(9,1,/,/,.,0,5,7,7,A,A	)* ):-19,=%)*O>"V ,/uyy>L *\ 9L H ' ,8L[ !4^ &*qo C		N""r+   c                    U R                  5         [        U R                  R                  5       HL  u  pU[        R
                  ;   a  U R                  X5        U R                  U5        U R                  U5        MN     [        R                  U R                  U R                  U R                  U R                  5        [        R                  U R                  U R                  5        0 [        l        g r0   )r  rB  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )r3   r  r&   s      r)   reshardResharder.reshard  s    ((* )$*F*F*M*M NII66699)K &   ' !O 	&&((LL""		
 	))(($*I*I	

 &(	"r+   c                 |   / [         QSPnS nUR                  U;   a  U$ UR                  nUS:X  a  U$ U R                  R	                  U5      nU R                  R                  U5      nU(       a  U(       d  U$ UR                  R                  UR                  5      n	UR                  R                  n
U
U	UR                  R                  UR                  S5      /nUb  U R                  X{5      (       a  X`R                  ;  a  U/U R                  U'   OmU R                  U    H<  nUR                  nUR                  U5      nUR                  nX:X  d  M3  X:X  d  M:  Us  $    U R                  U   R                  U5        U R                  X{SS9nUR                  R                   nU R#                  UUU5      nU$ )Nr   lod_tensor_blocking_queue_0r   T)r  )rD  r   r$   r  rJ  r)  rk  r  r  r  r  re  r  r   r  r]  r   parse_op_desc_for_cost)r3   r  r  clusternot_supported_op_typereshard_op_costtensor_namerL  r3  r  r  rk  r   item_dist_attritem_dims_mappingitem_process_meshr  r   s                     r)   get_costResharder.get_cost  s    !;. :' :77++"" ++K;;&&"//KK ++CCBG"'**&00GGKK   '00==  %%..GGI&		 *t/@/@0 0 #*=*==<C9++K8$($7$7$DD-1^^N . E E$/!" .
 1?0K0K- , A$5$E'6 6 %E ++K8??H&*&;&;#t '< 'O (55;;E&*&A&A''O r+   c                    U(       d  UR                  U5        g SnSnU[        U5      :  a  [        R                  X   U5      u  n	n
nU	S:w  a  Sn0 nSUS'   SU	0US'   U
S:X  a  S	X1U   4X24/0US
'   OS	X24X1U   4/0US
'   UR	                  U5        XE;  a  / XT'   X<S'   XT   R                  [        XUS95        U R                  UUUUUU5        OUS-  nU[        U5      :  a  M  U(       d  UR                  U5        g g )Nr   Fr  Tr   r  r+  r   r   r   r   r  r$  rw  r   )r   rA  r  r  r  r   _concat_partitions_for_cost)r3   r  r   r   r  local_rank_comp_costr$  rD  r  r  r  r  concat_descs                r)   r/  %Resharder._concat_partitions_for_cost  ss    %!((9AJc/00
 11),o	! "$!%J"$K(0K%,2K+@K("a'!&a(@ A!& 8"1H-  !& 8!&a(@ A"1H- *--a0:8:,5+0((188$$/w
 44-%, Q[ c/00\ %,,_= r+   c                    S n[        U5      n/ n/ n0 nU GHR  n	/ n
X   nU GHA  n[        U[        5      (       a  XR                  /nUR                  n[        SXU5      nU" X}5      u  nnUc8  UR                  U[        UUS94/5        UR                  [        U5      5        M  U(       d   UU   R                  U[        UUS945        M  M  [        U[        5      (       Ga*  UR                  nUR                  n[        SXU5      n/ n[        U5       H<  u  nnUS:X  a  UR                  U[        U5      -  5        M+  UR                  U5        M>     U" X}5      u  nnUc7  UR                  U[        UUS94/5        UR                  [        U5      5        O%U(       d  UU   R                  U[        UUS945        X;  a  / X'   0 nSUS'   SUU4/0US'   [        U5      SS	.US
'   UUS'   X   R                  [        UX9S95        GM  [        U[        5      (       a:  UR                   n[        U5       H  u  nnU R#                  U
UUU	UU5        M     GM=  [        U[$        5      (       d  GMU  X;  a  / X'   [        U
5      S:X  d	  U
(       a   e/ n[        U
5      S:X  a'  U
S    H  nUR                  US   US   -
  5        M     OUR                  n0 nSUS'   ['        [        UR(                  5      5       Vs/ s H  nSPM     nnUR(                  UR*                  UR,                  US.US
'   SUU4/0US'   UUS'   X   R                  [/        UX9S95        GMD     GMU     Xh4nU$ s  snf )Nc                     Su  p#SnU[        U 5      :  aY  X   [        U5      :X  a  SnU H"  nXPU   ;   d  M  UnX   R                  U5        M$     Uc  US-  nO X#4$ U[        U 5      :  a  MY  X#4$ )N)NFr   Tr   )rA  r  r  )
comm_ranksgroup_ranksresis_the_samer   rw  s         r)   _get_idx2Resharder.parse_op_desc_for_cost.<locals>._get_idxK  s    *CCJ'?c+&66"&K'D#.!"++D1 ( ;1HC## J' ##r+   r  )r  comm_contextrJ   r   r=  r  r   )r;  r+  r   r   r.  r   r   )r   r   r   r?  r>  )r   r  r   r   rQ   r   r   r   r  rG   rP   rB  rA  r   r   r   r   r/  r   r@  r   r   r   r   )r3   r  r   r$  r9  r;  
comm_costsr5  r0  keyr  r  r  r6  rQ   	send_descr   r8  allgather_descsplit_inputs_shaper@  
split_descr   partition_idexr  r   
slice_descrD  r?  r7  s                                 r)   r#   Resharder.parse_op_desc_for_costJ  s   	$" #7+ 

!"C$&!*/L'gz22#&"4K#MME /!;u!I (0
'H$C{")) %0$.095A%&!"
 #))#k*:;*&sO22$/$.095A%&!"  +  99")--K#MME%4$k%&N *,&$-e$4S!8.55cC<L6LM.55c:	 %5
 (0
'H$C{")) %0$30>5A%&!"
 #))#k*:;*&sO22$/$30>5A%&!" 646,1!#J'.Jt$ E+=#>"?,Jx( 36k2BA*NJw'*/Jw'(-44#$.
  66+2+H+H(/89M/N+^881*!0# 0O  55646,112a745 -/)01Q6$9!$<D188a479JK %= 18-!#J'.Jt$.3C4E.F"G.F1.FK"G '").. ''2	+Jw'  5*?"@!A,Jx( +0Jw'(-44#$.w ( #H 0
' #Hs   M3)
r  r  r  r  r  r  r  r  r  r  r0   )TN)FFr  )0r>   r?   r@   rA   rB   r  r4   rC   r  r  r  r  r  r  r  r   r  r  r  r  r  r  r!  r$  r  r=  rA  rE  rO  re  rh  r  r  r  r  r  r  r  r  r  r
  r  r  r  r+  r/  r#  rD   rE   r+   r)   r  r  *  s   
  *!X - - 0 0   " " ' '         # # 	 	     4 2 2>   = =, 8E 8Et	,@D0 %*~P eN%N%N6&>,0deNFP!Fob(<:x=>~^r+   r  )3r{  r  collectionsr   	functoolsr   r   /paddle.distributed.fleet.meta_optimizers.commonr   %paddle.distributed.utils.stream_utilsr   paddle.frameworkr   r   r	   r
   paddle.utilsr   costr   r   r   r   r   r   r   r  r   r  r   r   r   r   r   r   r   r   r   rD  r  r*   r-   rG   rn   r   r   r   r   r   r  r  rE   r+   r)   <module>rL     s       #   B E F F $   - ,   -.CD/0	+ +(&J &JR!j !jH/P /Pd/P /Pd*h *hZX X0pH pHfi' i'X~" ~"r+   