
    x-j                       d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZmZ dd
lmZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddga&ddgZ'd Z( G d d          Z) G d d          Z* G d d          Z+ G d d          Z, G d d          Z- G d d          Z. G d d          Z/ G d  d!          Z0 G d" d#          Z1 G d$ d%          Z2dS )&    N)OrderedDict)reduce)OpRole)ExecutionStreamType)LayerHelperOpProtoHolderProgramcore)unique_name   )AllgatherOpCostCommContextConcatOpCost
SendOpCostSliceOpCostSplitOpCostbuild_comm_desc)DistributedContext)new_process_group)_g_gradient_clip_opsis_gradient_clip_opis_optimize_opis_reshard_op*naive_set_dist_op_attr_for_program_by_mesh6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_var_dist_attrcheck_finite_and_unscaleupdate_loss_scalingwhileconditional_blockc                     d}| |j         v r|j         |          }n|                    |           }|J |j         d            |S )z=Get var in the parent block if not found in the current blockNz is not found)vars_var_recursivename)var_nameblockprogramvars       o/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/auto_parallel/static/reshard.pyget_var_with_recursionr*   5   sT    
C5:j"""8,,??sx666???J    c                   4    e Zd ZdZd Zed             Zd ZdS )	EndOpDescz
    Describe to end reshard parse process.
    It is supposed to contain a list of variables which are the outputs of one reshard process.

    Args:
        vars (list): a list of variables.
    c                     || _         d S N_vars)selfr"   s     r)   __init__zEndOpDesc.__init__J   s    


r+   c                     | j         S r/   r0   r2   s    r)   r"   zEndOpDesc.varsM   
    zr+   c                     d| j          dS )NzEnd vars : .r0   r5   s    r)   __repr__zEndOpDesc.__repr__Q   s    *TZ****r+   N)__name__
__module____qualname____doc__r3   propertyr"   r9    r+   r)   r-   r-   A   sW              X+ + + + +r+   r-   c                       e Zd ZdZddZed             Zed             Zed             Zed             Z	ed	             Z
d
 ZdS )AllGatherOpDescz
    Describe the allgather op in the reshard phase.

    Args:
        group (list): Process group.
        shape (list): The tensor shape.
        is_bool (bool): Whether allgather bool data. Default: False.
    FTc                 L    || _         d| _        || _        || _        || _        d S )N
all_gather)_group_desc_shape_is_bool_need_split)r2   groupshapeis_bool
need_splits        r)   r3   zAllGatherOpDesc.__init___   s,    !
%r+   c                     | j         S r/   rG   r5   s    r)   rK   zAllGatherOpDesc.is_boolf   
    }r+   c                     | j         S r/   rD   r5   s    r)   rI   zAllGatherOpDesc.groupj   
    {r+   c                     | j         S r/   rE   r5   s    r)   desczAllGatherOpDesc.descn   r6   r+   c                     | j         S r/   rF   r5   s    r)   rJ   zAllGatherOpDesc.shaper   rR   r+   c                     | j         S r/   )rH   r5   s    r)   rL   zAllGatherOpDesc.need_splitv       r+   c                 X    d| j          d| j         d| j         d| j         d| j         dS )Nop: 	, group: 	, shape: , is_bool: z, need_split: r8   )rE   rD   rF   rG   rH   r5   s    r)   r9   zAllGatherOpDesc.__repr__z   sl     Jdj  J  J4;  J  J  J  JY]Yf  J  Jvz  wG  J  J  J  	Jr+   N)FT)r:   r;   r<   r=   r3   r>   rK   rI   rU   rJ   rL   r9   r?   r+   r)   rA   rA   U   s         & & & &   X   X   X   X     X J J J J Jr+   rA   c                   x    e Zd ZdZd
dZed             Zed             Zed             Zed             Z	d Z
d	S )AllGatherConcatOpDescz
    Describe the c_concat op in the reshard phase.

    Args:
        group (list): Process group.
        shape (list): The tensor shape.
        is_bool (bool): Whether c_concat bool data. Default: False.
    Fc                 >    || _         d| _        || _        || _        d S )Nc_concat)rD   rE   rF   rG   )r2   rI   rJ   rK   s       r)   r3   zAllGatherConcatOpDesc.__init__   s"    
r+   c                     | j         S r/   rN   r5   s    r)   rK   zAllGatherConcatOpDesc.is_bool   rO   r+   c                     | j         S r/   rQ   r5   s    r)   rI   zAllGatherConcatOpDesc.group   rR   r+   c                     | j         S r/   rT   r5   s    r)   rU   zAllGatherConcatOpDesc.desc   r6   r+   c                     | j         S r/   rW   r5   s    r)   rJ   zAllGatherConcatOpDesc.shape   rR   r+   c           	      H    d| j          d| j         d| j         d| j         d	S )Nr[   r\   r]   r^   r8   )rE   rD   rF   rG   r5   s    r)   r9   zAllGatherConcatOpDesc.__repr__   s5    idjii4;iiiiY]Yfiiiir+   NF)r:   r;   r<   r=   r3   r>   rK   rI   rU   rJ   r9   r?   r+   r)   r`   r`   ~   s                   X   X   X   Xj j j j jr+   r`   c                       e Zd ZdZddZed             Zed             Zed             Zed             Z	ed             Z
ed	             Zd
 ZdS )
SendOpDesca0  
    Describe the send op in the reshard phase.

    Args:
        partition_index (list): The index of partition in complete tensor.
        src (int): The source process to send.
        dst (int): The destination process to receive.
        is_bool (bool): Whether send bool data. Default: False.
    Fc                 Z    || _         || _        d| _        g | _        || _        || _        d S )Nsend)_dst_partition_indexrE   rF   rG   _srcr2   partition_indexsrcdstrK   s        r)   r3   zSendOpDesc.__init__   1    	 /
			r+   c                     | j         S r/   ro   r5   s    r)   rr   zSendOpDesc.src   
    yr+   c                     | j         S r/   rN   r5   s    r)   rK   zSendOpDesc.is_bool   rO   r+   c                     | j         S r/   rn   r5   s    r)   rq   zSendOpDesc.partition_index       $$r+   c                     | j         S r/   rm   r5   s    r)   rs   zSendOpDesc.dst   rw   r+   c                     | j         S r/   rT   r5   s    r)   rU   zSendOpDesc.desc   r6   r+   c                     | j         s3| j        D ]+}| j                             |d         |d         z
             ,| j         S Nr   r   rF   rq   appendr2   items     r)   rJ   zSendOpDesc.shape   L    { 	6, 6 6""47T!W#45555{r+   c                 X    d| j          d| j         d| j         d| j         d| j         dS Nr[   z, partition_index: z, dst: r]   r^   r8   rE   rn   rm   rF   rG   r5   s    r)   r9   zSendOpDesc.__repr__   s     Pdj  P  PT5J  P  PSWS\  P  Pgkgr  P  P  @D  @M  P  P  P  	Pr+   Nrh   )r:   r;   r<   r=   r3   r>   rr   rK   rq   rs   rU   rJ   r9   r?   r+   r)   rj   rj                     X   X % % X%   X   X   XP P P P Pr+   rj   c                       e Zd ZdZddZed             Zed             Zed             Zed             Z	ed             Z
ed	             Zd
 ZdS )
RecvOpDesca0  
    Describe the recv op in the reshard op.

    Args:
        partition_index (list): The index of partition in complete tensor.
        src (int): The source process to send.
        dst (int): The destination process to receive.
        is_bool (bool): Whether receive bool data. Default: False.
    Fc                 Z    || _         || _        d| _        g | _        || _        || _        d S )Nrecv)ro   rn   rE   rF   rG   rm   rp   s        r)   r3   zRecvOpDesc.__init__   rt   r+   c                     | j         S r/   r}   r5   s    r)   rs   zRecvOpDesc.dst   rw   r+   c                     | j         S r/   rN   r5   s    r)   rK   zRecvOpDesc.is_bool   rO   r+   c                     | j         S r/   rz   r5   s    r)   rq   zRecvOpDesc.partition_index   r{   r+   c                     | j         S r/   rv   r5   s    r)   rr   zRecvOpDesc.src   rw   r+   c                     | j         S r/   rT   r5   s    r)   rU   zRecvOpDesc.desc   r6   r+   c                     | j         s3| j        D ]+}| j                             |d         |d         z
             ,| j         S r   r   r   s     r)   rJ   zRecvOpDesc.shape   r   r+   c                 X    d| j          d| j         d| j         d| j         d| j         dS r   r   r5   s    r)   r9   zRecvOpDesc.__repr__  r   r+   Nrh   )r:   r;   r<   r=   r3   r>   rs   rK   rq   rr   rU   rJ   r9   r?   r+   r)   r   r      r   r+   r   c                       e Zd ZdZd
dZed             Zed             Zed             Zed             Z	ed             Z
d	 ZdS )SliceOpDescac  
    Describe the slice op in the reshard phase.

    Args:
        starts (list): It represents start indices of corresponding axis in ``axes``.
        ends (list):  It represents end indices of corresponding axis in ``axes``.
        axes (list):  Axes that `starts` and `ends` apply to.
        shape (list): The shape of the tensor to be sliced.
    Nc                 L    || _         || _        || _        d| _        || _        d S )Nslice)_starts_ends_axesrE   rF   )r2   startsendsaxesrJ   s        r)   r3   zSliceOpDesc.__init__  s)    


r+   c                     | j         S r/   )r   r5   s    r)   r   zSliceOpDesc.starts  s
    |r+   c                     | j         S r/   )r   r5   s    r)   r   zSliceOpDesc.ends  r6   r+   c                     | j         S r/   )r   r5   s    r)   r   zSliceOpDesc.axes   r6   r+   c                     | j         S r/   rT   r5   s    r)   rU   zSliceOpDesc.desc$  r6   r+   c                     | j         S r/   rW   r5   s    r)   rJ   zSliceOpDesc.shape(  rR   r+   c                     | j         +d| j         d| j         d| j         d| j         d| j          dS d| j         d| j         d| j         d| j         d	S )Nr[   z
, starts: z, ends: z, axes: r]   r8   )rF   rE   r   r   r   r5   s    r)   r9   zSliceOpDesc.__repr__,  s|    ;"}$*}}}}dj}}Z^Zd}}osoz}}}}g$*ggggdjggZ^Zdggggr+   r/   )r:   r;   r<   r=   r3   r>   r   r   r   rU   rJ   r9   r?   r+   r)   r   r     s               X   X   X   X   Xh h h h hr+   r   c                   J    e Zd ZdZd Zed             Zed             Zd ZdS )ConcatOpDescz
    Describe the concat op in the reshard phase.

    Args:
        partition_index_list (list): The list contains all partition index.
    c                 "    || _         d| _        d S )Nconcat)_partition_index_listrE   )r2   partition_index_lists     r)   r3   zConcatOpDesc.__init__;  s    %9"


r+   c                     | j         S r/   )r   r5   s    r)   r   z!ConcatOpDesc.partition_index_list?  s    ))r+   c                     | j         S r/   rT   r5   s    r)   rU   zConcatOpDesc.descC  r6   r+   c                 (    d| j          d| j         dS )Nr[   z, partition_index_list: r8   )rE   r   r5   s    r)   r9   zConcatOpDesc.__repr__G  s     WdjWW$:TWWWWr+   N)	r:   r;   r<   r=   r3   r>   r   rU   r9   r?   r+   r)   r   r   3  sw            * * X*   XX X X X Xr+   r   c                   "   e Zd ZdZedd            Zedd            Zedd            Zedd            Zedd            Z	e	 dd            Z
e	 dd
            Zedd            Ze	 dd            Zedd            Ze	 dd            ZdS )Inserterz*Insert op required in the reshard process.Tc           
      R   t           j        j                            d                    ddg                    }|                     |||j        |j                  }|r| j        n| j	        } ||dd|gid|gi|j
        |j
        |d	          }	|	                    d
d           |S )Nr8   zcast@RESHARDtmpr$   dtypetype	lod_levelcastXOutin_dtype	out_dtypeop_roler   inputsoutputsattrsop_namescope/auto_parallel/reshard)paddleutilsr   generate_with_ignorable_keyjoin
create_varr   r   
_insert_op_insert_op_without_syncr   	_set_attr)
r&   idxtensorr   tensor_typesyncnew_var_nameoutinsert_operationcast_ops
             r)   insert_cast_opzInserter.insert_cast_opN  s     |/KKHHne,--
 
 &	  
 
 !%GE%*G 	 #"&?SEN"L Y" 

 

 

 	.*BCCC
r+   c                     d}|r| j         n| j        }t          ||gd          }	 |||d|gi|	j        |	j                            |          d|dd          }
|
                    dd	           d
S )z-Insert send op into block at the given index.send_v2p2p
group_typer   T)ring_idpeeruse_calc_streamr   dynamic_shape)r   r   r   r   r   N)r   r   r   idranksindexr   )r&   r   r   rr   rs   r   r   op_typer   process_groupsend_ops              r)   insert_send_opzInserter.insert_send_opl  s      $GE%*G 	 *3*GGG""&?(+%+11#66#'"!% 	
 
 
 	.*BCCCCCr+   c                    d}|r| j         n| j        }t          ||gd          }	 |||d|gid|gi|	j        |	j                            |          |j        |j        d|dd          }
|
                    d	d
           dS )z-Insert recv op into block at the given index.recv_v2r   r   r   r   T)r   r   	out_shaper   r   r   r   r   r   r   N)	r   r   r   r   r   r   rJ   r   r   )r&   r   r   rr   rs   r   r   r   r   r   recv_ops              r)   insert_recv_opzInserter.insert_recv_op  s      $GE%*G 	 *3*GGG""&?VH%(+%+11#66#\#'"!% 
 
 
 	.*BCCCCCr+   c                 N   t           j        j                            d                    ddg                    }|r| j        n| j        }|                     ||j        |j	        |j
        |j                  } ||d||dd|id|i	          }	|	                    d
d           |S )z2Insert reset_lod op into block at the given index.r8   zreset_lod@RESHARDr   r$   rJ   r   r   r   	lod_resetr   Yr   r   r   r   r   )r   r   r   r   r   r   r   r   rJ   r   r   r   r   )
r&   r   r   r   r   r   r   r   reset_lod_outreset_ops
             r)   insert_reset_lod_opzInserter.insert_reset_lod_op  s     |/KKHH)5122
 
 !%GE%*G 	 ((''k ) 
 
 $###M*g&
 
 
 	>+CDDDr+   c           
      2   d|i}i }||d<   ||d<   |r| j         n| j        }t          di t                      }	t          j                            | j                  5  |                     t          j	        j
                            d                    |	j        dg                    |d         j        d|d         j        |d         j        d	d	
          }
ddd           n# 1 swxY w Y    ||d|d|
gi|          }|                    dd           |
S )z/Insert concat op into block at the given block.r   axisr   concat@RESHARDr8   r   r   NFr$   r   rJ   r   r   persistablestop_gradientr   r   r   r   r   )r   )r   r   r   localsr   staticprogram_guardr'   r   r   r   r   r   r$   r   r   r   r   )r&   r   tensorsr   r   r   r   r   r   helperr   	concat_ops               r)   insert_concat_opzInserter.insert_concat_op  s    wf"i !%GE%*G 	 ::::]((77 	 	""\-IIHHfk5122  aj&!!*.QZ_!# # 
 
C	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 %$SEN
 
 
	 	N,DEEE
s   A:C##C'*C'c	                 6    j         }	fdt          t                              D             }
g }t          |
          D ]&\  }}||	|         k    r|                    |           '|r j        n j        }t          |          dk    r`                     |j        j	        |
j
                  }dgi}d|gi}d|d} ||d|||	          }|                    d
d           |S t          |          dk    r|d         }|	|         |
|         z  }|}|         |
|         z  }|	}di}|||d}g }t          j                   D ]9\  }}||k    r|                    |           !|                    ||z             :t          j                             j                  5   fdt          |          D             }||         }ddd           n# 1 swxY w Y    ||d|d|i|	          }|                    d
d           |S di}d t          t          |                    D             }|||d}                     |j        j	        j
                  } ||d|d|gi|	          }|                    d
d           |S )z.Insert slice op into block at the given block.c                 2    g | ]}|         |         z
  S r?   r?   ).0ir   r   s     r)   
<listcomp>z,Inserter.insert_slice_op.<locals>.<listcomp>  s&    GGGqtAw*GGGr+   r   )r$   r   r   rJ   r   r   r   F)in_placer   assignr   r   r   r   numr   r   c                     g | ]d}                     t          j        j                            d                     ddg                    j        dj        dj        d          eS )r8   split@RESHARDr   NF)r$   r   rJ   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )r  r  r&   r   s     r)   r  z,Inserter.insert_slice_op.<locals>.<listcomp>  s         $$#\5QQHHou%=>>  %l"#[$)"("2&+ % 
 
  r+   NsplitInputc                     g | ]}d S r   r?   r  r  s     r)   r  z,Inserter.insert_slice_op.<locals>.<listcomp>.  s    7771777r+   )r   r   r   infer_flagsr   r   r   )rJ   rangelen	enumerater   r   r   r   r   r   r   r   r   r   r   r'   )r&   r   r   r   r   r   r   r   r   global_shapeslice_shape	diff_dimsr   r   r   r   r   r   r   	assign_opdiff_dimnum_or_sectionsr   cur_idxinput_shape	new_shapeoutssplit_opr  slice_ops   ` ```                         r)   insert_slice_opzInserter.insert_slice_op  s    |GGGGGE#f++4F4FGGG	$[11 	( 	(KE4|E***  ''' $GE%*G 	
 y>>Q""!l[! * #  C F8_FsenG!&7;;E(((67%  I 0HIIIJ ^^q   |H*84H8MMODX&+h*??G&K6]F+TgNNEI(66 > >tD==$$T****$$T_%<====,,U];; $ $     #?33   7m$ $ $ $ $ $ $ $ $ $ $ $ $ $ $  ('  H ~/GHHHJ v&F77eCII&6&6777K *" E ""!l[ *	 #  C ('  H ~/GHHHJs   0%G!!G%(G%r   c                      t          di t                      j        }di}|||d}	|r j        n j        }
g }t          j                  D ]9\  }}||k    r|                    |           !|                    ||z             :t          j        	                     j
                  5   fdt          |          D             }ddd           n# 1 swxY w Y    |
|d|d|i|	          }|                    d	d
           |S )z.Insert split op into block at the given index.r	  r   r  c                     g | ]i}                     t          j        j                            d                     j        dg                    j        dj        j	        dd          jS )r8   r   NFr   )
r   r   r   r   r   r   r$   r   r   r   )r  r  r&   r   r   s     r)   r  z,Inserter.insert_split_op.<locals>.<listcomp>Z  s           1MM&+u!566  !,$. %"' ! 
 
  r+   Nr
  r   r   r   r   )r	  )r   r   rJ   r   r   r  r   r   r   r   r'   r  r   )r&   r   r   r  r   r   r   r  r   r   r   r  r   r   r  r  r   s   ` `             @r)   insert_split_opzInserter.insert_split_opF  s   
 9999lv''JJ $GE%*G 	 	$V\22 	: 	:KE4}}  &&&&  !89999]((77 	 	      //  D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 $#gfudm5
 
 
 	>+CDDDs   1CC"Cc           	         t          di t                      }t          j                            | j                  5  |                     t          j        j        	                    d
                    |j        dg                    t          j        dt          j        j        j        dd          }ddd           n# 1 swxY w Y   i }ddi}t#          t%          d                    |d	<   t%          d          |d
<   |j        |d<   ||d<   t          j                            |||d           |r| j        n| j        }	 |	|d|d|gi|          }
d|_        |
                    dd           |S )z6Insert fill constant op into block at the given index.fill_constant@RESHARDr8   r   NF)r$   r   rJ   r   r   r   	force_cpu1	str_valuevaluer   r   fill_constant)r   r   rJ   r   r   r   Tr   r   )r$  )r   r   r   r   r   r'   r   r   r   r   r   r$   int64r
   VarDescVarTypeDENSE_TENSORstrintr   get_shape_tensor_inputsr   r   r   r   )r&   r   r   rJ   r   r   r   r   r   r   fillconstant_ops              r)   insert_fill_constant_opz Inserter.insert_fill_constant_opn  s    AAAA]((77 
	 
	""\-IIHHfk5122  l\)6!# # 	 	C
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 e$ S]]kSgg"i,,e_ 	- 	
 	
 	

 !%GE%*G 	 +* SEN
 
 
 !!!.2JKKK
s   A6B??CCc           
      B   g }t          |          }d}	d}
t          |
dz   fi t                      }|r| j        n| j        }t
          j                            | j                  5  | 	                    t
          j
        j                            d                    |j        dg                    |j        d|j        |j        dd          }ddd           n# 1 swxY w Y    |||	z   |
d	|gid
|gi|j        |j        |d          }|                    dd           t*          j        j        |j        _        |	dz  }	|rCt4                              | ||	z   ||j        ||          }|	dz  }	|                    |           n|                    |g           ||	fS )z2Insert allgather op into block at the given index.r   rC   @RESHARDr8   r   NFr   xr   )r   nranksr   r   r   r   r   r   )r   r   r   r   r   r   r   r   r'   r   r   r   r   r   r$   r   r   r   r   r6  r   r   DefaultStreamr(  	dist_attrexecution_streamr   r"  extend)r&   r   r   r   r   rL   r   tensor_listrI   
idx_offsetr   r   r   allgather_outallgather_op	split_outs                   r)   insert_allgather_opzInserter.insert_allgather_op  s0   
 !%((
 Wz1>>VXX>> $GE%*G 	 ]((77 	 	!,,\-IIHHfk5122  l *[!# - 
 
M	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ('*&?]O, 8," 

 

 

 	~/GHHH-3 	/ 	a
  	0 00j  1  I !OJy))))///J&&s   &A(CC!Cc                    t          |          }d}|r| j        n| j        }d}	t          |	dz   fi t	                      }
t
          j                            | j                  5  | 	                    t
          j
        j                            d                    |
j        dg                    |j        d|j        |j        dd          }ddd           n# 1 swxY w Y   t
          j                                        } |||z   |	d	|gid
|gi|j        dd|j        |||v r|j                            |          ndd          }|                    dd           |S )z1Insert c_concat op into block at the given index.r   rb   r4  r8   r   NFr   r   r   T)r   r   use_model_parallelr6  r   rankr   r   r   )r   r   r   r   r   r   r   r   r'   r   r   r   r   r   r$   r   r   r   distributedget_rankr   r6  r   r   r   )r&   r   r   r   r   r   rI   r=  r   r   r   c_concat_outcur_rankc_concat_ops                 r)   insert_c_concat_opzInserter.insert_c_concat_op  s    "%((
 $GE%*G 	
 Wz1>>VXX>>]((77 	 	 ++\-IIHHfk5122  l *[!# , 
 
L	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 %..00&&*&?\N+ 8#'&*,"7?57H7H))(333a 
 
 
 	n.FGGGs   $A(CCCc           	         | s|                      ||f           dS d}d}|t          |           k     rt                              | |         d         |          \  }	}
}|	dk    rd}|
dk    r4t                              ||d         | |         d         |g|	||          n3t                              ||d         || |         d         g|	||          }|                     |           |dxx         dz  cc<   t                              | ||||||           n|dz  }|t          |           k     |s|                      ||f           dS dS )z(Concat the tensors and insert concat op.r   Fr   Tr7  N)r   r  	Reshardercompute_concat_infor   r   popconcat_partitions_with_op)partition_tensor_listr   rq   r&   r   r   r   r  
has_concatconcat_axisfirst_ordernew_partition_s                r)   rP  z"Inserter.concat_partitions_with_op   s    % 0	H!((&/)BCCCCCAJc/0000
 11)!,Q/ 	! "$$!%J '!++ !11!F215a8&A'#!% 2    &66!F#%:1%=a%@A'#!% 7   & *--a000FFFaKFFF66-%! 7    QS c/0000T  H%,,fo-FGGGGGH Hr+   NT)r   T)r:   r;   r<   r=   staticmethodr   r   r   r   r   r  r"  r2  rA  rJ  rP  r?   r+   r)   r   r   K  s       44   \: D D D \D, D D D \D2    \8       \ D LPf f f \fP CG% % % \%N & & & \&P =A;' ;' ;' \;'z ( ( ( \(T  :H :H :H \:H :H :Hr+   r   c                   j    e Zd ZdZed             Zed             Zed             Zed             ZdS )Removerz)Remove var and op in the reshard process.c           
      .   g d}g }t           j        D ]}|                    |           t          | j                  D ]\  }}||vr|                    |           |D ]3}g }| j        |         }|j        }|j        }	t          |          D ]\  }
}|j        dk    ryg }|j        D ]+}|	                    t          |||           j                   ,t          |
dd          D ]1}||         j        dk    r||                             d|            n2|j        dk    rg }|j        D ]I}|                    t          |||                     j        }||j        v r|                    |           J|s|                    |
           t%          j                                        |j                  }|j                            |j        d         j        |           |j                            |j        d         j        |           |                    |          }|.|j        }||j        vr|j        |vr|                    |
           |ddd         D ]}
|                    |
d	
           |                                 5dS )z&Remove no need ops in the main program)create_py_readercreate_double_buffer_readerreadr^  rL  r\  shape_concatc_sync_comm_streamr   NFr7  )rM  while_block_infor   r  blocksopsr"   r   output_arg_namesr;  r*   rJ   r  r   input_arg_names get_tensor_dist_attr_for_programprocess_meshprocess_idsr   instanceget_op_protorU   	set_inputr   r$   
set_outputr   get_op_dist_attr_for_program
_remove_op_sync_with_cpp)auto_parallel_main_progdist_contextrank_idnot_remove_op_refremove_block_order	block_idxr&   remove_op_idxrc  r"   r   opdim_listr%   r  	need_saverg  protoop_dist_attrop_process_meshs                       r)   remove_no_need_opszRemover.remove_no_need_opsA  s*   
 
 
  "3 	1 	1I%%i0000 )*A*H I I 	5 	5Iu 222")))444 , 6	# 6	#IM+29=E)C:D$S>> -2 -2R7f$$!H$&$7   2 (%1H #   
 #3B// " "q6;*<<<F,,^XFFF!E =  7222 "I$&$6 	7 	7(II 6$,e5L!" !"  + % #l&>>>%,,X666$ !%,,S111 )244AA"'JJEG%%el1o&:IFFFG&&u}Q'7'<iHHH  ,HHLL+&2&?O'BBBG+<<<%,,S111$TTrT* 2 2  5 1111  """"m6	# 6	#r+   c                    t          | j                  D ]N\  }}t                      }|j        }|j        }t                      }|D ]H}	|	j        D ]}
|
|v r|                    |
           |	j        D ]}
|
|v r|                    |
           I|D ]}||vr|                    |           |dk    ryi }|D ]}	t          |		                    d                    t          t          j                  k    rMd|	j        v rDd|	j        v r;|	                    d          d         }|	                    d          d         }|||<   g }t          |          D ];\  }}|d         j        |                                vr|                    |           <|ddd         D ]}|                    |           d}|t%          |          k     rc||         d         j        }||         d         j        }|||         k    r||         |||                  f||<   |dz  }|t%          |          k     c|D ]}||v r|                    |d	           |                                 PdS )
z'Remove no need vars in the main programr   r   ParamGradNrL  r   Fr7  )r  rb  setrc  r"   re  addrd  r/  attrr   Optimizeinput_namesinputr$   keysr   rO  r  _remove_varro  )rp  dist_params_gradsfeed_var_namesru  r&   remove_varsrc  r"   	need_varsrw  r%   r(   param_grad_map
param_name	grad_nameneed_remove_idxr   r   s                     r)   remove_no_need_varszRemover.remove_no_need_vars  s   
 !**A*H I I 4	# 4	#Iu%%K)C:DI 0 0 " 2 0 0H4''!h/// " 3 0 0H4''!h///0  ) )i''OOC((( A~~!# C CB2779--..#fo2F2FFF#r~55 &". 8 8)+'):):1)=J(*(8(8(;I9BN:6"$!*+<!=!= 4 4ICAw|>+>+>+@+@@@'..s333*44R40 / /C%))#....C 12222!23!7!:!?J 1# 6q 9 >I N:$>>> , 
!;<2)#. 1HC C 12222 # 3 3.((!!#E!2222  """"i4	# 4	#r+   c                 h   t                               | ||           t                              | |           g }t	          t
          j        t          |j        	                                          g           D ]}|
                    |j                   t                               | ||           dS )z0Remove no need vars and ops in the main program.N)rZ  r}  rM   change_while_op_input_and_outputr   operatoriaddlistserial_feed_varsvaluesr   r$   r  )rp  rq  rr  r  r  r(   s         r)   remove_no_need_in_mainzRemover.remove_no_need_in_main  s    
 	""#\7	
 	
 	
 	22#\	
 	
 	
 M4 = D D F FGG
 
 	, 	,C !!#(++++###%6	
 	
 	
 	
 	
r+   c                    t                      }|                                 j        }|D ]!}|j        D ]}|                    |           "|                                }t                      }|j        }|D ]-}|j        dk    r|j        D ]}|                    |           .t                      }	|D ]}||v r|	                    |           |j        }t                      }
t          |          D ]e\  }}d}|j        dk    r|j        D ]
}||	v rd} n|r>|j        D ]}|
                    |           |j        D ]}|
                    |           ft                      }|j        D ]}||
vr|                    |           |D ]}|	                    |d           |
                                 g }|j        }t          |j                  D ]\  }}d}|j        dk    rg }|j        D ]}||v r|                    |           |s|                    |           nt          j                                        |j                  }|j                            |j        d         j        |           |j                            |j        d         j        |           |j        D ]
}||vrd} n|r|                    |           |ddd         D ]}|                    |d           |
                                 dS )z3Remove no need vars and ops in the startup program.r`  FTr7  r   NrL  )r  global_blockrc  re  r  r   rd  r  r"   r  ro  r   r   ri  rj  rU   rk  r   r$   rl  r   rn  )rp  auto_parallel_startup_progmain_input_varsmain_opsrw  r%   startup_blockstartup_output_varsstartup_opsr  actual_need_varsr   
is_need_opr  r(   rv  r"   is_no_need_op	var_namesrz  s                       r)   remove_no_need_in_startupz!Remover.remove_no_need_in_startup  s   
 %%*7799= 	. 	.B. . .##H----. 3??AA!ee#' 	2 	2Bw.../ 2 2#''11112 EE	+ 	( 	(H?**h'''#'55 -- 	3 	3GCJw.../  y((!%JE )  3 " 3 3 3H$((2222 " 2 3 3H$((2222ee%* 	* 	*H///))) 	7 	7C%%c%6666$$&&&! !233 	* 	*GC!Mw...	 " 2 3 3H4''!((222  I!((----)244AA"'JJEG%%el1o&:IFFFG&&u}Q'7'<iHHH/  4''$(ME (  *$$S))) 2& 	6 	6C$$Su$5555$$&&&&&r+   N)	r:   r;   r<   r=   rX  r}  r  r  r  r?   r+   r)   rZ  rZ  >  s        33H# H# \H#T 8# 8# \8#t 
 
 \
( J' J' \J' J' J'r+   rZ  c                      e Zd ZdZi Z	 d,dZed             Zed             Zed             Z	ed             Z
ed             Zed	             Zed
             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zd Zd Zd Zd Zd-dZd Z	 	 d.dZ	 d/dZd Z d Z!d  Z"d! Z#d" Z$d# Z%d$ Z&d% Z'd& Z(d' Z)d( Z*d) Z+d* Z,d+ Z-dS )0rM  a!  
    Reshard tensor in the program according to its distributed attribute and corresponding op distributed attribute.

    Args:
        auto_parallel_main_prog (Program): An auto parallel main program.
        auto_parallel_startup_prog (Program): An auto parallel startup program.
        rank_id (int): The process id.
        dist_context (DistributedContext): The distributed context of this rank.
        dist_params_grads (list): The list contains the tuple of param and grad.
        batch_size (int): The batch size. Default: None.
    Nc                 z   t          |t                    sJ dt          |           d            |0t          |t                    sJ dt          |           d            t          |t                    sJ dt          |           d            t          |t                    sJ dt          |           d            |0t          |t                    sJ dt          |           d            || _        || _        || _        || _        || _	        || _
        i | _        i | _        i | _        i | _        d S )Nz?The type of auto_parallel_main_prog should be Program, but got r8   zJThe type of auto_parallel_startup_prog should be Program or None, but got z+The type of rank_id should be int, but got z?The type of dist_context should be DistributedContext, but got z.The type of batch_size should be int, but got )
isinstancer	   r   r/  r   _auto_parallel_main_prog_auto_parallel_startup_prog_rank_id_dist_context_dist_params_grads_batch_size	_has_sent	_has_recv_has_allgather_has_resharded)r2   rp  r  rr  rq  r  
batch_sizes          r)   r3   zResharder.__init__9  s    17;; 	
 	
83448 8 8	
 	
; &15w??  ? :;;? ? ? ? '3'' 	
 	
J$w--JJJ	
 	
' ,(:;; 	
 	
-L))- - -	
 	
;
 !j#..  /
++/ / / .
 )@%+E()"3%  r+   c                     | j         S r/   )r  r5   s    r)   rp  z!Resharder.auto_parallel_main_proge  s    ,,r+   c                     | j         S r/   )r  r5   s    r)   r  z$Resharder.auto_parallel_startup_progi  s    //r+   c                     | j         S r/   )r  r5   s    r)   rr  zResharder.rank_idm  rO   r+   c                     | j         S r/   )r  r5   s    r)   rq  zResharder.dist_contextq  s    !!r+   c                     | j         S r/   )r  r5   s    r)   r  zResharder.dist_params_gradsu  s    &&r+   c                     | j         S r/   )r  r5   s    r)   r  zResharder.batch_sizey  rY   r+   c                     | j         S r/   )r  r5   s    r)   has_sentzResharder.has_sent}  
    ~r+   c                     | j         S r/   )r  r5   s    r)   has_recvzResharder.has_recv  r  r+   c                     | j         S r/   )r  r5   s    r)   has_allgatherzResharder.has_allgather  s    ""r+   c                     g }t          |           D ]K\  }}||         dk    r|                    |           '|                    ||||                  z             L|S )zCompute the shape of partition.rL  r  r   )complete_shapedims_mappingprocess_shapepartition_shaper   r   s         r)   compute_partition_shapez!Resharder.compute_partition_shape  sy     ">22 	Q 	QICC B&&&&t,,,,&&t}\#=N/O'OPPPPr+   c                    |                     |           }g }t          d |d          }t          t          |                    D ];}||||         z  z  }|||         z  }|||z  |z  z
  }|                    |           <|S )z@Compute the index of process_shape corresponding to the process.c                     | |z  S r/   r?   )r5  ys     r)   <lambda>z1Resharder.compute_process_index.<locals>.<lambda>  s
    a!e r+   r   )r   r   r  r  r   )processr   r  relative_processprocess_indexproductr  r   s           r)   compute_process_indexzResharder.compute_process_index  s     )..w77++]A>>s=))** 	& 	&A"w-2B'BCCq!11G #3w#>#HH    %%%%r+   c                    t                               |||          }t                               | ||          }g }t          t	          |                    D ]p}||         dk    r|                    d||         g           ,|                    |||                  ||         z  |||                  dz   ||         z  g           q|S )z/Compute the partition index in complete tensor.rL  r   r   )rM  r  r  r  r  r   )	r  r  r  r  r   r  r  rq   r  s	            r)   compute_partition_indexz!Resharder.compute_partition_index  s    
 $;;L-
 
 "77]M
 
 s>**++ 
	 
	AA"$$&&?1+='>????&&%l1o69KK&|A7!;)!,-    r+   c                    d}d}d}g }t          |           D ]\  }}|||         k    r|dz  }|d         ||         d         k    rD|d         ||         d         k     r,|}|                    |d         ||         d         g           r|d         ||         d         k    rE|d         ||         d         k    r-d}|}|                    ||         d         |d         g           |                    |           |dk    r|||fS d||fS )zYJudge whether two partition can be concatenated and compute concatenated partition index.r   rL  r   r  )partition_index_xpartition_index_ydiffer_countrS  rT  rU  r   r   s           r)   rN  zResharder.compute_concat_info  sV    "#455 	+ 	+IC(---!G05a888Q"3C"8";;;"%K!(($q'3DS3I!3L)MNNNNG05a888Q"3C"8";;;"#K"%K!((*;C*@*CT!W)MNNN$$T****1]::{M11r+   c                     g }t          |           D ]K\  }}||         dk    r|                    |           '|                    ||||                  z             L|S )zVcompute the complete shape of the slice tensor  with its process mesh and dims mappingrL  r  )r  r  r  r  r   r   s         r)   compute_complete_shapez Resharder.compute_complete_shape  sy     ";// 	O 	OICC B&&%%d++++%%d]<;L-M&MNNNNr+   c                    | s|                      |           dS d}d}|t          |           k     rvt                              | |         |          \  }}}|dk    r3d}|                     |           t                              | |           n|dz  }|t          |           k     v|s|                      |           dS dS )z8Concat the given partitions without inserting concat op.r   FrL  Tr   N)r   r  rM  rN  rO  concat_partitions)r   rq   r  rR  rS  rV  rU  s          r)   r  zResharder.concat_partitions  s     $ 	= ''88888AJc.////090M0M(+_1 1-Q "$$!%J(,,Q/////,m   Q c.////  =$++O<<<<<= =r+   c                    t           j        D ]^}| j        |         }t           j        |         d         }| j        |j                 }t	                      }g }|j        D ]}|                    |          }	|	s'|j        dk    r|	r|j        dk    r|	r|j        dk    rD|	sB|j        D ]}
|
|vr|	                    |
           |j
        D ]}
|                    |
           d}|j        D ].}|j                                        |k    r|j        dk    r|} n/|t          j                                        |j                  }g }|                    d          D ]}
|
|v r|	                    |
           |sJ |                                 |j                            |j        d         j        |           g }|                    d	          D ]f}
|ddd
         D ]X}|                    |
          d
k    r=t1          |
          t1          |          k    sd|v r||vr|	                    |           Yg|sJ |j                            |j        d         j        |           `dS )zNChange while op input and output after the corresponding sub block ops removedop_idr   r
  r  Nr   r   r   r   rL  r4  )rM  ra  rb  
parent_idxr  rc  get_dist_op_for_programr   rd  r   re  r  rU   r   r   ri  rj  r  sortrk  r   r$   outputfindr  rl  r   )rp  rq  sub_block_idx	sub_blockparent_while_op_idparent_blocksub_block_op_inputssub_block_op_outputsrw  dist_opr%   while_oprz  new_Xnew_Outoutput_names                   r)   r  z*Resharder.change_while_op_input_and_output  s    '7 6	E 6	EM/6}EI!*!;M!J" 39):NOL"%%%#% m : :&>>rBB
:7**7*7**7*8++G+$&$7 B B#+???077AAA$&$6 : :+//9999 H"&  7::<<#555"'W:L:L!HE "*,,99(-HHEE$NN3// + +222LL***LL5JJLLLM##ELO$8%@@@G$OOE22 8 8#7"#= 8 8K"''11R77H[)9)999%44&g55#NN;7778 NN7M$$U]1%5%:GDDDDm6	E 6	Er+   c                     d}|d         |d         cxk    r|d         k     s$n |d         |d         cxk    r|d         k     rn nd}|ddgk    r
|ddgk    rd}|S )zBJudge whether two partitions intersect on the specified dimension.Fr   r   Tr?   )r2   shape_xshape_y
overlappeds       r)   is_overlappedzResharder.is_overlapped>  s    
AJ'!*1111wqz1111AJ'!*1111wqz11111Jq!fQF!2!2Jr+   c                 "    |D ]}|dk    r dS dS )NrL  FTr?   )r2   r  dims      r)   
is_unshardzResharder.is_unshardI  s+     	 	Cbyyuu tr+   c                 d    |j         t          v rdS t          |          r|j         t          v rdS dS )NTF)r   _g_special_opsr   r   )r2   rw  s     r)   is_special_opzResharder.is_special_opO  s;    7n$$4r"" 	rw2F'F'F4ur+   c                    | j         j        |                    d          j                 }|j        dk    r|                    d          }n |j        dk    r|                    d          }|D ]O}t          ||| j                   }| j                            |          }|j	        }|j
        }|D ]}	|	dk    r  dS PdS )	Nr  r   	Conditionr    CondrL  FT)rp  rb  r  r   r   r  r*   rq  get_dist_tensor_for_programr9  r  )
r2   rw  r  
input_condr%   r(   dist_tensortensor_dist_attrvar_dims_mappingr  s
             r)   is_condition_replicativez"Resharder.is_condition_replicativeW  s    078L8L8OP	7g+..JJW+++&))J # 		! 		!H()T%A C +GGLLK*4/<' ! !"99 555 ! tr+   Tc                    d}|j         }|j        }|j        }|d         }	|r|d         }
t          d |||
|	fD                       rv||
k    r*|| j        j        vr|D ]}|dk    rt          d          d}||	k    rd}||	k    r8t          |j                  t          |	j                  k    r|j	        j
        rd}nB|d         }t          d ||||	fD                       r||k    rt          d	          ||	k    rd}|S )
z/Judge the tensor whether needs to be resharded.Fr   r   c              3      K   | ]}|V  d S r/   r?   r  r5  s     r)   	<genexpr>z)Resharder.need_reshard.<locals>.<genexpr>y  6              r+   rL  z7The dim must be -1 when tensor process mesh is a union.Tc              3      K   | ]}|V  d S r/   r?   r  s     r)   r  z)Resharder.need_reshard.<locals>.<genexpr>  r  r+   zVIt is not supported that tensor dims mapping is different from op output dims mapping.)r9  r  rg  allrq  process_meshes
ValueErrorr  rh  serial_tensoris_data)r2   r  r9  op_inputr  
is_reshardr  tensor_dims_mappingtensor_process_meshr|  op_input_dims_mappingr   op_output_dims_mappings                r)   need_reshardzResharder.need_reshardm  s   
&0.;.; $A, 4	&$-aL!   (')#	     !' '*???+#0?@ @ %8 " "D#rzz&0$]'" '" !"  * "&J '/99!%J (?::/;<<?6778 8#198 "'J%.q\"   ('*#	     & '*@@@$p   '/99!%Jr+   c                 r   g }| j                             |          }|j        j        }| j         j        D ]j}t          |j                  t          |j                  z  r?t          |j                  t          |j                  k     r|                    |           k|s|                    |           |S )zEGet sub process meshes of the given op if op process mesh is a union.)	rq  r  r9  rg  r	  r  rh  r  r   )r2   rw  r	  r  r|  rg  s         r)   get_op_process_mesheszResharder.get_op_process_meshes  s    #;;B??!+8 -< 	4 	4L<+,,O/00 4l.//#+3 3   %%l333  	3!!/222r+   Fc                     |j         }|j        }|j        }|j        }|j        }	|j        }
|d         }|d         }|j        }|j        }|rt          t          |                    dk    r-t          t          t          |                              dk    sJ t          |          
                    t          |	                    r|}	|}
|j        d         dk     rK|j        d         dk    sJ t          |j                  }| j        |d<   |j                            |           |s!t                              |j        |
|          n|j        }t#                      }t          |          
                    t          |	                    r1t          |                              t          |	                    rn||	k    rg }|	D ]}t                              ||||
|	          }|s|                    ||gdgg           =d |D             }d |D             }d |D             }|                    |          dk    rL|                    |          }||                             |           ||                             d           |                    ||gdgg           |D ]}g }t                              |||||          }g }g }|	D ]d}t                              ||||
|	          }d}t/          d	 t          t1          | j        ||                    D                       r||vrd
 |D                                 |          }d |D             |         }d |D             |         }d} | t          |          k     r.||          s||          }d|| <   n| dz  } | t          |          k     .| t          |          k    rd |D             }|d         }d|d<   |
J d            ||                                vrg ||<   ||                                vrg ||<   |                    |           |j        j        t8          j        k    }!t=          ||||!          }"t?          ||||!          }#||                             |"           ||                             |#           |                    |           t                               ||           |d         }$tC          |$          tC          tD          j#                  k    r | j$        j%        &                    ||           f||                             tO          |                     g }%g }&g }'|d         }(g })tQ          |(          D ]\  }}*|%                    ||         d         |*d         z
             |&                    ||         d         |*d         z
             |'                    |           |)                    |*d         |*d         z
             ||                             tS          |%|&|'|)                     n6g }g }g }+|	D ]}t                              ||||
|	          }||vr.|                    |           |+                    |g|g           R|+|                    |                   d                             |           tU          t          |+d         d                             D ]}} g },tU          t          |+                    D ]P}-|,                    |+|-         d         |                     | dk    r!|                    |+|-         d                    Q|,D ]}.tW          j,        |,          }/tW          j,        |          }0t                              |.||||          }|,D ]}1t                              |1|||
|	          }t/          d t          t1          | j        ||                    D                       s*|/-                    |1           |0-                    |           g }2|0D ]}3t                               |2|3           |2d         }(g }%g }&g }'g })tQ          |(          D ]n\  }}*|%                    ||         d         |*d         z
             |&                    ||         d         |*d         z
             |'                    |           o|)                    |*d         |*d         z
             tS          |%|&|'|)          }4|sdn|.                    |.          }5|                    d          t          |          k    rY|dd                             d          t          |dd                   k    r#|d         dk    rt_          |,|5          g||.<   P|dd         |dd         k    rR|d         dk    rF|d         dk    r:ta          |/|5|j        t8          j        k    d          tc          d          g||.<   t          |/          dk    r6ta          |/|5|j        t8          j        k              tO          |0          |4gn|4g||.<   	|S )a  
        Find the op description sequence to reshard the source tensor for matching the op requirement.

        Args:
            dist_tensor (DistributedTensor): A distributed tensor.
            dist_attr (list): A list contains process_mesh and dims_mapping such as [process_mesh, dims_mapping].
            serial (bool): If serial is true, the dist tensor and dist op come from serial program. Otherwise, they come from auto program.

        Returns:
            Dict, the dict represents the required op description sequence corresponding to process, The key of dict is
            process and value is a list containing op description.
        r   r   rL  Fc                     g | ]
}|d          S r   r?   r  r   s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>  s)     & & &$(Q& & &r+   c                     g | ]
}|d          S r  r?   r  s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>  s)     $ $ $$(Q$ $ $r+   c                     g | ]
}|d          S    r?   r  s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>  s)          $(Q     r+   Nc              3      K   | ]}|V  d S r/   r?   r  rV  s     r)   r  z-Resharder.find_op_desc_seq.<locals>.<genexpr>A  s6       	 	 ! 	 	 	 	 	 	r+   c                     g | ]
}|d          S r  r?   r  s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>M  s)       (,DG  r+   c                     g | ]
}|d          S r  r?   r  s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>P  s)     $ $ $(,DG$ $ $r+   c                     g | ]
}|d          S r  r?   r  s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>S  s)     ( ( ((,DG( ( (r+   Tc                     g | ]}d S rh   r?   r  s     r)   r  z.Resharder.find_op_desc_seq.<locals>.<listcomp>_  s    '@'@'@!'@'@'@r+   z Failed to find the send process.)rK   )rJ   c              3      K   | ]}|V  d S r/   r?   r  s     r)   r  z-Resharder.find_op_desc_seq.<locals>.<genexpr>  s6       	# 	# ! 	# 	# 	# 	# 	# 	#r+   )r   r   r   rJ   )rD  )rI   rJ   )rI   rJ   rK   rL   )rI   rJ   rK   )r   )2r9  r  r  rg  rh  rJ   r  r  nextiterintersectionr  r  rU   	set_shaperM  r  r   
differencer  r   countr   r  mapr  r  r   r   boolrj   r   r  r/  r   Forwardrq  up_down_streamsadd_pair_streamr   r  r   r  copydeepcopyremovelocal_sizesr`   rA   r-   )6r2   r  r9  serialis_union_process_mesh_tensorr  source_tensorsource_dims_mappingsource_process_meshsource_process_groupsource_process_shapetarget_process_meshtarget_dims_mappingtarget_process_grouptarget_process_shaper  r  op_desc_seqpartition_process_mapping_listsource_processsource_partition_indexpartition_listprocess_listhas_usedr   target_processr  target_partition_indexr   all_partition_index_listto_send_processr   r  rK   send_op_descrecv_op_descr   slice_starts
slice_endsslices_axesconcatenated_partition_indexto_slice_tensor_shaper   r  rI   jr  min_comm_groupall_partition_index_list_copied_process!concatenated_partition_index_listrq   slice_op_descallgather_shapes6                                                         r)   find_op_desc_seqzResharder.find_op_desc_seq  s   & '0#1.;.;2>28'l'l2>28 ( 		<C+,,--22c"5667788B>>>? '((55())  < (<$';$q!A%% &q)R////]011I?IaL((333 %I,,#%9;N   $ 	 "mm #$$11$%%
 
 }	&''2237K3L3LMM}	 !%999-/*"6    )2)J)J""'((* *& 6 299/.1AE7K   & &,J& & &N$ $,J$ $ $L   ,J     H &++,BCCqHH . 4 45K L L$U+22>BBB ..u55556==3n5EwO    #7 { {)2)J)J""'((* *& (*$+-(&: R RN-6-N-N&&+,,. .* '+O 	 	%) #$($6$:$:!" !"& &	 	 	 	 	I 3(BB 0N  % 677 $ $0N$ $ $$( (0N( ( (( #h--//#+A; &2>q/.2 %FA  #h--// H--'@'@x'@'@'@H.:1oO*.HQK.::>  ;:: ++2B2B2D2DDD;=K8)1A1A1C1CCC:<K70778NOOO #.";"AV["P'12+*$+	( ( ( (22+*$+	( ( ( $O4;;LIII#N3::<HHH (>???!3302H   #,B-w<<3v~+>+>>> -=MM /  
 N+22 !9::  
  "
 /CA/F,(*%!*+G!H!H D DIC ''.s3A6a@   %%&<S&A!&DtAw&NOOO&&s+++)00a471BCCCCN+22$"#3	     i{@ $& ')$M"6 0 0)2)J)J""'((* *& *1EEE(//0FGGG!(( !/ 3	    ",223IJJ////3}Q/23344 z zs=1122 M MALLq!1!!4Q!7888Avv077a8H8KLLL$ t tG%)]5%9%9N6:m07 73 .7-N-N&+,,. .* %*  %== ( . 3 4 4  /  # 	# 	#%) #$($6$:$:!" !"& &	# 	# 	# 	  	   +11(;;;;BB 6   9;5+J  !33=   
 :!< 1 $&L!#J"$K,.)%./K%L%L 0 0	T$++237:T!WD   #))237:T!WD   $**3////)00a471BCCC$/+'(3	% % %M  &C(44'4BB $ ,11"552334 4/4::2>>23B37889 9/3r99 2&+?  0G,, ,ABB/3Fqrr3JJJ/2b88/2b88 ,&4&5)6)<)K+0	   &dOO0G,,0  #>22Q66 !0*8*9(5(;v{(J	!" !" !" !-9X!" !" !" !.  #0 $G,,Itl r+   c                 0    |D ]}||         }	|	D ]}
t          |
t          t          f          rt          |
j                   3t          |
t
                    rt          |
j        |
j        gd           ft          |
t                    rt          |
j        |
j        gd           g }g } j	        |
                                vrdS | j	                 }	d}t          t          j                            D ]#\  }}|j        j        |j        j        k    r|} n$|J d j	         d            |j        }d }t#          |          r ||          rt$          j        n|                    d          }d}|	D ]}
t          |
t                    rz| j        
                                vr
g  j        |<    j        |         r|
j        d  j        |         D             vr|
j        rt.                              |||t2          j        |	          }t.                              |d
z   ||
j        ||
j        |          \  }}||z  }g }|D ]K}t.                              |||t2          j        |	          }|                    |j                   |d
z  }L j        |                             |
j        |g           n;t.                              |||
j        ||
j        |          \  }}t?          |          D ]}j        ||z            }|j         D ]H}j!        |         }tE           j#        |dgtI          |j%                  z  |j&        |j'                   ItQ          ||j&         j#        |j'                   |d
k    r|}||z  }d |D             } j        |                             |
j        |g           n8 j        |         D ]*}|
j        |d         k    r fd|d
         D             } n+|s
J d            t          |
t
                    r(| j)        
                                vr
g  j)        |<   |
j         j)        |         vr|
j        r^t.                              |||t2          j        |	          }t.          *                    |d
z   ||
j        |
j        ||	           |dz  }nct.          *                    |||
j        |
j        ||	           tW          j        |         |j&        |j,         j#        |j'                   |d
z  } j)        |                             |
j                   t          |
t                    r%| j-        
                                vr
i  j-        |<   |
j         j-        |         
                                vr|
j.        }g }|D ]&}|                    |d
         |d         z
             '|
j        r̉/                    ta          j1        |dz             ||j2        t2          j        |j3                  }t.          4                    |||
j        |
j        ||	           t.                              |d
z   ||t2          j        |	          }|                    |           |dz  }| j-        |         |
j        <   8/                    ta          j1        |dz             ||j2        |j5        |j3                  }t.          4                    |||
j        |
j        ||	           tE           j#        ||d
         |d         |d                    tW          j        |         |d         |d
          j#        |d                    |j2        dk    rd} j6        j7        D ]} | j!        D ]|}!| j!        |!         }"|"j8        rf|"j2        |j2        k    rVt.          9                    |d
z   ||"||	          }#|                    |#           |dz  }|# j-        |         |
j        <   d} n}|r n|du sJ |                    |           |d
z  }| j-        |         |
j        <   |                     j-        |         |
j                            t          |
tt                    r|
j;        }$|}%|g}&t          |          D ],\  }}'t.          <                    ||'|$|         |&||	           -|&d         }|}(t?          |%|(          D ]})j        |)         }|j         D ]I}j!        |         }tE           j#        |dgtI          |j%                  z  |d         |j'                   JtQ          ||d          j#        |j'                   	t          |
tz          t          t|          f          rd}*t          |
tz                    rtI          |          d
k    s|rJ tI          |          d
k    r|d         d         n|}+ta          j1        |dz             },t.          ?                    ||+|
j@        |
jA        |
jB        |,||	  	        }*not          |
t                    r&t.          C                    |||
j        ||	          }*n4t          |
t|                    sJ tI          |          d
k    sJ |d         }*t          |
t|                    se|*J tE           j#        |*|d
         |d         |d                    tW          j        |         |d         |d
          j#        |d                    |j3        dk    r2dt          jE        |                    d          j                 
                                vr-i t          jE        |                    d          j                 d<   |t          jE        |                    d          j                 d         
                                vr3g t          jE        |                    d          j                 d         |<   t          jE        |                    d          j                 d         |                             ||*j        g           j        |d         D ]}t          |          rg }-|jG        D ]}. j#        H                    |          }/|/J |.|k    rc|/I                    |          }0|.}1|*j        },|1|,k    sJ |j                                        |j                                        k    r|j        J                    |.|,           |/K                    |,|0            j#        L                    ||/            j#        M                    |*|0           |j3        dk    r|-                    |,           |/j&        }2|/N                    |          }3|2|d         k    rX|3|d
         k    rL|j        J                    |.|,           |/K                    |,|0            j#        L                    ||/           |-rlt          jP                    Q                    |j3                  }4|j        R                    |4jS        d         j        |T                    d          |-z              dS )z
        Parse op desc sequence and insert op in the block

        src_tensor_attr(TensorDistAttr): tensor's dist_attr
        dst_input_attr(list): input_var's dist_attrs of the op
        r   r   Nz/The op for reshard cannot be found in the rank z	 program.c                 ,    |                      d          S )NGRAD)endswithr$   s    r)   is_gradz(Resharder.parse_op_desc.<locals>.is_grady  s    ==(((r+   r   c                     g | ]
}|d          S r  r?   r  s     r)   r  z+Resharder.parse_op_desc.<locals>.<listcomp>  s/     M M MAaDM M Mr+   r7  r   )rL   r   rL  chunk_idc                     g | ]	}|j         
S r?   r]  )r  r(   s     r)   r  z+Resharder.parse_op_desc.<locals>.<listcomp>  s    +L+L+LCH+L+L+Lr+   r   c                 <    g | ]}t          |j                  S r?   )r*   rp  )r  r%   r&   r2   s     r)   r  z+Resharder.parse_op_desc.<locals>.<listcomp>  sA     + + + %- !7$,$)$($@!" !"+ + +r+   z6The result of parsing allgather op should not be None.r  @recvr$   rJ   r   r   r   FTr4  )r   r   r   r   r   r   r   var_reshard_mappingr  r   )Ur  rA   r`   r   rI   rj   rr   rs   r   rr  r  r  r  rc  rU   r   r$   r   r   Backwardr  r  rK   r   r   r   r*  rA  rL   r,  r   r  rd  r"   r   rq  r  rJ   rg  ra  r   r  r   r   r  r  rq   r   r   generater   r   r   r   rp  rb  r  r   r   r   rP  r   r-   r  r   r   r   rJ  rM  ra  r   re  rm  get_input_dist_attr_rename_inputset_input_dist_attrset_op_dist_attr_for_program set_tensor_dist_attr_for_programget_input_dims_mappingr   ri  rj  rk  r   r  )5r2   r&   r?  
src_tensor
reshard_opsrc_tensor_attrdst_input_attrr   rr  op_desc_listop_descr<  rQ  r   r   rw  src_namer^  r   end_varsout_castr=  tensor_name_listr(   offsetout_nameout_varr   rq   rJ   recv_tensorset_lod	tmp_blocktmp_var_nametmp_varr   r   pre_idxidx_listr   r  r  target_tensorto_slice_tensornew_namewhile_op_X_appendr$   r{  op_input_dist_attrold_namer|  r  rz  s5   ``                                                   r)   parse_op_desczResharder.parse_op_descC  s   * # 	 	G&w/L'  o/DE   &gm444444 % gk2u      44 % gk2u     "<{//1111F"4<0i	2233 	 	IE2wzZ_/// 0 UdlUUU  ?	) 	) 	) j)),.5gh.?.?,FOO++ 	 # c	 c	G'?33 b4#5#:#:#<#<<<35D&x0)(3 U"w} M M"&"4X">M M M 8 8  F#+#:#:!&#"L!% $; $ $ 3;2N2N!!G$#M#'.'9!% 3O 3 3/Z z)+-(#. 
% 
%C'/'>'> % # # ' &%) (? ( (H -33HMBBB1HCC*84;;$],<=    3;2N2N!&#M#'.'9!% 3O 3 3/Z ',J&7&7  F!&3<!8B,.,? " "*/*X*> 1$($5$+%'D3w}+=+=$=$3$@-<-E!" !" !" !" !" G " / < $ 1)8)A	     &??'2Hz)+L+L+L+L+L(*84;;$],<=    !% 28 < 
" 
""=DG33+ + + + + 15Q+ + +K "E 4 #  L {  GZ00 E4=#5#5#7#777.0DM(+;dmH&=== $!#+#:#:!&#"L!% $; $ $ !//!!G$#K#K#!% 0    q //!&#K#K#!% 0    O!IcN+8+8 -%4%=    qM(+227;???GZ00 Z4=#5#5#7#777.0DM(+;dmH&=&B&B&D&DDD&-&=OE!0 : :U1Xa%89999 bO&+&6&6!,!5h6H!I!I"'&0&:"(,!+ '7 ' ' !//!'#K#K#!% 0    $,#:#:!!G'#"K!% $; $ $ $**8444q?Gh/<<&+&6&6!,!5h6H!I!I"'&0&:","2!+ '7 ' ' !//!'#K#K#!% 0    * -'*1-*1-%3A%6    O!IcN*1-*1- -%3A%6    '0A55&+G "&!=!D* * )4=N !. !.L.7n\.JG(/%.,3,=+6+@-A -A -5,H,H0503a0;070759 -I -. -. )6 )4(:(:=(I(I(I(+q,9 )-h(?(L 37(-#* !*$)E!*#*d?????'..{;;;1HCCNDM(3GK@@&&t}X'>w{'KLLLLG\22 l'.'C$5%.{%;%; 	 	ME666-,U3 ! 7     qkw00  A1B$&$7  "'*X"6) -#D3w}#5#55*1-%4%=     ?&q))!0!9	    $ +'<iH  I !%g{33 "0122a774 87 5
 455:: .a033' $
  +3Hz4IJJH$,$<$<'&~$\$\%- '! %= 
% 
%MM  )>?? 0$,$?$?"! %@ % %MM &gy99999x==A----$,QKM!'955 (444%)%&q)&q)!/!2    K	#&q)&q))!/!2    ?g-- .(9GGK003 $&&! !  "2277;3G3G3JK1 !(9GGK003 / 1158 8 ') "2277;3G3G3JK1"$ .rww{/C/C/FG-  &8J'K L L L  )CDD/ 7 7B$R(( ! (*% " 2 +" +" -JJ2NN %  ,7778++ , @ @ J J / (,H'4'9H#+x#7#7#7#7!wzz||z/A/A/C/CCC " 5 5dH E E E , @ @$,.@!" !" !" !% 1 N N$&!" !" !" !% 1 R R$13E!" !" !" $&7g#5#5$5$<$<X$F$F$F (.:.GO , C CH M M 2
 !0>!3D D D$9^A=N$N$N " 5 5dH E E E , @ @$,.@!" !" !" !% 1 N N$&!" !" !"
 )  - 6 8 8 E Ebg N N))!LO0HHSMM,==  Ac	 c	r+   c           
         |j         t          v sJ | j        j        |                    d          j                 }|j        }g }|D ]}| j                            |          }|s|j	        }|j
        D ]}||k    r|j        }	|                    |          }
|j        }d}|D ]*}|	|d         k    r|
|d         k    r||d         k    rd} n+|s,|                    |	|
||                    d          g           |S Nr  Fr   r   r  Tr   )r   _g_subblock_opsrp  rb  r  r   rc  rq  r  r9  re  rg  rn  ra  r   )r2   rw  r%   r  rc  input_attrsr  r9  r$   rg  input_dims_mappingra  	has_exist
input_attrs                 r)   _get_subblock_input_attrsz#Resharder._get_subblock_input_attrs*	  sW   w/))))078L8L8OP	m 	 	B'??CCG )I*  8###,#9L)2)I)I * *&  )1H %I&1 " "
(JqM99 2jm C C (JqM 9 9(,I!E$ #** , 2 ( "	 2 2	  #2 r+   c           
         |j         t          v sJ | j        j        |                    d          j                 }|j        }g }|D ]}| j                            |          }|s|j	        }|j
        D ]}||k    r|j        }	|                    |          }
|j	        j        }d}|D ]*}|	|d         k    r|
|d         k    r||d         k    rd} n+|s,|                    |	|
||                    d          g           |S r  )r   r  rp  rb  r  r   rc  rq  r  r9  rd  rg  get_output_dims_mappingra  r   )r2   rw  r%   r  rc  output_attrsr  r9  r$   rg  output_dims_mappingra  r  output_attrs                 r)   _get_subblock_output_attrsz$Resharder._get_subblock_output_attrsQ	  sZ   w/))))078L8L8OP	m 	 	B'??CCG )I+  8###,#9L*3*K*K + +'  '09H %I'3 " "(KN:: 3{1~ E E (KN : :(,I!E$ $++ , 3 ( "	 2 2	  #2 r+   c           	         g }| j                             |          }|j        }|j        }| j         j        D ]j}t          |j                  t          |j                  z  r?t          |j                  t          |j                  k     r|                    |           k|s|                    |           |	                    |          }|j
        }	g }
|D ].}|
                    |||	|                    d          g           /|
S )Nr   )rq  r  r9  rg  r	  r  rh  r  r   rn  ra  r  )r2   rw  r%   r	  r  r9  r|  rg  r  ra  r  s              r)   _get_common_op_input_attrsz$Resharder._get_common_op_input_attrsx	  s5   #;;B??%	#0 -< 	4 	4L<+,,O/00 4l.//#+3 3   %%l333  	3!!/222&==hGG%* 	 	L18RWWY=O=OP    r+   c                     g }|j         t          v r/|                     ||          }|s|                     ||          }n|                     ||          }|sJ d|j         d| d            |S )NzThe input 'z	' of op 'z+' has no distributed attributes in subblock)r   r  r  r  r  r$   )r2   rw  r%   op_input_attrss       r)   get_op_input_attrszResharder.get_op_input_attrs	  s    7o%%!;;BIIN! O "&!@!@X!N!N!<<RJJN 	
 	
a"'aaHaaa	
 	
~ r+   c                 4   t                      }t          | j        j                  }|dk    rg }d}| j        j        D ]!}|j        D ]}|                    |           "t          | j        j                  D ]g\  }}t          t          |j                            t          |          k    r|                    |           Mt          |j                  |k     rd}h|r3t          |          D ]%}| j        j        	                    |           "dS dS dS )z;Remove global process mesh from dist_context.process_meshesr   FTN)
r  r  rq  r	  rh  r  r  r   reversedrO  )r2   rh  process_mesh_countglobal_process_mesh_idxhas_sub_process_meshrg  
process_idr   s           r)   _remove_global_process_meshz%Resharder._remove_global_process_mesh	  sS   ee !2!ABB!!&(##(  $ 1 @ 0 0".": 0 0JOOJ////0%.!0& & 0 0!\ s<34455[9I9III+2237777122[@@+/(# >#$;<< > >C%488====! "!> >> >r+   c                 r   dt           j        |         v rt           j        |         d         }|j        D ]}|j        D ]}||v r| j                            |          }|j        }d }||         D ]H}	|j        |	d         d         k    r/|                    |          |	d         d         k    r
|	d         } nI|rF|j	        
                    ||           |                    |          }
|                    ||
           |j        D ]}||v rt          ||                   dk    rt          d          ||         d         d         }|j	                            ||           | j                            |          }|j        }|                    |          }|                    ||           d S d S )Nrf  r   r   zpThe scene is not supported that the output is inplaced and the tensor has been resharded multiply when as input.)rM  ra  rc  re  rq  r  r9  rg  rn  rU   rj  ri  rk  rd  r  r
  _rename_outputget_output_dist_attrset_output_dist_attr)r2   ru  r&   rf  rw  r%   r  r9  target_namer   r  r{  op_output_dist_attrs                r)   $_change_subblock_op_input_and_outputz.Resharder._change_subblock_op_input_and_output	  s)    I$>y$III"+"<Y"G%# i + + " 2  H#666"&"3"K"KB"O"O$+$5	&*$7$A & &D ) 6$q'!* D D$-$D$DX$N$N#'71:%. %. /31g %& G11(KHHH1:1N1N (2 2. &99 +-?  
 !# 3  H#66628<==AA", !S# #  ':(&CA&Fq&I..xEEE"&"3"K"KB"O"O'.'8.:.O.O$/ /+ %99')<  = JI+ +r+   c           
         d}|t          |j                  k     rt          |j                  }|j        |         }|                     |          r|dz  }U| j                            |          }||j        t          v r|                     |          st          d          |	                    d          j
        t          j        vr'i t          j        |	                    d          j
        <   |j        
                                t          j        |	                    d          j
                 d<   |j        dk    r|                    d          }n(|j        dk    r|                    d	          }n|j        }|                                 d}|D ]w}d
|v rt#          ||| j                  }	| j                            |	          }
d}|
j        j        | j        j        vrJ| j        j        r>d}|
j        j                            d          t          |
j        j                  k    sJ |                     ||          }|D ]}|rQt5          |d         j                  t5          |
j        j        j                  k    r|d         |
j        j        k    rU|
k|                     |
|          rU|                     |
||          }|                     |||	||
j        |           t          |j                  }||z   |z
  }|}Ðy||z   dz   }n|dz  }|t          |j                  k     |                                 d S )Nr   r   zFPlease check the condition due to the dims mapping is not replicative.r  r  r   r   r    r  lod_tensor_blocking_queueFTrL  )r5  ) r  rc  r  rq  r  r   r  r  r
  r  r   rM  ra  rU   r  re  r  r*   rp  r  r9  rg  r	  r  r*  r  r  rh  r  rX  r  ro  )r2   r&   r   pre_op_countrw  r  input_var_namesr=  r%   r(   r  r5  r  r  reshard_op_desccur_op_counts                   r)   _reshard_inputzResharder._reshard_input	  s   C	NN""uy>>L3B!!"%% q'??CCG"7o--88<< (d   ,,/(9: : OQ	2277;3G3G3JK 

 .rww{/C/C/FG 7g%%&(hhsmmOOW 333&(hhw&7&7OO&(&8O$$&&&
 / <8 <8H2h>> 0 %)E C #'"3"O"O# #K
 490#-:#0?@ @ -<@ 8<4*4AGG    !6!CDD E  E  E  E &*%<%<R%J%JN&4 $8 $8
7 )
 !$JqM$= > >#&$/$9$F$R$" $"!" !" %/qM#.#8#E%F %F !)&2t7H7H'8 82 /3.C.C + *=Y /D / /O
 !.. % / # " + 5 *   ,/uy>>L *\ 9L H ' ,8LI$8J J&*qE C	NN""F 	r+   c	                    | j         |k    rR|j        t          j        k    r|                    t          j        |j        dz             |j        |j	        t          j
        |j                  }	t                              ||dz   |	|||                    d                     d }
|j	        dk    rd}| j        j        D ]}|j        D ]}|j        |         }|j        r|j	        |j	        k    r|                    t          j        |j        dz             |	j        |	j        |	j        |	j	                  }
|dz  }|                    |d	|	|d
d|
id|                    d          i           d} n|r n|du sJ |                    |dz   dd|
|	gn|
gid|gi|	j        |j        |                    d          d          }|                    dd           d S |j	        dk    r|                    t          j        |j        dz             |j        |j	        |j
        |j                  }t                              ||dz   ||||                    d                     d}| j        j        D ]q}|j        D ]c}|j        |         }|j        rM|j	        |j	        k    r=|dz  }|                    |d	||d
d|id|                    d          i           d} nd|r nr|du sJ d S t                              ||dz   ||||                    d                     t+          |j        |dz            |j        |j        | j        |j                   d S d S )Nrd  re  r   r   r   Fz	@RESETLODr   r   r   r   r   Tr  r   r   r   r   r   r`  )rr  r   r   r,  r   r   rh  r$   rJ   r   r*  r   r   r   r  rp  rb  r"   r  r   r   r   rc  rg  r  rq  ra  )r2   r&   r   r(   rw  	send_rank	recv_ranksrc_output_attrdst_tensor_attrrecv_cast_outr   r}  r~  r  r  r   recv_outs                    r)   _handle_recvzResharder._handle_recvU
  sR    <9$$yFK'' % 0 0$-ch.@AA)!m , !1 ! ! ''!G!GGI&&   !%=A%%#G%)%A%H " "	,5N & &L&/n\&BG '&$+$5$F$F050@0@)4)=(+;(>*& *& +8*=)6);*7*=.;.E 1A 1" 1" !$q % 0 0$')41>W+M+M-2M,B+4bggi6H6H*I !1 !" !" !" +/ %" "!E""d????  **!G,4 +OO"/	 #SEN$1$7%(Y#%779#5#5  +  " !!.2JKKKKK=A%%$//(1#(W2DEE!i"%-!i X  0    H ++a !!	**   $G%)%A%H " "	,5N & &L&/n\&BG '&$+$5$F$F #q % 0 0$')419+H+H-2CL+4bggi6H6H*I !1 !" !" !" +/ %" "!E""d??????++a!!	**   K	#'*'4'4)!0!9     k %$r+   c	           
         |j         t          j        k    rut                              ||dz   ||                    d          t          j                  }	t                              ||dz   |	|||                    d                     d S t                              ||dz   ||||                    d                     t          |j	        |dz            |d         |d         | j
        |d                    d S )Nr   r   r  r   r`  )r   r   r,  r   r   r  r*  r   r   rc  rq  )
r2   r&   r   r(   rw  r  r  r  r  cast_outs
             r)   _handle_sendzResharder._handle_send
  s    9##..sQwRWWY%7%7 H ##a	""     ##sQwY	2779;M;M   C	#'"""!(+     r+   c                    d}d }|t          |j                  k     rSt          |j                  }|j        |         }| j                            |          }| ||          sd}|j        D ]}t          ||| j                  }	| j                            |	          }
|
j        j	        }|j        j	        |j        
                    |          |j        j        |                    d          g}|
N|                     |
|d          r6t          |j                  t          |j                  t          |d         j                  z  z
  }|rt          |          t          |d         j                  k    r|
j        j                            d          t          |
j        j                  k    s2|d                             d          t          |d                   k    rt%          d          t'          |          D ]1\  }}|}|}|t          |d         j                  k    r8|t          |d         j                  z
  t          |d         j                  z  }|d         j        |         }||k    rz|	j        d         dk    r8t+          |	j                  }| j        |d<   |	j                            |           | j        |k    r"|                     |||	|||||
j                   | j        |k    r#|                     |||	|||||
j                   t9          ||gd	           3nt'          |          D ]\  }}|}|d         j        |         }||k    r!|	j        d         dk    r8t+          |	j                  }| j        |d<   |	j                            |           | j        |k    r"|                     |||	|||||
j                   | j        |k    r"|                     |||	|||||
j                   t9          ||gd	           t          |j                  }||z   |z
  }|}||z   dz   }n|dz  }|t          |j                  k     Qd S d S )
Nr   c                 n    g d}|t           z  }|t          z  }| j        |v rdS t          |           rdS dS )N)r\  r]  r^  write_to_arrayread_from_arraynopdependTF)r  r  r   r   )rw  skip_opss     r)   _is_special_opz1Resharder._reshard_output.<locals>._is_special_op  sU      H &H'Hw(""tR   t5r+   r   FrL  r   zThe dims_mapping must be -1r   r   )r  rc  rq  r  rd  r*   rp  r  r9  rg  r  ra  r  r  r  rh  r  r*  r
  r  rJ   r  r  rU   r(  rr  r  r  r   )r2   r&   r   r  r  rw  r  r=  r%   r(   r  r  r  tensor_processesr   tensor_processr  actual_indexr   r  r  s                        r)   _reshard_outputzResharder._reshard_output   s   	 	 	& C	NN""uy>>L3B'??CCG">>"+=+="
 " 3 M8 M8H0 %)E C #'"3"O"O# #K +6*?*L')6)AA(KK)2	**	#K #.43D3D#[%4 4. ,//;, ,   3 ?@@!+a."<==>,( , v8"#344 +A :9 9     $/#8#E#K#K$&$" $"%($/$9$F&" &"$" $" &1^%9%9"%=%=$/NB" B" &" &" +5(E+& +& %& BK(8B& B& 6%. 6%.(=~ 5C	7<+0C,7N,F5* 5* ,* ,* 16254?N4N32 3212 14KN4N0O0O<PL 0;1~/I,80* ,5+<+<,4+.9Q<2+=+=8<SYI;??IaL,/H,>,>y,I,I,I+/<4+?+?,0,=,=0503030204090;0;0E	-. 	-. 	-. 	-. .2\Y-F-F,0,=,=0503030204090;0;0E	-. 	-. 	-. 	-. ->15y0A;@-. -. -. -. -.g6%.p >G$4>" >" )!* )!*$9E> 1?I+6q>+Ee+LD'0D'8'8(0'*y|r'9'948OO	7;	!(+(:(:9(E(E(E'+|t';';(,(9(9,1,/,/,.,0,5,7,7,A	)* 	)* 	)* 	)* *.)B)B(,(9(9,1,/,/,.,0,5,7,7,A	)* 	)* 	)* 	)* ):-19,=%)* )* )* )* )* ,/uy>>L *\ 9L H ' ,8LJ&*qo C	NN""""""r+   c                    |                                   t          | j        j                  D ]S\  }}|t          j        v r|                     ||           |                     |           |                     |           Tt          
                    | j        | j        | j        | j                   t                              | j        | j                   i t          _        d S r/   )r  r  rp  rb  rM  ra  r  r  r  rZ  r  rq  rr  r  r  r  )r2   ru  r&   s      r)   reshardzResharder.reshard  s    ((*** )$*F*M N N 
	( 
	(IuI66699)UKKK &&&   '''' 	&&(L"		
 	
 	
 	))($*I	
 	
 	

 &(	"""r+   c                    g t           d}d }|j        |v r|S |j        }|dk    r|S | j                            |          }| j                            |          }|r|s|S |j                            |j                  }	|j        j        }
|
|	|j        j	        |
                    d          g}||                     ||          r|| j        vr|g| j        |<   nc| j        |         D ]5}|j        }|                    |          }|j        }|	|k    r
||
k    r|c S 6| j        |                             |           |                     ||d          }|j        j        }|                     |||          }|S )Nr   lod_tensor_blocking_queue_0r   T)r4  )r  r   r$   rq  r  r  r9  rn  rg  ra  r  r  r  r   rX  r  r   parse_op_desc_for_cost)r2   rw  r   clusternot_supported_op_typereshard_op_costtensor_namer  r  r  rg  r9  r   item_dist_attritem_dims_mappingitem_process_meshr  r   s                     r)   get_costzResharder.get_cost  s    !;. :' :7+++"" +K;;;&&"/KK  +CCBGG" +' +**&0GGK     '0=  %.GGI&&		 *t/@/@0 0* #$*===<C9+K88$($7$D 7 7D-1^N . E E$/!" !" .
 1?0K- ,0A A A$5$E$E'6 6 6 6+K8??HHH&*&;&;#Yt '< ' 'O (5;E&*&A&A'' 'O r+   c                 j   |s|                     |           d S d}d}|t          |          k     rt                              ||         |          \  }	}
}|	dk    rd}i }d|d<   d|	i|d<   |
dk    rd	|||         f||fgi|d
<   nd	||f|||         fgi|d
<   |                    |           ||vrg ||<   ||d<   ||                              t          |||                     |                     ||||||           n|dz  }|t          |          k     |s|                     |           d S d S )Nr   FrL  Tr   rw  r   r   r   r   r   rt  r  rD  r   )r   r  rM  rN  rO  r   _concat_partitions_for_cost)r2   rQ  rq   r   rr  local_rank_comp_costr  r  rR  rS  rT  rU  concat_descs                r)   r  z%Resharder._concat_partitions_for_cost  s    % 4	>!((99999AJc/0000
 11)!,o 	! "$$!%J"$K(0K%,2K+@K("a''!&(=a(@ A!& 8"1H--  !& 8!&(=a(@ A"1H- *--a000&:::8:,W5+0K((188$$/w    
 44-%,   Q[ c/0000\  >%,,_=====> >r+   c                    d }t          |          }g }g }i }|D ]}	g }
||	         }|D ]}t          |t                    r|	|j        g}|j        }t          d|||          } |||          \  }}|J|                    |t          ||          fg           |                    t          |                     |s,||                             |t          ||          f           t          |t                    rb|j
        }|j        }t          d|||          }g }t          |          D ]F\  }}|dk    r&|                    |t          |          z             1|                    |           G |||          \  }}|J|                    |t          ||          fg           |                    t          |                     n.|s,||                             |t          ||          f           |	|vrg ||	<   i }d|d<   d||fgi|d<   t          |          dd	|d
<   ||d<   ||	                             t          |||	                     <t          |t                    r8|j        }t          |          D ]\  }}|                     |
|||	||            t          |t$                    r|	|vrg ||	<   t          |
          dk    s|
rJ g }t          |
          dk    r0|
d         D ]&}|                    |d         |d         z
             'n|j        }i }d|d<   d t'          t          |j                            D             }|j        |j        |j        |d|d
<   d||fgi|d<   ||d<   ||	                             t/          |||	                     ||f}|S )Nc                    d\  }}d}|t          |           k     rc| |         t          |          k    rd}|D ])}|| |         v r|}| |                             |           *||dz  }nn|t          |           k     c||fS )N)NFr   Tr   )r  r  r  )
comm_ranksgroup_ranksresis_the_samer   rD  s         r)   _get_idxz2Resharder.parse_op_desc_for_cost.<locals>._get_idxK  s    *CCJ''c?c+&6&666"&K' 2 2Dz#..!"3++D111;1HCC J'' ##r+   r   )rt  comm_contextrC   r   r
  rw  r   )r  r   r   r   r  r   r   c                     g | ]}d S r  r?   r  s     r)   r  z4Resharder.parse_op_desc_for_cost.<locals>.<listcomp>  s    "G"G"G1"G"G"Gr+   )r   r   r   r  r  )r   r  rj   rs   rJ   r   r   r   r  rA   rI   r  r  r   r   r   r   r  r   r  r   r   r   r   )r2   r  r   r  r  r  
comm_costsr  r  keyrQ  rs  rt  r  rJ   	send_descr   r  allgather_descsplit_inputs_shaper  
split_descr   partition_idexrP  r   
slice_descr  r  s                                r)   r  z Resharder.parse_op_desc_for_costJ  s[   	$ 	$ 	$" #7++ 

!" B	 B	C$&!*3/L'  gz22 ~#&"4K#ME /!;u! !I (0x
K'H'H$C{")) %0$.095A%& %& %&!"
 
 
 #))#k*:*:;;;;* 	&sO22$/$.095A%& %& %&!"    99 _")-K#ME%4$k5%& &N *,&$-e$4$4 ; ;S!88.55cC<L<L6LMMMM.55c::::'/x
K'H'H$C{")) %0$30>5A%& %& %&!"
 
 
 #))#k*:*:;;;;* 	&sO22$/$30>5A%& %& %&!"   "66646,S1!#J'.Jt$ E+=#>"?,Jx( 36k2B2BA*N*NJw'*/Jw'(-44#$.c     
  66 *+2+H(/89M/N/N  +^881*!0#     55 "66646,S1122a774 87 5 -/)011Q66$9!$< L LD188a479JKKKKL 18-!#J'.Jt$"G"GeC4E4E.F.F"G"G"GK '"). ''2	+ +Jw'  5*?"@!A,Jx( +0Jw'(-44#$.c    wB /0
r+   r/   )TN)FFrW  ).r:   r;   r<   r=   ra  r3   r>   rp  r  rr  rq  r  r  r  r  r  rX  r  r  r  rN  r  r  r  r  r  r  r  r  r  rX  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r?   r+   r)   rM  rM  *  s       
 
  *! *! *! *!X - - X- 0 0 X0   X " " X" ' ' X'     X    X   X # # X# 	 	 \	   \    \4 2 2 \2>   \ = = \=, 8E 8E \8Et	 	 	      ,@ @ @ @D  0 %*~ ~ ~ ~P e e e eN% % %N% % %N  6  &> > >,0 0 0de e eNF F FP! ! !Fo o ob( ( (<: : :x=> => =>~^ ^ ^ ^ ^r+   rM  )3r0  r  collectionsr   	functoolsr   r   /paddle.distributed.fleet.meta_optimizers.commonr   %paddle.distributed.utils.stream_utilsr   paddle.frameworkr   r   r	   r
   paddle.utilsr   costr   r   r   r   r   r   r   rq  r   r   r   r   r   r   r   r   r   r   r   r  r  r*   r-   rA   r`   rj   r   r   r   r   rZ  rM  r?   r+   r)   <module>r     s      # # # # # #        B B B B B B E E E E E E F F F F F F F F F F F F $ $ $ $ $ $                  - , , , , , , , , , , ,                  -.CD/0	 	 	+ + + + + + + +(&J &J &J &J &J &J &J &JR!j !j !j !j !j !j !j !jH/P /P /P /P /P /P /P /Pd/P /P /P /P /P /P /P /Pd*h *h *h *h *h *h *h *hZX X X X X X X X0pH pH pH pH pH pH pH pHfi' i' i' i' i' i' i' i'X~" ~" ~" ~" ~" ~" ~" ~" ~" ~"r+   