
    x-j[@                    8   d dl mZ d dlZd dlZd dlmZ d dlZd dlZd dl	m
Z d dlmZ d dlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ erd dl	mZ d dlmZ d$dZ	 d%dZd Z G d de          Zd Zd Z  G d de          Z!d&dZ"d Z#d  Z$d! Z%d$d"Z&d$d#Z'dS )'    )annotationsN)TYPE_CHECKING)Tensor)PyLayer   )check_placements_equal
to_dim_map)choose_reshard_func)get_1D_sub_process_mesh)
split_mesh)	Placement)ProcessMeshdist_tensorr   meshr   
placementslist[Placement]c                
   t          j        d          dk    sdS d}t          j                    r| j        }| j        }n>t          j                                        r | j        }|                                 j	        }||k    s|j
        dk    rdS t          d |D                       rdS t          d |D                       rdS t          t          t          |          t          |                              D ]}||         }||         }|                                r_|                                rK||k    rE|                                }	|                                }
|t#          |	|
z
            dk    r dS |}|S )zS
    Get the specific dimension for alltoall communication in nd_mesh reshard.
    FLAGS_enable_moe_utilstrueNr   c              3  >   K   | ]}|                                 V  d S N
is_partial.0ps     j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/auto_parallel/moe_utils.py	<genexpr>z)_specific_alltoall_dim.<locals>.<genexpr>;   s*      
2
2a1<<>>
2
2
2
2
2
2    c              3  >   K   | ]}|                                 V  d S r   r   r   s     r   r   z)_specific_alltoall_dim.<locals>.<genexpr>=   s*      
.
.a1<<>>
.
.
.
.
.
.r   )osgetenvpaddlein_dynamic_modeprocess_meshr   	frameworkin_pir_mode	dist_attrplacements_attrndimanyrangeminlenis_shardget_dimabs)r   r   r   mesh_dimsrc_meshsrc_placementsisrc_pdst_psrc_dimdst_dims              r   _specific_alltoall_dimr:   '   s    9-..&88tH A+$/			%	%	'	' A+$..00@48=A--t

2
2>
2
2
222 t

.
.:
.
.
... t3s>**C
OO<<==  q!1>> 	 0 0 	Ue^^ mmooGmmooG#s7W+<'='='B'BttOr   c                r   t          | j                  }||}t          |          D ]H\  }}|                                r/|                                }||         }||j        |         z  ||<   It          j                    rVt
          j                                        }	t
          j        	                    |	          }	t          j
        | ||||	          S t
          j                                        r1t          | t          d           t
          j        j        f          s
J d            |                                 s
J d            t
          j        j        j                            ||| j                  }
t
          j        j        j        j                            |
|          }| j        }t
          j                            |                                 |          }t
          j        j        j                            ||||          }|                     |           | S t=          d          )Ndimsr%   r   placezinput tensor is not pir value.z@dtensor_from_local() are only supported dense tensor type right.z?dtensor_from_local() are only supported in dynamic or pir mode.)listshape	enumerater/   r0   r#   r$   r&   _current_expected_place_get_paddle_placer   r'   
isinstancetypepirValueis_dense_tensor_typedistributedauto_parallelplacement_typeget_shard_specr*   staticutilsconvert_to_dims_mappingcreate_shaped_typebase	libpaddle-create_dist_dense_tensor_type_by_dense_tensorset_typeRuntimeError)local_tensorr   r   local_tensor_shapeglobal_dimsidx	placement	shard_dimlocal_dim_sizer>   sharding_specsdims_mappinglocal_shapeglobal_tensor_typedist_dense_tensor_types                  r   _dtensor_from_localrb   P   sC    |)**K%(#J// F FY 	F!))++I(3N%3djo%EK	" (
 88:: 22599}!
 
 
 	
 
		%	%	'	' 
,dVZ5E(FGG 	
 	
,	
 	
G 0022 	
 	
N	
 	
2 ,;JJj,"3  	
 )7>D\\D
 
 #(#Z::
 
 "(!6!:!h!hT<"
 "
 	4555M
 
 	
r   c                P   t           j                            |           }t          ||          }|                                 j        |         g}t          j        j        	                    | j
        ||          }t           j                            |                                |          }	t          |t          |                    \  }
}t           j        j        j                            ||
|          }t           j        j        j                            |	|          }|                    |           ||         g}t          |t          |                    \  }}t           j                            |                                |          }t           j        j        j                            |||          }t           j        j        j                            ||          }t+          ||          }|                    ||||          }||                    |           |S )z:
    Use all to all communication in nd_mesh reshard.
    )r#   _C_ops
share_datar   r(   r)   distrJ   api_cal_global_shape_local_shaperF   rP   rE   r	   r.   rQ   rR   create_tensor_dist_attributecvt_to_dist_typerT   r
   reshard)	src_valuedst_typer   r   dim	sub_valuesub_meshsub_placementssub_value_shapesub_value_typesub_dims_mappingpartial_statussub_value_dist_attrsub_value_dist_typedst_placementssub_dst_dims_mappingsub_dst_typesub_dst_dist_attrreshard_funcouts                       r   _pir_nd_mesh_all2allr      s   
 ((33I&tS11H))++;C@AN(,>>. O Z22/ N (2O,,( ($n 	!>>&	
 	
 
 !+/3DD+  *+++ !o&N+5O,,, ,(. :00/ L -1NN&  ;(,==' L '':<MNNL


.	< C
 XJr   c                  <    e Zd Zedd	            Zed
             ZdS )_NdMeshAlltoAllr   r   r   r   r   r   ro   intc                   t          ||          }|| _        t          j        |j                  | _        t          j        |j                  | _        t          j        |          | _        t          j        |          | _	        t          |j        ||j        |         g          }t          |                                ||j        |         g|          }t          j        ||||         g          }t          |j        ||j                  }t          |                                |||          }|j        |_        |S r   )r   alltoall_dimcopydeepcopyr%   x_meshr   x_placementsout_meshout_placements_cal_local_shaper@   rb   _local_valuerf   rl   stop_gradient)ctxr   r   r   ro   rq   r_   r~   s           r   forwardz_NdMeshAlltoAll.forward   s&    +455];#;<<
=)?@@}T**!]:66&x+*@*E)F
 
 "$$&&#C()	
 
 l3:c?*;<<&sy(CNKK!j+
 
 (5
r   c                    t          | j        |j                  s t          j        || j        | j                  }t                              || j        | j	        | j
                  }|S r   )r   r   r   rf   rl   r   r   applyr   r   r   )r   out_gradr~   s      r   backwardz_NdMeshAlltoAll.backward   s`    %c&8(:MNN 	K,xs7IJJC##cj#"2C4D
 
 
r   N)r   r   r   r   r   r   ro   r   __name__
__module____qualname__staticmethodr   r    r   r   r   r      sM           \>   \  r   r   c                    t          |           }t          |          D ]F\  }}|                                r-|                                }||         |j        |         z  ||<   G|S r   )r?   rA   r/   r0   r@   )global_shaper   r   r_   rY   rZ   r[   s          r   r   r      ss    |$$K#J// O OY 	O!))++I%0%;tz#%NK	"r   c                   t          |t          t          f          rt          j        |          }n|                                }t          j        |dk              d         }|j        dk    rT|j        dk    s
J d            t          j        |           }d||d         <   |t          j        |          z  ||d         <   t          |          S )Nr   r   z*At most one -1 is allowed in target shape.)	rD   r?   tuplenparrayr   wheresizeprod)	src_shape	tgt_shape	ret_shapeminus_one_idxnelems        r   infer_positive_shaper      s    )dE]++ %HY''		NN$$	HY"_--a0MA!Q&&&8 '&& 	""&'	-"#&+rwy/A/A&A	-"#	??r   c                  <    e Zd Zedd
            Zed             ZdS )_local_reshaper   r   r   r?   r_   r   r   r   r   c                   t           j                                        }t           j                            |          }|                                                                r'|                                                                }n|                                }t          j        |j	                  | _
        t          j        |j	                  | _        t          j        |j                  | _        t          j        |j                  | _        |                    |          }t          j        |||||          }|j        |_        |S )Nr<   )r#   r&   rB   rC   r   _is_initializedcloner   r   r@   x_global_shapex_local_shaper%   r   r   r   reshaper   r   )	r   r   r   r_   r   r   r>   rV   r~   s	            r   r   z_local_reshape.forward  s     88:: 22599##%%5577 	6&3355;;==LL&3355L!];+<== M,*<==];#;<<
=)?@@#++K88m!
 
 
 (5
r   c                   t           j                                        }t           j                            |          }|                                                                r.|                                                                }| j        }n|                                }dg}|                    |          }t          j	        || j
        | j        | j        |          }|S )Nr   r<   )r#   r&   rB   rC   r   r   r   r   r   r   r   r   r   )r   r   r>   
local_gradr   rets         r   r   z_local_reshape.backward%  s     88:: 22599  ""2244 	 !..006688J-MM!..00JCM''66
m#'
 
 
 
r   N)
r   r   r   r?   r_   r?   r   r   r   r   r   r   r   r   r   r     sM           \>   \  r   r   r   r?   c                t   t          | j        |          }t          |||          }t          j                    rY|                                 j        }|                                                                 s|                                 j        }nrt          j                                        rEt          | j        | 	                                j
        | 	                                j                  }nt          d          t          j        |          t          j        |          k    sJ d| d| d            t          j                    rt                              | ||||          S t          j                                        r)t          j                            | | j        ||||          S dS )z
    Reshape the local tensors of the dist tensor on each rank,
    and manually set the process_mesh and placements of the output.
    z7dist_reshape is only supported in dynamic and pir mode.zThe local shapes z and z are mismatched.N)r   r@   r   r#   r$   r   r   r&   r'   r(   r%   r)   NotImplementedErrorr   r   r   r   rd   dist_reshaper   )r   r   r   r   tgt_global_shapetgt_local_shapesrc_local_shapes          r   _dist_reshaper   ;  s    ,K,=|LL&'7zJJO 
%2244:''))99;; 	?)6688>O			%	%	'	' 

*!!##0!!##3
 
 "E
 
 	
 7?##rw'?'????SOSS/SSS @??  
##)?D*
 
 	
 
		%	%	'	' 
}))"
 
 	

 
r   c                   t          | |          }t          |          }||         d         ||         d         z
  }||z   dz
  |z  }||dz
  z  }||z
  }	g }
t          |          D ]}||         d         ||z  z   }||dz
  k    r t          ||	z   ||         d                   }nt          ||z   ||         d                   }t	          |          }||f||<   |
                    |           ||
fS )Nr   r   )r   r.   r,   r-   r?   append)r   tensor_slice
tensor_dimr2   new_sub_meshes
num_shards
total_size
shard_sizeeffective_sizelast_shard_size
new_slicesr5   startend	new_slices                  r   shard_submesh_and_slicer   j  s&   h//N^$$Jj)!,|J/G/JJJz)A-*<J:>2N >1OJ: % %Z(+a*n<
Qeo-|J/G/JKKCCej(,z*B1*EFFC&&	!&	*)$$$$:%%r   c                b    i }|                                  D ]\  }}|j        D ]
}||d||<   |S )N)slicepartial)itemsprocess_ids)sub_mesh_indices_infosub_mesh_partial_inforank2tensor_indicesrq   
slice_inforanks         r   get_rank2tensor_indicesr     sb     5 ; ; = =  *( 	 	D#0) )%%	
 r   c           	     (   t          |          t          |j                  k     rZt          t          |j                  t          |          z
            D ](}|                    t	          j                               )|d | j        D             i}i }t          |          D ]\  }}|                                r|                                }i }	|r^|	                                \  }
}t          |
|||          \  }}|	                    t          t          ||                               |^|                    |	           t          |d          r+|                                r|                                ||<   t#          ||          S )Nc                    g | ]}d |fS )r   r   )r   ss     r   
<listcomp>z$get_local_slices.<locals>.<listcomp>  s    #A#A#AqQF#A#A#Ar   r   )r.   r@   r,   r   rf   	ReplicaterA   r/   r0   popitemr   updatedictziphasattrr   reduce_typer   )tensorr   r   _r   r   r2   rZ   r   tmprq   r   r   r   s                 r   get_local_slicesr     s    :TZ((s4:Z899 	0 	0Adn..////!#A#AFL#A#A#AB(44 F F) 		."**,,JC' B'<'D'D'F'F$*-Dj*h. .*
 

4NJ ? ?@@AAA ( B "((---9l++ 	F	0D0D0F0F 	F.7.C.C.E.E!(+"#8:OPPPr   c                   t          j        d          dk    sdS t          j                    r| j        }| j        }n`t          j                                        r3|                                 j	        }|                                 j        }nt          d          ||k    s|j        |j        k    rdS t          | ||          }t          | ||          }||k    rdS dS )Nr   r   FzC_only_reshard_mesh_shape is only supported in dynamic and pir mode.T)r!   r"   r#   r$   r   r%   r&   r'   r(   r)   r   r   r   )r   r   r   r4   r3   src_rank2tensor_indicesdst_rank2tensor_indicess          r   _only_reshard_mesh_shaper     s     9-..&88u 	
$/+			%	%	'	' 
$..00@((**7!Q
 
 	
 48/43CCCu.X~  /{D*MM"999u4r   c                    t          j        d          dk    sdS | j        }||k    s|j        |j        k    rdS t	          d | j        |z   D                       sdS dS )Nr   r   Fc              3  >   K   | ]}|                                 V  d S r   )is_replicatedr   s     r   r   z&_reshard_mesh_shape.<locals>.<genexpr>  s,      NNQq  NNNNNNr   T)r!   r"   r%   r   allr   )r   r   r   r3   s       r   _reshard_mesh_shaper     sz     9-..&88u'H48/43CCCu
 NN+*@:*MNNNNN u4r   )r   r   r   r   r   r   r   )r   r   r   r?   r   r   r   r   )(
__future__r   r   r!   typingr   numpyr   r#   paddle.distributedrI   rf   r   paddle.autogradr   rK   r   r	   &static.reshard_funcs.base_reshard_funcr
   )static.reshard_funcs.nd_mesh_reshard_funcr   static.utilsr   r   -paddle.distributed.auto_parallel.process_meshr   r:   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s-    # " " " " "  				                  ! ! ! ! ! !       # # # # # # > > > > > > > > G G G G G G N N N N N N $ $ $ $ $ $ J,,,,,,IIIIII& & & &T 8<5
 5
 5
 5
p2 2 2j( ( ( ( (g ( ( (V    &4 4 4 4 4W 4 4 4n,
 ,
 ,
 ,
^& & &,  Q Q Q<   6     r   