
    Αi[@                    r   S SK Jr  S SKrS SKrS SKJr  S SKrS SKrS SK	J
r  S SKJr  S SKJr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  \(       a  S SK	Jr  S SKJr        SS jr SS jrS r " S S\5      rS rS r  " S S\5      r!        SS jr"S r#S r$S r%      SS jr&      SS jr'g)    )annotationsN)TYPE_CHECKING)Tensor)PyLayer   )check_placements_equal
to_dim_map)choose_reshard_func)get_1D_sub_process_mesh)
split_mesh)	Placement)ProcessMeshc                   [         R                  " S5      S:X  d  gSn[        R                  " 5       (       a  U R                  nU R
                  nOI[        R                  R                  5       (       a&  U R                  nU R                  5       R                  nWU:w  d  UR                  S:X  a  g[        S W 5       5      (       a  g[        S U 5       5      (       a  g[        [        [        U5      [        U5      5      5       Hx  nXV   nX&   nUR                  5       (       d  M"  UR                  5       (       d  M9  Xx:w  d  M@  UR!                  5       n	UR!                  5       n
Uc  [#        X-
  5      S:w  a    gUnMz     U$ )zK
Get the specific dimension for alltoall communication in nd_mesh reshard.
FLAGS_enable_moe_utilstrueNr   c              3  @   #    U  H  oR                  5       v   M     g 7fN
is_partial.0ps     j/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/moe_utils.py	<genexpr>)_specific_alltoall_dim.<locals>.<genexpr>;   s     
2>a<<>>>   c              3  @   #    U  H  oR                  5       v   M     g 7fr   r   r   s     r   r   r   =   s     
.:a<<>>:r   )osgetenvpaddlein_dynamic_modeprocess_mesh
placements	frameworkin_pir_mode	dist_attrplacements_attrndimanyrangeminlenis_shardget_dimabs)dist_tensormeshr#   mesh_dimsrc_meshsrc_placementsisrc_pdst_psrc_dimdst_dims              r   _specific_alltoall_dimr:   '   s<    99-.&8H++$//				%	%	'	'++$..0@@48==A-

2>
222

.:
...3s>*C
O<=!>> 0 0U^ mmoGmmoG#s7+<'='B > O    c                   [        U R                  5      nUb  Un[        U5       HC  u  pVUR                  5       (       d  M  UR	                  5       nXG   nXR                  U   -  XG'   ME     [
        R                  " 5       (       aU  [
        R                  R                  5       n	[
        R                  R                  U	5      n	[
        R                  " U UUUU	S9$ [
        R                  R                  5       (       GaS  [        U [        S 5      [
        R                  R                  45      (       d   S5       eU R!                  5       (       d   S5       e[
        R"                  R$                  R&                  R)                  XU R*                  5      n
[
        R"                  R$                  R,                  R.                  R1                  X5      nU R                  n[
        R                  R3                  U R                  5       U5      n[
        R4                  R6                  R                  R9                  XX5      nU R;                  U5        U $ [=        S5      e)Ndimsr"   r#   placezinput tensor is not pir value.z@dtensor_from_local() are only supported dense tensor type right.z?dtensor_from_local() are only supported in dynamic or pir mode.)listshape	enumerater-   r.   r    r!   r$   _current_expected_place_get_paddle_placer   r%   
isinstancetypepirValueis_dense_tensor_typedistributedauto_parallelplacement_typeget_shard_specr(   staticutilsconvert_to_dims_mappingcreate_shaped_typebase	libpaddle-create_dist_dense_tensor_type_by_dense_tensorset_typeRuntimeError)local_tensorr1   r#   local_tensor_shapeglobal_dimsidx	placement	shard_dimlocal_dim_sizer?   sharding_specsdims_mappinglocal_shapeglobal_tensor_typedist_dense_tensor_types                  r   _dtensor_from_localrc   P   s	    |))*K%(#J/!))+I(3N%3jjo%EK"	 0   88:  2259}}!
 	
 
			%	%	'	',dVZZ5E5E(FGG 	
,	
G 0022 	
N	
2 ,,;;JJ,"3"3 	
 ))77>>DD\\
 #((#ZZ::
 "(!6!6!:!:!h!hT"
 	45M
 	
r;   c                D   [         R                  R                  U 5      n[        X$5      nU R	                  5       R
                  U   /n[        R                  R                  R                  U R                  Xg5      n[         R                  R                  UR                  5       U5      n	[        U[        U5      5      u  p[         R                   R"                  R                  R%                  XjU5      n[         R                   R"                  R                  R'                  X5      nUR)                  U5        X4   /n[        U[        U5      5      u  p[         R                  R                  UR                  5       U5      n[         R                   R"                  R                  R%                  XoU5      n[         R                   R"                  R                  R'                  UU5      n[+        UU5      nUR-                  UUUU5      nUb  UR)                  U5        U$ )z2
Use all to all communication in nd_mesh reshard.
)r    _C_ops
share_datar   r&   r'   distrK   api_cal_global_shape_local_shaperG   rQ   rF   r	   r,   rR   rS   create_tensor_dist_attributecvt_to_dist_typerU   r
   reshard)	src_valuedst_typer1   r#   dim	sub_valuesub_meshsub_placementssub_value_shapesub_value_typesub_dims_mappingpartial_statussub_value_dist_attrsub_value_dist_typedst_placementssub_dst_dims_mappingsub_dst_typesub_dst_dist_attrreshard_funcouts                       r   _pir_nd_mesh_all2allr      s   
 ((3I&t1H))+;;C@AN((,,>>O ZZ22/N (2O,($ 	!!>>	
 
 !++//33DD *+ !o&N+5O,,( ::00/L --11NN ;;((,,=='L '':<MNL


.	<C
 XJr;   c                  H    \ rS rSr\        SS j5       r\S 5       rSrg)_NdMeshAlltoAll   c                   [        X$5      nX@l        [        R                  " UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " U5      U l        [        R                  " U5      U l	        [        UR                  XQR                  U   /5      n[        UR                  5       UUR                  U   /U5      n[        R                  " XuX4   /5      n[        UR                  XWR                  5      n[        UR                  5       X#U5      nUR                   Ul        U$ r   )r   alltoall_dimcopydeepcopyr"   x_meshr#   x_placementsout_meshout_placements_cal_local_shaperA   rc   _local_valuerg   rm   stop_gradient)ctxr0   r1   r#   rp   rr   r`   r   s           r   forward_NdMeshAlltoAll.forward   s    +45]];#;#;<
==)?)?@}}T*!]]:6&x*@*@*E)F
 "$$&##C()	
 ll3:?*;<&syy(NNK!+
 (55
r;   c                   [        U R                  UR                  5      (       d+  [        R                  " XR
                  U R                  5      n[        R                  XR                  U R                  U R                  5      nU$ r   )r   r   r#   rg   rm   r   r   applyr   r   r   )r   out_gradr   s      r   backward_NdMeshAlltoAll.backward   sc    %c&8&8(:M:MNN,,xs7I7IJC##jj#"2"2C4D4D
 
r;    N)r0   r   r1   r   r#   list[Placement]rp   int__name__
__module____qualname____firstlineno__staticmethodr   r   __static_attributes__r   r;   r   r   r      sN      $	
  >  r;   r   c                    [        U 5      n[        U5       HB  u  pEUR                  5       (       d  M  UR                  5       nX6   UR                  U   -  X6'   MD     U$ r   )r@   rB   r-   r.   rA   )global_shaper1   r#   r`   rZ   r[   r\   s          r   r   r      sZ    |$K#J/!))+I%0%;tzz#%NK" 0 r;   c                   [        U[        [        45      (       a  [        R                  " U5      nOUR                  5       n[        R                  " US:H  5      S   nUR                  S:  aR  UR                  S::  d   S5       e[        R                  " U 5      nSX#S   '   U[        R                  " U5      -  X#S   '   [        U5      $ )Nr   r   z*At most one -1 is allowed in target shape.)	rE   r@   tuplenparrayr   wheresizeprod)	src_shape	tgt_shape	ret_shapeminus_one_idxnelems        r   infer_positive_shaper      s    )dE]++HHY'	NN$	HHY"_-a0MA!!Q& 	
8	
& 	"&'	"#&+rwwy/A&A	"#	?r;   c                  L    \ rS rSr\          SS j5       r\S 5       rSrg)_local_reshapei  c                   [         R                  R                  5       n[         R                  R                  U5      nUR	                  5       R                  5       (       a  UR	                  5       R                  5       nOUR	                  5       n[        R                  " UR                  5      U l
        [        R                  " UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  5      U l        UR!                  U5      n[         R"                  " UUUUUS9nUR$                  Ul        U$ )Nr=   )r    r$   rC   rD   r   _is_initializedcloner   r   rA   x_global_shapex_local_shaper"   r   r#   r   reshaper   r   )	r   r0   r   r`   r1   r#   r?   rW   r   s	            r   r   _local_reshape.forward  s      88:  2259##%5577&335;;=L&335L!]];+<+<= MM,*<*<=]];#;#;<
==)?)?@#++K8mm!
 (55
r;   c                   [         R                  R                  5       n[         R                  R                  U5      nUR	                  5       R                  5       (       a+  UR	                  5       R                  5       nU R                  nOUR	                  5       nS/nUR                  U5      n[         R                  " UU R                  U R                  U R                  US9nU$ )Nr   r=   )r    r$   rC   rD   r   r   r   r   r   r   r   r   r   )r   r   r?   
local_gradr   rets         r   r   _local_reshape.backward%  s      88:  2259  "2244!..0668J--M!..0JCM''6
mm##''
 
r;   r   N)
r0   r   r   r@   r`   r@   r1   r   r#   r   r   r   r;   r   r   r     sX      	
  $ >  r;   r   c                   [        U R                  U5      n[        XBU5      n[        R                  " 5       (       aX  U R                  5       R                  nU R                  5       R                  5       (       d  U R                  5       R                  nOv[        R                  R                  5       (       aH  [        U R                  U R                  5       R                  U R                  5       R                  5      nO[        S5      e[        R                  " U5      [        R                  " U5      :X  d   SU SU S35       e[        R                  " 5       (       a  [        R!                  XXRU5      $ [        R                  R                  5       (       a.  [        R"                  R%                  U U R&                  UUUU5      $ g)z|
Reshape the local tensors of the dist tensor on each rank,
and manually set the process_mesh and placements of the output.
z7dist_reshape is only supported in dynamic and pir mode.zThe local shapes z and z are mismatched.N)r   rA   r   r    r!   r   r   r$   r%   r&   r"   r'   NotImplementedErrorr   r   r   r   re   dist_reshaper#   )r0   r   r1   r#   tgt_global_shapetgt_local_shapesrc_local_shapes          r   _dist_reshaper   ;  s    ,K,=,=|L&'7zJO%224::'')99;;)668>>O				%	%	'	'*!!#00!!#33
 "E
 	
 77?#rww'?? 
O,E/1BBRS? ##?*
 	
 
			%	%	'	'}}))""
 	
 
(r;   c                \   [        X5      n[        U5      nX   S   X   S   -
  nXe-   S-
  U-  nXuS-
  -  nXh-
  n	/ n
[        U5       H_  nX   S   X-  -   nXS-
  :X  a  [        X-   X   S   5      nO[        X-   X   S   5      n[	        U5      nX4X'   U
R                  U5        Ma     XJ4$ )Nr   r   )r   r,   r*   r+   r@   append)r1   tensor_slice
tensor_dimr2   new_sub_meshes
num_shards
total_size
shard_sizeeffective_sizelast_shard_size
new_slicesr5   startend	new_slices                  r   shard_submesh_and_slicer   j  s    /N^$J)!,|/G/JJJ)A-*<J>2N 1OJ:(+an<Qe-|/G/JKCe(,*B1*EFC&	!&	)$  %%r;   c                p    0 nU R                  5        H  u  p4UR                   H
  nUUS.X%'   M     M!     U$ )N)slicepartial)itemsprocess_ids)sub_mesh_indices_infosub_mesh_partial_inforank2tensor_indicesrr   
slice_inforanks         r   get_rank2tensor_indicesr     sG     5 ; ; =((D#0)% ) !> r;   c           	     
   [        U5      [        UR                  5      :  aU  [        [        UR                  5      [        U5      -
  5       H'  nUR                  [        R
                  " 5       5        M)     XR                   Vs/ s H  nSU4PM	     sn0n0 n[        U5       H  u  pxUR                  5       (       av  UR                  5       n	0 n
U(       aL  UR                  5       u  p[        XX5      u  pU
R                  [        [        X5      5      5        U(       a  ML  UR                  U
5        [        US5      (       d  M  UR                  5       (       d  M  UR!                  5       Xg'   M     [#        XV5      $ s  snf )Nr   r   )r,   rA   r*   r   rg   	ReplicaterB   r-   r.   popitemr   updatedictziphasattrr   reduce_typer   )tensorr1   r#   _sr   r   r2   r[   r   tmprr   r   r   r   s                  r   get_local_slicesr     s@    :TZZ(s4::Z89Adnn./ : "LL#ALqQFL#AB(4"**,JC''<'D'D'F$-D*.* 

4N ?@A (' "((-9l++	0D0D0F0F.7.C.C.E!+  5 ##8PP# $Bs   F c                   [         R                  " S5      S:X  d  g[        R                  " 5       (       a  U R                  nU R
                  nOc[        R                  R                  5       (       a5  U R                  5       R                  nU R                  5       R
                  nO[        S5      eXA:X  d  UR                  UR                  :w  a  g[        XU5      n[        XU5      nXV:w  a  gg)Nr   r   FzC_only_reshard_mesh_shape is only supported in dynamic and pir mode.T)r   r   r    r!   r#   r"   r$   r%   r&   r'   r   r   r   )r0   r1   r#   r4   r3   src_rank2tensor_indicesdst_rank2tensor_indicess          r   _only_reshard_mesh_shaper     s     99-.&8$//++				%	%	'	'$..0@@((*77!Q
 	
 8//43C3CC.~ /{*M9r;   c                    [         R                  " S5      S:X  d  gU R                  nX1:X  d  UR                  UR                  :w  a  g[	        S U R
                  U-    5       5      (       d  gg)Nr   r   Fc              3  @   #    U  H  oR                  5       v   M     g 7fr   )is_replicatedr   s     r   r   &_reshard_mesh_shape.<locals>.<genexpr>  s     N*MQ  *Mr   T)r   r   r"   r   allr#   )r0   r1   r#   r3   s       r   _reshard_mesh_shaper     sd     99-.&8''H8//43C3CC
 N+*@*@:*MNNNr;   )r0   r   r1   r   r#   r   r   )r0   r   r   r@   r1   r   r#   r   )(
__future__r   r   r   typingr   numpyr   r    paddle.distributedrJ   rg   r   paddle.autogradr   rL   r   r	   &static.reshard_funcs.base_reshard_funcr
   )static.reshard_funcs.nd_mesh_reshard_funcr   static.utilsr   r   -paddle.distributed.auto_parallel.process_meshr   r:   rc   r   r   r   r   r   r   r   r   r   r   r   r   r;   r   <module>r      s     #  	     !  # > G N $,I&&*&8G&T 8<5
p2j(g (V&4W 4n,
,
,
 ,
  	,
^&,Q<*8G6*8Gr;   