
    x-j                       U d dl mZ d dlZd dlmZ d dlmZmZ d dlm	Z	m
Z
 d dlmZ d dlZd dlZd dlmZ d dlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ erd dlmZ ddl m!Z! i Z"de#d<    ed           G d d                      Z$ ed           G d de$                      Z% G d de	          Z& G d de	          Z'dZ(d Z)d(dZ*d(d Z+d! Z,d" Z- G d# d$          Z.d% Z/ G d& d'          Z0dS ))    )annotationsN)defaultdict)	dataclassreplace)Enumauto)TYPE_CHECKING)logger   )LocalTensorIndexLocalTensorMetadata)ShardedWeight)%compute_local_shape_and_global_offsetget_target_tensorslice_tensor)Group)AbstractCommunicatorzdict[str, tuple[list, list]]PATH_TO_CHECKPOINT_FILEST)frozenc                      e Zd ZU dZded<   ded<   ded<   ded<   d	ed
<   ded<   ded<   ded<   ded<   ded<   dZded<   dS )ReadItema  
    A communication operation for a Tensor between ranks.

    Attributes:
        tensor_name (str): Name of the tensor.
        src_global_offset (tuple[int]): Global offset in the source tensor.
        dst_global_offset (tuple[int] | None): Global offset in the destination tensor.
        dst_rank (list[int]): Destination ranks.
        src_rank (int): Source rank.
        dst_local_offset (tuple[int]): Local offset in the destination tensor partition.
        src_local_offset (tuple[int]): Local offset in the source tensor partition.
        slice_shape (tuple[int]): Shape of the slice to transfer.
        file_name (str): The name of the file from which the source tensor is read on the source rank.
        dtype (str): Data type of the tensor.
    strtensor_namez
tuple[int]src_global_offsettuple[int] | Nonedst_global_offsetdst_rankintsrc_rankdst_local_offsetsrc_local_offsetslice_shape	file_namedtypeNzGroup | None
comm_group)__name__
__module____qualname____doc____annotations__r%        p/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/flex_checkpoint/dcp/resharder.pyr   r   /   s            !!!!((((MMM        NNNJJJ#J######r,   r   c                      e Zd ZU dZded<   dS )ExtendReadItemNr   global_shape)r&   r'   r(   r0   r*   r+   r,   r-   r/   r/   N   s#         &*L******r,   r/   c                  6    e Zd Z e            Z e            ZdS )OperationTypeN)r&   r'   r(   r   GLOBAL_BROADCASTBROADCAST_ALLGATHERr+   r,   r-   r2   r2   S   s(        tvv$&&r,   r2   c                  6    e Zd Z e            Z e            ZdS )AllGatherTypeN)r&   r'   r(   r   WITH_PADDING
NO_PADDINGr+   r,   r-   r6   r6   X   s$        466LJJJr,   r6    __internal_padding_tensor_name__c                |   i }t           j                                        }| D ],}|j                                        D ]\  }}||v r||f||<   -g }	|r"t           j                            |	||           n|	                    |           i }
|	D ],}|                                D ]\  }\  }}||
vsJ ||f|
|<   -|
S N)paddledistributedget_rankstorage_metadataitemsall_gather_objectappend)metadata_listlocal_load_filesprocess_groupuse_dist	load_infocur_rankmetadatalocal_tensor_indexr#   load_info_list
load_infosranks               r-   get_load_infosrN   `   s(   I!**,,H!  -5-F-L-L-N-N 	 	)	,,,1	,-	 N ),,I}	
 	
 	
 	
 	i(((J# ? ?	5>__5F5F 	? 	?1 1y%Z7777.2I->J)**	? r,   cur_chunk_metadatar   storage_local_tensor_metadatac           	     :   g }g }g }t          | j        | j        |j        |j                  D ]\  }}}}t          ||          }	t	          ||z   ||z             }
|	|k    r.|                    d           |                    |	|z
             nL|	|k    r.|                    |	|z
             |                    d           nt          d|	 d| d|           |                    |
|	z
             |d         dk    sJ d|d          d|
 d|	             |||fS )	Nr   zInvalid begin_offset:z, cur_offset:z, storage_offset:zInvalid length:z, end_offset:z, begin_offset:)ziplocal_shapeglobal_offsetmaxminrB   
ValueError)rO   rP   cur_offsetsstorage_offsetslengthscur_len
cur_offsetstorage_lenstorage_offsetbegin_offset
end_offsets              r-   compute_overlaprb   y   s    KOG<?&(%1%3	= = 
 
8[. :~66g-~/KLL
:%%q!!!""<.#@AAAA^++|j8999""1%%%%ppp:pp`npp   	zL0111r{aagbkaa
aaS_aa   00r,   c                    t          | j        | j        |j        |j                  D ]\  }}}}|||z   k    s	||z   |k    r dS dS )NTF)rS   rT   rU   )rO   rP   r\   r]   r^   r_   s         r-   not_overlaprd      st     =@&(%1%3	= = 
 
8[. >K788W$7744 8 5r,   c                   i }| D ]L}|j                                         }|D ].\  }}|                    |d          t          |          z   ||<   /Md |                                D             }t                              |d          }| D ]R}|j                                         }|D ]4\  }}||         }t          |          }	|||         |||	z   <   ||	z   ||<   5S|S )Nr   c                "    i | ]\  }}|d g|z  S r;   r+   ).0kns      r-   
<dictcomp>z5build_storage_state_dict_metadata.<locals>.<dictcomp>   s$    7771a$!777r,   )state_dict_metadatar@   getlendictfromkeys)
rC   countsmdr@   rh   lstresultoffsetori   s
             r-   !build_storage_state_dict_metadatarv      s   F 4 4&,,.. 	4 	4FAs

1a((3s883F1II	4 87777F]]61%%F  &,,.. 	 	FAsq	ACA#&F1Ia!a%i AF1II		 Mr,   c                   i }t          |           }g }d }|                                D ]\  }}	d }
t          |	t          j                  r|	                                rWt          |	j                  dk    r t          |	j        |	j	        |	j
                  nd\  }}t          |	j                  }||nUt          |	j                  }t          |	j                  dk    r%t          dgt          |	j                  z            nd}|}t          |	j                                      d          d         }|}
nt          |	t                    r}t          |	j                  dk    r|	j        |	j        fnd\  }}t          |	j        j                                      d          d         }t          |t                    r|d         n|}
nt)          dt+          |	                     t-          ||||          }||
         D ]}t/          ||          rt1          ||          \  }}}t3          |
t          |j                  t          |j                            }||         \  }}|                    t7          |
t          |j                  |t          j                                        f|t          |          t          |          t          |          ||j        
  
                   g }g }|r"t          j                            |||           n|                    |           |D ]}|D ]}|                    |           |S )	Nr   )r+   r+   r+   .r   z&Only support paddle.Tensor., val type:)rT   )
r   r   r   r   r   r    r!   r"   r#   r$   )rv   r@   
isinstancer<   Tensoris_distrm   shaper   process_mesh
placementstupler   r$   splitr   r0   rT   rU   local_tensorrX   typer   rd   rb   r   rB   r   r=   r>   rA   )rC   
state_dictrE   rF   rL   storage_state_dict_metadata
read_itemsr0   
tensor_keyvalr   rT   rU   r$   rO   rP   rY   rZ   r[   storage_local_tensor_indexr   r#   global_read_itemstmpr@   items                             r-   get_read_itemsr      s    #%"C# # JL%++-- L L
Cc6=)) '	{{}} + 39~~)) :	(   "!  %SY//&-*? +@ $CI..36sy>>A3E3EE1#CI.///2   +	NN((--a0E$KK]++ 	 s'((1,, #"344 'K
 (.//55c::1=E!+J!>!>N
1J K DcDD   1;|
 
 .I.
 	 	) -/LMM 4C"$A5 51K' *:3ABB!"?"KLL* * *&
 #--G"HHi +&+5C' ' '4$099;;=%%*;%7%7%*?%;%; %g'7=     	> 
C ,,S*mLLLL

: + + 	+ 	+D$$T****	+r,   c                  0    e Zd Z	 	 	 d
ddZd Zd Zd	 ZdS )StateDictResharderNFTcommunicatorr   c                h    || _         || _        || _        || _        || _        || _        || _        d S r;   )target_state_dictsource_state_dictrC   r   rE   offloadrF   )selfr   r   rC   r   rE   r   rF   s           r-   __init__zStateDictResharder.__init__%  s>     "3!2*(* r,   c                D   | j         rE| j                                        D ]+\  }}d |                                D             | j        |<   ,t          | j                                                  }t          | j        || j        | j                  }t          | j        | j
        | j        | j        |          }d | j
                                        D             }t          d |D                       }t          d |D                       }|r|r
J d            ||fS )Nc                    i | ]I\  }}|t          |t          j                  r't          j        |t          j                               n|JS )placery   npndarrayr<   	to_tensorCPUPlacerg   rh   vs      r-   rj   z1StateDictResharder.preprocess.<locals>.<dictcomp>:  b     5 5 5 1 !!RZ00v'1B1BCCCC5 5 5r,   c                R    i | ]$\  }}|t          |t                    r|j        n|%S r+   )ry   r   r   r   s      r-   rj   z1StateDictResharder.preprocess.<locals>.<dictcomp>O  sC     '
 '
 '
1 A}!=!=Dq~~1'
 '
 '
r,   c              3  @   K   | ]}t          |t                    V  d S r;   ry   r   rg   rh   s     r-   	<genexpr>z0StateDictResharder.preprocess.<locals>.<genexpr>S  =       
 
%&Jq%  
 
 
 
 
 
r,   c              3  B   K   | ]}t          |t                     V  d S r;   r   r   s     r-   r   z0StateDictResharder.preprocess.<locals>.<genexpr>V  @        
  
)*
1e$$$ 
  
  
  
  
  
r,   =target_state_dict contains a mix of tuple and non-tuple keys.)r   r   r@   listkeysrN   rC   rE   rF   r   r   any)	r   r#   r   rD   rL   r   processed_target_state_dicthas_tuple_keyhas_non_tuple_keys	            r-   
preprocesszStateDictResharder.preprocess7  s   < 	)-)?)E)E)G)G  %	:5 5 !+ 0 0 2 2	5 5 5&y11   6 ; ; = =>>#M	
 

 $"M
 

'
 '
.4466'
 '
 '
#  
 
*E
 
 
 
 
    
  
.I 
  
  
 
 
 " 	
&7 	
 	
K	
 	
8 +J66r,   c                x   |D ]}| j         |j                 |j                 }t          ||j        |j                                                  }t          ||          }t          ||j        |j                  }|j	        |j	        k    r|
                    |j	                  }t          j        ||           d S r;   )r   r#   r   r   r!   r"   
contiguousr   r    r   tor<   assign)r   r   r   	read_item
src_tensorsrc_chunk_tensor
dst_tensordst_chunk_tensors           r-   local_reshardz StateDictResharder.local_reshard^  s    # 	> 	>I/	0CD%J  ,I6	8M   jll  ++Y J  ,I6	8M     %)9)???#3#6#67G7M#N#N M*,<====	> 	>r,   c                b   t           j                                        }|                                 \  }}t	          j        dt          |           d           |s|S || j        d}| j        |d}| j	        r| j
                            |||           n|                     ||           | `|S )Nz/ReadItem generation completed, with a total of rx   )rM   rE   )r   r   )r<   r=   r>   r   r
   inform   rE   r   rF   r   communicater   )r   rH   r   r   contextstates         r-   reshardzStateDictResharder.reshardp  s    %..0026//2C2C/#ZPc*ooPPP	
 	
 	
  	/.. !/
 
 "&!7!<
 

 = 	H))*eWEEEEz+FGGG"**r,   )NFT)r   r   )r&   r'   r(   r   r   r   r   r+   r,   r-   r   r   $  sf         ! ! ! ! !$%7 %7 %7N> > >$+ + + + +r,   r   c                "   | j         |j         k    s
J d            t          | j                   }g g }}g g }}t          |          D ]}| j        |         }|| j        |         z   }	|j        |         }
|
|j        |         z   }t          ||
          }t          |	|          }||k    r d S |                    ||z
             |                    ||z
             |                    ||
z
             |                    ||
z
             t          j	        | j
        t          t          |                    ||          }t          j	        |j
        t          t          |                    ||          }t          j        ||           d S )NzGlobal shapes must be the same)axesstartsends)r0   rm   rangerU   rT   rV   rW   rB   r<   slicer   r   r   )srcdstndimr   r   
dst_startsdst_endsi	src_beginsrc_end	dst_begindst_endoverlap_beginoverlap_end	src_slice	dst_slices                   r-   assign_sharded_weightr     s   s////( 0// s  DrDFrJ4[[ 1 1%a(	coa00%a(	coa00Iy11'7++-''FFmi/000K)+,,,-)3444i/0000tE$KK00d  I %++	  I M)Y'''''r,   c                  b    e Zd Z	 	 dddZd Zd Zd Zd	 Z	 ddZd Z	d Z
d Zd Zd Zd Zd
S )ThreeDCommGroupStateResharder        Fmemory_growth_thresholdr   c	           	        || _         || _        t          |          dk    s
J d            |d         | _        || _        || _        | j        df| j        dffD ])\  }	}
|	j        dk    sJ d|
 d|	j         d|	 d	            *|| _        | j         p| j        j        dk    | _        || _	        || _
        d
| _        |                                  d S )Nr   zOnly support one metadata now!r   
horizontalverticalzThe number of ranks in the z@ communication group must be greater than 1, but actually it is z). Please check this communication group: !T)r   r   rm   rI   h_groupv_groupnranksp_groupusing_2d_comm_groupr   r   using_tuple_keyr   )r   r   r   rC   r   r   r   r   r   groupnames              r-   r   z&ThreeDCommGroupStateResharder.__init__  s-    "3!2=!!Q&&&(H&&&%a(\<(\:&
 	 	KE4 <!###fd f f&+lf f]bf f f $### (,$4 $
L1$ 	  (?$#r,   c                   | j         r| j                                        D ]+\  }}d |                                D             | j        |<   ,| j                                        D ]u\  }}|                                D ][\  }}|j        t          j        k    r*|                                                                ||<   D|                                ||<   \vt          | j        	                                          | _
        t          d | j        D                       }t          d | j        D                       }|r|r
J d            t          d | j                                        D                       s
J d            || _        t          t                    | _        | j                                        D ]S\  }}| j        r'| j        |d                                      |           3| j        |                             |           Tt          j                                        | _        |                                  |                                  |                                  |                                  d S )Nc                    i | ]I\  }}|t          |t          j                  r't          j        |t          j                               n|JS r   r   r   s      r-   rj   z<ThreeDCommGroupStateResharder.preprocess.<locals>.<dictcomp>  r   r,   c              3  @   K   | ]}t          |t                    V  d S r;   r   r   s     r-   r   z;ThreeDCommGroupStateResharder.preprocess.<locals>.<genexpr>  r   r,   c              3  B   K   | ]}t          |t                     V  d S r;   r   r   s     r-   r   z;ThreeDCommGroupStateResharder.preprocess.<locals>.<genexpr>  r   r,   r   c              3  F   K   | ]\  }}t          |t                    V  d S r;   )ry   r   )rg   _r   s      r-   r   z;ThreeDCommGroupStateResharder.preprocess.<locals>.<genexpr>  sD       
 
1 q-((
 
 
 
 
 
r,   z/All sharded weights must be ShardedWeight type.r   )r   r   r@   r$   r<   float32cuda
pin_memoryr   r   rD   r   r   allr   r   grouped_target_state_dictrB   r=   r>   rH   _build_cross_section_topologyr   schedule_read_itemsaggregate_global_read_items)	r   r#   r   r   tensorr   r   keysharded_weights	            r-   r   z(ThreeDCommGroupStateResharder.preprocess  s   < 	@)-)?)E)E)G)G  %	:5 5 !+ 0 0 2 2	5 5 5&y11 *.)?)E)E)G)G @ @%	:+5+;+;+=+= @ @'K|v~5528++--2J2J2L2L
;//28++--
;//	@ !%T%;%@%@%B%B C C 
 
*.*@
 
 
 
 
    
  
.2.D 
  
  
 
 
 " 	
&7 	
 	
K	
 	
8  
 
.4466
 
 
 
 
 	= 	= =	= 	= 

  -)4T):):&#'#9#?#?#A#A 	K 	KC# K.s1v6==nMMMM.s3::>JJJJ*3355**,,,  """((*****r,   c                    | j         }| j        }g }t          j                            |||           g }t          j                            |||           d |D             }|S )Nc                    g | ]	}|D ]}|
S r+   r+   )rg   sublistxs      r-   
<listcomp>zMThreeDCommGroupStateResharder.all_gather_cross_section_fn.<locals>.<listcomp>  s%    FFFwgFFFFFFr,   )r   r   r<   r=   rA   )r   r   r   r   
h_obj_list
v_obj_listgathered_infos          r-   all_gather_cross_section_fnz9ThreeDCommGroupStateResharder.all_gather_cross_section_fn  si    ,,
,,ZwGGG
,,ZWMMMFF*FFFr,   c                   g }g | _         t          j                            || j        | j                   t          j                            | j         || j                   | j        s.g }t          j                            || j        | j                   n| j        g}d t          |          D             | _
        || _        | j
        | j                 | _        d t          | j          D             | _        d t          | j                  D             | _        d t          | j                   D             | _        | j        | j                 | _        | j        j        | _        | j        j        | _        d S )Nc                    i | ]\  }}||	S r+   r+   )rg   r   rM   s      r-   rj   zOThreeDCommGroupStateResharder._build_cross_section_topology.<locals>.<dictcomp>#  s    III71dtQIIIr,   c                ,    g | ]}t          |          S r+   )set)rg   cols     r-   r   zOThreeDCommGroupStateResharder._build_cross_section_topology.<locals>.<listcomp>'  s    GGGCs3xxGGGr,   c                $    i | ]\  }}|D ]}||S r+   r+   )rg   r   ranksrM   s       r-   rj   zOThreeDCommGroupStateResharder._build_cross_section_topology.<locals>.<dictcomp>(  sE     !
 !
 !
5!
 !
  !!
 !
 !
 !
r,   c                $    i | ]\  }}|D ]}||S r+   r+   )rg   r   rowrM   s       r-   rj   zOThreeDCommGroupStateResharder._build_cross_section_topology.<locals>.<dictcomp>-  s?     
 
 
3s
 
?CD!
 
 
 
r,   )topologyr<   r=   rA   rH   r   r   r   r   	enumerateparallel_indexp_rankscur_parallel_indexrS   vertical_rankshorizontal_indexvertical_indexcur_horizontal_indexr   h_group_sizev_group_size)r   h_ranksr  s      r-   r   z;ThreeDCommGroupStateResharder._build_cross_section_topology  s   ,,T]DL	
 	
 	
 	,,M7DL	
 	
 	
 ' 	&G00    }oGIIi6H6HIII"&"5dm"DGG33FGGG!
 !
%d&9::!
 !
 !


 
"+DM":":
 
 
 %)$9$-$H! L/ L/r,   c                   t          t                    }|D ]1}|j        |j        |j        f}||                             |           2g }|                                D ],\  }}t          |d           }|                    |           -|S )Nc                    | j         S r;   r   r   s    r-   <lambda>z@ThreeDCommGroupStateResharder.dedup_read_items.<locals>.<lambda>=  s    
 r,   r   )r   r   r   r   r"   rB   r@   rW   )r   r   r   r   r   rs   r@   min_items           r-   dedup_read_itemsz.ThreeDCommGroupStateResharder.dedup_read_items6  s    D!!% 	$ 	$D#T%;T=MNC#Jd####++-- 	$ 	$JC5&:&:;;;HMM(####r,   Nc                `   t           j                                        }| j        j        }| j        j        }i }|                                D ]\\  }}|j        }||         }	t          |	          dk    sJ d| d|             |	d         j	        }
||f}|
|j
        |j        f}|||<   ]g }| j                                        D ],\  }}|                                D ]\  }}|||f         \  }
}}t          |j                                      d          d         }t!          |j                  t!          |          k    s(J d| d| d| dt!          |j                               ||t!          |          t!          |          d	t          |          z  d	t          |          z  t!          |          t!          |
          ||d d d
}|                    t'          di |           .|                     |          }d |D             }|                     |          | _        d S )Nr   z'No metadata found for tensor with name z	 in file rx   r   zShape mismatch in  tensor name z, expected shape z
, but got r   )r   r   r   r   r!   r    r"   r0   r#   r$   r   r%   c                    g | ]	}|D ]}|
S r+   r+   )rg   planr   s      r-   r   z@ThreeDCommGroupStateResharder.get_read_items.<locals>.<listcomp>w  s9     (
 (
 (
(
 (
9=D(
 (
 (
 (
r,   r+   )r<   r=   r>   rI   rk   r?   r@   r   rm   r0   rT   rU   r   r   r$   r   r   r|   rB   r/   r   r  r   )r   all_gather_argscurrent_rankrk   r?   shard_infosrJ   r#   r   local_tensor_metadatar0   r   
shard_infolocal_read_plan	read_filer   r   r   rT   rU   r$   common_attrsgathered_plans_per_rankglobal_read_plan_per_sections                           r-   r   z,ThreeDCommGroupStateResharder.get_read_itemsA  s    )2244"m?=9-=-C-C-E-E 	* 	*)	+6J$7
$C!,--222Z*ZZyZZ 322 13@Ly)C"."0J
  *K%)%;%A%A%C%C 	G 	G!Iz'1'7'7'9'9 G G#V;F ),<8k= FL))//44Q7V\**eK.@.@@@@ Uk  U  UI  U  Uhs  U  U  @E  FL  FR  @S  @S  U  U A@@ $/ ,).})=)=).})=)=(,s;/?/?(?(,s;/?/?(?#(#5#5$),$7$7!*" $"&     &&~'E'E'E'EFFFF-G0 #'"B"B#
 #
(
 (
4(
 (
 (
$ //0LMMr,   c                4	   | j         }g }t          t                    }| j        D ]}|j        }|j        }t          j                            t          t          |                    }|t          j        |          z  t          |          z  }|| j        k    r|                    |           ||f|                             |           t          |                                d           }	t#          |	          }d }
|                                D ]\  }}t          ||
          ||<   g }t          |                                          D ]?\  \  }}}t          |          | j        k     r"t          |          | j        k    rd gt          |          z  }d}t'                      t)          |          D ]Z\  }}|v r
|j        }| j        |         }||         4|||<                       |           |dz  }|t          |          k    r n[t1          d |D                       rD|                    |t2          j        f           fdt)          |          D             }||||f<   nnt          |          | j        k    At          |          dk    rd gt          |          z  }d }t          t&                    d}|                                D ]
\  \  }}}|} |                                D ]|\  \  }}}||k    rt)          |          D ]]\  }}|j        }| j        |         }||         <|||<   ||f                             |           |dz  }|t          |          k    r n^}g }t          |                                          D ]O\  }fdt)          |          D             }t          |          dk    r|                               J||<   P|D ]|= t)          |          D ]B\  }}|;t7          ||                   }t8          |d	d	d	d	d
d
d|d d}t;          ddd i|}|||<   C|                    |t2          j        f           t          |          dk    || _        || _         d S )Nc                :    | d         d         | d         d         fS )Nr   r   r+   r  s    r-   r  zCThreeDCommGroupStateResharder.schedule_read_items.<locals>.<lambda>  s    !Q!Q r,   r  c           
         | j         | j        | j        | j        | j        | j        | j        nd| j        | j        | j        | j	        f
S )Nr+   )
r   r   r   r   r    r   r!   r"   r#   r$   )r   s    r-   r  zCThreeDCommGroupStateResharder.schedule_read_items.<locals>.<lambda>  sU    !'&*6 ''&!O)
 r,   r   r   c              3     K   | ]}|d uV  	d S r;   r+   )rg   r   s     r-   r   zDThreeDCommGroupStateResharder.schedule_read_items.<locals>.<genexpr>  s&      CCq}CCCCCCr,   c                "    g | ]\  }}|v	|S r+   r+   )rg   r   r   used_indicess      r-   r   zEThreeDCommGroupStateResharder.schedule_read_items.<locals>.<listcomp>  s2       #AtL00 000r,   c                .    g | ]\  }}|         v|S r+   r+   )rg   r   r   r   r1  s      r-   r   zEThreeDCommGroupStateResharder.schedule_read_items.<locals>.<listcomp>  s7     # # #4S 111 111r,   r  )r   padding_vfile)r   r   r   r   r!   r    r"   r0   r#   r$   r%   r   r+   )!r  r   r   r   r$   r"   r<   coresize_of_dtypegetattrmathprodrm   r   rB   sortedr@   rn   r  r  r  r   r  addr   r6   r8   rW   INTERNAL_PADDING_TENSOR_NAMEr/   r7   global_broadcast_read_itemsbatch_read_items)r   r  r<  bucket_read_itemsr   	cur_dtype	cur_shapeelement_sizememory_growthbucket_read_items_torder_rulesrh   r   r=  r@   cur_batch_read_itemscntr   r   h_indexcur_batch_dtypeneed_removeremaining_itemsr)  padding_read_itemr   r1  s                            @@r-   r   z1ThreeDCommGroupStateResharder.schedule_read_items}  s   ,&(#'--O 	4 	4D
I(I!;44WVY5O5OPPLty333c.6I6II  t;;;+224888 ),!#&--d3333$##%% 
 
 
 !!455
 
 &++-- 	> 	>DAq#)!#=#=#=a  -12C2I2I2K2K-L-L  	  	)"Y	E5zzD---e** 111(,vN0C0C'C$"uu(// 
" 
"GAtL(( #}H"3H=G+G4<8<,W5$((+++q#n"5"555!ECC.BCCCCC $++-}/GH     '0'7'7  E
 AF%y)&<==9 e** 111< #$$))$(6C,?,?#? "O&s++LC1B1H1H1J1J  -&I"+1B1H1H1J1J " "-&I//(// " "GAt#}H"3H=G+G4<8<,W5$i%;<@@CCCq#n"5"555!EK"#4#:#:#<#<== 	= 	=
U# # # # ##,U#3#3# # #
 ''1,,&&s++++-<%c**" + +%c**$%9:: @ @4<">!#455H'C$,-1-1,0,0'+(,%4!0&*$ $L )7 ) )!%))5) )% /@(+##%}'AB  w #$$))~ ,G( 0r,   c                R   | j         r$| j        | _        d | j        D             | _        d S g }g }t          j        || j        | j                   t          j        || j        | j                   d |D             | _        g | _        t          d |D                       }t          |          D ]v}g }|D ]U}t          |          dk    r)|                    |                    d                     >|                    g d f           V| j                            |           wd S )Nc                    g | ]}|gS r+   r+   )rg   batch_itemss     r-   r   zMThreeDCommGroupStateResharder.aggregate_global_read_items.<locals>.<listcomp>  s'     0 0 0"-0 0 0r,   c                    g | ]	}|D ]}|
S r+   r+   )rg   r   r   s      r-   r   zMThreeDCommGroupStateResharder.aggregate_global_read_items.<locals>.<listcomp>&  s?     7
 7
 7
7
 7
  7
 7
 7
 7
r,   c                ,    g | ]}t          |          S r+   )rm   )rg   r   s     r-   r   zMThreeDCommGroupStateResharder.aggregate_global_read_items.<locals>.<listcomp>-  s    EEEgS\\EEEr,   r   )r   r<  &aggregated_global_broadcast_read_itemsr=  aggregated_batch_read_itemsdistrA   r   rV   r   rm   rB   pop)r   rQ  rR  	max_tasksr   task_batchesr=  s          r-   r   z9ThreeDCommGroupStateResharder.aggregate_global_read_items  s   # 	0 70 0151F0 0 0D, F13.&(#2,L	
 	
 	

 	'!L	
 	
 	

7
 7
A7
 7
 7
3
 ,.(EE)DEEE
 
	 y!! 	B 	BAL$? 4 4 '((A-- ''(8(<(<Q(?(?@@@@ ''T
3333,33LAAAA	B 	Br,   c                   |\  }}t          |          dk    rg S || j                 }| j        |j        k    r|j        t
          k    rt          j        |j        |j	                  n| j
        |j                 |j                 }t          |j        t          j                  s|                                }n t          j        |j        |j	                  }t          j                            ||j        | j                   g }|t&          j        k    rmt+          d |D                       }t-          j        |j                  |k    r|                    |g          }na|                                }t          j        |g|j	                  }	t          j        |dg          |	d|<   |                                 |	}t          j                            ||| j                   g }
t?          |          D ]\  }}||         }t-          j        |j                  }|d|                                                              |j                  }|
!                    |           |                                 |
}n't          j                            ||| j                   |                                 |S )z:Performs V-Broadcast + H-AllGather for one batch of items.r   r$   r   r   c              3  H   K   | ]}t          j        |j                  V  d S r;   )r7  r8  r"   )rg   r   s     r-   r   zXThreeDCommGroupStateResharder._process_one_batch_broadcast_in_section.<locals>.<genexpr>P  s/      OODDId&677OOOOOOr,   rR   N)r   )"rm   r  rH   r   r   r;  r<   emptyr"   r$   r   r#   ry   r   	CUDAPlacer   r=   	broadcastr   r6   r7   rV   r7  r8  r|   reshapenumelzeros_clear
all_gatherr   r  clonerB   )r   rN  r   allgather_typer   buffertensor_list	max_numelr_  padded_bufferunpadded_tensor_listidxpadded_tensorunpadded_tensors                 r-   '_process_one_batch_broadcast_in_sectionzEThreeDCommGroupStateResharder._process_one_batch_broadcast_in_section8  s   %0"
Nz??aIt89	=I... (,HHH Y2IODDD+I,?@)  flF,<== '\)"7yOOOF$$	*$, 	% 	
 	
 	
 ]777OOJOOOOOIy&&)33!   &i[ M M M(.vt(D(Dfuf%&))V4< *    $& &/&<&< ' '"]&sO		)"788!&5&)//1199):OPP   %++O<<<$$&&&&.KK))V4< *   
 	r,   c                   || j                  \  }}t                      }t          |          D ]*\  }}|j        t          k    r|                    |           +t          |d          D ]}||= g }t          |          D ]P\  }\  }}| j        |         }	|D ]8}|j        t          k    r&t          ||	          }
|	                    |
           9Qd}t          |          D ]n\  }}| j
        sk|j        | j        k    r||         }|dz  }n t          j        |j        |j                  }t          j                            ||j        | j                   n||         }|dz  }t)          |j        ||j        |j        |j                  }| j        |j                 D ]}|j                                        s3t          j        |j                  }|                    |j                   |j        }|j        j        }|j        |k    r|                    |          }||_        t=          ||	           |                                 ~pd S )
NT)reverser  r   r   rX  rY  r   r   rT   r0   rU   r   r   ) r  r  r  r   r;  r:  r9  r  r   rB   r   r   rH   r<   r[  r"   r$   r=   r]  r   r   r0   r   r   r   _is_initialized
zeros_like_share_buffer_tor   r   r   ra  )r   rf  rV  r=  rd  need_remove_indicesrj  r   filtered_read_itemsr   replcaed_read_itemrF  re  received_sharded_weighttarget_sharded_weightr   	tgt_places                    r-   "broadcast_cross_p_group_and_assignz@ThreeDCommGroupStateResharder.broadcast_cross_p_group_and_assignt  s   +78O+P(.!ee'(899 	- 	-NC$(DDD#'',,,-t<<< 	! 	!CC   7@7N7N 	C 	C3C3"N|C(H- C C	(,HHH)0X)N)N)N&'../ABBBC
 '(;<< 0	( 0	(NC+ %66(-F1HCC#\!-Y_  F ",,	 2$, -     %S)q&3)#%1&3'9' ' '# *.)G%*  % -9IIKK P#.-: F ++,A,NOOO4A
1>D	#y00!+y!9!9J7A'4%/-    
 MMOOO''a0	( 0	(r,   c           	        | j         }t          |          }t          |d          D ]B\  }}|dz  dk    s||k    r"t          j        d| d| d|j                    | j        |j        k    rQ| j        |j	                 |j                 }t          |j        t          j                  s|                                }n t          j        |j        |j                  }t          j                            ||j        d 	           t)          |j        ||j        |j        |j        
          }| j        |j                 D ]}t1          ||           |                                 ~Dd S )Nr   start
   r   zBroadcasting item /z: rX  rY  rp  rq  )rQ  rm   r  r
   r   r   rH   r   r   r#   ry   r   r<   r\  r   r[  r"   r$   r=   r]  r   r0   r   r   r   ra  )r   r<  total_itemsrj  r   re  rx  ry  s           r-   'broadcast_cross_global_group_and_assignzEThreeDCommGroupStateResharder.broadcast_cross_global_group_and_assign  s   7 	$ 566'(C1MMM $	( $	(NCRx1}}{ 2 2UUU{UUi>SUU   }	 222/	0CD) "&,0@AA +#[[]]F)   ((I.d )    '4)#%1&3'9' ' '# *.)G%*  % &/-    
 MMOOO''I$	( $	(r,   c           
     (   t          | j                  }t          j        d           t	          | j        d          D ]n\  }}|                     || j                           }|                     ||           |dz  dk    s||k    r$t          j        d| d| d|d	z  |z   d
           ot          j        d           t          j        d           |                                  t          j        d           t          j        d           d S )NzL[ThreeDCommGroupStateResharder] Begin resharding using batch broadcasting...r   r}  r  r   zResharding batches: r  z (d   z%)zJ[ThreeDCommGroupStateResharder] End resharding using batch broadcasting...zM[ThreeDCommGroupStateResharder] Begin resharding using global broadcasting...zK[ThreeDCommGroupStateResharder] End resharding using global broadcasting...z4[ThreeDCommGroupStateResharder] Resharding finished.)	rm   rR  r
   r   r  rm  r  r{  r  )r   totalrj  rV  rf  s        r-   r   z%ThreeDCommGroupStateResharder.reshard  sW   D455Z	
 	
 	
 "+,A"
 "
 "
 
	 
	C FFT45 K 33KNNNRx1}}uP3PPPP#)u:LPPP   	X	
 	
 	
 	[	
 	
 	
 	44666Y	
 	
 	
 	JKKKKKr,   )r   F)r   r   r;   )r&   r'   r(   r   r   r   r   r  r   r   r   rm  r{  r  r   r+   r,   r-   r   r     s         (3         D/+ /+ /+b  "0 "0 "0J	 	 	 :N :N :N :NxQ1 Q1 Q1f&B &B &BP: : :xC( C( C(J)( )( )(VL L L L Lr,   r   )rO   r   rP   r   )1
__future__r   r7  collectionsr   dataclassesr   r   enumr   r   typingr	   numpyr   r<   paddle.distributedr=   rS  'paddle.distributed.fleet.utils.log_utilr
   rI   r   r   r   r   utilsr   r   r   paddle.distributed.collectiver   reshard_commr   r   r*   r   r/   r2   r6   r;  rN   rb   rd   rv   r   r   r   r   r+   r,   r-   <module>r     s   # " " " " " "  # # # # # # * * * * * * * *                          ! ! ! ! ! ! : : : : : : ; ; ; ; ; ; ; ;                33333332222229;  ; ; ; ; $$ $ $ $ $ $ $ $< $+ + + + +X + + +! ! ! ! !D ! ! !
    D   
  B   21 1 1 1@   $  *a a aHf+ f+ f+ f+ f+ f+ f+ f+R (  (  (FN	L N	L N	L N	L N	L N	L N	L N	L N	L N	Lr,   