
    Αi)                        S SK Jr  S SKrS SKJr  S SKrS SKJr  S SKJ	r	J
r
  S SKJrJr  S SKJrJrJr  \R$                  " \5      rS rSS jrSS jr " S	 S
5      rS r  S           SS jjr  SS jrg)    )annotationsN)Any)	ReplicateShard)dtensor_from_localdtensor_to_local)flattenmap_structurepack_sequence_asc                l  ^  T R                  5       (       d  [        R                  " T X5      nU$ [        R                  " 5       (       a  U 4S jnU" 5         T R
                  nT R                  n[        T XV5      n[        R                  " XqU5      n[        U5       H  n[        X8   XV5      X8'   M     U$ )Nc                   > TR                   S   [        R                  " S5      :X  d   S5       eTR                  R                  S   n TR                  S   U -  n/ n[        U 5       H+  n[        U5       H  nUR                  X4U -  -   5        M     M-     TU   n[        R                  " UTR                  TR                   5      mg )Nr   z inputs should be placed on S(0).)
placementsdistr   process_meshshaperangeappendreshard)	shardingsrows_per_shardnew_indicess_idrow_in_shardtmpxs         v/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/pipelining/microbatch.py_reorder_data_for_align._split_tensor.<locals>._reorder_data_for_align+   s    ||A$**Q-7 67 NN003	!"y!8 !),D(-n(=#**42J+JK )> - nLLannallC    )
is_distpaddletensor_splitr   in_auto_parallel_align_moder   r   r   r   r   )	r   
num_chunks
split_axischunk_tensorsr   meshr   dense_xis	   `        r   _split_tensorr*   $   s    99;;++AzF< 7 ++--D  $%~~\\
"1d7++GLz"A1 $ M # r   c                *   U S   nUR                  5       (       d  [        R                  " X5      nU$ UR                  n[	        UR
                  5       Vs/ s H  n[        5       PM     nnSUR                  ;   a  UR                  R                  S5      OSn[        S5      Xg'   [	        [        U 5      5       H.  n[        R                  " X   XF5      X'   [        X   XF5      X'   M0     [        R                  " X5      n[        X4U5      nU$ s  snf )Nr   dp)r    r!   concatr   r   ndimr   	dim_namesindexr   lenr   r   r   r   )	r&   axischunk0outr'   _r   dp_indexr)   s	            r   _concat_tensorr7   G   s    1F>>mmM0" J ""+0+;<+;aik+;
<151G4>>''-Q$Qx
s=)*A#||M,<dOM/ $ M + mmM0 J7J =s   Dc                  6    \ rS rSr% SrS rS\S'   S rS rSr	g	)
TensorChunkSpec^   z*
Class used to specify chunking of inputs
c                    Xl         g Nr%   )selfr%   s     r   __init__TensorChunkSpec.__init__c   s    $r   intr%   c                |    U R                   R                   SU R                   R                   SU R                   S3$ )N.())	__class__
__module____name__r%   r>   s    r   __repr__TensorChunkSpec.__repr__h   s7    ..++,Adnn.E.E-FaGXXYZZr   c                "    SU R                    S3$ )NzTensorChunkSpec(rE   r=   rI   s    r   __str__TensorChunkSpec.__str__k   s    !$//!2!44r   r=   N)
rH   rG   __qualname____firstlineno____doc__r?   __annotations__rJ   rM   __static_attributes__ r   r   r9   r9   ^   s    % O[5r   r9   c                   [        U 5      [        U5      :X  d;   S[        U R                  5       5       S[        UR                  5       5       35       e0 nU R                  5        GH+  u  pE[	        U5      nX   nUc   e[	        U5      n[        U5      [        U5      :X  d!   U S[        U5       S[        U5       35       e/ n	[        Xh5       H  u  p[        U
[        R                  5      (       d  U	R                  U
/U-  5        M;  [        U[        5      (       a\  U
R                  UR                     nX:  a  [        SU SU SU S	35      e[        XUR                  5      nU	R                  U5        M  [        S
U 35      e   XU'   GM.     / n[!        U5       H  n0 nUR                  5        HB  u  nnU(       d  SOUS   U   n[        U5      S:  a  U V
s/ s H  oU   PM	     sn
OUnUUU'   MD     [	        U5       Vs/ s H
  nUc  M  UPM     nn[#        U U5      nUR                  U5        M     U$ s  sn
f s  snf )z5
A helper function of split_args_kwargs_into_chunks.
zargs_dict.keys() = z args_chunk_spec.keys() = N z != zArg z% on chunking dimension has a size of z$, smaller than the number of chunks z(. Please adjust your num_chunks setting.zUnrecognized chunk spec: r      )r1   listkeysitemsr	   zip
isinstancer!   Tensorr   r9   r   r%   
ValueErrorr*   	TypeErrorr   r   )	args_dictargs_chunk_specr$   shared_args_dict_flatarg_keyargarg_flat
chunk_specchunk_spec_flatshard_arg_flatvchunk_vv_split_axis_sizer&   
args_splitidx
chunk_argskeylast_argarg_of_curr_chunkr   flatten_chunk_argss                         r   _split_args_helperrs   o   sl    y>S11 
d9>>#3455OPTUdUiUiUkPlOmn1 !)3<$-
%%%!*-?#s8}4 	
iqXtC,@+AB	
4 h8JAa//%%qcJ&67G_55$%GGG,>,>$?!$1$wi'LM^L_ `==GL IAA  !.aW=O=O P%%m4";G9 EFF# 9& *8g&A *F JZ 
-335HC#&tCF3KH),SA%A3%8  0JsO 6 *1)<N)<Aa)<N%i1CD
*% !  &
 Os   "IIIc                  ^ Uc  0 nUc  [        S U 5      nUc  [        S U5      n[        [        [        U 5      5      [        [        U5      5      U5      n[        UUU5      n[	        U5      [	        U5      :X  d   S[	        U5       S[	        U5       35       eU V^s/ s H*  m[        U4S j[        [	        T5      5       5       5      PM,     nnX4$ s  snf )a  
Given a sequence of args and kwargs, split them into a number of chunks
according to  their respective chunking specs.

Args:
    args: tuple of args
    kwargs: dict of kwargs
    chunks: Number of chunks to split the args and kwargs into
    args_chunk_spec: chunking specs for args, in same shape as args
    kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

Returns:
    args_split: list of sharded args
    kwargs_split: list of sharded kwargs
c                     [        [        5      $ r<   r9   DEFAULT_CHUNK_DIMr5   s    r   <lambda>/split_args_kwargs_into_chunks.<locals>.<lambda>   
    o&78r   c                     [        [        5      $ r<   rv   rx   s    r   ry   rz      r{   r   z<args and kwargs are split into difference number of chunks: z, c              3  .   >#    U  H
  nTU   v   M     g 7fr<   rT   ).0r)   
args_chunks     r   	<genexpr>0split_args_kwargs_into_chunks.<locals>.<genexpr>   s     <%;jm%;s   )r
   rs   dict	enumerater1   tupler   )	argskwargschunksra   kwargs_chunk_specargs_split_dictkwargs_splitr   rl   s	          ` r   split_args_kwargs_into_chunksr      s    N ~'8$
  )8&
 )Yt_Y'(O
 &L 3|#44 F
 3|#4"5	74 *)J 	<U3z?%;<<)  
 ##s   !1Cc                   [        U 5      S:X  a  [        R                  S5        U $ Uc  [        S U S   5      n/ n[	        U5      nU  HD  n[	        U5      n[        U5      [        U5      :X  d   SU SU 35       eUR                  U5        MF     S
S jn/ n[        U5       H  u  px[        U[        5      (       a  [        US   U   [        R                  5      (       a=  [        [        U5      5       V	s/ s H
  n	X)   U   PM     n
n	[        XR                  S9nO,[        R                  SU S	35        U" X'U5      nO	U" X'U5      nUR                  U5        M     [        U S   U5      $ s  sn	f )z
Given a list of chunks, merge them into a single chunk according to
the chunk spec.

Args:
    chunks: list of chunks
    chunk_spec: Chunking spec for the chunks

Returns:
    chunk: chunks merged value
r   zNo chunks to merge.c                     [        [        5      $ r<   rv   rx   s    r   ry   merge_chunks.<locals>.<lambda>  r{   r   zChunk z did not match chunk spec c                ~    U S   U   n[        S[        U 5      5       H  nX   U   U:X  a  M   SU SU 35       e   U$ )Nr   rW   z%Cannot merge chunks with index 0 and z9 with different values,When the arg's TensorChunkSpec is )r   r1   )r   rm   chunk_spec_of_argarg_0	chunk_idxs        r   _merge_non_tensor_type_arg0merge_chunks.<locals>._merge_non_tensor_type_arg   s`    q	#q#f+.I$S)U2 7u =55F4GI2 /
 r   )r2   z)Cannot merge chunks with TensorChunkSpec z6.The TensorChunkSpec only supports paddle.Tensor type.r<   )r1   loggerwarningr
   r	   r   r   r\   r9   r!   r]   r   r7   r%   r   )r   rf   chunks_flatchunk
chunk_flatr   	args_flatarg_idxr   r   arg_chunks_to_merge
merged_args               r   merge_chunksr      s    6{a,-"8&)

 K$JU^
:#j/1 	
UG5j\B	
1 	:&  I&/
&;"'99+a.16==AA &+3{+;%<'%<	  *73%< $ ' ,'.J.J
 ?@Q?R SL L
 8*;
 4&7J 	$1 '<6 F1Iy111's   8E/)r   )NN)r   ztuple[Any, ...]r   zdict[str, Any] | Noner   rA   ra   z]tuple[tuple[TensorChunkSpec, ...] | list[TensorChunkSpec, ...] | TensorChunkSpec, ...] | Noner   z\dict[str, tuple[TensorChunkSpec, ...] | list[TensorChunkSpec, ...] | TensorChunkSpec] | Nonereturnztuple[list[tuple], list[dict]])r   z	list[Any])
__future__r   loggingtypingr   r!   paddle.distributeddistributedr   r   r   $paddle.distributed.auto_parallel.apir   r   paddle.utilsr	   r
   r   	getLoggerrH   r   rw   r*   r7   r9   rs   r   r   rT   r   r   <module>r      s    #    ! / B A			8	$   F.5 5"?\ 	 	+J$
J$!J$ J$
	J$	J$, $-J$ZH2H2r   