
    Bj U                     j   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ g d	Z e j        e          Z	 d
Z G d d          Z G d de          Z e ej        d          ej                  Zd Z G d d          Z G d d          Zdede de!e         fdZ"dej#        dede deej#                 fdZ$d Z%	 	 d$de&edf         de'e(ef         dz  de d e&edf         dz  d!e'e(ef         dz  de&e!e&         e!e'         f         fd"Z)de!e         fd#Z*dS )%    N)Sequence)Any)DTensor)	local_mapmap_aggregate)	BlockMask)tree_flattentree_maptree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       e Zd ZdZd ZdS )_CustomReducera$  
    Custom reducer class that can be used to specify a custom operation that
    reduces losses of multiple microbatches into one value.

    Example:
    >>> # xdoctest: +SKIP
    >>> sum_reducer = _CustomReducer(
    >>>     torch.tensor(0.0),
    >>>     lambda a, b: a + b
    >>> )
    c                 "    || _         || _        d S N)
init_value	reduce_fn)selfr   r   s      j/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/torch/distributed/pipelining/microbatch.py__init__z_CustomReducer.__init__-   s    $"    N)__name__
__module____qualname____doc__r    r   r   r   r       s-        
 
# # # # #r   r   c                       e Zd ZdS )_LossReducerNr   r   r   r   r   r   r    r    2           Dr   r    g        c                       e Zd ZU dZd Zeed<   d Zd Ze	de
edf         fd            Ze	deeef         fd	            Zd
S )r   z2
    Class used to specify chunking of inputs
    c                     || _         d S r   	split_dim)r   r&   s     r   r   zTensorChunkSpec.__init__B   s    "r   r&   c                 J    | j         j         d| j         j         d| j         dS )N.())	__class__r   r   r&   r   s    r   __repr__zTensorChunkSpec.__repr__G   s/    ~(VV4>+BVVT^VVV	
r   c                     d| j          dS )NzTensorChunkSpec(r*   r%   r,   s    r   __str__zTensorChunkSpec.__str__L   s    3$.3333r   
chunk_dims.c                 (    t          | d           }|S )a  
        A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
        dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # There are three positional arguments to the model, and
            >>> # we are chunking them along dimension 0, 0 and 1, respectively
            >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
        c                      t          |           S r   r   dims    r   <lambda>z,TensorChunkSpec.from_tuple.<locals>.<lambda>^       ,, r   r   )r0   args_chunk_specs     r   
from_tuplezTensorChunkSpec.from_tupleO   s$     (,,
 
 r   c                 (    t          | d           }|S )a\  
        A helper for creating a dictionary of `TensorChunkSpec` from a
        dictionary of chunk dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
            >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
        c                      t          |           S r   r3   r4   s    r   r6   z+TensorChunkSpec.from_dict.<locals>.<lambda>p   r7   r   r   )r0   kwargs_chunk_specs     r   	from_dictzTensorChunkSpec.from_dictb   s%     *,,
 
 ! r   N)r   r   r   r   r   int__annotations__r-   r/   staticmethodtupler9   dictstrr=   r   r   r   r   r   =   s          # # # NNN
 
 

4 4 4 #s(O   \$ !cN! ! ! \! ! !r   r   c                       e Zd ZdS )
_ReplicateNr!   r   r   r   rE   rE   v   r"   r   rE   
block_mask
num_chunksreturnc                      j                             d          dk    r g|z  S  j                             d          |k    st          d          d}t          j         j         ||          }t          j         j        ||          } j        t          j         j        ||          ndg|z  } j        t          j         j        ||          ndg|z  }g }d}t          |          D ]~}	 fd}
|	                    t          j        ||	         ||	         ||	         ||	          j         |
|           j                             |||	                             d          z  }|S )a	  Given a block mask, split the block mask along the batch dimension (dim0).

    Args:
        block_mask: Block mask to split
        num_chunks: Number of chunks to split the block mask into

    Returns:
        chunk_block_masks: List of chunked block masks
    r      z;Block mask has fewer batch size than the number of chunks. Nc                       fd}|S )Nc                 d    t          j        |           }                    | |z   |||          S r   )torch	full_likemask_mod)bhq_idxkv_idxb_offsetrF   idxs        r   batch_offset_mask_modzI_split_block_mask.<locals>.create_mask_mod.<locals>.batch_offset_mask_mod   s2     ?1c22!**1x<E6JJJr   r   )rU   rV   rF   s   ` r   create_mask_modz*_split_block_mask.<locals>.create_mask_mod   s0    K K K K K K )(r   )kv_num_blocks
kv_indicesfull_kv_num_blocksfull_kv_indices
BLOCK_SIZErO   seq_lengths)rX   sizeAssertionErrorrM   tensor_splitrY   rZ   r[   rangeappendr	   from_kv_blocksr\   r]   )rF   rG   	batch_dimkv_num_blocks_chunkskv_indices_chunksfull_kv_num_blocks_chunksfull_kv_indices_chunkschunk_block_masksbatch_offset	chunk_idxrW   s   `          r   _split_block_maskrl   z   s    $$Q''1,,|j((#((++z99I
 
 	
 I - *i  *:+@*iXX (4 	:8*iPPPVj   %1 	:5z9MMMVj   L:&& @ @		) 	) 	) 	) 	) 	  $29=,Y7#<Y#G 6y A%0(66&2  
	
 
	
 
	
 	,Y7<<Q???r   tensorspecc                    |                      j                  k    s+t          d|                      j                   d          t          | t                    }|r.| j        }t          fd|fz  |f          } ||           }nt          j        | j                  }| j	        r | j
        r|D ]}|                                 t          s|S dt          j        dt          j        dt          t          j        df         ffd	}|rJ| j        }t          |          }	t          ||f|	z  |f|f|	z  z             }
t!           |
| g|R            S t!           || g|R            S )
zGiven a tensor, and a chunking spec, split the tensor.
    Args:

        tensor: Tensor to split
        spec: Chunking spec
        num_chunks: Number of chunks to split the tensor into

    Returns:
        chunk_tensors: List of chunked tensors
    zTensor size z is smaller than num_chunksc                 :    t          j        | j                  S r   )rM   r`   r&   )trG   rn   s    r   r6   z_split_tensor.<locals>.<lambda>   s    e(JGG r   out_placementsin_placementsorigchunksrH   .c                 d   g }d}|D ]}t          j        |           }||                    j                  z   }t	          d           g|j        z  }t	          ||          |j        <   |||<   |                    |           ||                    j                  z  }t          |          S )Nr   )rM   
zeros_liker^   r&   slicendimrb   rA   )	ru   rv   expandedrU   chunknew_valupperslicesrn   s	           r   _expand_chunksz%_split_tensor.<locals>._expand_chunks   s      	. 	.E&t,,G%**T^444E#(;;-',">F%*3%6%6F4>"#GFOOOG$$$5::dn---CCXr   )r^   r&   r_   
isinstancer   
placementsr   rM   r`   requires_gradis_leafretain_grad_debug_mask_minibatchesTensorrA   lenlist)rm   rn   rG   _is_dtensorr   split_fnchunk_tensorsr|   r   n	expand_fns    ``        r   _split_tensorr      s     ;;t~&&*44S6;;t~66SSS
 
 	
 VW--K O
 &
GGGGG&=:5%-
 
 

 190@0@*6:t~NN
     " 	  	 E" l%*\	u|S 	!       
<&
&=1,%-:-!*;;
 
 
	
 IIf5}555666NN6:M:::;;;r   c           	         | sd t          |          D             S t          |           t          |          k    sSt          dt          |                                            dt          |                                                     |t          d          t          | d           \  }t          |d           \  }}g }t          ||d	
          D ]\  }}|t          u st          |t                    r|	                    |           :t          |t          j                  rbt          |t                    st          dt          |                     |	                    |                    |j                             t          |t                     rt          |t                    st          dt          |                     |j        dk    st          d          |j                            d          dk    r|	                    |           N|	                    |j                            d                     }t%          d| d| d          t'          g ||R  }	d t          |	          D             }
t          ||d	
          D ]\  }}g }|t          u st          |t                    r|g|	z  }nht          |t          j                  rt)          |||	          }n<t          |t                     rt+          ||	          }nt%          d| d| d          t          |
|d	
          D ]\  }}|	                    |           fd|
D             S )aW  
    Given a dictionary of args, and a dictionary of chunking specs, shard the
    args according to the chunking specs.

    Args:
        args_dict: Dictionary of args
        args_chunk_spec: Dictionary of chunking specs
        num_chunks: Number of chunks to shard the args into

    Returns:
        args_split: List of sharded args
    c                     g | ]}i S r   r   .0_s     r   
<listcomp>z'_shard_dict_of_args.<locals>.<listcomp>  s    ...q...r   zargs_dict.keys() = z args_chunk_spec.keys() = Nz.args_chunk_spec should have been set by callerc                 ,    t          | t                    S r   r   r	   xs    r   r6   z%_shard_dict_of_args.<locals>.<lambda>%  s    Z9%=%= r   r   c                 ,    t          | t                    S r   r   r   s    r   r6   z%_shard_dict_of_args.<locals>.<lambda>(  s    :a+C+C r   TstrictzExpected TensorChunkSpec, got r   z#BlockMask only supports split_dim=0rJ   zUnsupported chunk spec: z and value: z combination.c                     g | ]}g S r   r   r   s     r   r   z'_shard_dict_of_args.<locals>.<listcomp>F  s    $J$J$JAR$J$J$Jr   c                 0    g | ]}t          |          S r   )r   )r   _flat_split_result	tree_specs     r   r   z'_shard_dict_of_args.<locals>.<listcomp>Y  s4        	)955  r   )ra   r   r_   r   keysr
   ziprE   r   rb   rM   r   r   typer^   r&   r	   rX   
ValueErrorminr   rl   )	args_dictr8   rG   valueschunk_specsr   split_sizesvrn   result_num_chunksflat_split_resultsv_splitsr   _v_splitr   s                 @r   _shard_dict_of_argsr     s   $  /..E*--....y>>S1111G$y~~'7'7"8"8 G G(,_-A-A-C-C(D(DG G
 
 	
 MNNN$==  FI "!C!C  NK
 Kv{4888  4 :D*!=!=z****5<(( 	dO44 T$%Rd4jj%R%RSSSqvvdn5566669%% 	dO44 T$%Rd4jj%R%RSSS>Q&&$%JKKK##A&&!++"":....""1?#7#7#:#:;;;;M4MMQMMM   5[5*555$J$J7H1I1I$J$J$Jv{4888 0 04"$:D*!=!=s..HH5<(( 	$Q.?@@HH9%% 	(,=>>HHM4MMQMMM   -0-
 -
 -
 	0 	0( %%h////	0
   "4   r   args.kwargsrv   r8   r<   c                    |i }d }|t          || d           }|t          ||d           }t          t          t          |                     t          t          |                    |          }t	          |          }t          |||          }t	          |          |k     rTt	          |          }t          t          t          |                     t          t          |                    |          }t	          |          t	          |          k    r/t          dt	          |           dt	          |                     d |D             }	|	|fS )	a  
    Given a sequence of args and kwargs, split them into a number of chunks
    according to  their respective chunking specs.

    Args:
        args: Tuple of args
        kwargs: Dict of kwargs
        chunks: Number of chunks to split the args and kwargs into
        args_chunk_spec: chunking specs for args, in same shape as args
        kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

    Returns:
        args_split: List of sharded args
        kwargs_split: List of sharded kwargs
    Nc                     t          | t          j        t          z            rt	          t
                    S t                      S r   )r   rM   r   r	   r   DEFAULT_CHUNK_DIMrE   r   s    r   default_specz3split_args_kwargs_into_chunks.<locals>.default_spec  s4    a	122 	 "#4555<<r   c                 ,    t          | t                    S r   r   r   s    r   r6   z/split_args_kwargs_into_chunks.<locals>.<lambda>  s    *Q	2J2J r   r   c                 ,    t          | t                    S r   r   r   s    r   r6   z/split_args_kwargs_into_chunks.<locals>.<lambda>  s    Jq)4L4L r   z;args and kwargs are split into different number of chunks: z, c           
      z    g | ]7t          fd t          t                              D                       8S )c              3   (   K   | ]}|         V  d S r   r   )r   i
chunk_argss     r   	<genexpr>z;split_args_kwargs_into_chunks.<locals>.<listcomp>.<genexpr>  s'      <<jm<<<<<<r   )rA   ra   r   )r   r   s    @r   r   z1split_args_kwargs_into_chunks.<locals>.<listcomp>  sT        	<<<<U3z??%;%;<<<<<  r   )r   r   rB   	enumerater   RuntimeError)
r   r   rv   r8   r<   r   args_split_dictreal_num_chunkskwargs_split
args_splits
             r   r   r   _  s   p ~      "$(J(J
 
 
  $&*L*L
 
 
 *Yt__Y''(( O
 /**O& L <?** l++-4!!?++,,
 
 ?s<0000;?##; ;'*<'8'8; ;
 
 	

 )  J
 |##r   c                 	   |t          |          \  }}n=t          | d                   \  }}t          t                    gt          |          z  }g | D ]^}t          |          \  }}t          |          t          |          k    rt	          d| d|                               |           _g }t          |          D ]\  t          t                    rfdt          t                              D             }	t          rQ|	d         j
        }
|	dd         D ]'}|j
        |
k    st          d|
 d|j
                   (t          j        t          j        |
d	d
it          |	          j                  }g }d}t          |	          t          |          k    s/t          dt          |	           dt          |                     t!          |	|d          D ]s\  }}||                    j                  z   }t%          ddd          g|j        z  }t%          ||          |j        <   ||         }|                    |           |}tn|	}d |D             }t)          |          rt+          |          st          d          |d         j        t          |dd         d          D ]-\  }}|j        k    rt          d| d d|j                   .t/          fdft1          fdt          t          |                    D                                 }|                     ||            |                    t          j        |j                             t          t4                    r_j        }t          t                              D ]$}                    ||                            }%|                    |           3d                  }t          dt                              D ]5}|                  |k    s!t          d| d|                             6|                    |           t;          ||          S )z
    Given a list of chunks, merge them into a single value according to
    the chunk spec.

    Args:
        chunks: list of chunks
        chunk_spec: Chunking spec for the chunks

    Returns:
        value: Merged value
    Nr   zChunk z did not match chunk spec c                 ,    g | ]}|                  S r   r   )r   rk   arg_idxchunks_flatteneds     r   r   z merge_chunks.<locals>.<listcomp>  s3        !+G4  r   rJ   zExpected shape z, got devicemeta)sectionsr5   z6Expected len(partial_values) == len(meta_chunks), got z != Tr   c                 8    g | ]}t          |t                    S r   )r   r   )r   r   s     r   r   z merge_chunks.<locals>.<listcomp>C  s"    KKKZ733KKKr   zRmerge_chunks: expected all values to be DTensors or none to be DTensors, got a mixz*merge_chunks: placement mismatch at chunk z: expected c                  :    t          j        | j                  S )Nr4   )rM   catr&   )rv   args    r   r6   zmerge_chunks.<locals>.<lambda>S  s    EIf#-$H$H$H r   c              3      K   | ]}V  d S r   r   )r   r   r   s     r   r   zmerge_chunks.<locals>.<genexpr>U  s#      'V'Vq
'V'V'V'V'V'Vr   rr   r4   z	Expected )r
   r   r   r   r   rb   r   r   ra   r   shaper_   rM   r`   emptyr&   r   r^   ry   rz   anyallr   r   rA   r   r   r   r   r   )rv   
chunk_specspec_flattenedflatten_specchunk0_flatr|   chunk_flattenedr   args_flattenedpartial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxslice_indicessliceddtensor_flagsr   r   cat_fnreduced_valrk   valuer   r   r   r   s                              @@@@r   r   r     s   Z '3J'?'?$ %1$;$;!\)*;<<=K@P@PP  1 1)%003~#6#666SeSSzSSTTT0000
 N!.11 T) T)c?++ S	)    !&s+;'<'<!=!=  N
 ' "/ .q 1 7)!""-  C955,NmNN39NN   6 $0K>v>> 00   !#"#>**c+.>.>>>(|QTUcQdQd||jmnyjzjz||   25"K2 2 2 
4 
4-M: %4joocm6T6T$TM%*4t%<%<$=@R$RM38-3X3XM#-0*=9F!((000&3OO
4 !/ LK]KKKM=!! S=)) (9  
 +1-8
%mABB&7;;  DAq|z11,I I I(2I I:;,I I   2
 #HHHH$.="''V'V'V'VE#mBTBT<U<U'V'V'V"V"V  
 %%ffm&<====%%ei3=&Q&Q&QRRRR^,, 	).K"3'7#8#899  	!mm!1)!<W!E  !!+....$Q'0E"1c*:&;&;<<  	'	27;uDD(WEWW1A)1LW1UWW   E !!%(((( .,777r   )NN)+loggingoperatorcollections.abcr   typingr   rM   torch.distributed.tensorr   %torch.distributed.tensor.experimentalr   torch.fx.noder   !torch.nn.attention.flex_attentionr	   torch.utils._pytreer
   r   r   __all__	getLoggerr   loggerr   r   r    rm   addsum_reducerr   r   rE   r>   r   rl   r   r   r   rA   rB   rC   r   r   r   r   r   <module>r      s     $ $ $ $ $ $        , , , , , , ; ; ; ; ; ; ' ' ' ' ' ' 7 7 7 7 7 7 F F F F F F F F F F   
	8	$	$
   # # # # # # # #$	 	 	 	 	> 	 	 	 l<5<,,hl;;  5! 5! 5! 5! 5! 5! 5! 5!r	 	 	 	 	 	 	 	>>> 
)_> > > >BI<LI<
I< I< el	I< I< I< I<XU U Ux ;?;?p$ p$
S/p$cNT!p$ p$ ?C/047	p$
 C01D8p$ 4;T
"#p$ p$ p$ p$f[8I[8 [8 [8 [8 [8 [8r   