
    Αi{2                         S SK rS SKrS SKJr  S SKJr  SSKJrJ	r	J
r
   SS jrS rS rS	 rS
 rS rS rS r\
" S5       " S S\5      5       rg)    N)core)unique_name   )PassBasePassTyperegister_passc                    [        U 5      n/ nUc  S nSn XS:  a*  U" X   5      (       d  US-  nXS:  a  U" X   5      (       d  M  US-   nXc:  aN  U" X   5      (       a?  U" X   X   5      (       a-  US-  nXc:  a#  U" X   5      (       a  U" X   X   5      (       a  M-  XS:  a  Xc::  a  UR                  XV45        US-   nXS:  a   U$ M  )Nc                     gNT )ref_opnew_ops     i/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/passes/fuse_all_reduce.py<lambda>/find_adjacent_match_sequences.<locals>.<lambda>   s    d    r   r   )lenappend)iterablefilter_funcadjacent_filter_funcnmatch_sequencesijs          r   find_adjacent_match_sequencesr      s     	HAO#:	A
eK44FA eK44EEHK(($X[(+>>FA	 EHK(($X[(+>> 5QV""A6*E6 r   c                 4   U R                  [        R                  " SUS    35      US9nUS   (       GdQ  US   n[        U5      n[	        U5       GH  u  pU
S-
  nUS:  aX  U R
                  U   R                  S:X  a;  UR                  U5        US-  nUS:  a  U R
                  U   R                  S:X  a  M;  U	S:  d  Mq  U
S-   n[        U R
                  5      nX:  d  M  U R
                  U   R                  S:X  d  M  U R
                  U   R                  S5      U:X  d   eUR                  U5        X:  d  M  U R
                  U   R                  S:X  a  M[  GM     [        [        U5      5      nUR                  S	S
9  UnUS   S-   n[        R                  R                  5       n/ n/ nU HI  nU R                  U5      R                   nUR#                  U5        UR                  [        U5      5        MK     SSU0UUS.SS	SUSUSUXU   0S.nUS   (       d  U R%                  USSU0SXoX_   0S9  US-  n[&        R(                  R*                  R,                  US'   U R%                  USSU0SU0US9  U H  n
U R/                  U
5        M     U$ )NFusedOutput_r   )namedtypeuse_calc_streamring_idr   c_sync_calc_streamc_sync_comm_streamT)reversecoalesce_tensorInput)OutputFusedOutput	use_alignr    concated_shapesconcated_ranks)typeinputsoutputsattrsXOut)r-   r.   r/   reduce_type
all_reducexout)
create_varr   generatelist	enumerateopsr-   r   r   attrsetsortr   op_proto_and_checker_makerkOpRoleAttrName_find_var_recursiveshapeextend_insert_op_without_syncpaddledistributedReduceOpSUM
_remove_op)blockreversed_op_indicesinput_var_namesoutput_var_namesr    r0   	fused_varr"   new_op_indicesr   op_idxprev_op_idxnext_op_idxr   
insert_idxop_role_keyr+   r,   var_namerB   coalesce_tensor_op_kwargss                        r   insert_fuse_all_reduce_opsrW   2   s      !!L1C0D"EF ! I "##	"12"#67IA 1*Kq IIk*//3GG%%k2q 	 q IIk*//3GG
 1u$qj		NO		+.337KK 99[166yAWLLL"))+6	  O		+.337KKK 8& c.12D),$Q'!+J11AACKON$))(399u%c%j) % "_
 '$

 Un{+
!$ "#%%%#IE4FG	 	& 	
 	a
 "--66::E-	!!Y	" "  &  & %$r   c                 b    U H)  nU R                  U5      UR                  U5      :w  d  M)    g   g)NFT)r<   )op1op2
attr_names	attr_names       r   has_same_attrsr]      s.    	88I#((9"55   r   c                     1 Skn/ n[        U R                  5       H(  u  p4UR                  U;   d  M  UR                  U5        M*     U$ )N>   	broadcast
all_gatherr4   c_broadcast)r:   r;   r-   r   )rJ   all_collective_opsmatch_op_indicesr   ops        r    filter_all_collective_op_indicesre      sJ     599%77((##A& & r   c                   ^ ^
 [        T 5      nU Vs/ s H  nT R                  U   PM     nnU 4S jnSS[        R                  R	                  5       [        R                  R                  5       /m
U U
4S jn[        X4U5      n/ nU H2  u  p(UR                  [        X(5       V	s/ s H  oU	   PM	     sn	5        M4     U$ s  snf s  sn	f )Nc                   > U R                   S:w  d  U R                  S5      (       a  gU R                  S5      S   nU R                  S5      S   nX:w  a  gTR	                  U5      nUc   eUR                   [
        R                  R                  R                  :w  a  gUR                  n[        S U 5       5      (       a  gg)	Nc_allreduce_sumuse_model_parallelFr1   r   r2   c              3   *   #    U  H	  oS :*  v   M     g7f)r   Nr   ).0ss     r   	<genexpr>Qfind_all_fuse_all_reduce_groups.<locals>.is_valid_allreduce_op.<locals>.<genexpr>   s     %u!Avus   T)r-   r<   inputoutputrA   r   VarDescVarTypeDENSE_TENSORrB   any)rd   in_var_nameout_var_namein_varrB   rJ   s        r   is_valid_allreduce_op>find_all_fuse_all_reduce_groups.<locals>.is_valid_allreduce_op   s    77''2773G+H+HhhsmA&yy'*&**;7!!!;;$,,..;;;%u%%%r   r"   r!   c                    > [        XT5      (       d  gTR                  U R                  S5      S   5      nTR                  UR                  S5      S   5      nUR                  UR                  :w  a  gg)NFr1   r   T)r]   rA   ro   r    )r   r   ref_op_in_varnew_op_in_varrJ   same_attr_namess       r   is_same_adjacent_op<find_all_fuse_all_reduce_groups.<locals>.is_same_adjacent_op   si    fo>>11&,,s2CA2FG11&,,s2CA2FG-"5"55r   )	re   r;   r   r?   r@   kOpDeviceAttrNamer   r   range)rJ   collective_op_indicesr   collective_opsrx   r~   
match_seqsnew_match_seqsr   kr}   s   `         @r   find_all_fuse_all_reduce_groupsr      s    <UC,AB,Aqeiil,ANB" 	''779''99;	O //BJ NqMAQ7MN Q CN Ns   C %C
c                 L  ^ / mU4S jnU H  n[        U5      nUS:  d   eUS:X  a  M  SnUS-   nXu:  aa  Sn[        Xg5       HD  n	X$U	      XG      n
U
[        R                  R                  R
                  :X  a  M9  U" XFU5        Un  O   US-  nXu:  a  Ma  U" XFU5        M     T$ )Nc                 >   > X!-
  S:  a  TR                  XU 5        g g )Nr   )r   )
op_indices	start_idxend_idx
new_groupss      r   insert_new_group>split_fuse_all_reduce_groups_by_deps.<locals>.insert_new_group   s&    "j7;< #r   r   r   F)r   r   r   NodeDepNoDep)rJ   groupsop_depsr   r   r   r   r   found_groupprev_idxdepr   s              @r   $split_fuse_all_reduce_groups_by_depsr      s    J= 

O1uu6	MeK!)/23JMB$))----- :	 0 FA e 	2) , r   c                 @   U(       d  g 0 n[        U R                  5       HE  u  p4UR                   H  nXR;  d  M
  US/X%'   M     UR                   H  nXR;  d  M
  US/X%'   M     MG     [	        U R                  5      n/ n[        U5       Ha  u  pU	S   S   U	S   S   -   n
UnSnU
 H(  nXR;  a  SnSn  OX%   u  pU(       a  Sn[        X5      nM*     XS   S	'   UR                  X45        Mc     UR                  S
 SS9  U H  u  p9U R                  " U40 U	D6  M     g )NTFr.   r'   r/   r(   r   r0   	copy_datac                     U S   $ )Nr   r   )elements    r   r   ,insert_coalesce_tensor_ops.<locals>.<lambda>  s    71:r   )keyr%   )	r:   r;   input_arg_namesoutput_arg_namesr   minr   r>   rD   )rJ   coalesce_ops_kwargs	var_infosidxrd   varr   insert_idx_and_kwargs	group_idxkwargsall_vars
min_op_idxr   min_idxrP   is_inputs                   r   insert_coalesce_tensor_opsr      sC   IUYY'%%C#"%t	 & &&C#"%u	 ' ( 	EIIA&':;	(#G,vi/@/JJ
	C# 	(~F 	Z0J  (1w$$$j%9: <  #=tL,%%c4V4 -r   c                    [         R                  R                  5       n[         R                  R                  5       n[         R                  R	                  5       n/ n[        U5       GH9  nU R                  US      nUR                  S5      n	UR                  S5      n
UR                  S5      nUR                  U5      nUR                  U5      nSU	SU
SUX<X]0nU R                  UR                  S5      S   5      R                  n[         R                  " U5      nSn/ n/ n/ n/ n[        U5       GH0  nU R                  U   nUR                  S5      S   nUR                  S5      S   nU R                  U5      n[        [        R                  " UR                   5      5      U-  nUU-   U:  a>  [#        U5      S:  a%  UX'   [%        U UUUUU5      nUR'                  U5        Sn/ n/ n/ n/ nUU-  nUR'                  U5        UR'                  U5        UR'                  U5        UR)                  U5      (       d  GM  UR+                  UR                  U5      5        GM3     [#        U5      S:  d  GM  UX'   [%        U UUUUU5      nUR'                  U5        GM<     U R-                  5         [/        X5        g )Nr   r"   r!   ri   r1   r2   r   )r   r?   r@   kOpRoleVarAttrNamer   reversedr;   r<   rA   ro   r    size_of_dtyperp   intnpprodrB   r   rW   r   has_attrrC   _sync_with_cppr   )rJ   r   max_memory_sizerT   op_role_var_keyop_device_keyr   groupfirst_opr"   r!   ri   op_role	op_devicer0   r    sizeofcur_mem_sizeop_role_varsrecorded_op_indicesin_var_namesout_var_namesrP   rd   ru   rv   rw   mem_sizecoalesce_op_kwargss                                r   %insert_fuse_all_reduce_by_memory_sizer     s   11AACK55HHJO33EEGM&!99U1X&--	*"--(9:%]]+?@--,MM-0	 w "4
 ))(..*=a*@AGG##E* uoF6"B((3-*K99U+A.L..{;F2776<<01F:Hh&8*+a/-9E*)C+$%*& (../AB !&(#! "H$L&&v.,  .{{?++##BGGO$<== &@ "#a'%1E"!;#"  &&'9:C "D 
u:r   fuse_all_reducec                   @   ^  \ rS rSrU 4S jrS rS rS rS rSr	U =r
$ )FuseAllReducePassia  c                 F   > [         TU ]  5         U R                  SS5        g )Nr   )super__init__set_attr)self	__class__s    r   r   FuseAllReducePass.__init__c  s    ',r   c                 .    U R                  S5      nUS:  $ )Nr   r   )get_attr)r   r   s     r   _check_selfFuseAllReducePass._check_selfg  s    --(9:""r   c                     gr   r   )r   
other_passs     r   _check_conflict!FuseAllReducePass._check_conflictk  s    r   c                 "    [         R                  $ N)r   COMM_OPT)r   s    r   _typeFuseAllReducePass._typen  s       r   c                 "   U R                  S5      nUR                  R                  5       nUR                  n[	        U5       H9  nUR                  U5      n[        U5      n	[        XXW   5      n	[        XU5        M;     UR                  5         g )Nr   )
r   descget_op_deps
num_blocksr   rJ   r   r   r   r   )
r   main_programstartup_programcontextr   r   r   r   rJ   r   s
             r   _apply_single_impl$FuseAllReducePass._apply_single_implv  s    --(9:##//1!,,
z"A &&q)E4U;F9wzF 2 # 	##%r   r   )__name__
__module____qualname____firstlineno__r   r   r   r   r   __static_attributes____classcell__)r   s   @r   r   r   a  s!    -#!& &r   r   r   )numpyr   rE   paddle.frameworkr   paddle.utilsr   	pass_baser   r   r   r   rW   r]   re   r   r   r   r   r   r   r   r   <module>r      sn      ! $ 8 8 154X%v *Z@"5JH;V  !!& !& "!&r   