
    x-j{2                         d dl Zd dlZd dlmZ d dlmZ ddlmZm	Z	m
Z
 	 ddZd Zd Zd	 Zd
 Zd Zd Zd Z e
d           G d de                      ZdS )    N)core)unique_name   )PassBasePassTyperegister_passc                    t          |           }g }|d }d}	 ||k     r- || |                   s|dz  }||k     r || |                   |dz   }||k     r] || |                   rL || |         | |                   r4|dz  }||k     r) || |                   r || |         | |                   4||k     r||k    r|                    ||f           |dz   }||k    rn|S )Nc                     dS NT )ref_opnew_ops     i/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/passes/fuse_all_reduce.py<lambda>z/find_adjacent_match_sequences.<locals>.<lambda>   s    d     r   Tr   )lenappend)iterablefilter_funcadjacent_filter_funcnmatch_sequencesijs          r   find_adjacent_match_sequencesr      sI    	HAO#::	A!eeKK44eFA !eeKK44eEEEHQK(( $$Xa[(1+>>  FA	 EEHQK(( $$Xa[(1+>> 
 q55Q!VV""Aq6***E66 r   c                 R   |                      t          j        d|d                    |          }|d         sI|d         }t          |          }t	          |          D ]\  }	}
|
dz
  }|dk    rL| j        |         j        dk    r6|                    |           |dz  }|dk    r| j        |         j        dk    6|	dk    r|
dz   }t          | j                  }||k     rm| j        |         j        dk    rW| j        |         	                    d          |k    sJ |                    |           ||k     r| j        |         j        dk    Wt          t          |                    }|                    d	
           |}|d         dz   }t          j                                        }g }g }|D ]S}|                     |          j        }|                    |           |                    t          |                     Tdd|i||ddd	d|d|d||||         id}|d         s*|                     |dd|id||||         i           |dz  }t&          j        j        j        |d<   |                     |dd|id|i|           |D ]}
|                     |
           |S )NFusedOutput_r   )namedtypeuse_calc_streamring_idr   c_sync_calc_streamc_sync_comm_streamT)reversecoalesce_tensorInput)OutputFusedOutput	use_alignr   concated_shapesconcated_ranks)typeinputsoutputsattrsXOut)r,   r-   r.   reduce_type
all_reducexout)
create_varr   generatelist	enumerateopsr,   r   r   attrsetsortr   op_proto_and_checker_makerkOpRoleAttrName_find_var_recursiveshapeextend_insert_op_without_syncpaddledistributedReduceOpSUM
_remove_op)blockreversed_op_indicesinput_var_namesoutput_var_namesr   r/   	fused_varr!   new_op_indicesr   op_idxprev_op_idxnext_op_idxr   
insert_idxop_role_keyr*   r+   var_namerA   coalesce_tensor_op_kwargss                        r   insert_fuse_all_reduce_opsrV   2   sb      !"E1C"E"EFF !  I "# -	"122"#677 	7 	7IAv 1*Kq  Ik*/3GGG%%k222q 	 q  Ik*/3GGG
 1uu$qj	NN!OO	+.37KKK 9[166yAAWLLLL"))+666	  !OO	+.37KKK
 c.1122D))),$Q'!+J1AACCKON$ * *))(339u%%%c%jj)))) "_
 '$
 

 Un{+
! !$ "# %%%#I{E+4FG	 	& 	
 	
 	
 	a
 "-6:E-	!!Y	" "    & ! !    $$r   c                 n    |D ]1}|                      |          |                     |          k    r dS 2dS )NFT)r;   )op1op2
attr_names	attr_names       r   has_same_attrsr\      sE      	88I#((9"5"55555 64r   c                     h d}g }t          | j                  D ]#\  }}|j        |v r|                    |           $|S )N>   	broadcast
all_gatherr3   c_broadcast)r9   r:   r,   r   )rI   all_collective_opsmatch_op_indicesr   ops        r    filter_all_collective_op_indicesrd      sb       59%% ' '27(((##A&&&r   c                 l   	 t                      fdD             } fd}ddt          j                                        t          j                                        g	 	fd}t          |||          }g }|D ]4\  }}|                    fdt          ||          D                        5|S )Nc                 *    g | ]}j         |         S r   )r:   ).0r   rI   s     r   
<listcomp>z3find_all_fuse_all_reduce_groups.<locals>.<listcomp>   s    BBBqeilBBBr   c                    | j         dk    s|                     d          rdS |                     d          d         }|                     d          d         }||k    rdS                     |          }|J |j         t
          j        j        j        k    rdS |j	        }t          d |D                       rdS dS )	Nc_allreduce_sumuse_model_parallelFr0   r   r1   c              3   "   K   | ]
}|d k    V  dS )r   Nr   )rg   ss     r   	<genexpr>zQfind_all_fuse_all_reduce_groups.<locals>.is_valid_allreduce_op.<locals>.<genexpr>   s&      %%!qAv%%%%%%r   T)r,   r;   inputoutputr@   r   VarDescVarTypeDENSE_TENSORrA   any)rc   in_var_nameout_var_namein_varrA   rI   s        r   is_valid_allreduce_opz>find_all_fuse_all_reduce_groups.<locals>.is_valid_allreduce_op   s    7'''2773G+H+H'5hhsmmA&yy''*,&&5**;77!!!;$,.;;;5%%u%%%%% 	5tr   r!   r    c                 
   t          | |          sdS                     |                     d          d                   }                    |                    d          d                   }|j        |j        k    rdS dS )NFr0   r   T)r\   r@   ro   r   )r   r   ref_op_in_varnew_op_in_varrI   same_attr_namess       r   is_same_adjacent_opz<find_all_fuse_all_reduce_groups.<locals>.is_same_adjacent_op   s    ffo>> 	511&,,s2C2CA2FGG11&,,s2C2CA2FGG-"5555tr   c                      g | ]
}|         S r   r   )rg   kcollective_op_indicess     r   rh   z3find_all_fuse_all_reduce_groups.<locals>.<listcomp>   s    MMMA4Q7MMMr   )rd   r   r>   r?   kOpDeviceAttrNamer   r   range)
rI   collective_opsrx   r}   
match_seqsnew_match_seqsr   r   r   r|   s
   `       @@r   find_all_fuse_all_reduce_groupsr      s   <UCCBBBB,ABBBN    " 	'7799'99;;	O      /-/B J N O O1MMMMq!MMMNNNNr   c                 X   g fd}|D ]}t          |          }|dk    sJ |dk    r d}|dz   }||k     rdd}t          ||          D ]F}	|||	                  ||                  }
|
t          j        j        j        k    r7 ||||           |} |dz  }||k     d ||||           S )Nc                 X    ||z
  dk    r                     | ||                    d S d S )Nr   )r   )
op_indices	start_idxend_idx
new_groupss      r   insert_new_groupz>split_fuse_all_reduce_groups_by_deps.<locals>.insert_new_group   s?    Y""j7):;<<<<< #"r   r   r   F)r   r   r   NodeDepNoDep)rI   groupsop_depsr   r   r   r   r   found_groupprev_idxdepr   s              @r   $split_fuse_all_reduce_groups_by_depsr      s	   J= = = = =  3 3

OO1uuuu66	M!eeK!)Q//  j23JqMB$)----  Y:::	FA !ee 	Y2222r   c                 .   |sd S i }t          | j                  D ]/\  }}|j        D ]}||vr|dg||<   |j        D ]}||vr|dg||<   0t	          | j                  }g }t          |          D ]v\  }}	|	d         d         |	d         d         z   }
|}d}|
D ]+}||vrd}d} n ||         \  }}|rd}t          ||          },||	d         d	<   |                    ||	f           w|                    d
 d           |D ]\  }}	 | j        |fi |	 d S )NTFr-   r&   r.   r'   r   r/   	copy_datac                     | d         S )Nr   r   )elements    r   r   z,insert_coalesce_tensor_ops.<locals>.<lambda>  s
    71: r   )keyr$   )	r9   r:   input_arg_namesoutput_arg_namesr   minr   r=   rC   )rI   coalesce_ops_kwargs	var_infosidxrc   varr   insert_idx_and_kwargs	group_idxkwargsall_vars
min_op_idxr   min_idxrO   is_inputs                   r   insert_coalesce_tensor_opsr      s    IUY'' . .R% 	- 	-C)##"%t	#& 	. 	.C)##"%u	#	. 	EIA&':;; ; ;	6(#G,vi/@/JJ
	 	1 	1C)## 	(~FH ! 	Z00JJ'0w$$$j&%9::::#=#=tLLL, 5 5V%%c44V44445 5r   c                 6   t           j                                        }t           j                                        }t           j                                        }g }t          |          D ]}| j        |d                  }|                    d          }	|                    d          }
|                    d          }|                    |          }|                    |          }d|	d|
d|||||i}|                     |	                    d          d                   j
        }t          j        |          }d}g }g }g }g }t          |          D ]Y}| j        |         }|	                    d          d         }|                    d          d         }|                     |          }t          t          j        |j                            |z  }||z   |k    rKt#          |          dk    r.|||<   t%          | |||||          }|                    |           d}g }g }g }g }||z  }|                    |           |                    |           |                    |           |                    |          r(|                    |                    |                     [t#          |          dk    r.|||<   t%          | |||||          }|                    |           |                                  t/          | |           d S )Nr   r!   r    rk   r0   r1   r   )r   r>   r?   kOpRoleVarAttrNamer   reversedr:   r;   r@   ro   r   size_of_dtyperp   intnpprodrA   r   rV   r   has_attrrB   _sync_with_cppr   )rI   r   max_memory_sizerS   op_role_var_keyop_device_keyr   groupfirst_opr!   r    rk   op_role	op_devicer/   r   sizeofcur_mem_sizeop_role_varsrecorded_op_indicesin_var_namesout_var_namesrO   rc   ru   rv   rw   mem_sizecoalesce_op_kwargss                                r   %insert_fuse_all_reduce_by_memory_sizer     s4   1AACCK5HHJJO3EEGGM&!! A; A;9U1X&--	**"--(9::%]]+?@@--,,MM-00	 w "49
 ))(..*=*=a*@AAG#E** uoo 	> 	>F6"B((3--*K99U++A.L..{;;F276<0011F:Hh&88*++a//-9E/*)C+$%* *& (../ABBB !&(#! "H$L&&v...,,,  ...{{?++ >##BGGO$<$<==="##a''%1E/"!;#" "  &&'9:::	u&9:::::r   fuse_all_reducec                   6     e Zd Z fdZd Zd Zd Zd Z xZS )FuseAllReducePassc                 t    t                                                       |                     dd           d S )Nr   )super__init__set_attr)self	__class__s    r   r   zFuseAllReducePass.__init__c  s4    ',,,,,r   c                 8    |                      d          }|dk    S )Nr   r   )get_attr)r   r   s     r   _check_selfzFuseAllReducePass._check_selfg  s    --(9::""r   c                     dS r   r   )r   
other_passs     r   _check_conflictz!FuseAllReducePass._check_conflictk  s    tr   c                     t           j        S N)r   COMM_OPT)r   s    r   _typezFuseAllReducePass._typen  s      r   c                 T   |                      d          }|j                                        }|j        }t	          |          D ]N}|                    |          }t          |          }	t          ||	||                   }	t          ||	|           O|	                                 d S )Nr   )
r   descget_op_deps
num_blocksr   rI   r   r   r   r   )
r   main_programstartup_programcontextr   r   r   r   rI   r   s
             r   _apply_single_implz$FuseAllReducePass._apply_single_implv  s    --(9::#//11!,
z"" 	 	A &&q))E4U;;F9vwqz F 2v    	##%%%%%r   )	__name__
__module____qualname__r   r   r   r   r   __classcell__)r   s   @r   r   r   a  st        - - - - -# # #  ! ! !& & & & & & &r   r   r   )numpyr   rD   paddle.frameworkr   paddle.utilsr   	pass_baser   r   r   r   rV   r\   rd   r   r   r   r   r   r   r   r   <module>r      s7        ! ! ! ! ! ! $ $ $ $ $ $ 8 8 8 8 8 8 8 8 8 8 15   4X% X% X%v     * * *Z  @"5 "5 "5JH; H; H;V  !!!& !& !& !& !& !& !& "!!& !& !&r   