
    ΑiG                   r   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrSSK	J
r
  / SQr\R                  " S5      rS$S jr S%S jrS%S	 jrS
 rS rS rS rS rS rS rS rS rS rS rS$S jrS r S& S'S jjr S( S'S jjr S)S jr S*S jr!S r"   S+ S,S jjr#S r$S r%S r&S  r'S! r(S" r)S# r*g)-    )annotationsN   )_PRUNE_FUNC)full	full_attn	core_attn
auto_tunerc                    U S:X  a  U /$ [        5       nSnU S-  S-   nX4:  a8  X-  S:X  a$  UR                  U5        UR                  X-  5        US-  nX4:  a  M8  [        U5      n[        X!S9$ )z'Return the divisor of the given number.r      r   reverse)setaddlistsorted)numr   resultsimids        c/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_tuner/utils.pydivisorr       sv    
axueG	A
(Q,C
'7a<KKNKK!	Q	 '
 7mG'++    c                X    [        XX$5      nU Vs/ s H  ofU;   d  M
  UPM     sn$ s  snf )zZReturn the degree of different parallel modes by gpus and nodes num with customized range.)dist_degree)modenum_gpus	num_nodescustomized_range	tuner_cfgdist_degree_alldegrees          r   !dist_degree_with_customized_ranger"   0   s.     "$)GO!0Ov>N4NFOOOs   	''c                |   U S;   d   e/ n/ nU S:X  a/  UR                  SS5      S:w  a  [        USS9nU$ [        USS9n U$ U S	:X  a  US
:  a1  UR                  SS5      (       a  [        [        US
-   SS5      5      nO
[        USS9nU HE  nSnUS   R                  SS5      nU(       a
  X-  S:w  a  SnU(       a  M4  UR	                  U5        MG     UnU$ U S:X  GaL  UR                  SS5      (       a0  X-  n	UR                  SS5      S:w  a  [        U	SS9nO6[        U	SS9nO+UR                  SS5      S:w  a  [        USS9nO
[        USS9nU H  n
SnUS   R                  SS5      nUS   R                  SS5      nUS   R                  SS5      nUS   R                  SS5      nUR                  SS5      nU(       a
  X-  S:w  a  SnU(       a
  X-  S:w  a  SnU(       a
  X-  S:w  a  SnU(       a  X-  S:w  a	  U(       a  SnU(       a  M  UR	                  U
5        M     UnU$ U S:X  a  [        USS9nU$ U S:X  a;  UR                  SS5      S:w  a  [        US   S   SS9nU$ [        US   S   SS9n U$ U S:X  a8  UR                  SS5      S:w  a  [        US   S   SS9nU$ [        US   S   SS9nU$ )zDReturn the degree of different parallel modes by gpus and nodes num.)	dp_degree	mp_degree	pp_degreesharding_degreemicro_batch_size
vpp_degreer$   schedule_modememoryperformanceFr   Tr&   r   enable_pp_pruner   	model_cfg
num_layersNr%   enable_mp_prunehidden_size
vocab_sizenum_attention_heads
seq_lengthuse_sequence_parallelr'   r(   global_batch_sizer)   )getr   r   rangeappend)r   r   r   r   r   prune_resultsr&   
prune_flagr0   gpus_per_noder%   r2   r3   r4   r5   r6   s                   r   r   r   8   s]        GM{==(3}Dh6Gr No h5Gn Nk 
	q=Y]]+<dCC5Q267Gh5G IJ";/33L$GJ)Q.!%J:$$Y/ !  L NI 
	==*D11$1M}}_h7=H!->!-?}}_h7=H!(D9!(E: IJ#K044]DIK";/33L$GJ"+K"8"<"<%t# #;/33L$GJ$-MM'%! {6!;!
j49!
"':'F!'K!
 *a/)!
:$$Y/; !<  2 N/ 
"	"(D1, N) 
#	#==(3}D+&':;UG$ N +&':;TG N 
	==(3}D+&|4eG N	 +&|4dG Nr   c                T   0 nSnSU ;   a  SU S   ;   a  U S   S   nUc  U S   OUnUc  U S   OX S   -  nUS:  d   eS H.  n[        U R                  US5      X55      n[        XSXFU 5      X'   M0     [        U R                  S	S5      U S
   S   S	5      n[        S	UUUU 5      US	'   [        U R                  SS5      U S
   S   S5      n[        SX4X5      US'   U R                  SS5      n	[        U R                  SS5      SS5      n
S Vs/ s H  oU
;   d  M
  UPM     snUS'   U	S:w  a  [        US   SS9US'   O[        US   SS9US'   U R                  SS5      n[	        U[
        5      (       a&  UR                  5       S:X  a  U	S:w  a  SS/OSS/US'   O[	        U[        5      (       a  U/US'   O[	        U[        5      (       ae  [        U5      S:X  a  S/US'   Od/ US'   U H+  nUS;  a  [        SU 35      eUS   R                  U5        M-     [        US   5      S:X  a  S/US'   OUc  S/US'   O[        S5      eU R                  SS5      n[	        U[
        5      (       a  UR                  5       S:X  a)  U	S:w  a  [        O[        [        [        5      5      US'   GO0UR                  5       [        ;   a  UR                  5       /US'   GO[        SSR                  [        5       SU 35      e[	        U[        5      (       a  [        U5      S:X  a  S/US'   O/ US'   U Ha  nUR                  5       [        ;  a$  [        SSR                  [        5       SU 35      eUS   R                  UR                  5       5        Mc     [        US   5      S:X  a  S/US'   O.Uc  S/US'   O$[        SSR                  [        5       SU 35      eU R                  S S5      nUb6  / US '   UR                  5        H  u  nnUS    R                  US!   5        M     U$ s  snf )"JReturn the default candidates of every hyper param which user defined autoNsearch_algoestimated_num_gpusr   nodesr=   r   )r$   r%   r&   r'   r)   r/   r0   r(   r7   r*   r+   sharding_stage   )rD   r   r   r,   Tr   Fuse_recomputeauto)TFz5use_recompute only supports auto/True/False, but got z&use_recompute supports auto/True/Falserecompute_granularityz)recompute_granularity only supports auto//
, but got custom_search_dimvalue)_param2ranger8   r"   r   
isinstancestrlowerboolr   len
ValueErrorr:   #__SUPPORTED_RECOMPUTE_GRANULARITY__reversedjoinitems)r   
candidatesrA   r   r   strategystrategy_customized_rangevpp_degree_customized_rangembs_customized_ranger*   sharding_stage_customized_rangestagerE   recompute_settingrG   granularityrJ   keyrK   s                      r   default_candidatesra      s;   J" Im$<<&}56JK % 	*  % 	'_#== 
 a<<N$0MM(D)8%
!  A	i 

	 O #/lD)+|,#
  A# J| (($/+23
 &GH1E&J!" MM/8<M&2&-q2B'# %$$%1P(P9$J  %'-'($(

#$ (.'(%(

#$ MM/48M-%%-*=*=*?6*I*m;T5M% 	?# 
M4	(	('4o
?#	M4	(	(}"+/&J'*,J'%2!$M9$OPaObc  /667HI &3 :o./14/3f
?+		'+f
?#ABB%MM*A4H'-- &&(F2 !M1 4(#FGH ./ "'')-PP &++-3J./ ;CHHEh<i;jjt  vK  uL  M  
)4	0	0$%*37&J./24J./4%%'>? %CCHHMpDqCrr|  ~I  }J  K  67>>#))+  5 :5671<7;f
23		&/3f
*+7Ad8e7ffp  rG  qH  I
 	
 "&94@$*,
&'+113JC*+225>B 4q$s   	P%P%c           
        U S   nUS   nUS   nUS   nUS   nUS   nUS   nUS   nUS	   n	US
   n
SU ;  d	  SU S   ;  a  U S   OU S   S   n/ nU GH  n/ nX-  S:w  a  M  UR                  U5        X-  nU H  nUU-  S:w  a  M  UR                  U5        UU-  nU H  nUU-  S:w  a  M  UR                  U5        UU-  nU Hb  nUU:w  a  M  UR                  U5        [        U5      S:X  d   eUR                  [        R                  " U5      5        UR	                  5         Md     UR	                  5         M     UR	                  5         M     GM     [        [        R                  " UUUU	U
5      5      nU R                  SS5      nUb_  US   n[        [        R                  " U6 5      nUn/ nU H4  nU H+  n[        U5      [        U5      -   nUR                  U5        M-     M6     / nU R                  SS5      nU GH  nU GH  n Uu  nnnn[        U SS 5      u  n!n"n#n$n%U S   S   U"U-  U-  -  S:w  a  M6  U S   S   UU#-  -  S:w  a  MJ  UGb>  US:X  d  U$(       a  U$(       a^  U%S:w  aX  [        U5      [        U 5      -   [        [        U5      5       V&s/ s H  n&SPM     sn&-   nUU;  a  UR                  U5        M  M  U S   S   U-  n'[        [        SU'S-   5      5      n([        U5      n)/ n*U( HM  n+U+/nUR                  [        U)S-
  5       V,s/ s H  n,SPM     sn,5        UU*;  d  M<  U*R                  U5        MO     Sn&U&U):  a  U( Hz  n+[        U&5       V,s/ s H  n,U'PM     nn,UR                  U+/5        UR                  [        U)U&-
  S-
  5       V,s/ s H  n,SPM     sn,5        UU*;  d  Mi  U*R                  U5        M|     U&S-  n&U&U):  a  M  U R                  S5      S:w  aH  U* H?  n-[        U5      [        U 5      -   [        U-5      -   nUU;  d  M.  UR                  U5        MA     GM5  U*R                  SS9  U* H?  n-[        U5      [        U 5      -   [        U-5      -   nUU;  d  M.  UR                  U5        MA     GM  [        U5      [        U 5      -   nUR                  U5        GM     GM     SSSSSSSS	S
S.	n.Ub(  UR                  5        H  u  n/n,U/U.[        U.5      '   M     Ub  U H  n0U0U.[        U.5      '   M     / n1U H3  n0 n2[        U5       H  u  n3n4U4U2U.U3   '   M     U1R                  U25        M5     [        U15      n5/ n6XS'   U1 HA  n7Sn8[         H  n9U9" U U7U65      n:U:(       d  M  Sn8  O   U8(       a  M0  U6R                  U75        MC     [        U65      n;[        R!                  U5U;-
   S35        U R                  S S5      (       a  [#        U6U 5      n6U6$ s  sn&f s  sn,f s  sn,f s  sn,f )!-Permutate the candidates of all hyper params.rW   r$   r%   r&   r)   r(   rC   r'   rE   rG   r@   rA   r   r      rJ   Nrefined_recompute   r/   r7   r0   r   r   r*   r,   Tr   )	r   r   r   rD   rd   rf            Fz# tasks are pruned before launching.schedule_prior)r:   rQ   copydeepcopypopr   	itertoolsproductr8   r9   extendsortrV   	enumerater   loggerinfosort_by_special)<r   rW   dp_degree_candidatesmp_degree_candidatespp_degree_candidatesvpp_degree_candidatesmbs_candidatessharding_stage_candidatessharding_degree_candidatesuse_recompute_candidates recompute_granularity_candidatesr   valid_degreesr%   degreessharding_resr'   pp_resr&   dp_resr$   other_dim_cfgsrJ   custom_search_dim_candidatescustom_dim_cfgsother_cfgs_without_cumtomcfg_without_cumtom
custom_cfgcfgall_cfgsre   valid_degreeother_dim_cfgrC   mbsvpprE   rG   r   	max_valuerr_valid_valuesop_countrr_dim_cfgsrK   _
rr_dim_cfgmappingr`   dimnew_all_cfgsnew_cfgidxvalsearch_space_size_before_prunepruned_all_cfgscur_cfgprunedfuncresultsearch_space_size_after_prunes<                                                               r   
search_allr   4  s   <(J%k2%k2%k2&|4 23N *+; <!+,=!>)/:'12I'J$ 	)y'?? 	* }%&:;	  M)	1$y!,9Oo-2NN?+!_4F1	I%*y)9,!5I* NN9-w<1,,,!((w)?@KKM "6  2 KKM)  : *: %!$,	
N "&94@$'12E'F$y002NOP$2!";-
-.j1AA%%c* . #<
 H!&94@%+M?K<I	9 ]2A&'% +&':;(946 %l3y3G1L , !^)%*?6*I \*}-.&+C0A,B&CD&C1&CDE 
 (* , + "+.|<	I  '+5IM+B&CO"#45H #%K!0$g

uX\/B#C/B!A/B#CDk1'..s3	 "1 Ah,%4E6;Ah"?h9hC"?JJw/JJ5A9I3J'K3Ja3J'KL"+5 + 2 23 7 &5 Q h, !}}_5F*5J $\ 2"&}"5!6"&z"2!3  
  #(2 ( 4 +6 $(((6*5J $\ 2"&}"5!6"&z"2!3  
  #(2 ( 4 +6 <(4+>>$a , &h "
G $'--/FC$'GCL! 0 $$C$'GCL! %L!#HC$'GGCL! 'G$	 
 &)%6"O$jD)Wo>Fv	  
 v""7+   %($8!
KK),II
JJmn }}%u--)/9EC E  $D #@'Ks    W9
W>)X
'Xc                n   UR                  SS5      (       d   eUS   nUR                  SS9  U H  nSnSnU[        U 5      :  aR  X   n[        Xc5      (       a(  U R	                  U5        U R                  SU5        US-  nUS-  nU[        U 5      :  a  MR  U S U nUR                  5         XpS U& M     U $ )Nrj   FTr   r   r   )r8   rq   rQ   _matchedrm   insertr   )cfgsr   prior_strategyrX   r   matched_countr   tmps           r   ru   ru     s    ==)51111/0N%"CIo)C&&As#"1HC CIo >M""^m # Kr   c                J    U S   * U S   * U S   * U S   * U S   * U S   U S   * 4$ )Nr%   r&   r)   r'   rC   r(   rE    r   s    r   memory_sortr     s[     
[		[		\						_	 r   c                    U S   * $ )Nr(   r   r   s    r   performance_sortr      s    "###r   c           
        SSSSSSSSS	S
.	nSSSS.n0 nU H	  nXTX%   '   M     [        U[        5      (       d   eUR                  S5      nSnU GH  nS n	U H  nUR                  U5      (       d  M  Un	  O   U	(       d  M2  U[	        U	5         n
U	S;   a9  U
S:X  a  XU	      S:  a  US-  nM^  M`  [        U
5      n
XU	      U
:X  a  US-  nM}  M  U	S:X  aC  U
S:X  a  XU	      (       a  US-  nM  M  [        [        U
5      5      n
XU	      U
:X  a  US-  nM  M  U	S:X  a;  U
S:X  a  XS      S:  a  US-  nM  M  [        U
5      n
XU	      U
:X  a  US-  nGM  GM	  U	S:X  a/  U
S:X  a  US-  nGM  [        U
5      n
XU	      U
:X  a  US-  nGM;  GM>  U	S	:X  d  GMG  U
S:X  a  XS      (       a  US-  nGMa  GMd  [        U
5      n
X:   nXU	      U:X  d  GM  US-  nGM     U[	        U5      :X  a  gg)Ndpmpppr   r   shardingr]   	recomputer_   )	r$   r%   r&   r)   r(   r'   rC   rE   rG   r   r   r   )r   r   r   r   r   )r   r   r   r   r   *r   rE   TF)rM   rN   split
startswithrQ   intrP   )r   rX   r   granularity_mappingreversed_mappingr`   dimshas_matchedr   matchedrK   r_   s               r   r   r   $  s   !%!$!.
G %E),&  h$$$$>>#DK#C~~c"" $ 7G%E??C<89A=#q(  >  JE89UB#q(  C K'C<89#q(  : !U,E89UB#q(  C G#C<
;<q@#q(  A  JE89UB#q(  C E!C<1$KJE89UB#q(  C M)C<@A#q(  B  JE"5"<K89[H#q( u v c$ir   c                   Sn[        U [        5      (       a=  SU R                  5       ;   a  [        [	        SUS-   5      5      nU$ [        SU S35      e[        U [        5      (       aZ  U R                  SS5      nU R                  SS5      nU(       a  U(       d  [        SU S35      e[        [	        XES-   5      5      nU$ [        U [        5      (       a  U nU$ [        U [        5      (       a  U /nU$ U c  S/nU$ [        SU S	35      e)
z3Convert a param from json file to candidates range.NrF   r   zIllegal param found: z , only support auto in str type.minmaxz/, min and max should be specified in dict type.z,, only support str, dict, list and int type.)	rM   rN   rO   r   r9   rR   dictr8   r   )param_from_json_filer   	param_keyselected_rangecustomized_min_valuecustomized_max_values         r   rL   rL   x  sH   N&,,)//11!%9q="9:N2 / '	{2RS  
($	/	/377tD377tD$)='	{2ab  &q(@A
  
($	/	/-  
(#	.	../  
	%
  #I;.Z[
 	
r   c                |   [        U 5      nU S   R                  SS 5      nUc   e/ nU H  n[        US   US   -  5      US'   SUS'   SUS'   SUS'   US   US	   -  US
'   US
   nXPS   ::  a  SnOEXPS   -  S:X  a  XPS   -  nO2[        SU S   S-   5       H  nXW-  S:X  d  M  XW-  U S   ::  d  M  Un  O   UW-  S:X  d   eXdS'   U S   S   US   -  US'   XC;  d  M  US   U S   ::  d  M  UR	                  U5        M     / nU S   R                  SS 5      (       Ga  U GH  n[
        R                  " U5      n	U S   U S   -  n
XS
   -  nUS:  d  M4  XS'   SU	S'   S U	S'   U	S   U	S	   -  U	S   -  U	S
'   U	S
   nXPS   ::  a  SnOEXPS   -  S:X  a  XPS   -  nO2[        SU S   S-   5       H  nXW-  S:X  d  M  XW-  U S   ::  d  M  Un  O   UW-  S:X  d   eXiS'   US   U-  U	S'   SU	S'   UR	                  U	5        [
        R                  " U	5      nSUS'   UR	                  U5        GM
     UR                  U5        U$ )Nr@   rA   r$   r'   estimated_dp_degreer   rC   r%   r&   r   r=   r   r   rB   r/   r7   sharding_overlapFT)r   r8   r   r9   r:   rk   rl   rp   )r   r   rA   r   taskactual_cardsnnodesr   sharding_all_cfgsnew_taskgiven_num_gpusr'   overlap_new_tasks                r   search_by_dp_estimationr     s9   )$H"=155d )))L&)%6 77'
"# ["#!",tK/@@ZJ'_55Fo66!;!%??F1i0145 $)$)Y-GGF 6 f$)))Wk"#67)*+ 	 ! #W79K(K%9 > ##$6== D}}T*H&w/)O2LLN,Z0@@O".=*+-.)*26./[){+,012 $
  (
3_#==F!o$>>!C)-GGF"1i&81&<=(,1 , 1Y5O O%&F! > $f,111$*!,-? ,- 05+,!((2#'==#: 7; !34!(()9:I !L )*r   c                P   XS   S      (       Ga  SnSnU HO  n/ SQnSnU H  nXX   X   :w  d  M  Sn  O   U(       d  M)  SU;  a  UnM3  US   (       a  M?  US   U S   :X  d  MM  UnMQ     Uc   eUc   eUUS   S      n	XS   S      n
XAS   S      nU(       a  U
(       a  U	(       a|  X-
  U	-  n[         R                  " [        UR                  5       5      5      nU HA  nUR	                  S	5      (       d  M  XH   (       d  M&  S
U-   n[        XH   SU-   -  S5      XM'   MC     ggggg)z
In single dp search scenario,
the overlay acceleration ratio is obtained by automatically running overlap and non overlap tasks,
and the estimated performance of the multi dp after overlap is obtained.

metric_cfgnameN)r$   r%   r&   r)   r(   rE   rG   rC   TFr   r'   bw_overlap_r   rf   )rk   rl   r   keysr   round)r   r   history_cfgsnon_overlap_cfgraw_cfgr   r   samer`   before_overlap_performanceoverlap_performanceraw_performanceratiomew_keys                 r   add_overlap_performancer     sw    &v.//C	D D8w|+ D  t%S0!G/00,-9J1KK*-+  , ***"""%4l#F+&
" &&=f&EF!L"9&"AB#* $@*+E ==glln!56D>>%((W\\(3.G',W\QY-G'KG$  + $ E 0r   c                ^   SUS   ;  a  g[         R                  " US   S   5      n/ SQnU H  nXT;  d  M
  [        SU SU S35      e   US   nUS	   nUS
   nSn	US:  a  US:X  a	  US:  a  Sn	O,US:X  a  US:  a	  US:  a  Sn	OUS:  a  US:  a	  US:  a  Sn	OSn	U	c   eX;   GaC  SX9   S   ;   aX  [        X9   5      n
U
S-  S:X  d   eSnX:  a8  X9   U   [	        X9   US-      5      /nU R                  U5        US-  nX:  a  M7  ggSX9   S   ;   a  U R                  X9   5        gSX9   S   ;   a  SSKnX9   S   n [        US5       nUR                  U5      nSSS5        X9   S   R                  S5      nSnUS[        U5      S-
    H  nU(       a  UU   nM  WU   nM     U(       a  X9   S   UUS   '   OX9   S   WUS   '   UR                  W[        X9   S   S5      5        gSX9   S   ;   a  SSKnX9   S   n [        US5       nUR                  U5      nSSS5        [        X9   5      S-
  n
U
S-  S:X  d   eSnX:  a{  X9   U   R                  S5      nSnUS[        U5      S-
    H  nU(       a  UU   nM  WU   nM     U(       a  US-  nX9   U   UUS   '   OUS-  nX9   U   WUS   '   US-  nX:  a  M{  UR                  W[        X9   S   S5      5        ggg! , (       d  f       GN= f!   [        S5      e= f! , (       d  f       N= f!   [        S5      e= f)"Generate args of sharding overlap.r   r@   N)sharding_mpsharding_ppsharding_mp_pp
no_overlapOnly support rI   .r'   r%   r&   r   r   r   r   r   --r   r   -o.jsonr0Please check your auto tuner json whether valid.r.   w.yaml)rk   rl   rR   rQ   rN   rp   jsonopenloadr   dumpyaml	safe_load)res_argsr   r   cmdvalid_hybrid_strategyr`   r'   r%   r&   argarg_map_lenr   new_argr   	file_pathfcmd_cfgr   rK   r   s                       r   (gen_sharding_overlap_args_of_grid_searchr  "  s   =!99
--	-01CD
EC + 56jQG  
 +,OK IK I
C1}aOa,?	aIMo.A	Q9q=_q-@??
z38A;ch-K?a'''A/8A;CHQUO(<=(Q / SXa[ OOCH%#I)S)Q"iilG * 8A;$$S)DEOc$i!m,!#JE#CLE	 -
 "%(1+d2h$'HQKR!IIgtCHQK56#I)S)Q"nnQ/G * ch-!+K?a'''A/x{((-#d)a-0C %c
 '	 1
 FA&)hqkE$r(OFA(+GDH%Q / IIgtCHQK56= $G   *) F * *) F sT   K> K,K> L L#L ,
K;6K> ;K> >L
LL L L,c                   SUS   ;  a  g[         R                  " US   S   5      nSU;   Ga\  / SQnU H  nXT;  d  M
  [        SU SU S35      e   US   nUS	:  d   eUS
   nUS   nSn	US	:  a	  US	:X  a  Sn	OUS	:X  a	  US	:  a  Sn	OUS	:  a	  US	:  a  Sn	OgU	c   eX;   Ga  SX9   S   ;   a  U R                  X9   5        gSX9   S   ;   a  U R                  X9   5        gSX9   S   ;   a  SSKn
X9   S   n [        US5       nU
R                  U5      nSSS5        X9   S	   R                  S5      nSnUS[        U5      S	-
    H  nU(       a  X   nM  WU   nM     U(       a  X9   S   XS   '   OX9   S   WUS   '   U
R                  W[        X9   S   S5      5        gSX9   S   ;   a  SSK
nX9   S   n [        US5       nUR                  U5      nSSS5        X9   S	   R                  S5      nSnUS[        U5      S	-
    H  nU(       a  X   nM  WU   nM     U(       a  US   (       a  X9   S   OX9   S   XS   '   OUS   (       a  X9   S   OX9   S   WUS   '   UR                  W[        X9   S   S5      5        gggg! , (       d  f       GN{= f!   [        S5      e= f! , (       d  f       N= f!   [        S5      e= f)r   r   r@   N)r   r   r   r   rI   r   r'   r   r%   r&   r   r   r   r   r   r   r   r   r   r   r.   r   r   rD   )rk   rl   rR   rp   r   r   r   r   rQ   r   r   r   )r   r   r   r   r   r`   r'   r%   r&   r   r   r   r  r  r   rK   r   s                    r   gen_sharding_overlap_argsr    s0   =!99
--	-01CD
ECS  PC/ #$9#:*SEK  
 /0"""$	$	q=Y!^C!^	AC]y1}"C:sx{")!$)CHQK'HQK	i-"&))A, . x{((-#d)a-0C %
 '	 1
 &)hqkEr(O(+GDH%		'4S#9:CHQK'HQK	i-"&.."3 . x{((-#d)a-0C %
 '	 1
 '*+='>CHQK r(O
 (++='>CHQK DH% 		'4S#9:5 (9 + !> .-$J ( .-$J sT   9J JJ ;J> J-J> 
JJ J J*-
J;7J> ;J> >Kc                *  ^^ [         R                  " U5      nU4S jnU4S jnSU;   d   e[         R                  " US   5      m[         R                  " U 5      n/ SQnSU;   a  US    H  nUR                  U5        M     U H  n	U" U	TX5        U" U	TXU5        M     US   R                  SS5      (       Ga  T(       Gd  [         R                  " US   S   5      mT GH  n	STU	   S	   ;   a  UR	                  TU	   5        M&  S
TU	   S	   ;   a  UR	                  TU	   5        MH  STU	   S	   ;   a  S	SKn
TU	   S	   n [        US5       nU
R                  U5      nSSS5        TU	   S   R                  S5      nSnUS[        U5      S-
    H  nU(       a  X   nM  WU   nM     U(       a  TU	   S   XS   '   OTU	   S   WUS   '   U
R                  W[        TU	   S	   S5      5        GM  STU	   S	   ;   d  GM!  S	SKnTU	   S	   n [        US5       nUR                  U5      nSSS5        TU	   S   R                  S5      nSnUS[        U5      S-
    H  nU(       a  X   nM  WU   nM     U(       a  TU	   S   XS   '   OTU	   S   WUS   '   UR                  W[        TU	   S	   S5      5        GM     US   R                  SS5      (       Ga  T(       Ga  [         R                  " US   S   5      mT GH  n	STU	   S	   ;   a  UR	                  TU	   5        M&  S
TU	   S	   ;   a  UR	                  TU	   5        MH  STU	   S	   ;   a  S	SKn
TU	   S	   n [        US5       nU
R                  U5      nSSS5        TU	   S   R                  S5      nSnUS[        U5      S-
    H  nU(       a  X   nM  WU   nM     U(       a  TU	   S   XS   '   OTU	   S   WUS   '   U
R                  W[        TU	   S	   S5      5        GM  STU	   S	   ;   d  GM!  S	SKnTU	   S	   n [        US5       nUR                  U5      nSSS5        TU	   S   R                  S5      nSnUS[        U5      S-
    H  nU(       a  X   nM  WU   nM     U(       a  TU	   S   XS   '   OTU	   S   WUS   '   UR                  W[        TU	   S	   S5      5        GM     US   S   S:X  a  [        XaU5        U$ [        XaU5        U$ ! , (       d  f       GN= f!   [        S5      e= f! , (       d  f       GN= f!   [        S5      e= f! , (       d  f       GN= f!   [        S5      e= f! , (       d  f       GN*= f!   [        S5      e= f)zGenerate new script args.c                  > U S:X  a*  U T;   a$  SU;   a  US   OUS   S   nXBS   -  US   -  nXRS'   U S:X  a2  U T;   a,   SU;   a  US   OUS   S   nUUS   -  US   -  US   -  nXbS'   U S:X  a  U T;   a   US	   S
:  a  S
OSnXrS'   U S:X  a   U T;   a   SU;   a  US   OUS   S   nXBS'   g g g !    g = f!    g = f!    g = f)Nlocal_batch_sizer7   r/   r'   r$   gradient_accumulation_stepsr(   sequence_parallelr%   r   r   r   )	r   cmgr   r   r7   r  r	  r
  r   s	           r   _get_new_cfg"gen_new_args.<locals>._get_new_cfg  s   $$ '#- '({+,?@  "):%;;s;?OO  '7"#//C3J +c1 +,";/0CD " &,-.;'( -./ , 6Q12 %%#*),[)9A)=A1!+<'( %%#* +c1 +,";/0CD "
 ,='( +5%s#   +B/ 7B6 B= /B36B:=Cc                &  > X;   Gag  X;   Gaa  SX   S   ;   a.  X   S   [        X    5      -   X   S'   UR                  X   5        g SX   S   ;   a1  X   S   S-   [        X    5      -   X   S'   UR                  X   5        g SX   S   ;   Gak  SS KnX   S   nSn[        X   5      S:  a  X   S	   n [	        US
5       nUR                  U5      n	S S S 5        X   S   R                  S5      n
S nU
S [        U
5      S-
    H  nU(       d  W	U   nM  X   nM     U(       a!  U(       a  U[        X    5      -   OX    XS   '   O!U(       a  U[        X    5      -   OX    W	U
S   '   UR                  W	[	        X   S   S5      5        US   R                  SS5      (       aX  T(       dP  [        R                  R                  X   S   5      S   S-   US   -   S-   nUR                  U	[	        US5      5        g g g SX   S   ;   Gah  SS KnX   S   nSn[        X   5      S:  a  X   S	   n [	        US
5       nUR                  U5      n	S S S 5        X   S   R                  S5      n
S nU
S [        U
5      S-
    H  nU(       d  W	U   nM  X   nM     U(       a!  U(       a  U[        X    5      -   OX    XS   '   O!U(       a  U[        X    5      -   OX    W	U
S   '   UR                  W	[	        X   S   S5      5        US   R                  SS5      (       aU  T(       dM  [        R                  R                  X   S   5      S   US   -   S-   nUR                  U	[	        US5      5        g g g g U S:X  Ga  X;   Ga  SUS   S   ;   a  [        S5      eSUS   S   ;   a  [        S5      eSX   S   ;   Gac  SS KnX   S   n[        X   5      S:  a  [        S5      e [	        US
5       nUR                  U5      n	S S S 5        X   S   R                  S5      n
S n0 nUR                  SS 5      nU(       d  g U H	  nX,   X'   M     U
S [        U
5      S-
    H  nU(       d  W	U   nM  X   nM     U(       a  XU
S   '   OUW	U
S   '   UR                  W	[	        X   S   S5      5        US   R                  SS5      (       aU  T(       dM  [        R                  R                  X   S   5      S   US   -   S-   nUR                  U	[	        US5      5        g g g SX   S   ;   Gac  SS KnX   S   n[        X   5      S:  a  [        S5      e [	        US
5       nUR                  U5      n	S S S 5        X   S   R                  S5      n
S n0 nUR                  SS 5      nU(       d  g U H	  nX,   X'   M     U
S [        U
5      S-
    H  nU(       d  W	U   nM  X   nM     U(       a  XU
S   '   OUW	U
S   '   UR                  W	[	        X   S   S5      5        US   R                  SS5      (       aU  T(       dM  [        R                  R                  X   S   5      S   US   -   S-   nUR                  U	[	        US5      5        g g g g g g ! , (       d  f       GN= f!   [        S5      e= f! , (       d  f       GNj= f!   [        S5      e= f! , (       d  f       GN= f!   [        S5      e= f! , (       d  f       GN= f!   [        S5      e= f)Nr   r   r   r   =r    rD   r   r   r   r   r.   r   run_cmdgenerate_launch_cfgTr   log_dir_namer   re   z;refined recompute is not supported by command in autotuner.z8refined recompute is not supported by '-o' in autotuner.z1The 3rd arg is not supported in refined_recompute)rN   rp   r   rQ   r   r   rR   r   r   r8   ospathsplitextr   r   NotImplementedError)r   r   r   r   r   r   r   prefixr  r  r   rK   r`   new_cmd_apthr   	rr_valuesrrrun_bests                    r   _gen_new_arg"gen_new_args.<locals>._gen_new_arg	  s   :#*sx{"!hqkCM9)!$!hqkC/#ch-?)CHQK'HQK	sx=A% Xa[Fi-"&))A, . x{((-#d)a-0C  ' %
	 1
 28SX.ch r(O
 39SX.ch DH% 		'4S#9:i(,,-BDII$ ((!5a8n-. "" ! IIgtL#'>? % J CHQK'HQK	sx=A% Xa[Fi-"&.."3 . x{((-#d)a-0C  ' %
	 1
 28SX.ch r(O
 39SX.ch DH% 		'4S#9:i(,,-BDII$ ((!5a8n-.!" !
 IIgtL#'>? % J? (R ''CJs./22)Q  01!44)N  CHQK'HQK	sx=A%$K i-"&))A, . x{((-	]]#6=C%(XIN #d)a-0C  ' %
	 1
 &/$r(O(1GDH%		'4S#9:i(,,-BDII$ ((!5a8n-.!" !
 IIgtL#'>? % J CHQK'HQK	sx=A%$K i-"&.."3 . x{((-	]]#6=C%(XIN #d)a-0C  ' %
	 1
 &/$r(O(1GDH%		'4S#9:i(,,-BDII$ ((!5a8n-.!" !
 IIgtL#'>? % JE (k -7'W .-$J N .-$J ` .-$J R .-$J s   5Z ZZ ,Z? 8Z-
Z? [! *[<[! \ [1+\ 
ZZ Z Z*-
Z<7Z? <Z? ?[
[[! [! ![.1
\ ;\  \ \r  )r$   r%   r&   r)   r(   r'   rC   rE   rG   r  r	  r7   r
  re   rJ   search_stageNr   r   r   r   r   r   r   r   r   r.   r   r   run_best_stager@   r   grid)rk   rl   r:   r8   rp   r   r   r   rR   r   rQ   r   r   r   r  r  )raw_argsr   r   r  r  r  r   new_argsr`   r   r   r   r  r  r   rK   r   r   s      `             @r   gen_new_argsr$    s   
--
C-^z@x 	!!!
--	),
-C}}X&HH" i'01COOC  2 S#s.S#si8  55hmmIi0@ACs3x{"C)S!$C)CHQK'HQK	i-"&))A, . 3x{((-#d)a-0C %
 '	 1
 &)#hqkEr(O(+CGDH%		'4CS#9:CHQK'HQK	i-"&.."3 . 3x{((-#d)a-0C %
 '	 1
 &)#hqkEr(O(+CGDH%		'4CS#9:e h  0$77HmmIi01ABCCs3x{"C)S!$C)CHQK'HQK	i-"&))A, . 3x{((-#d)a-0C %
 '	 1
 &)#hqkEr(O(+CGDH%		'4CS#9:CHQK'HQK	i-"&.."3 . 3x{((-#d)a-0C %
 '	 1
 &)#hqkEr(O(+CGDH%		'4CS#9:e j '610	J O 	"(;OM .-$J ( .-$J 8 .-$J ( .-$J s   TT!TUT/.U%U#1UU#2V>U3V
T	TTT,/
T>	9U>UU
U 	U# U##U03
V	=VVVc                   [         R                  " U 5      nSU;   GaU  SUS   ;   GaK  US   S:X  d   eUS   S:X  d   eUS   US   -  US   -  nXBS	   ::  aq  S
R                  [        U5       Vs/ s H  n[	        U5      PM     sn5      UR
                  l        UR
                  R                  (       a  SUR
                  l        U$ XBS	   -  S:X  a  XBS	   -  nO2[        SUS   S-   5       H  nXE-  S:X  d  M  XE-  US	   ::  d  M  Un  O   UW-  S:X  d   eS
R                  [        XF-  5       Vs/ s H  n[	        U5      PM     sn5      UR
                  l        U SU 3UR
                  l        U$ s  snf s  snf )zGenerate new running context.r@   rA   r$   r   rC   r%   r&   r'   r=   ,z1:1r   r   rB   :)	rk   rl   rU   r9   rN   argsdevicesmasterr   )ctxr   r   new_ctxr   r   r   s          r   gen_new_ctxr-  W  s   mmC G" Im$<<{#q((('(A---K k"#'() 	
 _55#&88!&|!45!4AQ!45$GLL  ||""&+#" N 88A=%?)CCq)G"4q"89A$(A-(-?1KK!" :  &(A---#&88!&|'=!>?!>AQ!>?$GLL  &,HAfX"6GLLN) 6" @s   /E52E:c                <    SnU S-   U-   n[         R                  R                  U5      (       d  g[        US5       nUS-   U-   n[        R
                  R                  S5      (       a  SOSnSn/ n	UR                  5       n
U
 Hq  n[        R                  " Xk5      n[        R                  " X{5      nU(       a0  S	nUS    H   n [        U5      nU	R                  U5          O   Uc   eU(       d  Mo  S
nMs     U(       a	  SnX8S
-  -  nU	(       d  SnUS
-  nOS[        U	5      S:  a  U	S   nO>[        U	5      S:  a  [        U	SS	 5      [        U	SS	 5      -  nO[        U	SS	 5      S-  n[        US5      nS	S	S	5        WU4nU$ !    M  = f! , (       d  f       N= f)z$For extracting metric from log file.r   rH   )        r   r   !:* *(\d+(\.\d*)?)|(\d+(\.\d*)?) *npuzout of memoryzOut of memory error onNr   r/  
   r.      	   rf   )r  r  existsr   paddledeviceis_compiled_with_custom_device	readlinesrefindallfloatr:   rQ   sumr   )r  filetarget_metricerr_codetarget_filer  re_metric_patternre_out_of_memory_patternout_of_memory_flagmetric_listlineslinemetricout_of_memoryrK   item
metric_averess                     r   read_metric_logrN  ~  s    H*t#K77>>+&&	k3	1 @@=P 	
 }};;EBB * 	!
 DZZ 18FJJ'?FM"1ID! %d#**51	 & (((}%&" " J#:;HJ!|H"$RJ"[_-[_1EFJ[./"4J:q)
W 
 X h
CJ-! 1 
 	s+   BFF!F3BFF
	F
Fc                   U S-   U-   n[         R                  R                  U5      (       d  g [        US5       nUS-   U-   n/ nUR	                  5       nU HR  n[
        R                  " XX5      n	U	(       d  M"  S n
U	S    H   n [        U5      n
UR                  U
5          O   U
b  MR   e   U(       d  S n S S S 5        g [        U5      S:  a  US   nO>[        U5      S:  a  [        USS  5      [        USS  5      -  nO[        US	S  5      S-  n[        US
5      nS S S 5        UnU$ !    M  = f! , (       d  f       WnU$ = f)NrH   r   r0  r   r2  r.   r3  r4  r5  rf   )r  r  r6  r   r:  r;  r<  r=  r:   rQ   r>  r   )r  r?  r@  rB  r  rC  rF  rG  rH  rI  rK   rK  rL  rM  s                 r   read_step_time_logrP    se    *t#K77>>+&&	k3	1 @@=P 	 DZZ 18Fv"1ID! %d#**51	 & (((  J+ 
 	, "$RJ"[_-[_1EFJ[./"4J:q)
; 
 < CJ! # 
 	< CJs6   ;D39
D3D+ D3)D3 AD3+D0	-D33
Ec                   U S-   U-   n[         R                  R                  U5      (       d  g [        US5       nUS-   U-   n/ nUR	                  5       nU H[  n[
        R                  " XX5      n	U	(       d  M"  S n
U	S    H)  n [        [        U5      5      n
UR                  U
5          O   U
b  M[   e   U(       d
   S S S 5        g UR                  5         US   sS S S 5        $ !    Mj  = f! , (       d  f       g = f)NrH   r   r0  r   r.   )r  r  r6  r   r:  r;  r<  r   r=  r:   rq   )r  r?  r@  rB  r  rC  rF  rG  rH  rI  rK   rK  s               r   read_allocated_memory_logrR    s     *t#K77>>+&&	k3	1 @@=P 	 DZZ 18Fv"1ID! #E$K 0#**51	 & (((  ) 
 	, r?/ 
 	 ! # 
 	s5   ;C-9
C-%C%)C-2C-C-%C*	'C--
C;c                n   [         R                  R                  X5      n[         R                  R                  U5      (       d  g/ n/ n/ n[	        US5       n[
        R                  " U5      nSnU(       d+  [        U5      n	[        U	5      S:X  a  SU	;   a  SnU(       d  M+  U Hi  n	[        U	5      S:X  d  M  U	u  pp  nUR                  [        U
5      5        UR                  [        U5      5        UR                  [        U5      5        Mk     S S S 5        [        U5      S4$ ! , (       d  f       N= f)N)r/  Tr   Frg   memory_usedT)r  r  rU   r6  r   csvreadernextrQ   r:   r   r   )r  r?  log_pathrT  utilization_gpuindicesr  rV  flagrowindexutil_gpur   mem_useds                 r   read_memory_logr`    s    ww||D'H77>>(##KOG	h	Av,C3x1}#!5	 $
 C 3x1}582as5z*""3x=1&&s8}5  
" {U""# 
	s   A	D&"D&8AD&&
D4c           	        [         R                  " U 5       H  u  pnU H  nUR                  S5      (       d  M  U S-   U-   n[         R                  R	                  U5      (       d      g[        US5       nSnUR                  5       nU H;  n	[        R                  " Xy[        R                  5      n
U
(       d  M1    SSS5            g   SSS5        M     M     g! , (       d  f       M  = f)zU
check if training is completed
return:
    True: completed
    False: not completed
	workerlogrH   Fr   zTraining completed.NT)
r  walkr   r  r6  r   r:  r;  r<  
IGNORECASE)r  rootdirsfilesr?  rB  r  re_completed_patternrG  rH  	completeds              r   read_completedrj  "  s      WWT]ED??;//*t+K77>>+..k3'1'=$!D "

,BMM!I !y# (' "	 ('  +"  ('s   5AC;CC
C)c                :   Sn[         R                  " U 5       HF  u  pVnU H:  nUR                  S5      (       d  M  [        XU5      u  pU
(       d  M2  U
S-  U-  nM<     MH     [        XU5      u  pX-  n [	        X5      u  pUS-  U-  nXU4$ !   SnSU-  n N= f)z
extract metric and max memory usage from log file
return:
    metric: average metric of last 10 steps
    memory: max memory used
    err_code: 00: no error, 01: no metric, 10: out of memory, 100: no memory log
r   rb  r   r/  rd   )r  rc  r   rN  r`  )r  metric_filer@  memory_filerA  re  rf  rg  r?  rI  metric_flag
res_metric
res_memorymemory_flags                 r   read_logrr  =  s     HWWT]ED??;//"1$m"LF{'!Ox7  + .dOJ%H'"1$"D
1$0 8++'
h&s   6B 	Bc                j   / nSn[        U S5       nUR                  5       nUSS nU H[  n[        R                  " X&[        R                  5      nU(       d  M1  SU;   a  UR                  S5        MJ  UR                  U5        M]     SSS5        [        [        U5      5      $ ! , (       d  f       N"= f)zM
get error info from log file
return:
    error_info: Specific error message
Errorr   iNzOut of memory)r   r:  r;  r<  rd  r:   r   r   )filenameerror_infoserror_patternr?  rG  
last_linesrH  
error_infos           r   get_error_inforz  a  s     KM	h	 45\
DMGJz"d*&&7&&t,  
 K !! 
	s   AB$.B$$
B2c                   Sn/ n[         R                  " U 5       H<  u  p4nU H0  nUR                  S5      (       d  M  [        U S-   U-   5      nX'-  nM2     M>     [	        [        U5      5      nU H
  nX-   S-   nM     USS nU$ )z`
find error infos from log directory
return:
    error_info: all error message on log directory
r  rb  rH   r&  Nr.   )r  rc  r   rz  r   r   )	r  unique_error_infoall_error_infosre  rf  rg  r?  rv  rt   s	            r   find_error_from_logr~  v  s     OWWT]ED??;//(d):;K*O	  + 3/0O-4s:  )#2.r   c                    / n[        SU S-  S-   5       HJ  nX-  S:X  d  M  [        X S-  S-   5       H(  nX-  U-  S:X  d  M  UR                  X#X-  U-  45        M*     ML     U$ )zAReturn the combinations of three numbers which product is target.r   rD   r   r   )r9   r:   )targetr   r   js       r   three_mul_combinationsr    sn    G1fkAo&:?1kAo.K1$)NNA&+*:#;< / '
 Nr   c                    [        US-  5      n[        USS5       H<  nX-  S:X  d  M  X-  n[        US-  5      nXV-  S:w  a  US-  nXV-  S:w  a  M  XFXV-  4s  $    [        S5      e)z&Return middle candidates of dp, mp, ppgUUUUUU?r   r.   g      ?r   zCannot distribute GPUs equally)r   r9   rR   )r   r   r   startr   	remainingr  s          r   gbs_dp_mp_pp_candidatesr    s     (u%&E 5!R <1 Iin%A-1$Q -1$'' ! 5
66r   c                   0 nU S   nU S   nUS:  d   eU R                  S0 5      R                  SS5      nUS:X  a|  [        XU5      u  pVnUnS/US'   U/US	'   U/US
'   U/US'   S/US'   S/US'   S/US'   [        SS5       V	s/ s H  n	SU	-  PM
     sn	US'   US    V
s/ s H
  n
Xu-  U
-  PM     sn
US'   U$ s  sn	f s  sn
f )r?   r   rB   r   r/   r7   rF   r   r$   r%   r&   r'   rC   FrE   NrG   r2  r   r(   )r8   r  r9   )r   rW   r   r   r7   dp_candidatemp_candidatepp_candidatesharding_degree_candidater   es              r   gbs_default_candidatesr    s0   J$H'"Ia<<!k26::V F"3J4
0L %1!#$#
;#/.
;#/.
;)B(C
$%()s
#$',g
?#/3f
*+8=a)E1!Q$)E
%&   23+
3 '!+3+

&'  *F+
s   B;"C c                ^   U S   nUS   nUS   nUS   nUS   nUS   nUS   nUS   nUS	   n	[        [        R                  " UUUUUUUU	5      5      n
SSSSSSSS	S
.n/ nU
 HF  n0 n[        U5       H  u  nnUXU   '   M     US   US   -  US   -  US'   UR	                  U5        MH     U$ )rc   rW   r$   r%   r&   r(   rC   r'   rE   rG   )r   r   r   rD   rf   rd   rg   rh   r7   )r   rn   ro   rr   r:   )r   rW   rv   rw   rx   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   s                    r   gbs_search_allr    s4   <(J%k2%k2%k2 23N *+; <!+,=!>)/:'12I'J$   &%$,
	
H "
G L!#HC$'GCL! ' K k"#()* 	#$
 	G$  r   c                   / n/ SQnSS/n[        U S5       n[        R                  " U5      n[        U5      nSSS5        W H  n0 nU H#  n	UR	                  U	S5      n
 [        U
5      X'   M%     UR	                  SS5      nUR                  5       S;   d   U S	U 35       eUR                  5       S
:H  US'   UR	                  SS5      nUS:X  d,  UR                  5       [        ;   d   U S[         SU S35       eUS:w  a  UOSUS'   UR                  U5        M     U$ ! , (       d  f       N= f! [         a    [        U	 SU
 35      ef = f)zLoad the configs from csv file.)r$   r%   r&   r)   r(   r'   rC   rE   rG   r   Nr  z must be integer, but got )truefalsez  must be true or false, but got r  z must be one of rI   r   )
r   rU  
DictReaderr   r8   r   rR   rO   rS   r:   )configs_csvall_configsextract_keys_integerextract_keys_stringr  rV  raw_configs
raw_configconfigextract_keyr   rE   rG   s                r   load_configs_from_csvr    s   K +,CD	k3	1"6l 
  "
/K..b1C&)#h# 0 #;""$ )
 
 	M _<]OL	M 
 #0"5"5"76"A */F K!R'$**,23	

 %%%56Y5ZZdezd{{|}	
3 &;b%@!d 	&' 	6"= "@ G 
 	   "m#=cUC s   "D! D2!
D/2E)F)N)workerlog.0step/s)returnztuple[float, int])r  interval_runtime)r  max_memory_allocated)r  ztuple[float, bool])r  r  z	0.gpu.log)r  ztuple[float, float, int])+
__future__r   rk   rU  rn   loggingr  r;  r7  pruner   rS   	getLoggerrs   r   r"   r   ra   r   ru   r   r   r   rL   r   r   r  r  r$  r-  rN  rP  rR  r`  rj  rr  rz  r~  r  r  r  r  r  r   r   r   <module>r     s   #  
   	 	  &H #			<	(," <@PgTOdHV(
$QhBPf3Ll^7BP;f~B$P -5==B -?%%R -C#@#8: 	!,
 !,H"**7$:0f0r   