
    ΑioR                     h   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJrJr  S SKJr  S SKJr  S SKJrJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJr  SSK J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'J(r(J)r)J*r*J+r+  SSK,J-r-  SSK.J/r/J0r0  \" \Rb                  5      r2 " S S5      r3g)    N)PassContextnew_pass)
get_logger)core)append_backwardprogram_guard   )Cluster)	Completer)DistributedContextset_default_distributed_context)DistributedOperator)DistributedTensor)mapping)Partitioner)Planner)ProcessGroup_g_process_group_mapget_all_process_groupsget_process_groupget_world_process_group)	Resharder)SerialProgramInfomake_data_unshardc                   \    \ rS rSrSrS rS rS rS rS r	S r
SS
 jr   SS jrS rSrg	)AutoParallelizer5   a  
AutoParallelizer is the main controller class to do the auto parallel process.
And the auto parallel process will be triggered in the wrapped parallelize function.
To facilitate the auto parallelization, it will contain information about program, cluster and the
related context. In this basic version, the program information will be retrieved from
Fleet object, and the cluster information can be retrieved in the new created Cluster object,
and the context information can be retrieved in the new created DistributedContext.
c                    Xl         U R                   R                  U l        U R                   R                  U l        [        5       U l        S U l        [        R                  " SS 5      U l
        U R                  b4  [        5       U l        U R                  R                  U R                  5        [        R                  " SS 5      U l        [        R                  " SS 5      nUc  SU l        OSU l        [        5       U l        [        R                  " S5      U l        U R"                  (       a&  U R"                  R%                  5       S:X  a  SU l        g SU l        g )NPADDLE_CLUSTER_TOPO_PATHPADDLE_RANK_MAPPING_PATHPADDLE_ENABLE_AUTO_MAPPINGFTPADDLE_NEED_RANK_MAPPINGtrue)_fleetuser_defined_optimizer
_optimizer_user_defined_strategy_dist_strategyr   _dist_context_clusterosgetenv_cluster_topo_pathr
   build_from_file_rank_mapping_path_enable_auto_mappingr   _pass_context_need_rank_mappinglower)selffleetenable_auto_mapping_envs      t/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/static/parallelizer.py__init__AutoParallelizer.__init__?   s   ++<<"kk@@/1"$)),F"M"".#IDMMM))$*A*AB"$)),F"M"$)),H$"O"*(-D%(,D%(]"$)),F"G &&''--/69  	 	 	    c                     [         R                  " 5       nUR                   HA  nUR                   H.  nUR                   H  nX%;   d  M
  UR                  U5        M     M0     MC     g N)r   kAutoParallelSuffixblocksops
attr_names_remove_attr)r4   main_programsuffixblockop	attr_names         r7   _remove_distributed_attrs*AutoParallelizer._remove_distributed_attrs[   sP    ))+ "((Eii!#I*	2 "/   )r:   c                    U R                   R                  (       a  [        R                  " U R                   R                  5      nU R
                  US'   XFS'   X6S'   US   (       aK  U R                  US'   [        SU5      nUR                  U/U/U R                  5        UR                  5       nO;[        SU5      nUR                  U/U/U R                  5        UR                  5       nU R                   R                  (       a  [        R                  " U R                   R                  5      nU R
                  US'   [        R                  " U5      US'   X6S'   [        S	U5      n	U	R                  U/U/U R                  5        g g )
Ndist_contextparams_gradslossuse_pure_fp16base_optauto_parallel_fp16auto_parallel_ampno_grad_setauto_parallel_recompute)r(   ampcopydeepcopyamp_configsr)   r&   r   applyr1   get_loss	recomputerecompute_configs)
r4   rB   startup_programrL   rK   rQ   configauto_parallel_fp16_passauto_parallel_amp_passauto_parallel_recompute_passs
             r7   _apply_pre_optimization_passes/AutoParallelizer._apply_pre_optimization_passese   sj    ""]]4#6#6#B#BCF%)%7%7F>"%1>"!6No&%)__z"*23G*P''--!N_$5t7I7I /779)12Ev)N&&,,!N_$5t7I7I .668 ((]]4#6#6#H#HIF%)%7%7F>"$(MM+$>F=!!6N+3)6,( ).. 143E3E )r:   c           	      F   [        X5         [        UUUUU R                  R                  S9nS S S 5        [	        U R                  5      U l        U R
                  R                  U5        U R                  R                  R                  U5        W$ ! , (       d  f       Nj= f)N)distop_context)	r   r   r)   dist_op_contextr   
_completercomplete_backward_annotationblock_stateparse_backward_blocks)r4   rB   r[   rL   parameter_listrQ   	callbacksrK   s           r7   _generate_backward#AutoParallelizer._generate_backward   s     <9*#11AAL : $D$6$6744\B&&<<\J :9s   "B
B c                 <   [         R                  " U R                  5      n[        X5         UR	                  U5      nS S S 5        X@R
                  l        [        U R
                  5      U l        U R                  R                  U5        W$ ! , (       d  f       NU= fr<   )
rT   rU   r&   r   apply_gradientsr)   _serial_optimizerr   re   complete_update_annotation)r4   rB   r[   rK   	optimizeroptimize_opss         r7   _apply_optimize AutoParallelizer._apply_optimize   sr    MM$//2	<9$44\BL : 09,#D$6$6722<@ :9s   B
Bc                 (   U R                   R                  (       a  [        R                  " U R                   R                  5      nU R
                  US'   XES'   X5S'   [        SU5      nUR                  U/U/U R                  5        U R                  R                  S5      n[        R                  " U R                   R                  5      nU R
                  US'   XES'   X5S'   [        SU5      nUR                  U/U/U R                  5        U R                   R                  (       ai  [        R                  " U R                   R                  5      nU R
                  US'   XES'   [        SU5      nUR                  U/U/U R                  5        g g )NrJ   rK   global_rankauto_parallel_shardingrank_idauto_parallel_grad_clip!auto_parallel_gradient_merge_pass)r(   shardingrT   rU   sharding_configsr)   r   rW   r1   get_attrgradient_mergegradient_merge_configs)	r4   rB   r[   rankrK   r\   auto_parallel_sharding_passauto_parallel_clip_passrz   s	            r7   _apply_post_optimization_passes0AutoParallelizer._apply_post_optimization_passes   sy    '']]4#6#6#G#GHF%)%7%7F>"%1>"$(=!*2(&+' (-- 143E3E  --66~FLt22CCD!%!3!3~!-~ y"*+Df"M%%N_-t/A/A	
 --]]4#6#6#M#MNF%)%7%7F>"%1>"083V1- .33 143E3E .r:   Nc                    S nU R                   R                  5       nU R                  R                  5       nUR                  5       R	                  U R
                  R                  5      nUcZ  [        5       U l        [        R                  S5        [        U R                  5      U l        U R                  R                  U5      nOUn[        R                  " U5      U l        U R                  R                   R#                  U5        U R%                  UUUU R&                  U R(                  U R*                  5      nU R-                  UUUUU R(                  5        [/        U R                  U5      n	U	R1                  XFU5      u  n
nnU R3                  XU5      n[5        XU R                  5        [7        U
UUU R                  U5      nUR9                  5         U R;                  XX5        S nU(       d  [        R                  " [<        5      n[<        R>                  " 5         [A        S/ 5      [<        S'   U R                  RB                   H%  n[<        S   RE                  URF                  5        M'     UUUU
U4$ )NzStart annotation dist attr.r   )$_main_programclone_startup_programglobal_blockvar_lossnamer   r)   _loggerinfor   re   complete_forward_annotationrT   rU   rg   parse_forward_blocksrk   _parameter_list_no_grad_set
_callbacksr`   r   	partitionrs   r   r   reshardr   r   clearr   _process_meshes	add_ranksprocess_ids)r4   r   rJ   relaunch_phasecompleted_main_programserial_main_programserial_startup_programserial_lossrK   partitionerdist_main_progdist_startup_progdist_params_gradsdist_optimize_ops	resharderg_process_group_mapprocess_meshs                    r7   _get_dist_program"AutoParallelizer._get_dist_program   sR   !%"00668!%!6!6!<!<!>)668<<TZZ__M !3!5DLL67'(:(:;DO;;<OP # &9"!%|!<D 	&&;;<OP ..""  OO
 	++""	
 "$"4"4d;
 !!"L
		
 !00/@
 	.T=O=OP
	 	,,t	
 #"&--0D"E &&(&21b&9 # $ 2 2 B B$Q'11,2J2JK !C 
 	
r:   c                 
   Uc   eXl         X l        UR                  R                  U l        X0l        X@l        XPl        U R                  (       GaG  U R                  (       Ga5  U R                  c   S5       e0 n[        5       nS nU R                  R                  (       a  [        R                  " S5        [!        U R                  U R                  U R                   U R"                  U R                  5      n	[%        U	U SSS.S9n
U
R'                  5       u  p[        R                  " S5        UGbL  [        R                  " S5        [(        R*                  " 5       R-                  5       n[.        R0                  R3                  US	[4        R4                  " 5        S
35      n0 n0 n0 nUR6                  R9                  5        H  u  nnUR:                  UU'   M     UR<                  R9                  5        H  u  nnUR:                  UU'   M     XS'   UUS'   UR>                  US'   [A        US5       n[B        RD                  " UU5        U[.        RF                  S'   [        R                  " SU 35        S S S 5        URH                   H"  nU RK                  UU5      u  nnnnnUU/UU'   M$     [M        X`R                  5      n[O        URQ                  5       5      n[A        U RR                  S5       n[T        RD                  " UU5        S S S 5        [.        RV                  " S5      nU(       a  URY                  5       S:X  a  SOSnU(       aW  [[        S5        [\        R^                  " [`        Rb                  Rd                  Rf                  Rh                  Rj                  5        [.        RV                  " S5      nSR3                  SU RR                  /5      n [.        RF                  Rm                  SS5      S:X  a  / SQn!O/ n!SU -   S-   U-   n"[\        Rn                  S/U!Q[p        Rr                  " U"5      Qn#[t        Rv                  " U#5      n$U$Ry                  5         U$Rz                  S :X  d   S!5       e[[        S"5        [\        R^                  " S 5        g [`        Rb                  R}                  5       nS n[.        RV                  " SS 5      nUGb   [A        US#5       n[B        R~                  " U5      n[        5       nU R                  R                  5       R                   H@  n%US   U%R                  R                  5          n&[        U%U&5      nUR                  U5        MB     U R                  R                  5       R                  n'U'RQ                  5        H@  n(US   U(R                  R                  5          n&[        U(U&5      nUR                  U5        MB     US   Ul        S S S 5        OzU R                  R                  (       a_  [!        U R                  U R                  U R                   U R"                  U R                  S$9n	[%        U	U SSS.S9n
U
R'                  5       u  pUb9  [        S 5      n)UR>                   H  n*U)R                  U*R                  5        M      U RK                  UUSS%9u  nnnnnU R                  R                  (       a  Sn+UR                  5       R                   H'  n%U%R                  S&:X  d  U%R                  S':X  d  M%  Sn+  O   U+(       aF  [`        R                  R                  U5         [`        Rb                  R                  5         S S S 5        [        5       n,U, H  n-U-R                  5         M     [        U R                  5        U R                  U5        UUUU4$ ! , (       d  f       GNP= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       N= f)(Nz5The cluster must not be none when using auto mapping.zStart searching dist attr.mcmc   )r   max_search_times)algorithm_configzEnd searching dist attr.z"Start serialize searched dist attrsearched_dist_context_z.pklops_dist_attrtensors_dist_attrprocess_mesheswb!PADDLE_SEARCHED_DIST_CONTEXT_PATHz$End serialize searched dist attr to wPADDLE_ENABLE_ELASTICr#   TFz/Auto mapping finished, now do elastic re-launchPADDLE_ORIGINAL_CMD_ARGS z--rank_mapping_pathWITH_COVERAGEOFFON)z-mcoveragerunz--branchz-pz#-m paddle.distributed.fleet.launch z-ur   zLaunch failed with rank mappingz3Successfully do the second launch for auto mapping!rb)cluster)r   send_v2recv_v2)Vr   r   rD   programr   r   r   r   r0   r2   r*   r   r(   auto_searchloggingr   r   r&   r   searchpathlibPathcwdr+   pathjointime_dist_ops_for_programitems	dist_attr_dist_tensors_for_programr   openpickledumpenvironranksr   r   listvaluesr/   jsonr,   r3   printsysexitpaddledistributedr5   elasticmanagerELASTIC_AUTO_PARALLEL_EXIT_CODEget
executableshlexsplit
subprocessPopenwait
returncodeget_rankloadr   r   r?   descidr   add_dist_op_for_programvarsr   add_dist_tensor_for_programr   r   r   typestaticr   barrierr   instantiater   r)   rG   ).r4   rL   r[   ri   rQ   rj   dist_programsworld_process_grouprJ   serial_program_infoplanner_r   searched_dist_context_pathsaved_dist_contextr   r   keydist_opdist_tensordist_context_filer   r   r   r   r   r   rank_mapping_dictrank_mappingrank_mapping_fileenable_elasticoriginal_cmd_argsrank_mapping_argscoverage_argsnew_cmd_argsnew_cmdnew_processrE   r   r   r   pg0r   is_pipelineall_process_groupsprocess_groups.                                                 r7   parallelizeAutoParallelizer.parallelize$  sU    ***
 /!ZZ//-'#$$$)@)@)@==, G, M"9";L""..9:&7&&))JJOOMM'# "'.4!%L
 #*.."278 'ABlln((*-/WW\\1$))+dC.* &(" "$&!$0$F$F$L$L$NLC)0):):M#& %O
 ";;AAC-8-B-B%c* D6C?3:K"#67 00 ##34 .&KK 24EF2 JJBC LL>?Y>Z[ ,11 **4>%%%"''57J&Kd# 2 !(}} E 1 8 8 :;L d--s37H		,(9: 4  YY'>?N "n&:&:&<&F  
 GH&&,,44<<\\ !#		*D E #&(?(?@! zz~~ou5= K "#$  $	$    \*	G %**73K))Q. 1. GHHHQK %%..0DL)+3T*& *5.&)/5F)G&#5#7L"00==?CC$6$GGGJJL%	 #6b)"D$<<WE D  --::<AAD#{{}$67J$KHHKKM%	 '8Y&G$@@M  - 4F(4L0) 2 &&22*;**--

 $+' &+$*01*G '.nn&6OL ''*$0$@$@LMM,":":; %A &&t\$&O!!! ""..#(557;;Bww)+rww)/C&* < 44^D**224 E
 "8!9!3))+ "4 ,D,>,>? **>: "!!	 { 4 43f @ EDs2   1A^>3_D_"_4>
_
_"
_14
`c           	      8   U R                   nUR                  U5      nX1[        U 5      '   U R                  R	                  5        HQ  u  pEUS:X  d  US:X  d  US:X  d  US:X  d  US:X  a  [        X4U5        M1  [        X4[        R                  " XQ5      5        MS     U$ )Nr   r   r)   r$   r   )	__class____new__r   __dict__r   setattrrT   rU   )r4   memoclsresultkvs         r7   __deepcopy__AutoParallelizer.__deepcopy__  s    nnS!RXMM'')DA_$**'=<1%4==#9: * r:   )r   r*   r-   re   r)   r(   r0   r$   r   r   r2   r   r&   r   r1   r/   r   )NF)NNN)__name__
__module____qualname____firstlineno____doc__r8   rG   r`   rk   rs   r   r   r  r  __static_attributes__ r:   r7   r   r   5   sE    
83"H,
"HS
r bHr:   r   )4rT   r   r   r+   r   r   r   r   r   r   r   paddle.distributed.passesr   r   "paddle.distributed.utils.log_utilsr   paddle.frameworkr   paddle.staticr   r   r   r
   
completionr   rJ   r   r   r   r   r   r   mapperr   r   r   r   r   r
  r   r   r   r   r   r   r   utilsr   r   INFOr   r   r  r:   r7   <module>r(     sx       	     
   ; 9 ! 8  ! M ( *  $    7
W\\
"b br:   