
    Αi˱                   
   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	J
r
Jr  S SKrS SKrS SKJs  Js  Js  Jr  S SKJrJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
K J!r!J"r"  S SK#J$r$J%r&J'r'J(r(  S SK)J*r*  S SK+J,r,J-r-J.r.J/r/  S SK0J1r1  SSK2J3r3  SSK4J5r5J6r6J7r7  SSK8J9r9  SSK:J;r;  SSK<J=r=  SSK>J?r?J@r@  SSKAJBrB  SSKCJDrD  SSKEJFrFJGrG  SSKHJIrI  SSKJJKrK  SSKLJMrM  SSKNJOrO  SSKPJQrQ  SS KRJSrS  SS!KTJUrU  SS"KVJWrWJXrXJYrYJZrZJ[r[J\r\J]r]J^r^  SS#K_J`r`  SS$KaJbrbJcrc  SS%KJdrd  \	(       aN  S S&KeJfrfJgrg  S S'KhJiri  S S(KJjrj  S S)KkJlrl  S S*KmJnrn  S S+KoJprp  S S,KqJrrr  S S-KsJtrt  S S.KuJvrv  S S/KwJxrx  S S0K+Jyry  \S1   rzS2\{S3'    " S4 S55      r|g)6    )annotationsN)TYPE_CHECKINGAnyLiteral)pirstaticutils)_to_name_str)auto_complete_op_role)decomp)OpRole)new_pass)-_split_program_into_forward_backward_optimizeset_skip_gc_vars)IrGraph_current_expected_place_corein_dynamic_mode)Metric)	InputSpecOperatorVariableglobal_scope)_convert_float_to_bfloat16   )
get_logger   )CollectionNamesfetchget_collection)DistributedTensor)Strategy   )config_callbacks)Clusterget_default_cluster)	Converter)get_cost_from_engine)DistributedContextget_default_distributed_context)DistributedInputSpec)DistributedDataLoader)DistributedOperator)DistributedSaver)ProgramHelper)apply_mix2dist_pass)Parallelizer)RemovePassesReshardPassesapply_partition_passcheck_chunk_idcomplete_chunk_idfuse_attention_ffn_qkv_passpipeline_passremove_unuseful_comm_op_pass)Planner)get_all_process_groupsnew_process_group)set_all_ops_op_role)CallableSequence)	TypeAlias)Tensor)	PlaceLike)Callback)Dataset)
_CollateFn)Layer)	Optimizer)Value)Programtrainevalpredictr@   _Modec                  X   \ rS rSrSr      SB             SCS jjrS rS rS r/ 4S jr	S	 r
S
 r      SBS jrS rSDS jrS rS rS rS rSES jrS rS rS rSDS jrS rSSSSSSSSSSSSSSSS/4                                 SFS jjr       SG                 SHS jjr      SI               SJS jjr              SK                               SLS jjr        SM                 SNS  jjr    SO         SPS! jjrSQS" jrSRS# jr              SSS$ jr!STS% jr"S& r#S' r$S( r%S) r&S* r'S+ r(S, r)S- r*SUS. jr+S/ r,SDSVS0 jjr- SW       SXS1 jjr.   SY       SZS2 jjr/S[S3 jr0S[S4 jr1S[S5 jr2S[S6 jr3\4S\S7 j5       r5\4S\S8 j5       r6\4S]S9 j5       r7\4S\S: j5       r8\4S\S; j5       r9\4S^S< j5       r:\4S^S= j5       r;\4S_S> j5       r<\4SQS? j5       r=\4SQS@ j5       r>SAr?g)`Enginec   a
  
An High-Level API for auto parallel, which could be used for distributed Training (engine.fit) and Inference (engine.predict).
Static graph mode is supported natively, Dynamic graph mode is also supported under `@to_static <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/jit/to_static_cn.html#to-static>`_ .

Args:
    model (paddle.nn.Layer, optional): The model is an instance of
        paddle.nn.Layer.
    loss (Loss|Callable|None, optional): The loss can be a `paddle.nn.Layer`
        instance or any callable function taken the predicted values and
        ground truth values as input. It can be None when there is no loss.
        Default: None.
    optimizer (Optimizer|None, optional): The optimizer need to be set in training
        and should be None in eval and predict mode. Default: None.
    metrics (Metric|list[Metric]|None, optional): If metrics is set, all
        metrics will be calculated and output in train/eval mode. Default: None.
    cluster (Cluster|None, optional): The cluster represents the topology information
        about the used physical devices. Default: None. (Unused for now)
    strategy (Strategy|None, optional): The strategy is used to configure the
    parallelization and optimization behaviors. Default: None.

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.vision.transforms as T
        >>> from paddle.distributed.fleet import auto
        >>> from paddle.vision.datasets import MNIST

        >>> transform = T.Compose([
        ...     T.Transpose(),
        ...     T.Normalize([127.5], [127.5])
        >>> ])
        >>> train_dataset = MNIST(mode='train', transform=transform)
        >>> valid_dataset = MNIST(mode='test', transform=transform)

        >>> model = paddle.vision.models.LeNet()
        >>> loss = paddle.nn.CrossEntropyLoss()
        >>> optimizer = paddle.optimizer.Adam(
        ...     learning_rate=0.001, parameters=model.parameters())
        >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

        >>> engine = auto.Engine(model, loss, optimizer, metrics)
        >>> # fit
        >>> engine.fit(train_dataset,
        ...            epochs=2,
        ...            batch_size=64)
        >>> # evaluate
        >>> engine.evaluate(valid_dataset,
        ...                 batch_size=64)
        >>> # predict
        >>> engine.predict(valid_dataset,
        ...                batch_size=64)
        >>> # save
        >>> engine.save("./my_model")
        >>> # load
        >>> engine.load("./my_model")

Nc                   U(       aD  [        U[        R                  R                  5      (       d  [	        U5      (       d  [        S5      eXl        U(       d  S O)UR                  5        Vs/ s H  owR                  PM     snU l	        U(       aJ  [        U[        R                  R                  [        45      (       d  [	        U5      (       d  [        S5      eX l        U(       a4  [        U[        R                  R                  5      (       d  [        S5      e/ nUb  UR                  br  [        UR                  5      S:  aY  [        UR                  S   [        5      (       d7  UR                   H'  nUR                   (       a  M  UR#                  U5        M)     U Vs/ s H  owR                  PM     snU l        [&        R(                  " U5      U l        U=(       d    / n[&        R,                  " U5       HC  n	U	(       d  M  [        U	[.        5      (       a  M#  [        U	R0                  R2                   S35      e   [&        R,                  " U5      U l        U(       a   [        U[6        5      (       d  [        S5      eU(       a   [        U[8        5      (       d  [        S5      eU=(       d
    [9        5       U l        [=        [>        R@                  5      U l!        0 U l"        0 U l#        0 U l$        0 U l%        0 U l&        0 U l'        / U l(        / U l)        [T        RV                  " U R*                  5      U l,        S U l-        [        R\                  R_                  5       U l0        [        R\                  Rc                  5       U l2        [g        5       U l4        [j        Rl                  " 5       U l7        [j        Rp                  " 5       U l9        [u        5       U l;        0 U l<        0 U l=        SSSS	.U l>        SSSS	.U l?        / U l@        / U lA        / U lB        / U lC        / U lD        S U lE        SU lF        SU lG        S U lH        SU lI        U R:                  R                  U lK        S
U lL        S U lM        [        R                  R                  R                  S5      S   U lQ        U R:                  R                  R                  (       a&  U R:                  R                  R                  U lL        OJU R:                  R                  R                  (       a%  U R:                  R                  R                  U lL        U R:                  R                  R                  (       aE  U R:                  R                  R                  S:X  a!  [        R                  " S5      S:w  d   S5       eS U lZ        [        R                  R                  SS
05        [        R                  R                  SS
05        [        R                  R                  SS 5      n
U
c!  [        R                  R                  SS
05        SU l^        S U l_        U R                  (       d  S U l`        U(       a  XPla        OS n[        R                  " S5      (       aH   [        R                  " S5      n[        US5       n[        R                  " U5      U l`        S S S 5        O:[        R                  " S5      (       a  [        [        R                  " S5      5      n[        U R                  U5      U la        U R                  c  [        S5      eg g s  snf s  snf ! , (       d  f       NM= f! [         a,  nU RB                  R                  S5        S U l`         S nANS nAff = f)NzI'model must be sub classes of `paddle.nn.Layer` or any callable function.zW'loss' must be sub classes of `paddle.nn.Layer` or any callable function or a Variable.z@'optimizer' must be object of class `paddle.optimizer.Optimizer`r   z is not sub class of MetriczP'cluster' must be the object or class `paddle.distributed.auto_parallel.Cluster`zN'strategy' must be object of class `paddle.distributed.auto_parallel.Strategy`FrJ   r#   FLAGS_enable_pir_api1F1BCUDA_MODULE_LOADINGLAZYz<EXP_CUDA_MODULE_LOADING_LAZY not supported in 1F1B pipeline.!FLAGS_new_executor_sequential_runFLAGS_new_executor_static_buildFLAGS_enable_pir_in_executorPADDLE_AUTO_PARALLEL_CONFIGrzILoad json failed, please check json file, engine will run default config.PADDLE_AUTO_CLUSTER)i
isinstancepaddlennrF   callable	TypeError_model
parametersname_parameter_listr   _loss	optimizerrG   lendictstop_gradientappend_parameter_name_list
auto_utilsvalidate_opt
_optimizerto_listr   	__class____name___metricsr%   r"   	_strategyr   loggingINFO_logger_fwd_dist_contexts_fwd_main_progs_startup_progs_pir_dist_main_progs_pir_dist_startup_progs_pir_dense_main_progs_pir_fetch_values_pir_user_defined_fetch_namescopydeepcopy_orig_optimizer	_executordistributedget_rank	_cur_rankget_world_size_nranksr.   _saverr   default_main_program_orig_main_progdefault_startup_program_orig_startup_progr*   _orig_dist_context_dist_contexts	_planners_has_prepared_has_prepared_reader_inputs_spec_labels_spec_inputs_labels_losses_mode_skip_build_outside_dataloader_planned_mode_dygraph_modetuning_tuning
_acc_steps	_job_planbase	framework	get_flags_in_pir_modegradient_mergeenablek_stepspipelineaccumulate_stepsschedule_modeosgetenvhistory	set_flagsenvirongetenable_job_schedule_profilerfused_ffn_qkv_json_config_clusteropenjsonload	Exceptioninfointr&   )selfmodellossrg   metricsclusterstrategyp
param_listmetricis_pir_modeauto_configpathfes                  n/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/static/engine.py__init__Engine.__init__   s    ufiioo66UOO[  DE4D4D4F#G4FqFF4F#G 	
 tfiioox%@AATNNi  
Z''
 
 R  
 %%1I--.2y88;TBB..%%a( / 6@$@ZVVZ$@!$11)<-R ((1Fvj88''0011LM  2
 #**73:gw77b  Jx::`  "/XZ!',,/ #%! $&!')$%'"!#-/*#}}T__=++446))88:&(%::<"("@"@"B"A"C ',eN%
!
 
 #( !"~~,,"KK11;;"

 " >>((//"nn;;CCDO^^$$++"nn55FFDO NN##**''55?9923v= N= ""$G#KL""$Eq#IJjjnn%CTJ&&(F'JK,1)!    $D '"99:;;1!yy)FG!$_04		!D- -_ yy!677&)"))4I*J&K 3%%{! }}$f  %/ !} $H< %AR -_$ 1))g -1))	1sB   -\7$\<"] #]?] 
]] ] 
^"^^c                  ^ / n/ n[        U" 5       5      n[        US5      (       a  UR                  nOUR                  R                  n[        US5      (       a  UR	                  S5        [        U[        5      (       aS  [        UR                  5       5      n[        U5      S:  a  UR                  5       nUnO[        S[        U5       S35      e[        U[        [        45      (       a:  [        U5      S:  a  UR                  5       nUnO0[        S[        U5       S35      e[        S[        U5       S35      e[        U[        [        45      (       d  [        R                   " U5      n[        R                   " U5      nU4S	 jmT" U5      nUbQ  [#        U5       HB  u  pU	c   S
5       eS[%        U5      -   n
UR'                  [(        R*                  " X5      5        MD     UbQ  [#        U5       HB  u  pU	c   S
5       eS[%        U5      -   n
UR'                  [(        R*                  " X5      5        MD     U R-                  U5      nU R-                  U5      nX#4$ )Nbatch_sampler	set_epochr   r   z6Data should be a dict at least two keys, but received .z@Data should be a dict or list at list two element, but received z,Data should be a dict or list, but received c                   > / nU  HH  n[        U[        [        45      (       a  UR                  T" U5      5        M7  UR	                  U5        MJ     U$ N)r]   listtupleextendrk   )nested_list	flat_listitemflatten_lists      r   r   ?Engine._prepare_data_spec_from_dataloader.<locals>.flatten_listt  sL    I#dT5M22$$\$%78$$T*	 $
     Receive None input.inputlabel)nexthasattrr   _dataloaderr   r]   ri   r   valuesrh   pop
ValueErrorr   ra   typerm   rp   	enumeratestrrk   r+   from_dtensor_validate_spec)r   
dataloaderinputs_speclabels_specdatar   labelsinputsir   rd   r   s              @r   "_prepare_data_spec_from_dataloader)Engine._prepare_data_spec_from_dataloaderN  s<   JL!://&44M&22@@M=+.. ##A&dD!!&D4yA~ LSQUYKWXY  tUm,,4yA~ VWZ[_W`Vaabc  >tDzl!L  &4-00''/F##F+	 f%$V,'>)>>'Q'""(55dA - $V,'>)>>'Q'""(55dA - ))+6))+6''r   c                p  ^^ / n/ n[        U[        R                  R                  5      (       a8  Uc  [	        [        U5      5      u  pgO[	        [        U5      5      nUS U nXS  nOe[        U[        R                  R                  5      (       a  Uc  US   u  pgO1US   nUS U nXS  nO"[        S[        U5      R                   S35      e[        R                  " U5      n[        R                  " U5      nU R                  R                  R                  mS mUU4S jn	Ub5  [        U5       H&  u  pUc   S5       eS[!        U
5      -   nU	" XX45        M(     Ub5  [        U5       H&  u  pUc   S5       eS[!        U
5      -   nU	" XX55        M(     U R#                  U5      nU R#                  U5      nXE4$ )	Nr   z:Data should be a Dataset or IterableDataset, but received r   c                    U S:  a:  [        UR                  5      S:  a   UR                  S   U -  UR                  S'   g g g Nr#   r   )rh   shape)
num_shardsspecs     r   _adjust_item_spec4Engine._prepare_data_spec.<locals>._adjust_item_spec  s9    A~#djj/A"5 $

1
 :

1 #6~r   c                  > [        U [        R                  5      (       aU  [        R                  " X5      nUc  T" TU5        UR                  U5        g UR                  UR                  U5      5        g [        U [        [        R                  R                  45      (       aU  [        R                  " X5      nT" TU5        Uc  UR                  U5        g UR                  UR                  U5      5        g [        U [        R                  5      (       a'  UR                  [        U/[        U 5      U5      5        g [        S[        U 5      R                    35      e)NzWThe sample's dtype returned of dataset should be number, np.ndarray or Tensor, but got )r]   npndarrayr   
from_numpyrk   batchr   r   eagerrA   from_tensornumbersNumberr   ra   rr   )r   rd   
batch_sizespecsr   r   r   s        r   _infer_item_spec3Engine._prepare_data_spec.<locals>._infer_item_spec  s   $

++ ++D7%%j$7LL&LLJ!78D8TZZ->->"?@@ ,,T8!*d3%LL&LLJ!78D'..11Y
|T$ZFGmnrswnx  oB  oB  nC  D r   r   r   r   )r]   r^   ioIterableDatasetr   iterrD   ra   r   rr   rm   rp   rt   datasetr   r   r   r   )r   r   splitr  r   r   r   r   sampler  r   r   rd   r   r   s                @@r   _prepare_data_specEngine._prepare_data_spec  s   dFII5566}!%d4j!1d4j)fii//00}!%aaLTRVZM`M`Laabc  ##F+##F+^^++66
	;	, $V,'>)>>'Q' ZE - $V,'>)>>'Q' ZE -
 ))+6))+6''r   c                   [        5       (       d  U R                  (       a  [        S5      eU(       a  [        U[        5      (       d   S[        U5       35       e[        U[        5      (       d   S[        U5       35       e[        U5      [        U5      :X  d   S5       e[        X5       HF  u  pVUR                  UR                  :w  d  M!  UR                  R                  UR                  5        MH     U(       a  [        U[        5      (       d   S[        U5       35       e[        U[        5      (       d   S[        U5       35       e[        U5      [        U5      :X  d   S5       e[        X$5       HF  u  pxUR                  UR                  :w  d  M!  UR                  R                  UR                  5        MH     X44$ )NzOnly support static graph mode.z$inputs should be list, but received z:the number of `inputs_spec` should be equal to `inputs`'s.z$labels should be list, but received z:the number of `labels_spec` should be equal to `labels`'s.)r   r   r   r]   r   r   rh   zipr   desc	set_shape)	r   r   r   r   r   
input_specr   
label_specr   s	            r   _prepare_data_tensorEngine._prepare_data_tensor  s    2 2>??k400 6tK7H6IJ0 fd++ 6tF|nE+ {#s6{2 L2 &)%=!
##u{{2JJ(()9)9: &> k400 6tK7H6IJ0 fd++ 6tF|nE+ {#s6{2 L2 &)%=!
##u{{2JJ(()9)9: &> ~r   c                   U R                   U R                     nUR                  U R                     nUR	                  5       n/ SQnUR
                  S   R                  S:X  aJ  [        [        U5      5       H2  nUR
                  S   R                  U;   d  M"  UR                  SSS9  M4     UR                  5         / n[        UR
                  5       H(  u  pU	R                  U;   d  M  UR                  U5        M*     / n
/ n[        U5       H  nUR                  R                  5       nUR!                  UR
                  U   R                  5        U
R                  U5        [#        XLUR                  5       S9nUR                  U5        [%        U5      nUR'                  U5        M     U H  nUR
                  R)                  SU5        M!     [        [        U5      5       H  nXv==   [        U5      -  ss'   M     [        U5       H<  nUR
                  R+                  U5      n	UR                  R                  XS-   5        M>     UR                  5         SU R,                  U R                  '   g )	N)create_py_readercreate_double_buffer_readerreadr   r  F)sync)r   r#   T)r   r   dist_main_programsr   global_blockopsr   rangerh   
_remove_op_sync_with_cppr   rk   reversedr  _prepend_op	copy_fromr   r-   add_dist_op_for_programinsertr   r   )r   	feed_listdist_contextdist_main_progdist_main_blockrelated_reader_opsr   reader_op_indicesidxopread_ops_descnew_reader_opsnew_op_descnew_opdist_ops                  r   _prepare_readerEngine._prepare_reader  s.   **4::6%88H(557
 q!&&*<<3123"&&q)..2DD#..qu.= 4 	&&( !4!45GCww,,!((- 6
 -.C)..::<K!!/"5"5c":"?"?@  -;3C3C3EF !!&))&1G009 / %F&&q&1 %s,-.A C(9$::  / -.C $$((-B  ++Cq9 / 	&&(04!!$**-r   c                   0 nUb  [        U[        [        45      (       aV  [        U5      S:X  a9  [        US   [        5      (       a!  US   R                  5        H	  u  pVXdU'   M     OO[        SU 35      e[        U[        5      (       a  UR                  5        H	  u  pVXdU'   M     O[        SU 35      eUbO  [        U[        5      (       d   S[        U5      R                   35       eUR                  5        H	  u  pQXU'   M     U$ )Nr#   r   zUnsupported data z'user_feeds must be a dict, but receive )	r]   r   r   rh   ri   itemsr   r   rr   )r   r   
user_feedsmodefeedsrd   values          r   _prepare_feedEngine._prepare_feed&  s    $u..t9>ja$&?&?'+Aw}}&+d (7 %'8%?@@D$''#'::<KD"'$K $0 !#4TF!;<<!j$// 9$z:J:S:S9TU/ )..0
"d 1r   c                z  ^ ^^ Ub2  [        U[        5      (       d   S[        U5      R                   35       e/ m/ mT R                  (       a  TT4$ UUU 4S jnT R
                  U   nUR                  nUS:w  a  U" SUS   5        US:w  a.  US   n[        U5       H  u  pxU" S[        U5      -   U5        M     US:X  a  U" SUS   5        U=(       d    /  H  n	[        U	5      n
[        U
5        M     [        [        R                  5       Vs/ s H  oS   PM	     nnU=(       d    / nU" S	U5        TT4$ s  snf )
Nz)user_fetches must be a list, but receive c                   > / nU H]  nTR                  U5      (       d  M  [        U5      nUT;  a  TR                  U5        UR                  TR                  U5      5        M_     TR                  U5        g r   )_is_local_varr
   rk   index)
group_namevar_listgroup_indicesvarvar_namefetch_indicesfetch_namesr   s        r   _process_fetch_group3Engine._prepare_fetch.<locals>._process_fetch_groupH  sl    M%%c**+C0H{2#**84!(():):8)DE     /r   rM   r   r   metrics_outputsr#   fetches)r]   r   r   rr   r   r   serial_fetch_varsr   r   r
   r   r    r   FETCHES)r   user_fetchesr9  rI  r(  
fetch_varsr   r   rC  	usr_fetchrF  r   user_fetches_collectionrG  rH  s   `            @@r   _prepare_fetchEngine._prepare_fetch<  sV   #lD11 ;D<N<W<W;XY1  --		0 **40!33
9 F);<9 +G(1$Z#a&%8(C  29 Jy,AB%++I#I.H(O , !//F/F G#
 GG G 	  #
 +0bY1M))#
s   D8c                $   0 nUb  X(S'   Ub  US-   US'   Ub  XHS'   Sn	US:w  a  Xi   n
[        U
5      S::  d   eU
 H
  nX   US'   M     U	S-  n	U R                  U   nUR                  S   nU(       a  U R                   H  nXi   n/ nU H  nUR	                  X   5        M     U(       a]  UR
                  " U6   UR                  5       n[        [        R                  " U5      5       H  u  nnUXR                  5       U   '   M     U	S-  n	M     O)US:X  a#  Xi   n0 nU H  nX   US	U 3'   M     UUS
'   U	S-  n	[        [        R                  5      n0 nU H/  u  nnUU;   d  M  UR                  U5      nX   UU=(       d    U'   M1     UUS'   U$ )Nepochr#   steplrr   rM   r   r   outrL  rM  )rh   r   rN  rs   rk   update
accumulater   rm   rp   rd   r    r   rO  rA  )r   outsrW  rX  rY  rH  rG  r9  logs	group_idxloss_indicesr-  r(  metric_varsr   metrics_indices
metric_outresultsr   resoutputs_indiceslogs_outcollect_fetches
logs_fetchrd   rF  s                             r   _prepare_loggerEngine._prepare_loggerg  s    !M!8DL>J	9(3L|$)))##yV $NI..t4L&88CK"mmF&3&>O!#J."))$)4  /!z2"("3"3"5&/
0B0B70K&LFAs58Dq!12 'MNI , Y+6OH&(,	3se% '&DONI()@)@A
-ND(;&!''1/3y
4+8, . %Yr   c                "   U R                   U   nU R                  U   n[        R                  " S5      S;   a  [	        UUU R
                  SS9U l        U R                  R                  5        H  nU R                  U    H  nUR                  5        Hv  u  pgU R                  R                  US   R                  5      nU R                  R                  X5        U H(  n	U R                  R                  U	R                  5        M*     Mx     M     M     Sn
SnSnUR                  5       n[        U5        U R                   R"                  R$                  (       a   ['        S0 5      nUR)                  U/U/5        [+        UR-                  5       [.        R0                  5        U R                   R2                  R4                  (       aD  U R                   R2                  R6                  S:X  a   [9        XU R                   R2                  5        U R                   R"                  R:                  (       a1  0 nU R
                  US	'   ['        S
U5      nUR)                  U/U/5        U R                   R2                  R<                  (       ak  0 nU R
                  US	'   U R                   R2                  US'   ['        SU5      nUR?                  X5      nU H  nU R                  RA                  U5        M      US:X  Ga[  U RB                  (       GaI  U RD                  (       Ga7  URG                  U RH                  S   5      nURK                  5       (       Ga  [L        RN                  " X5         U R                   RP                  R4                  (       Ga  U R                   RP                  RR                  RU                  5       U R                   RP                  l)        [V        RL                  RP                  RX                  R[                  U R                   RP                  R\                  U R                   RP                  R^                  U R                   RP                  R`                  S9nSU RD                  l1        U R                   Vs/ s H  nURe                  U5      PM     nn[V        RL                  RP                  RX                  Rg                  U RD                  UU R                   RP                  RR                  U R                   RP                  R`                  U R                   RP                  Rh                  U R                   RP                  Rj                  U R                   RP                  Rl                  U R                   RP                  Rn                  U R                   RP                  Rp                  U R                   RP                  Rr                  U R                   RP                  Rt                  U R                   RP                  Rv                  U R                   RP                  Rx                  S9U l"        [{        U[.        R0                  5         [V        RP                  R}                  U R                   RP                  Rh                  U R                   RP                  Rn                  U R                   RP                  Rp                  U R                   RP                  Rj                  U R                   RP                  Rl                  U R                   RP                  Rr                  U R                   RP                  R4                  =(       a#    U R                   RP                  R`                  S:g  S9nUR                  U5      nSSS5        WR                  U RD                  WUS9u  nnO[{        U[.        R                  5         [V        R                  R                  R                  U5      nSSS5        [{        U[.        R                  5         U RD                  R                  UUWS9  SSS5        SSS5        OU R                  R                  S5        [        U5        US:X  a}  U R                   R                  R4                  (       aX  [        R                  " U R                   R                  R                  5       5      n['        SU5      nUR)                  U/U/5        U R                   R                  S:X  a  OU R                   R                  S:X  d    [        U5        US:X  a%  U RB                  (       a  U RD                  (       a  WnO/ n/ n[        R                  " UU5        UR-                  5       R                  5       nU R                  R                  U5        [        R                  " XU5        U R                   R2                  R<                  (       a  WR                  XU5      nU R                   R                  R4                  (       a   U R                   R                  R4                  (       a    US:X  a  U R                   R2                  R4                  (       ao  SU R                   R                  l        U R                   R2                  R                  U R                   R                  l]        SU R                   R                  l^        US:X  a  U R                   R                  R4                  (       ab  [        R                  " U R                   R                  R                  5       5      nSUS'   UUS'   ['        SU5      nUR)                  U/U/5        U R                   R2                  R4                  (       a/  U R                   R2                  R6                  S:X  a  [        U5        UR                  5       n[V        R                  R                  R                  R                  U5        [        U5        [        R                  " 5       (       aJ  [        R                  " S5        [        R                  " 5          [        R                  " U5        SSS5        [        R                  " 5       (       a.  [        R                  " S5        [        R                  " US  5      nU R                   R                  R                  b  S!U R                   R                  R                  ;   ag  [        R                  " 5       n U R                  S!0 5        U R                  U5        U R                   R                  R                  R                  S!5        U R                   R2                  R4                  (       a*  [        U/U/U R                   R2                  5      U ls        GO^US:X  GaW  U R                   R                  R4                  (       Ga1  [        U5      n!/ S"Qn"/ n#[        U R                   R                  R                  5       Hs  n$[        R                  " S#5      n%U%R                  U$5        U#RA                  U%5        [        R                  " S$5      n&U&R                  U$5        U#RA                  U&5        Mu     [        R                  " S%5      n'U'R                  S5        U#RA                  U'5        [        U R                   R                  R                  U"U!U#5      n([        R                  " U#U(5      U ls        U R                   R                  R                  GbI  U R                   R                  R                  (       Ga#  [        R                  " 5       n U R                   R                  R                   H~  n)U)S&:X  ac  SS'KzJ{n*  U R                  c
  U*" U5        M&  U R                  R                  5        H&  n+U R                  R                  U+5      n,U*" U,5        M(     Ml  U R                  U)0 5        M     U R                  c  U R                  U5        OMU R                  R                  5        H/  n+U R                  R                  U+5      n,U R                  U,5        M1     [        U5        UU R                  U'   XR                  U'   X0GR                   U'   gs  snf ! , (       d  f       G	N<= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f)(aQ  A concise and light weight parallel transform for auto parallel in pir mode.
Its logic consist of Four parts:
    1. Complete program: build a completion program with forward-backward-optimizer from a forward program. (if in train mode, maybe re-placed.)
    2. Parallelism completion: rule-based entire-graph sharding propagation(Semi-Auto) Or algorithm/random-based parallel search(Fully-Auto).
    3. Graph partition: Partition(Pipeline-like parallel) and Reshard Pass(SPMD parallel).
    4. Parallel related Optimization Pass. (maybe re-placed.)

It is experimental and subject to change.
FLAGS_enable_fused_ffn_qkv_passTruetrue1allr9  r   #replace_with_parallel_cross_entropyVPPconcrete_programauto_parallel_c_embedding_passpipeline_strategy auto_parallel_sync_shared_paramsrK   )custom_white_listcustom_black_listdtypeF)rg   	amp_listslevelr}  init_loss_scalingincr_every_n_stepsdecr_every_n_nan_or_inf
incr_ratio
decr_ratiouse_dynamic_loss_scalinguse_amp_guarduse_master_graduse_promotebfloat16)r  r  r  r  r  r  r   N)parameter_list)params_gradsz.loss value is not found, skip append backward.auto_parallel_recompute_pirz	semi-autorandomTgradient_sync_after_accumulater  !auto_parallel_gradient_merge_passz apply decompose in auto parallelz%apply auto_recompute in auto parallelc                b    [        U R                  S5      =(       a    U R                  S:H  5      $ )Nop_roler   )boolhas_attrr  )r.  s    r   <lambda>&Engine._parallel_pir.<locals>.<lambda>  s    4I 6 J2::?Kr   fused_gemm_epilogue_pass)forwardbackwardoptimizer  r  r  eliminate_transpose)eliminate_transpose_by_reshape)ry   rz   r   r   r7   rw  r   keysr7  rl   rA  rd   r&  removecloner0   rt   mp_optimizationru  r   applyr=   r  r   Forwardr   r   r   r6   replace_with_c_embeddingrz  sync_shared_parametersrk   rf   ro   get_output_value_by_name_loss_namesinitializedr   program_guardampr  upperr^   	decoratorAutoMixedPrecisionListsr{  r|  r}  _sortedget_parameter_value_by_nameOptimizerWithMixedPrecisionr  r  r  r  r  r  use_fp16_guardr  r  r   
GradScalerscaleminimizeBackwardautogradir_backwardappend_backwardOptimize_apply_optimizerw   r   	recomputer   r   to_dict	auto_moder   formatr4   r3   apply_reshard_passall_parametersprogram_helpercache_whole_graph_dist_attrr2   	apply_allsync_shared_parameter_gradientdp_optimizationsp_optimizationr   r   r   avgr5   r   	libpaddler   apply_dist2dense_passr9   r   _enable_dist_prim_allru   r   
prim_guarddecompose_dist_program_enable_auto_recomputeauto_recompute_pir_programfused_passesfused_passes_listPassManageradd_passrunr8   r   r   r  Jobset_micro_batch_idr   Plan0paddle.distributed.auto_parallel.static.pir_passr  	job_types
ir_programr}   r{   r|   )-r   r9  mix_fw_programstartup_programkfusionafter_fuse_namebefore_fuse_paramsrA  before_fuse_paramforward_op_start_idxbackward_op_start_idxopt_op_start_idxdist_program6auto_parallel_replace_with_parallel_cross_entropy_passconfigrx  %auto_parallel_sync_shared_params_passshared_paramspnamer   r~  parameter_value_listscalerscaledoptimizer_opsr   auto_parallel_recompute_pir_passglobal_params_grads
all_paramsr  dense_programpmsub_programsr  jobsr   forward_jobbackward_jobopt_jobtype_to_programr   r  job_typer  s-                                                r   _parallel_pirEngine._parallel_pir  s    --d3--d3 9967 <
 

 "=%%	"D '',,."003F?E||~; $ 9 9 ? ?.q166! 1188P1C- 55<< 1 6 6 2D @N 4 /  ! " &++-L)>>))MMEM5rFB CHH 1 	L557HNN##**''55>t~~/F/F >>))BBF)-)>)>F%&-50&.* +00 1 >>""CCF)-)>)>F%&*...*A*AF&'4<2F51 6LL  
 '))007 ' 7?tzzzdooo889I9I!9LMD!!)),H~~))000 NN..44::< **0 %+MM$5$5$?$?$W$W.2nn.@.@.R.R.2nn.@.@.R.R"&.."4"4":": %X %	
 38/ *.)B)B0)B )DDUK)B - 0
 +1--*;*;*E*E*a*a&*oo&/"&.."4"4":":"&.."4"4":":.2nn.@.@.R.R/3~~/A/A/T/T48NN4F4F4^4^'+~~'9'9'D'D'+~~'9'9'D'D59^^5G5G5`5`*...*<*<*K*K,0NN,>,>,N,N(,(:(:(F(F +b + 3(&.. &,ZZ%:%:26..2D2D2V2V+/>>+=+=+H+H+/>>+=+=+H+H37>>3E3E3X3X8<8J8J8b8b9=9K9K9d9d'+~~'9'9'@'@ (K$(NN$6$6$<$<
$J &; 	&F &,\\$%7F 7=oo OO"+? 7F 73| 3(&// !' ; ; K K$(!" ) 3(&// !OO;; $oL < y IHF !!D
 	L)7?t~~77>>]]4>>#;#;#C#C#EFF/7-v0, -22 1 >>##{2 ^^%%1]  	\*7?tzzdoo"."$L
 	((7JK
 "..0??A
77
C|lK>>""CC"G"f"f/B# >>))00 >>))00
 7?t~~66==37DNN))0''88 NN))1 15DNN))-7?t~~<<CC]]4>>#@#@#H#H#JKF7;F34%8F>"083V1- .33 1
 NN##**''55><( %**,!!77F$]3%%''LL;<""$--m< % &&((LL@A"==KM NN''99E*~~**<<= "BKK2B7FF=!NN''99@@* >>""))*-$..2I2IDN W_!>!>!E!E!EHL <I D4>>88@@A"hhy1..q1K(#xx
3//2L) B hhz*G&&q)KK .--55	O "YYt_=DN NN''99E++==="B^^00BB-- ~~-6}E(,(@(@(BH)-)B)B8)LJ::F )C
 KK2& C" ~~%}% $ 8 8 :H!%!:!:8!DJFF:& !; 	%]3+8""4(*6!!$'-<$$T*E0( *  y IHj %$s   'DAC*AB/F2AC*D1AB4?AAC* *AC*"AC*AC(AC*AC<B/AC*B4
AC	B>AC*C
AC	CAC*C
AC'	C"AC*C*
AC9C<
ADc           	        U R                   (       Ga2  [        R                  R                  U R                  R                  R
                  U R                  R                  R                  U R                  R                  R                  U R                  R                  R                  U R                  R                  R                  U R                  R                  R                  S9   U R                  U5        S S S 5        U R                  U5        U R                  5         U R                  X5        SU R                  U'   g U R                  U5        U R!                  U5        U R#                  U5        U R                  5         U R                  X5        U R%                  U5        SU R                  U'   g ! , (       d  f       N= f)N)r   r{  r|  r  r}  r  T)r   r^   r  	auto_castrt   r   r{  r|  r  r}  r  _buildr  
_init_comm_initializer   _plan	_parallel
_mark_prim)r   r9  init_parameterss      r   _prepare_programEngine._prepare_program  sJ   %%~~))00"&.."4"4"F"F"&.."4"4"F"Fnn((..nn((.. NN..:: &  D! t$OOT3'+Dt$ 	D

4t/#'4 = s   )F::
Gc                |   [        U R                  S   [        5      (       a  S n[        [	        U R
                  5      5       H)  nU R
                  U   nU R                  U   nU" X45        M+     [        [	        U R                  5      5       H)  nU R                  U   nU R                  U   nU" X45        M+     g g )Nr   c                (   [        U 5      nUR                  UR                  l        UR                  UR                  l        UR                  R                  S5        UR                  R                  S5        [        5       nUR                  U5        g )Nprocess_meshdims_mapping)r!   mesh	dist_attrr  r  mark_annotatedr*   add_dist_tensor_for_program)	input_varr  dist_tensordefault_dist_ctxs       r   _create_dist_input_var@Engine._process_dist_input_specs.<locals>._create_dist_input_var2  sq    /	:5?__%%25?5L5L%%2%%44^D%%44^D#B#D  <<[Ir   )r]   r   r+   r  rh   r   r   r   )r   r  rA  r  r  s        r   _process_dist_input_specs Engine._process_dist_input_specs/  s    d''*,@AAJ s4<<01 LL/	!..u5
&y= 2
 s4<<01 LL/	!..u5
&y= 2# Br   c                   [        5       (       d  U R                  (       Ga  [        R                  " 5         SU l        U R                  R                  S5        [        U R                  U R                  U R                  U R                  U R                  5      U l        [        R                  R                  5          U R                  R!                  U5        S S S 5        U R                  R"                  U l        U R                  R$                  nU R                  R&                  nU R                  R(                  U l        U R                  R,                  U l        U R                  R0                  nU R                  R2                  U l        U R                  R6                  U l        U R                  R:                  n[        R<                  " 5         GOU R>                  RA                  US 5      nUb  g / n/ n/ U l        U RB                  RE                  5       nU RF                  RE                  5       nU RH                  (       Gd  [J        RL                  " X#5         [        R                  R                  5          U R                   Vs/ s H  owRO                  5       PM     snU l        U R                   Vs/ s H  owRO                  5       PM     snU l        [P        RR                  " U R                  " U R*                  6 5      nUS:w  a  U R                  (       a  [U        U R                  [        RV                  RX                  5      (       d!  [[        U R                  5      (       d   S5       e[P        RR                  " U R                  " X@R.                  -   6 5      U l        US:w  ai  U(       d  U R.                  (       aQ  U R                   HA  nUR]                  [P        RR                  " UR^                  " X@R.                  -   6 5      5        MC     S S S 5        S S S 5        OQUS:X  aK  [U        U R                  [`        5      (       d   S5       e[P        RR                  " U R                  5      U l        U Rb                  (       a  X Rd                  U'   X0Rf                  U'   g [i        5       n	U	Rj                  (       d  [m        [o        [q        U Rr                  5      5      5        SU	l:        U R*                   V
s/ s H  n
[P        Rv                  " U
5      PM     sn
U l        U R.                   V
s/ s H  n
[P        Rv                  " U
5      PM     sn
U l        U R*                  U R.                  S.n[        R                  Ry                  U5      U R4                  US.nUS:w  a  URE                  SS	9n[P        Rz                  " U R                  U R4                  U R|                  U5        [        UUU R                  U R4                  UUU R                  U R|                  U R                  5	      U R>                  U'   [        UUU R                  U R4                  UUU R                  U R|                  U R                  5	      U R                  U'   U R|                  R                  U R>                  U   lD        U R|                  R                  U R>                  U   lE        URE                  5       U Rd                  U'   g ! , (       d  f       GN.= fs  snf s  snf ! , (       d  f       GN= f! , (       d  f       GN= fs  sn
f s  sn
f )
NTz'Building model with 'to_static' method.rM   zothe type of `loss` of the Engine arguments should be sub classes of `paddle.nn.Layer` or any callable function.rK   z>the type of `loss` of the Engine arguments should be Variable.)r   r   )rL  r   r   for_test)Fr   r   r^   disable_staticrw   r   r/   rb   rf   rs   r   r   r  r	   unique_nameguardbuild_programrw  main_programr  
input_varsr   
label_varsr   output_vars	loss_varsr   
loss_namesr  ra  enable_staticr   r   r   r  r   r   r   r  _create_feed_layerrm   rp   r]   r_   rF   r`   rk   computer   r   ry   rz   r*   has_annotationr<   r   r  r   data_parallelset_data_parallelflattenset_recompute_segmentsrt   r)   ro   r   r   rx   gradient_scale"gradient_scale_using_allreduce_avg)r   r9  serial_main_progserial_startup_progrL  r   r(  sr   default_ctxrE  	feed_varsrQ  s                r   r  Engine._buildF  s    2 2 2!!#!%DLLGH"/

!!!!#D ""((*##11$7 + %)$7$7$H$HD!#22??"&"5"5"E"E..99DL..99DL))55G..88DL#22==D))55G  "  ..224>L'GGDL#3399;"&"9"9"?"?"A###(()9O%%++- 9=8I8I$8I1,,.8I$DL 9=8I8I$8I1,,.8I$DL )00dll1KLGy(TZZ) JJ		   %djj11 N  2
 (2'9'9 JJ<<)?A( y(g&*mmF#NN * 2 2$*NNW||5K$M!" '4- . PO: !$**h77 T7  *11$**=  *:  &(;%57)) d5#678(,K%=A\\=Ic
,,S1\DL >B\\=Ic
,,S1\DL  $||t||D	 ||++G4LL

 7?/55t5D))KKt~~7G	
 %7OOLLMMNN
%
D! );OOLLMMNN
)
% 48>>3P3PD!0 NN== 		

, &6%;%;%=T"S +*@$$ .- POvs[   =\ \.=\\%\;\E\\.> ] 4 ] 
\
\
\+	&\..
\=c           	     .   U R                   R                  (       d  [        S5      eUS:X  d   eU R                  U5        U R	                  U5        U R
                  Ul        U R                  Ul        SSK	J
n  U" U R                  U   UU R                  U R                  UU R                  S9U l        U R                  R!                  5         U R                   R"                  (       a-  U R                  R%                  5       U R                  U   l        g g )Nz Please set `tuning.enable=True`.rK   r#   )OptimizationTuner)r  rank)r   r   r   r  r  _dp_world_sizesdp_world_size	_dp_ranksdp_ranktuner.optimization_tunerr2  r   r   r   r   _optimization_tunertunerun_after_tuningget_best_configrt   )r   r9  r
  r  r2  s        r   _optimization_tuningEngine._optimization_tuning  s    ||""?@@wD

4 $ 4 4..?#4%!$
  	  %%'<<(( 22BBD 	 )r   c                   U R                   c  Xl         O+U R                  R                  S:w  a  U R                  U5        [	        XR
                  U   5      U R                  U'   U R                  U   R                  5         U R
                  U   R                  S   nU R
                  U   R                  S   nU R
                  U   R                  R                  5       n/ nX#-    HG  nUR                  UR                  ;   d  M  UR                  UR                  UR                     5        MI     [        U S/ 5      U l        [        U S/ 5      U l        US;   d"  U R                  (       d  U R                   (       d~  / U l        / U l        U Hi  n["        R$                  " U R&                  XpR
                  U   5      u  pU R                  R                  U5        U R                   R                  U	5        Mk     g g g )Nsemir   r   r4  r6  )rL   predice)r   rt   r  _init_dist_contextr:   r   r   planserial_feed_varsserial_main_programr  rd   varsrk   getattrr4  r6  rm   get_input_split_infor   )
r   r9  
inputs_var
labels_varblockr'  rE  feed_varr5  r7  s
             r   r  Engine._plan  s   %!%^^%%/##D)&t-@-@-FGtt!!# ((.??I
((.??I
##D)==JJL	*Cxx5::%  CHH!56 +  't->C {B7&&$$T^^#%D DN%)3)H)HNNH.A.A$.G*& $$++M:%%g. &	 .<$r   c                    [        UU R                  U   R                  U R                  U   5      nU(       d'  UR	                  U R
                  U R                  5        g UR                  U R                  5        g r   )r1   r   	completerr   parallelr   re   parallel_all)r   r9  	all_ranksparallelizers       r   r   Engine._parallel   sd     $NN4 **%

 !!$..$2F2FG%%d&:&:;r   c                   U R                   U   nUR                  nU R                  nU R                   U   nUR                  nUR                  n[	        UR                  5       H  u  p[	        U	R
                  5       Hz  u  pXx   R
                  U
   nUR                  UR                  :X  d(   SU SUR                   SU SUR                   S3	5       eUR                  U5      nUR                  X5        M|     M     g )N'z' mode op 'z' is different with 'z' op 'z'. )	r   _original_serial_main_programr   blocksr   r  r   get_op_dist_attr_for_programset_op_dist_attr_for_program)r   r9  r(  origin_main_progref_moderef_dist_contextref_origin_main_prog
ref_blocksibrK  iopr.  ref_opref_op_dist_attrs                 r   rB  Engine._init_dist_context.  s    **40'EE%%..x8/MM)00
"#3#:#:;IB$UYY/#++C0ww&++- v[	1FxjPVW]WbWbVccfg- %AA&I ! 99"O 0 <r   c                L   U R                   S:  a  U R                  (       a$  [        5       nU H  nUR                  5         M     g [        5       nU R                  R
                  S:X  a!  [        R                  " XR                  5        g U H  nUR                  5         M     g g )Nr#   full_random)	r   r   r;   instantiatert   r  rm   initialize_pg_in_full_moder   )r   all_process_groupsprocess_groups      r   r  Engine._init_commB  s    <<!   &<%="%7M!--/ &8 "8!9~~''=855& &8M!--/ &8% r   c                    S nUR                  5       R                   HE  nUR                  5       S:X  d  M  SUR                  5       S   ;   d  M2  UR                  5       S   n  O   Ub  [	        5       R                  U5      R                  5       n[        U R                  R                  [        5      (       aE  UR                  [        R                  " U R                  R                  5      U R                  5        g g g )N
pd_op.datalearning_raterd   )r  r  rd   attrsr   rE  
get_tensorr]   ro   _learning_ratefloatsetr   float32_place)r   r  lr_namer.  buffer_tensors        r   _init_lrEngine._init_lrX  s    ++-11B	\)#rxxz&'99((*V, 2 (N..w7BBDM$//88%@@!!JJt==> A r   c                .   [        5       U l        [        U R                  [        R                  R
                  5      (       aJ  [        R                  R                  [        R                  R                  5       R                  5      U l        U R                  (       Ga  US:w  a  g U R                  R                  U R                  U   U R                  5        / nU R                  Gc  [        R                  R                  U R                  5      U l        U R                   U   R#                  5       nU R                  U   n0 nUR%                  5       R&                   HP  nUR)                  5       S:X  d  M  UR+                  S5      nX;  d   SU SU S35       eUR-                  S5      Xh'   MR     / n	UR%                  5       n
U
R&                   GH  nUR)                  5       S:X  a  UR+                  S	5      nO(UR)                  5       S
:X  a  UR+                  S5      nOMR  [/        5       R1                  U5      nU(       a  UR3                  5       R5                  5       (       a  UR7                  S5      nUR9                  5       nU
R;                  XR=                  5       5      nSUl        UR3                  5       R                  5       Ul         URC                  U5        U	RE                  U5        U	RE                  U5        GM5  X;   d  GM=  Xh   RF                  nXh   RH                  nUU:w  d  GMb  UR7                  S5      nURH                  U:X  d   eXh   RK                  5       nURM                  5       (       d  [        RN                  RP                  RR                  RU                  URV                  S/[Y        URH                  5      -  0 5      nUR[                  [        RN                  RP                  RR                  R]                  UR=                  5       U5      5        [R        R^                  " UR9                  5       5        [        R`                  Rc                  UU5      nUR>                  (       a  SUl        URE                  U5        URe                  S5      Rg                  U5        GM     U	 H  nURi                  5         M     [k        UR%                  5       [l        Rn                  5        [p        Rr                  " U5        [        RN                  RP                  RR                  Ru                  U5        [w        U5        U H  nSUR7                  S5      l        M     U R                  Ry                  U5        U Rz                  b%  U R                  R}                  U Rz                  5        g U R~                  R                  (       a  [        R                  " U R~                  R                  U R                  S   -   5        [        R                  R                  U R~                  R                  U R                  S   -   5        [        R                  " U R~                  R                  U R                  S   -   5        U R                  U   nUR                  U R                     nU R                  (       Ga-  U R                  R                  UU R                  U5        U R                  (       Ga  [Y        U R                  R                  5       5      S:  Ga  U R                  R                  5        GH  nUR%                  5       R                  UR(                  5      (       d  M4  UR%                  5       R                  UR(                  5      R                  n[/        5       R1                  UR(                  5      n[/        5       R                  UR(                  5      R3                  5       nU(       a  UR5                  5       (       a  M  U[        R                  :X  aA  UR                  [        U R                  UR                  5       5      U R                  5        GM.  U[        R                  :X  aA  UR                  [        R                  " UR                  5       5      U R                  5        GM  UR                  UR                  5       U R                  5        GM     U R                  Gc  [        R                  R                  U R                  5      U l        / nUR                  U R                     nUR                  5        Hc  n[/        5       R1                  UR(                  5      nU(       a%  UR3                  5       R5                  5       (       a  MR  URE                  U5        Me     / nU R                  S:  aO  UR%                  5       R&                   H1  n[        R                  " U5      (       d  M   URE                  U5        M3     UU-   nU(       a,  UR                  U5      n U R                  Ry                  U 5        [        U S5      (       aB  [        U S5      (       a1  U R                  XR                  U R                  U R                  5        U R~                  R                  (       aP  U R                  R                  S5        UR                  U R                     nU R                  Ry                  U5        g g )NrK   rm  rd   z
The value z in z is already existr   zbuiltin.set_parameterparameter_namezbuiltin.shadow_outputoutput_nameTrt  Fr#   _state_dict
_dist_attrz(NOTE: parameters will be re-initialized.)a_get_deviceru  r]   r^   r   	CUDAPlacer   ParallelEnvdev_idr   r  init_pirr{   r   r   Executorrz   r  r  r  rd   str_attrresultr   find_varrp  _is_initializedoperand_sourceget_defining_op	add_kwargr   persistable
place_attrreplace_all_uses_withrk   _local_shaper   r
  is_distr   r  r   create_tensor_dist_attributer  rh   set_typecvt_to_dist_typeset_insertion_point_after_C_ops
reshard_v2operand
set_sourceeraser=   r   r  r3   r  r  r9   r  r   	_set_planrt   seedr6  r   r  r   r  r   r   initrb   buffershas_varrE  r}  r  rs  r   numpyfloat16dist_startup_programs	list_varsr   rm   
is_comm_op_pruner   _set_state_dict_strictr}  r~  reinitrw   r   )!r   r9  r  changed_output_op_liststartup_progr)  name_map_valuer.  rF  del_opsrK  	scope_varparam
initial_op	new_paramlocal_shapeglobal_shape	src_valuedst_dist_attrsrc_dist_attrreshard_vardel_opr(  dist_main_programbuffer	dest_typerw  uninitializeddist_startup_progrE  	commu_opsreserved_vars_and_opsprune_startup_progs!                                    r   r  Engine._initializei  sD   !mdkk6#3#3#=#=>> **44""..077DK w (())$/ &("~~%!'!7!7!D#2248>>@!%!:!:4!@!#(557;;BwwyL0#%;;v#6'= (
$rd:KL= 4699Q<0 < $113))Bwwy$;;#%;;/?#@&==#%;;}#=  , 7 7 AI Y%9%9%;%K%K%M%M " 1 1! 4%*%:%:%<
$)OOHjjl$K	04	-/8/C/C/E/L/L/N	,33I>r*z2!3&4&>&K&K'5'?'E'E&,6(*(9(9!(<I#,??l#BB#B,:,D,N,N,PM#,#4#4#6#6060E0E0I0I0f0f$1$>$>%'D3y+?$?$&1"
 !* 2 2$*KK$9$9$=$=$N$N(1(8-%&!"
  99 ) 9 9 ; +1--*B*B )=+K  )448=	 5 6 = =b AJJqM44[AY $Z &FLLN & $L$=$=$?P00>%%))??M,\:0B7;B%%a(4 1""<0>>- NN,,T^^<>>KK++dnnQ.??@IINN4>>..1BBCKK++dnnQ.??@**40(;;DNNK$$!4;;
 {{{s4;;#6#6#89A="kk113F(557??LL-::< S-"U "
 %1N$;$;FKK$H	(N..v{{;FFH & %)F)F)H)H$$7)-- :$(KK!" !%	 '&..8)-- "

6<<> :DKK *--fllndkkJ5 48 >>!#]]33DKK@DNM , B B! )224(N33CHH=	!5!5!7!G!G!I!I$$S)	 5 I||a+88:>>B!,,R00!((, ? %2I$=!$%6%=%=)&" ""#56t]++l0K0K$$,,(8(8$// >>  LLHI , B B! NN01 !r   c                   [         R                  " S5      S;   am  U R                  U   nUR                  U R                     nSUl        [        R                  " U5      n[        R                  " X0R                  U5        XCl
        g g )N"FLAGS_enable_prim_after_distributern  T)r   r   r   r  r   _need_decomprm   get_grad_var_to_varupdate_grad_var_to_varrt   _grad_var_to_var)r   r9  r(  r  grad_var_to_vars        r   r  Engine._mark_prim   s    999: ?
 

  ..t4L , ? ? O-1*(<<O --!>>? 2A.
r   r#   
   r   rt  c                   SU l         U R                  U R                      (       d;  U R                  XU5      u  U l        U l        U R                  U R                   5        OU R                  U R                   5        U R                  U5      nU R                  USUUUS9nUc  [        U5      OUnU R                  SU R                   S9u  nn[        UU UUUUUUUU R                  5       U R                  R                  R                  (       a  SOU R                   S9nUR#                  S5        [%        U5       GH  n0 nUR'                  U5        [)        U5       GH!  u  nnU R+                  U5      nU H  n[,        R.                  R0                  R3                  UUS   US   S	9   UR5                  SUU5        U R6                  R9                  U R:                  UUU R                  R<                  U R                  R>                  S
9n[@        RB                  " U RD                  5      nU RG                  UUUUUUU R                   5      nURI                  SUU5        SSS5        M     U(       d  GM  UU:  d  GM"    O   U	(       ap  US-   U-  S:X  ad  U RK                  U	U
UUUUUU5      nURM                  5        VV s0 s H  u  nn SU-   U _M     nnn URO                  U5        U R                  S5        OU RQ                  5         URS                  UU5        GM     URU                  SW5        U RV                  $ ! , (       d  f       GM  = fs  sn nf )aP  
Trains the model for a fixed number of epochs. If `valid_data` is set,
evaluation will be done at the end of each epoch.

Args:
    train_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
    train_sample_split (int|None, optional): Each sample of the train dataset is assumed
        to be a (input, label) pair by default and has two items. If each sample has
        more than two items, train_sample_split specifies how to split these items into
        input and label. The items before it are input and the left are label. Default: None.
    batch_size (int, optional): The batch size of train_data and valid_data if provided.
        The user's data will be used directly without batching if set to None. Default: 1.
    epochs (int, optional): The number of epochs to train the model. Default: 1.
    steps_per_epoch (int|None, optional): The total number of steps (batches of samples)
        is executed in one epoch before stating the next one. If None, it is equal to
        the number samples in your dataset divided by the batch size. Default: None.
    valid_data (Dataset|None, optional): An instance of paddle paddle.io.Dataset used for
        evaluation at the end of epoch. No evaluation will be done if set to None.
        Default: None. (Unsupported for now)
    valid_freq (int, optional): Only relevant if valid_data is provided. This specifies
        how many training epochs before a new evaluation is performed. Default: 1.
    valid_sample_split (int|None, optional): Only relevant if valid_data is provided.
        Each sample of the valid dataset is assumed to be a (input, label) pair
        by default and has two items. If each sample has more than two items,
        valid_sample_split specifies how to split these items into input and label.
        The items before it are input and the left are label. Default: None.
    valid_steps (int|None, optional): Only relevant if valid_data is provided.
        It is the total number of steps (batches of samples) to draw before
        stopping validation at the end of every epoch. If None, validation will run until the
        `valid_data` dataset is exhausted. The validation will start from the
        beginning of the dataset at each epoch. Default: None.
    collate_fn(callable|None, optional): function to generate mini-batch data by merging
        the sample list, None for only stack each fields of sample in axis
        0. Default None.
    callbacks (Callback|None, optional): A list of `Callback` instances to apply
        during training. Default: None. (Unused for now)
    nvprof_range(list, optional): A list of integers indicating nvprof ranges in form of [start_step, end_step]. Note that if start_step >= end_step, the nvprof will not apply.

Returns:
    None

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.vision.transforms as T
        >>> from paddle.distributed.fleet import auto
        >>> from paddle.vision.datasets import MNIST

        >>> transform = T.Compose([
        ...     T.Transpose(),
        ...     T.Normalize([127.5], [127.5])
        >>> ])
        >>> train_dataset = MNIST(mode='train', transform=transform)

        >>> model = paddle.vision.models.LeNet()
        >>> loss = paddle.nn.CrossEntropyLoss()
        >>> optimizer = paddle.optimizer.Adam(
        ...     learning_rate=0.001, parameters=model.parameters())
        >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

        >>> engine = auto.Engine(model, loss, optimizer, metrics)
        >>> engine.fit(train_dataset,
        ...             epochs=2,
        ...             batch_size=64)
rK   F)return_listr  epochs
collate_fnNrs  r#   )
enginer  r  stepslog_freq	save_freqsave_dirverboser   acc_stepr   )iter_idstartendfeed
fetch_listuse_program_cachereturn_numpyval_),r   r   r  r   r   r  _switch_mode_validate_batch_size_prepare_dataloaderrh   rT  r$   _metrics_namert   r   r   r   on_beginr  on_epoch_beginr   _validate_batchr^   profilerr	   _nvprof_rangeon_batch_beginr   r  r  	use_cacher  rm   get_lrrg   rj  on_batch_endevaluater7  r[  _reset_metricson_epoch_endon_endr   )!r   
train_datatrain_sample_splitr  r  steps_per_epochr  r  r  
valid_datavalid_sample_split
valid_freqvalid_stepsr  	callbacksr  nvprof_rangelocal_batch_sizetrain_dataloaderrH  rG  cbksrW  r^  rX  r   batchesmicro_batchr]  rY  val_logsrd   vals!                                    r   fit
Engine.fit1  sE   l 
!!$**-373J3J
40Dt0 !!$**-djj)44Z@33'! 4 
 &  !  	 &*%8%8DJJ%8%O"]'!&&(^^,,33
  	g6]ED&()9:e..u5#*K..<< $*1o(O = 
 ++GT4@#~~11 --!,'2.2nn.F.F)-)D)D  2   (..t~~>#33 ! ') JJ  ))'4>1  $+6 #?t'>?  ;B uqyJ6!;==&	 9A8H8H94FTM3&8H   H%!!'*##%eT*q #t 	GT"||g Ps   )B7L?	M?
Mc	           
        SU l         U R                  U R                      (       d;  U R                  XU5      u  U l        U l        U R                  U R                   5        OU R                  U R                   5        U R                  U5      n	U R                  USU	US9n
Uc  [        U
5      OUnU R                  SU R                   S9u  p[        UU U	UUU R                  5       S9nUnUR                  SXR                  5       S.5        0 n[        U
5       H  u  nnU R                  U5      nU He  nUR!                  SUU5        U R"                  R%                  U R&                  UUU R(                  R*                  U R(                  R,                  S9nMg     U(       a  UU:  a    O7U R/                  WSUSXU R                   5      nUR1                  SUU5        M     UR3                  SU5        U R5                  5         U$ )	a  
Evaluate the loss and metrics of the model on evaluation data.

Args:
    valid_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
    valid_sample_split (int|None, optional): Each sample of the eval dataset is assumed
        to be a (input, label) pair by default and has two items. If each sample has
        more than two items, valid_sample_split specifies how to split these items into
        input and label. The items before it are input and the left are label. Default: None.
    batch_size (int, optional): The batch size of valid_data. The user's data will
        be used directly without batching if set to None. Default: 1.
    steps (int|None, optional): It is the total number of steps (batches of samples) to draw before
        stopping evaluation. If None, evaluation will run until the `valid_data` dataset is exhausted.
        The evaluation will start from the beginning of the dataset in each run. Default: None.
    collate_fn(callable|None, optional): function to generate mini-batch data by merging
        the sample list, None for only stack each fields of sample in axis
        0. Default None.
    callbacks (Callback|None, optional): A list of `Callback` instances to apply
        during evaluating. Default: None. (Unused for now)

Returns:
    None

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.vision.transforms as T
        >>> from paddle.distributed.fleet import auto
        >>> from paddle.vision.datasets import MNIST

        >>> transform = T.Compose([
        ...     T.Transpose(),
        ...     T.Normalize([127.5], [127.5])
        >>> ])
        >>> valid_dataset = MNIST(mode='test', transform=transform)

        >>> model = paddle.vision.models.LeNet()
        >>> loss = paddle.nn.CrossEntropyLoss()
        >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

        >>> engine = auto.Engine(model, loss, metrics=metrics)
        >>> engine.evaluate(valid_dataset, batch_size=64)

rL   Fr  r  r  Nrs  )r  r  r  r  r   )r  r   r  )r   r   r  r   r   r  r  r  r  rh   rT  r$   r  r  r   r  r  r   r  r  rt   r  r  rj  r  r  r  )r   r  r  r  r  r  r  r  r  r  valid_dataloaderr  rH  rG  r  
eval_stepsr^  rX  r   r  r  r]  s                         r   r  Engine.evaluate  s   r 
!!$**-373J3J
40Dt0 !!$**-djj)44Z@33'!	 4 
 49=#./e%)%8%8DJJ%8%O"'&&(
 %
j5G5G5IJ	
 $%56KD%**51G&##FD$7~~))%%$*&*nn&>&>!%!<!< *   ' 4?#:''dD$DJJD fdD1# 7$ 	FD!r   c           
        SU l         U R                  U R                      (       d;  U R                  XU5      u  U l        U l        U R                  U R                   5        OU R                  U R                   5        U R                  U5      nU R                  USUUS9n	Uc  [        U	5      OUn
U R                  SU R                   S9u  p/ n[        X`US9nU
nUR                  SSU05        0 n[        U	5       H  u  nnU R                  U5      nU He  nUR                  SUU5        U R                   R#                  U R$                  UUU R&                  R(                  U R&                  R*                  S9nMg     U
(       a  UU
:  a    ObU R-                  WSUSXU R                   5      nUR/                  SUU5        UR1                  [3        US	   R5                  5       5      5        M     UR7                  SU5        U$ )
a$  
Compute the output predictions on testing data.

Args:
    test_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
    test_sample_split (int, optional): Each sample of the test dataset is assumed
        to be a (input, label) pair by default and has two items. If each sample has
        more than two items, test_sample_split specifies how to split these items into
        input and label. The items before it are input and the left are label. Default: None.
    batch_size (int, optional): The batch size of test_data. The user's data will
        be used directly without batching if set to None. Default: 1.
    steps (int, optional): It is the total number of steps (batches of samples) to draw before
        stopping predict. If None, predict will run until the `test_data` dataset is exhausted.
        The predict will start from the beginning of the dataset in each run. Default: None.
    collate_fn(callable, optional): function to generate mini-batch data by merging
        the sample list, None for only stack each fields of sample in axis
        0. Default None.
    callbacks (Callback|None, optional): A list of `Callback` instances to apply
        during testing. Default: None. (Unused for now)

Returns:
    None

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.vision.transforms as T
        >>> from paddle.distributed.fleet import auto
        >>> from paddle.vision.datasets import MNIST

        >>> transform = T.Compose([
        ...     T.Transpose(),
        ...     T.Normalize([127.5], [127.5])
        >>> ])
        >>> valid_dataset = MNIST(mode='test', transform=transform)

        >>> model = paddle.vision.models.LeNet()

        >>> engine = auto.Engine(model)
        >>> engine.predict(valid_dataset, batch_size=64)
rM   Fr  Nrs  )r  r  r  r  rL  )r   r   r  r   r   r  r  r  r  rh   rT  r$   r  r   r  r  r   r  r  rt   r  r  rj  r  rk   r   r   r  )r   	test_datatest_sample_splitr  r  r  r  r  r  test_dataloaderr  rH  rG  rL  r  
test_stepsr^  rX  r   r  r  r]  s                         r   rM   Engine.predicta  s   j 
!!$**-373J3Jj40Dt0 !!$**-djj)44Z@22'!	 3 
 38-#o.U%)%8%8DJJ%8%O"	H$
i':!67$_5KD%**51G&##ItT:~~))%%$*&*nn&>&>!%!<!< *   ' 4?#:''dD$DJJD it4NN4Y 6 6 89:% 6& 	It$r   c                p   Ub  U R                  U5        U R                  U R                     (       d;  U R                  XU5      u  U l        U l        U R                  U R                  5        OU R                  U R                  5        U R                  U5      nU R                  USUUUUUUUU	U
UUUS9nU$ )NF)r  r  shuffle	drop_lastr  num_workersuse_buffer_readeruse_shared_memorytimeoutworker_init_fnr  r  places)
to_moder   r   r  r   r   r  r  r  r  )r   r
  r  r  r  r  r  r  r  r  r  r  r  sample_splitr9  r  r   s                    r   r   Engine.dataloader  s    $ LL!!$**-373J3Jz40Dt0 !!$**-djj)..z:
--!!#//)+ . 

  r   c	                0   Ub  U R                  U5        U R                  (       d  [        S5      eU R                  U R                     (       a  g U R	                  U5      nU R	                  U5      nU R                  U5      nU R                  U5      nXPl        X`l        U(       d  U(       aj  SU l        U R                  XX45      u  p4U R                  c  [        R                  " 5       U l        U R                  c  [        R                  " 5       U l        OU(       d  U(       aV  SU l        U R                  c  [        R                  " 5       U l        U R                  c  [        R                  " 5       U l        O)U R                  (       a  U R                  (       d   S5       eXsU l        U l        X4sU l        U l        U R                  U R                     (       d  U R%                  U R                  U5        g U R'                  U R                  5        g )Nz7Please set mode to be prepared with `prepare(mode=...)`Tz;Please call the dataloader(...) before calling prepare(...))r  r   r   r   r   _validate_varsr   r   r   r  r   r   r   r   r   r   r   r   r  r  )	r   r   r   r   r   r  r  r9  r  s	            r   prepareEngine.prepare  s    LLzzI  djj)))+6))+6$$V,$$V,+"1V#D!66&NF ##+'-'B'B'D$&&.*0*H*H*J'K'+D$##+'-'B'B'D$&&.*0*H*H*J'$$):): M: 0;,4,%+"dl!!$**-!!$**o>djj)r   c           	        Ub  U R                  U5        U R                  XU R                  5      nU R                  X0R                  5      u  pgU R                  (       a.  U R
                  U R                     (       d  U R                  5         U R                  U R                  l        U R                  R                  nU R                  (       a  SnSn	U R                  c  U R                  n
OiSnU R                  R                  R                  S:X  a(  U R                  R                  R                   nSUS-
   3nU R                  R#                  U5      n
U
R%                  U R&                  S   5      n[(        R*                  " U5      (       a  Sn	/ nOU/nX`R,                  -  nU R                  R/                  U R                  UUUU R                  R0                  S9nU R                  (       aH  W	(       a  S S S.nSnOUS   US   S.nSn[3        U R4                  5       H  u  nnUUU-      UU'   M     U$ U R7                  US S S XgU R                  5      nU$ )	NFr  rv  r#   r   Tr  )rL  r   )r  r<  r   rT  r   r   r4  r   r   rt   r  r   r   r  r   r   
vpp_degreer  r  r  r   is_fake_valuer~   r  r  r   r   rj  )r   r   r  r  r9  	feed_dictrH  rG  r  no_fetchprogram_for_executorloss_job_typer  
loss_valuer]  r^  	start_idxr   rd   s                      r   r  
Engine.run.  s*    LL&&t4::>	%)%8%8ZZ%P"$$--djj9  " -- 	3
 NN,,	IH~~%'+'8'8$ !*>>**88EA!%!8!8!C!CJ&-j1n-=$>M'+~~'@'@'O$-FF  #J   ,, )l111K~~!!"'44 " 
 #'6	#'7DG<	$T%G%GH4!)a-0T
 IK##$dK


 r   c                X   U R                   U R                     nUR                  U R                     nUR                  U R                     nUR                  5       nUR                  S   nUR                  S   n/ nXV-    H  nUR                  UR                  ;   a*  UR                  UR                  UR                     5        MG  UR                  XR                  5      n	U	R                  R                  UR                  R                  5       5        UR                  U	5        M     U$ )Nr   r   )r   r   r  r   r  r  rD  rd   rF  rk   _clone_variabler  r  set_original_idoriginal_id)
r   r(  r)  r  r*  rI  rJ  r'  rE  copy_vars
             r   get_feed_listEngine.get_feed_list{  s    **4::6%88H(>>t~~N(557 "228<
!228<
	*Cxx?///  !5!5chh!?@*::3P--chh.B.B.DE  * + r   c                r    U R                   U R                  -    Vs/ s H  oR                  PM     sn$ s  snf r   )r   r   rd   )r   r   s     r   get_feed_name_listEngine.get_feed_name_list  s/    &*&7&7$:K:K&KL&Kd		&KLLLs   4c                   U R                   U R                     nUR                  U R                     nUR                  U R                     nUR                  5       nUR                  S   nUR                  S   n/ nUU-    H  nUR                  UR                  ;   a*  UR                  UR                  UR                     5        MG  UR                  UUR                  5      nUR                  R                  UR                  R                  5       5        UR                  U5        M     [        R                   " UU5         [#        U40 SU_SU_SU_SU_SU_SU_S	U_S
U_SU_SU	_SU
_SU_SU_SU_SU R$                  R&                  _SU R(                  _SU R*                  _6nS S S 5        U$ ! , (       d  f       W$ = f)Nr   r   r'  r  r  r  r  r  r  r  r  r  r  r  r  r  
split_datadata_parallel_world_sizedata_parallel_rank)r   r   r  r   r  r  rD  rd   rF  rk   r&  r  r  r'  r(  r   r  r,   rt   r0  r4  r6  )r   r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r(  r)  r  r*  rI  rJ  r'  rE  r)  r   s                            r   r  Engine._prepare_dataloader  s   " **4::6%88H(>>t~~N(557 "228<
!228<
	
*Cxx?///  !5!5chh!?@*::3P--chh.B.B.DE  * + !!.2CD.#  (	
 &   $ & ( #4 #4    .  !0   >>44!" *.)=)=#$ $(>>%J E, - ED, s   A&F33
Gc                    SU l         U R                  XU5      u  U l        U l        U R	                  U R                   X5        g )NrK   )r   r  r   r   r=  )r   	tune_datatune_sample_splitr  s       r   _tuneEngine._tune  s?    
/3/F/F*0
,4, 	!!$**iDr   c                   Uc  g [        [        U R                  5      5      S:X  d'   S[        [        U R                  5      5       S35       eXR                  S   -  S:X  d   SU SU R                  S    S35       eXR                  S   -  $ )Nr#   zGDistributedBatchSampler only support one data parallel group, but got [z ] different data parallel groupsr   zbatch_size [z%] is not divisible by dp_world_size [])rh   rs  r4  )r   r  s     r   r  Engine._validate_batch_size  s    3t++,-2 	
UVYZ]^b^r^rZsVtUu  vV  W	
2 0033q8 	
:,&KDL`L`abLcKddef	
8 11!444r   c           	         Uc  S /$ U R                   R                  R                  (       d  U R                  S:X  a  U$ / n/ nUS   R	                  5        H[  u  pEUR                  U5        UR                  [        R                  " [        R                  " U5      U R                  S5      5        M]     / n[        U R                  5       H:  nU Vs/ s H  oU   PM	     n	nUR                  [        [        X)5      5      5        M<     U$ s  snf r   )rt   r   r   r   r7  rk   r   r  arrayr  ri   r  )
r   r   
feed_namessplit_batches	feed_namecur_feedbachesr   split_batchr  s
             r   r  Engine._validate_batch  s    =6M>>""))T__-AL JM',Qx~~'7#	!!),$$HHRXXh/!D (8
 F4??+ANO+1~Od3z#?@A , M Ps   Dc                   [         R                  " U5      nUb  [        U5       H  u  p#[        U[        5      (       d   [        U[
        5      (       d  [        S5      eUR                  c  [        SU SU S35      eU R                  S:  d  Mk  [        UR                  5      nUS   U R                  -  S:X  d%   SUR                  S    SU R                   S	35       eUS==   U R                  -  ss'   XCl        M     U=(       d    / $ )
NzS'spec' must be object of class `paddle.static.InputSpec` or `DistributedInputSpec`.zRequires Input[z(].name != None, but receive `None` with r   r#   r   zRequires batch_size[z] to be divisible by k_steps[z].)rm   rp   r   r]   r   r+   ra   rd   r   r   r   r   )r   r  r   r   r   s        r   r   Engine._validate_spec  s   ""5)$U+!$	22:.< < $m  99$$)!,TUYTZZ[\  ??Q& ,E 8doo5: .tzz!}o=Z[_[j[jZkkmn: !H0H!&J# ,$ {r   c                    [         R                  " U5      nUb4  [        U5       H%  u  p#[        U[        5      (       a  M  [        S5      e   U=(       d    / $ )Nz'var' must be a `Variable`.)rm   rp   r   r]   r   ra   )r   rF  r   rE  s       r   r  Engine._validate_vars	  sM    !!$'#D/!#x00#$ABB * zrr   c                d    [        U5      nX R                  R                  5       R                  ;   $ r   )r
   r  r  rF  )r   rE  rF  s      r   r@  Engine._is_local_var	  s*    $,,99;@@@@r   c                J    U R                    H  nUR                  5         M     g r   )rs   reset)r   r   s     r   r  Engine._reset_metrics	  s    mmFLLN $r   c                    U R                   (       a  S/O/ nU R                   H6  nUR                  [        R                  " UR                  5       5      5        M8     U$ )Nr   )rf   rs   r   rm   rp   rd   )r   metrics_namems      r   r  Engine._metrics_name#	  sD    #'::x2A
 2 21668 <= r   c                X    XR                   ;   d
   U S35       eU R                  U5        g )Nz3 model is not ready, please call `prepare()` first.)r   r  r   r9  s     r   r  Engine._switch_mode)	  s4    *** 	
fGH	
* 	Tr   c                2    US;   d   SU S35       eXl         g )NrJ   zmode z. should be one of ['train', 'eval', 'predict'])r   rS  s     r   r  Engine.to_mode/	  s6     
 
 	H 4&FG		H 

 
r   c           
     P   U R                   U   nUR                  U R                     n[        R                  " Xe5      n[        X4U5      nUR                  US9nUR                  5       R                  5        H  u  p[        R                  " U
5      nX;  a  M"  UR                  X9   R                  :w  d  M@  U R                  R                  SU	 SX9   R                   SUR                   S35        X9   R                  UR                  5      X9'   M     UR                  U5        g )N)strictzcast z's dtype from 'z' to 'rV  )r   r  r   rm   get_dist_attrr'   convert
state_dictr7  r   r=  r}  rw   r   astypeset_state_dict)r   r9  rX  r[  r
  r(  programcur_dist_attr	converterrd   r  param_arrays               r   r  Engine._set_state_dict7	  s
   **4011$..A"00Gj]C	&&f&5
"--/557KD((5/K%  J$4$:$::!!D61A1G1G0H{O`O`Naabc $.#3#:#:;;L;L#M
  8 	z*r   c                L   U(       aw  U R                   U R                  ;   d   eU R                  U R                      nUR                  nUR                  U R                     nU R
                  R                  UUUUS9  gSU R                  ;   d   eU R                  S   nUR                  S   nUR                  S   nUR                  U R                     nU R                  R                  R                  (       Ga  U R                  R                  R                  (       a  SSKJn  U R                  R!                  S5        U R                  R!                  SU R                  R                  R#                  5        35        [%        [&        R(                  " UR*                  5      S	S
9n	U" [-        5       U R.                  5      n
U	R1                  5        H  nU
R3                  U5        M     U	R5                  5       nU R
                  R7                  UUUU R8                  US9  g)a  
Saves the model, parameters, optimizer state to path.
If `training` is set to False, only inference model will be saved.

Args:
    path (str): The file prefix to save model. The format
        is 'dirname/file_prefix' or 'file_prefix'. if empty str.
        A exception will be raised.
    training (bool, optional): Whether to save for training. If not, save
        for inference only. If `training` is set to True, the optimizer state
        will be saved. Otherwise, only the model and parameters are saved.
        This function will silently overwrite existing file at the target
        location. Default: True.

Returns:
    None

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.vision.transforms as T
        >>> from paddle.distributed.fleet import auto
        >>> from paddle.vision.datasets import MNIST

        >>> transform = T.Compose([
        ...     T.Transpose(),
        ...     T.Normalize([127.5], [127.5])
        >>> ])
        >>> train_dataset = MNIST(mode='train', transform=transform)

        >>> model = paddle.vision.models.LeNet()
        >>> loss = paddle.nn.CrossEntropyLoss()
        >>> optimizer = paddle.optimizer.Adam(
        ...     learning_rate=0.001, parameters=model.parameters())
        >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

        >>> engine = auto.Engine(model, loss, optimizer, metrics)
        >>> engine.fit(train_dataset,
        ...             epochs=1,
        ...             batch_size=64)
        >>> engine.save("./my_model")

)serial_programr  r(  rM   r   rL  r   )QuantWeightPasszexport quantized model.zconvert config Tr  )r^  N)r   r   rE  r  r   r   saverD  rN  rt   qatr   onnx_formatpaddle.static.quantizationre  rw   r   r  r   r   Graphr  r   ru  all_sub_graphsr  
to_programsave_inference_modelr   )r   r   trainingr(  rd  r)  r/  rQ  re  
test_graphquant_weight_pass	sub_graphs               r   rf  Engine.saveH	  s   \ ::!4!4444..tzz:L)==N)<<T^^LNKK-"0)	    3 3333..y9L$55h?I%77	BJ)<<T^^LN~~!!(((T^^-?-?-K-KF!!";<!!%dnn&8&8&@&@&B%CD %JJ~223d
 %4LNDKK$P!!+!:!:!<I%++I6 "=!+!6!6!8KK,,& - r   c                    X l         U R                  R                  X5      u  U l        U l        U R                  U R                  4$ )a	  
Load the stored model, parameters and optimizer states.

Args:
    path (str): The prefix of files storing the model states and
        optimizer states.
    strict (bool, optional): Whether to skip the loading of mismatch
        parameter or raise an error when mismatch happens (not found
        the parameter in file storing model states of or receives a
        mismatch shape). Default: True.
    load_optimizer (bool, optional): If True, the stored optimizer
        states is restored. Otherwise, the optimizer states is initialized
        from scratch. Default: True.

Returns:
    None

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.vision.transforms as T
        >>> from paddle.distributed.fleet import auto
        >>> from paddle.vision.datasets import MNIST

        >>> transform = T.Compose([
        ...     T.Transpose(),
        ...     T.Normalize([127.5], [127.5])
        >>> ])
        >>> train_dataset = MNIST(mode='train', transform=transform)

        >>> model = paddle.vision.models.LeNet()
        >>> loss = paddle.nn.CrossEntropyLoss()
        >>> optimizer = paddle.optimizer.Adam(
        ...     learning_rate=0.001, parameters=model.parameters())
        >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

        >>> engine = auto.Engine(model, loss, optimizer, metrics)
        >>> engine.fit(train_dataset,
        ...             epochs=1,
        ...             batch_size=64)
        >>> engine.save("./my_model")
        >>> engine.load("./my_model")

)r  r   r   r}  r~  )r   r   rX  load_optimizers       r   r   Engine.load	  sB    b ,0KK,<,<-
)$/ 00r   c           	        U R                   R                  S:X  a  U R                  R                  S5        gUb  UOU R                  nUc   S5       eX0R
                  ;  a2  [        SU S[        U R
                  R                  5       5       35      eU R                  U5        Ubc  U R
                  U   (       dO  U R                  U5      U l        U R                  U5      U l        U R                  U5        U R                  U5        O[        5       (       d  U R                   (       a  [        S5      eU R                  R                  S5        ["        R$                  R'                  5       nUR)                  5       R*                  (       a  UR)                  5       R*                  (       d  U R
                  U   (       d  [        S5      e[-        X5      u  pVUR.                  U4$ )	aU  
Get and Print cost, including memory of every rank,
max memory among all ranks, and the global cost of one step based on
communication cost(computation cost is 0 by default).
In the future, the flops information of every rank and global cost including
computation cost will be added.

Args:
    inputs_spec(InputSpec): The specification of inputs. Default: None.
    labels_spec(InputSpec): The specification of labels. Default: None.
    mode (str): The engine mode must be in ["train", "predict", "eval"]. Default: None.

Returns:
    Return the global execution time (ms) and max memory (B).

fullzMThe cost will be calculated in the search process when the auto mode is full.NzPlease set mode.z	The mode z is not in accepted modes z\Please call `prepare()` or `fit()` or  `evaluate()` or  `predict()` before calling `cost()`.zThe program whose cost to be estimated must be static default program. Otherwise, please call `prepare()`before calling `cost()`.)rt   r  rw   r   r   r   r   r   r  r  r   r   r   r  r  r   r   r^   r   r   r  r  r(   time)r   r   r   r9  r^  global_cost
max_memorys          r   costEngine.cost	  s   . >>##v-LL_  'tTZZ3!33)))D6!;DASASAXAXAZ<[;\]  	T"4+=+=d+C $ 3 3K @D $ 3 3K @DKKJJt  D$6$6 r  !! X !--<<>,,.22"//155,,T2$v 
 #7t"B++r   c                    U R                   (       a  U R                  U R                     $ U R                  U   R                  U R
                     $ r   )r   r{   r   r   r  r   rS  s     r   get_dist_main_programEngine.get_dist_main_program
  s@    ,,TZZ88""4(;;DNNKKr   c                    U R                   (       a  U R                  U R                     $ U R                  U   R                  U R
                     $ r   )r   r|   r   r   r  r   rS  s     r   get_dist_startup_programEngine.get_dist_startup_program
  s@    //

;;""4(>>t~~NNr   c                t    U R                   (       a  U R                  U   $ U R                  U   R                  $ r   )r   ry   r   rE  rS  s     r   get_serial_main_programEngine.get_serial_main_program 
  s3    ''--""4(<<<r   c                t    U R                   (       a  U R                  U   $ U R                  U   R                  $ r   )r   rz   r   serial_startup_programrS  s     r   get_serial_startup_program!Engine.get_serial_startup_program%
  s3    &&t,,""4(???r   c                    U R                   (       a  U R                  U R                     $ U R                  U R                     nUR                  U R
                     $ r   )r   r}   r   r   r  r   r   r(  s     r   r  Engine.main_program*
  sI    --djj99**4::6..t~~>>r   c                f    U R                   U R                     nUR                  U R                     $ r   )r   r   r  r   r  s     r   r  Engine.startup_program1
  s+    **4::611$..AAr   c                4    U R                   U R                     $ r   )r   r   r   s    r   r(  Engine.dist_context6
  s    ""4::..r   c                L    U R                   U R                     nUR                  $ r   )r   r   rE  r  s     r   rE  Engine.serial_main_program:
  s"    **4::6///r   c                L    U R                   U R                     nUR                  $ r   )r   r   r  r  s     r   r  Engine.serial_startup_program?
  s"    **4::6222r   c                L    U R                   U R                     nUR                  $ r   )r   r   rD  r  s     r   r/  Engine.feed_varsD
  s"    **4::6,,,r   c                L    U R                   U R                     nUR                  $ r   )r   r   rN  r  s     r   rQ  Engine.fetch_varsI
  s"    **4::6---r   c                    U R                   U R                     nUR                  (       a  UR                  $ U R                  $ r   )r   r   _serial_optimizerro   r  s     r   rg   Engine.optimizerN
  s5    **4::6))111r   c                    U R                   $ r   )r   r  s    r   r   Engine.inputsU
      ||r   c                    U R                   $ r   )r   r  s    r   r   Engine.labelsY
  r  r   )9r   r   r   r~  r   r6  r4  r   r   rx   ry   r   r   r   r   r   r   r   r   r   rw   rf   r  r   rs   r   rb   r   r9  ro   r   r   r   r   r   re   rl   r}   r{   r|   r~   r   ru  r   r   r   r   rz   r}  rt   r  r   rw  r   r   r   r  )NNNNNN)r   z!Layer | Callable[..., Any] | Noner   z*Layer | Callable[..., Any] | Tensor | Nonerg   zOptimizer | Noner   z Metric | Sequence[Metric] | Noner   zCluster | Noner   zStrategy | NonereturnNone)T)F)"r  rD   r  
int | Noner  r   r  r   r  r  r  r   r  z
str | Noner  r   r  zDataset | Noner  r  r  r   r  r  r  _CollateFn | Noner  Sequence[Callback] | Noner  r   r  zlist[int] | tuple[int, int]r  r  )Nr#   Nr  NNr   )r  rD   r  r  r  r   r  r  r  r   r  r  r  r  r  r   r  dict[str, Any])Nr#   NNNr   )r  rD   r  r  r  r   r  r  r  r  r  r  r  r   r  z	list[Any])r#   FTNr   TTr   Nr#   Nr#   NN) r
  rD   r  r   r  r  r  r  r  r  r  r   r  r  r  r  r  r   r  zCallable[[int], None] | Noner  r   r  r  r  r   r9  _Mode | Noner  z&PlaceLike | Sequence[PlaceLike] | Noner  r,   )NNNNNNNT)r   InputSpec | Noner   r  r   Sequence[Tensor] | Noner   r  r  Program | Noner  r  r9  r  r  r  r  r  )NNNN)
r   zDlist[dict[str, Any]] | tuple[dict[str, Any]] | dict[str, Any] | Noner  zdict[str, Any] | Noner  z,list[Tensor | str | Operator | Value] | Noner9  r  r  r  )r  zlist[Tensor])r  z	list[str])Tr#   FTNr   TTr   Nr#   NN)Nr#   )r9  rN   r  r  )r   r   rn  r  r  r  )TT)r   r   rX  r  rt  r  r  z%tuple[dict[str, Any], dict[str, Any]])NNN)r   r  r   r  r9  r  r  ztuple[int, int] | None)r9  rN   r  rI   )r  rI   )r  r)   )r  zdict[str, list[Tensor]])r  rG   )@rr   
__module____qualname____firstlineno____doc__r   r   r  r  r4  r<  rT  rj  r  r  r  r  r=  r  r   rB  r  rx  r  r  r  r  rM   r   r  r  r*  r-  r  r7  r  r  r   r  r@  r  r  r  r  r  rf  r   r{  r~  r  r  r  propertyr  r  r(  rE  r  r/  rQ  rg   r   r   __static_attributes__ r   r   rP   rP   c   s7   :| 48;?&*48"&$(k0k 9k $	k
 2k  k "k 
k\B(HA(FB )+ .5`,)*\ 8tj=X (D>.W>rE@/B<P(0,"q2nA( *.&*#%))-"&(,/357H#|| '| 	|
 | $| | | | #| '| |  | &| -|  !|" 2#|$ 
%|B *. (,/3pp 'p 	p
 p p &p -p p 
pj )- (,/3cc &c 	c
 c &c -c c 
cP (,"&"&7;&*!9=!.. . 	.
 . &. .  .  . . 5. . $. . .  7!." 
#.d )-(,*.*.'+*.! $6*%6* &6* (	6*
 (6* %6* (6* 6* 6* 
6*x &*CG!K QK
 $K AK K 
KZ.M <|E
5,.A+"Sl FJ5151!%51>B51	.51r )-(,!	@,%@, &@, 	@,
 
 @,DL
O
=
@
 ? ? B B / / 0 0 3 3 - - . .      r   rP   )}
__future__r   r   r   ru   r  r   r  typingr   r   r   r  r   r^   -paddle.distributed.auto_parallel.static.utilsr   auto_parallelr   r	   rm   r   paddle.base.executorr
   paddle.base.frameworkr   paddle.decompositionr   /paddle.distributed.fleet.meta_optimizers.commonr   #paddle.distributed.passes.pass_baser   $paddle.distributed.passes.pass_utilsr   r   paddle.frameworkr   r   r  r   r   paddle.metricr   paddle.staticr   r   r   r   paddle.static.amp.fp16_utilsr   utils.log_utilsr   	interfacer   r   r    static.dist_tensorr!   r   r"   r  r$   r   r%   r&   r`  r'   cost.estimate_costr(   r(  r)   r*   dist_input_specr+   dist_loaderr,   r3  r-   
dist_saverr.   helperr/   mix_to_dist_passr0   parallelizer_v2r1   pir_passr2   r3   r4   r5   r6   r7   r8   r9   
planner_v2r:   rj  r;   r<   r=   collections.abcr>   r?   typing_extensionsr@   rA   paddle._typingrB   paddle.hapi.callbacksrC   	paddle.iorD   paddle.io.readerrE   	paddle.nnrF   paddle.optimizerrG   
paddle.pirrH   rI   rN   __annotations__rP   r  r   r   <module>r     s    #     	  . .   B B % % - 7 ' B 8  ! E E C ) > > 2  ' 1   4 M 1 ) ( ! 1 )	 	 	   D &2+(.!+* %9:E9:x' x'r   