from __future__ import annotations

import copy
import os
import time
from typing import TYPE_CHECKING, Any, Literal, TypedDict

import paddle
from paddle.base import compiler
from paddle.base.framework import _global_flags
from paddle.base.ir import apply_build_strategy
from paddle.base.wrapped_decorator import wrap_decorator
from paddle.framework import in_dynamic_mode
from typing_extensions import ParamSpec, Self, TypeVar, Unpack

from . import topology as tp
from .base.distributed_strategy import DistributedStrategy
from .base.meta_optimizer_factory import MetaOptimizerFactory
from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase
from .base.runtime_factory import RuntimeFactory
from .base.strategy_compiler import StrategyCompiler
from .meta_parallel import model_parallel_random_seed
from .utils.log_util import logger, set_log_level

if TYPE_CHECKING:
    from collections.abc import Callable, Iterable, Sequence

    from paddle import Tensor
    from paddle._typing import PlaceLike
    from paddle.base.core import DistFleetWrapper, _Scope
    from paddle.distributed.communication.group import Group
    from paddle.optimizer import Optimizer
    from paddle.static import (
        BuildStrategy,
        Executor,
        Operator,
        Parameter,
        Program,
        Variable,
    )

    from .topology import CommunicateTopology, HybridCommunicateGroup


class _SaveConfigs(TypedDict, total=False):
    mode: int


class _SaveCacheConfigs(TypedDict, total=False):
    mode: int
    table_id: int


_InputT = ParamSpec("_InputT")
_RetT = TypeVar("_RetT")

__all__ = []


def apply_ir_passes(main_program, startup_program, config):
    build_strategy = config._user_defined_strategy.build_strategy._copy()
    if not _global_flags()['FLAGS_apply_pass_to_program']:
        return build_strategy

    pipeline_opt = getattr(main_program, "_pipeline_opt", {})
    if pipeline_opt:
        main_program = pipeline_opt["section_program"]
        startup_program = startup_program._pipeline_opt["startup_program"]

    pass_attrs = {"use_cuda": config._is_collective}
    fuse_all_reduce = config._user_defined_strategy.fuse_all_reduce_ops
    if fuse_all_reduce and build_strategy.fuse_all_optimizer_ops:
        # The fuse_all_optimizer_ops pass conflicts with the
        # fuse_all_reduce_ops pass, so the former is switched off here.
        logger.warning(
            "Currently, the fuse_all_optimizer_ops pass has conflict with "
            "fuse_all_reduce_ops pass. Disable the fuse_all_optimizer_ops "
            "pass temporarily."
        )
        build_strategy.fuse_all_optimizer_ops = False

    return apply_build_strategy(
        main_program, startup_program, build_strategy, pass_attrs
    )


def _inited_runtime_handler_(
    func: Callable[_InputT, _RetT],
) -> Callable[_InputT, _RetT]:
    def __impl__(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT:
        cls = args[0]
        if cls._runtime_handle is None:
            raise ValueError("Fleet can not find suitable runtime handler")
        return func(*args, **kwargs)

    return __impl__


def _is_non_distributed_check_(
    func: Callable[_InputT, _RetT],
) -> Callable[_InputT, _RetT]:
    def __impl__(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT | None:
        cls = args[0]
        if (
            cls._role_maker is not None
            and cls._role_maker._is_non_distributed() is True
        ):
            logger.warning(
                f"{func.__name__}() function doesn't work when use non_distributed fleet."
            )
            return None
        return func(*args, **kwargs)

    return __impl__


inited_runtime_handler = wrap_decorator(_inited_runtime_handler_)
is_non_distributed_check = wrap_decorator(_is_non_distributed_check_)
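
# A minimal sketch of how the two guards above compose on Fleet methods
# (hypothetical `_Toy` class, not part of this module): the runtime-handler
# check raises before the wrapped body can touch an uninitialized runtime.
#
#     class _Toy:
#         _role_maker = None
#         _runtime_handle = None
#
#         @is_non_distributed_check
#         @inited_runtime_handler
#         def stop_worker(self):
#             self._runtime_handle._stop_worker()
#
#     _Toy().stop_worker()  # raises ValueError: no runtime handler yet
#
# `_SaveConfigs` / `_SaveCacheConfigs` above play a similar supporting role:
# they type the **configs kwargs of the save APIs, e.g.
# `def save_cache_model(self, dirname, **configs: Unpack[_SaveCacheConfigs])`.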
 jrS0 S4S jrS0 4       SJS jjrS rS rSKS jrSLS jrSMS jrSNS jrSNS jrSNS jrSOS jrSOS jrSOS jrSMS jrSMS jrSGSPS jjrSNS jrSNS jrSGSPS jjrSMS jrSDS  jr  SQ     SRS! jjr!\"\#SSSTS" jj5       5       r$\"\#SSSTS# jj5       5       r%SDS$ jr&\"\#SUS% j5       5       r'\"\#SVS& j5       5       r(\"\#SWS' j5       5       r)\"\#SXS( j5       5       r*\"\#SWS) j5       5       r+\"\#SDS* j5       5       r,\"\#SDS+ j5       5       r-\"\#/ / 4         SYS, jj5       5       r.\"\#   SZ               S[S- jj5       5       r/\"\#  S\         S]S. jj5       5       r0\"\#      S^S/ j5       5       r1\"\#SDS0 j5       5       r2\"\# S_       S`S1 jj5       5       r3\"\#SXS2 j5       5       r4\"\# SS           SaS3 jj5       5       r5\"\#SbS4 j5       5       r6\"\#ScS5 j5       5       r7\"\#SSSdS6 jj5       5       r8 SS     SeS7 jjr9S8 r:SfS9 jr;   Sg         ShS: jjr<S; r=  Si       SjS< jjr>S= r?S> r@S? rA   Sk         SlS@ jjrB   SkSA jrC   SkSB jrDSCrEg)mrC      aT  
Unified API for distributed training of PaddlePaddle.
Please reference the https://github.com/PaddlePaddle/PaddleFleetX for details

Returns:
    Fleet: A Fleet instance

Examples:
    .. code-block:: python
        :name: code-example1

        >>> # Example1: for collective training
        >>> import paddle
        >>> paddle.enable_static()
        >>> import paddle.distributed.fleet as fleet

        >>> fleet.init(is_collective=True)

        >>> strategy = fleet.DistributedStrategy()
        >>> linear = paddle.nn.Linear(10, 10)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=linear.parameters())
        >>> optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)

        >>> # do distributed training

    .. code-block:: python
        :name: code-example2

        >>> # Example2: for parameter server training
        >>> import paddle
        >>> paddle.enable_static()
        >>> import paddle.distributed.fleet as fleet
        >>> strategy = fleet.DistributedStrategy()
        >>> fleet.init(strategy=strategy)

        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001)
        >>> optimizer = fleet.distributed_optimizer(optimizer)

        >>> if fleet.is_first_worker():
        ...     print("this is first worker")

        >>> print("current node index: {}".format(fleet.worker_index()))
        >>> print("total number of worker num: {}".format(fleet.worker_num()))

        >>> if fleet.is_worker():
        ...     print("this is worker")
        >>> print("worker endpoints: {}".format(fleet.worker_endpoints(to_string=True)))

        >>> print("server num: {}".format(fleet.server_num()))
        >>> print("server endpoints: {}".format(fleet.server_endpoints(to_string=True)))

        >>> if fleet.is_server():
        ...     print("this is server")
        >>> fleet.stop_worker()

c                    S U l         S U l        SU l        S U l        S U l        0 U l        [        R                  R                  S5      U l	        g )NFg        )
ri   strategy_compilerrN   rZ   _util_contextpaddle	optimizerr$   user_defined_optimizerselfs    r;   __init__Fleet.__init__   sI    :>##
171A1A1K1KC1P#r:   Nc                   SSK Jn  [        U5        Uc
  [        5       n[        R
                  " U5      U l        UcK  [        U[        5      (       a  X l	        [        U R                  S9U l        O[[        S[        U5       35      e[        U[        5      (       a  Xl        UR                  U l	        O[        S[        U5       35      eU R                  R                  5         SSK Jn  UR"                  R%                  U R                  5        ['        5       U l        [+        5       (       Ga  UR-                  5       (       a  [.        R0                  " S5        OS	[2        R4                  ;   a  [.        R0                  " S
5        O0[7        U R                  R8                  5      [2        R4                  S	'   [:        R<                  R?                  U R                  R@                  S   RB                  5        U R                  RD                  (       d9  [F        RH                  c  U RK                  5         U $ [.        R0                  " S5        U $ U R                  (       Ga  U R                  RL                  nU RO                  5       nU RQ                  5       n	U(       a  SOSn
[S        [U        U	5      5      n[F        RH                  c  [F        RV                  " 5         [F        RH                  nXl,        UR[                  SUU	U
U5        U R                  R\                  nU=(       d    UnUSL a  gSnSnU(       a$  U R                  R^                  n[a        US   5      nU(       a$  U R                  Rb                  n[a        US   5      nU(       a  U(       a  UU:X  d   eU(       a  UOUnUS:  aI  U	U-  S:X  d   eSnUU-  nUU-  nU Vs/ s H  nUU-  U:X  d  M  UPM     nnUR[                  SUUUU5        U $ s  snf )a  

    def init(
        self,
        role_maker: RoleMakerBase | None = None,
        is_collective: bool = False,
        strategy: DistributedStrategy | None = None,
        log_level: int | str = "INFO",
    ) -> None:
        """
        Initialize role_maker in Fleet.

        This function is responsible for the distributed architecture
        that you want to run your code behind.

        Args:
            role_maker (RoleMakerBase, optional): A ``RoleMakerBase`` containing the configuration
                of environment variables related to distributed training. If you did not initialize
                the rolemaker by yourself, it will be automatically initialized to PaddleRoleMaker.
                The default value is None.
            is_collective (Boolean, optional): A ``Boolean`` variable determining whether the program
                runs on Collective mode or ParameterServer mode. True means the program runs on
                Collective mode, and False means running on ParameterServer mode. The default value
                is False.
            strategy (DistributedStrategy): Extra properties for distributed training.
                For details, please refer to paddle.distributed.fleet.DistributedStrategy. Default: None.
            log_level (Integer, String, optional): An ``Integer`` or ``String`` variable determining how high
                the logging level is. Default is "INFO".

        Returns:
            None

        Examples:
            .. code-block:: python
                :name: code-init-example1

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

            .. code-block:: python
                :name: code-init-example2

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init(is_collective=True)

            .. code-block:: python
                :name: code-init-example3

                >>> import paddle.distributed.fleet as fleet
                >>> role = fleet.PaddleCloudRoleMaker()
                >>> fleet.init(role)

            .. code-block:: python
                :name: code-init-example4

                >>> import paddle.distributed.fleet as fleet
                >>> strategy = fleet.DistributedStrategy()
                >>> fleet.init(strategy=strategy)

            .. code-block:: python
                :name: code-init-example5

                >>> import paddle.distributed.fleet as fleet
                >>> strategy = fleet.DistributedStrategy()
                >>> fleet.init(log_level = "DEBUG")

        """
        from paddle.distributed import parallel_helper

        set_log_level(log_level)

        if strategy is None:
            strategy = DistributedStrategy()
        self._user_defined_strategy = copy.deepcopy(strategy)

        if role_maker is None:
            if isinstance(is_collective, bool):
                self._is_collective = is_collective
                self._role_maker = PaddleCloudRoleMaker(
                    is_collective=self._is_collective
                )
            else:
                raise ValueError(
                    f"`is_collective` should be instance of `bool`, but got {type(is_collective)}"
                )
        else:
            if isinstance(role_maker, RoleMakerBase):
                self._role_maker = role_maker
                self._is_collective = role_maker._is_collective
            else:
                raise ValueError(
                    f"`role_maker` should be subclass of `RoleMakerBase`, but got {type(role_maker)}"
                )
        self._role_maker._generate_role()

        import paddle.distributed.fleet as fleet

        fleet.util._set_role_maker(self._role_maker)

        self.strategy_compiler = StrategyCompiler()

        if in_dynamic_mode():
            if parallel_helper._is_parallel_ctx_initialized():
                logger.warning(
                    "The dygraph parallel environment has been initialized."
                )
            else:
                # FLAGS_nccl_nrings is used to config the number of nccl comm rings
                if "FLAGS_nccl_nrings" in os.environ:
                    logger.warning(
                        "You have set the environment variable FLAGS_nccl_nrings "
                        "outside the program, so the nccl_comm_num in "
                        "DistributedStrategy will not take effect here."
                    )
                else:
                    os.environ["FLAGS_nccl_nrings"] = str(
                        self._user_defined_strategy.nccl_comm_num
                    )
                paddle.distributed.init_parallel_env(
                    self._user_defined_strategy.hybrid_configs[
                        "nccl_config"
                    ].default_comm_group_configs
                )

            if not self._user_defined_strategy.heter_ccl_mode:
                if tp._HYBRID_PARALLEL_GROUP is None:
                    self._init_hybrid_parallel_env()
                else:
                    logger.warning(
                        "The dygraph hybrid parallel environment has been initialized."
                    )
        elif self._is_collective:
            use_sharding = self._user_defined_strategy.sharding

            # global group
            global_rank = self.worker_index()
            global_world_size = self.worker_num()
            # the sharding optimizer reserves ring ids, so the global ring
            # uses ring id 3 when sharding is enabled
            global_ring_id = 3 if use_sharding else 0
            global_ranks = list(range(global_world_size))

            if tp._HYBRID_PARALLEL_GROUP is None:
                tp._CommunicateGroup()
            cg = tp._HYBRID_PARALLEL_GROUP
            self._hcg = cg
            cg.set_comm_group(
                'global',
                global_rank,
                global_world_size,
                global_ring_id,
                global_ranks,
            )

            use_tensor_parallel = self._user_defined_strategy.tensor_parallel
            use_mp = use_sharding or use_tensor_parallel
            if use_mp is False:
                return

            # hybrid group
            mp_degree_sharding = 1
            mp_degree_tensor_parallel = 1
            if use_sharding:
                sharding_configs = self._user_defined_strategy.sharding_configs
                mp_degree_sharding = int(sharding_configs['mp_degree'])

            if use_tensor_parallel:
                tensor_parallel_configs = (
                    self._user_defined_strategy.tensor_parallel_configs
                )
                mp_degree_tensor_parallel = int(
                    tensor_parallel_configs['tensor_parallel_degree']
                )

            if use_sharding and use_tensor_parallel:
                assert mp_degree_sharding == mp_degree_tensor_parallel

            mp_degree = (
                mp_degree_sharding
                if use_sharding
                else mp_degree_tensor_parallel
            )

            if mp_degree > 1:
                assert global_world_size % mp_degree == 0
                mp_ring_id = 0
                mp_rank = global_rank % mp_degree
                mp_group_id = global_rank // mp_degree
                mp_group_ranks = [
                    idx
                    for idx in global_ranks
                    if idx // mp_degree == mp_group_id
                ]
                cg.set_comm_group(
                    'model', mp_rank, mp_degree, mp_ring_id, mp_group_ranks
                )

    def allreduce_perf(
        self, iteration, x, group, perf_size, perf_threshold_time, warmup=False
    ) -> None:
        if group is None or group.nranks <= 0:
            logger.warning("allreduce_perf is invalid, group invalid!")
            return
        paddle.distributed.barrier()
        paddle.device.cuda.synchronize()
        start_t = time.time()
        for _ in range(iteration):
            paddle.distributed.all_reduce(x, group=group)
        paddle.device.cuda.synchronize()
        end_t = time.time()
        ret = (end_t - start_t) / iteration
        if warmup:
            return
        logger.info(
            f"[AllReduceTest] nbytes {perf_size}B test result: {ret} s/iter"
        )
        if perf_threshold_time > 0 and ret > perf_threshold_time:
            logger.warning(
                f"[Perf Warning] AllReduce Test Timeout! {ret} > {perf_threshold_time}"
            )

    def reduce_perf(
        self, iteration, x, group, perf_size, perf_threshold_time
    ) -> None:
        if group is None or group.nranks <= 0:
            logger.warning("reduce_perf is invalid, group invalid!")
            return
        paddle.distributed.barrier()
        paddle.device.cuda.synchronize()
        start_t = time.time()
        for _ in range(iteration):
            paddle.distributed.reduce(x, dst=min(group.ranks), group=group)
        paddle.device.cuda.synchronize()
        end_t = time.time()
        ret = (end_t - start_t) / iteration
        logger.info(
            f"[ReduceTest] nbytes {perf_size}B test result: {ret} s/iter"
        )
        if perf_threshold_time > 0 and ret > perf_threshold_time:
            logger.warning(
                f"[Perf Warning] Reduce Test Timeout! {ret} > {perf_threshold_time}"
            )

    def broadcast_perf(
        self, iteration, x, group, perf_size, perf_threshold_time
    ) -> None:
        if group is None or group.nranks <= 0:
            logger.warning("broadcast_perf is invalid, group invalid!")
            return
        paddle.distributed.barrier()
        paddle.device.cuda.synchronize()
        start_t = time.time()
        for _ in range(iteration):
            paddle.distributed.broadcast(x, src=min(group.ranks), group=group)
        paddle.device.cuda.synchronize()
        end_t = time.time()
        ret = (end_t - start_t) / iteration
        logger.info(
            f"[BroadcastTest] nbytes {perf_size}B test result: {ret} s/iter"
        )
        if perf_threshold_time > 0 and ret > perf_threshold_time:
            logger.warning(
                f"[Perf Warning] Broadcast Test Timeout! {ret} > {perf_threshold_time}"
            )

    def allgather_perf(
        self, iteration, x, group, perf_size, perf_threshold_time
    ) -> None:
        if group is None or group.nranks <= 0:
            logger.warning("allgather_perf is invalid, group invalid!")
            return
        paddle.distributed.barrier()
        paddle.device.cuda.synchronize()
        start_t = time.time()
        for _ in range(iteration):
            tmp = []
            paddle.distributed.all_gather(tmp, x, group=group)
        paddle.device.cuda.synchronize()
        end_t = time.time()
        ret = (end_t - start_t) / iteration
        logger.info(
            f"[AllgatherTest] nbytes {perf_size}B test result: {ret} s/iter"
        )
        if perf_threshold_time > 0 and ret > perf_threshold_time:
            logger.warning(
                f"[Perf Warning] Allgather Test Timeout! {ret} > {perf_threshold_time}"
            )

    def reduce_scatter_perf(
        self, iteration, x, group, perf_size, perf_threshold_time
    ) -> None:
        if group is None or group.nranks <= 0:
            logger.warning("reduce_scatter_perf is invalid, group invalid!")
            return
        paddle.distributed.barrier()
        paddle.device.cuda.synchronize()
        parallelism = group.nranks
        output_shape = x.shape
        if x.shape[0] % parallelism != 0:
            logger.warning(
                f"the shape of input[{x.shape[0]}] can't be divided exactly "
                f"by reduce_scatter parallelism[{parallelism}], test stopped!"
            )
            return
        output_shape[0] = output_shape[0] // parallelism
        output = paddle.empty(shape=output_shape, dtype=x.dtype)
        start_t = time.time()
        for _ in range(iteration):
            paddle.distributed.stream.reduce_scatter(
                output,
                x,
                op=paddle.distributed.ReduceOp.SUM,
                group=group,
                sync_op=True,
            )
        paddle.device.cuda.synchronize()
        end_t = time.time()
        ret = (end_t - start_t) / iteration
        logger.info(
            f"[ReduceScatterTest] nbytes {perf_size}B test result: {ret} s/iter"
        )
        if perf_threshold_time > 0 and ret > perf_threshold_time:
            logger.warning(
                f"[Perf Warning] ReduceScatter Test Timeout! {ret} > {perf_threshold_time}"
            )

    def _collective_perf_impl(self, round=50, context={}, hcg=None) -> None:
        if hcg is None:
            hcg = self.get_hybrid_communicate_group()

        collective_perf_func_map = {
            "allreduce": self.allreduce_perf,
            "reduce": self.reduce_perf,
            "broadcast": self.broadcast_perf,
            "allgather": self.allgather_perf,
            "reduce_scatter": self.reduce_scatter_perf,
        }
        dp_group = hcg.get_data_parallel_group()
        sharding_group = hcg.get_sharding_parallel_group()
        mp_group = hcg.get_model_parallel_group()

        data_group = None
        if dp_group.nranks > 1:
            data_group = dp_group
        elif sharding_group.nranks > 1:
            data_group = sharding_group

        collective_perf_group_map = {
            "allreduce": data_group,
            "reduce": data_group,
            "broadcast": data_group,
            "allgather": mp_group,
            "reduce_scatter": mp_group,
        }

        for comm_type, size_and_time in context.items():
            # default: test from 1MB to 1GB without a time threshold
            nbytes = 1 << 20  # 1048576(1MB)
            final_nbytes = 1 << 30  # 1073741824(1GB)
            dtype = paddle.float32
            time_threshold = 0

            if size_and_time is not None:
                nbytes = size_and_time[0]
                final_nbytes = nbytes
                time_threshold = size_and_time[1]

            if nbytes <= 0:
                logger.warning(
                    f"Size for collective performance check should be positive, but got {nbytes}"
                )
                return

            while nbytes <= final_nbytes:
                # float32 takes 4 bytes per element
                x = paddle.zeros([nbytes // 4], dtype=dtype)
                group = collective_perf_group_map[comm_type]
                # warm up before the timed run
                self.allreduce_perf(2, x, group, nbytes, 0, warmup=True)
                collective_perf_func_map[comm_type](
                    iteration=round,
                    x=x,
                    group=group,
                    perf_size=nbytes,
                    perf_threshold_time=time_threshold,
                )
                nbytes = nbytes << 2

    def collective_perf(self, comm_type, round=50, size_and_time={}) -> None:
        """
        Run performance test for given communication type
        and compare the time cost with the threshold.

        Args:
            comm_type (str): Communication type for performance test. Currently support
                            "allreduce", "broadcast", "reduce", "allgather" and "reduce_scatter".
            round (int, optional): Loop times for performance test. More loops will cost more time
                            and provide more accurate result. Defaults to 50.
            size_and_time (dict, optional): Message sizes and time thresholds for performance test.
                            each pair will invoke a performance check. Defaults to {}, which indicates
                            acting performance check from 1MB to 1GB without threshold set.

        Returns:
            None

        Examples:
            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init(is_collective=True)
                >>> # run two tests, one with 1MB (threshold 0.5s) and another with 1GB (threshold 1s)
                >>> size_and_time = {1<<20: 0.5, 1<<30: 1}
                >>> fleet.collective_perf("allreduce", round=50, size_and_time = size_and_time)
        """
        if not self._is_collective:
            logger.warning(
                "fleet.collective_perf is only for collective mode, will return with no test acted."
            )
            return
        if len(size_and_time) == 0:
            self._collective_perf_impl(round=round, context={comm_type: None})
        else:
            for size, threshold in size_and_time.items():
                self._collective_perf_impl(
                    round=round, context={comm_type: (size, threshold)}
                )

    def _create_hcg(self, hybrid_group_names, dims):
        if (
            "expert" in hybrid_group_names
            and dims[hybrid_group_names.index("expert")] > 1
        ):
            # moe expert parallel is enabled, use the EP-aware communicate group
            hcg = tp.EPHybridCommunicateGroup(
                hybrid_group_names, dims, self._topology
            )
            self._topology = hcg._dense_topo
            return hcg
        self._topology = tp.CommunicateTopology(hybrid_group_names, dims)
        return tp.HybridCommunicateGroup(self._topology)

    def _init_hybrid_parallel_env(self) -> None:
        """initialize the hybrid environment."""
        self.hybrid_configs = self._user_defined_strategy.hybrid_configs
        self.dp_degree = self.hybrid_configs["dp_degree"]
        self.mp_degree = self.hybrid_configs["mp_degree"]
        self.pp_degree = self.hybrid_configs["pp_degree"]
        self.sep_degree = self.hybrid_configs["sep_degree"]
        self.cp_degree = self.hybrid_configs["cp_degree"]
        self.sharding_degree = self.hybrid_configs["sharding_degree"]
        self.ep_degree = self.hybrid_configs["ep_degree"]
        self.moe_sharding_degree = self.hybrid_configs["moe_sharding_degree"]

        assert self.mp_degree >= 0, "mp_degree should be greater or equal to 0"
        assert self.pp_degree >= 0, "pp_degree should be greater or equal to 0"
        assert (
            self.sep_degree >= 0
        ), "sep_degree should be greater or equal to 0"
        assert self.cp_degree >= 0, "cp_degree should be greater or equal to 0"
        assert (
            self.sharding_degree >= 0
        ), "sharding_degree should be greater or equal to 0"

        self.mp_degree = max(self.mp_degree, 1)
        self.pp_degree = max(self.pp_degree, 1)
        self.sep_degree = max(self.sep_degree, 1)
        self.cp_degree = max(self.cp_degree, 1)
        self.sharding_degree = max(self.sharding_degree, 1)
        self.ep_degree = max(self.ep_degree, 1)
        self.moe_sharding_degree = max(self.moe_sharding_degree, 1)

        if self.dp_degree < 0:
            nranks = paddle.distributed.get_world_size()
            self.dp_degree = nranks // (
                self.mp_degree * self.pp_degree * self.sharding_degree
            )
        self.dp_degree = max(self.dp_degree, 1)

        d_hybrid_degree = {
            "dp": ["data", self.dp_degree],
            "pp": ['pipe', self.pp_degree],
            "sharding": ['sharding', self.sharding_degree],
            "mp": ['model', self.mp_degree],
            "sep": ["sep", self.sep_degree],
            "cp": ["cp", self.cp_degree],
            "ep": ["expert", self.ep_degree],
            "moe_sharding": ["moe_sharding", self.moe_sharding_degree],
        }

        order = self._user_defined_strategy.hybrid_parallel_order
        if sorted(order) != sorted(list(d_hybrid_degree.keys())[: len(order)]):
            raise AssertionError(
                'The order of hybrid_config setting is incorrect.'
            )

        hybrid_group_names = []
        dims = []
        for h_name in order:
            name, degree = d_hybrid_degree[h_name]
            hybrid_group_names.append(name)
            dims.append(degree)

        self._hcg = self._create_hcg(hybrid_group_names, dims)

        if self.mp_degree > 1:
            tensor_parallel_configs = (
                self._user_defined_strategy.tensor_parallel_configs
            )
            tensor_init_seed = tensor_parallel_configs["tensor_init_seed"]
            if tensor_init_seed == -1:
                model_parallel_random_seed()
            else:
                model_parallel_random_seed(tensor_init_seed)

    def get_hybrid_communicate_group(self) -> HybridCommunicateGroup:
        assert self._hcg is not None
        return self._hcg

    def get_hybrid_parallel_topology(self) -> CommunicateTopology:
        assert self._topology is not None
        return self._topology

    def is_first_worker(self) -> bool:
        """
        Check whether the node is the first instance of worker.

        Returns:
            bool: True if this is the first node of worker, False if not.

        Examples:
            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.is_first_worker()

        """
        return self._role_maker._is_first_worker()

    def worker_index(self) -> int:
        """
        Get current worker index.

        Returns:
            int: node id

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.worker_index()

        """
        return self._role_maker._worker_index()

    def worker_num(self) -> int:
        """
        Get current total worker number.

        Returns:
            int: worker numbers

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.worker_num()

        """
        return self._role_maker._worker_num()

    def node_num(self) -> int:
        return self._role_maker._get_node_num()

    def local_rank(self) -> int:
        return self._role_maker._get_local_rank()

    def local_device_ids(self):
        return self._role_maker._get_local_device_ids()

    def world_device_ids(self):
        return self._role_maker._get_world_device_ids()

    def is_worker(self) -> bool:
        """
        Check whether the node is an instance of worker.

        Returns:
            bool: True if this is a node of worker,
                  False if not.

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.is_worker()

        """
        return self._role_maker._is_worker()

    def is_coordinator(self):
        return self._role_maker._is_coordinator()

    def worker_endpoints(self, to_string: bool = False) -> str | list[str]:
        """
        Get current worker endpoints, such as ["127.0.0.1:1001", "127.0.0.1:1002"].

        Returns:
            list/string: worker endpoints

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.worker_endpoints()

        """
        if to_string:
            return ",".join(self._role_maker._get_trainer_endpoints())
        else:
            return self._role_maker._get_trainer_endpoints()

    def server_num(self) -> int:
        """
        Get current total server number.

        Returns:
            int: server number

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.server_num()
        """
        return len(self._role_maker._get_pserver_endpoints())

    def server_index(self) -> int:
        """
        Get current server index.

        Returns:
            int: node id

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.server_index()

        """
        return self._role_maker._server_index()

    def server_endpoints(self, to_string: bool = False) -> str | list[str]:
        """
        Get current server endpoints, such as ["127.0.0.1:1001", "127.0.0.1:1002"].

        Returns:
            list/string: server endpoints

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.server_endpoints()

        """
        if to_string:
            return ",".join(self._role_maker._get_pserver_endpoints())
        else:
            return self._role_maker._get_pserver_endpoints()

    def is_server(self) -> bool:
        """
        Check whether the node is an instance of server.

        Returns:
            bool: True if this is a node of server,
                  False if not.

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.is_server()

        """
        return self._role_maker._is_server()

    def barrier_worker(self) -> None:
        """
        barrier all workers

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> fleet.barrier_worker()
        """
        self._role_maker._barrier("worker")

    def all_reduce(self, input, mode="sum"):
        """
        all reduce input between all workers, mode can be sum, mean or max, default is sum

        Returns:
            list/int: all reduce result

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> res = fleet.all_reduce(5)

        """
        return self._role_maker._all_reduce(input, mode, "worker")

    @is_non_distributed_check
    @inited_runtime_handler
    def init_worker(self, scopes=None) -> None:
        """
        initialize `Communicator` for parameter server training.


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.init_worker()

        """
        self._runtime_handle._init_worker(scopes)

    @is_non_distributed_check
    @inited_runtime_handler
    def init_coordinator(self, scopes=None) -> None:
        """
        initialize coordinator node
        """
        self._runtime_handle._init_coordinator(scopes)

    def make_fl_strategy(self) -> None:
        self._runtime_handle._make_fl_strategy()

    @is_non_distributed_check
    @inited_runtime_handler
    def get_fl_client(self) -> DistFleetWrapper:
        """
        get worker(training node) ptr
        """
        return self._runtime_handle._worker

    @is_non_distributed_check
    @inited_runtime_handler
    def init_server(self, *args, **kwargs) -> None:
        """
        init_server executor to initialize startup program,
        if the `args` is not empty, it will run load_persistables for increment training.


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.init_server()

        """
        self._runtime_handle._init_server(*args, **kwargs)

    @is_non_distributed_check
    @inited_runtime_handler
    def load_model(self, path: str, mode: int = 0) -> None:
        """
        load fleet model from path


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.load_model("path", mode=0)

        """
        self._runtime_handle._load_persistables(path, mode)

    @is_non_distributed_check
    @inited_runtime_handler
    def load_one_table(self, table_id: int, path: str, mode: int = 0) -> None:
        """
        load fleet one table from path


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.load_one_table(0, "path", mode=0)

        """
        self._runtime_handle._load_one_table(table_id, path, mode)

    @is_non_distributed_check
    @inited_runtime_handler
    def load_inference_model(self, path: str, mode: int = 0) -> None:
        """
        load fleet inference model from path


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.load_inference_model("path", mode=1)

        """
        self._runtime_handle._load_inference_model(path, mode)

    @is_non_distributed_check
    @inited_runtime_handler
    def run_server(self) -> None:
        """
        run server will run pserver main program with executor.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> if fleet.is_server():
                ...     fleet.init_server()

        """
        self._runtime_handle._run_server()

    @is_non_distributed_check
    @inited_runtime_handler
    def stop_worker(self) -> None:
        """
        stop `Communicator` and give training complete notice to parameter server.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.init_server()

        """
        self._runtime_handle._stop_worker()

    @is_non_distributed_check
    @inited_runtime_handler
    def save(self, dirname, feed=[], fetch=[], **configs: Unpack[_SaveConfigs]):
        inference = True
        if not feed and not fetch:
            inference = False

        place = paddle.CPUPlace()
        executor = paddle.static.Executor(place)

        if inference:
            feeded_var_names = []
            fetch_var_names = []
            for var in feed:
                if isinstance(var, str):
                    feeded_var_names.append(var)
                elif isinstance(var, paddle.static.Variable):
                    feeded_var_names.append(var.name)
                else:
                    raise ValueError("feed must be [str|Variable]")
            for var in fetch:
                if isinstance(var, str):
                    fetch_var_names.append(var)
                elif isinstance(var, paddle.static.Variable):
                    fetch_var_names.append(var.name)
                else:
                    raise ValueError("fetch must be [str|Variable]")
            fetch_vars = [
                paddle.static.default_main_program()
                .global_block()
                .var(name)
                for name in fetch_var_names
            ]
            self._runtime_handle._save_inference_model(
                executor, dirname, feeded_var_names, fetch_vars, None, True, 0
            )
        else:
            increment_mode = 0
            if "mode" in configs:
                increment_mode = int(configs["mode"])
            self._runtime_handle._save_persistables(
                executor, dirname, main_program=None, mode=increment_mode
            )

    @is_non_distributed_check
    @inited_runtime_handler
    def save_inference_model(
        self,
        executor,
        dirname,
        feeded_var_names,
        target_vars,
        main_program=None,
        export_for_deployment=True,
        mode=0,
    ) -> None:
        """
        save inference model for inference.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.init_server()

        """
        self._runtime_handle._save_inference_model(
            executor,
            dirname,
            feeded_var_names,
            target_vars,
            main_program,
            export_for_deployment,
            mode,
        )
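
    # How the two halves of `save` above map onto the explicit APIs here
    # (hypothetical static-graph flow, assuming fleet.init() was called and
    # `pred` is a fetch variable of the built network):
    #
    #     exe = paddle.static.Executor(paddle.CPUPlace())
    #     # deployable graph plus selected vars:
    #     fleet.save_inference_model(exe, "infer_dir", ["x"], [pred])
    #     # full training state, reloadable via fleet.load_model():
    #     fleet.save_persistables(exe, "ckpt_dir",
    #                             paddle.static.default_main_program())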

    @is_non_distributed_check
    @inited_runtime_handler
    def save_persistables(
        self, executor, dirname, main_program=None, mode=0
    ) -> None:
        """
        saves all persistable tensors from :code:`main_program` to
        the folder :code:`dirname`.

        The :code:`dirname` is used to specify the folder where persistable tensors
        are going to be saved. If you would like to save tensors in separate
        files, set :code:`filename` None.

        Args:
            executor(Executor): The executor to run for saving persistable tensors.
                                You can refer to :ref:`api_guide_executor_en` for
                                more details.

            dirname(str, optional): The saving directory path.
                                When you need to save the parameter to the memory, set it to None.
            main_program(Program, optional): The program whose persistable tensors will
                                             be saved. Default: None.


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> paddle.enable_static()
                >>> import paddle.distributed.fleet as fleet

                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> exe = paddle.static.Executor(paddle.CPUPlace())
                >>> fleet.save_persistables(exe, "dirname", paddle.static.default_main_program())

        """
        self._runtime_handle._save_persistables(
            executor, dirname, main_program, mode
        )

    @is_non_distributed_check
    @inited_runtime_handler
    def save_cache_model(
        self, dirname: str, **configs: Unpack[_SaveCacheConfigs]
    ):
        return self._runtime_handle._save_cache_model(dirname, **configs)

    @is_non_distributed_check
    @inited_runtime_handler
    def check_save_pre_patch_done(self):
        return self._runtime_handle._check_save_pre_patch_done()

    @is_non_distributed_check
    @inited_runtime_handler
    def save_cache_table(
        self, table_id: int, pass_id: int, mem_cache_key_threshold
    ):
        return self._runtime_handle._save_cache_table(
            table_id, pass_id, mem_cache_key_threshold
        )

    @is_non_distributed_check
    @inited_runtime_handler
    def save_one_table(self, table_id: int, path: str, mode: int = 0) -> None:
        """
        save fleet one table from path


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.save_one_table(0, "path", mode=0)

        """
        self._runtime_handle._save_one_table(table_id, path, mode)

    @is_non_distributed_check
    @inited_runtime_handler
    def save_dense_params(
        self, executor, dirname, scope, program, var_names=None
    ) -> None:
        """
        save dense parameters from scope to path


        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()
                >>> import paddle
                >>> place = paddle.CPUPlace()
                >>> exe =  paddle.static.Executor(place)

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.save_dense_params(exe, "path", scope=paddle.static.global_scope(), program=paddle.static.default_main_program())

        """
        self._runtime_handle._save_dense_params(
            executor, dirname, scope, program, var_names
        )

    @is_non_distributed_check
    @inited_runtime_handler
    def set_date(self, table_id: int, day_id) -> None:
        """
        set_date for gpups table

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init()

                >>> # build net
                >>> # fleet.distributed_optimizer(...)

                >>> fleet.set_date(0, "20250101")

        """
        self._runtime_handle._set_date(table_id, str(day_id))

    @is_non_distributed_check
    @inited_runtime_handler
    def print_table_stat(self, table_id: int, pass_id: int, threshold) -> None:
        """
        Print stat info of table_id for gpups table, format: tableid, feasign size, mf size.

        Args:

            table_id (int): The id of table.
            pass_id (int): The id of pass.
            threshold (float): The threshold of print.

        Examples:

            .. code-block:: text

                fleet.print_table_stat(0,6,600000)

        """
        self._runtime_handle._print_table_stat(table_id, pass_id, threshold)

    @is_non_distributed_check
    @inited_runtime_handler
    def shrink(self, threshold=None) -> None:
        self._runtime_handle._shrink(threshold)

    def distributed_optimizer(
        self, optimizer: Optimizer, strategy: DistributedStrategy | None = None
    ) -> Self:
        """
        Optimizer for distributed training.

        For the distributed training, this method would rebuild a new instance of DistributedOptimizer.
        Which has basic Optimizer function and special features for distributed training.

        Args:
            optimizer(Optimizer): The executor to run for init server.
            strategy(DistributedStrategy): Extra properties for distributed optimizer.
                It is recommended to use DistributedStrategy in fleet.init(). The strategy
                here is for compatibility. If the strategy in fleet.distributed_optimizer()
                is not None, then it will overwrite the DistributedStrategy in fleet.init(),
                which will take effect in distributed training.

        Returns:
            Fleet: instance of fleet.

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> import paddle.distributed.fleet as fleet
                >>> fleet.init(is_collective=True)
                >>> linear = paddle.nn.Linear(10, 10)
                >>> strategy = fleet.DistributedStrategy()
                >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=linear.parameters())
                >>> optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)

        """
        self.user_defined_optimizer = optimizer

        if strategy is not None:
            if self._is_collective:
                logger.warning(
                    "It is recommended to use DistributedStrategy "
                    "in fleet.init(). The strategy here is only for "
                    "compatibility. If the strategy in "
                    "fleet.distributed_optimizer() is not None, then it "
                    "will overwrite the DistributedStrategy in fleet.init(), "
                    "which will take effect in distributed training."
                )
            self._user_defined_strategy = copy.deepcopy(strategy)

        self._context = {}

        return self

    def _get_amp_optimizer(self):
        # imitate target optimizer retrieval
        amp_optimizer = None
        for optimizer in self.strategy_compiler._get_applied_meta_optimizer():
            if hasattr(optimizer, 'amp_init'):
                amp_optimizer = optimizer
                break

        if amp_optimizer is None:
            if hasattr(self.user_defined_optimizer, 'amp_init'):
                amp_optimizer = self.user_defined_optimizer

        assert (
            amp_optimizer is not None
        ), "amp_init can only be used when the amp(auto mixed precision) strategy is turned on."
        return amp_optimizer

    def get_loss_scaling(self):
        """Return the real-time loss scaling factor."""
        amp_optimizer = self._get_amp_optimizer()
        return amp_optimizer.get_loss_scaling()

    def amp_init(
        self, place, scope=None, test_program=None, use_fp16_test=False
    ):
        """
        Init the amp training, such as cast fp32 parameters to fp16 type.

        Args:
            place(CUDAPlace): place is used to initialize
                fp16 parameters with fp32 values.
            scope(Scope): The scope is used to find fp32 parameters.
            test_program(Program): The program is used for testing.
            use_fp16_test(bool): Whether to use fp16 testing.

        Examples:
            .. code-block:: python

                >>> import paddle
                >>> import paddle.nn.functional as F
                >>> paddle.enable_static()

                >>> def run_example_code():
                ...     place = paddle.CUDAPlace(0)
                ...     exe = paddle.static.Executor(place)
                ...     data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
                ...     conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
                ...     # 1) Use fp16_guard to control the range of fp16 kernels used.
                ...     with paddle.static.amp.fp16_guard():
                ...         bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
                ...         pool = F.max_pool2d(bn, kernel_size=2, stride=2)
                ...         hidden = paddle.static.nn.fc(pool, size=10)
                ...         loss = paddle.mean(hidden)
                ...     # 2) Create the optimizer and set `multi_precision` to True.
                ...     # Setting `multi_precision` to True can avoid the poor accuracy
                ...     # or the slow convergence in a way.
                ...     optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
                ...     # 3) These ops in `custom_black_list` will keep in the float32 computation type.
                ...     amp_list = paddle.static.amp.CustomOpLists(
                ...         custom_black_list=['pool2d'])
                ...     # 4) The entry of Paddle AMP.
                ...     # Enable pure fp16 training by setting `use_pure_fp16` to True.
                ...     optimizer = paddle.static.amp.decorate(
                ...         optimizer,
                ...         amp_list,
                ...         init_loss_scaling=128.0,
                ...         use_dynamic_loss_scaling=True,
                ...         use_pure_fp16=True)
                ...     # If you don't use the default_startup_program(), you should pass
                ...     # your defined `startup_program` into `minimize`.
                ...     optimizer.minimize(loss)
                ...     exe.run(paddle.static.default_startup_program())
                ...     # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`).
                ...     # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
                ...     optimizer.amp_init(place, scope=paddle.static.global_scope())

                >>> if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
                ...     run_example_code()
        """
        amp_optimizer = self._get_amp_optimizer()
        return amp_optimizer.amp_init(place, scope, test_program, use_fp16_test)

    def _get_qat_optimizer(self):
        # imitate target optimizer retrieval
        qat_optimizer = None
        for optimizer in self.strategy_compiler._get_applied_meta_optimizer():
            if hasattr(optimizer, 'qat_init'):
                qat_optimizer = optimizer
                break

        if qat_optimizer is None:
            if hasattr(self.user_defined_optimizer, 'qat_init'):
                qat_optimizer = self.user_defined_optimizer

        assert (
            qat_optimizer is not None
        ), "qat_init can only be used when the qat(quantization aware training) strategy is turned on."
        return qat_optimizer

    def qat_init(self, place, scope=None, test_program=None):
        """
        Init the qat training, such as insert qdq ops and scale variables.

        Args:
            place(CUDAPlace): place is used to initialize
                scale parameters.
            scope(Scope): The scope is used to find parameters and variables.
            test_program(Program): The program is used for testing.
        """
        qat_optimizer = self._get_qat_optimizer()
        return qat_optimizer.qat_init(
            place, scope=scope, test_program=test_program
        )

    def _final_strategy(self):
        if "valid_strategy" not in self._context:
            print(
                "WARNING: You may need to call minimize function before this function is called"
            )
            return {}
        else:
            return self._context["valid_strategy"]

    def _get_applied_meta_list(self):
        if "applied_meta_list" not in self._context:
            print(
                "WARNING: You may need to call minimize function before _get_applied_meta_list called"
            )
            return []
        else:
            return self._context["applied_meta_list"]

    def _get_applied_graph_list(self):
        if "applied_graph_list" not in self._context:
            print(
                "WARNING: You may need to call minimize function before _get_applied_graph_list called"
            )
            return []
        else:
            return self._context["applied_graph_list"]
Add distributed operations to minimize ``loss`` by updating ``parameter_list``.

Args:
    loss (Tensor): A ``Tensor`` containing the value to minimize.
    startup_program (Program, optional): :ref:`api_paddle_static_Program` for
        initializing parameters in ``parameter_list``. The default value
        is None, at this time :ref:`api_paddle_static_default_startup_program` will be used.
    parameter_list (Iterable, optional): Iterable of ``Tensor`` or ``Tensor.name`` to update
        to minimize ``loss``. The default value is None, at this time all parameters
        will be updated.
    no_grad_set (set, optional): Set of ``Tensor``  or ``Tensor.name`` that don't need
        to be updated. The default value is None.

Returns:
    tuple: tuple (optimize_ops, params_grads), A list of operators appended
    by minimize and a list of (param, grad) tensor pairs, param is
    ``Parameter``, grad is the gradient value corresponding to the parameter.
    The returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to
    indicate program pruning. If so, the program will be pruned by ``feed`` and
    ``fetch_list`` before run, see details in ``Executor``.

Examples:

    .. code-block:: python

        >>> import paddle
        >>> paddle.enable_static()
        >>> import paddle.distributed.fleet as fleet
        >>> import paddle.nn.functional as F

        >>> hid_dim = 10
        >>> label_dim = 2
        >>> input_x = paddle.static.data(name='x', shape=[None, 13], dtype='float32')
        >>> input_y = paddle.static.data(name='y', shape=[None, 1], dtype='int64')
        >>> fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim, activation='tanh')
        >>> fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim, activation='tanh')
        >>> prediction = paddle.static.nn.fc(x=[fc_2], size=label_dim, activation='softmax')
        >>> cost = F.cross_entropy(input=prediction, label=input_y)
        >>> avg_cost = paddle.mean(x=cost)

        >>> fleet.init(is_collective=True)
        >>> strategy = fleet.DistributedStrategy()
        >>> linear = paddle.nn.Linear(10, 10)
        >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=linear.parameters())
        >>> optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        >>> optimizer.minimize(avg_cost)

        >>> # for more examples, please reference https://github.com/PaddlePaddle/PaddleFleetX

z loss can be list only in PS mode)	r   r   _minimize_implr   ri   rj   rN   r[   _minimize_losses_impl)rw   lossrH   parameter_listno_grad_sets        r;   minimizeFleet.minimize  sw    z $%%&&~ 
  !!##7799&& !CDD--~ r:   c                   0 n[         R                  " U R                  5      US'   [        5       (       a#  U R                  nXPl        UR                  U5      $ UR                  R                  U l	        [        U R                  S5      (       d  0 U R                  l        U R                  R                  S   U R                  R                  S'   U R                  R                  S   U R                  R                  S'   U R                  R                  S   U R                  R                  S'   U R                  R                  S   U R                  R                  S'   U R                  US'   U R                  /US'   XS	'   UcO  [        R                  R                  5       R!                  S
S9U l        [        R                  R                  5       nOUR!                  S
S9U l        X%S'   U/US'   U R$                  US'   U R                  R&                  (       d  U R                  R(                  (       a+  SSKJn  U" U 5      nUR/                  XX45      u  n	n
nnU	U
UU4$ [         R                  " U R                  5      US'   [         R                  " U R                  5      n/ n/ n/ n/ nU R0                  (       a4  [3        U R                  R4                  5      S:  a  UR7                  S5        [9        5       R;                  U R                  U5      nUR=                  5       (       a  U H  nUR?                  X5        M     / n/ nU H  nURA                  UU R$                  U R                  U5        URC                  5       (       a(  URE                  5       (       d  UR7                  U5        Mh  URC                  5       (       a(  URE                  5       (       a  UR7                  U5        M  UR7                  U5        M     U R0                  (       a  [3        U R                  R4                  5      S:  ay  SUS'   SSK#J$n  U" U R                  5      nURA                  UU R$                  U R                  U5        URK                  5         UR7                  U5        UR7                  U5        S n[M        SU5        U RN                  RQ                  UU R$                  U R                  UUU5      u  nn[M        SU5        [M        SU5        U RN                  RS                  X5      n[         R                  " U5      US'   [T        RV                  " S[Y        US   5      -   5        [T        RV                  " S[Y        US   5      -   5        U RN                  R[                  5       nU RN                  R]                  5       nUUS'   UUS'   XPl        UU l/        U R^                  Ra                  5         / n	/ n
U R$                  Rc                  5       (       a  U R0                  (       d  U Rd                  c  [g        5       Ri                  U5      U l2        [j        Rl                  " U R                  5      nUUR                  R                  l7        U R                  R                  UUUUS9$ U(       Gaf  [T        RV                  " S [Y        [q        UR                  R                  5      5      -   5        UR                  UUUUS9u  p[T        RV                  " S![Y        [q        UR                  R                  5      5      -   5        [        R                  Rs                  5       n[T        RV                  " S"[Y        [q        U5      5      -   5        [q        U5      [q        UR                  R                  5      :w  a3  [        Rt                  Rw                  UR                  R                  5        [T        RV                  " S#[Y        [q        U5      5      -   5        OU R                  R                  UUUUS9u  n	n
XS$'   XS%'   U(       a\  [T        RV                  " S&[Y        [q        UR                  R                  5      5      -   5        UR                  UUUUS9u  pXS''   XS('   OAUR                  R                  Rx                  c   [{        UR                  R                  X 5        U R$                  R|                  (       d  [        R                  Rs                  5       nUR~                  c  0 OUR~                  nU R                  5       US)'   U R                  5       US*'   U R                  R                  R                  5        H  u  nnU(       d  UU;  d  M  UUU'   M     UUl?        U Rd                  c  [g        5       Ri                  U5      U l2        SS+KDJEn   U R                  R                  US   5        X4$ ),Nuser_defined_strategydistributed_info_r!  r   r"  r%  origin_main_programorigin_main_programsr(  Ffor_testorigin_startup_programorigin_startup_programsr      )AutoParallelizerr   ShardingOptimizerTuse_fleet_psr   ParameterServerOptimizerzvalid_optimizer_list=zmeta_optimizer=zgraph_optimizer=r  zvalid_strategy: zuser_defined_strategy: r  r"  r*  zbefore minimize program id: zafter minimize program id: zdefault program id: z!default program id after switch: program_optimize_opsprogram_params_gradsz"before graph minimize program id: graph_optimize_opsgraph_optimize_gradsmpi_sizempi_rankr}   )Hr   r   rJ   r   ru   rr   r+  blockr  r0  r  r/  r   rs   r  default_startup_programcloner4  ri   	semi_autoauto_search!auto_parallel.static.parallelizerr7  parallelizerN   rt  sparse_table_configsr8  r   _get_valid_meta_optimizers_is_strict_auto_enable_strategy_set_basic_info
_can_apply_is_graph_outmeta_optimizersr;  clearr  rp   generate_optimizer_get_valid_strategyr   debugr   r  r#  r  _enable_envrj   rZ   r   _create_runtimer   CompiledProgram_graphidr  	frameworkswitch_main_program_pass_appliedrW   _is_heter_parameter_server_mode
_fleet_optr   r   trainer_desc_configsr   r   r~   r   _set_strategy)!rw   r(  rH   r)  r*  r  
target_optr7  auto_parallelizeroptimize_opsparams_gradsdist_startup_progdist_main_progcopy_user_defined_strategycan_not_apply_optimizer_listvalid_optimizer_listvalid_graph_optimizer_list
skip_namesdistributed_optimizer_listoptr;  meta_optimizergraph_optimizerr  r  r"  compiled_programdefault_programr  opt_infokvr~   s!                                    r;   r&  Fleet._minimize_impl  s"	    +/=='',
'( 44J#M&&t,, (,zz'9'9D$4335HII=?((://@@M ((::;G //@@M ((::;G //@@M ((::;G
 //@@% ((::% .2-E-EG)*/3/G/G.HG*+"FO&MM99;AA!& B  +
 #)--"G"G"I.=.C.C" /D /+ 1@,-2A1BG-.$($4$4GL! ++55..::P$4T$:! &11>  %" ! %"	  04}}++0G+, *.++*& ,.(#% )+&J##33HHIAM!!"56 %&AA// ' *99;;5C(()CM 6 $& )+&1##$$//.	 >>##C,=,=,?,?(//4^^%%#*;*;*=*=.55c:077< 2 ##33HHIAM*.'E!9//" ..$$//.	 %**,$++N;,33NC #' )+?@
 &&99  ++*$* #^4$o6!33GG*N )-n(EG$%LL+c':J2K.LLMLL)g5678
 !% 6 6 M M O&&>>@  ,=G'(,>G()#M"0D++-LL   4466++''/+9+;+K+K,D( $,#;#;,,$  -=

"")22;;#" +	 <   2SDJJ<N<N9O5PP .<-D-D#" +	 .E .* 1C4::;M;M8N4OO #)--"D"D"F3c"_:M6NNOo&"TZZ-?-?*@@$$889K9KL7"_-./ //88#" +	 9    /;*+.:*+8"TZZ//012 .=-E-E#" +	 .F .* 1=,-2>./##119

 2 2OJ##CC --<<>!,,4B':L:L  (,'8$'+'8'8':$ 00EEKKMAX-&' N &."##+'5'7'G'G'P$0JJ$$W-=%>?--r:   c           	        0 nUS   R                   R                  U l        U R                  US'   / US'   U H+  nUS   R                  UR                   R                  5        M-     XS'   Uc:  [	        U5      S:X  a   [
        R                  R                  5       /nO[        S5      eUR                  5       nUS   R                  SS9U l        US   US	'   / US
'   U H  nUS
   R                  U5        M     U R                  US'   [        R                  " U R                  5      US'   [        R                  " U R                  5      US'   XPl        US   U l        U R                   R#                  5         / n	/ n
SSKJn  U" U R(                  5      nUR+                  UU R                  U R(                  U R                  5        UR-                  XX4S9u  p[/        [	        U5      5       H"  nX}   R1                  X-   R2                  5        M$     XS'   XS'   U H  nUR                   R                  nUR4                  c  0 OUR4                  nU R7                  5       US'   U R9                  5       US'   U R                  R:                  R=                  5        H  u  nnU(       d  X;  d  M  UX'   M     Xl        [>        R@                  " S[C        [E        U5      5      -   [C        UR4                  5      -   5        M     U RF                  c  [I        5       RK                  U5      U l#        SSK&J'n  URP                  RS                  US   5        X4$ )Nr   r0  r1  r(  r   z0startup_program can't be None when loss is list.Fr2  r4  r5  r   r.  r  r:  r<  r=  r>  rA  rB  zfleet base opt info: r}   )*rC  r  r0  r8  rt  rs   r  rD  r[   r   rE  r4  ri   r   rJ   rr   r  rV  rQ  r;  ru   rN  minimize_losses_implr   _rebuild_from_descdescr_  r   r   r`  r   r   r   r   rZ  rZ   r   rW  r   r~   r   ra  )rw   lossesstartup_programsr)  r*  r  r(  ori_startup_programsr  rd  re  r;  ps_optimizerirs  rt  ru  r~   s                     r;   r'  Fleet._minimize_losses_impl	  s8     $*!9??#:#: )-)A)A%&*,&'D*+224::3E3EF  #6{a$*MM$I$I$K#L  F   0446&6q&9&?&?&?&O#,<Q,?()-/)*'G-.55g> ( !% 0 0+/=='',
'( %)MM$2M2M$N !%&67'')=/0K0KL$$''''		
 &2%F%Fn &G &
" s/01A #667G7J7O7OP 2*6&'*6&'Djj((G$//7rW=O=OH#'??#4HZ #'#4#4#6HZ  ,,AAGGI)"#HK J "*KK'bk"#g(()* $ '#1#3#C#CG#LD ,

  )9!:;))r:   )rr   r   rN   ri   rZ   r  rJ   rq   r$  r!  r&  r   r'  r   r0  r4  r"  r#  r%  rp   ru   r  )rd   None)NFNINFO)