import copy

from paddle.distributed import fleet
from paddle.framework import in_dynamic_mode

from .meta_optimizers import HeterParallelOptimizer, HybridParallelOptimizer
from .utils.log_util import logger


def _dygraph_distributed_optimizer(optimizer, strategy=None):
    """
    Optimizer for distributed training.

    For distributed training, this method rebuilds the given optimizer as a new
    instance of DistributedOptimizer, which keeps the basic Optimizer behavior
    and adds special features for distributed training.

    Args:
        optimizer(Optimizer): The optimizer to be wrapped for distributed
            training.
        strategy(DistributedStrategy): Extra properties for the distributed
            optimizer. It is recommended to set the DistributedStrategy in
            fleet.init(); the strategy here exists only for compatibility. If
            the strategy passed to fleet.distributed_optimizer() is not None,
            it overwrites the DistributedStrategy from fleet.init() and takes
            effect in distributed training.
    Returns:
        Optimizer: a distributed optimizer instance, or the original optimizer
        when running with a single worker.
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.distributed.fleet as fleet
            >>> fleet.init(is_collective=True)
            >>> strategy = fleet.DistributedStrategy()
            >>> linear = paddle.nn.Linear(10, 10)
            >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=linear.parameters())
            >>> optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
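            >>> # Sketch of one training step with the wrapped optimizer; the
            >>> # random input and mean loss below are illustrative additions,
            >>> # not part of the original example.
            >>> inputs = paddle.randn([4, 10], dtype='float32')
            >>> loss = paddle.mean(linear(inputs))
            >>> loss.backward()
            >>> optimizer.step()
            >>> optimizer.clear_grad()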

a$  It is recommended to use DistributedStrategy in fleet_env.init(). The strategy here is only for compatibility. If the strategy in fleet_env.distributed_optimizer() is not None, then it will overwrite the DistributedStrategy in fleet_env.init(), which will take effect in distributed training.r   
pp_configsFz7sep parallel can not coexist with sharding_comm_overlap)r   user_defined_optimizer_is_collectiver   warningcopydeepcopy_user_defined_strategy_context
worker_numheter_ccl_moder   _hcghybrid_configsdp_comm_overlap
_dp_enable_sep_enablesharding_comm_overlap_sharding_enabler   )	optimizerstrategy	fleet_envhp_optims       b/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/fleet/optimizer.py_dygraph_distributed_optimizerr       s'   4 I'0$##NNB ,0==+B	(I!//>>.>>9+K+KH //>>o ',#',$//>>##$ -2)#// M/ O);;      c                  v    [        5       (       a  [        U 0 UD6$ [        R                  R                  " U 0 UD6$ N)r   r    r   distributed_optimizer)argskwargss     r   r$   r$   `   s5    -t>v>>{{00$A&AAr!   r#   )r   paddle.distributedr   paddle.frameworkr   meta_optimizersr   r   utils.log_utilr   r    r$    r!   r   <module>r,      s#     $ , L "EPBr!   