
    ёi                       S SK Jr  S SKrS SKrS SKJr  S SKrS SKJr	  S SK
Jr  S SKJr  S SKJrJrJr  S SKJr  S SKJr  S SKJrJrJr  S S	KJr  S S
KJrJrJrJ r   \(       a  S SKJ!r!  / r"S&S jr#S&S jr$S&S jr%Sq&S r'S r(S'S jr)S r* " S S5      r+ " S S\+5      r,S r- " S S5      r. " S S\.5      r/ " S S\.5      r0Sq1S r2Sq3S  r4 " S! S"\.5      r5\Rl                  S(S# j5       r7S$ r8S% r9\.r:\/r;\0r<\5r=g))    )annotationsN)TYPE_CHECKING)_C_ops)core	frameworkunique_name)check_variable_and_dtype)DataType)Variable
check_typedefault_main_program)get_complete_pp_mesh)LayerHelperin_dynamic_modein_dynamic_or_pir_modein_pir_mode)Tensorc                   [        5       (       a  [        R                  " X5      $ [        S0 [	        5       D6n[        U S/ SQS5        [        US[        S5        Uc1  [        R                  " SR                  UR                  S/5      5      nUR                  U R                  X R                  SS9nUR                  SSU 0SU0S	U0S
9  U$ )aM  

Limits the L2 norm of the input :math:`x` within :math:`max\_norm`.
If the L2 norm of :math:`x` is less than or equal to :math:`max\_norm`, :math:`out` will be
the same as :math:`x`. If the L2 norm of :math:`x` is greater than :math:`max\_norm`, :math:`x` will
be linearly scaled to make the L2 norm of :math:`out` equal to :math:`max\_norm`, as
shown in the following formula:

.. math::

    out = \frac{max\_norm * x}{norm(x)}

where :math:`norm(x)` represents the L2 norm of :math:`x`.

Args:
    x(Tensor): The input of clip_by_norm and data type is float32.
        The number of dimensions must be between [1, 9].
    max_norm(float): The maximum norm value.
    name(str, optional): For detailed information, please refer
        to :ref:`api_guide_Name`. Usually name is no need to set and
        None by default.

Returns:
    Tensor: The output of clip_by_norm with shape as input.
        The data type is float32.

Examples:

    .. code-block:: python

        >>> import paddle
        >>> from paddle.nn import clip

        >>> input = paddle.to_tensor([[2.0, 2.0], [2.0, 2.0]], dtype='float32')
        >>> reward = clip.clip_by_norm(x=input, max_norm=1.0)
        >>> print(reward)
        Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
        [[0.50000000, 0.50000000],
         [0.50000000, 0.50000000]])
clip_by_normX)float16float32uint16max_norm.tmpF)typenamedtypepersistableOutr   inputsattrsoutputs)r   )r   r   r   r   localsr	   r   floatr   generate_with_ignorable_keyjoinr   create_variabler   r   	append_op)xr   r   helperouts        N/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/nn/clip.pyr   r   +   s    T ""1//4684F	30. xen=|66HHfkk5)*
 
 
 VV$gg5 ! C Qx8$	   J    c                    [        5       (       a  [        R                  " U 5      $ [        S0 [	        5       D6nUR                  U R                  S9nUR                  SSU 00 SU0S9  U$ )aS  
Merge by adding duplicated rows in the input SelectedRows object.

Args:
    x(Tensor): The input selected rows to be merge.
    name(basestring|None): Name of the output.

Returns:
    Tensor, merged output.

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.base as base

        >>> b = paddle.static.default_main_program().global_block()
        >>> var = b.create_var(
        ...     name="X", dtype="float32", persistable=True,
        ...     type=base.core.VarDesc.VarType.SELECTED_ROWS)
        >>> y = paddle.nn.clip.merge_selected_rows(var)
merge_selected_rowsr   r   r!   r"   )r2   )r   r   r2   r   r&   "create_variable_for_type_inferencer   r+   r,   r   r-   r.   s       r/   r2   r2   q   sr    0 ))!,,;&(;F

3
3!''
3
BC
"Qx	   Jr0   c                r   [        5       (       a  [        R                  " U 5      $ [        U S[        S5        U R
                  [        R                  R                  R                  :w  a  [        S5      e[        S0 [        5       D6nUR                  U R                  S9nUR                  SSU 0SU00 S9  U$ )	a  
Get tensor data from input with SelectedRows type, and outputs a Tensor.

.. code-block:: text

    input x is SelectedRows:
       x.rows = [0, 5, 5, 4, 19]
       x.height = 20
       x.value = [[1, 1] [2, 2] [2, 2] [3, 3] [6, 6]]

    Output is DenseTensor:
       out.shape = [5, 2]
       out.data = [[1, 1],
                   [2, 2],
                   [2, 2],
                   [3, 3],
                   [6, 6]]

Args:
    x(SelectedRows): Input with SelectedRows type. The data type is float32, float64, int32 or int64.
    name(str, optional): The default value is None.  Normally there is no need for user to set this property.
        For more information, please refer to :ref:`api_guide_Name` .

Returns:
    Variable: DenseTensor transformed from SelectedRows. The data type is same with input.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import paddle.base as base
        >>> from paddle.base import core
        >>> paddle.enable_static()
        >>> scope = core.Scope()
        >>> block = paddle.static.default_main_program().global_block()
        >>> x_rows = [0, 5, 5, 4, 19]
        >>> height = 20
        >>> x = scope.var('X').get_selected_rows()
        >>> x.set_rows(x_rows)
        >>> x.set_height(height)
        >>> x = block.create_var(name="X", dtype="float32", persistable=True, type=base.core.VarDesc.VarType.SELECTED_ROWS)
        >>> z = paddle.nn.clip.get_tensor_from_selected_rows(x)
r,   get_tensor_from_selected_rowszGThe type of 'x' in get_tensor_from_selected_rows must be SELECTED_ROWS.r3   r   r!   r   r#   r%   r$   )r7   )r   r   r7   r   r   r   r   VarDescVarTypeSELECTED_ROWS	TypeErrorr   r&   r4   r   r+   r5   s       r/   r7   r7      s    X }}33A66q#x!@Avv%%333U
 	
 EFHEF

3
3!''
3
BC
,Qx	   Jr0   Fc                     [        U 5      S::  d   e[        U 5      S:X  a'  [        U S   [        5      (       d   e[        nU S   qU$ [        $ )N   r   )len
isinstancebool'_clip_by_global_norm_using_mp_type_flagargs	old_values     r/   "_clip_by_global_norm_using_mp_typerF      sO    t9>>
4yA~$q'4((((;	26q'/66r0   c                &   U R                   [        R                  R                  R                  :X  d2  U R                   [        R                  R                  R
                  :X  aB  [        5       (       a3  U R                  [        R                  R                  R                  5      $ U R                   [        R                  :X  d  U R                   [        R                  :X  a.  [        5       (       a  U R                  [        R                  5      $ U $ N)r   r   r9   r:   FP16BF16rF   astypeFP32r
   FLOAT16BFLOAT16FLOAT32r,   s    r/   _cast_to_mp_type_if_enabledrQ      s    	4<<'',,,77dll**///
,
.
.xx,,1122	8###qww(2C2C'C
,
.
.xx(())r0   c                    U R                  5       (       a  UR                  5       (       d  gU R                  5       (       d  U R                  5       (       a  [        U R                  5      S:w  a  gg)NFr   T)_is_initializedis_distis_denser?   shape)grad
clip_inputs     r/   _can_inplace_clip_gradrY      sN    !!)C)C)E)E 	$--//s4::!/Cr0   c                   [        U 5      n [        5       (       a  [        R                  " U 5      $ Sn[	        U S/ SQU5        [        U40 [        5       D6nUR                  U R                  5      nSU 0nSU0nUR                  XUS9  U$ )z)
Return the squared L2 norm of a tensor.
squared_l2_normr,   )r   float64r   r   r   r!   r   r#   r%   )
rQ   r   r   r[   r	   r   r&   r4   r   r+   )r,   op_typer-   r.   r#   r%   s         r/   _squared_l2_normr_      s    
 	$A&A%%a((G	3;W -FH-F

3
3AGG
<C1XFclG
''BJr0   c                       \ rS rSrS rS rSrg)BaseErrorClipAttri  c                    [         erH   NotImplementedErrorselfs    r/   __str__BaseErrorClipAttr.__str__      !!r0   c                    [         erH   rc   )rf   block	grad_names      r/   _append_clip_op!BaseErrorClipAttr._append_clip_op  ri   r0    N)__name__
__module____qualname____firstlineno__rg   rm   __static_attributes__ro   r0   r/   ra   ra     s    ""r0   ra   c                  .    \ rS rSrSrSS jrS rS rSrg)	ErrorClipByValuei  a  
Clip tensor values to the range [min, max].

Given a tensor ``t`` (see Examples below), this operation clips its value \
to ``min`` and ``max`` inplace.

- Any values less than min are set to min.
- Any values greater than max are set to max.

Args:
    max (float): The maximum value to clip by.
    min (float, optional): The minimum value to clip by. if not set by user, \
    will be set to ``-max`` by framework.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> paddle.enable_static()
        >>> BATCH_SIZE = 128
        >>> CLIP_MAX = 2e-6
        >>> CLIP_MIN = -1e-6
        >>> prog = paddle.static.Program()
        >>> with paddle.static.program_guard(main_program=prog):
        ...     image = paddle.static.data(name='x', shape=[None, 784], dtype='float32')
        ...     hidden1 = paddle.static.nn.fc(image, size=128, activation='relu')
        ...     hidden2 = paddle.static.nn.fc(hidden1, size=64, activation='relu')
        ...     predict = paddle.static.nn.fc(hidden2, size=10, activation='softmax')
        ...     label = paddle.static.data(name='y', shape=[1], dtype='int64')
        ...     cost = paddle.nn.functional.cross_entropy(input=predict, label=label)
        ...     avg_cost = paddle.mean(cost)
        >>> prog_clip = prog.clone()
        >>> prog_clip.block(0).var(hidden1.name)._set_error_clip(
        ...     paddle.nn.clip.ErrorClipByValue(
        ...         max=CLIP_MAX, min=CLIP_MIN))
Nc                V    [        U5      nUc  U* nO[        U5      nXl        X l        g rH   )r'   maxmin)rf   rx   ry   s      r/   __init__ErrorClipByValue.__init__E  s*    Cj;$C*Cr0   c                >    SU R                   S SU R                  S 3$ )NzByValue, min=f, max=ry   rx   re   s    r/   rg   ErrorClipByValue.__str__N  s!    txxl&!==r0   c                   UR                   R                  5       nUR                  S5        UR                  SU/5        UR	                  SU/5        UR                  SU R                  5        UR                  SU R                  5        g )Nclipr   r!   ry   rx   )descr+   set_type	set_input
set_output	_set_attrry   rx   )rf   rk   rl   clip_op_descs       r/   rm    ErrorClipByValue._append_clip_opQ  sn    zz++-f%sYK0	{3udhh/udhh/r0   rx   ry   rH   )	rp   rq   rr   rs   __doc__rz   rg   rm   rt   ro   r0   r/   rv   rv     s    $L>0r0   rv   c                   UnU R                   R                  U R                   R                  5       S-
  5      nUR                  5        Vs/ s H  oDU;   d  M
  UPM     sn H\  nU R	                  X%   5      n[        USS 5      nUb   [        U[        5      (       d  [        S5      eUc  MK  UR                  X5        M^     g s  snf )Nr>   
error_clipzIVariable's error_clip should be an instance of BaseErrorClipAttr or None.)
r   opop_sizeoutput_arg_names_var_recursivegetattrr@   ra   r<   rm   )rk   contextgrad_to_varop_descngrad_nfwd_varr   s           r/   error_clip_callbackr   Z  s    KjjmmEJJ..0145G%668M8<L18M&&{':;WlD9
*Z9J"K"K[  !&&u5 NMs   	CCc                     ^  \ rS rSrU 4S jrS r\R                  " 5       S 5       rS r	S r
    SS jrS rS	 rS
rU =r$ )ClipGradBaseik  c                "   > [         TU ]  5         g rH   )superrz   )rf   	__class__s    r/   rz   ClipGradBase.__init__l  s    r0   c                    [         erH   rc   re   s    r/   rg   ClipGradBase.__str__o  ri   r0   c                    [         erH   rc   rf   params_gradss     r/   _dygraph_clipClipGradBase._dygraph_clipr  s    !!r0   c                    [         erH   rc   r   s     r/   	_pir_clipClipGradBase._pir_clipv  ri   r0   c                    [         erH   rc   r   s     r/   _static_clipClipGradBase._static_clipy  ri   r0   c                   [        5       (       a  U R                  U5      $ [        5       (       a  U R                  U5      $ U H+  u  p#[	        USS 5      c  M  [
        R                  " S5          O   U R                  U5      $ )Ngradient_clip_attrz'set_gradient_clip' will be ineffective, because you have set 'need_clip' in 'ParamAttr'. So, 'set_gradient_clip' is redundant and you can remove it.)r   r   r   r   r   warningswarnr   )rf   r   pgs       r/   __call__ClipGradBase.__call__|  sy     %%l33]]>>,//$12D9EMM>
  % $$\22r0   c                    [         erH   rc   rf   r   paramrW   s       r/   _process_contextClipGradBase._process_context  ri   r0   c                    [         erH   rc   )rf   r   rW   s      r/   _create_operatorsClipGradBase._create_operators  ri   r0   ro   )r   list[tuple[Tensor, Tensor]]returnr   )rp   rq   rr   rs   rz   rg   imperative_baseno_gradr   r   r   r   r   r   rt   __classcell__r   s   @r/   r   r   k  sX    " " """373	$3$"" "r0   r   c                     ^  \ rS rSr% SrS\S'   S\S'   SSU 4S jjjrSS jr\R                  " 5       S 5       r
S	 rS
 rS rSrU =r$ )ClipGradByValuei  a\  
Limit the value of multi-dimensional Tensor :math:`X` to the range [min, max].

- Any values less than min are set to ``min``.

- Any values greater than max are set to ``max``.

The multi-dimensional Tensor :math:`X` is not passed from this class, but the gradients of all parameters set in ``optimizer``.
If ``need_clip`` of specific param is ``False`` in its ``ParamAttr``, then the gradients of this param will not be clipped.

Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
(for example: :ref:`api_paddle_optimizer_SGD`).

Note:
    ``need_clip`` of ``ClipGradByValue`` HAS BEEN DEPRECATED since 2.0.
    Please use ``need_clip`` in ``ParamAttr`` to specify the clip scope.

Args:
    max (float): The maximum value to clip by.
    min (float, optional): The minimum value to clip by. if not set by user, it will be set to ``-max``
        automatically. In this case, ``max`` must be greater than :math:`0`.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
        >>> linear = paddle.nn.Linear(in_features=10, out_features=10,
        ...                           weight_attr=paddle.ParamAttr(need_clip=True),
        ...                           bias_attr=paddle.ParamAttr(need_clip=False))
        >>> out = linear(x)
        >>> loss = paddle.mean(out)
        >>> loss.backward()

        >>> clip = paddle.nn.ClipGradByValue(min=-1, max=1)
        >>> sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
        >>> sdg.step()
r'   rx   ry   c                ~   > [         TU ]  5         Uc  US:  d   eU* n[        U5      U l        [        U5      U l        g )N        )r   rz   r'   rx   ry   )rf   rx   ry   r   s      r/   rz   ClipGradByValue.__init__  s<    ;99$C::r0   c                >    SU R                   S SU R                  S 3$ )NzClip Gradient By Value, min = r}   r~   r   re   s    r/   rg   ClipGradByValue.__str__  s!    /|6$((1NNr0   c                    / nU Hi  u  p4Uc  M
  [        USS5      SL a  UR                  X445        M.  [        R                  " X@R                  U R
                  S9nUR                  X545        Mk     U$ )N	need_clipTFr,   ry   rx   )r   appendpaddler   ry   rx   rf   r   params_and_gradsr   r   new_grads         r/   r   ClipGradByValue._dygraph_clip  sp     DAyq+t,5 ''/{{QHH$((CH##QM2 !  r0   c           	        / n0 n[         R                  " S5         U H  u  pEUc  M
  [        USS5      SL a  UR                  XE45        M.  UR                  R
                  R                  XE/5         [        R                  " XPR                  U R                  S9nS S S 5        UR                  UW45        UR                  X4R                  '   M     S S S 5        [        X#5        U$ ! , (       d  f       NR= f! , (       d  f       N,= f)Ngradient_clipr   TFr   )r   
name_scoper   r   rk   program_optimized_guardr   r   ry   rx   r   _correct_clip_op_role_varrf   r   r   param_new_grad_name_dictr   r   r   s          r/   r   ClipGradByValue._static_clip  s    #% !!/2$91k40E9$++QF3WW__55qf=%{{QHH$((KH > ''H63;==(0 % 3 	""2M >= 32s$   AC92*C(7C9(
C62C99
Dc                    g rH   ro   r   s       r/   r    ClipGradByValue._process_context      r0   c                Z    [         R                  " X R                  U R                  S9nX4$ )Nr   )r   r   ry   rx   rf   r   rW   r   s       r/   r   !ClipGradByValue._create_operators  s"    ;;88Br0   r   rH   )rx   r'   ry   zfloat | Noner   Noner   str)rp   rq   rr   rs   r   __annotations__rz   rg   r   r   r   r   r   r   rt   r   r   s   @r/   r   r     sV    %N 
J	J O 
  
  $ r0   r   c                     ^  \ rS rSr% SrS\S'   SU 4S jjrSS jrS r\	R                  " 5       S 5       rS	 rS
 rS rS rSrU =r$ )ClipGradByNormi  ao  
Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .

- If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.

- If the l2 norm of :math:`X` is less than or equal to ``clip_norm`` , nothing will be done.

The multidimensional Tensor :math:`X` is not passed from this class, but the gradients of all parameters set in ``optimizer``.
If ``need_clip`` of specific param is ``False`` in its ``ParamAttr``, then the gradients of this param will not be clipped.

Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
(for example: :ref:`api_paddle_optimizer_SGD`).

The clipping formula is:

.. math::
    Out =
    \left\{
        \begin{array}{ccl}
            X & & if (norm(X) \leq clip\_norm) \\
            \frac{clip\_norm*X}{norm(X)} & & if (norm(X) > clip\_norm) \\
    \end{array}
    \right.


where :math:`norm(X)` represents the L2 norm of :math:`X`.

.. math::
    norm(X) = ( \sum_{i=1}^{n}|x\_i|^2)^{ \frac{1}{2}}

Note:
    ``need_clip`` of ``ClipGradByNorm`` HAS BEEN DEPRECATED since 2.0.
    Please use ``need_clip`` in ``ParamAttr`` to specify the clip scope.

Args:
    clip_norm(float): The maximum norm value.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
        >>> linear = paddle.nn.Linear(in_features=10, out_features=10,
        ...                           weight_attr=paddle.ParamAttr(need_clip=True),
        ...                           bias_attr=paddle.ParamAttr(need_clip=False))
        >>> out = linear(x)
        >>> loss = paddle.mean(out)
        >>> loss.backward()

        >>> clip = paddle.nn.ClipGradByNorm(clip_norm=1.0)
        >>> sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
        >>> sdg.step()
r'   	clip_normc                B   > [         TU ]  5         [        U5      U l        g rH   )r   rz   r'   r   )rf   r   r   s     r/   rz   ClipGradByNorm.__init__+  s    y)r0   c                "    SU R                   S 3$ )Nz!Gradient Clip By Norm, clip_norm=r}   r   re   s    r/   rg   ClipGradByNorm.__str__/  s    24>>!2DEEr0   c                    / nU HS  u  p4Uc  M
  [        USS5      SL a  UR                  X445        M.  [        X@R                  S9nUR                  X545        MU     U$ )Nr   TFr,   r   )r   r   r   r   r   s         r/   _clip_gradientsClipGradByNorm._clip_gradients2  sf     DAyq+t,5 ''/#a..AH##QM2 !  r0   c                $    U R                  U5      $ rH   r   r   s     r/   r   ClipGradByNorm._dygraph_clip>  s    ##L11r0   c                $    U R                  U5      $ rH   r   r   s     r/   r   ClipGradByNorm._pir_clipB  s    ##L11r0   c                   / n[         R                  " S5         0 nU H  u  pEUc  M
  [        USS5      SL a  UR                  XE45        M.  UR                  R
                  R                  XE/5         [        XPR                  S9nS S S 5        WR                  X4R                  '   UR                  XF45        M     S S S 5        [        UW5        U$ ! , (       d  f       NR= f! , (       d  f       N-= f)Nr   r   TFr   )r   r   r   r   rk   r   r   r   r   r   r   r   s          r/   r   ClipGradByNorm._static_clipE  s    !!/2')$$91k40E9$++QF3WW__55qf=+a..IH >3;==(0 ''6 % 3 	""24LM >= 32s$   AC#2C6C#
C C##
C1c                    g rH   ro   r   s       r/   r   ClipGradByNorm._process_contextW  r   r0   c                .    [        X R                  S9nX4$ )Nr   )r   r   r   s       r/   r    ClipGradByNorm._create_operatorsZ  s    $@r0   r   )r   r'   r   r   r   )rp   rq   rr   rs   r   r   rz   rg   r   r   r   r   r   r   r   r   rt   r   r   s   @r/   r   r     sW    4l *F
  2 22 $ r0   r   c                     [        U 5      S:X  a  [        $ [        U 5      S:X  a  [        U S   [        5      (       d   e[        nU S   qU$ Nr   r>   )r?   &_allow_pure_fp16_global_norm_clip_flagr@   rA   rC   s     r/   !_allow_pure_fp16_global_norm_clipr   b  I    
4yA~554yA~*T!Wd";";;;:	15a.r0   c                     [        U 5      S:X  a  [        $ [        U 5      S:X  a  [        U S   [        5      (       d   e[        nU S   qU$ r   )r?   &_allow_pure_bf16_global_norm_clip_flagr@   rA   rC   s     r/   !_allow_pure_bf16_global_norm_clipr  p  r  r0   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'     S       SU 4S	 jjjrSS
 jr\R                  " 5       S 5       r
S rS rS rS rSrU =r$ )ClipGradByGlobalNormi{  a  
Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
:math:`t\_list` , and limit it to ``clip_norm`` .

- If the global norm is greater than ``clip_norm`` , all elements of :math:`t\_list` will be compressed by a ratio.

- If the global norm is less than or equal to ``clip_norm`` , nothing will be done.

The list of Tensor :math:`t\_list` is not passed from this class, but the gradients of all parameters set in ``optimizer``.
If ``need_clip`` of specific param is ``False`` in its ``ParamAttr``, then the gradients of this param will not be clipped.

Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
(for example: :ref:`api_paddle_optimizer_SGD`).

The clipping formula is:

.. math::

    t\_list[i] = t\_list[i] * \frac{clip\_norm}{\max(global\_norm, clip\_norm)}

where:

.. math::

    global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}

Note:
    ``need_clip`` of ``ClipGradyGlobalNorm`` HAS BEEN DEPRECATED since 2.0.
    Please use ``need_clip`` in ``ParamAttr`` to specify the clip scope.

Args:
    clip_norm (float): The maximum norm value.
    group_name (str, optional): The group name for this clip. Default value is ``default_group``.
    auto_skip_clip (bool, optional): skip clipping gradient. Default value is ``False``.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
        >>> linear = paddle.nn.Linear(in_features=10, out_features=10,
        ...                           weight_attr=paddle.ParamAttr(need_clip=True),
        ...                           bias_attr=paddle.ParamAttr(need_clip=False))
        >>> out = linear(x)
        >>> loss = paddle.mean(out)
        >>> loss.backward()

        >>> clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
        >>> sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
        >>> sdg.step()
r'   r   r   
group_namerA   auto_skip_clipc                   > [         TU ]  5         [        U5      U l        X l        [        U[        5      (       d   eX0l        S U l        SU l	        g )NF)
r   rz   r'   r   r  r@   rA   r  _async_add_nshould_comm_on_shard_dim)rf   r   r  r  r   s       r/   rz   ClipGradByGlobalNorm.__init__  sL     	y)$.$////, !(-%r0   c                "    SU R                   S 3$ )Nz)Gradient Clip By GlobalNorm, global_norm=r}   r   re   s    r/   rg   ClipGradByGlobalNorm.__str__  s    :4>>!:LMMr0   c                .   / n/ n/ n/ nSn[        U5      S:  a%  [        US   5      S:  a  US   S   R                  nOS nU GH  u  pU	c  M  [        USS5      SL a  M  U	n
[        5       (       a1  U	R	                  5       (       a  [        U	5      n
U
R                  5       n
OHU	R                  [        R                  R                  R                  :X  a  [        U	5      n
[        U
5      n
[        U
5      nUb  U	R                  U:w  a  Sn[        U	R                  5      n[        U	R                  R                   5      [        UR                   5      :  a#  Sn["        R$                  " XUR&                  5      n["        R$                  " XUR&                  5      nUR(                  [*        R,                  :X  d  UR(                  [*        R.                  :X  a  UR1                  U5        GM  UR(                  [*        R2                  :X  a  UR1                  U5        GM  UR1                  U5        GM     [        U5      [        U5      -   [        U5      -   S:X  a  U$ S n[        U5      S:  a  SOSn/ n[        U5      S:  a(  U" U5      nUR1                  UR5                  U5      5        [        U5      S:  a@  U" U5      nUS:X  a  UR1                  U5        O UR1                  UR5                  U5      5        [        U5      S:  a  U" U5      nUR1                  U5        U" U5      nU(       a  Ub  ["        R6                  " 5       nU(       a  SUR8                  ;   a  UR;                  S5      S	:  a  UR=                  S5      R?                  S5      nURA                  5       n["        RB                  " U["        RD                  RF                  US
9  ["        RH                  " UUR                  UR&                  5      nU RJ                  (       aU  [M        U S5      (       aD  [*        RN                  RC                  URA                  5       U RP                  S9RS                  5         U RJ                  (       aU  [M        U S5      (       aD  [*        RN                  RC                  URA                  5       U RT                  S9RS                  5         [*        RV                  " U5      n[*        RX                  " S	/XRZ                  S9nSnU R\                  (       d+  Sn[*        R^                  " U[*        R`                  " UUS9S9nOUU:  a  Sn[*        R^                  " UUS9nU GH4  u  pU	c  M  [        USS5      SL a  UR1                  X45        M/  U(       Ga  WR(                  U	R(                  :w  a  UR5                  U	R(                  5      OUnUR                  U	R                  :w  Ga5  [        U	R                  R                   5      [        UR                  R                   5      :  aS  UR&                  nSnU H  nURc                  5       (       a  M  Sn  O   U(       a  URA                  5       nO[e        S5      e[        U	R                  5      n[        U	R                  R                   5      [        UR                   5      :  a"  ["        R$                  " UUUR&                  5      n[*        RN                  R%                  UU	R                  UR&                  5      n[g        U	U5      (       a&  U	Ri                  U5        UR1                  X45        GM  [*        Rj                  " U	U5      nUR1                  UU45        GM"  UR1                  X45        GM7     U$ )NTr   r   Fc                J    [         R                  " U 5      R                  5       $ rH   r   stacksumvar_lists    r/   async_add_n7ClipGradByGlobalNorm._dygraph_clip.<locals>.async_add_n	      <<)--//r0   r\   r   ppr>   )r   groupsharding_group)r  mp_grouprV   r   
fill_valuer,   yzLReshard a sharded tensor from a local mesh to a global mesh is not supported)6r?   process_meshr   r   is_selected_rowsr2   _get_tensor_from_selected_rowsr   r   r9   r:   r;   r7   r_   r   setprocess_idsdistreshard
placementsr   r   r   bfloat16r   r   rK   get_mesh	dim_namesget_dim_sizeget_submesh_with_dim	get_group_local_value
all_reduceReduceOpSUMshard_tensorr  hasattrdistributedr  waitr  sqrtfullr   r  dividemaximumis_replicatedrd   rY   	multiply_multiply)rf   r   r   sum_square_listsum_square_list_fp16sum_square_list_fp32flag_auto_hybrid_ppsrc_meshr   r   
merge_grad
sum_squarepp_meshr  	sum_dtypeglobal_norm_varglobal_norm_var_fp16global_norm_var_fp32global_norm_var_fp64g_meshpp_groupglobal_norm_var_localmax_global_normr   clip_varrX   r(  is_replicate	placementr   s                                 r/   r   "ClipGradByGlobalNorm._dygraph_clip  sw   !!"|q Sa%9A%=#Aq)66HH DAyq+t,5J  Q%7%7%9%903
'FFH
4<<//===03
::F
)*5J #((B&+#.q~~>q~~112S9L9L5MM*.'!%"Z-B-B"J "\\**?*?

   FNN2##v6$++J7!!V^^3$++J7&&z2Q !X  &'(&'( 
  	0 "%_!5!9Iy	#$q(#./C#D ""#7#>#>y#IJ#$q(#./C#D I%&&';<&&';'B'B9'MN!##.#? ""#78%o6 8#7]]_FF,,,''-1 "66t<FFtL )8(D(D(F%)}}((" #'"3"3)#00#..# ((WT;K-L-L)),,.d6I6I * df((WT:-F-F)),,.dmm * df ++o6 ++#Y>>
 	""I}}!..?oFH .I}}/JH DAyq+t,5 ''/  ~~0 OOAGG,! 
 **ann< 1>>556"//;;:  &0%:%:
'+)3I#,#:#:#<#</4 % *4 ()3)@)@)BJ"5 n#  #7q~~"Fq~~99:S#//>  *. *GZ5J5J*J &,%7%7%?%?&
8M8M&
 *!Z88KK
+$++QF3%q*=H$++QM: ''/k !n  r0   c                   / n/ n/ n/ n/ n/ n/ n/ n	/ n
/ nSn[        5       nS nU GH  u  nnUR                  5       (       d  M  UR                  UR                  5       R                  5        SUR                  5       R                  R
                  ;   d  Mq  Uc  UR                  5       R                  nM  UR                  5       R                  n[        UR
                  5      [        UR
                  5      :  a  Un[        UR
                  5      [        UR
                  5      ::  a  GM   e   [        U5      S:  a  SSKJn  SnUc   eU GH  u  nnUc  M  [        USS5      SL a  M  Un[        5       (       a+  UR                  5       (       a  [        U5      n[        U5      n[        U5      nU(       a  UR                  5       R                  U:w  aq  [        R                   R#                  UUW" UR                  5       R$                  UR                  5       R                  UR                  5       R&                  5      5      nU R(                  (       a  UR*                  S   (       a  UR,                  [.        R0                  :X  d  UR,                  [.        R2                  :X  a  UR5                  U5        GMp  UR,                  [.        R6                  :X  a  UR5                  U5        GM  UR5                  U5        GM  UR8                  (       a  UR,                  [.        R0                  :X  d  UR,                  [.        R2                  :X  a  UR5                  U5        GM  UR,                  [.        R6                  :X  a  UR5                  U5        GMI  UR5                  U5        GM]  UR,                  [.        R0                  :X  d  UR,                  [.        R2                  :X  a  U
R5                  U5        GM  UR,                  [.        R6                  :X  a  UR5                  U5        GM  U	R5                  U5        GM     [        U5      [        U5      -   [        U5      -   [        U5      -   [        U5      -   [        U5      -   [        U	5      -   [        U
5      -   [        U5      -   S:X  a  U$ S n[        U5      [        U5      -   [        U	5      -   S:  a  S	OS
n/ n/ n/ n[        U5      S:  a(  U" U5      nUR5                  UR;                  U5      5        [        U5      S:  a(  U" U5      nUR5                  UR;                  U5      5        [        U
5      S:  a(  U" U
5      nUR5                  UR;                  U5      5        [        U5      S:  a@  U" U5      nUS
:X  a  UR5                  U5        O UR5                  UR;                  U5      5        [        U5      S:  a@  U" U5      nUS
:X  a  UR5                  U5        O UR5                  UR;                  U5      5        [        U5      S:  a@  U" U5      nUS
:X  a  UR5                  U5        O UR5                  UR;                  U5      5        [        U5      S:  a  U" U5      nUR5                  U5        [        U5      S:  a  U" U5      nUR5                  U5        [        U	5      S:  a  U" U5      nUR5                  U5        S n[        U5      S:  a  U" U5      n[        U5      S:  a	  U" U5      nO9U R(                  (       a(  U R<                  (       a  [        R>                  " S/USS9nU R(                  (       a  U R<                  (       a  [        R@                  RC                  WU RD                  RF                  [H        RJ                  RL                  5      n[        R@                  RC                  UU RN                  RF                  [H        RJ                  RL                  5      nUc  UnOUU-   n[        U5      S:  a	  U" U5      nO9U R(                  (       a(  U RP                  (       a  [        R>                  " S/USS9nU R(                  (       ai  U RP                  (       aX  [        R@                  RC                  WU RD                  RF                  [H        RJ                  RL                  5      nUc  UnOUU-   n[        RR                  " U5      n[        R>                  " S/UR,                  U RT                  S9n Sn!U RV                  (       d+  Sn![        RX                  " U [        RZ                  " UU S9S9n"OUU :  a  Sn![        RX                  " U US9n"U GHv  u  nnUc  M  [        USS5      SL a  UR5                  UU45        M1  U!(       Ga*  W"R,                  UR,                  :w  a  U"R;                  UR,                  5      OU"n#U(       a  U#R                  5       R                  UR                  5       R                  :w  a  [        R                   R#                  U#UR                  5       R                  W" U#R                  5       R$                  U#R                  5       R                  U#R                  5       R&                  5      5      n#[        R\                  " UU#5      n$UR5                  UU$45        GMc  UR5                  UU45        GMy     U$ )NFr   r>   )to_placementsTr   	no_fusionc                J    [         R                  " U 5      R                  5       $ rH   r  r  s    r/   r  3ClipGradByGlobalNorm._pir_clip.<locals>.async_add_n  r  r0   r\   r   r   r  r  )/r$  is_dist_dense_tensor_typeadd	dist_attrr!  r%  r?   /paddle.distributed.auto_parallel.placement_typerT  r   r   is_selected_row_typer2   r7   r_   r   r5  r'  dims_mappingpartial_dimsr  optimize_attrr   r
   rM   rN   r   rO   is_distributedrK   has_dist_paramr8  r   r0  r  idr&  r1  r2  r  has_not_dist_paramr7  r   r  r9  r:  r=  )%rf   r   r   no_fusion_sum_squareno_fusion_sum_square_fp16no_fusion_sum_square_fp32sum_square_distsum_square_dist_fp16sum_square_dist_fp32sum_square_not_distsum_square_not_dist_fp16sum_square_not_dist_fp32auto_parallel_pp	pp_meshespp_stage0_meshr   r   p_meshrT  rC  rD  r  rF  no_fusion_global_normglobal_norm_distglobal_norm_not_distrH  rI  rJ  rG  global_norm_dist_varglobal_norm_not_dist_varrN  r   rO  rX   r   s%                                        r/   r   ClipGradByGlobalNorm._pir_clip  s	     "$&!$&! !! !#% #%  E	 DAq**,,akkm88922>>>%-)*)C)C!"!;!;~99:S"..>  .4N"6#5#56#*66;      !  y>A  $!--- DAqyq+t,5J}}!7!7!9!903
::F
)*5J ((*77>I#//77"!",,.;;",,.;;",,.;;
 11??;/ $$(8(88!''8+<+<<-44Z@%%)9)99-44Z@(//
;!!$$(8(88!''8+<+<<(//
;%%)9)99(//
;#**:6 $$(8(88!''8+<+<<,33J?%%)9)99,33J?'..z:s !z $%+,-+,- /"# &'	(
 &'( %&' *+, *+, 	  	0
 '(/"#%&'  
  	 !#!()A-#./H#I !(()=)D)DY)OP#$q(#./C#D ##$8$?$?	$JK'(1,#./G#H  ''(<(C(CI(NO()A-#./H#I I%%,,-AB%,,(//	: #$q(#./C#D I% ''(<= ''(<(C(CI(NO'(1,#./G#H I%$++,@A$++(//	: #$q(#./C#D !(()=>!##.#? ##$89"#a'#.#?  ''(<=$%))*?@O 1$#./?#@ **t/B/B#);;cs$  ((T-@-@#)==#;#;$d&9&9&<&<dmm>O>O$  $*==#;#;$dmm&6&68I8I$  &"6"14H"H#$q('23G'H$**t/F/F'-{{cs($ ((T-D-D'-}}'?'?(##&&!!($
 &":"14L"L ++o6 ++#_22t~~
 	""I}}!..?oFH .I}}/JH DAqyq+t,5 ''A/  ~~0 OOAGG,!  %",,.;;{{}112 "(!3!3!;!;"22%&002??&002??&002??"J "??1j9 ''H6 ''A/? !B  r0   c                  ^  / n/ n/ n/ n/ nU 4S jn[         R                  " S5         U GH~  u  pU	c  M  [        USS5      SL a  M  U	n
UR                  R                  R                  X/5         U	R                  [        R                  R                  R                  :X  a  [        U	5      n
[        U
5      n
[        U
5      nUR                  [        R                  R                  R                  :X  a  UR!                  U5        OUR                  [        R                  R                  R"                  :X  a  UR!                  U5        OUUR                  [        R                  R                  R$                  :X  a  UR!                  U5        OUR!                  U5        S S S 5        GM     ['        U5      S:  a  ['        U5      S:  a  [)        S5      e['        U5      ['        U5      -   ['        U5      -   S:X  a2  ['        U5      ['        U5      -   ['        U5      -   S:X  a  UsS S S 5        $ WR                  R                  R                  UW	/5         ['        U5      S:  a  SOS	n/ n['        U5      S:  aW  U" U5      nU(       d  U(       d  [+        5       (       d!  UR!                  UR-                  U5      5        OUR!                  U5        ['        U5      S:  aW  U" U5      nU(       d  U(       d  [/        5       (       d!  UR!                  UR-                  U5      5        OUR!                  U5        ['        U5      S:  a@  U" U5      nUS	:X  a  UR!                  U5        O UR!                  UR-                  U5      5        ['        U5      S:  a  U" U5      nUR!                  U5        ['        U5      S
:  a  U" U5      OUS   n[0        R2                  " US9n[0        R4                  " S
/UR                  T R6                  S9n[0        R8                  " U[0        R:                  " UUS9S9nS S S 5        0 nU GH  u  pU	c  M  [        USS5      SL a  UR!                  X45        M/  UR                  R                  R                  X/5         [=        U	5      nUR                  [        R                  R                  R                  :X  aD  WR                  [        R                  R                  R                  :w  a  UR-                  S5      nOxUR                  [        R                  R                  R"                  :X  aD  WR                  [        R                  R                  R"                  :w  a  UR-                  S5      nOWn[?        5       RA                  5       nURC                  SUUS.SU0S9  UU	La-  URC                  SSU0SU	0UR                  U	R                  S.S9  S S S 5        U	RD                  UURD                  '   UR!                  X45        GM     S S S 5        [G        UW5        U$ ! , (       d  f       GM!  = f! , (       d  f       GN#= f! , (       d  f       Ny= f! , (       d  f       NR= f)Nc                   > TR                   (       a$  [        R                  " U 5      R                  5       $ [        R                  " U 5      $ rH   )r
  r   r  r  add_n)r  rf   s    r/   _add_n1ClipGradByGlobalNorm._static_clip.<locals>._add_n  s3      ||H-1133||H--r0   r   r   TFr   z1FP16 and BF16 are not supported at the same time.r\   r   r>   rP   r  r  r   r)  elementwise_mulr   Yr!   r]   castr   )in_dtype	out_dtyper8   )$r   r   r   rk   r   r   r   r   r9   r:   r;   r2   r7   r_   r   rI   r   rJ   rL   r?   rd   r   rK   r  r   r7  r8  r   r9  r:  rQ   r   current_blockr+   r   r   )rf   r   r   r>  r?  sum_square_list_bf16r@  rz  r   r   rC  rD  rF  rG  rH  global_norm_var_bf16rI  global_norm_var_other_dtyperN  	scale_varr   new_gscale_inputrk   s   `                       r/   r   !ClipGradByGlobalNorm._static_clip  s   !!!	. !!/2$91k40E9
WW__55qf=vv!5!5!C!CC%8%;
%B:%N
!1*!=J!''4<<+?+?+D+DD,33J?#))T\\-A-A-F-FF,33J?#))T\\-A-A-F-FF,33J?'..z: >= %( '(1,5I1JQ1N)G  O$*+,*+, 
 O$*+,*+, 
 $M 32P 111a&9),_)=)AIy	"$+,q0+12F+G(,*@BB'..077	B (../CD+,q0+12F+G(,*@BB'..077	B (../CD+,q0+12F+G( I-'../CD'..077	B '!+282I/#**+FG ?+a/ ?+(+  
 #)++"@"(++#)//#~~#
 #MM%nn/J	i :p (*$$91k40E9$++QF3WW__55qf=7:E t||';';'@'@@%OOt||/C/C/H/HH&/&6&6y&At||';';'@'@@%OOt||/C/C/H/HH&/&6&6z&B&/
 12@@BEOO.%*=!& $ 
 A~!'$'<%*AJ,1KK-.WW#	 ( 5 >H 4566(0 ''/Y %C 3^ 	""24LMS >=B :9@ >=Q 32s_   AY20D1X<!BY20'Y2GY3A!Y2EY!.8Y2<
Y	Y2
Y	Y2!
Y/+Y22
Z c                   U R                   U;  ad  / XR                   '   U R                  XR                   S-   '   [        R                  " S/UR                  U R                  S9XR                   S-   '   O*U R                  XR                   S-      :X  d  [        S5      eUnUR                  [        R                  R                  R                  :X  a  [        U5      n[        U5      nO:[        5       (       a+  UR                  5       (       a  [        U5      n[        U5      n[        U5      nXR                      R!                  U5        Xl        g )N_clip_valuer>   r  _clipz>All parameters' 'clip_norm' of a same group should be the same)r  r   r   r8  r   
ValueErrorr   r   r9   r:   r;   r2   r7   r   r\  r_   r   r   )rf   r   r   rW   rC  local_norm_vars         r/   r   %ClipGradByGlobalNorm._process_context:  s   ??')')GOO$7;~~GOOm3417c2GOOg-. >>W__}-L%MM T  
99,,:::,T2J6zBJ]]t88::,T2J6zBJ)*5 ''7r0   c                8   S nU R                   S-   nX@R                  ;  a  U" U R                  U R                      5      n[        R                  " US9nU R                  U R                   S-      n[        R                  " U[        R
                  " XeS9S9nUR                  S:X  d   eXpR                  U'   [        5       (       a&  [        R                  " X R                  U   5      nX4$ UR                  R                  SX R                  U   S.S	U0S
9  X4$ )Nc                J    [         R                  " U 5      R                  5       $ rH   r  r  s    r/   r  ;ClipGradByGlobalNorm._create_operators.<locals>.async_add_nU  r  r0   _scalerP   r  r  )r>   r|  r}  r!   r]   )r  r   r   r7  r9  r:  rV   r   r=  rk   r+   )rf   r   rW   r  group_scale_namegroup_norm_varrO  group_scale_vars           r/   r   &ClipGradByGlobalNorm._create_operatorsT  s   	0  ??X5<</(doo)FGN#[[>:N||DOOg$=>H$mm..8>O #((D000-<LL)*==??46F)GHD; 	"LL1A$BCDM 	 	
 {r0   )r
  r  r   r   r  r  )default_groupF)r   r'   r  r   r  rA   r   r   r   )rp   rq   rr   rs   r   r   rz   rg   r   r   r   r   r   r   r   rt   r   r   s   @r/   r  r  {  s    2h O
 *$	.. . 	.
 
. .&N C  C JI V] ~4 r0   r  c                   [         R                  " S5        [        U [        5      (       d  [	        S5      eUc  [
        R                  " 5       nUR                  S5      R                   HF  nSUR                  5       ;   d  M  SUR                  S5      ;   d  M0  [         R                  " S5          O   Uc  UR                  S5      R                  5       n[        S U 5       5      (       a/  U Vs/ s H"  oBR                  S5      R                  U5      PM$     nn[        S	 U 5       5      (       d  [	        S
5      eU H  n[        R                  " U 5      Ul        M      gs  snf )a  
Warning:

    This API must be used after building network, and before ``minimize`` ,
    and it may be removed in future releases, so it is not recommended.
    It is recommended to set ``grad_clip`` when initializing the ``optimizer`` ,
    this is a better method to clip gradient. There are three clipping strategies:
     :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
     :ref:`api_paddle_nn_ClipGradByValue` .

To specify parameters that require gradient clip.

Args:
    grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
        some derived class of ``GradientClipBase`` . There are three clipping strategies
        ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
        :ref:`api_paddle_nn_ClipGradByValue` ). Default value: None, and there is no
        gradient clipping.
    param_list (list(Variable), optional): Parameters that require gradient clip.
            It can be a list of parameter or a list of parameter's name.
            Default None, meaning that all parameters in the program will be included.
    program (Program, optional): The program where parameters are located.
            Default None, meaning that using :ref:`api_paddle_static_default_main_program` .

Returns:
    None

Examples:
    .. code-block:: python

        >>> import paddle

        >>> paddle.enable_static()

        >>> def network():
        ...     image = paddle.static.data(name='image', shape=[
        ...                        None, 28], dtype='float32')
        ...     param_attr1 = paddle.ParamAttr("fc1_param")
        ...     fc1 = paddle.static.nn.fc(image, size=10, weight_attr=param_attr1)
        ...     param_attr2 = paddle.ParamAttr("fc2_param")
        ...     fc2 = paddle.static.nn.fc(fc1, size=10, weight_attr=param_attr2)
        ...     loss = paddle.mean(fc2)
        ...     return loss


        >>> # network 1: clip all parameter gradient
        >>> with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()):
        ...     loss = network()
        ...     paddle.nn.clip.set_gradient_clip(
        ...         paddle.nn.ClipGradByGlobalNorm(clip_norm=2.0))
        ...     sgd = paddle.optimizer.SGD(learning_rate=1e-3)
        ...     sgd.minimize(loss)

        >>> # network 2: clip parameter gradient by name
        >>> with paddle.static.program_guard(base.Program(), paddle.static.Program()):
        ...     loss = network()
        ...     paddle.nn.clip.set_gradient_clip(
        ...         paddle.nn.ClipGradByValue(min=-1.0, max=1.0),
        ...         param_list=["fc1_param", "fc2_param"])
        ...     sgd = paddle.optimizer.SGD(learning_rate=1e-3)
        ...     sgd.minimize(loss)

        >>> # network 3: clip parameter gradient by value
        >>> with paddle.static.program_guard(base.Program(), paddle.static.Program()):
        ...     loss = network()
        ...     param_var1 = paddle.static.default_main_program().global_block().var("fc1_param")
        ...     param_var2 = paddle.static.default_main_program().global_block().var("fc2_param")
        ...     paddle.nn.clip.set_gradient_clip(
        ...         paddle.nn.ClipGradByValue(min=-1.0, max=1.0),
        ...         param_list=[param_var1, param_var2])
        ...     sgd = paddle.optimizer.SGD(learning_rate=1e-3)
        ...     sgd.minimize(loss)

        >>> # network 4: use 'set_gradient_clip' and 'optimize(grad_clip=clip)' together
        >>> with paddle.static.program_guard(base.Program(), paddle.static.Program()):
        ...     loss = network()
        ...     clip1 = paddle.nn.ClipGradByValue(min=-1.0, max=1.0)
        ...     clip2 = paddle.nn.ClipGradByNorm(clip_norm=1.0)
        ...     # Set the gradient clipping strategy: clip1
        ...     paddle.nn.clip.set_gradient_clip(clip1)
        ...     # Set the gradient clipping strategy: clip2
        ...     sgd = paddle.optimizer.SGD(learning_rate=1e-3, grad_clip=clip2)
        ...     sgd.minimize(loss)
        ...     # 'set_gradient_clip' will not take effect when setting has a conflict,
        ...     # and the gradient clipping strategy will be 'clip2'


zCaution! 'set_gradient_clip' is not recommended and may be deprecated in future! We recommend a new strategy: set 'grad_clip' when initializing the 'optimizer'. This method can reduce the mistakes, please refer to documentation of 'optimizer'.z<'clip' should be an instance of ClipGradBase's derived classNr   op_namescope	optimizerz'minimize' has been invoked before, this will make 'set_gradient_clip' be ineffective! Please invoke 'set_gradient_clip' before 'minimize'.c              3  B   #    U  H  n[        U[        5      v   M     g 7frH   )r@   r   .0elems     r/   	<genexpr>$set_gradient_clip.<locals>.<genexpr>  s     
8ZT:dC  Zs   c              3  V   #    U  H  n[        U[        R                  5      v   M!     g 7frH   )r@   r   	Parameterr  s     r/   r  r    s      Lz$	 3 344s   ')zK'param_list' should be a list of Parameter or basestring(parameter's name).)r   r   r@   r   r<   r   r   rk   ops	all_attrsattrall_parametersallvarcopydeepcopyr   )r   
param_listr   r   r  r   s         r/   set_gradient_clipr  r  s:   t MM	1 dL))J
 	
 002mmA""R\\^+rww@
 1
 MMW  # ]]1%446


8Z
888=GHZTmmA&**40Z
HLLLLY
 	
 #'==#6   Is   1)E#c           	     P   0 nU  H  u  p#Uc  M
  UR                   R                  R                  X#/5         [        R                  " S5         [        USS 5      nUc  U sS S S 5        sS S S 5        s  $ [        U[        5      (       d  [        S5      eUR                  XUS9  S S S 5        S S S 5        M     / n0 nU  H  u  p#Uc  M
  UR                   R                  R                  X#/5         [        R                  " S5         WR                  X#S9u  pxUR                  XgR                  '   UR                  Xx/5        S S S 5        S S S 5        M     [        XV5        U$ ! , (       d  f       N= f! , (       d  f       GM}  = f! , (       d  f       NK= f! , (       d  f       M  = f)Nr   r   z8clip attribute should be an instance of GradientClipBase)r   r   rW   )r   rW   )rk   r   r   r   r   r   r@   r   r<   r   r   r   r   r   )	param_gradsr   r   r   	clip_attrresr   r   r   s	            r/   append_gradient_clip_opsr    sf   G9GGOO,,aV4  1#7>I "	 21 54 i66N  &&wa&H 2 54	 " C!9GGOO,,aV4  1'999JOE3;==$ZZ0JJ()	 2 54	  c<J5 21 54( 21 54sS   E2E!	E230E!#E2-F<F F!
E/+E22
F	
FF
F%	c                   / n[        U5      S:X  a  g U  H  u  p4Uc  M
  UR                  R                  nXR;   a  M'  UR                  U5        UR                  R                  R                  5       R                   H}  nUR                  S5      (       d  M  SUR                  S5      ;   d  M2  UR                  S5      (       d  MJ  UR                  S5      S   nXq;   d  Me  UX   /nUR                  SU5        M     M     g )Nr   r  r   op_role_var)
r?   rk   idxr   r   global_blockr  has_attrr  r   )	r   r   block_id_listr   rW   block_idr   
param_namecorrect_p_gs	            r/   r   r     s    M
#$)#<;;??$X&++%%22488BN++#rww~'>>GGM**WW]3A6
9"0<#K LL< 9 $r0   rH   )rW   r   rX   r   )NN)>
__future__r   r  r   typingr   r   paddle.autogradautogradr   paddle.distributedr5  r&  r   paddle.baser   r   r   paddle.base.data_feederr	   paddle.base.libpaddler
   paddle.common_ops_importr   r   r   "paddle.distributed.utils.moe_utilsr   paddle.frameworkr   r   r   r   r   __all__r   r2   r7   rB   rF   rQ   rY   r_   ra   rv   r   r   r   r   r   r   r  r  r  dygraph_not_supportr  r  r   GradientClipBaseGradientClipByValueGradientClipByNormGradientClipByGlobalNormro   r0   r/   <module>r     s+   #      ) !  4 4 < * O O C  
CL#L<~ +0 '	7	." "90( 90x6"'" '"TZl Zzj\ jZ */ & */ &t< tn }7 }7@!N=4   % # / r0   