from __future__ import annotations

import copy
import inspect
import re
import typing
import warnings
import weakref
from collections import OrderedDict, namedtuple
from typing import TYPE_CHECKING, Any, Callable, Union

import numpy as np
from typing_extensions import Self, overload

import paddle
from paddle import Tensor, dtype, nn, profiler
from paddle.autograd.backward_utils import ValueSet
from paddle.base import core, framework, unique_name
from paddle.base.core import VarDesc
from paddle.base.dygraph import no_grad
from paddle.base.dygraph.base import (
    _convert_into_variable,
    in_declarative_mode,
    in_sot_simulation_mode,
    in_to_static_mode,
)
from paddle.base.dygraph_utils import _append_activation_in_dygraph
from paddle.base.executor import Executor, global_scope
from paddle.base.framework import (
    Parameter,
    Program,
    _current_expected_place,
    convert_np_dtype_to_dtype_,
    default_main_program,
    in_dygraph_mode,
    in_pir_mode,
    name_struct,
    paddle_type_to_proto_type,
)
from paddle.base.layer_helper_base import LayerHelperBase
from paddle.distributed.flex_checkpoint.dcp.sharded_state_dict import (
    ShardedStateDict,
    build_sharded_state_dict,
)
from paddle.base.param_attr import ParamAttr
from paddle.profiler.utils import in_profiler_mode
from paddle.utils import deprecated
from paddle.utils.decorator_utils import param_one_alias

if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator, Mapping, Sequence

    from paddle._typing import (
        DTypeLike,
        ParamAttrLike,
        PlaceLike,
        ShapeLike,
    )
    from paddle.nn.initializer import Initializer

__all__ = []

# Hook-signature unions used by the register_forward_*_hook APIs below.
_ForwardPreHook = Union[
    Callable[["Layer", Any], Any],
    Callable[
        ["Layer", Any, dict[str, Any]],
        Union[tuple[Any, dict[str, Any]], None],
    ],
]
_ForwardPostHook = Union[
    Callable[["Layer", Any, Any], Any],
    Callable[["Layer", Any, dict[str, Any], Any], Any],
]
_StateDict = Union[dict[str, Tensor], typing.OrderedDict[str, Tensor]]
_StateDictHook = Callable[[_StateDict], None]

_first_cap_re = re.compile('(.)([A-Z][a-z]+)')
_all_cap_re = re.compile('([a-z])([A-Z])')


def record_program_ops_pre_hook(layer, inputs):
    """
    A pre-hook to mark op numbers before entering layer.forward.
    """
    if not in_dygraph_mode():
        if layer._op_recorder.start < 0:
            layer._op_recorder.start = len(
                default_main_program().current_block().ops
            )
            layer._op_recorder.is_valid = True
        else:
            layer._op_recorder.is_valid = False
            warnings.warn(
                f"{layer._full_name} has recorded the op information before. "
                "Please check whether you call this layer twice."
            )


def set_op_customized_attrs_post_hook(layer, inputs, outputs):
    """
    A post-hook to append customized attributes into all operators generated in current layer.
    """
    if not in_dygraph_mode() and layer._op_recorder.is_valid:
        start = layer._op_recorder.start
        end = len(default_main_program().current_block().ops)
        assert start >= 0 and end >= start
        ops = default_main_program().current_block().ops[start:end]

        layer._op_recorder.end = end
        layer._op_recorder.ops = ops

        for op in ops:
            for attr_name, val in layer._customized_attrs.items():
                op._set_attr(attr_name, val)

        # remove pre-hook and post-hook
        for hook_helper in layer._op_recorder.hooks:
            hook_helper.remove()


def _scope_dist2single(dist_scope):
    mapping = {
        "row_parallel_linear": "linear",
        "column_parallel_linear": "linear",
        "vocab_parallel_embedding": "embedding",
    }
    return mapping.get(dist_scope, dist_scope)


def _convert_camel_to_snake(name):
    s1 = _first_cap_re.sub(r'\1_\2', name)
    return _all_cap_re.sub(r'\1_\2', s1).lower()


def _addindent(string, indent):
    s1 = string.split('\n')
    # keep single-line reprs unchanged
    if len(s1) == 1:
        return string
    s2 = []
    for idx, line in enumerate(s1):
        if idx > 0:
            s2.append(str((indent * ' ') + line))
    return s1[0] + '\n' + '\n'.join(s2)


def _layer_trans_dtype(layer, dtype, excluded_layers):
    if type(layer) in excluded_layers:
        return
    layer._to_impl(dtype=dtype, floating_only=True, include_sublayers=False)

class _IncompatibleKeys(
    namedtuple("IncompatibleKeys", ["missing_keys", "unexpected_keys"])
):
    __slots__ = ()

    def __repr__(self) -> str:
        if not self.missing_keys and not self.unexpected_keys:
            return "<All keys matched successfully>"
        return super().__repr__()

    __str__ = __repr__


class LayerObjectHelper(LayerHelperBase):
    def __init__(self, name):
        super().__init__(name, layer_type=name)

    def append_op(
        self,
        type=None,
        inputs=None,
        outputs=None,
        attrs=None,
        stop_gradient=None,
    ):
        """append an operator for this layer object.

        Args:
            type: operator type
            inputs: input variables of the operator
            outputs: output variables of the operator
            attrs: attributes of the operator
            stop_gradient: whether to stop gradient for the outputs

        Returns the created operator.
        """
        return self.main_program.current_block().append_op(
            type=type,
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
            stop_gradient=stop_gradient,
        )

    def _multiple_input(self, inputs_in):
        inputs = inputs_in
        ret = []
        if isinstance(inputs, (list, tuple)):
            for inp in inputs:
                ret.append(self.to_variable(inp))
        else:
            ret.append(self.to_variable(inputs))
        return ret

    def _input(self, inputs_in):
        inputs = self._multiple_input(inputs_in)
        if len(inputs) != 1:
            raise ValueError(
                f"{self.layer_type} layer only takes one input in"
            )
        return inputs[0]

    def _multiple_param_attr(self, length, param_attr_in=None):
        param_attr = param_attr_in
        if isinstance(param_attr, ParamAttr):
            param_attr = [param_attr]

        if len(param_attr) != 1 and len(param_attr) != length:
            raise ValueError(f"parameter number mismatch in {self.name}")
        elif len(param_attr) == 1 and length != 1:
            tmp = [None] * length
            for i in range(length):
                tmp[i] = copy.deepcopy(param_attr[0])
            param_attr = tmp
        return param_attr

    def iter_inputs_and_params(self, inputs_in, param_attr_in=None):
        """Access all inputs and params one by one

        Args:
            inputs_in: inputs to iterate over
            param_attr_in: param_attr to iterate over

        Returns input, param_attr
        """
        param_attr_in = ParamAttr._to_attr(param_attr_in)
        if isinstance(param_attr_in, bool):
            raise ValueError(f"Param_attr should not be False in {self.name}")
        inputs = inputs_in if (inputs_in is not None) else []
        inputs = self._multiple_input(inputs)
        param_attrs = self._multiple_param_attr(len(inputs), param_attr_in)
        yield from zip(inputs, param_attrs)

    def input_dtype(self, inputs_in):
        """Get input data type

        Args:
            inputs_in: inputs whose data type should be inspected

        Returns dtype of the input
        """
        inputs = inputs_in if (inputs_in is not None) else []
        inputs = self._multiple_input(inputs)
        dtype = None
        for each in inputs:
            if dtype is None:
                dtype = each.dtype
            elif dtype != each.dtype:
                raise ValueError(
                    f"Data Type mismatch: {dtype} to {each.dtype} in {self.name}"
                )
        return dtype

    def get_parameter(self, name):
        """Get parameter specifically

        Args:
            name: parameter's name

        Returns the target parameter
        """
        param = self.main_program.global_block().var(name)
        if not isinstance(param, Parameter):
            raise ValueError(f"no Parameter name {name} found in {self.name}")
        return param

    def append_activation(self, input_var, act=None, use_cudnn=None):
        """Append activation

        Args:
            input_var: the input variable. len(input_var.shape) must be
                at least 2.
            act: activation type
            use_cudnn: whether to use cudnn

        Returns the Variable produced by the appended activation
        """
        if act is None:
            return input_var
        if isinstance(act, str):
            act = {'type': act}
        else:
            raise TypeError(f"{act} should be unicode or str in {self.name}")

        if (use_cudnn is not None) and use_cudnn:
            act['use_cudnn'] = use_cudnn
        act_type = act.pop('type')
        if in_dygraph_mode():
            res = _append_activation_in_dygraph(input_var, act_type, use_cudnn)
            return res
        else:
            tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
            self.append_op(
                type=act_type,
                inputs={"X": [input_var]},
                outputs={"Out": [tmp]},
                attrs=act,
            )
            return tmp

    def is_instance(self, param, cls):
        """Check if the input parameter is instance of input class

        Args:
            param: parameter to be checked
            cls: class of the parameter

        Returns result of the check (True or False)
        """
        if not isinstance(param, cls):
            raise TypeError(
                "The input {0} parameter of method {1} must be {2}, in layer {3}".format(
                    param, self.layer_type, cls.__name__, self.name
                )
            )
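
    # Sketch of how this helper is used internally (hypothetical call site,
    # shown only for orientation): Layer.__init__ below creates one as
    #
    #     self._helper = LayerObjectHelper(self._full_name)
    #
    # and layers then route parameter creation and op appending through it,
    # e.g. self._helper.create_parameter(...) in Layer.create_parameter.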
6,"(!F rE   r   c                  "    \ rS rSrSrSS jrSrg)LayerOpsRecorderiZ  z5
Record generated operators information in nn.Layer.
    """

    def __init__(self, start=-1, end=-1, ops=None, is_valid=False, hooks=None):
        self.start = start
        self.end = end
        self.ops = ops
        self.is_valid = is_valid
        self.hooks = hooks


class HookRemoveHelper:
    """A HookRemoveHelper that can be used to remove hook."""

    next_hook_id: int = 0

    def __init__(
        self,
        hooks: typing.OrderedDict[int, Callable[..., Any]],
        *,
        extra_hook_dict=None,
    ) -> None:
        self._hooks_ref = weakref.ref(hooks)
        self._hook_id = HookRemoveHelper.next_hook_id
        HookRemoveHelper.next_hook_id += 1
        self._extra_hooks_ref = ()
        if extra_hook_dict is not None:
            if isinstance(extra_hook_dict, (list, tuple)):
                self._extra_hooks_ref = tuple(
                    weakref.ref(d) for d in extra_hook_dict
                )
            else:
                self._extra_hooks_ref = (weakref.ref(extra_hook_dict),)

    def remove(self) -> None:
        hooks = self._hooks_ref()
        if hooks is not None and self._hook_id in hooks:
            del hooks[self._hook_id]
        for hook_ref in self._extra_hooks_ref:
            extra_hooks = hook_ref()
            if extra_hooks is not None and self._hook_id in extra_hooks:
                del extra_hooks[self._hook_id]

class Layer:
    """
    Dynamic graph Layer based on OOP (object-oriented programming); it includes the parameters of the layer, the structure of the forward graph and so on.

Parameters:
    name_scope (str, optional): prefix name used by the layer to name parameters.
        If prefix is "my_layer", parameter name in MyLayer
        can be "my_layer_0.w_n", where "w" is the parameter
        base name and "n" is an unique suffix auto-generated.
        If None, prefix name will be snake cased class name. Default: None.
    dtype(str, optional): data type of this parameter.
            If set str, it can be "bool",  "float16", "float32", "float64",
            "int8", "int16", "int32", "int64", "uint8" or "uint16".
            Default: "float32"

Returns:
    None

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(100)

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...         self._dropout = paddle.nn.Dropout(p=0.5)
        ...
        ...     def forward(self, input):
        ...         temp = self._linear(input)
        ...         temp = self._dropout(temp)
        ...         return temp
        ...
        >>> x = paddle.randn([10, 1], 'float32')
        >>> mylayer = MyLayer()
        >>> mylayer.eval()  # set mylayer._dropout to eval mode
        >>> out = mylayer(x)
        >>> mylayer.train()  # set mylayer._dropout to train mode
        >>> out = mylayer(x)
        >>> print(out)
        Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[-3.44879317],
         [ 0.        ],
         [ 0.        ],
         [-0.73825276],
         [ 0.        ],
         [ 0.        ],
         [ 0.64444798],
         [-3.22185946],
         [ 0.        ],
         [-0.68077987]])
r   trainingNc                   SU l         Uc*  [        U R                  R                  5      n[	        U5      n[
        R                  " U5      U l        [        U R                  5      U l	        SU l
        X l        [        5       U l        [        5       U l        [        5       U l        [#        5       U l        [        5       U l        [        5       U l        [+        / / S9U l        0 U l        [        5       U l        [        5       U l        [        5       U l        [        5       U l        [        5       U l        SU l        [        5       U l        [        5       U l        g )NTF)r<   rK   ) r   rd   r   r   r\   r   generater@   r   _helper_built_dtyper"   _init_in_dynamic_moder   _parameters_buffersset!_non_persistable_buffer_names_set_sub_layers_loaddict_holderr   r8   rH   _forward_pre_hooks_forward_post_hooks#_forward_pre_hooks_with_kwargs_flag$_forward_post_hooks_with_kwargs_flag!_forward_post_hooks_always_called_cast_to_low_precision_state_dict_hooks_original_funcs)r   
name_scoper   s      rC   r   Layer.__init__  s"    01H1HIJ+J7J%..z:(9%4%6"&=#14.&= + -2>!# M 	 M 	 
 M 	0
 M 	1
 M 	.
 '+# M 	  +}rE   c                    U R                   $ r   )r  r   s    rC   _modulesLayer._modules  s    rE   c                    [        U[        5      (       d  [        S[        U5       35      eU R                  R                  5         U R                  R                  U5        g )Nz _modules must be dict-like, got )r   dictr   rx   r  clearupdater   values     rC   r  r    sL    %&&>tE{mLMM &rE   c                    U R                   $ r   )r  r  s    rC   _non_persistent_buffers_set!Layer._non_persistent_buffers_set  s    555rE   c                    [        U[        5      (       d  [        S[        U5       35      eU R                  R                  5         U R                  R                  U5        g )Nz/_non_persistent_buffers_set must be a set, got )r   r  r   rx   r  r  r  r  s     rC   r  r     sR    %%%A$u+O  	..446..55e<rE   c                F   [        U[        5      (       d  [        S5      e[        5       (       aN  U(       a$  [        R
                  " 5       R                  5         O#[        R
                  " 5       R                  5         Xl        U R                  5        H	  nXl        M     U $ )a  

Sets this Layer and all its sublayers to training mode.
This only affects certain modules like `Dropout` and `BatchNorm`.

Returns:
    Layer: self

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(100)

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...         self._dropout = paddle.nn.Dropout(p=0.5)
        ...
        ...     def forward(self, input):
        ...         temp = self._linear(input)
        ...         temp = self._dropout(temp)
        ...         return temp
        ...
        >>> x = paddle.randn([10, 1], 'float32')
        >>> mylayer = MyLayer()
        >>> mylayer.eval()  # set mylayer._dropout to eval mode
        >>> out = mylayer(x)
        >>> mylayer.train()  # set mylayer._dropout to train mode
        >>> out = mylayer(x)
        >>> print(out)
        Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[-3.44879317],
         [ 0.        ],
         [ 0.        ],
         [-0.73825276],
         [ 0.        ],
         [ 0.        ],
         [ 0.64444798],
         [-3.22185946],
         [ 0.        ],
         [-0.68077987]])

        """
        if not isinstance(mode, bool):
            raise ValueError("training mode is expected to be boolean")
        # global setting in dygraph
        if in_dygraph_mode():
            if mode:
                framework._dygraph_tracer().train_mode()
            else:
                framework._dygraph_tracer().eval_mode()
        # Layer-level setting
        self.training = mode
        for layer in self.sublayers():
            layer.training = mode
        return self
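
    # Note on the design: train()/eval() flip `self.training` on this layer
    # and on every sublayer, so the flag stays globally consistent, e.g.:
    #
    #     model.train()
    #     assert all(layer.training for layer in model.sublayers())
    #     model.eval()
    #     assert not any(layer.training for layer in model.sublayers())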
    def eval(self) -> Self:
        """
Sets this Layer and all its sublayers to evaluation mode.
This only affects certain modules like `Dropout` and `BatchNorm`.

Returns:
    Layer: self

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(100)
        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...         self._dropout = paddle.nn.Dropout(p=0.5)
        ...
        ...     def forward(self, input):
        ...         temp = self._linear(input)
        ...         temp = self._dropout(temp)
        ...         return temp
        ...
        >>> x = paddle.randn([10, 1], 'float32')
        >>> mylayer = MyLayer()
        >>> mylayer.eval()  # set mylayer._dropout to eval mode
        >>> out = mylayer(x)
        >>> print(out)
        Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[-1.72439659],
         [ 0.31532824],
         [ 0.01192369],
         [-0.36912638],
         [-1.63426113],
         [-0.93169814],
         [ 0.32222399],
         [-1.61092973],
         [ 0.77209264],
         [-0.34038994]])

        """
        if in_dygraph_mode():
            framework._dygraph_tracer().eval_mode()
        # Layer-level setting
        self.training = False
        for layer in self.sublayers():
            layer.training = False
        return self

    def apply(self, fn: Callable[[Layer], None]) -> Self:
        """

Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
as well as self. Typical use includes initializing the parameters of a model.

Parameters:
    fn (function): a function to be applied to each sublayer

Returns:
    Layer, self

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import paddle.nn as nn
        >>> paddle.seed(2023)

        >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))

        >>> def init_weights(layer):
        ...     if type(layer) == nn.Linear:
        ...         print('before init weight:', layer.weight.numpy())
        ...         new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
        ...         layer.weight.set_value(new_weight)
        ...         print('after init weight:', layer.weight.numpy())
        ...
        >>> net.apply(init_weights)

        >>> print(net.state_dict())
        before init weight: [[ 0.89611185  0.04935038]
                             [-0.5888344   0.99266374]]
        after init weight: [[0.9 0.9]
                            [0.9 0.9]]
        before init weight: [[-0.18615901 -0.22924072]
                             [ 1.1517721   0.59859073]]
        after init weight: [[0.9 0.9]
                            [0.9 0.9]]
        OrderedDict([('0.weight', Parameter containing:
        Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[0.89999998, 0.89999998],
         [0.89999998, 0.89999998]])), ('0.bias', Parameter containing:
        Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
        [0., 0.])), ('1.weight', Parameter containing:
        Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[0.89999998, 0.89999998],
         [0.89999998, 0.89999998]])), ('1.bias', Parameter containing:
        Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
        [0., 0.]))])
)childrenapply)r   fnrA   s      rC   r/  Layer.apply  s.    f ]]_EKKO % 	4rE   c                    U R                   $ )ah  

Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__

Returns:
    str, full name of this layer.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> class LinearNet(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__(name_scope = "demo_linear_net")
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...
        ...     def forward(self, x):
        ...         return self._linear(x)
        ...
        >>> linear_net = LinearNet()
        >>> print(linear_net.full_name())
        demo_linear_net_0

        """
        return self._full_name

    def register_forward_post_hook(
        self,
        hook: _ForwardPostHook,
        *,
        prepend: bool = False,
        with_kwargs: bool = False,
        always_call: bool = False,
    ) -> HookRemoveHelper:
        """

Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed.

It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively.
User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.

hook(Layer, input, output) -> None or modified output

Parameters:
    hook(function): a function registered as a forward post-hook
    prepend (bool): If ``True``, the provided ``hook`` will be fired
        before all existing ``forward_post`` hooks on this
        :class:`paddle.nn.Layer`.
        Default: ``False``
    with_kwargs (bool): If ``True``, the ``hook`` will be passed the
        kwargs given to the forward function.
        Default: ``False``
    always_call (bool): If ``True`` the ``hook`` will be run regardless of
        whether an exception is raised while calling the Module.
        Default: ``False``

Returns:
    HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import numpy as np

        >>> # the forward_post_hook change the output of the layer: output = output * 2
        >>> def forward_post_hook(layer, input, output):
        ...     # user can use layer, input and output for information statistics tasks
        ...
        ...     # change the output
        ...     return output * 2
        ...
        >>> linear = paddle.nn.Linear(13, 5)

        >>> # register the hook
        >>> forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

        >>> value1 = np.arange(26).reshape(2, 13).astype("float32")
        >>> in1 = paddle.to_tensor(value1)

        >>> out0 = linear(in1)

        >>> # remove the hook
        >>> forward_post_hook_handle.remove()

        >>> out1 = linear(in1)

        >>> # hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
        >>> assert (out0.numpy() == (out1.numpy()) * 2).any()

r   TFlast)r   r  r  r  r   move_to_end)r   hookr5  r6  r7  hook_remove_helpers         rC   register_forward_post_hook Layer.register_forward_post_hook  s    B .$$9966
 AE  !3!<!<=  55"++   22"++ $$00"++% 1  "!rE   )r5  r6  c                  [        U R                  U R                  S9nXR                  UR                  '   U(       a  SU R                  UR                  '   U(       a$  U R                  R	                  UR                  SS9  U$ )au  

Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.

It should have the following form, `input` of the `hook` is `input` of the `Layer`,
hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if
a single value is returned(unless that value is already a tuple).
User can use forward pre-hook to change the input of the Layer or perform information statistics tasks on the Layer.

hook(Layer, input) -> None or modified input

Parameters:
    hook(function): a function registered as a forward pre-hook
    prepend (bool): If ``True``, the provided ``hook`` will be fired
        before all existing ``forward_pre`` hooks on this
        :class:`paddle.nn.Layer`.
        Default: ``False``
    with_kwargs (bool): If true, the ``hook`` will be passed the kwargs
        given to the forward function.
        Default: ``False``

Returns:
    HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import numpy as np

        >>> # the forward_pre_hook change the input of the layer: input = input * 2
        >>> def forward_pre_hook(layer, input):
        ...     # user can use layer and input for information statistics tasks
        ...
        ...     # change the input
        ...     input_return = (input[0] * 2)
        ...     return input_return
        ...
        >>> linear = paddle.nn.Linear(13, 5)

        >>> # register the hook
        >>> forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

        >>> value0 = np.arange(26).reshape(2, 13).astype("float32")
        >>> in0 = paddle.to_tensor(value0)
        >>> out0 = linear(in0)

        >>> # remove the hook
        >>> forward_pre_hook_handle.remove()

        >>> value1 = value0 * 2
        >>> in1 = paddle.to_tensor(value1)
        >>> out1 = linear(in1)

        >>> # hook change the linear's input to input * 2, so out0 is equal to out1.
        >>> assert (out0.numpy() == out1.numpy()).any()
r   TFr9  )r   r
  r  r   r;  )r   r<  r5  r6  r=  s        rC   register_forward_pre_hookLayer.register_forward_pre_hook2  s    @ .## DD
 @D 2 ; ;<  44"++ ##//"++% 0  "!rE   c           	         [         R                  " U5      n[        U[        5      (       a  US:X  a  SnU R                  R                  XqX4XVS9$ )a  Create parameters for this layer.

Parameters:
    shape(list): Shape of the parameter. The data type in the list must be int.
    attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_paddle_ParamAttr`. Default: None.
    dtype(str, optional): Data type of this parameter.
        If set str, it can be "bool",  "float16", "float32", "float64",
        "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
    is_bias(bool, optional): if this is a bias parameter. Default: False.
    default_initializer(Initializer, optional): the default initializer for this parameter.
        If set None, default initializer will be set to paddle.nn.initializer.Xavier and paddle.nn.initializer.Constant
        for non-bias and bias parameter, respectively. Default: None.
    device(PlaceLike, optional): the device place for the parameter. Default: None.

Returns:
    :Tensor, created parameter.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(2023)

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...         w_tmp = self.create_parameter([1,1])
        ...         self.add_parameter("w_tmp", w_tmp)
        ...
        ...     def forward(self, input):
        ...         return self._linear(input)
        ...
        >>> mylayer = MyLayer()
        >>> for name, param in mylayer.named_parameters():
        ...     print(name, param)      # will print w_tmp,_linear.weight,_linear.bias
        w_tmp Parameter containing:
        Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[0.06979191]])
        _linear.weight Parameter containing:
        Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[1.26729357]])
        _linear.bias Parameter containing:
        Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [0.])
 Ndevice)r   r   r   rl   r   create_parameter)r   shapeattrr   is_biasdefault_initializerrF  	temp_attrs           rC   rG  Layer.create_parameter  sO    n MM$'	i%%)r/I||,,e.A - 
 	
rE   c                ^   UR                  S5      u  p#nU R                  U5      n[        XT5      (       d"  [        UR	                  5       S-   U-   S-   5      e[        XT5      n[        U[        R                  R                  [        R                  45      (       d  [        SU-   S-   5      eU$ )z
Return the parameter given by ``target`` if it exists, otherwise throw an error.
Parameters:
    target(str): The fully-qualified string name of the Parameter to look for.

Returns:
    Parameter: The Parameter referenced by ``target``.
        """
        module_path, _, param_name = target.rpartition(".")

        mod = self.get_sublayer(module_path)
        if not hasattr(mod, param_name):
            raise AttributeError(
                mod._get_name() + " has no attribute `" + param_name + "`"
            )

        param = getattr(mod, param_name)
        if not isinstance(
            param, (paddle.base.framework.Parameter, paddle.Tensor)
        ):
            raise AttributeError("`" + param_name + "` is not an nn.Parameter")

        return param

    @deprecated(
        since="2.0.0",
        update_to="paddle.nn.Layer.create_tensor",
        reason="New api in create_tensor, easier to use.",
    )
    def create_variable(
        self,
        name: str | None = None,
        persistable: bool | None = None,
        dtype: DTypeLike | None = None,
    ) -> Tensor:
        """

Create Tensor for this layer.

Parameters:
    name(str, optional): name of the tensor. Please refer to :ref:`api_guide_Name` . Default: None

    persistable(bool, optional): if set this tensor persistable. Default: False

    dtype(str, optional): data type of this parameter. If set str, it can be "bool", "float16", "float32", "float64","int8", "int16", "int32", "int64", "uint8" or "uint16". If set None, it will be "float32". Default: None

Returns:
    Tensor, created Tensor.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> class MyLinear(paddle.nn.Layer):
        ...     def __init__(self,
        ...                 in_features,
        ...                 out_features):
        ...         super().__init__()
        ...         self.linear = paddle.nn.Linear( 10, 10)
        ...
        ...         self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)
        ...
        ...     def forward(self, input):
        ...         out = self.linear(input)
        ...         paddle.assign( out, self.back_var)
        ...
        ...         return out

rO  _generated_varrb   rb  r   rx   rm   r@   r   r   r   r   r;   
create_varr   r   VarTypeDENSE_TENSORr   rb   rb  r   var_names        rC   create_variableLayer.create_variable  s    \ xx$ 78H"++$//+;<=H ||((668CC#%%22	 D 
 	
rE   c                R   Ub  SR                  U R                  U/5      nO1[        R                  " SR                  U R                  S/5      5      nU R                  R
                  R                  5       R                  UUU[        R                  R                  R                  S9$ )a}  

Create Tensor for this layer.

Parameters:
    name(str, optional): name of the tensor. Please refer to :ref:`api_guide_Name` . Default: None.
    persistable(bool, optional): if set this tensor persistable. Default: False.
    dtype(str, optional): data type of this parameter.
        If set str, it can be "bool",  "float16", "float32", "float64",
        "int8", "int16", "int32", "int64", "uint8" or "uint16".
        If set None, it will be "float32". Default: None.

Returns:
    Tensor, created Tensor.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> class MyLinear(paddle.nn.Layer):
        ...     def __init__(self,
        ...                  in_features,
        ...                  out_features):
        ...         super().__init__()
        ...         self.linear = paddle.nn.Linear(10, 10)
        ...
        ...         self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)
        ...
        ...     def forward(self, input):
        ...         out = self.linear(input)
        ...         paddle.assign(out, self.back_var)
        ...
        ...         return out

rO  rd  re  rf  rj  s        rC   create_tensorLayer.create_tensor  s    T xx$ 78H"++$//+;<=H ||((668CC#%%22	 D 
 	
rE   rw   recursec                X    U R                  US9 VVs/ s H  u  p#UPM	     nnnU$ s  snnf )a  

Returns a list of all Parameters from current layer and its sub-layers.

Parameters:
    include_sublayers (bool, optional): Whether to return the parameters of the sublayer.
        If True, the returned list contains the parameters of the sublayer.
        Default: True.

Returns:
    list, list of Tensor, a list of Parameters.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(100)

        >>> linear = paddle.nn.Linear(1, 1)
        >>> print(linear.parameters())
        [Parameter containing:
        Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[0.18551230]]), Parameter containing:
        Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [0.])]

rw   )named_parameters)r   rw   r[  r   r   s        rC   
parametersLayer.parametersN  sI    > !11"3 2 
  	 
 

   &c                ,   / SQn[        U[        R                  [        R                  45      (       d  [	        U5      [
        L a  X;   a  [        U[
        [        R                  45      (       a  [        R                  " U5      nXl        U R                  5        H	  nXl        M     U R                  SS9 H  u  pEUR                  SU5        M     U R                  SS9 H  u  pFUR                  SU5        M     U $ [        S[        U5      -   5      e)a}  

Casts all parameters and buffers to dtype and then return the Layer.

Parameters:
    dtype(str|paddle.dtype|numpy.dtype): target data type of layer.
        If set str, it can be "bool", "bfloat16", "float16", "float32", "float64",
        "int8", "int16", "int32", "int64", "uint8", "complex64", "complex128".
        Default: None

Returns:
    Layer, self

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import paddle.nn as nn
        >>> weight_attr = paddle.ParamAttr(name="weight",initializer=paddle.nn.initializer.Constant(value=1.5))
        >>> bias_attr = paddle.ParamAttr(name="bias",initializer=paddle.nn.initializer.Constant(value=2.5))

        >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr).to(device="cpu",dtype="float32")
        >>> print(linear)
        Linear(in_features=2, out_features=2, dtype=float32)
        >>> print(linear.parameters())
        [Parameter containing:
        Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[1.50000000, 1.50000000],
                [1.50000000, 1.50000000]]), Parameter containing:
        Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [2.50000000, 2.50000000])]

        >>> linear=linear.astype("int8")
        >>> print(linear)
        Linear(in_features=2, out_features=2, dtype=paddle.int8)
        >>> print(linear.parameters())
        >>> # doctest: +SKIP("There are bugs in the `Layer.astype`. For details, refer to the following webpage: https://github.com/PaddlePaddle/Paddle/issues/76614")
        [Parameter containing:
        Tensor(shape=[2, 2], dtype=int8, place=Place(cpu), stop_gradient=False,
            [[1, 1],
                [1, 1]]), Parameter containing:
        Tensor(shape=[2], dtype=int8, place=Place(cpu), stop_gradient=False,
            [2, 2])]
        >>> # doctest: -SKIP

bfloat16float16float32float64int8int16int32int64uint8	complex64
complex128r   Trs  Ndtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'complex64', 'complex128', 'bool', or paddle.dtype, numpy.dtype, but receive )r   rX  r   nprx   rl   r   r    r  r&  rt  _tonamed_bufferstor   )r   r   valid_dtypesrA   r[  r   buffers          rC   astypeLayer.astypes  s    ^
 uv||RXX677E{c!%%#rxx11!<<UCK)$ * 11D1I		$& J!//$/G			$& HK Te* rE   c              #  F   #    U R                  5        H	  u  pUv   M     g7f)a
  

Returns an iterator over immediate children layers.

Yields:
    Layer: a child layer

Examples:
    .. code-block:: python

        >>> import paddle

        >>> linear1 = paddle.nn.Linear(10, 3)
        >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
        >>> model = paddle.nn.Sequential(linear1, linear2)

        >>> layer_list = list(model.children())

        >>> print(layer_list)
        [Linear(in_features=10, out_features=3, dtype=float32), Linear(in_features=3, out_features=10, dtype=float32)]

N)named_children)r   r[  rA   s      rC   r.  Layer.children  s!     . ++-HAK .   !c              #     #    [        5       nU R                  R                  5        H'  u  p#Uc  M
  X1;  d  M  UR                  U5        X#4v   M)     g7f)a  Returns an iterator over immediate children layers, yielding both
the name of the layer as well as the layer itself.

Yields:
    (string, Layer): Tuple containing a name and child layer

Examples:
    .. code-block:: python

        >>> import paddle

        >>> linear1 = paddle.nn.Linear(10, 3)
        >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
        >>> model = paddle.nn.Sequential(linear1, linear2)
        >>> for prefix, layer in model.named_children():
        ...     print(prefix, layer)
        0 Linear(in_features=10, out_features=3, dtype=float32)
        1 Linear(in_features=3, out_features=10, dtype=float32)
N)r  r  rI   add)r   memorb   rA   s       rC   r  Layer.named_children  sG     ( u++113KD U%6k! 4s   +AAAc                X    U R                  US9 VVs/ s H  u  p#UPM	     nnnU$ s  snnf )a_  

Returns a list of sub layers.

Parameters:
    include_self(bool, optional): Whether return self as sublayers. Default: False.

Returns:
    list of Layer, a list of sub layers.

Examples:
    .. code-block:: pycon

        >>> import paddle

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...         self._dropout = paddle.nn.Dropout(p=0.5)
        ...
        ...     def forward(self, input):
        ...         temp = self._linear(input)
        ...         temp = self._dropout(temp)
        ...         return temp
        >>> mylayer = MyLayer()
        >>> print(mylayer.sublayers())
        [Linear(in_features=1, out_features=1, dtype=float32), Dropout(p=0.5, axis=None, mode=upscale_in_train, inplace=False)]

)include_selfnamed_sublayers)r   r  r[  rA   r   s        rC   r&  Layer.sublayers  sA    B !00l0K
K K 	 
 
	
rw  rD  c              #    #    [        5       (       a  [        5       (       d
  [        5       O	[        5       nU(       a  U R	                  USUS9O[        U/U /5      nU Ha  u  pgUR                  R                  5       nU H<  u  pU
b  X;   a  M  U(       a  UR                  U
5        Xf(       a  SOS-   U	-   nX4v   M>     Mc     g7f)a  
Returns an iterator over all parameters in the Layer, yielding tuple of name and parameter.

Parameters:
    prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
    include_sublayers(bool, optional): Whether include the parameters of sublayers.
        If True, also include the named parameters from sublayers. Default: True.
    remove_duplicate(bool, optional): Whether to remove duplicated parameters in the result.
        Default: True.

Yields:
    (string, Parameter): Tuple of name and Parameter

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(100)

        >>> fc1 = paddle.nn.Linear(10, 3)
        >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
        >>> model = paddle.nn.Sequential(fc1, fc2)
        >>> for name, param in model.named_parameters():
        ...     print(name, param)
        0.weight Parameter containing:
        Tensor(shape=[10, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[ 0.07276392, -0.39791510, -0.66356444],
         [ 0.02143478, -0.18519843, -0.32485050],
         [-0.42249614,  0.08450919, -0.66838276],
         [ 0.38208580, -0.24303678,  0.55127048],
         [ 0.47745085,  0.62117910, -0.08336520],
         [-0.28653207,  0.47237599, -0.05868882],
         [-0.14385653,  0.29945642,  0.12832761],
         [-0.21237159,  0.38539791, -0.62760031],
         [ 0.02637231,  0.20621127,  0.43255770],
         [-0.19984481, -0.26259184, -0.29696006]])
        0.bias Parameter containing:
        Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=False,
        [0., 0., 0.])
        1.weight Parameter containing:
        Tensor(shape=[3, 10], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[ 0.01985580, -0.40268910,  0.41172385, -0.47249708, -0.09002256,
         -0.00533628, -0.52048630,  0.62360322,  0.20848787, -0.02033746],
         [ 0.58281910,  0.12841827,  0.12907702,  0.02325618, -0.07746267,
         0.31950659, -0.37924835, -0.59209681, -0.11732036, -0.58378261],
         [-0.62100595,  0.22293305,  0.28229684, -0.03687060, -0.59323978,
         0.08411229,  0.53275704,  0.40431368,  0.03171402, -0.17922515]])
Tprefixr  remove_duplicateNrO  rD  )	r#   r   r   r  r  r   r  rI   r  )r   r  rw   r  
params_setr  layer_prefixsublayerparamskeyr   rb   s               rC   rt  Layer.named_parameters  s     p &--0A0C0CHJ 	 !   !!1 !  fXv& 	 '6"L))//1F$
=E$7#NN5)#lsCcIk! % '6s   CCc              #     #    Uc
  [        5       nU(       a"  X;  a  U(       a  UR                  U 5        X4v   U R                  R                  5        H4  u  pVUc  M
  X(       a  SOS-   U-   nUR	                  USUUS9 Sh  vN   M6     g N	7f)a  
Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer.
The duplicate sublayer will only be yielded once.

Parameters:
    prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
    include_self(bool, optional): Whether include the Layer itself. Default: False.
    layers_set(set, optional): The set to record duplicate sublayers. Default: None.
    remove_duplicate(bool, optional): Whether to remove duplicated sublayers in the result.
        Default: True.

Yields:
    (string, Layer): Tuple of name and Layer

Examples:
    .. code-block:: python

        >>> import paddle

        >>> fc1 = paddle.nn.Linear(10, 3)
        >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
        >>> model = paddle.nn.Sequential(fc1, fc2)
        >>> for prefix, layer in model.named_sublayers():
        ...     print(prefix, layer)
        0 Linear(in_features=10, out_features=3, dtype=float32)
        1 Linear(in_features=3, out_features=10, dtype=float32)

        >>> l = paddle.nn.Linear(10, 3)
        >>> model = paddle.nn.Sequential(l, l)
        >>> for prefix, layer in model.named_sublayers(include_self=True, remove_duplicate=True):
        ...     print(prefix, layer)
         Sequential(
          (0): Linear(in_features=10, out_features=3, dtype=float32)
          (1): Linear(in_features=10, out_features=3, dtype=float32)
        )
        0 Linear(in_features=10, out_features=3, dtype=float32)

        >>> l = paddle.nn.Linear(10, 3)
        >>> model = paddle.nn.Sequential(l, l)
        >>> for prefix, layer in model.named_sublayers(include_self=True, remove_duplicate=False):
        ...     print(prefix, layer)
         Sequential(
          (0): Linear(in_features=10, out_features=3, dtype=float32)
          (1): Linear(in_features=10, out_features=3, dtype=float32)
        )
        0 Linear(in_features=10, out_features=3, dtype=float32)
        1 Linear(in_features=10, out_features=3, dtype=float32)

NrO  rD  Tr  r  
layers_setr  )r  r  r  rI   r  )r   r  r  r  r  r  rA   r  s           rC   r  Layer.named_sublayersk  s     p JD2t$,**002JC}!FS;cAL,,#!%!1	 -   	 3s   B BB
Bc              #  F   #    U R                  5        H	  u  pUv   M     g7f)zb
Return an iterator over all modules in the network.

Yields:
    Layer: a layer in the network.

N)named_modules)r   r[  modules      rC   modulesLayer.modules  s!      ++-IAL .r  c                .    SnUnU R                  UUUUS9$ )a  
Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer.
The duplicate sublayer will only be yielded once.

Parameters:
    memo(set, optional): The set to record duplicate sublayers. Default: None.
    prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
    remove_duplicate(bool, optional): Whether to remove duplicated sublayers in the result.
        Default: True.

Yields:
    (string, Layer): Tuple of name and Layer
Tr  r  )r   r  r  r  r  r  s         rC   r  Layer.named_modules  s2    & 
##%!-	 $ 
 	
rE   
persistentc                x   SU R                   ;  a  [        S5      e[        U[        5      (       d"  [	        S[        U5      R                   S35      eSU;   a  [        S5      eUS:X  a  [        S5      e[        X5      (       a  XR                  ;  a  [        SU S	35      eUbI  [        U5      [        R                  R                  :X  d"  [	        S[        U5      R                   S35      eX R                  U'   U(       a  U R                  R                  U5        g
U R                  R                  U5        g
)a  
Registers a tensor as buffer into the layer.

`buffer` is a non-trainable tensor and will not be updated by optimizer,
but is necessary for evaluation and inference. For example, the mean and variance in BatchNorm layers.
The registered buffer is persistable by default, and will be saved into
`state_dict` alongside parameters. If set persistable=False, it registers
a non-persistable buffer, so that it will not be a part of `state_dict` .

Buffers can be accessed as attributes using given names.

Parameters:
    name (string): name of the buffer. The buffer can be accessed
        from this layer using the given name
    tensor (Tensor): the tensor to be registered as buffer.
    persistable (bool): whether the buffer is part of this layer's
        state_dict.

Returns:
    None

Examples:
    .. code-block:: python

        >>> import numpy as np
        >>> import paddle

        >>> linear = paddle.nn.Linear(10, 3)
        >>> value = np.array([0]).astype("float32")
        >>> buffer = paddle.to_tensor(value)
        >>> linear.register_buffer("buf_name", buffer, persistable=True)

        >>> # get the buffer by attribute.
        >>> print(linear.buf_name)
        Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
        [0.])

r  )super().__init__() should be called firstz4The name of buffer should be a string, but received rO  zThe name of buffer can not contain `.`, because when you access the newly added buffer in the form of `self.**.**`, it will cause AttributeError.rD  z$The name of buffer can not be empty.zattribute '' already exists.Nz>The registered buffer should be a Paddle.Tensor, but received )__dict__r   r   rl   r   rx   r   KeyErrorrT  r  r   eagerr   r  discardr  )r   rb   tensorrb  s       rC   register_bufferLayer.register_buffer  s%   V T]]*HIID#&&FtDzGZGZF[[\]  D[F 
 RZABBT  T%>[.?@AAf9J9J)JPQUV\Q]QfQfPgghi  #)MM$66>>tD66::4@rE   c                X    U R                  US9 VVs/ s H  u  p#UPM	     nnnU$ s  snnf )a  

Returns a list of all buffers from current layer and its sub-layers.

Parameters:
    include_sublayers(bool, optional): Whether include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True.

Returns:
    list of Tensor, a list of buffers.

Examples:
    .. code-block:: python

        >>> import numpy as np
        >>> import paddle

        >>> linear = paddle.nn.Linear(10, 3)
        >>> value = np.array([0]).astype("float32")
        >>> buffer = paddle.to_tensor(value)
        >>> linear.register_buffer("buf_name", buffer, persistable=True)

        >>> print(linear.buffers())
        [Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
        [0.])]

rs  )r  )r   rw   r[  r  r   s        rC   buffersLayer.buffers!  sI    < "//"3 0 
	  	 
 

rw  c                
   UR                  S5      u  p#nU R                  U5      n[        XT5      (       d"  [        UR	                  5       S-   U-   S-   5      e[        XT5      nXER                  ;  a  [        SU-   S-   5      eU$ )az  
Return the buffer given by ``target`` if it exists, otherwise throw an error.

See the docstring for ``get_sublayer`` for a more detailed
explanation of this method's functionality as well as how to
correctly specify ``target``.

Parameters:
    target(str): The fully-qualified string name of the buffer to look for.

Returns:
    Tensor: The buffer referenced by ``target``.
    """
    module_path, _, buffer_name = target.rpartition('.')

    mod = self.get_sublayer(module_path)

    if not hasattr(mod, buffer_name):
        raise AttributeError(
            mod._full_name + ' has no attribute `' + buffer_name + '`'
        )

    buffer = getattr(mod, buffer_name)
    if buffer_name not in mod._buffers:
        raise AttributeError('`' + buffer_name + '` is not a buffer')

    return buffer

def named_buffers(self, prefix='', include_sublayers=True, remove_duplicate=True):
    """
Returns an iterator over all buffers in the Layer, yielding tuple of name and Tensor.

Parameters:
    prefix(str, optional): Prefix to prepend to all buffer names. Default: ''.
    include_sublayers(bool, optional): Whether to include the buffers of sublayers.
        If True, the named buffers from sublayers are also yielded. Default: True.
    remove_duplicate(bool, optional): Whether to remove duplicated buffers in the result.
        Default: True.

Yields:
    (string, Tensor): Tuple of name and tensor

Examples:
    .. code-block:: python

        >>> import numpy as np
        >>> import paddle

        >>> fc1 = paddle.nn.Linear(10, 3)
        >>> buffer1 = paddle.to_tensor(np.array([0]).astype("float32"))
        >>> # register a tensor as buffer by specific `persistable`
        >>> fc1.register_buffer("buf_name_1", buffer1, persistable=True)

        >>> fc2 = paddle.nn.Linear(3, 10)
        >>> buffer2 = paddle.to_tensor(np.array([1]).astype("float32"))
        >>> # register a buffer by assigning an attribute with Tensor.
        >>> # The `persistable` can only be False by this way.
        >>> fc2.buf_name_2 = buffer2

        >>> model = paddle.nn.Sequential(fc1, fc2)

        >>> # get all named buffers
        >>> for name, buffer in model.named_buffers():
        ...     print(name, buffer)
        0.buf_name_1 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
        [0.])
        1.buf_name_2 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
        [1.])

    """
    buffers_set = set()
    named_sublayers = (
        self.named_sublayers(
            prefix=prefix,
            include_self=True,
            remove_duplicate=remove_duplicate,
        )
        if include_sublayers
        else zip([prefix], [self])
    )
    for layer_prefix, sublayer in named_sublayers:
        buffers = sublayer._buffers.items()
        for key, buffer in buffers:
            if buffer is None or (
                remove_duplicate and id(buffer) in buffers_set
            ):
                continue
            if remove_duplicate:
                buffers_set.add(id(buffer))
            name = layer_prefix + ('.' if layer_prefix else '') + key
            yield name, buffer

def clear_gradients(self, set_to_zero=True):
    """
Clear the gradients of all parameters for this layer.

Args:
    set_to_zero (bool, optional): If True, set the trainable parameters'
        gradients to zero; if False, set them to None. Default: True.

Returns:
    None

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import numpy as np

        >>> value = np.arange(26).reshape(2, 13).astype("float32")
        >>> a = paddle.to_tensor(value)
        >>> linear = paddle.nn.Linear(13, 5)
        >>> adam = paddle.optimizer.Adam(learning_rate=0.01,
        ...                              parameters=linear.parameters())
        >>> out = linear(a)
        >>> out.backward()
        >>> adam.step()
        >>> linear.clear_gradients()
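        >>> # pass set_to_zero=False to release the gradients instead of
        >>> # zeroing them (illustrative sketch):
        >>> linear.clear_gradients(set_to_zero=False)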

N)ru  	trainableclear_gradient)r   set_to_zerops      rC   clear_gradientsLayer.clear_gradients  s,    8 "A{{{  - #rE   c                    g r   r   )r   argskwargss      rC   _build_onceLayer._build_once  s    rE   c                  ^ ^^^^	 S m	[        5       mUUUU	U 4S jn U" 5       $ ! [         a    T R                  R                  5        H  u  pEUT R                  ;   d  M  UT;  d  M   UT R
                  ;   a  U" T TTT	5      nO
U" T TT	5      nUb  Um	MM  MO  ! [         a%  n[        R                  " SU< 35         S nAMy  S nAff = f   e f = f)Nc                   > T	R                   R                  5        H  u  pU T	R                  ;   aI  U" T	TT5      nUb:  [        U[        5      (       a  [        U5      S:X  a  Uu  mmMM  [        SU S35      eM^  U" T	T5      nUc  Ml  [        U[        5      (       d  U4nUmM     T	R                  (       d  T	R                  " T0 TD6  ST	l        [        5       (       a_  [        R                  " T	R                  R                  [        R                  R                  5         T	R                   " T0 TD6mS S S 5        O:[#        T	R                  R                  5         T	R                   " T0 TD6mS S S 5        T	R$                  R                  5        HS  u  pU T	R&                  ;   a  TR)                  U 5        U T	R*                  ;   a  U" T	TTT5      nO
U" T	TT5      nUc  MQ  UmMU     T$ ! , (       d  f       N= f! , (       d  f       N= f)N   zPforward pre-hook must return None or a tuple of (new_args, new_kwargs), but got rO  T)r
  rI   r  r   r   r:   RuntimeErrorr  r  r*   r   RecordEventr   r   TracerEventTypeForwardforwardr$   r  r  r  r  )
hook_idforward_pre_hookargs_kwargs_resulthook_resultforward_post_hookcalled_always_called_hooksrB   r  rM   r   s
        rC   inner'Layer._dygraph_call_func.<locals>.inner  s    .2-D-D-J-J-L)dFFF)9$)O&)5&'95AA #$6 71 <-?NFF".!FFXEYYZ!\#  6 #34"@K".)+u==+6.K!,' .M* ;;  &3F3"!!))NN++X-E-E-M-M #llF=f=G 
 !!8!89"llF=f=G : /3.F.F.L.L.N*dDDD.227;dGGG"3ffg#K #4D&'"JK*)G /O N/ 
 :9s   "G+G<+
G9<
H
zuforward hook with ``always_call=True`` raised an exception that was silenced as another error was raised in forward: )r  	Exceptionr  rI   r  r  r>   r?   )
r   rB   r  r  r  r  r  er  rM   s
   ```     @@rC   _dygraph_call_funcLayer._dygraph_call_func  s    %(U"5	 5	n	7N 	.2.F.F.L.L.N*tEEE%??!"d&O&OO*; $ffg+K +< $fg+K '2&1G 3$ ! YYZX]_ !!! /O. 1	s9   $ 7CC(+BC
C!C;CCCc                v   [        5       (       d  U R                  (       d  U R                  (       dw  U R                  R                  [
        R                  L d  U R                  (       a?  [        5       (       a0  [        5       (       a  [        5       (       a  U R                  " U0 UD6$ U R                  " U0 UD6$ r   )r   r
  r  r   r  r6   r  r"   r*   r   r  r  r   rB   r  s      rC   __call__Layer.__call__   s    "$$,,--++u/@/@@DKK!!%''+A+C+C<<2622**F=f==rE   c                    [         e)z
Defines the computation performed at every call.
Should be overridden by all subclasses.

Parameters:
    *inputs(tuple): unpacked tuple arguments
    **kwargs(dict): unpacked dict arguments
    """
    raise NotImplementedError

def backward(self, *inputs):
    raise ValueError("Layer shouldn't implement backward")

def add_sublayer(self, name, sublayer):
    """

Adds a sub Layer instance.

Added sublayer can be accessed by self.name

Parameters:
    name(str): name of this sublayer.
    sublayer(Layer): an instance of Layer.
Returns:
    Layer, the sublayer passed in.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> class MySequential(paddle.nn.Layer):
        ...     def __init__(self, *layers):
        ...         super().__init__()
        ...         if len(layers) > 0 and isinstance(layers[0], tuple):
        ...             for name, layer in layers:
        ...                 self.add_sublayer(name, layer)
        ...         else:
        ...             for idx, layer in enumerate(layers):
        ...                 self.add_sublayer(str(idx), layer)
        ...
        ...     def forward(self, input):
        ...         for layer in self._sub_layers.values():
        ...             input = layer(input)
        ...         return input
        ...
        >>> fc1 = paddle.nn.Linear(10, 3)
        >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
        >>> model = MySequential(fc1, fc2)
        >>> for prefix, layer in model.named_sublayers():
        ...     print(prefix, layer)
        0 Linear(in_features=10, out_features=3, dtype=float32)
        1 Linear(in_features=3, out_features=10, dtype=float32)
    """
    assert isinstance(sublayer, Layer) or sublayer is None
    self._sub_layers[name] = sublayer
    return sublayer

def get_sublayer(self, target):
    """
Return the sublayer given by ``target`` if it exists, otherwise raise an error.

Parameters:
    target(str): The fully-qualified string name of the sublayer to look for.

Returns:
    Layer: The sublayer referenced by ``target``.
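
    Examples:
        .. code-block:: python

            >>> import paddle

            >>> # an illustrative sketch: address a sublayer in a Sequential
            >>> # container by its dotted path.
            >>> net = paddle.nn.Sequential(paddle.nn.Linear(10, 3))
            >>> sub = net.get_sublayer("0")
            >>> print(type(sub).__name__)
            Linear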
rD  rO  rP  rQ  ` is not an nn.Layer)	ri   rT  rU  rV  rW  r   rX  r   r6   )r   rY  atomsr]  items        rC   rS  Layer.get_sublayeri  s     R<K!<<,#D3%%$MMO&;;dBSH  #$Cc699??33$S4Z2H%HII  
rE   rA   r  c                   US:X  a  [        S5      eUR                  S5      n[        U[        R                  R
                  5      (       d  [        SS[        U5       3-   5      e[        U5      S:X  a  U nO%SR                  USS 5      nU R                  U5      nU(       a8  [        XTS   5      (       d%  [        UR                  5       S	-   US   -   S
-   5      e[        XTS   5      (       aK  [        XTS   5      n[        U[        R                  R
                  5      (       d  [        S
US   -   S-   5      e[        XTS   U5        g)a  
Set the sublayer given by ``target`` if it exists, otherwise raise an error.

Parameters:
    target(str): The fully-qualified string name of the sublayer to look for.
    layer(Layer): The layer to set the sublayer to.
    strict(bool): If ``False``, the method replaces an existing sublayer
        or creates a new one as long as the parent layer exists. If ``True``,
        the method only attempts to replace an existing sublayer and raises
        an error if it does not already exist.
    """
    if target == '':
        raise ValueError("Cannot set the sublayer without a target name!")

    atoms = target.split('.')

    if not isinstance(layer, paddle.nn.Layer):
        raise ValueError(f'`module` is not an nn.Layer, found {type(layer)}')

    if len(atoms) == 1:
        parent = self
    else:
        parent_key = '.'.join(atoms[:-1])
        parent = self.get_sublayer(parent_key)

    if strict and not hasattr(parent, atoms[-1]):
        raise AttributeError(
            parent._full_name + ' has no attribute `' + atoms[-1] + '`'
        )

    if hasattr(parent, atoms[-1]):
        mod = getattr(parent, atoms[-1])
        if not isinstance(mod, paddle.nn.Layer):
            raise AttributeError('`' + atoms[-1] + '` is not an nn.Layer')

    setattr(parent, atoms[-1], layer)

def add_module(self, name, layer):
    """
Adds a sub layer instance. Added layer can be accessed by self.name

Parameters:
    name(str): name of this sublayer.
    layer(Layer): an instance of Layer.
Returns:
    None
    """
    self.add_sublayer(name, layer)

def add_parameter(self, name, parameter):
    """Adds a Parameter instance.

Added parameter can be accessed by self.name

Parameters:
    name(str): name of this sublayer.
    parameter(Parameter): an instance of Parameter.
Returns:
    Parameter, the parameter passed in.
Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(100)

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self._linear = paddle.nn.Linear(1, 1)
        ...         w_tmp = self.create_parameter([1,1])
        ...         self.add_parameter("w_tmp", w_tmp)
        ...
        ...     def forward(self, input):
        ...         return self._linear(input)
        ...
        >>> mylayer = MyLayer()
        >>> for name, param in mylayer.named_parameters():
        ...     print(name, param)
        w_tmp Parameter containing:
        Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[-1.01448846]])
        _linear.weight Parameter containing:
        Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [[0.18551230]])
        _linear.bias Parameter containing:
        Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
        [0.])
r  z,super().__init__() should be called firstly.z7The name of parameter should be a string, but received rO  zThe name of parameter can not contain `.`, because when you access the newly added parameter in the form of `self.**.**`, it will cause AttributeError.rD  z'The name of parameter can not be empty.zThe parameter 'r  Nz>The parameter to be added should be a Parameter, but received r   &Parameter not found, Can't not find [  ] in state_dict)r  r  r   rl   r   rx   r   r  rT  r  r   r   rX  pirValuer:   r	  rb   	set_value)r   rb   	parameters      rC   add_parameterLayer.add_parameter  s   N -MNND#&&I$t*J]J]I^^_`  D[F 
 RZDEET  T1A1A%A_TF2CDEE":	++VZZ-=-=>,
 ,
 PQUV_Q`QiQiPjjkl   )-  &4(()A- ~~)>)>> <Y^^<LL\]> ##D$9$9)..$IJ%.T"rE   c                &    U R                  X5        g)z
Adds a Parameter instance. Added parameter can be accessed by self.name

Parameters:
    name(str): name of this parameter.
    parameter(Optional[Parameter]): an instance of Parameter.
Returns:
    None
    """
    self.add_parameter(name, parameter)

def _set_op_attrs(self, attrs):
    """
Add customized attributes while append_op. In the case of quantization, we want to save
some attributes into op_desc while exporting an inference model with @to_static.

Arguments:
    attrs(dict): customized attributes that will be added into op_descs.

NOTE: The interface is only exposed to developers.
c                   > U (       a  TR                   OTR                  nU (       a  [        O[        nSnU(       a  [	        [        U5      5      nX   U:H  nU$ )NF)r
  r  rD   rR   nextreversed)is_pre_hooklayers_hookscandidate_hookalready_registeredlast_keyr   s        rC   is_already_registered2Layer._set_op_attrs.<locals>.is_already_registered   sa      ''--   ,6  "' 67%1%;~%M"%%rE   z)attrs should be type(dict), but received T)r  r   Frg   r9  N)r   r  r   rx   r   rH   r  rA  rD   r:   r8   rK   r>  rR   r  r;  r   rk   )r   r   r  pre_hook_helperpost_hook_helpers   `    rC   _set_op_attrsLayer._set_op_attrs  s6   	&& %&&;DK<P<P;QR 
 	%%e,$6"<<+O t((../1444'6&7D# %7#>>1  4++,q0((44$--E 5  t((../1444 ##**+;< 8rE   c                    U R                   $ r   )r  r  s    rC   __getstate__Layer.__getstate__Q  s    }}rE   c                :    U R                   R                  U5        g r   )r  r  )r   states     rC   __setstate__Layer.__setstate__T  s    U#rE   c                   SU R                   ;   aT  U R                   S   nXR                  ;   a6  [        5       (       a  [        U R                  U   5      $ U R                  U   $ SU R                   ;   a-  U R                   S   nXR                  ;   a  U R                  U   $ SU R                   ;   a4  U R                   S   nX;   a   [        5       (       a  [        XA   5      $ XA   $ [
        R                  X5      $ )Nr  r  r  )r  r  r   r   r  object__getattribute__)r   rb   r  r  r  s        rC   __getattr__Layer.__getattr__W  s    DMM)--6K'''$&&1$2B2B42HII''--DMM)--6K'''''--&}}Z0H$&&1(.AA~%&&t22rE   c           	     	  ^ U4S jn[        U[        R                  R                  R                  R
                  5      (       a  [        R                  U TU5        TUl        g [        [        [        U 5      TS 5      [        5      (       a  [        R                  U TU5        U R                  R                  SS 5      n[        U[        R                  5      (       a  Uc  [!        S5      e[#        U R$                  5      S:  aW  UR&                  U R$                  ;   d   SUR&                   S35       eUR)                  U R$                  UR&                     5        U" U R                  U R*                  U R,                  5        X$T'   g [        U[        R.                  R0                  5      (       a]  UR3                  5       R'                  5       S:X  a;  Uc  [!        S5      eU" U R                  U R*                  U R,                  5        X$T'   g Ub4  TU;   a.  Ub%  [5        ST S	[        U5      R6                   S
35      eS UT'   g U R                  R                  SS 5      n[        U[8        5      (       a;  Uc  [!        S5      eU" U R                  U R:                  U R*                  5        X%T'   g Ub4  TU;   a.  Ub%  [5        ST S[        U5      R6                   S
35      eS UT'   g U R                  R                  SS 5      n[        U[<        R>                  R@                  5      (       a  Uc  [!        S5      eU" U R                  U R:                  U R,                  5        TU R*                  ;  a  U RB                  RE                  T5        UR&                  (       d  [F        RH                  " ST-   5      Ul        X&T'   g UGb  TU;   a  [        U5      [        RJ                  :X  d)  [        U[        R.                  R0                  5      (       a  SSKJ&n  [O        5       (       a  UT   c  [Q        ST ST ST S35      eUT   b1  [        [        U T5      5      [<        R>                  R@                  :X  a  U" U5      UT'   g U" U[        U T5      5        g Ub%  [5        ST S[        U5      R6                   S
35      eS UT'   g [        R                  U TU5        g )Nc                 .   > U  H  nTU;   d  M  UT	 M     g r   r   )dictsr   rb   s     rC   _remove_if_exist+Layer.__setattr__.<locals>._remove_if_existk  s    19$ rE   r  r  r   r  r  zbuiltin.parameterzassignment to parameter 'z0' should be of type Parameter or None, but got ''r  zassignment to sublayer 'z,' should be of type Layer or None, but got 'r  	_buffers_)assignzIn Dy2stat, self.z is a buffer and self.z0 is not allowed to be set to Variable when self.z	 is None.zassignment to buffers 'z7' should be of type core.DenseTensor or None, but got '))r   rX  jit	dy2staticprogram_translatorStaticFunctionr$  __setattr___patched_namerW  rx   propertyr  rY   r   r   r   r:   r	  rb   r  r  r  r  r  get_defining_opr   r   r6   r  r   r  r   r  r  r   r   Variabler/  r   r  )r   rb   r  r+  r  layersr  r/  s    `      rC   r4  Layer.__setattr__j  sm   	 
 6::''::II
 
 tT51"&Egd4j$5x@@tT51""=$7eY0011~ !LMM4(()A-zzT%:%:: <UZZLHXY:  5 5ejj ABT]]DMM4;K;KL 4Lufjj..//%%',,.2EE~ !LMMT]]DMM4;K;KL 4LDFN /v5efjkpfqfzfze{{|}   F4L]]&&}d;F%''>$C  !0@0@$--P$t#$#24&8deijoepeyeydzz{|   $t==,,Z>eTZZ%6%677'(G  %t'7'79I9I
 4==0>>BB4H ::%0%9%9+:L%M
%*TN)dh.> E{i&8&88Jvzz//= = 2 -..8D>3I"."3D69OPTv VOOSfT]!_# 
 %TN2#GD$$78DJJ<M<MM-3E]HTN"5'$*=>*'5dV;rswx}s~  tH  tH  sI  IJ  K  *.&&tT59rE   c                   XR                   ;   a  U R                   U	 g XR                  ;   a  U R                  U	 g XR                  ;   a)  U R                  U	 U R                  R	                  U5        g [
        R                  X5        g r   )r  r  r  r  r  r$  __delattr__)r   rb   s     rC   r<  Layer.__delattr__  sn    ###  &%%%  &]]"d#22::4@t*rE   c                b   [        U R                  5      n[        U R                  R	                  5       5      n[        U R
                  R	                  5       5      n[        U R                  R	                  5       5      n[        U R                  R	                  5       5      nX-   U-   U-   U-   nU$ )a$  
Return a list. Get all parameters, buffers(non-parameter tensors), sublayers, method and attr of Layer.

Examples:
    .. code-block:: python
        >>> import paddle
        >>> import numpy as np

        >>> class Mylayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.linear1 = paddle.nn.Linear(10, 10)
        ...         self.linear2 = paddle.nn.Linear(5, 5)
        ...         self.conv2d = paddle.nn.Conv2D(3, 2, 3)
        ...         self.embedding = paddle.nn.Embedding(128, 16)
        ...         self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32'))
        ...
        >>> mylayer = Mylayer()
        >>> print(dir(mylayer))
        ['__call__', '__class__', '__delattr__', '__dict__', ..., 'training']
)dirr   r   r  keysr  r  r  )r   methodr   ru  r&  r  r@  s          rC   __dir__Layer.__dir__  s    , T^^$T]]'')*$**//12
))..01	t}}))+,~
*Y6@rE   c                    g)z[
Extra representation of this layer. Override this method to provide
customized extra information for your own layer.
rD  r   r  s    rC   
extra_reprLayer.extra_repr	  s    
 rE   c                   / nU R                  5       nUR                  S5      n/ nU R                  R                  5        H6  u  pE[	        U5      n[        US5      nUR                  SU-   S-   U-   5        M8     U R                  R                  S-   nU(       a@  [        U5      S:  a  USSR                  U5      -   S-   -  nO[        U5      S:X  a  XqS   -  nU(       a  USSR                  U5      -   S-   -  nUS-  nU$ )	Nrf   r  (z): rg   z
  r   ))rE  ri   r  rI   reprrs   rk   r   r   r:   rm   )r   extra_linesrE  sublayer_linesrb   rA   sublayer_str	final_strs           rC   r   Layer.__repr__
	  s    __&
 &&t,++113KD;L%lA6L!!#*u"4|"CD 4
 NN++c1	;!#Vfkk+&>>EE	[!Q&^+	&++n"==DDIS	rE   c                `    [        U R                  5      nXR                  UR                  '   U$ r   )r   r  r   )r   r<  r=  s      rC   register_state_dict_hookLayer.register_state_dict_hook 	  s0     .d.D.DE>B1::;!!rE   c                   Uc
  [        5       nU R                  R                  5        H  u  pEUc  M
  XQX4-   '   M     U R                  R                  5        H!  u  pFUc  M
  X@R                  ;  d  M  XaX4-   '   M#     U(       a@  U R
                  R                  5        H"  u  pxUc  M
  UR                  UUX7-   S-   5        M$     U$ )z
The difference from state_dict() is that state_dict_hook will not be called,
but the original types of parameters and buffers will be maintained.
rO  )r   r  rI   r  r  r  _obtain_parameters_buffers)	r   destinationrw   structured_name_prefixrb   datar  
layer_name
layer_items	            rC   rT   Layer._obtain_parameters_buffers'	  s     %-K**002JD=A29: 3 !MM//1LD" F FF=C29: 2 *.*:*:*@*@*B&
)99#).;cA +C rE   c           	        Uc
  [        5       nU R                  R                  5        H(  u  pxUc  M
  U(       a  UOUR                  5       XU-   '   M*     U R                  R                  5        He  u  pyU(       d6  U	b1  XpR
                  ;  a   U(       a  U	OU	R                  5       XU-   '   M>  M@  MB  U	c  MG  U(       a  U	OU	R                  5       XU-   '   Mg     U(       aC  U R                  R                  5        H%  u  pUc  M
  UR                  UUX:-   S-   UUU5        M'     U(       a0  U R                  R                  5        H  nU" U5      nUc  M  UnM     U$ )a<  
Get all parameters and persistable buffers of the current layer and its sub-layers, and set them into a dict.

Parameters:
    destination(dict, optional) : If provided, all the parameters and persistable buffers will be set to this dict. Default: None.
    include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True.
    include_non_persistable_buffer(bool, optional): If true, include the non-persistable buffers of the current layer and its sub-layers; this is used in pure fp16 and jit.save. Default: False.
    use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True.
    keep_vars(bool, optional) : If false, the returned tensors in the state dict are detached from autograd. Default: True.
rO  )
r   r  rI   detachr  r  r  _state_dict_implr  values)r   rU  rw   rV  include_non_persistable_bufferuse_hook	keep_varsrb   rW  r  rX  rY  state_dict_hookr  s                 rC   r]  Layer._state_dict_implH	  sQ   ( %-K**002JD%D4;;= T9: 3
 !MM//1LD1&$J$JJ #,   => K ' %"+   => 2 *.*:*:*@*@*B&
)//#).;cA6 ! +C #'#9#9#@#@#B-k:*"-K $C
 rE   c           	     *    U R                  UUUSUUS9$ )a  

Get all parameters and buffers of the current layer and its sub-layers, and set them into a dict.

Parameters:
    destination(dict, optional) : If provided, all the parameters and persistable buffers will be set to this dict. Default: None.
    include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True.
    use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True.
    keep_vars(bool, optional) : If false, the returned tensors in the state dict are detached from autograd. Default: True.

Returns:
    dict, a dict contains all the parameters and persistable buffers.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> emb = paddle.nn.Embedding(10, 10)

        >>> state_dict = emb.to_static_state_dict()
        >>> paddle.save( state_dict, "paddle_dy.pdparams")

TrU  rw   rV  r_  r`  ra  )r]  r   rU  rw   rV  r`  ra  s         rC   to_static_state_dictLayer.to_static_state_dict	  s/    @ $$#/#9+/ % 
 	
rE   c                    g r   r   rf  s         rC   
state_dictLayer.state_dict	  s     rE   .)r  ra  c                   g r   r   )r   rU  r  ra  s       rC   rj  rk  	  s     rE   c                   g r   r   )r   r  ra  s      rC   rj  rk  	  s     rE   rU  r  ra  c                   g r   r   )r   rU  r  ra  r  s        rC   rj  rk  	  s     rE   c                  ^ [        U5      nSU4S jjnUS:  a  [        US   [        5      (       d  ST;   az  / SQn[        [	        U[        U5      5      5       H  nU" XV   X   5        M     U R                  TR                  SS5      STR                  SS	5      S
STR                  SS
5      S9$ U R
                  " U0 TD6$ )a  
Get all parameters and persistable buffers of the current layer and its sub-layers, and set them into a dict.

Parameters:
    destination(dict, optional) : If provided, all the parameters and persistable buffers will be set to this dict. Default: None.
    include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True.
    use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True.
    keep_vars(bool, optional) : If false, the returned tensors in the state dict are detached from autograd. Default: True.

Returns:
    dict: a dict containing all the parameters and persistable buffers.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> emb = paddle.nn.Embedding(10, 10)

        >>> state_dict = emb.state_dict()
        >>> paddle.save(state_dict, "paddle_dy.pdparams")
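
        >>> # the keys follow the layer's parameter names (illustrative):
        >>> print(list(state_dict.keys()))
        ['weight']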

c                :   > U T;   a  [        SU  S35      eUTU '   g )Nz"got multiple values for argument 'r-  )r   )r  r  r  s     rC   safe_set_param(Layer.state_dict.<locals>.safe_set_param	  s(    f}"DSE KLLF3KrE   r  rg   r  rn  rU  NTrD  Fra  re  )r  rl   r  r   )r:   r   rl   r   minr]  rY   )r   r  r  len_argsrr  base_param_keysrq   s     `    rC   rj  rk  	  s    0 t9	  Mja#66DOS3+?@A3TY? B (("JJ}d;"&'-zz(B'?/4 **[%8 )   $$d5f55rE   c                    0 nU R                  SSS9n[        USUS9nUR                  U5        U R                  R	                  5        H/  u  pVUc  M
  UR                  U U S3S9nUR                  U5        M1     U$ )aP  Recursively builds a sharded state dictionary for the model and its sub-layers.

Args:
    structured_name_prefix: Prefix to prepend to all tensor names for hierarchical naming.

Returns:
    Dictionary mapping tensor names to ShardedWeight.
    The dictionary contains both the current layer's parameters and all sub-layer parameters.
rD  F)rV  rw   N)rj  shard_rulesr  rO  )rV  )rj  r(   r  r  rI   sharded_state_dict)r   rV  ry  rj  current_sharded_dictrX  rY  sub_shardeds           rC   ry  Layer.sharded_state_dict 
  s      __#%# % 

  8!) 

 	!!"67 '+&6&6&<&<&>"J%(;;.D-Ej\QR+S <  #))+6 '? "!rE   c                >    SSK Jn  U" U R                  5       U40 UD6$ )a  
Returns an iterator over the full, unsharded model parameters.
The output parameters can be customized using the `aoa_config` argument.

Args:
sharded_state_dict (ShardedStateDict):
    The state dict containing parameter shards local to the current process.
aoa_config (dict[str, list[str]] | None, optional):
    AoA (Almost AllReduce) configuration. Default is None.
kwargs:
    Optional keyword arguments:
    - h_group: The horizontal communication group.
        If using group communication, both h_group and v_group must be provided.
    - v_group: The vertical communication group.
    - process_group: The communication group in single-group setups (when h_group and v_group are not used).
    - num_splits (int): The number of splits to divide the parameters.
    - shard_idx (int): The index of the split handled by the current process. Default is 0.
    - memory_growth_threshold (int): The memory threshold (in bytes) for controlling memory growth during parameter assembly.
        Default is 8 * (2 ** 30), i.e., 8GB.

Returns:
    Iterator:
        An iterator over the full, unsharded model parameters, optionally filtered and customized according to `aoa_config`.

    """
    from paddle.distributed.flex_checkpoint.dcp.full_param import full_param

    return full_param(self.sharded_state_dict(), aoa_config, **kwargs)
         a2  n
[        R                  " SU S3[        U
5      -   5         Sn
A
GM  Sn
A
ff = fs  snnf s  snnf ! [
         a  n[        S5      eSnAf[4         a  n[        S5      eSnAff = f)	as  
Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict

Parameters:
    state_dict(dict) : Dict contains all the parameters and persistable buffers.
    use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                          Default: True.
Returns:
    missing_keys(list):A list of str containing the missing keys
    unexpected_keys(list):A list of str containing the unexpected keys

Examples:
    .. code-block:: python

        >>> import paddle

        >>> emb = paddle.nn.Embedding(10, 10)

        >>> state_dict = emb.state_dict()
        >>> paddle.save(state_dict, "paddle_dy.pdparams")
        >>> para_state_dict = paddle.load("paddle_dy.pdparams")
        >>> emb.set_state_dict(para_state_dict)

c           	       > TR                  U S 5      nUc  TR                  U 5        [        U  S35      e[        U[        [
        45      (       ab  [        U5      [        U5      :w  a6  TR                  U 5        [        U  S[        U5       S[        U5       35      eTR                  U 5        X4$ [        R                  " UR                  5      (       a  UR                  5       OUR                  n[        U5      [        UR                  5      :w  aA  TR                  U 5        [        U  S[        U5       S[        UR                  5       S35      eTR                  U 5        X4$ )Nz# is not found in the provided dict.z receives the length of z, but the expected shape is z receives a shape rO  )rY   rk   r   r   r  r   r:   r  inspectismethodrH  )r  r   r   state_shape
match_keysr   rj  s       rC   _check_match*Layer.set_state_dict.<locals>._check_matchm
  sT   NN3-E}##C( C5(K!LMM%$..u:U+ '',$%7E
| D558ZLB 
 NN3' <' ''44 KKM  $U[[(99 '',$%1${2C1DD`aefkfqfqar`sstu  s#|#rE   F)r`  zSkip loading for z. Nc                   [        5       R                  U R                  5      R                  5       nUR	                  5       nUR                  5       (       a  [        R                  " 5       nGOUR                  5       (       a  [        R                  " 5       nGOcUR                  5       (       aY  [        R                  " 5       nUR                  UR	                  5       5        [        R                  " UR                  5       5      nOUR                  5       (       a  [        R                  " 5       nUR                  UR	                  5       5        [        R                   " ["        R$                  R'                  5       R)                  S5      S   UR+                  5       5      nOX[        R                  " 5       nUR                  UR	                  5       5        [        R,                  " UR/                  5       5      nUR1                  X5        g )N:r   )r   find_varrb   
get_tensor_placeis_cpu_placer   CPUPlaceis_cuda_pinned_placeCUDAPinnedPlaceis_xpu_placePlace	set_placeXPUPlacexpu_device_idis_custom_placeCustomPlacerX  rF  
get_deviceri   custom_device_id	CUDAPlacegpu_device_idr  )r   ndarraytr  places        rC   _set_var&Layer.set_state_dict.<locals>._set_var
  sH    N++CHH5@@BHHJ>>## MMOE++-- 002E^^%%

AKK
+ MM!//*;<E&&((

AKK
+ ,,00288=a@**,E
 

AKK
+ NN1??+<=Eg%rE   zThis error might happens in dy2static, while calling 'set_state_dict' dynamically in 'forward', which is not supported. If you only need call 'set_state_dict' once, move it to '__init__'.)r  r]  rI   rb   rk   r   r>   r?   rl   r@  r"   r  r#   r   rX  baser   _current_expected_place__default_executor	libpaddler  create_loaded_parameterr   _get_devicer   _create_loaded_parameterr   )r   rj  use_structured_namer   r  matched_param_stater  r   key_name	match_reserrr   r  executorr  r  r   s    `             @@rC   set_state_dictLayer.set_state_dictK
  s.   < U
	$< !///?EEGJC1suzzHF(9	#**95	 H ??$C*$&&s+ %  3& !4r _,,k&0=='--FFH ''  KK))--EE3FG3F<53FG$   (6HHH113FG3F<53FG$ 
 %8LEU* %8 _,,  F 1#b9CHDEEFR H H    R     R s\   GA<H% H
AH% #H
1%H% 
H$&HHH% %
I/H::IIIc           	         / nU R                  USS9u  pVU(       a  [        U5      S:  a7  UR                  SSR                  SR	                  S U 5       5      5      5        [        U5      S:  a7  UR                  SSR                  SR	                  S U 5       5      5      5        [        U5      S:  a>  [        S	R                  U R                  R                  S
R	                  U5      5      5      e[        XV5      $ )aD  
Copy parameters and buffers from :attr:`state_dict` into this layer and its descendants.

If :attr:`strict` is ``True``, then
the keys of :attr:`state_dict` must exactly match the keys returned
by this layer's :meth:`~paddle.nn.Layer.state_dict` function.

Parameters:
    state_dict (dict): a dict containing parameters and persistent buffers.
    strict (bool, optional): whether to strictly enforce that the keys
        in :attr:`state_dict` match the keys returned by this layer's
        :meth:`~paddle.nn.Layer.state_dict` function. Default: ``True``
    assign (bool, optional): When set to ``False``, the properties of the tensors
        in the current layer are preserved, whereas setting it to ``True`` preserves
        the properties of the tensors in the state dict. The only
        exception is the ``stop_gradient`` field of ``Parameter``,
        for which the value from the layer is preserved. Default: ``False``

Returns:
    ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:
        * ``missing_keys`` is a list of str containing any keys that are expected
            by this layer but missing from the provided ``state_dict``.
        * ``unexpected_keys`` is a list of str containing the keys that are not
            expected by this layer but present in the provided ``state_dict``.
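
Examples:
    .. code-block:: python

        >>> import paddle

        >>> # a minimal round-trip sketch:
        >>> emb = paddle.nn.Embedding(10, 10)
        >>> state_dict = emb.state_dict()
        >>> result = emb.load_state_dict(state_dict)
        >>> print(result.missing_keys, result.unexpected_keys)
        [] []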
T)r  r   z%Unexpected key(s) in state_dict: {}. z, c              3  .   #    U  H  nS U S 3v   M     g7f"Nr   r   ks     rC   r   (Layer.load_state_dict.<locals>.<genexpr>
  s     !DOqAaS(O   z"Missing key(s) in state_dict: {}. c              3  .   #    U  H  nS U S 3v   M     g7fr  r   r  s     rC   r   r    s     !ALqAaS(Lr  z*Error(s) in loading state_dict for {}:
	{}z
	)	r  r:   insertformatrm   r  r   r   r}   )r   rj  r  r/  
error_msgsr   r   s          rC   load_state_dictLayer.load_state_dict
  s    @ !#
(,(;(;D )< )
% ?#a'!!;BB		!DO!DD < 1$!!8??		!AL!AA z?Q>EENN++V[[-D 
 !??rE   c           	     *    U R                  UUUUSSS9$ )as	  
Cast the parameters and buffers of Layer by the give device, dtype and blocking.

Parameters:
    device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
    If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
    index of the GPUs or XPUs. Default: None.

    dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.

    blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
      asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.

    non_blocking(bool|None, optional): If True and the source is in pinned memory, the copy will be
      asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the non_blocking is set False. Default: None.

Returns:
    self

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.seed(2023)

        >>> linear=paddle.nn.Linear(2, 2)
        >>> linear.weight
        >>> print(linear.weight)
        Parameter containing:
        Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
        [[ 0.89611185,  0.04935038],
         [-0.58883440,  0.99266374]])

        >>> linear.to(dtype='float64')
        >>> linear.weight
        >>> print(linear.weight)
        Parameter containing:
        Tensor(shape=[2, 2], dtype=float64, place=Place(gpu:0), stop_gradient=False,
        [[ 0.89611185,  0.04935038],
         [-0.58883440,  0.99266374]])

        >>> linear.to(device='cpu')
        >>> linear.weight
        >>> print(linear.weight)
        Parameter containing:
        Tensor(shape=[2, 2], dtype=float64, place=Place(cpu), stop_gradient=False,
        [[ 0.89611185,  0.04935038],
         [-0.58883440,  0.99266374]])

        >>> # doctest: +REQUIRES(env:GPU)
        >>> linear.to(device=paddle.CUDAPinnedPlace(), blocking=False)
        >>> linear.weight
        >>> print(linear.weight)
        Parameter containing:
        Tensor(shape=[2, 2], dtype=float64, place=Place(gpu_pinned), stop_gradient=False,
        [[ 0.89611185,  0.04935038],
         [-0.58883440,  0.99266374]])

TF)rF  r   blockingnon_blockingrw   rv   ry   )r   rF  r   r  r  s        rC   r  Layer.to  s-    D }}%"  
 	
rE   c                0   U(       a*  U R                  5        H  nUR                  XX4U5        M     U R                  R                  5        H`  u  pxUc  M
  [	        5          U" XX45      n	S S S 5        UR
                  c  M5  [	        5          U" UR                  5       X#U5      n
S S S 5        Mb     U R                  R                  5        H   u  p{Uc  M
  U" XX45      U R                  U'   M"     X0l        g ! , (       d  f       N= f! , (       d  f       M  = fr   )	r.  _applyr  rI   r   grad
_grad_ivarr  r  )r   funcrF  r   r  rw   rA   r  r   param_appliedgrad_appliedbufs               rC   r  Layer._apply[  s     T5<MN ) **002JC Y$($HM  ::) '+!,,.x( # 3 ++-HC%)#u%Gc" .  Y #s    
C5D5
D	
D	c                   Uc  UR                   nUc  UR                  n[        U[        R                  [
        R                  45      (       d  [        U5      nUR                   R                  5       (       a  [        U[
        R                  5      (       a	  [        U   OUn[
        R                  " U5      n[        R                  " UR                  5      U-  S-  S-   S-  S-  n[
        R                  " 5       nX:  aR  UR                  [         R"                  " 5       U5      n	UR%                  5       R'                  5       R)                  5         OUn	OUn	UbY  X9R                  :w  aJ  [         R*                  R,                  R/                  U	R                   S9   U	R1                  US9n
S S S 5        OU	n
Ub2  W
R                   R3                  U5      (       d  U
R                  X$5      nOW
nUR%                  5       R'                  5       nUR%                  5       R'                  5       nUR5                  5       (       a  UR7                  U5        U$ UR9                  U5        U$ ! , (       d  f       N= f)N   rg   g333333?)r  r   )r  r   r   r   rh  r   DataTyper    is_gpu_placer%   size_of_dtyper  prodrH  gpu_memory_available_copy_torX  r  r  r  _clearr  r   _dygraph_place_guardcast_equals_is_initialized_share_data_with_share_data_nocheck_with)r   r  rF  r   r  proto_dtype
size_dtypewaiting_alloc_memoryr  t_usedt_castednew_t
dst_tensor
src_tensors                 rC   
_transformLayer._transformz  s    >WWF=GGE%'//4==!ABB.u5E 77!! eT]]33 *%0 
 ++K8J ''!''"Z/36:cACG ! $(#<#<#> #:OO%x 	$$&--/F ,,!6&&;;&,,;O!;;U;3 PO H hnn&<&<V&D&D%%f7EE WWY))+
[[]--/
''
3
  //
;) POs   I$$
I2c                  ^ ^ Uc  Uc  Uc  Uc  T $ Ubv  [        U[        5      (       a   [        R                  R	                  U5      nOA[        U[
        R                  5      (       a  O![        S[        U5      R                   35      eUc  SnO[        U[        5      (       d   S5       eUc  SnO[        U[        5      (       d   S5       eU(       a  U(       a  SOSnUU 4S jn[        R                  " 5          [        R                  " S[        S	9  T R                  XqX#U5        SSS5        UT l        T $ ! , (       d  f       N= f)
a  
Cast the parameters and buffers of Layer by the give device, dtype and blocking.

Parameters:
    device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
    If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
    index of the GPUs or XPUs. Default: None.

    dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.

    blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
      asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.

    non_blocking(bool|None, optional): If True and the source is in pinned memory, the copy will be
      asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the non_blocking is set False. Default: None.

    include_sublayers(bool, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.

    floating_only(bool, optional): If True, only cast all floating point parameters and buffers of Layer by the give device, dtype and blocking.

Returns:
    self

Nzdevice should be type of str, paddle.CPUPlace, paddle.CUDAPlace, paddle.CUDAPinnedPlace, paddle.XPUPlace, or paddle.base.libpaddle.Place, but got Tz5blocking value error, must be the True, False or NoneFz9non_blocking value error, must be the True, False or Nonec                p   > T(       a  [         R                  " U 5      (       d  U $ TR                  XX#5      $ r   )rX  is_floating_pointr  )r  rF  r   r  rv   r   s       rC   	transform!Layer._to_impl.<locals>.transform  s,    f&>&>q&A&A??1e>>rE   ignore)category)r   rl   rX  rF  _convert_to_placer   r  r   rx   r   r   r>   catch_warningsfilterwarningsUserWarningr  r  )r   rF  r   r  r  rw   rv   r  s   `     ` rC   ry   Layer._to_impl  sX   F N $K&#&&88@

    i  jn  ou  jv  j  j  i@  A  Hh-- G-  LlD11 K1 !)L5d	?
 $$&##H{CKK	5<MN '  '&s   <-D::
Ec                   [         R                  R                  5       n[         R                  R                  5       n[         R                  R                  X!5         U R	                  5        H"  nUR                  UR                  5       5        M$     SSS5        [         R                  R                  5       (       a  U$ U$ ! , (       d  f       N5= f)z
Return startup program containing initialization operations of all parameters.

NOTE(dev): This is a very low level API and only for inner developer.
N)	rX  r  r   program_guardru  _create_init_opr   r   use_pir_api)r   startup_programr   r   s       rC   _startup_programLayer._startup_program  s     !++--/{{**,[[&&|E*%%o&B&B&DE + F ''))"" FEs   7C
Cc                t  ^ / SQn[        T[        R                  [        R                  45      (       d  [	        T5      [
        L aX  TU;   aR  [        T[
        [        R                  45      (       a  [        R                  " T5      mU4S jnU R                  U5      $ [        S[        [        5      -   5      e)aN  
Casts all parameters and buffers to the given :attr:`dtype`.

Parameters:
    dtype(str|paddle.dtype): target data type of layer.
        If set str, it can be "bool", "bfloat16", "float16", "float32", "float64",
        "int8", "int16", "int32", "int64", "uint8", "complex64", "complex128".
        Default: None

Returns:
    Layer: self
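
Examples:
    .. code-block:: python

        >>> import paddle

        >>> # an illustrative sketch: cast a layer's parameters to float64.
        >>> linear = paddle.nn.Linear(2, 2)
        >>> linear = linear.type("float64")
        >>> print(linear.weight.dtype)
        paddle.float64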
ry  c                (   > U R                  TSSS9  g )NFTru   r  )rA   dst_types    rC   layer_transLayer.type.<locals>.layer_transE  s    "%4  rE   r  )
r   rX  r   r  rx   rl   r   r    r/  r   )r   r  r  r  s    `  rC   rx   
Layer.type"  s    
 x&,,!9::H~$L((S"((O44$??I
 ::k** Te* rE   c                @    U R                  [        R                  5      $ )zd
Casts all floating point parameters and buffers to ``double`` datatype.

Returns:
    Layer: self
    """
    return self.type(paddle.float64)

def half(self):
    """
Casts all floating point parameters and buffers to ``half`` datatype.

Returns:
    Layer: self
    """
    return self.type(paddle.float16)
Layer.halfZ  r  rE   c                  ^ Tc  / OTm[        T[        5      (       a  T/mOI[        T[        [        45      (       a  [        T5      mO"[	        S[        T5      R
                   S35      eU4S jnU R                  U5      $ )a  
Casts all floating point parameters and buffers to ``float`` data type.

Parameters:
    excluded_layers(nn.Layer|list|tuple|None, optional): Specify the layers whose original data type should be kept. If excluded_layers is None, casts all floating point parameters and buffers. Default: None.

Returns:
    Layer: self

Examples:
    .. code-block:: python

        >>> import paddle

        >>> class Model(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.linear = paddle.nn.Linear(1, 1)
        ...         self.dropout = paddle.nn.Dropout(p=0.5)
        ...
        ...     def forward(self, input):
        ...         out = self.linear(input)
        ...         out = self.dropout(out)
        ...         return out
        >>> model = Model()
        >>> model.float()
        Model(
            (linear): Linear(in_features=1, out_features=1, dtype=paddle.float32)
            (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train, inplace=False)
        )
9excluded_layers should be type nn.Layer or list, but got rO  c                <   > [        U [        R                  T5        g r   )r{   rX  r|  rA   rz   s    rC   r   Layer.float.<locals>.layer_trans      ufnnoFrE   )r   rx   r   r   r   r   r/  r   rz   r  s    ` rC   floatLayer.floatc  s    F !0 7"_ot,,./O$77"?3OKDQ`LaLjLjKkklm 	G zz+&&rE   c                  ^ [         R                  R                  5       SL a  [        R                  " S5        U $ Tc  [
        R                  /OTm[        T[        5      (       a  T/mOI[        T[        [        45      (       a  [        T5      mO"[        S[        T5      R                   S35      eU4S jnU R                  U5      $ )a  
    def float16(self, excluded_layers=None):
        """
        Casts all floating point parameters and buffers to ``float16`` data type.

        .. note::
            ``nn.BatchNorm`` does not support ``float16`` weights, so it would not be converted by default.

        Parameters:
            excluded_layers(nn.Layer|list|tuple|None, optional): Specify the layers that need to be kept in their original data type. If excluded_layers is None, casts all floating point parameters and buffers except ``nn.BatchNorm``. Default: None.

        Returns:
            Layer: self

        Examples:
            .. code-block:: python

                >>> # doctest: +SKIP('Paddle compiled by the user does not support float16, so keep original data type.')
                >>> import paddle

                >>> class Model(paddle.nn.Layer):
                ...     def __init__(self):
                ...         super().__init__()
                ...         self.linear = paddle.nn.Linear(1, 1)
                ...         self.dropout = paddle.nn.Dropout(p=0.5)
                ...
                ...     def forward(self, input):
                ...         out = self.linear(input)
                ...         out = self.dropout(out)
                ...         return out
                ...
                >>> model = Model()
                >>> model.float16()
                Model(
                    (linear): Linear(in_features=1, out_features=1, dtype=float32)
                    (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
                )
        """
        if paddle.amp.is_float16_supported() is False:
            warnings.warn(
                "Paddle compiled by the user does not support float16, so keep original data type."
            )
            return self

        excluded_layers = (
            [nn.BatchNorm] if excluded_layers is None else excluded_layers
        )

        if isinstance(excluded_layers, type):
            excluded_layers = [excluded_layers]
        elif isinstance(excluded_layers, (list, tuple)):
            excluded_layers = list(excluded_layers)
        else:
            raise TypeError(
                "excluded_layers should be type nn.Layer or list, but got "
                f"{type(excluded_layers).__name__}."
            )

        def layer_trans(layer):
            if type(layer) in excluded_layers:
                return
            layer._to_impl(
                dtype=paddle.float16, include_sublayers=False, floating_only=True
            )

        return self.apply(layer_trans)
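    # Hedged sketch of ``excluded_layers``: keep numerically sensitive layer
    # types in their original dtype while casting the rest (``MyNet`` below is
    # a hypothetical model, not part of this module):
    #
    # >>> model = MyNet()
    # >>> model.float16(excluded_layers=[paddle.nn.LayerNorm])
    # >>> # LayerNorm parameters keep float32; the other floating-point
    # >>> # parameters and buffers are cast to float16.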
    def bfloat16(self, excluded_layers=None):
        """
        Casts all floating point parameters and buffers to ``bfloat16`` data type.

        .. note::
            ``nn.BatchNorm`` does not support ``bfloat16`` weights, so it would not be converted by default.

        Parameters:
            excluded_layers(nn.Layer|list|tuple|None, optional): Specify the layers that need to be kept in their original data type. If excluded_layers is None, casts all floating point parameters and buffers except ``nn.BatchNorm``. Default: None.

        Returns:
            Layer: self

        Examples:
            .. code-block:: python

                >>> # doctest: +SKIP('bfloat need V100 compile')
                >>> import paddle

                >>> class Model(paddle.nn.Layer):
                ...     def __init__(self):
                ...         super().__init__()
                ...         self.linear = paddle.nn.Linear(1, 1)
                ...         self.dropout = paddle.nn.Dropout(p=0.5)
                ...
                ...     def forward(self, input):
                ...         out = self.linear(input)
                ...         out = self.dropout(out)
                ...         return out
                ...
                >>> model = Model()
                >>> model.bfloat16()
                >>> # UserWarning: Paddle compiled by the user does not support bfloat16, so keep original data type.
                Model(
                    (linear): Linear(in_features=1, out_features=1, dtype=float32)
                    (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
                )
        """
        if paddle.amp.is_bfloat16_supported() is False:
            warnings.warn(
                "Paddle compiled by the user does not support bfloat16, so keep original data type."
            )
            return self

        excluded_layers = (
            [nn.BatchNorm] if excluded_layers is None else excluded_layers
        )

        if isinstance(excluded_layers, type):
            excluded_layers = [excluded_layers]
        elif isinstance(excluded_layers, (list, tuple)):
            excluded_layers = list(excluded_layers)
        else:
            raise TypeError(
                "excluded_layers should be type nn.Layer or list, but got "
                f"{type(excluded_layers).__name__}."
            )

        def layer_trans(layer):
            if type(layer) in excluded_layers:
                return
            layer._to_impl(
                dtype=paddle.bfloat16, include_sublayers=False, floating_only=True
            )

        return self.apply(layer_trans)
    def cuda(self, device=None):
        """
        Move all model parameters and buffers to the GPU.

        This also makes associated parameters and buffers different objects. So
        it should be called before constructing the optimizer if the layer will
        live on GPU while being optimized.

        Parameters:
            device(int, optional): if specified, all parameters will be copied to that device.

        Returns:
            Layer: self
        """
        if device is None:
            # Reconstructed: the recovered fragments reference ``cuda`` and
            # ``current_device``, i.e. the currently selected GPU.
            device = paddle.CUDAPlace(paddle.device.cuda.current_device())
        elif isinstance(device, int):
            device = paddle.CUDAPlace(device)
        elif isinstance(device, paddle.CUDAPlace):
            pass
        else:
            raise ValueError(
                f"device must be int, paddle.CUDAPlace or None, got {type(device)}"
            )
        return self.to(device=device)
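    # Hedged usage sketch (requires a CUDA build of Paddle). Per the note in
    # the docstring above, move the layer before constructing the optimizer:
    #
    # >>> model = paddle.nn.Linear(2, 2)
    # >>> model.cuda(0)   # explicit device index; model.cuda() uses the current one
    # >>> opt = paddle.optimizer.SGD(parameters=model.parameters())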
    def xpu(self, device=None):
        """
        Move all model parameters and buffers to the XPU.

        This also makes associated parameters and buffers different objects. So
        it should be called before constructing optimizer if the layer will
        live on XPU while being optimized.

        Parameters:
            device(int, optional): if specified, all parameters will be copied to that device.

        Returns:
            Layer: self
        """
        if device is None:
            # Reconstructed: the recovered fragments default to XPU device 0.
            device = paddle.XPUPlace(0)
        elif isinstance(device, int):
            device = paddle.XPUPlace(device)
        elif isinstance(device, paddle.XPUPlace):
            pass
        else:
            raise ValueError(
                f"device must be int, paddle.XPUPlace or None, got {type(device)}"
            )
        return self.to(device=device)
    def cpu(self):
        """
        Move all model parameters and buffers to the CPU.

        Returns:
            Layer: self
        """
        return self.to(device=paddle.CPUPlace())

    def get_extra_state(self):
        # Guard recovered from the source; only the first sentence of the
        # original message survived.
        raise RuntimeError(
            "Reached a code path in Module.get_extra_state() that should never be called."
        )
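    # Hedged round-trip sketch: the device helpers above all funnel through
    # ``to(device=...)``.
    #
    # >>> model = paddle.nn.Linear(2, 2)
    # >>> model.cuda()   # to the current GPU (CUDA builds only)
    # >>> model.cpu()    # and back to host memory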
    def requires_grad_(self, requires_grad=True):
        """
        Change if autograd should record operations on parameters in this layer.

        Parameters:
            requires_grad (bool): whether autograd should record operations on
                parameters in this layer. Default: ``True``.

        Returns:
            Layer: self
        """
        for p in self.parameters():
            # Paddle expresses "requires grad" through ``stop_gradient``.
            p.stop_gradient = not requires_grad
        return self
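    # Typical use of ``requires_grad_``: freeze part of a network before
    # fine-tuning (``backbone`` and ``head`` are hypothetical sub-layers,
    # not part of this module):
    #
    # >>> model.backbone.requires_grad_(False)   # freeze the feature extractor
    # >>> model.head.requires_grad_(True)        # keep training the head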
    def zero_grad(self, set_to_none=True):
        """
        Reset gradients of all model parameters.

        Parameters:
            set_to_none (bool): instead of setting to zero, set the grads to None. Currently, set_to_none=True
                is not fully supported.
        """
        for p in self.parameters():
            if p.grad is not None:
                p.clear_gradient(not set_to_none)

    def _get_name(self):
        return self.__class__.__name__