
    ϑi                       S r SSKJr  SSKrSSKrSSKJr  SSKrSSK	r	SSK
JrJrJr  SSKJr  SSKJrJr  SS	KJr  \R*                  R,                  r\R*                  R/                  5       r\(       a8  SS
KJrJr  SSKJrJrJr  SSKJr   SSK	J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)J*r*  / r+ S     SS jjr,SSS jjr-SS jr.    S           S S jjr/ " S S5      r0 " S S5      r1 " S S5      r2g)!z;
Functions for Auto SParsity (ASP) training and inference.
    )annotationsN)TYPE_CHECKING)coreglobal_scopeprogram_guard)dygraph_only   )_default_pruning#supported_layers_and_prune_func_map)MaskAlgo)IterableSequence)AnyCallableLiteral)Tensor)	PlaceLike)Layer)	Optimizer)OperatorProgramc                l    Uc  [         R                  R                  5       n[        R	                  XS9  g)a  
Set parameter name of layers which would not be pruned as sparse weights.

Args:
    param_names (list of string): A list contains names of parameters.
    main_program (Program|None, optional): Program with model definition and its parameters.
                                      If None is given, then it would be set as `paddle.static.default_main_program().
                                      Default is None.
Examples:
    .. code-block:: python
        :name: dynamic-graph

        >>> # Example1: Usage of Dynamic Graph
        >>> import paddle

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 100)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         prediction = self.linear1(hidden)
        ...         return prediction

        >>> my_layer = MyLayer()
        >>> optimizer = paddle.optimizer.SGD(
        ...     learning_rate=0.01, parameters=my_layer.parameters())

        >>> # Need to set excluded layers before calling decorate
        >>> paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()])

        >>> optimizer = paddle.incubate.asp.decorate(optimizer)

    .. code-block:: python
        :name: static-graph

        >>> # Example2: Usage of Static Graph
        >>> import paddle

        >>> paddle.enable_static()

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 100)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         prediction = self.linear1(hidden)
        ...         return prediction

        >>> main_program = paddle.static.Program()
        >>> startup_program = paddle.static.Program()

        >>> with paddle.static.program_guard(main_program, startup_program):
        ...     input_data = paddle.static.data(name='data', shape=[None, 3, 224, 224])
        ...     label = paddle.static.data(name='label', shape=[None, 100])
        ...     my_layer = MyLayer()
        ...     prob = my_layer(input_data)
        ...     loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))
        ...
        ...     # Setup excluded layers out from ASP workflow.
        ...     # Please note, excluded_layers must be set before calling optimizer.minimize().
        ...     paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()], main_program)
        ...
        ...     optimizer = paddle.optimizer.SGD(learning_rate=0.1)
        ...     optimizer = paddle.static.amp.decorate(optimizer )
        ...     # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which
        ...     # will insert necessary masking operations for ASP workflow.
        ...     optimizer = paddle.incubate.asp.decorate(optimizer)
        ...     optimizer.minimize(loss, startup_program)
Nparam_namesmain_program)paddlestaticdefault_main_program	ASPHelperset_excluded_layersr   s     W/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/asp/asp.pyr    r    7   s3    d }}99;!! "     c                *    [         R                  U S9  g)a  
Reset excluded layers setting corresponding to :attr:`main_program`. If :attr:`main_program`
is None, then all configurations of excluded_layers would be cleaned.

Args:
    main_program (Program, optional): Program with model definition and its parameters.
                                      If None is given, then this function would reset all excluded_layers.
                                      Default is None.
Examples:
    .. code-block:: python
        :name: dynamic-graph

        >>> # Example1: Usage of Dynamic Graph
        >>> import paddle

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 100)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         prediction = self.linear1(hidden)
        ...         return prediction

        >>> my_layer = MyLayer()
        >>> optimizer = paddle.optimizer.SGD(
        ...     learning_rate=0.01, parameters=my_layer.parameters())

        >>> # Need to set excluded layers before calling decorate
        >>> paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()])
        >>> # Reset excluded_layers, all supported layers would be included into Automatic SParsity's workflow.
        >>> # Please note, reset_excluded_layers also must be called before calling asp.decorate().
        >>> paddle.incubate.asp.reset_excluded_layers()

        >>> optimizer = paddle.incubate.asp.decorate(optimizer)

    .. code-block:: python
        :name: static-graph

        >>> # Example2: Usage of Static Graph
        >>> import paddle

        >>> paddle.enable_static()

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 100)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         prediction = self.linear1(hidden)
        ...         return prediction

        >>> main_program = paddle.static.Program()
        >>> startup_program = paddle.static.Program()

        >>> with paddle.static.program_guard(main_program, startup_program):
        ...     input_data = paddle.static.data(name='data', shape=[None, 3, 224, 224])
        ...     label = paddle.static.data(name='label', shape=[None, 100])
        ...     my_layer = MyLayer()
        ...     prob = my_layer(input_data)
        ...     loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))
        ...
        ...     # Setup excluded layers out from ASP workflow.
        ...     # Please note, excluded_layers must be set before calling optimizer.minimize().
        ...     paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()], main_program)
        ...     # Reset excluded_layers, all supported layers would be included into Automatic SParsity's workflow.
        ...     # Please note, reset_excluded_layers also must be called before calling optimizer.minimize().
        ...     paddle.incubate.asp.reset_excluded_layers(main_program)
        ...
        ...     optimizer = paddle.optimizer.SGD(learning_rate=0.1)
        ...     optimizer = paddle.static.amp.decorate(optimizer )
        ...     # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which
        ...     # will insert necessary masking operations for ASP workflow.
        ...     optimizer = paddle.incubate.asp.decorate(optimizer)
        ...     optimizer.minimize(loss, startup_program)
r   N)r   reset_excluded_layersr$   s    r!   r%   r%      s    l ###>r"   OptimizerWithSparsityGuaranteec                ,    [         R                  U 5      $ )a  
Wrap the given optimizer as a OptimizerWithSparsityGuarantee,
If running with dynamic graph mode. ASP would creates mask variables for supported parameters.
Else if in static graph mode, ASP would creates mask variables and inserts necessary ops
when calling minimize()

Args:
    optimizer (Optimizer): A Optimizer used for training.
Returns:
    OptimizerWithSparsityGuarantee: A wrapper for ASP to decorate `minimize` function of the given optimizer.
Examples:
    .. code-block:: python
        :name: dynamic-graph

        >>> # Example1: Usage of Dynamic Graph
        >>> import paddle

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 32)
        ...         self.linear2 = paddle.nn.Linear(32, 32)
        ...         self.linear3 = paddle.nn.Linear(32, 10)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         hidden = self.linear1(hidden)
        ...         hidden = self.linear2(hidden)
        ...         prediction = self.linear3(hidden)
        ...         return prediction

        >>> my_layer = MyLayer()
        >>> optimizer = paddle.optimizer.SGD(
        ...     learning_rate=0.01, parameters=my_layer.parameters())

        >>> # Calling paddle.incubate.asp.decorate() to wrap step() in optimizer, which
        >>> # will apply necessary masking operations for ASP workflow.
        >>> # In dynamic graph mode, ASP would create related mask variables during decoration.
        >>> optimizer = paddle.incubate.asp.decorate(optimizer)

    .. code-block:: python
        :name: static-graph

        >>> # Example2: Usage of Static Graph
        >>> import paddle

        >>> paddle.enable_static()

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 100)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         prediction = self.linear1(hidden)
        ...         return prediction

        >>> main_program = paddle.static.Program()
        >>> startup_program = paddle.static.Program()

        >>> with paddle.static.program_guard(main_program, startup_program):
        ...     input_data = paddle.static.data(name='data', shape=[None, 3, 224, 224])
        ...     label = paddle.static.data(name='label', shape=[None, 100])
        ...     my_layer = MyLayer()
        ...     prob = my_layer(input_data)
        ...     loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))
        ...
        ...     optimizer = paddle.optimizer.SGD(learning_rate=0.1)
        ...     # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which
        ...     # will insert necessary masking operations for ASP workflow.
        ...     # In static graph mode, ASP creates related mask variables
        ...     # during minimize().
        ...     optimizer = paddle.incubate.asp.decorate(optimizer)
        ...     optimizer.minimize(loss, startup_program)
)r   decorate)	optimizers    r!   r(   r(      s    f i((r"   c           	     "   [         R                  R                  5       n[         R                  " U5      n[        R
                  [        R                  [        R                  S.nX7;   d   S5       eSn[        U [         R                  R                  5      (       a  [        R                  nO[        U [         R                  R                  5      (       a  [        R                  n[!        U S5      (       au  U R"                  S   S:  ab  [         R$                  R'                  5       (       a?  [)        [*        R,                  R/                  SS5      5      n	[         R0                  " U	5      nO[3        S	[5        U 5       35      eU" UU UUXs   US
9$ )a+  
Pruning parameters of supported layers in :attr:`model` via
specified mask generation function given by :attr:`mask_algo`. This
function supports both training and inference controlled by :attr:`with_mask`.
If :attr:`with_mask` is True, it would also prune parameter related ASP mask Variables,
else only prunes parameters.

*Note*: (Static graph mode) If calling this function with :attr:`with_mask`, it should call `OptimizerWithSparsityGuarantee.minimize`
and initialization (`exe.run(startup_program`)) before (For successfully obtain mask Variable).
Typically set `with_mask` as true for training (have called `OptimizerWithSparsityGuarantee.minimize`) and false for
inference only. To obtain OptimizerWithSparsityGuarantee, please see `paddle.incubate.asp.decorate()`.

Args:
    model (Program|nn.Layer): Program with model definition and its parameters, or a object of `paddle.nn.Layer`.
    n (int, optional): n of `n:m` sparse pattern. Default is 2.
    m (int, optional): m of `n:m` sparse pattern. Default is 4.
    mask_algo (string, optional): The function name to generate sparse mask. Default is `mask_1d`.
                                  The valid inputs should be one of 'mask_1d', 'mask_2d_greedy' and 'mask_2d_best'.
    with_mask (bool, optional): To prune mask Variables related to parameters or not. True is pruning also, False is not. Default is True.
Returns:
    dictionary: A dictionary with key: `parameter name` (string) and value: its corresponding mask Variable.
Examples:
    .. code-block:: python
        :name: dynamic-graph

        >>> # Example1: Usage of Dynamic Graph
        >>> import paddle
        >>> import numpy as np

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 32)
        ...         self.linear2 = paddle.nn.Linear(32, 32)
        ...         self.linear3 = paddle.nn.Linear(32, 10)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         hidden = self.linear1(hidden)
        ...         hidden = self.linear2(hidden)
        ...         prediction = self.linear3(hidden)
        ...         return prediction

        >>> my_layer = MyLayer()
        >>> loss_fn = paddle.nn.MSELoss(reduction='mean')

        >>> optimizer = paddle.optimizer.SGD(
        ...     learning_rate=0.01, parameters=my_layer.parameters())

        >>> # Calling paddle.incubate.asp.decorate() to wrap step() in optimizer, which
        >>> # will apply necessary masking operations for ASP workflow.
        >>> # In dynamic graph mode, ASP would create related mask variables during decoration.
        >>> optimizer = paddle.incubate.asp.decorate(optimizer)

        >>> # Must call paddle.incubate.asp.decorate() first before calling paddle.incubate.asp.prune_model()
        >>> paddle.incubate.asp.prune_model(my_layer, mask_algo='mask_2d_best')

        >>> for i in range(10):
        ...     imgs = paddle.to_tensor(
        ...         np.random.randn(64, 3, 32, 32),
        ...         dtype='float32', stop_gradient=False)
        ...     labels = paddle.to_tensor(
        ...         np.random.randint(10, size=(64, 1)),
        ...         dtype='float32', stop_gradient=False)
        ...     output = my_layer(imgs)
        ...     loss = loss_fn(output, labels)
        ...     loss.backward()
        ...     optimizer.step()
        ...     optimizer.clear_grad()

    .. code-block:: python
        :name: static-graph

        >>> # Example2: Usage of Static Graph
        >>> import paddle
        >>> import numpy as np

        >>> paddle.enable_static()

        >>> class MyLayer(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.conv1 = paddle.nn.Conv2D(
        ...             in_channels=3, out_channels=4, kernel_size=3, padding=2)
        ...         self.linear1 = paddle.nn.Linear(4624, 32)
        ...         self.linear2 = paddle.nn.Linear(32, 32)
        ...         self.linear3 = paddle.nn.Linear(32, 10)
        ...
        ...     def forward(self, img):
        ...         hidden = self.conv1(img)
        ...         hidden = paddle.flatten(hidden, start_axis=1)
        ...         hidden = self.linear1(hidden)
        ...         hidden = self.linear2(hidden)
        ...         prediction = self.linear3(hidden)
        ...         return prediction

        >>> main_program = paddle.static.Program()
        >>> startup_program = paddle.static.Program()

        >>> with paddle.static.program_guard(main_program, startup_program):
        ...     input_data = paddle.static.data(name='data', shape=[None, 3, 32, 32])
        ...     label = paddle.static.data(name='label', shape=[None, 1])
        ...     my_layer = MyLayer()
        ...     prob = my_layer(input_data)
        ...     loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))
        ...
        ...     optimizer = paddle.optimizer.SGD(learning_rate=0.1)
        ...     # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which
        ...     # will insert necessary masking operations for ASP workflow.
        ...     # In static graph mode, ASP creates related mask variables
        ...     # during minimize().
        ...     optimizer = paddle.incubate.asp.decorate(optimizer)
        ...     optimizer.minimize(loss, startup_program)

        >>> device = paddle.device.get_device()
        >>> place = paddle.set_device(device)

        >>> exe = paddle.static.Executor(place)
        >>> exe.run(startup_program)

        >>> # Must call exe.run(startup_program) first before calling paddle.asp.prune_model()
        >>> paddle.incubate.asp.prune_model(my_layer, mask_algo='mask_2d_best')
        >>> # it also be accepted to call
        >>> # paddle.incubate.asp.prune_model(main_program, mask_algo='mask_2d_best')

        >>> for i in range(10):
        ...     imgs = np.random.randn(64, 3, 32, 32).astype('float32')
        ...     labels = np.random.randint(10, size=(64, 1)).astype('float32')
        ...     exe.run(main_program, feed={'data':imgs, 'label':labels})
)mask_1dmask_2d_greedymask_2d_bestzNThe "mask_algo" should be one of ["mask_1d", "mask_2d_greedy", "mask_2d_best"]Ndistributed_info_sharding_degreer	   FLAGS_selected_gpusr   zBmodel should be paddle.nn.Layer or paddle.static.Program, but got nm	mask_algo	with_mask)r   device
get_device
set_devicer   MASK_1DMASK_2D_GREEDYMASK_2D_BEST
isinstancennr   r   prune_model_by_layerr   r   prune_model_by_programhasattrr.   baseis_compiled_with_cudaintosenvironget	CUDAPlace	TypeErrortype)
modelr2   r3   r4   r5   r6   placeMaskAlgo_mapping
prune_funcgpu_ids
             r!   prune_modelrO   ?  sP   X ]]%%'Ff%E ##"11 --
 ( X( J%))33
	E6==00	1	155
E.//''(9:Q>1133(=qABF$$V,EPQUV[Q\P]^
 	
 

"- r"   c                      \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	\
SS j5       r\
SS	 j5       r\
SS
 j5       rSrg)ProgramASPInfoi  a  
ProgramASPInfo is a container to keep ASP relevant information of Program. It contains three inner-variables:
1. __mask_vars (Dictionary): Key is parameter's name and value is its corresponding sparse mask Variable object, which is created by `ASPHelper.create_mask_variables`.
2. __masks (Dictionary): Key is parameter's name and value is its corresponding sparse mask Numpy array, which is created by `ASPHelper.prune_model`.
3. __excluded_layers (List): It stores name of layers which should not involve into ASP workflow.
c                .    0 U l         0 U l        / U l        g N)_ProgramASPInfo__mask_vars_ProgramASPInfo__masks _ProgramASPInfo__excluded_layersselfs    r!   __init__ProgramASPInfo.__init__  s    !#r"   c                     X R                   U'   g rS   rT   rX   
param_namevars      r!   update_mask_varsProgramASPInfo.update_mask_vars  s    '*$r"   c                     X R                   U'   g rS   rU   r]   s      r!   update_masksProgramASPInfo.update_masks  s    #&Z r"   c                b    U R                   R                  [        R                  " U5      5        g rS   )rV   extendcopydeepcopy)rX   r   s     r!   update_excluded_layers%ProgramASPInfo.update_excluded_layers  s    %%dmmK&@Ar"   c                    / U l         g rS   rV   rW   s    r!   r%   $ProgramASPInfo.reset_excluded_layers  s
    !#r"   c                    U R                   $ rS   r\   rW   s    r!   	mask_varsProgramASPInfo.mask_vars  s    r"   c                    U R                   $ rS   rc   rW   s    r!   masksProgramASPInfo.masks  s    ||r"   c                    U R                   $ rS   rm   rW   s    r!   excluded_layersProgramASPInfo.excluded_layers  s    %%%r"   )__excluded_layers__mask_vars__masksNreturnNone)r^   strr_   r   r|   r}   )r^   r~   r_   znpt.NDArray[Any]r|   r}   )r   	list[str]r|   r}   r|   dict[str, Tensor])r|   dict[str, npt.NDArray[Any]])r|   r   )__name__
__module____qualname____firstlineno____doc__rY   r`   rd   rj   r%   propertyrp   rs   rv   __static_attributes__ r"   r!   rQ   rQ     s]    $
+'B$       & &r"   rQ   c                  "   \ rS rSrSrSrSr0 r\      SS j5       r	\SSS jj5       r
\SS j5       r\SS	S
\R                  S4             SS jj5       r\S	S
\R                  S4             SS jj5       r\SS j5       r\SS j5       r\S S j5       r\      S!S j5       r\    S"S j5       r\    S#             S$S jj5       r\\S%S j5       5       r\        S&S j5       r\      S'S j5       rSrg)(r   i  a  
ASPHelper is a collection of Auto SParsity (ASP) functions to enable

1. training models with weights in 2:4 sparse pattern on FP16 or 1:2 sparse pattern on FP32 from scratch.
2. pruning well-trained models into 2:4 sparse pattern on FP16 or 1:2 sparse pattern on FP32 for fine-tuning.
asp_maskw_c                H    U R                  U5      nUR                  U5        g)z{
This is the implementation of `asp.set_excluded_layers`, for details please see explanation in `asp.set_excluded_layers`.
N)_get_program_asp_inforj   )clsr   r   asp_infos       r!   r    ASPHelper.set_excluded_layers%  s"     ,,\:''4r"   Nc                    Uc1  U R                    H   nU R                   U   R                  5         M"     gU R                  U5      R                  5         g)z
This is the implementation of `asp.reset_excluded_layers`, for details please see explanation in `asp.reset_excluded_layers`.
N)_ASPHelper__asp_infor%   r   )r   r   progs      r!   r%   ASPHelper.reset_excluded_layers/  sF    
 t$::< ' %%l3IIKr"   c                   [         R                  " 5       (       a\  [         R                  R                  5       n[         R                  R	                  5       n[
        R                  XU R                  5        [        U 5      $ )ze
This is the implementation of `asp.decorate`, for details please see explanation in `asp.decorate`.
)	r   in_dynamic_moder   r   default_startup_programr   _create_mask_variables_parameter_listr&   )r)   	main_progstartup_progs      r!   r(   ASPHelper.decorate:  s_    
 !!##
 ::<I!==@@BL,,)B)B .i88r"         Tc                ,   Uc  [         R                  R                  5       nU R                  U5      nUR	                  5       R                  5        GH  n[        R                  X(R                  5      (       d  M*  [        5       R                  UR                  5      R                  5       n	[        R                  " U	5      n
[        R                  UR                  5      nU" XX5UR                  5      u  pUR                  U
R                   5      nU	R#                  X5        U(       a  [        5       R                  [        R%                  UR                  5      5      nUc(   S[        R%                  UR                  5       S35       eUR                  5       nUR                  [        R                  " U5      R                   5      nUR#                  X5        UR'                  UR                  U5        GM     UR(                  R+                  5       $ )k
This is the implementation of `asp.prune_model`, for details please see explanation in `asp.prune_model`.
Cannot find z variable, please call optimizer.minimize (paddle.incubate.asp.decorate(optimizer).minimize(loss) and initialization (exe.run(startup_program)) first!)r   r   r   r   global_blockall_parametersr   _is_supported_layernamer   find_var
get_tensornparray_get_prune_func_by_nameastypedtypeset_get_mask_namerd   rs   rh   )r   rK   r   r2   r3   r4   r5   r   paramweight_tensorweight_nparrayrM   weight_pruned_nparrayweight_sparse_maskweight_mask_paramweight_mask_tensors                   r!   r?    ASPHelper.prune_model_by_programK  s    !====?L,,\:!..0??AE,,\::FF , 7 7

 C N N P!#-!8&>>uzzJ
<F"qUZZ=9% )>(D(D"(()% !!"7?(4(?(?!00<)% -8 &y'?'?

'K&L MP P8
 *;)E)E)G&);)B)B!34::*& '**+=E%%ejj2DE; B< ~~""$$r"   c           	        [         R                  " 5       (       Ga~  [         R                  R                  5       nU R	                  U5      nUR                  5        GH   n	[        R                  XyR                  5      (       d  M*  U	R                  5       n
[        R                  U	R                  5      nU" XX5U	R                  5      u  pUR                  U
R                  5      nU	R                  U5        U(       ab  UR                  R                  U	R                  S5      nUc(   S[        R!                  U	R                  5       S35       eUR                  U5        UR#                  U	R                  U5        GM#     UR$                  R'                  5       $ SnUR                  5        H  n	U	R(                  R*                  nM     Uc   S5       e[        R-                  UUUUUUS9$ )r   Nr   zG variable, please call asp.decorate() to decorate your optimizer first!z6Cannot get paddle.static.Program from Paddle.nn.Layer.r1   )r   r   r   r   r   
parametersr   r   r   numpyr   r   r   	set_valuerp   rF   r   rd   rs   rh   blockprogramr?   )r   rK   layerr2   r3   r4   r5   r   r   r   r   rM   r   r   r   target_programs                   r!   r>   ASPHelper.prune_model_by_layer}  s    !!##!====?L00>H))+00zzJJ%*[[]N!*!B!B5::!NJ@J&1A=) -B,H,H&,,-) OO$9: ,4,>,>,B,B!JJ-)  1< *9+C+CEJJ+O*P Q> >< *334FG))%**6HI3 ,6 >>&&(( "N))+!&!4!4 ,!- H- 33## 4  r"   c                .    U S-   [         R                  -   $ )z
Return mask name by given parameter name :attr:`param_name`.

Args:
    param_name (string): The name of parameter.
Returns:
    string: The mask name of :attr:`param_name`.
.)r   MASK_APPENDDED_NAME)r^   s    r!   r   ASPHelper._get_mask_name  s     C)"?"???r"   c                    / nU R                  5       R                  5        HE  nUR                  R                  S5      n[        R
                  U;  d  M4  UR                  U5        MG     U$ )a  
Get all parameters's Variables in :attr:`main_program` but excluded ASP mask Variables.

Args:
    main_program (Program): Program with model definition and its parameters.
Returns:
    list: A list of parameter Variables in :attr:`main_program` (excluded ASP mask Variables).
r   )r   r   r   splitr   r   append)r   var_listr   param_name_lists       r!   _get_not_ASP_relevant_vars$ASPHelper._get_not_ASP_relevant_vars  s[     !..0??AE#jj..s3O,,OC&	 B
 r"   c                l    XR                   ;  a  [        5       U R                   U'   U R                   U   $ rS   )r   rQ   )r   r   s     r!   r   ASPHelper._get_program_asp_info  s,    ~~-+9+;CNN<(~~l++r"   c                h   UR                  S5      n[        R                  U;   a  gU R                  U5      R                   H
  nXB;   d  M
    g   U[
        ;   a  g[        U5      S:X  a  gUS   nUS   nUSUR                  S5       n[        R                  U;  a  gU[
        ;   d
  U[
        ;   a  gg)ao  
Verify if given :attr:`param_name` is supported by ASP.

Args:
    param_name (string): The name of parameter.
Returns:
    bool: True if it is supported, else False.
Examples:
    .. code-block:: python

        >>> from paddle.incubate.asp import ASPHelper
        >>> paddle.enable_static()

        >>> main_program = paddle.static.Program()
        >>> startup_program = paddle.static.Program()

        >>> with paddle.static.program_guard(main_program, startup_program):
        ...     input_data = paddle.static.data(name='data', shape=[None, 128])
        ...     fc = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None)

        >>> for param in main_program.global_block().all_parameters():
        ...     print(param.name,'->',ASPHelper._is_supported_layer(main_program, param.name))
        fc_0.w_0 -> True
        fc_0.b_0 -> False
r   FTr	   r   N_)	r   r   r   r   rv   r   lenrfindPADDLE_WEIGHT_SUFFIX)r   r   r^   r   r   param_name_no_weight_suffixparam_type_suffix
layer_names           r!   r   ASPHelper._is_supported_layer  s    : %**3/((O;..|<LLE" M << 1$&5a&8#+A.04)//4

 ))1BB (+NN@@r"   c                    [         R                  " US 5      nUR                  S5      S   nUc  [         R                  " US 5      nUc/  US UR                  S5       n[         R                  " U[        5      nU$ )Nr   r   r   )r   rF   r   r   r
   )r   r^   funcr   r   s        r!   r   !ASPHelper._get_prune_func_by_name  s     366z4H&0&6&6s&;A&>#<6::+TD <48-33C8J 7::,D r"   c                   Uc  UR                   R                  nUc  [        R                  R	                  5       nUR                  X$XVS9u  pxU V	s/ s H  oS   PM	     n
n	U R                  X4U
5        U R                  X:5        Xx4$ s  sn	f )a@  
This function is a decorator of `minimize` function in `Optimizer`.
There are three steps:

1. Call :attr:`optimizer`.minimize(:attr:`loss`)
2. Create sparse mask Tensors according to supported layers in :attr:`main_program`.
3. Insert masking ops in the end of parameters update.

*Note*: Please use `ASP.decorate` instead when applying distributed training with `Fleet`.
(Due to there is a invisible graphs optimization in `Fleet.minimize()` which make training graph
cannot be modified anymore.)

Args:
    optimizer (Optimizer): A Optimizer used for training.
    loss (Variable): A Variable containing the value to minimize.
    main_program (Program, optional): Program with model definition and its parameters. Default is `loss.block.program`.
    startup_program (Program, optional): Program for initializing parameters in `parameter_list`. Default is `paddle.static.default_startup_program()`.
    parameter_list (Iterable, optional): Iterable of `Variable` or `Variable.name` to update to minimize `loss`. The default value is None, at this time all parameters will be updated.
    no_grad_set (set, optional): Set of `Variable  or `Variable.name` that don't need to be updated. The default value is None.
Returns:
    list: operators from :attr:`optimizer`.minimize(:attr:`loss`).
    list: pairs of parameters and their gradients.
)no_grad_setr   )r   r   r   r   r   minimizer   _insert_sparse_mask_ops)r   r)   lossr   startup_programparameter_listr   optimizer_opsparams_and_gradspgparams_onlys              r!   	_minimizeASPHelper._minimize3  s    B ::--L"$mmCCEO*3*<*<> += +
' (88'7!u'78""<+N##L>.. 9s   Bc                "   UR                  5         [        R                  R                  5       n[        R                  R
                  R                  5          [        R                  X!R                  5        SSS5        g! , (       d  f       g= fa  
This function is a decorator of `step` function in `Optimizer`.
There are three steps:

1. Call :attr:`optimizer`.step()
2. Mask parameters with sparse masks.

*Note*: Please use `ASP.decorate` instead when applying distributed training with `Fleet`.
(Due to there is a invisible graphs optimization in `Fleet.minimize()` which make training graph
cannot be modified anymore.)

Args:
    optimizer (Optimizer): A Optimizer used for training.
N)
stepr   r   r   rA   dygraphno_gradr   r   r   )r   r)   r   s      r!   _stepASPHelper._stepc  sY    " 	MM668	[[  ((*--44 +**s    B  
Bc                4   U R                  U5      n[        X5         U H  n[        R                  XR                  5      (       d  M)  UR                  UR
                  ;  d  ME  [        R                  " [        R                  UR                  5      UR                  UR                  [        R                  R                  R                  SS9S9nSUl        SUl        UR!                  UR                  U5        M     SSS5        g! , (       d  f       g= f)aT  
Create sparse mask Tensors according to supported layers in :attr:`main_program`.
This function is called in second step of `ASPHelper._minimize`

Args:
    main_program (Program): Program with model definition and its parameters.
    startup_program (Program): Program for initializing parameters.
    params (list): Variable parameters.
g      ?)value)r   shaper   default_initializerTFN)r   r   r   r   r   rp   r   create_parameterr   r   r   r=   initializerConstantstop_gradient	trainabler`   )r   r   r   paramsr   r   
mask_params          r!   r    ASPHelper._create_mask_variables{  s      ,,\:<900zzJJzz););;%+%<%<!*!9!9%**!E"'++"'++06		0E0E0N0N&) 1O 1	&
 48
0/4
, 11%**jI   :99s   (D		D	%BD		
Dc                (   UR                  5       nU R                  U5      nU Hk  nUR                  UR                  ;   d  M  UR	                  SXTR                  UR                     S.SU0SS[
        [        [        R                  5      0S9  Mm     g)z
Insert masking ops in the end of parameters update.
This function is called in third step of `ASPHelper._minimize`

Args:
    main_program (Program): Program with model definition and its parameters.
    params (list): Variable parameters.
elementwise_mul)XYOutaxis)rI   inputsoutputsattrsN)	r   r   r   rp   	append_opOP_ROLE_KEYrC   OpRoleOptimize)r   r   r   r   r   r   s         r!   r   !ASPHelper._insert_sparse_mask_ops  s     ))+,,\:EzzX///*!&-?-?

-KL"EN#S%9	    r"   r   )r   r   r   r   r|   r}   rS   r   Program | Noner|   r}   r)   r   r|   r&   )rK   r   r   r  r2   rC   r3   rC   r4   r   r5   boolr|   r   )rK   r   r   r   r2   rC   r3   rC   r4   r   r5   r  r|   r   )r^   r~   r|   r~   )r   r   r|   zlist[Tensor])r   r   r|   rQ   )r   r   r^   r~   r|   r  )r^   r~   r|   z`Callable[[npt.NDArray[Any], int, int, MaskAlgo, str], tuple[npt.NDArray[Any], npt.NDArray[Any]]])NNNN)r)   r   r   r   r   r  r   r  r   'Iterable[Tensor] | Iterable[str] | Noner   set[Tensor] | set[str] | Noner|   2tuple[list[Operator], list[tuple[Tensor, Tensor]]]r)   r   r|   r}   )r   r   r   r   r   Sequence[Tensor]r|   r}   )r   r   r   r  r|   r}   )r   r   r   r   r   r   r   r   classmethodr    r%   staticmethodr(   r   r9   r?   r>   r   r   r   r   r   r   r   r   r   r   r   r   r"   r!   r   r     s    %J5#53:5	5 5 L L 9 9   (,&../%/% %/% 	/%
 /% /% /% 
%/% /%b 
 &..<< < 	<
 < < < 
%< <| 	@ 	@  " , ,
 :":03:	: :x 
 * 
 (,*.BF59-/-/ -/ %	-/
 (-/ @-/ 3-/ 
<-/ -/^   , JJ !J !	J
 
J J@ ",<	 r"   r   c                      \ rS rSrSrSS jrSS jr   S         SS jjr\SS j5       r	\SS j5       r
\SS	 j5       rS
rg)r&   i  a  
OptimizerWithSparsityGuarantee is a wrapper to decorate `minimize` function of given optimizer by `_minimize` of ASPHelper.
The decorated `minimize` function would do three things (exactly same as `ASPHelper._minimize`):
1. Call `minimize` function of given optimizer.
2. Call `ASPHelper._create_mask_variables` to create mask Variables.
3. Call `ASPHelper._insert_sparse_mask_ops` to insert weight masking ops in the end of `loss`'s Program.
c                    Xl         g rS   
_optimizer)rX   r)   s     r!   rY   'OptimizerWithSparsityGuarantee.__init__  s    #r"   c                .    [        U R                  U5      $ rS   )getattrr  )rX   items     r!   __getattr__*OptimizerWithSparsityGuarantee.__getattr__  s    t--r"   Nc                D    [         R                  U R                  UUUUS9$ )a  
This function is to call `ASPHelper.minimize()` and return its return

Args:
    loss (Variable): A Variable containing the value to minimize.
    startup_program (Program, optional): Program for initializing parameters in `parameter_list`. Default is `paddle.static.default_startup_program()`.
    parameter_list (Iterable, optional): Iterable of `Variable` or `Variable.name` to update to minimize `loss`. The default value is None, at this time all parameters will be updated.
    no_grad_set (set, optional): Set of `Variable  or `Variable.name` that don't need to be updated. The default value is None.
Returns:
    list: operators from :attr:`optimizer`.minimize(:attr:`loss`).
    list: pairs of parameters and their gradients.
)r   r   r   )r   r   r  )rX   r   r   r   r   s        r!   r   'OptimizerWithSparsityGuarantee.minimize  s/    & ""OO+)# # 
 	
r"   c                B    [         R                  U R                  5        gr   )r   r   r  rW   s    r!   r   #OptimizerWithSparsityGuarantee.step  s      	(r"   c                .   U R                   R                  5       n[        R                  [        R
                  R                  5       5      nUR                  R                  5        H+  u  p4UR                  [        R                  U5      U05        M-     U$ )z
This function is a decorator of `state_dict` function in `Optimizer`.

Returns:
    state_dict(dict) : dict contains all the Tensor used by optimizer
)r  
state_dictr   r   r   r   r   rp   itemsupdater   )rX   r$  r   r^   r_   s        r!   r$  )OptimizerWithSparsityGuarantee.state_dict  sw     __//1
22MM..0
  (11779OJy77
CSIJ  :r"   c                   [         R                  [        R                  R	                  5       5      nUR
                  R                  5        H\  u  p4[         R                  U5      nXQ;   d   SU S35       eUR                  X   5        UR                  X4R                  5       5        M^     U R                  R                  U5      $ )z
This function is a decorator of `set_state_dict` function in `Optimizer`.
Args:
    state_dict(dict) : Dict contains all the Tensor needed by optimizer
Return:
    None
zThe z is not found.)r   r   r   r   r   rp   r%  r   r   rd   r   r  set_state_dict)rX   r$  r   r^   r_   param_mask_names         r!   r)  -OptimizerWithSparsityGuarantee.set_state_dict  s     22MM..0
  (11779OJ'66zBO"0 '~60 MM*56!!*iik:  : --j99r"   r  r  )r  r~   r|   r   )NNN)
r   r   r   r  r   r  r   r  r|   r  r{   r   )r$  r   r|   r}   )r   r   r   r   r   rY   r  r   r   r   r$  r)  r   r   r"   r!   r&   r&     s    $. +/BF59

 (
 @	

 3
 
<
6 ) )"   : :r"   rS   )r   r   r   r  r|   r}   r
  r  )r   r   r+   T)rJ   zProgram | Layerr2   rC   r3   rC   r4   z4Literal['mask_1d', 'mask_2d_greedy', 'mask_2d_best']r5   r  r|   r   )3r   
__future__r   rh   rD   typingr   r   r   r   paddle.baser   r   r   paddle.base.frameworkr   supported_layer_listr
   r   utilsr   op_proto_and_checker_makerr  kOpRoleAttrNamer  collections.abcr   r   r   r   r   numpy.typingnptr   paddle._typingr   	paddle.nnr   paddle.optimizerr   paddle.staticr   r   __all__r    r%   r(   rO   rQ   r   r&   r   r"   r!   <module>r<     s'   #  	     9 9 . 		(	(	/	/--==?2--(*/
 <@VV*8V	VrV?rS)p FOpp
p p D	p
 p pf#& #&L\ \~_: _:r"   