
    {-jKF                       d dl Z d dlZd dlZd dlZ	 d dlmZ n#  ddlmZ Y nxY wd dlmZm	Z	 ddl
mZ ddlmZ ddlmZ d	d
lmZ ddl
mZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZm Z   ee!e j"        d          Z#d Z$d Z%d Z&	 ddZ' G d d          Z( G d de(          Z) G d d          Z*dS )    N)tqdm   )IrGraph_get_var   )static)core)unique_name   )
get_logger)utils)run_adaround)cal_kl_threshold)SUPPORT_QUANTIZATION_OP_DICTARMCPUQuantizerBaseQuantizerTensorRTQuantizer)AddQuantDequantForInferencePassAddQuantDequantPassAddQuantDequantPassV2QuantizationFreezePassQuantizationTransformPassQuantizationTransformPassV2QuantWeightPassz&%(asctime)s-%(levelname)s: %(message)s)fmtc                 z    g }|                                  D ]#}|j        r|                    |j                   $|S N)	list_varspersistableappendname)programpersistable_var_namesvars      u/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/static/quantization/post_training_quantization.py_all_persistable_var_namesr&   8   sJ      "" 3 3? 	3!((222      c                 x   t                      |                                 }|D ]@}|j        D ]}                    |           |j        D ]}                    |           Ad D             t          t          fd|                                                     }|                     |           | S )Nc                     h | ]	}|j         
S  node).0ns     r%   	<setcomp>z+_remove_unused_var_nodes.<locals>.<setcomp>I   s    333QV333r'   c                     | j         vS r   r+   )r,   all_used_varss    r%   <lambda>z*_remove_unused_var_nodes.<locals>.<lambda>L   s    -7 r'   )setall_op_nodesinputsaddoutputsfilterall_var_nodessafe_remove_nodes)graphopsop_node
input_nodeoutput_nodeall_unused_varsr1   s         @r%   _remove_unused_var_nodesrA   @   s    EEM




C + +!. 	* 	*Jj))))"? 	+ 	+Kk****	+ 43]333M77779L9L9N9N	
 	
 O
 
O,,,Lr'   c                     t                      }|                                 D ]+}|                                r|                    |           ,|                     |           | S r   )r3   r9   is_ctrl_varr6   r:   )r;   remove_ctr_varsr,   s      r%   _remove_ctrl_varsrE   S   sh    eeO##%% & & 	&%%%	O,,,Lr'   Fc                    t          j        |          }|j        }|                    d          s|                    d|            |rX|r t          |          t          |          k    s
J d            t          ||          D ]\  }}	|                    ||	           |                    |           |r,|	                    dd| |
                                           t          |           |S )N__param_scope__z5Different number of pass attributes and their values..	qat_fp32_)r	   get_passr;   hasset_not_ownedlenzipr3   applydrawr4   rA   )
scoper;   	pass_nameattrsattr_valuesdebugir_pass	cpp_graphattrvalues
             r%   _apply_passrZ   \   s    mI&&GI==*++ : 15999 % 	
s5zzS-=-====C  >== uk22 	% 	%KD%KKe$$$$MM) G

3/I//1C1C1E1EFFFU###Lr'   c                       e Zd ZdZddddddddddg ddddd	d	d
dddddddddddfdZd Zd Z	 d&dZd Zd Z	d Z
d Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Zd! Zd" Zd# Zd$ Zd% ZdS )'PostTrainingQuantizationz
    Utilizing post training quantization method to quantize the FP32 model,
    and it uses calibrate data to get the quantization information for all
    quantized variables.
    N
   KLwJ?roundMbP?F   range_abs_maxchannel_wise_abs_maxTc                  t   g d| _         ddg| _        g d| _        |dv sJ || _        || _        dg| _        |
J d            |
J d	            |	d
k    s
J d            || j        v s
J d            || j         v sJ d| d| j          d            || j        v sJ d| d| j         d            || _        || _        |t          j	                    n|| _
        || _        || _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        || _        | j        rdnd| _        || _        || _        | j        j        | _        d| _        d| _        d| _         || _!        tE                      | _#        tE                      | _$        i | _%        i | _&        i | _'        i | _(        i | _)        d| _*        i | _+        i | _,        i | _-        i | _.        i | _/        tE                      | _0        || _1        || _2        || _3        || _4        d| _5        | j        d| _5        || _6        |r!to          tq          j9                              }n6|r4|D ]1} | to          tq          j9                              v sJ | dz               2||k    s
J d            g d}!|stu          ||          | _;        dS |<                                dk    rt{          ||          | _;        dS |<                                dk    rt}          ||          | _;        dS d| d|! dsJ dS )a"  
        Constructor.

        Args:
            executor(static.Executor): The executor to load, run and save the
                quantized model.
            scope(static.Scope, optional): The scope of the program, use it to load
                and save variables. If scope=None, get scope by static.global_scope().
            model_dir(str): The path of the fp32 model that will be quantized,
                and the model and params files are under the path.
            model_filename(str, optional): The name of file to load the inference
                program. If it is None, the default filename '__model__' will
                be used. Default is 'None'.
            params_filename(str, optional): The name of file to load all parameters.
                When all parameters were saved in a single binary file, set it
                as the real filename. If parameters were saved in separate files,
                set it as 'None'. Default is 'None'.
            batch_generator(Python Generator, deprecated): The batch generator provides
                calibrate data for DataLoader, and it returns a batch every
                time. Note that, sample_generator and batch_generator, only one
                should be set. Besides, batch_generator supports lod tensor.
            sample_generator(Python Generator, deprecated): The sample generator provides
                calibrate data for DataLoader, and it only returns a sample every
                time. Note that, sample_generator and batch_generator, only one
                should be set. Besides, sample_generator dose not support lod tensor.
            data_loader(Paddle.io.DataLoader): The
                Dataloader provides calibrate data, and it could
                return a batch every time.
            batch_size(int, optional): The batch size of DataLoader. Default is 10.
            batch_nums(int, optional): If batch_nums is not None, the number of
                calibrate data is batch_size*batch_nums. If batch_nums is None, use
                all data provided by sample_generator as calibrate data.
            algo(str, optional): If algo='KL', use KL-divergence method to
                get the KL threshold for quantized activations and get the abs_max
                value for quantized weights. If algo='abs_max', get the abs max
                value for activations and weights. If algo= 'min_max', get the min
                and max value for quantized activations and weights. If algo='avg',
                get the average value among the max values for activations. If
                algo= 'hist', get the value of 'hist_percent' quantile as the threshold.
                If algo='mse', get the value which makes the quantization mse loss
                minimal. Default is KL.
            hist_percent(float, optional): The threshold of algo 'hist' for activations.
                Default is 0.99999.
            quantizable_op_type(list[str], optional): List the type of ops
                that will be quantized. Default is []. If quantizable_op_type is [],
                it will use the default quantization op type of the qunat config in
                the current deploy_backend.
            round_type(str, optional): The method of converting the quantized weights
                value float->int. Currently supports ['round', 'adaround'] methods.
                Default is `round`, which is rounding nearest to the integer.
                'adaround' is refer to https://arxiv.org/abs/2004.10568.
            learning_rate(float, optional): The learning rate of adaround method.
            is_full_quantized(bool, optional): If set is_full_quantized as True,
                apply quantization to all supported quantizable op type. If set
                is_full_quantized as False, it will apply quantization to the op type
                according to the input quantizable_op_type or quant config of deploy_backend.
            bias_correction(bool, optional): If set as True, use the bias correction
                method of https://arxiv.org/abs/1810.05723. Default is False.
            activation_bits(int): quantization bit number for activation.
            weight_bits(int, optional): quantization bit number for weights.
            activation_quantize_type(str): quantization type for activation,
                now support 'range_abs_max', 'moving_average_abs_max' and 'abs_max'.
                This param only specifies the fake ops in saving quantized model.
                If it is 'range_abs_max' or 'moving_average_abs_max', we save the scale
                obtained by post training quantization in fake ops. Note that, if it
                is 'abs_max', the scale will not be saved in fake ops.
            weight_quantize_type(str): quantization type for weights,
                support 'abs_max' and 'channel_wise_abs_max'. This param only specifies
                the fake ops in saving quantized model, and we save the scale obtained
                by post training quantization in fake ops. Compared to 'abs_max',
                the model accuracy is usually higher when it is 'channel_wise_abs_max'.
            onnx_format(bool): Whether to export the quantized model with format of ONNX.
                Default is False.
            freeze_model(bool): Whether to convert quantized and trained ``program`` to final
                quantized ``program``. Default: True.
            skip_tensor_list(list): List of skip quant tensor name. Default: None.
            same_scale_tensor_list(list(list)): The list of tensor keep same scale in the outermost
                list, the final scale about every list is the max of the scale in the list
                of tensor. Default: None.
            optimize_model(bool, optional): If set optimize_model as True, it applies
                some passes to the model before quantization, and it supports
                `conv2d/depthwise_conv2d + bn` pass so far. Some targets require the
                weights are quantized by tensor-wise method, which means the weights
                scale for all channel are the same. However, if fuse
                `conv2d/depthwise_conv2d + bn`, the weights scale for all channel will
                be different. In address this problem, fuse the pattern before
                quantization. Default False.
            is_use_cache_file(bool, optional): This param is deprecated.
            cache_dir(str, optional): This param is deprecated.
            deploy_backend(str, optional): Deploy backend, it can be None, `TensorRT`,
                `MKLDNN`, `ARM`. And it will extend the new backend. Default is None,
                which means to use the default general quantization configuration.
        Returns:
            None

        Examples:
            .. code-block:: python

                >>> # doctest: +SKIP("There are some example variables in the code.")
                >>> import paddle.static as static
                >>> from paddle.static.quantization import PostTrainingQuantization

                >>> exe = static.Executor(paddle.CPUPlace())
                >>> model_dir = "path/to/fp32_model_params"
                >>> # set model_filename as None when the filename is __model__,
                >>> # otherwise set it as the real filename
                >>> model_filename = None
                >>> # set params_filename as None when all parameters were saved in
                >>> # separate files, otherwise set it as the real filename
                >>> params_filename = None
                >>> save_model_path = "path/to/save_model_path"
                >>> # prepare the sample generator according to the model, and the
                >>> # sample generator must return a sample every time. The reference
                >>> # document: https://www.paddlepaddle.org.cn/documentation/docs/zh
                >>> # /user_guides/howto/prepare_data/use_py_reader.html
                >>> data_loader = your_data_loader
                >>> batch_size = 10
                >>> batch_nums = 10
                >>> algo = "KL"
                >>> quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
                >>> ptq = PostTrainingQuantization(
                ...     executor=exe,
                ...     sample_generator=None,
                ...     data_loader=data_loader,
                ...     model_dir=model_dir,
                ...     model_filename=model_filename,
                ...     params_filename=params_filename,
                ...     batch_size=batch_size,
                ...     batch_nums=batch_nums,
                ...     algo=algo,
                ...     quantizable_op_type=quantizable_op_type
                ... )
                >>> ptq.quantize()
                >>> ptq.save_quantized_model(save_model_path)
        )rc   moving_average_abs_maxabs_maxrg   rd   )r^   histavgmseemdrg   min_maxptf)adaroundr`   lstmNzThe executor cannot be None.zdata_loader cannot be None.r   z(The batch_size should be greater than 0.z?The algo should be KL, hist, mse, avg, abs_max, min_max or ptf.zThe activation_quantize_type (z) should in (z).zThe weight_quantize_type (TFi   # is not supported for quantization.zPactivation_bits and weight_bits must be the same, other cases are not supported.)Ntensorrtmkldnnonednnarm)quantizable_op_type
quant_bitsrq   rt   zDeploy Backend z# not support, please choose one of rH   )?!_support_activation_quantize_type_support_weight_quantize_type_support_algo_type_round_type_learning_rate_dynamic_quantize_op_type_bias_correction	_executorr   global_scope_scope
_model_dir_model_filename_params_filename_sample_generator_batch_generator_batch_size_batch_nums_algo_hist_percent_activation_bits_weight_bits_activation_quantize_type_weight_quantize_type_onnx_format_clip_extra_skip_tensor_list_optimize_modelplace_place_program
_feed_list_fetch_list_data_loaderr3   _quantized_weight_var_name_quantized_act_var_name_weight_op_pairs_sampling_act_abs_min_max_sampling_act_histogram_sampling_data_quantized_var_threshold_histogram_bins_quantized_var_min_quantized_var_max_quantized_var_avg_best_calibration_loss_quantized_threshold_zero_size_var_names_same_scale_tensor_list_freeze_model_scale_dict_return_graphFLAG_is_full_quantizelistr   keysr   quant_configlowerr   r   )"selfexecutor	model_dirrQ   model_filenameparams_filenamebatch_generatorsample_generatordata_loader
batch_size
batch_numsalgohist_percentru   
round_typelearning_rateis_full_quantizebias_correctionactivation_bitsweight_bitsactivation_quantize_typeweight_quantize_typeonnx_formatfreeze_modeloptimize_modelis_use_cache_fileskip_tensor_listsame_scale_tensor_list	cache_dir
scale_dictreturn_graphdeploy_backendop_typesupport_deploy_backends"                                     r%   __init__z!PostTrainingQuantization.__init__w   sK   T2
 2
 2
.
 /89O-P*	#
 	#
 	#
 22222%+*0& ##%C###&&(E&&&A~~~I~~~t....M /.. %(NNNN~-E~~TXTz~~~ ONN $t'IIIIr)=rrDLnrrr JII
 !0!/4}f)+++%#- /!1 /%%
) /')A&%9"'#'#4?44%!1- n*'*-%%''*uu$ ")+&')$ (*%#"$"$"$&(#$&! %(EE!'=$)%)	=$DI!1 	"&'C'H'J'J"K"K  	.  $'C'H'J'J"K"KKKKCC LKKK +---^ .-- "O!N!N 	r -$7&! ! !D !!##z11 1$7&! ! !D !!##u,, /$7&! ! !D
 r^qqXnqqqqqqqqr'   c           	                                                                                                                  j        dv rd}t	           j        dd          5 }                                 D ]q} j                             j	        | j
        d j                                                     |dz  }|                                  j        r| j        k    r nrd	d	d	           n# 1 swxY w Y                                     d}t	           j        d
d          5 }                                 D ]q} j                             j	        | j
        d j                                                     |dz  }|                                  j        r| j        k    r nrd	d	d	           n# 1 swxY w Y    j        dk    rM j        D ]E}| j        vrt%          j         j        |                                                    j        |<   F j        dv r                                   j        dk    r                                                                     j        dk    r                                  n                                   j        s                                  t=           fd j        D                       r                       j                   tC          j"         j	                    j#        s j	        S tI          tK          j&         j	        j'                  d          }|S )a7  
        Load the FP32 model, and use the calibrate data to calculate the forward-stage.
        Based on the sample data, we can get the quantization information, and obtain
        the final quantized model.

        Args:
            None
        Returns:
            the program of quantized model.
        r^   rh   r   z8Preparation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}P   )total
bar_formatncolsF)r"   feed
fetch_listreturn_numpyrQ   r   Nz5Sampling stage, Run batch:|{bar}| {n_fmt}/{total_fmt}ri   rn   rl   c              3   4   K   | ]}|j         j        v V  d S r   )r    activation_quant_operation_types)r-   r   r   s     r%   	<genexpr>z4PostTrainingQuantization.quantize.<locals>.<genexpr>  sB       
 
 t(II
 
 
 
 
 
r'   Tfor_test)(_load_model_data_collect_target_varnames_set_activation_persistabler   r   r   r   r~   runr   r   r   _collect_activation_abs_min_maxupdate_init_sampling_act_histogram	_samplingr   r   nparraymeanr   _calculate_kl_hist_thresholdrz   _adaround_apply_reset_activation_persistable_save_input_threshold_update_programr   _save_output_thresholdanyr|   &_collect_dynamic_quantize_op_thresholdr   $move_persistable_var_to_global_blockr   r   r	   Graphdesc)r   batch_idtdatavar_name
main_graphs   `     r%   quantizez!PostTrainingQuantization.quantize  s4    	%%'''((***:''H&U     --//  DN&& $!#'#3%*"k '    88:::MHHHJJJ' H8H,H,H#              $ --///"N
 
 
 	 ))++  "" M#/!&+ #       A


# D4D(D(DE#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	& : 8  4#:::68h+H57 7$&& )(33 :''--///z))  """**,,,:""&&((((  """ y 	*''))) 
 
 
 
9
 
 
 
 
 	 77.   	24=AAA! 	=  DM,>!?!?$OOOJs&    BC33C7:C7+BF>>GGc                    | j         dk    s
J d            | j         dv r| j        }n| j        }t          | j        | j        | j        | j        | j        | j	        | j
        | j        || j        | j        | j                   d S )Nrl   zThe algo should not be min_max.r   )num_iterationsr   lr)r   r   r   r   r   r   r   r~   r   r   _quantized_op_pairsr   r   r}   r{   )r   r   s     r%   r   z(PostTrainingQuantization._adaround_apply  s    zY&&&(I&&&:''6JJ2JMNKK$!+ 1"	
 	
 	
 	
 	
 	
r'   c                 z    d}|d}n4|                     d          r|                    dd          d         }n|}t          j                            ||          } fd j        D             }t          j        || j         j	         j
         j                   t                              d	|z              dS )
a  
        Save the quantized model to the disk.

        Args:
            save_model_path(str): The path to save the quantized model.
            model_filename(str, optional): If the model_filename is None,
                save the model to 'model.pdmodel' and 'model.pdiparams'. Otherwise, save the model to 'model_name.pdmodel' and
                'model_name.pdiparams". Default: None.
        Returns:
            None
        Nmodel.pdmodelrH   r   r   c                 h    g | ].}j                                                             |          /S r*   )r   global_blockr$   )r-   r!   r   s     r%   
<listcomp>zAPostTrainingQuantization.save_quantized_model.<locals>.<listcomp>-  sB     
 
 
7;DM&&((,,T22
 
 
r'   )r   r"   
clip_extraz The quantized model is saved in )endswithrsplitospathjoinr   r   save_inference_modelr   r~   r   r   _loggerinfo)r   save_model_pathr   r   
model_namepath_prefix	feed_varss   `      r%   save_quantized_modelz-PostTrainingQuantization.save_quantized_model  s     
! JJ$$Z00 	('..sA66q9JJ'Jgll?J??
 
 
 
?C
 
 
	 	#^M'	
 	
 	
 	
 	7/IJJJJJr'   c                 r     j         Yt                              d           t          j         j         j         j         j                  \   _          _	         _
         j        r                                   fd j	        D             } j        r j        nt           j                   _        dS )z1
        Load model and set data loader.
        Nz"Load model and set data loader ...r   r   r   c                 T    g | ]$}t          t          |          j                  %S r*   )r   strr   )r-   r   r   s     r%   r  z=PostTrainingQuantization._load_model_data.<locals>.<listcomp>N  s;     
 
 
 S]]DM22
 
 
r'   )r   r  r  r   load_inference_modelr   r~   r   r   r   r   r   _optimize_fp32_modelr   rM   r   )r   r  s   ` r%   r   z)PostTrainingQuantization._load_model_data:  s     = LL=>>>
 +#3 $ 5	  	   	(%%'''
 
 
 
 O
 
 
	 !% 0LDc$:K6L6L 	r'   c                    t                               d           t          t          j        | j        j                  d          }t          |          }t          | j	        |d          }t          | j	        |d          }t          | j	        |d          }t          | j	        |d          }t          | j	        |d          }|
                                | _        d	S )
zH
        Fuse the `conv2d/depthwise_conv2d + bn` in FP32 model.
        zOptimize FP32 model ...Tr   conv_bn_fuse_passdepthwise_conv_bn_fuse_passconv_transpose_bn_fuse_passconv_eltwiseadd_bn_fuse_pass&depthwise_conv_eltwiseadd_bn_fuse_passN)r  r  r   r	   r   r   r   rE   rZ   r   
to_program)r   r;   s     r%   r  z-PostTrainingQuantization._optimize_fp32_modelW  s     	.///
4=#566FFF!%((DK0CDDDK0MNNDK0MNNDK0NOOK H
 
 ((**r'   c                     t                               d           i  _         fd}t           j                  }t          t           j        j                            D ]} j        j        |         j        D ]p} j	        6t          j        |          D ]!}| j	        v r|                    dd           "|j        }|dk    r|                    d          d         } j        j        |         j        D ]a}t          j        |          }	||	v rGt          j        |          D ]2}
|
|vr,|                    dd           |                    dd           3b j        r?|t#          t%          j                              vrt                               |d	z              g }|D ]}d
|v r|                    |           |dk    o7t          j        |          d         |v ot          j        |          d         |v }| j        j        v s| j        j        v s|r|dk    o|                    d          }|r|dz   n|} |t          j        |          ||            |t          j        |          ||           t          j        |          D ]'}t          j        |          D ]}||v r
| j        |<   (C| j        j        v r |t          j        |          ||           rdS )zr
        Collect the variable names for sampling, and set activation
        variables to be persistable.
        z$Collect quantized variable names ...c                     | D ]E}||v r%j                             |           |j        |<   +j                            |           Fd S r   )r   r6   r   r   )var_name_listr#   r   r   r   s       r%   collect_var_namezKPostTrainingQuantization._collect_target_varnames.<locals>.collect_var_nameq  si    ) ? ?444377AAA6=D)(33044X>>>>? ?r'   Nop_namescope
skip_quantconv2d_transposeFilterr   rp   conv1d
unsqueeze2	matmul_v2trans_y_trans_y)r  r  r   r&   r   rangerM   blocksr<   r   r   _get_op_input_var_names	_set_attrtypeinput_get_op_output_var_namesr   r   r   r   warningr    r   weight_quant_operation_typesr   rX   observer_operation_types)r   r#  r#   block_idopinp_namer   in_name_opr   r!   conv1d_persistable_var_namesopnameis_conv1d_quantr+  out_var_namein_var_names   `                r%   r   z1PostTrainingQuantization._collect_target_varnamesh  s    	;<<<#% 	? 	? 	? 	? 	? !;4= I Ic$-"67788 I	 I	Hm*848 H H)5$)$A"$E$E G G#t'===LLFFF'000 hhx003G#}3H=A P P#(#A##F#F"h..(-(Ec(J(J P P#'/D#D#D$&LL$N$N$N$'MM.,$O$O$O) gT0577> > / / OO"GG   02,3 D DF6))4;;FCCC , 5b99!<78 5b99!<78   t0MMM(IJ J&J  '+5M2779;M;MG6=Jg
227G$$5b99-  
 %$6r::-   ).(Fr(J(J " "+0+H+L+L " "K*.CCC$0 !% 8 E""  1 JJJ$$6r::-  IHI	 I	r'   c                 h    | j                                         D ]}|j        | j        v rd|_        dS )zr
        Set activation variables to be persistable, so can obtain
        the tensor data in sample_data
        TN)r   r   r!   r   r   r   r$   s     r%   r   z4PostTrainingQuantization._set_activation_persistable  sC    
 =**,, 	' 	'Cx4777"&	' 	'r'   c                     | j                                         D ]Z}|j        | j        v rJd|_        | j                            |j                                                                                   [dS )z:
        Reset activations to be not persistable.
        FN)	r   r   r!   r   r   r   find_var
get_tensor_clearrB  s     r%   r   z6PostTrainingQuantization._reset_activation_persistable  sw     =**,, 	E 	ECx4777"'$$SX..99;;BBDDD	E 	Er'   c                    | j         dk    r|                                  dS | j         dk    r|                                  dS | j         dk    r|                                  dS | j         dk    r|                                  dS | j         dk    r|                                  dS | j         dk    r|                                  dS | j         dv r|                                  dS dS )	zO
        Sample the min/max, abs_max or histogram in every iterations.
        rg   ri   rl   rj   rk   rm   r   N)r   _sample_abs_max_sample_avg_sample_min_max_sample_mse_sample_emd_sample_ptf_sample_histogram)r   s    r%   r   z"PostTrainingQuantization._sampling  s    :""  """""Z5  Z9$$  """""Z5  Z5  Z5  Z>))""$$$$$ *)r'   c                 ^   | j         i k    rl| j        D ]c}t          j        | j        |          }| j        dk    r4t          t          j        t          j	        |                              }n| j        dk    rg }| j
        |         t          j        v rnt          |j        d                   D ]R}|                    t          t          j        t          j	        |d d |f                                                  Snit          |j        d                   D ]N}|                    t          t          j        t          j	        ||                                                  O|| j         |<   et                              d           | j        D ]}t          j        | j        |          }|j        dk    r| j                            |           C|                                }t          t          j        t          j	        |                              }|dk    rdn|}d}|| j        vrt          d	          | j        |<   |d
k    r||z  }|dz  }d| j        dz
  z  dz
  }| j        r;t          j        t          j        ||z  |z            | dz
  |          }||z  |z  }	n4t          j        t          j        |d|          |z  |z            |z  |z  }	||	z
  dz                                  }
|
| j        |         k    r|
| j        |<   || j         |<   |d
k    Րd S )Nrg   rd   r   r   zMSE searching stage ...        :0yE>333333?inf      ?{Gz?r   )r   r   r   load_variable_datar   r   floatr   maxabsr   _channelwise_quant_axis1_opsr-  shaper    r  r  r   sizer   r6   flattenr   r   r   clipr`   r   )r   r   
var_tensorabs_max_valueisscalebins	quant_varquant_dequant_varmse_losss              r%   rK  z$PostTrainingQuantization._sample_mse  sl   $** ; D D"5dk8LL
-::$)"&
1C1C*D*D$E$EMM/3III$&M-h7 => > "'z'7':!;!;  A)00 %bfRVJqqq!t4D-E-E&F&F G G   
 "'z'7':!;!;  A)00 %bfRVJqM-B-B&C&C D D    7D)(33.///4 	@ 	@H1$+xHHJ!##)--h777#++--J!"&
););"<"<==M$1S$8$8DDmMAt:::8=e+H5s((M)T	T2Q67!;$ 
 "e!3d!:;;dUQY! !I )2D(85(@%% S%!@!@5!H4!OPP  &
 (*;;AGGIIt:8DDD<DD/9:?D-h7% s((	@ 	@r'   c                    | j         i k    rl| j        D ]c}t          j        | j        |          }| j        dk    r4t          t          j        t          j	        |                              }n| j        dk    rg }| j
        |         t          j        v rnt          |j        d                   D ]R}|                    t          t          j        t          j	        |d d |f                                                  Snit          |j        d                   D ]N}|                    t          t          j        t          j	        ||                                                  O|| j         |<   et                              d           | j        D ]}t          j        | j        |          }|j        dk    r| j                            |           C|                                }t          t          j        t          j	        |                              }|dk    rdn|}d}|| j        vrt          d	          | j        |<   |d
k    r3||z  }|dz  }d| j        dz
  z  dz
  }| j        r;t          j        t          j        ||z  |z            | dz
  |          }||z  |z  }	n4t          j        t          j        |d|          |z  |z            |z  |z  }	t          j	        t          j        |          t          j        |	          z
            t          j	        t          j        |          t          j        |	          z
            z   }
|
| j        |         k    r|
| j        |<   || j         |<   |d
k    3d S )Nrg   rd   r   r   zEMD searching stage ...rP  rQ  rR  rS  rT  rU  r   )r   r   r   rV  r   r   rW  r   rX  rY  r   rZ  r-  r[  r    r  r  r   r\  r   r6   r]  r   r   r   r^  r`   r   std)r   r   r_  r`  ra  rb  rc  rd  re  rf  emd_losss              r%   rL  z$PostTrainingQuantization._sample_emd   s   $** ; D D"5dk8LL
-::$)"&
1C1C*D*D$E$EMM/3III$&M-h7 => > "'z'7':!;!;  A)00 %bfRVJqqq!t4D-E-E&F&F G G   
 "'z'7':!;!;  A)00 %bfRVJqM-B-B&C&C D D    7D)(33.///4 	@ 	@H1$+xHHJ!##)--h777#++--J!"&
););"<"<==M$1S$8$8DDmMAt:::8=e+H5s((M)T	T2Q67!;$ 
 "e!3d!:;;dUQY! !I )2D(85(@%% S%!@!@5!H4!OPP  &
 6GJ''"'2C*D*DD F26*--7H0I0IIJJK t:8DDD<DD/9:?D-h7) s((	@ 	@r'   c                 &   | j         i k    rl| j        D ]c}t          j        | j        |          }| j        dk    r4t          t          j        t          j	        |                              }n| j        dk    rg }| j
        |         t          j        v rnt          |j        d                   D ]R}|                    t          t          j        t          j	        |d d |f                                                  Snit          |j        d                   D ]N}|                    t          t          j        t          j	        ||                                                  O|| j         |<   e| j        D ]}t          j        | j        |          }|j        dk    r| j                            |           Ct          t          j        t          j	        |                              }|| j        vr
g | j        |<   t          t          j        t          j        t          j	        |                    |j        d         d                    d                              }| j        |                             |           d S )Nrg   rd   r   r   )axis)r   r   r   rV  r   r   rW  r   rX  rY  r   rZ  r-  r[  r    r   r\  r   r6   r   r   reshape)r   r   r_  r`  ra  abs_avg_values         r%   rI  z$PostTrainingQuantization._sample_avgX  s   $** ; D D"5dk8LL
-::$)"&
1C1C*D*D$E$EMM/3III$&M-h7 => > "'z'7':!;!;  A)00 %bfRVJqqq!t4D-E-E&F&F G G   
 "'z'7':!;!;  A)00 %bfRVJqM-B-B&C&C D D    7D)(334 	D 	DH1$+xHHJ!##)--h777!"&
););"<"<==Mt66646'1!Fz11*2B12ErJJKK    M #H-44]CCCC!	D 	Dr'   c                 8   | j         i k    rl| j        D ]c}t          j        | j        |          }| j        dk    r4t          t          j        t          j	        |                              }n| j        dk    rg }| j
        |         t          j        v rnt          |j        d                   D ]R}|                    t          t          j        t          j	        |d d |f                                                  Snit          |j        d                   D ]N}|                    t          t          j        t          j	        ||                                                  O|| j         |<   e| j        D ]}t          j        | j        |          }|j        dk    r| j                            |           Bt          t          j        t          j	        |                              }|| j         vs|| j         |         k    r
|| j         |<   d S Nrg   rd   r   r   )r   r   r   rV  r   r   rW  r   rX  rY  r   rZ  r-  r[  r    r   r\  r   r6   )r   r   r_  r`  ra  s        r%   rH  z(PostTrainingQuantization._sample_abs_max  s&   $** ; D D"5dk8LL
-::$)"&
1C1C*D*D$E$EMM/3III$&M-h7 => > "'z'7':!;!;  A)00 %bfRVJqqq!t4D-E-E&F&F G G   
 "'z'7':!;!;  A)00 %bfRVJqM-B-B&C&C D D    7D)(334 		D 		DH1$+xHHJ!##)--h777!"&
););"<"<==M 999 9( CCC6C)(3		D 		Dr'   c                    | j         i k    r| j        i k    r| j        D ]}t          j        | j        |          }| j        dk    rDt          t          j	        |                    }t          t          j
        |                    }nT| j        dk    rHg }g }| j        |         t          j        v rt          |j        d                   D ]~}|                    t          t          j	        |d d |f                                        |                    t          t          j
        |d d |f                                        nt          |j        d                   D ]v}|                    t          t          j	        ||                                        |                    t          t          j
        ||                                        w|| j         |<   || j        |<   | j        D ]}t          j        | j        |          }|j        dk    r| j                            |           Bt          t          j	        |                    }t          t          j
        |                    }|| j         vs|| j         |         k     r
|| j         |<   || j        vs|| j        |         k    r
|| j        |<   d S rq  )r   r   r   r   rV  r   r   rW  r   minrX  r   rZ  r-  r[  r    r   r\  r   r6   )r   r   r_  	min_value	max_valuera  s         r%   rJ  z(PostTrainingQuantization._sample_min_max  s   "b((T-D-J-J ; > >"5dk8LL
-:: %bfZ&8&8 9 9I %bfZ&8&8 9 9II/3III "I "I-h7 => > "'z'7':!;!; N NA%,,U26*QQQT:J3K3K-L-LMMM%,,U26*QQQT:J3K3K-L-LMMMMN "'z'7':!;!; K KA%,,U26*Q-3H3H-I-IJJJ%,,U26*Q-3H3H-I-IJJJJ4='14='114 	> 	>H1$+xHHJ!##)--h777bfZ0011IbfZ0011I 777D3H===4='1 777D3H===4='1	> 	>r'   c                 b   | j         D ]}t          j        | j        |          }|j        dk    s	|| j        vr| j                            |           Kt          j	        |          }| j        |         d         }t          j
        ||          \  }}| j        |         dxx         |z  cc<   d S )Nr   r   )rd  )r   r   rV  r   r\  r   r   r6   r   rY  	histogram)r   r   r_  var_tensor_absrd  rh   _s          r%   rN  z*PostTrainingQuantization._sample_histogram  s    4 
	> 
	>H1$+xHHJ1$$ <<<)--h777VJ//N/9!<Dl>===GD!(21555=5555
	> 
	>r'   c                    | j         i k    rl| j        D ]c}t          j        | j        |          }| j        dk    r4t          t          j        t          j	        |                              }n| j        dk    rg }| j
        |         t          j        v rnt          |j        d                   D ]R}|                    t          t          j        t          j	        |dd|f                                                  Snit          |j        d                   D ]N}|                    t          t          j        t          j	        ||                                                  O|| j         |<   e| j        D ]}t          j        | j        |          }|j        dk    r| j                            |           Ct          t          j        t          j	        |                              }d| j        dz
  z  dz
  }||z  }|dz  }|dz  }|dz  }	t          j        t          j        ||	z            d|          |	z  }
t          j        t          j        ||z            d|          |z  }t          j        t          j        ||z            d|          |z  }t          j        t          j        ||z            d|          |z  }t          j        ||
          }t          j        ||          }t          j        ||          }t          j        ||          }||||g}d|                    t1          |                    z  }|	|z  }||z  }|| j         |<   dS )zj
        The following code are modified from:
        https://github.com/megvii-research/FQ-ViT/
        rg   rd   r   Nr   r   )r   r   r   rV  r   r   rW  r   rX  rY  r   rZ  r-  r[  r    r   r\  r   r6   r   r^  r`   l2_lossindexrs  )r   r   r_  r`  ra  q_maxscale8scale4scale2scale1quant_dequant_var_scale1quant_dequant_var_scale2quant_dequant_var_scale4quant_dequant_var_scale8score1score2score4score8scoremaskrc  	thresholds                         r%   rM  z$PostTrainingQuantization._sample_ptf  su   
 $** ; D D"5dk8LL
-::$)"&
1C1C*D*D$E$EMM/3III$&M-h7 => > "'z'7':!;!;  A)00 %bfRVJqqq!t4D-E-E&F&F G G   
 "'z'7':!;!;  A)00 %bfRVJqM-B-B&C&C D D    7D)(334 	< 	<H1$+xHHJ!##)--h777!"&
););"<"<==M$/!34q8E"U*FaZFaZFaZFf!455q%@@6I % f!455q%@@6I % f!455q%@@6I % f!455q%@@6I % ]:/GHHF]:/GHHF]:/GHHF]:/GHHFVVV4ECJJ///DTMEI2;D%h//?	< 	<r'   c                    | j         dk    s
J d            t          t          | j        j                            D ]}| j        j        |         j        D ]}|j        | j        j        v s|j        | j        j	        v rt          j        |          D ]v}|| j        v sJ || j        v sJ |                    |dz   | j        |                    |                    |dz   | j        |                    |                    dd           wdS )z;
        Save input threshold to the quantized op.
        rl   z3The algo should be min_max to save input threshold.z.minz.maxwith_quant_attrTN)r   r-  rM   r   r.  r<   r1  r   r5  r   r   r/  r   r   r0  )r   r7  r8  r   s       r%   r   z.PostTrainingQuantization._save_input_threshold  sQ    zY&&&A '&& c$-"67788 	> 	>Hm*848 > >Gt0MMMw(IJ J %*$A"$E$E 	> 	>'4+BBBBB'4+BBBBB$v-t/Fx/P   $v-t/Fx/P   %6====>	> 	>r'   c                    | j         D ]}t          j        | j        |          }|j        dk    r| j                            |           Bt          j        |          }t          t          j
        |                    }t          t          j        |                    }|| j        vr||g| j        |<   || j        |         d         k     r|| j        |         d<   || j        |         d         k    r|| j        |         d<   dS )z
        Collect the abs_min and abs_max for all activation. When algo = KL,
        get the min and max value, and then calculate the threshold.
        r   r   N)r   r   rV  r   r\  r   r6   r   rY  rW  rs  rX  r   )r   r   r_  rt  ru  s        r%   r   z8PostTrainingQuantization._collect_activation_abs_min_max-  s   
 4 	L 	LH1$+xHHJ!##)--h777
++JbfZ0011IbfZ0011It===<.x88
 t=hGJJJBKD28<Q?t=hGJJJBKD28<Q?#	L 	Lr'   c                     | j         D ]q}|| j        v r
|| j        vr|| j        vrS| j        |         d         }| j        |         d         }t	          j        g | j        ||f          \  }}||g| j        |<   rdS )zN
        Based on the min/max value, init the sampling_act_histogram.
        r   r   rd  r-  N)r   r   r   r   r   rw  r   )r   r   min_valmax_valrh   
hist_edgess         r%   r   z5PostTrainingQuantization._init_sampling_act_histogramE  s     4 	L 	LHD555 >>>t;;;8B1E8B1E#%<T1'79K$ $ $ j ;?
9K,X6	L 	Lr'   c                 l   t                               d| j         d           | j        dv s
J d            | j        D ]c}t	          j        | j        |          }| j        dk    r4t          t          j
        t          j        |                              }n| j        dk    rg }| j        |         t          j        v rnt          |j        d                   D ]R}|                    t          t          j
        t          j        |dd|f                                                  Snit          |j        d	                   D ]N}|                    t          t          j
        t          j        ||                                                  O|| j        |<   e| j        D ]}|| j        v r
|| j        vr| j        |         \  }}| j        d
k    r0|d         |d	         z
  }t+          ||| j                  | j        |<   `| j        dk    r|                     ||          | j        |<   dS )zL
        Calculate the KL or hist threshold of quantized variables.
        z
Calculate z threshold ...r   zThe algo should be KL or hist.rg   rd   r   Nr   r^   rh   )r  r  r   r   r   rV  r   r   rW  r   rX  rY  r   rZ  r-  r[  r    r   r   r   r   r   r   _get_hist_scaling_factor)r   r   weight_dataweight_thresholdra  rh   r  	bin_widths           r%   r   z5PostTrainingQuantization._calculate_kl_hist_thresholdV  sp    	<$*<<<===z^+++-M+++ 7 	G 	GH24;IIK)Y66#({0C0C)D)D#E#E  +/EEE#% )(39: : #;#4Q#788  (//!"&AAAqD0A)B)B"C"CDD   
 #;#4Q#788  (//!"&A)?)?"@"@AA    7GD)(334 	 	HD555 <<<#;HED*zT!!&qMJqM9	:J)T%:; ;-h77 v%%11$
CC -h7	 	r'   c           
      
   t                               d           t          t          j        | j        j                  d          }| j        s?t          | j	        | j
        | j        | j        | j        | j        | j        j                  }n>t#          | j	        | j
        | j        | j        | j        | j        | j        j                  }|                                D ]}d|_        |                    |           | j        s't+          | j	        | j
        | j        j                  }n&t/          | j	        | j
        | j        j                  }|                                D ]}d|_        |                    |           | j        | j        dv r| j        }n| j        }| j        | j        D ]}d}|D ]}d|v r|                    d          \  }	}
}|	|                                vr6|
d	k    r)t?          ||	                   t?          |          z  ||	<   n.|
d
k    r(t?          ||	                   t?          |          z  ||	<   |||	         ntA          |||	                   }||                                vr|||         ntA          |||                   }|D ]}d|v rh|                    d          \  }	}
}|	|                                vr6|
d	k    r|t?          |          z  ||	<   R|
d
k    r|t?          |          z  ||	<   n||                                vr|||<   || _        | j        !                                D ]\  }}tE          j#        | j	        | j
        |dz   tI          j%        |gtH          j&                             tE          j#        | j	        | j
        |dz   tI          j%        |gtH          j&                             | j        s| j'        rwtQ          | j	        | j
        | j)        | j        | j*        | j        | j        | j        j                  }|                                D ]}d|_        |                    |           ntW          | j	        | j
                  }|                                D ]}d|_        |                    |           | j        j        | j        j        z   | j        j,        z   }t[          | j	        | j
        | j        || j                  }|                                D ]}d|_        |                    |           |.                                | _        dS )z
        Use QuantizationTransformPass and AddQuantDequantPass to insert
        fake_quantize, fake_dequantize and fake_quant_dequant op.
        Besides, save all threshold to the scale var node.
        zUpdate the program ...Tr   )rQ   r   r   r   r   r   ru   )rQ   r   ru   Nr   #*/z@scaledtypez.quant_dequant@scale)rQ   r   r   r   r   r   r   ru   )rQ   r   rv   ru   calibration_range_dict)/r  r  r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r5  r   all_sub_graphs	_for_testrO   r   r   r   r   r   r   r   r   splitr   rW  rX  itemsr   set_variable_datar   r   float32r   r   r}   rz   r   r6  r   r  )r   r;   transform_pass	sub_graphadd_quant_dequant_passr   tensor_list	max_scaletensor_namereal_tensor_nameoperascalarkeyvalfreeze_passquant_weight_passinfer_pass_quant_op_typesout_scale_infer_passs                     r%   r   z(PostTrainingQuantization._update_program  sJ    	-...
4=#566FFF   	6kk - $ 5)-)G%)%?$($5$R  NN 9kk - $ 5)-)G%)%?$($5$R  N --// 	, 	,I #'I  ++++   	%8kk$($5$V& & &"" &;kk$($5$V& & &" --// 	4 	4I"&I"((3333 #z^++!:

!6
+7#'#? 3@ 3@K $I'2  +-->I>O>O #? ?;,eV  0z7H7HHH ($||?D$./?$@@" @"$)&MM@2
+; < < "'#?D$./?$@@" @"$)&MM@2
+; <
 $-#4 !++; < <%($-z:J/K&" &" &I  +*//2C2CCC ( $-#4 !+; 7 7%(J{4K%L%L &I (3 @ @+-->I>O>O #? ?;,eV  0z7H7HHH ($||$-f$= !++; < < "'#$-f$= !++; <  +*//2C2CCC (6?J{33%@&  *D(..00 	 	HC#h#bj111	   #,,#bj111	      %	6! 14++$($9 $ 1#/$($9)-)C(,(9(V	 	 	 "'!5!5!7!7 1 1I*.I'%%i0000 /T[ I I"1133 3 3	&*	#!''	2222 !>#DE#<= &
 $Ckk0$='+'7$ $ $  #1133 6 6	&*	#$**95555((**r'   c                 \    i  _          fd fd}t          t           j        j                            D ]n} j        j        |         j        D ]Tj         j        j         j        j	        z    j        j
        z   v r%t          j                  }|D ]} ||           UodS )z<
        Save output threshold to the quantized op.
        c                 <   |j         v r#||vrt                              | d           d S ||v sJ d| d| j         d            j        r"i j        |<   ||         j        |         d<   d S |                     |||                    |                     |d         t          |d                   z   dz   ||                    |                     d	d
           | j        j        j	        v s| j        j        j
        v r                    d|           d S d S )Nz? is zero-size tensor and unable to calibrate, so skip quant it.zThe output (z) of z node does not have threshold.rc  r   r   
_thresholdr  Tquantization_type)r   r  r4  r1  r   _calibration_scalesr0  r  r   r5  r   )r=   r?  threshold_mapout_info_nameargname_indexquantized_typer8  r   s         r%   	save_infozBPostTrainingQuantization._save_output_threshold.<locals>.save_info5  s     999M11#ddd   #}444b<bbglbbb 544   F9;(6BO C(6w??? !!-|1LMMM!!!!$s=+;'<'<<|K!,/   !!"3T:::L(EF F|(IJ J LL!4nEEEEEJ Jr'   c                    t          j        | |          }|J |dz               j        dv r> | |j        d|dt	          j                                                  z              d S j        dv r, | |j        d|dt	          j                  z              d S j        dk    r, | |j        d|d            | |j        d	|d           d S d S )
Nz is not the output of the opr   out_thresholdpost_)ri   rg   rj   rk   rm   rl   out_minpost_min_maxout_max)	r   _get_output_name_indexr   r   r  r   r   r   r   )r=   r?  r  r  r   s      r%   analysis_and_save_infozOPostTrainingQuantization._save_output_threshold.<locals>.analysis_and_save_info]  sR   !8,OOM ,,== -,, z^++	 1#!c$*oo33555     FFF	 -#!c$*oo-     y((	 +!"   	 +!"     )(r'   N)r  r-  rM   r   r.  r<   r1  r   r5  r   r6  r   r3  )r   r  r7  out_var_namesr   r8  r  s   `    @@r%   r   z/PostTrainingQuantization._save_output_threshold/  s    $& &	F &	F &	F &	F &	F &	FP(	 (	 (	 (	 (	 (	T c$-"67788 		= 		=Hm*848 = =7%B'HI'@A 
 %*$B2$F$FM$1 = =..r8<<<<=		= 		=r'   c           	      &   g }t          | j        j                  D ]B}| j                            |          j        D ] }|j        |v r|                    |           !Ct          d| j        z             	                                }t          | j                  }|D ]}t          j        |          D ]}||v rt          j        | j        |          }t          t!          j        t!          j        |                              }	t          j        ||          \  }
}|                    |
t          |          z   dz   |	           |                    d|           |                    d| j                   |                    dd           ܌dS )z
        Collect and save the weight threshold for dynamic quantize ops,
        such as lstm and gru.
        Args:
            target_ops_type(list): the op type of target ops
        Returns:
            None
        r  r  r  
bit_lengthr  TN)r-  r   
num_blocksblockr<   r1  r    r  r   r   r&   r   r/  rV  r   rW  r   rX  rY  _get_input_name_indexr0  r   )r   target_ops_type
target_opsr|  r8  r  r#   r   var_datar  argnames              r%   r   z?PostTrainingQuantization._collect_dynamic_quantize_op_threshold  s    
4=344 	* 	*Em))%004 * *7o--%%b)))*  $* 455;;== :4= I I 		: 		:B!9"== : :444$7XNNH %bfRVH-=-=&>&> ? ?I%*%@X%N%NNGULL3u::!5!DiPPPLL!46GHHHLLt/@AAALL!2D999:		: 		:r'   c                     | j         }|t          t          |                    z  }d}d}t          t	          |                    D ]}|||         z  }||k    r|dz   } n|d         |d         z
  }|dz
  |z  S )zB
        Using the hist method to get the scaling factor.
        r   r   g      ?)r   rW  sumr-  rM   )r   rh   r  threshold_ratehist_sum
hist_indexra  r  s           r%   r  z1PostTrainingQuantization._get_hist_scaling_factor  s     +eCII&&&
s4yy!! 	 	AQH>))U
 * qMJqM1	S I--r'   NN)__name__
__module____qualname____doc__r   r   r   r  r   r  r   r   r   r   rK  rL  rI  rH  rJ  rN  rM  r   r   r   r   r   r   r   r  r*   r'   r%   r\   r\   p   s:         !03#Afr fr fr frP	` ` `D
 
 
, EI"K "K "K "KH
 
 
:+ + +"[ [ [z' ' 'E E E% % %&4@ 4@ 4@l6@ 6@ 6@p'D 'D 'DR D  D  DD&> &> &>P> > >:< :< :<x> > >2L L L0L L L"+ + +Zj+ j+ j+Xa= a= a=F: : ::. . . . .r'   r\   c                   \     e Zd Zddddddddddg dddddd	d	d
ddddddddddf fd	Z xZS )PostTrainingQuantizationProgramNr]   r^   r_   conv2ddepthwise_conv2dmulr`   ra   Frb   rc   rd   Tc                       t                      j        ||d d d ||||	|
||||||||||||||||||||           d| _        || _        | j        d| _        |
J d            |
J d            || _        || _        d S )NFTzFeed list should not be None.zFetch list should not be None.)superr   r   r   r   r   ) r   r   r"   	feed_listr   rQ   r   r   r   r   r   r   r   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                                   r%   r   z(PostTrainingQuantizationProgram.__init__  s    B 	$ "=	
 	
 	
@ 	=$DI$$&E$$$%%'G%%%#%r'   )r  r  r  r   __classcell__)r  s   @r%   r  r    s        
 AAA!03#?H& H& H& H& H& H& H& H& H& H&r'   r  c                   x    e Zd Zg dZddgZddZddddgddd	d
fdZd Zd Zd Z	d Z
d Zd Zd Zd ZddZdS )WeightQuantizationr  rd   rg   Nc                 0    || _         || _        || _        dS )a  
        This class quantizes the weight of some ops to reduce the size of model
        or improve the performance.

        Args:
            model_dir(str): The path of the fp32 model that will be quantized,
                and the model and params files are under the path.
            model_filename(str, optional): The name of file to load the inference
                program. If it is None, the default filename '__model__' will
                be used. Default is 'None'.
            params_filename(str, optional): The name of file to load all parameters.
                When all parameters were saved in a single binary file, set it
                as the real filename. If parameters were saved in separate files,
                set it as 'None'. Default is 'None'.
        N)r   r   r   )r   r   r   r   s       r%   r   zWeightQuantization.__init__  s!      $- /r'   r  r  rb   FrP  c	           
         |D ]}	|	| j         v sJ d|	z   dz               |dv s
J d            || j        v sJ d| j                     t          j                            |d          }
|                     |
|||||d|           |r>t          j                            |d          }|                     ||||||d	|           d
S d
S )ac  
        In order to reduce the size of model, this api quantizes the weight
        of some ops from float32 to int8/16. In the inference stage, the
        quantized weight will be dequantized to float32 again.

        Args:
            save_model_dir(str): The path to save the quantized model.
            save_model_filename(str, optional): The name of file to
                save the inference program. If it is None, the default
                filename '__model__' will be used. Default is 'None'.
            save_params_filename(str, optional): The name of file to
                save all parameters. If it is None, parameters were
                saved in separate files. If it is not None, all
                parameters were saved in a single binary file.
            quantizable_op_type(list[str], optional): The list of ops
                that will be quantized, and the quantized ops should be
                contained in ["conv2d", "depthwise_conv2d", "mul"].
                Default is ["conv2d","mul"].
            weight_bits(int, optional): The bits for the quantized weight,
                and it should be 8 or 16. Default is 8.
            weight_quantize_type(str, optional): quantization type for weights,
                support 'channel_wise_abs_max' and 'abs_max'. Set it as
                'channel_wise_abs_max', the accuracy performs better.
            generate_test_model(bool, optional): If set generate_test_model
                as True, it saves a fake quantized model, in which the weights
                are quantized and dequantized. We can use PaddlePaddle to load
                the fake quantized model and test the accuracy on GPU or CPU.
            threshold_rate(float, optional): This api uses abs_max method to
                quantize the weight from float32 to int8/16, and the abs max
                value is important for quantization diff. When the abs_max
                value is far away from the center of the numerical distribution,
                we can set threshold_rate between 1e-6 and 1e-8, so the abs max
                value will be optimized. Default is 0.0.
        zInput error:z* is not supported for weight quantization.)rb      z+Input error: weight_bits should be 8 or 16.z,Input error: weight_quantize_type should in quantized_modelF
test_modelTN)_supported_quantizable_op_type_supported_weight_quantize_typer  r	  r
  _quantize_weight_to_int)r   save_model_dirsave_model_filenamesave_params_filenameru   r   r   generate_test_modelr  r   quantized_model_dirtest_model_dirs               r%   quantize_weight_to_intz)WeightQuantization.quantize_weight_to_int$  sM   Z + 	 	GdAAAA>? BAAA
  
 
 
 
 9
 
 
 $t'KKKKa4;_aa LKK !gll>;LMM$$  		
 		
 		
  	W\\.,GGN((#$#$	 	 	 	 		 	r'   c           	      `   t          j                    }t          j        |          }t          j                    }t          j        | j        || j        | j                  \  }}}t          j	                    }|
                                }	i }
|                                D ]}|j        t           j        j        j        k    s/|j        r(|j        dv s|j        t           j        j        j        k    rQ|	                    |          }| j        ||
|j        <   xt*          j                            t*          j                            |          |j                  }|	                    dd|gii t*          j                            |          dd           | j        g }t5          |
                                          D ]}|                    |
|                    |	                    t           j        j        j        t=          j        d	          
          }|j         !                    d           t*          j                            t*          j                            |          | j                  }|	                    dd|id|i|dd           |"                                 |#                    |           | j        dn| j        }t*          j                            | j        |          }t*          j                            ||          }tI          j%        ||           dS )a  
        Convert all persistable vars from fp32 to fp16.
        Note that, this api only changes the data type of variables in
        __params__ file, and the __model__ file remains unchanged.

        Args:
            save_model_dir(str): The path to save the fp16 model.
        r  )r   fetchNsaveXT)	file_pathsave_as_fp16)r1  r5   r7   rS   saved_params)r1  r!   save_combineY	__model__)&r	   CPUPlacer   Executorr   r  r   r   r   Programr  r   r1  VarDescVarTypeRAWr   r!   r  FP32_clone_variabler  r	  r
  normpath	append_opsortedr   r    
create_varr
   generater   set_persistable_sync_with_cppr   shutilcopyfile)r   r  r   exerQ   infer_programr  r   save_program
save_blocksave_var_mapr$   new_varsave_file_pathsave_var_listr!   saved_params_var	save_pathr   	src_model
dest_models                        r%   convert_weight_to_fp16z)WeightQuantization.convert_weight_to_fp16x  s     oe$$#%%171LO/ 1	2
 2
 2
.	: ~''!..00
 **,, 	 	CT\1555 6H 111I!5!::: !0055G$0-4W\**!#G$$^44gl" " $$'+%'W%5%5n%E%E(, 	 %      ,M|002233 9 9$$\$%78888)44\)- ).99  5     !11$777  00$2G I   #]+./$-tDD	 !    	##%%%
 #+ K% 	
 GLL.AA	W\\..AA
	:.....r'   c	                 J   t          j                    }	t          j        |	          }
t          j                    }t          j        | j        |
| j        | j                  \  }}g }t          j
                  D ]?}                    |          }|j        D ] }|j        |v r|                    |           !@t                    }|D ]R}|j        D ]H}||v rB|dk    r|                     ||	|||||           (|dk    r|                     ||	||||           ISd}|d}n4|                    d          r|                    dd          d	         }n|}t*          j                            ||          }fd
|D             }t          j        ||||
           dS )zC
        Generate quantized model or fake quantized model.
        r  rg   rd   Nr   r  rH   r   r   c                 ^    g | ])}                                                     |          *S r*   )r  r$   )r-   r!   r"   s     r%   r  z>WeightQuantization._quantize_weight_to_int.<locals>.<listcomp>  s3    LLL$W))++//55LLLr'   )r   r"   )r	   r  r   r  r   r  r   r   r   r-  r  r  r<   r1  r    r&   input_arg_names_weight_abs_max_quantization)_weight_channel_wise_abs_max_quantizationr  r  r  r	  r
  r  )r   r  r  r  ru   r   r   r   r  r   r  rQ   r  r   quantized_opsr|  r  r8  r#   r   r  r  r  r"   s                          @r%   r  z*WeightQuantization._quantize_weight_to_int  s)    oe$$#%%+1+FO/ 1	,
 ,
 ,
()Z 7-.. 	- 	-EMM%((Ei - -7111!((,,,-
 !;7 C C 	 	B.  444+y8899!!'*$$    .1GGGFF!5+r8X    
& JJ ))*55 	-,33C;;A>JJ,Jgll>:>>LLLL)LLL	#	
 	
 	
 	
 	
 	
r'   c                    d|dz
  z  dz
  }|dk    rt           j        nt           j        }	t          j        ||          }
t          |          dk     r't          j        t          j        |
                    }n*|                     |
|          }||
|
|k    <   | |
|
| k     <   ||z  }t          j        |
|z            	                    |	          }|st          j
        ||||           n9||z  	                    t           j                  }t          j
        ||||           |                    dd           |                    d|           |                    |dz   |g           |                    dd	           d
S )z8
        Use abs_max method to quantize weight.
        r   rb   g|=r  post_weight_abs_maxquantize_weight_bits_quant_scaler  TN)r   int8int16r   rV  rY  rX  _calculate_thresholdaroundastyper  r  r0  )r   rQ   r   r   r  r8  r   r   quantize_rangesave_weight_dtyper  threshold_valuerc  quantized_weight_datadequantized_weight_datas                  r%   r#  z/WeightQuantization._weight_abs_max_quantization  s    a0A5'2a'7'7BGGRX .uh??~&& fRVK%8%899OO"77^ O :IKo56;J:JK&667.0 "	+*= > > E E!
 !

  
	#uh(=    (=u'D&L&L
' '# #uh(?  
 	(*?@@@
+[999
X.888
&-----r'   c                 :   d|dz
  z  dz
  }|dk    rt           j        nt           j        }t          j        ||          }	|j        dk    r|                     |	||          \  }
}nF|j        dv r|                     |	||          \  }
}n"t          	                    |j        dz              |st          j
        ||||           n{|j        dk    r|                     ||
          }nB|j        dv r|                     ||
          }n"t          	                    |j        dz              t          j
        ||||           |                    dd           |                    d|           |                    |d	z   |
           |                    d
d           dS )zE
        Use channel_wise_abs_max method to quantize weight.
        r   rb   r  )r  r  z( is not supported by weight quantizationr   post_weight_channel_wise_abs_maxr(  r)  r  TN)r   r*  r+  r   rV  r1  _mul_channel_wise_quantization_conv_channel_wise_quantizationr  errorr   _mul_channel_wise_dequantization!_conv_channel_wise_dequantizationr0  )r   rQ   r   r   r8  r   r   r/  r0  r  scalesr2  r3  s                r%   r$  z<WeightQuantization._weight_channel_wise_abs_max_quantization?  s    a0A5'2a'7'7BGGRX .uh??7e,0,O,O^->- -)F)) W666 44^-> %%
 MM"'$NNOOO  	#uh(=    w%*.*O*O)6+ +'' :::::-v  (' GHH   #uh(?  
 	(*LMMM
+[999
X.777
&-----r'   c                 f   g }t          j        ||          }|j        d         }t          |          D ]y}t          j        t          j        ||                             |z  }|                    |           t          j        ||         |z                                |          ||<   z||fS )z
        Get channel wise scale for the weights of conv2d and depthwise_conv2d,
        and quantize the weights.
        r  r   	r   
zeros_liker[  r-  rX  rY  r    r-  r.  	r   r  r/  r0  r;  r2  channel_numra  rc  s	            r%   r7  z2WeightQuantization._conv_channel_wise_quantizationv  s      "0!
 !
 !
 "'*{## 	 	AF26+a.1122^CEMM%   ')yQ%1G'H'H'O'O!( (!!$$ ,,,r'   c                     t          j        |t           j                  }t          t	          |                    D ]3}||         ||         z                      t           j                  ||<   4|S )zR
        For conv2d and depthwise_conv2d, dequantize the weights to fp32.
        r  r   r?  r  r-  rM   r.  r   r2  r;  r3  ra  s        r%   r:  z4WeightQuantization._conv_channel_wise_dequantization  sv     #%-!#
 #
 #
 s6{{## 	! 	!A%a(6!94fRZ   $A&& '&r'   c                 ~   g }t          j        ||          }|j        d         }t          |          D ]}t          j        t          j        |dd|f                             |z  }|                    |           t          j        |dd|f         |z                                |          |dd|f<   ||fS )r=  r  rl  Nr>  r@  s	            r%   r6  z1WeightQuantization._mul_channel_wise_quantization  s      "0!
 !
 !
 "'+{## 	( 	(AF26+aaad"34455FEMM%   *,)AAAqD!E)+ +f&'' "!!!Q$'' ,,,r'   c                     t          j        |t           j                  }t          t	          |                    D ];}|dd|f         ||         z                      t           j                  |dd|f<   <|S )z:
        For mul, dequantize the weights to fp32.
        r  NrC  rD  s        r%   r9  z3WeightQuantization._mul_channel_wise_dequantization  s     #%-!#
 #
 #
 s6{{## 	! 	!A%aaad+fQi7fRZ   $AAAqD)) '&r'     c                 l   t          j        |          }t          j        ||dt          j        |          f          \  }}|t	          t          |                    z  }d}d}t          t          |                    D ]}	|||	         z  }|d|z
  k    r|	dz   } n|d         |d         z
  }
||
z  S )Nr   r  rT  r   )r   rY  rw  rX  rW  r  r-  rM   )r   r2  r  histogram_bins	input_absrh   r  r  r  ra  r  s              r%   r,  z'WeightQuantization._calculate_threshold  s    F5MM	<N1bfY6G6G2H
 
 
j eCII&&&
s4yy!! 	 	AQH3///U
 0 qMJqM1	I%%r'   r  )rG  )r  r  r  r  r  r   r  r  r  r#  r$  r7  r:  r6  r9  r,  r*   r'   r%   r  r    s
       %J%J%J"'=y&I#0 0 0 0. !!%u-3!R R R RhU/ U/ U/nC
 C
 C
J). ). ).V5. 5. 5.n- - -(' ' '- - -(' ' '& & & & & &r'   r  )NNF)+loggingr  r  numpyr   r   r   paddle.base.frameworkr   r    r   	frameworkr	   r
   
log_helperr   rn   r   r   r   r   r   r   r   quantization_passr   r   r   r   r   r   r   r  INFOr  r&   rA   rE   rZ   r\   r  r  r*   r'   r%   <module>rS     s    				      3 3 3 3 3 3 3 3                         # # # # # #       " " " " " " . . . . . .                             *gl H  
! ! !  &   BG   (M. M. M. M. M. M. M. M.`*I& I& I& I& I&&> I& I& I&Xz& z& z& z& z& z& z& z& z& z&s    #