
    ёiKF                    l   S SK r S SKrS SKrS SKr S SKJr  S SKJrJ	r	  SSK
Jr  SSKJr  SSKJr  S	S
KJr  SSK
Jr  SSKJr  SSKJr  SSKJrJrJrJr  SSKJrJrJrJrJrJrJ r   \" \!\ RD                  SS9r#S r$S r%S r& SS jr' " S S5      r( " S S\(5      r) " S S5      r*g!   SSKJr   N= f)    N)tqdm   )IrGraph_get_var   )static)core)unique_name   )
get_logger)utils)run_adaround)cal_kl_threshold)SUPPORT_QUANTIZATION_OP_DICTARMCPUQuantizerBaseQuantizerTensorRTQuantizer)AddQuantDequantForInferencePassAddQuantDequantPassAddQuantDequantPassV2QuantizationFreezePassQuantizationTransformPassQuantizationTransformPassV2QuantWeightPassz&%(asctime)s-%(levelname)s: %(message)s)fmtc                     / nU R                  5        H1  nUR                  (       d  M  UR                  UR                  5        M3     U$ N)	list_varspersistableappendname)programpersistable_var_namesvars      u/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/static/quantization/post_training_quantization.py_all_persistable_var_namesr&   8   s>      "???!((2 # !     c                   ^ [        5       mU R                  5       nU HK  nUR                   H  nTR                  U5        M     UR                   H  nTR                  U5        M     MM     T Vs1 s H  oUR
                  iM     snm[        [        U4S jU R                  5       5      5      nU R                  U5        U $ s  snf )Nc                 "   > U R                   T;  $ r   )node)r*   all_used_varss    r%   <lambda>*_remove_unused_var_nodes.<locals>.<lambda>L   s    -7r'   )	setall_op_nodesinputsaddoutputsr*   filterall_var_nodessafe_remove_nodes)graphopsop_node
input_nodeoutput_nodenall_unused_varsr+   s          @r%   _remove_unused_var_nodesr=   @   s    EM



C!..Jj) )"??Kk* +  &33]VV]3M79L9L9N	
O
 
O,L 4s   1Cc                     [        5       nU R                  5        H+  nUR                  5       (       d  M  UR                  U5        M-     U R	                  U5        U $ r   )r.   r4   is_ctrl_varr1   r5   )r6   remove_ctr_varsr*   s      r%   _remove_ctrl_varsrA   S   sN    eO##%% & 
O,Lr'   c                    [         R                  " U5      nUR                  nUR                  S5      (       d  UR	                  SU 5        U(       aK  U(       a  [        U5      [        U5      :X  d   S5       e[        X45       H  u  pUR                  X5        M     UR                  U5        U(       a$  UR                  SSU 3UR                  5       5        [        U5        U$ )N__param_scope__z5Different number of pass attributes and their values..	qat_fp32_)r	   get_passr6   hasset_not_ownedlenzipr.   applydrawr/   r=   )
scoper6   	pass_nameattrsattr_valuesdebugir_pass	cpp_graphattrvalues
             r%   _apply_passrV   \   s     mmI&GI==*++ 159s5zS-== 	
C	
= u2KDKK$ 3MM)

3)I;/1C1C1EFU#Lr'   c                       \ rS rSrSrSSSSSSSSSS/ SSS	S	S
S
SSS	SS	S	SSSSS	S4S jrS rS r S(S jrS r	S r
S rS rS rS rS rS rS rS rS rS rS rS rS  rS! rS" rS# rS$ rS% rS& rS'rg))PostTrainingQuantizationp   z
Utilizing post training quantization method to quantize the FP32 model,
and it uses calibrate data to get the quantization information for all
quantized variables.
N
   KLwJ?roundMbP?F   range_abs_maxchannel_wise_abs_maxTc                  z   / SQU l         SS/U l        / SQU l        US;   d   eXl        Xl        S/U l        Uc   S5       eUc   S	5       eU	S
:  d   S5       eXR                  ;   d   S5       eUU R                   ;   d   SU SU R                    S35       eUU R                  ;   d   SU SU R                   S35       eUU l        Xl        Uc  [        R                  " 5       OUU l
        X l        X@l        XPl        Xpl        X`l        Xl        Xl        Xl        Xl        UU l        UU l        UU l        UU l        UU l        U R0                  (       a  SOSU l        UU l        UU l        U R                  R8                  U l        SU l        SU l        SU l         Xl!        [E        5       U l#        [E        5       U l$        0 U l%        0 U l&        0 U l'        0 U l(        0 U l)        SU l*        0 U l+        0 U l,        0 U l-        0 U l.        0 U l/        [E        5       U l0        UU l1        UU l2        UU l3        UU l4        SU l5        U R<                  b  SU l5        UU l6        U(       a  [o        [p        Rr                  " 5       5      nO<U(       a5  U H/  n U [o        [p        Rr                  " 5       5      ;   a  M'   U S-   5       e   UU:X  d   S5       e/ SQn!U(       d  [u        UUS9U l;        gURy                  5       S:X  a  [{        UUS9U l;        gURy                  5       S:X  a  [}        UUS9U l;        gSU SU! S3(       d   eg)a  
Constructor.

Args:
    executor(static.Executor): The executor to load, run and save the
        quantized model.
    scope(static.Scope, optional): The scope of the program, use it to load
        and save variables. If scope=None, get scope by static.global_scope().
    model_dir(str): The path of the fp32 model that will be quantized,
        and the model and params files are under the path.
    model_filename(str, optional): The name of file to load the inference
        program. If it is None, the default filename '__model__' will
        be used. Default is 'None'.
    params_filename(str, optional): The name of file to load all parameters.
        When all parameters were saved in a single binary file, set it
        as the real filename. If parameters were saved in separate files,
        set it as 'None'. Default is 'None'.
    batch_generator(Python Generator, deprecated): The batch generator provides
        calibrate data for DataLoader, and it returns a batch every
        time. Note that, sample_generator and batch_generator, only one
        should be set. Besides, batch_generator supports lod tensor.
    sample_generator(Python Generator, deprecated): The sample generator provides
        calibrate data for DataLoader, and it only returns a sample every
        time. Note that, sample_generator and batch_generator, only one
        should be set. Besides, sample_generator dose not support lod tensor.
    data_loader(Paddle.io.DataLoader): The
        Dataloader provides calibrate data, and it could
        return a batch every time.
    batch_size(int, optional): The batch size of DataLoader. Default is 10.
    batch_nums(int, optional): If batch_nums is not None, the number of
        calibrate data is batch_size*batch_nums. If batch_nums is None, use
        all data provided by sample_generator as calibrate data.
    algo(str, optional): If algo='KL', use KL-divergence method to
        get the KL threshold for quantized activations and get the abs_max
        value for quantized weights. If algo='abs_max', get the abs max
        value for activations and weights. If algo= 'min_max', get the min
        and max value for quantized activations and weights. If algo='avg',
        get the average value among the max values for activations. If
        algo= 'hist', get the value of 'hist_percent' quantile as the threshold.
        If algo='mse', get the value which makes the quantization mse loss
        minimal. Default is KL.
    hist_percent(float, optional): The threshold of algo 'hist' for activations.
        Default is 0.99999.
    quantizable_op_type(list[str], optional): List the type of ops
        that will be quantized. Default is []. If quantizable_op_type is [],
        it will use the default quantization op type of the qunat config in
        the current deploy_backend.
    round_type(str, optional): The method of converting the quantized weights
        value float->int. Currently supports ['round', 'adaround'] methods.
        Default is `round`, which is rounding nearest to the integer.
        'adaround' is refer to https://arxiv.org/abs/2004.10568.
    learning_rate(float, optional): The learning rate of adaround method.
    is_full_quantized(bool, optional): If set is_full_quantized as True,
        apply quantization to all supported quantizable op type. If set
        is_full_quantized as False, it will apply quantization to the op type
        according to the input quantizable_op_type or quant config of deploy_backend.
    bias_correction(bool, optional): If set as True, use the bias correction
        method of https://arxiv.org/abs/1810.05723. Default is False.
    activation_bits(int): quantization bit number for activation.
    weight_bits(int, optional): quantization bit number for weights.
    activation_quantize_type(str): quantization type for activation,
        now support 'range_abs_max', 'moving_average_abs_max' and 'abs_max'.
        This param only specifies the fake ops in saving quantized model.
        If it is 'range_abs_max' or 'moving_average_abs_max', we save the scale
        obtained by post training quantization in fake ops. Note that, if it
        is 'abs_max', the scale will not be saved in fake ops.
    weight_quantize_type(str): quantization type for weights,
        support 'abs_max' and 'channel_wise_abs_max'. This param only specifies
        the fake ops in saving quantized model, and we save the scale obtained
        by post training quantization in fake ops. Compared to 'abs_max',
        the model accuracy is usually higher when it is 'channel_wise_abs_max'.
    onnx_format(bool): Whether to export the quantized model with format of ONNX.
        Default is False.
    freeze_model(bool): Whether to convert quantized and trained ``program`` to final
        quantized ``program``. Default: True.
    skip_tensor_list(list): List of skip quant tensor name. Default: None.
    same_scale_tensor_list(list(list)): The list of tensor keep same scale in the outermost
        list, the final scale about every list is the max of the scale in the list
        of tensor. Default: None.
    optimize_model(bool, optional): If set optimize_model as True, it applies
        some passes to the model before quantization, and it supports
        `conv2d/depthwise_conv2d + bn` pass so far. Some targets require the
        weights are quantized by tensor-wise method, which means the weights
        scale for all channel are the same. However, if fuse
        `conv2d/depthwise_conv2d + bn`, the weights scale for all channel will
        be different. In address this problem, fuse the pattern before
        quantization. Default False.
    is_use_cache_file(bool, optional): This param is deprecated.
    cache_dir(str, optional): This param is deprecated.
    deploy_backend(str, optional): Deploy backend, it can be None, `TensorRT`,
        `MKLDNN`, `ARM`. And it will extend the new backend. Default is None,
        which means to use the default general quantization configuration.
Returns:
    None

Examples:
    .. code-block:: python

        >>> # doctest: +SKIP("There are some example variables in the code.")
        >>> import paddle.static as static
        >>> from paddle.static.quantization import PostTrainingQuantization

        >>> exe = static.Executor(paddle.CPUPlace())
        >>> model_dir = "path/to/fp32_model_params"
        >>> # set model_filename as None when the filename is __model__,
        >>> # otherwise set it as the real filename
        >>> model_filename = None
        >>> # set params_filename as None when all parameters were saved in
        >>> # separate files, otherwise set it as the real filename
        >>> params_filename = None
        >>> save_model_path = "path/to/save_model_path"
        >>> # prepare the sample generator according to the model, and the
        >>> # sample generator must return a sample every time. The reference
        >>> # document: https://www.paddlepaddle.org.cn/documentation/docs/zh
        >>> # /user_guides/howto/prepare_data/use_py_reader.html
        >>> data_loader = your_data_loader
        >>> batch_size = 10
        >>> batch_nums = 10
        >>> algo = "KL"
        >>> quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
        >>> ptq = PostTrainingQuantization(
        ...     executor=exe,
        ...     sample_generator=None,
        ...     data_loader=data_loader,
        ...     model_dir=model_dir,
        ...     model_filename=model_filename,
        ...     params_filename=params_filename,
        ...     batch_size=batch_size,
        ...     batch_nums=batch_nums,
        ...     algo=algo,
        ...     quantizable_op_type=quantizable_op_type
        ... )
        >>> ptq.quantize()
        >>> ptq.save_quantized_model(save_model_path)
)r`   moving_average_abs_maxabs_maxrd   ra   )r[   histavgmseemdrd   min_maxptf)adaroundr]   lstmNzThe executor cannot be None.zdata_loader cannot be None.r   z(The batch_size should be greater than 0.z?The algo should be KL, hist, mse, avg, abs_max, min_max or ptf.zThe activation_quantize_type (z) should in (z).zThe weight_quantize_type (TFi   # is not supported for quantization.zPactivation_bits and weight_bits must be the same, other cases are not supported.)Ntensorrtmkldnnonednnarm)quantizable_op_type
quant_bitsrn   rq   zDeploy Backend z# not support, please choose one of rD   )?!_support_activation_quantize_type_support_weight_quantize_type_support_algo_type_round_type_learning_rate_dynamic_quantize_op_type_bias_correction	_executorr   global_scope_scope
_model_dir_model_filename_params_filename_sample_generator_batch_generator_batch_size_batch_nums_algo_hist_percent_activation_bits_weight_bits_activation_quantize_type_weight_quantize_type_onnx_format_clip_extra_skip_tensor_list_optimize_modelplace_place_program
_feed_list_fetch_list_data_loaderr.   _quantized_weight_var_name_quantized_act_var_name_weight_op_pairs_sampling_act_abs_min_max_sampling_act_histogram_sampling_data_quantized_var_threshold_histogram_bins_quantized_var_min_quantized_var_max_quantized_var_avg_best_calibration_loss_quantized_threshold_zero_size_var_names_same_scale_tensor_list_freeze_model_scale_dict_return_graphFLAG_is_full_quantizelistr   keysr   quant_configlowerr   r   )"selfexecutor	model_dirrM   model_filenameparams_filenamebatch_generatorsample_generatordata_loader
batch_size
batch_numsalgohist_percentrr   
round_typelearning_rateis_full_quantizebias_correctionactivation_bitsweight_bitsactivation_quantize_typeweight_quantize_typeonnx_formatfreeze_modeloptimize_modelis_use_cache_fileskip_tensor_listsame_scale_tensor_list	cache_dir
scale_dictreturn_graphdeploy_backendop_typesupport_deploy_backends"                                     r%   __init__!PostTrainingQuantization.__init__w   s   T2
.
 /89O-P*	#
 2222%+*0& #C%CC#&E(EE&A~III~... 	
M	
. %(N(NN	
 --E,FmTXTzTzS{{}~	
N $t'I'II 	
()=(>mDLnLnKooqr	
I
 !0!/4}f))+%#- /!1 /%%
) /')A&%9"'#'#4#44%!1- nn**'*-%''*u$ ")+&')$ (*%#"$"$"$&(#$&! %(E!'=$)%)	==$DI!1"&'C'H'H'J"K .$'C'H'H'J"KK CCK / +- 	
^	
- "O -$7&!D !!#z1 1$7&!D !!#u, /$7&!D
 %^$44WXnWoopqqqr'   c           
      f  ^  T R                  5         T R                  5         T R                  5         T R                  S;   a  Sn[	        T R
                  SSS9 nT R                  5        H  nT R                  R                  T R                  UT R                  ST R                  S9  T R                  5         US-  nUR                  5         T R
                  (       d  Mv  UT R
                  :  d  M    O   S	S	S	5        T R                  5         Sn[	        T R
                  S
SS9 nT R                  5        H  nT R                  R                  T R                  UT R                  ST R                  S9  T R                  5         US-  nUR                  5         T R
                  (       d  Mv  UT R
                  :  d  M    O   S	S	S	5        T R                  S:X  ac  T R                    HS  nUT R"                  ;  a  M  [$        R&                  " T R"                  U   5      R)                  5       T R*                  U'   MU     T R                  S;   a  T R-                  5         T R.                  S:X  a  T R1                  5         T R3                  5         T R                  S:X  a  T R5                  5         OT R7                  5         T R8                  (       d  T R;                  5         [=        U 4S jT R>                   5       5      (       a  T RA                  T R>                  5        [B        RD                  " T R                  5        T RF                  (       d  T R                  $ [I        [J        RL                  " T R                  RN                  5      SS9nU$ ! , (       d  f       GN= f! , (       d  f       GN= f)z
Load the FP32 model, and use the calibrate data to calculate the forward-stage.
Based on the sample data, we can get the quantization information, and obtain
the final quantized model.

Args:
    None
Returns:
    the program of quantized model.
r[   re   r   z8Preparation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}P   )total
bar_formatncolsF)r"   feed
fetch_listreturn_numpyrM   r   Nz5Sampling stage, Run batch:|{bar}| {n_fmt}/{total_fmt}rf   rk   ri   c              3   V   >#    U  H  nUTR                   R                  ;   v   M      g 7fr   )r    activation_quant_operation_types).0r   r   s     r%   	<genexpr>4PostTrainingQuantization.quantize.<locals>.<genexpr>  s)      
9 t((III9s   &)Tfor_test)(_load_model_data_collect_target_varnames_set_activation_persistabler   r   r   r   r{   runr   r   r}   _collect_activation_abs_min_maxupdate_init_sampling_act_histogram	_samplingr   r   nparraymeanr   _calculate_kl_hist_thresholdrw   _adaround_apply_reset_activation_persistable_save_input_threshold_update_programr   _save_output_thresholdanyry   &_collect_dynamic_quantize_op_thresholdr   $move_persistable_var_to_global_blockr   r   r	   Graphdesc)r   batch_idtdatavar_name
main_graphs   `     r%   quantize!PostTrainingQuantization.quantize  s    	%%'((*::'H&&U  --/DNN&& $!#'#3#3%*"kk '  88:MHHHJ'''H8H8H,H 0$ --/""N
 ))+"" MM#//!&++ #   A
###D4D4D(D ,
& :: 884#:#::68hh++H57$& ))(3 9 ::'--/z)  "**,::"&&(  " yy'') 
99
 
 
 77.. 	224==A!!==  DMM,>,>!?$OJ_ *
 
s2   BN N2N%BN!,N!>N!
N!
N0c                    U R                   S:w  d   S5       eU R                   S;   a  U R                  nOU R                  n[        U R                  U R
                  U R                  U R                  U R                  U R                  U R                  U R                  UU R                  U R                  U R                  S9  g )Nri   zThe algo should not be min_max.r   )num_iterationsr   lr)r   r   r   r   r   r   r   r{   r}   r   _quantized_op_pairsr   r   rz   rx   )r   r   s     r%   r   (PostTrainingQuantization._adaround_apply  s    zzY&I(II&::'66J22JMMNNKKKK$$!!++ 11""	
r'   c           	         SnUc  SnO.UR                  S5      (       a  UR                  SS5      S   nOUn[        R                  R	                  X5      nU R
                   Vs/ s H+  o`R                  R                  5       R                  U5      PM-     nn[        R                  " UUU R                  U R                  U R                  U R                  S9  [        R                  SU-   5        gs  snf )	ah  
Save the quantized model to the disk.

Args:
    save_model_path(str): The path to save the quantized model.
    model_filename(str, optional): If the model_filename is None,
        save the model to 'model.pdmodel' and 'model.pdiparams'. Otherwise, save the model to 'model_name.pdmodel' and
        'model_name.pdiparams". Default: None.
Returns:
    None
Nmodel.pdmodelrD   r   r   )r   r"   
clip_extraz The quantized model is saved in )endswithrsplitospathjoinr   r   global_blockr$   r   save_inference_modelr   r{   r   _loggerinfo)r   save_model_pathr   r   
model_namepath_prefixr!   	feed_varss           r%   save_quantized_model-PostTrainingQuantization.save_quantized_model  s     
! J$$Z00'..sA6q9J'Jggll???C
?NtMM&&(,,T2 	 
 	##^^MM''	
 	7/IJ
s   $2C2c                 $   U R                   cg  [        R                  S5        [        R                  " U R
                  U R                  U R                  U R                  S9u  U l         U l	        U l
        U R                  (       a  U R                  5         U R                   Vs/ s H"  n[        [        U5      U R                   5      PM$     nnU R                  (       a  U R                  U l        g[!        U R"                  5      U l        gs  snf )z!
Load model and set data loader.
Nz"Load model and set data loader ...r   r   r   )r   r  r  r   load_inference_modelr~   r{   r   r   r   r   r   _optimize_fp32_modelr   strr   rI   r   )r   r   r  s      r%   r   )PostTrainingQuantization._load_model_data:  s     == LL=>
 ++#33 $ 5 5		  %%' !OO
+ S]DMM2+ 	 
 !% 0 0D 	69$:K:K6L 	
s   $)Dc                    [         R                  S5        [        [        R                  " U R
                  R                  5      SS9n[        U5      n[        U R                  US5      n[        U R                  US5      n[        U R                  US5      n[        U R                  US5      n[        U R                  US5      nUR                  5       U l        g	)
z8
Fuse the `conv2d/depthwise_conv2d + bn` in FP32 model.
zOptimize FP32 model ...Tr   conv_bn_fuse_passdepthwise_conv_bn_fuse_passconv_transpose_bn_fuse_passconv_eltwiseadd_bn_fuse_pass&depthwise_conv_eltwiseadd_bn_fuse_passN)r  r  r   r	   r   r   r   rA   rV   r}   
to_program)r   r6   s     r%   r  -PostTrainingQuantization._optimize_fp32_modelW  s     	./

4==#5#56F!%(DKK0CDDKK0MNDKK0MNDKK0NOKK H
 ((*r'   c                 
  ^  [         R                  S5        0 T l        U 4S jn[        T R                  5      n[        [        T R                  R                  5      5       GH  nT R                  R                  U   R                   GH  nT R                  bA  [        R                  " U5       H'  nUT R                  ;   d  M  UR                  SS5        M)     UR                  nUS:X  a  UR                  S5      S   nT R                  R                  U   R                   Hh  n[        R                  " U5      n	Xy;   d  M   [        R                  " U5       H.  n
X;  d  M
  UR                  SS5        UR                  SS5        M0     Mj     T R                   (       a:  U[#        [$        R&                  " 5       5      ;  a  [         R)                  US	-   5        / nU H  nS
U;   d  M  UR+                  U5        M     US:H  =(       a?    [        R                  " U5      S   U;   =(       a    [        R                  " U5      S   U;   nUT R,                  R.                  ;   d!  UT R,                  R0                  ;   d  U(       a  US:H  =(       a    UR3                  S5      nU(       a  US-   OUnU" [        R                  " U5      UU5        U" [        R                  " U5      UU5        [        R                  " U5       H7  n[        R                  " U5       H  nUU;   d  M  UT R                  U'   M     M9     GM  UT R,                  R4                  ;   d  GM  U" [        R                  " U5      UU5        GM     GM!     g)zZ
Collect the variable names for sampling, and set activation
variables to be persistable.
z$Collect quantized variable names ...c                    > U  HO  nX1;   a,  TR                   R                  U5        UTR                  U'   M4  TR                  R                  U5        MQ     g r   )r   r1   r   r   )var_name_listr#   r   r   r   s       r%   collect_var_nameKPostTrainingQuantization._collect_target_varnames.<locals>.collect_var_nameq  sK    )43377A6=D))(30044X> *r'   Nop_namescope
skip_quantconv2d_transposeFilterr   rm   conv1d
unsqueeze2	matmul_v2trans_y_trans_y)r  r  r   r&   r   rangerI   blocksr7   r   r   _get_op_input_var_names	_set_attrtypeinput_get_op_output_var_namesr   r   r   r   warningr    r   weight_quant_operation_typesr   rT   observer_operation_types)r   r%  r#   block_idopinp_namer   in_name_opr   r!   conv1d_persistable_var_namesopnameis_conv1d_quantr.  out_var_namein_var_names   `                r%   r   1PostTrainingQuantization._collect_target_varnamesh  s    	;<#% 	? !;4== Ic$--"6"678Hmm**8488))5$)$A$A"$E#t'='==LLF %F ''00 hhx03G#}}33H=AA#(#A#A##F".(-(E(Ec(J#'#D$&LL$N$'MM.,$O )K  B ))gT0557> / OO"GG 02,3F6)4;;FC 4
 , 55b9!<78 55b9!<78   t00MMM((IIJ&&+5M2779;MG6=g
27G$55b9-
 %66r:- ).(F(Fr(J+0+H+H+LK*.CC$0 !% 8 8 E ,M )K  1 1 J JJ$66r:-I 9 9r'   c                     U R                   R                  5        H&  nUR                  U R                  ;   d  M  SUl        M(     g)zZ
Set activation variables to be persistable, so can obtain
the tensor data in sample_data
TN)r   r   r!   r   r   r   r$   s     r%   r   4PostTrainingQuantization._set_activation_persistable  s4    
 ==**,Cxx4777"& -r'   c                    U R                   R                  5        Hg  nUR                  U R                  ;   d  M  SUl        U R
                  R                  UR                  5      R                  5       R                  5         Mi     g)z*
Reset activations to be not persistable.
FN)	r   r   r!   r   r   r}   find_var
get_tensor_clearrF  s     r%   r   6PostTrainingQuantization._reset_activation_persistable  s\     ==**,Cxx4777"'$$SXX.99;BBD -r'   c                    U R                   S:X  a  U R                  5         gU R                   S:X  a  U R                  5         gU R                   S:X  a  U R                  5         gU R                   S:X  a  U R	                  5         gU R                   S:X  a  U R                  5         gU R                   S:X  a  U R                  5         gU R                   S;   a  U R                  5         gg)	z?
Sample the min/max, abs_max or histogram in every iterations.
rd   rf   ri   rg   rh   rj   r   N)r   _sample_abs_max_sample_avg_sample_min_max_sample_mse_sample_emd_sample_ptf_sample_histogram)r   s    r%   r   "PostTrainingQuantization._sampling  s     ::"  "ZZ5 ZZ9$  "ZZ5 ZZ5 ZZ5 ZZ>)""$ *r'   c                     U R                   0 :X  Ga  U R                   GHw  n[        R                  " U R                  U5      nU R
                  S:X  a4  [        [        R                  " [        R                  " U5      5      5      nOU R
                  S:X  a  / nU R                  U   [        R                  ;   ai  [        UR                  S   5       HL  nUR                  [        [        R                  " [        R                  " US S 2U4   5      5      5      5        MN     Oc[        UR                  S   5       HG  nUR                  [        [        R                  " [        R                  " X$   5      5      5      5        MI     WU R                   U'   GMz     [        R!                  S5        U R"                   GH  n[        R                  " U R                  U5      nUR$                  S:X  a  U R&                  R)                  U5        MR  UR+                  5       n[        [        R                  " [        R                  " U5      5      5      nUS:X  a  SOUnSnXR,                  ;  a  [        S	5      U R,                  U'   US
::  d  M  XS-  nUS-  nSU R.                  S-
  -  S-
  nU R0                  (       a=  [        R2                  " [        R4                  " X&-  U-  5      U* S-
  U5      nX-  U-  n	O8[        R4                  " [        R2                  " USU5      U-  U-  5      U-  U-  n	X)-
  S-  R7                  5       n
XR,                  U   ::  a  XR,                  U'   X`R                   U'   US
::  a  M  GM     g )Nrd   ra   r   r   zMSE searching stage ...        :0yE>333333?inf      ?{Gz?r   )r   r   r   load_variable_datar}   r   floatr   maxabsr   _channelwise_quant_axis1_opsr0  shaper    r  r  r   sizer   r1   flattenr   r   r   clipr]   r   )r   r   
var_tensorabs_max_valueisscalebins	quant_varquant_dequant_varmse_losss              r%   rQ  $PostTrainingQuantization._sample_mse  s   $$* ;;"55dkk8L
--:$)"&&
1C*D$EM//3II$&M--h7 ==> "'z'7'7':!;A)00 %bffRVVJq!t4D-E&F G "<
 "'z'7'7':!;A)00 %bffRVVJM-B&C D "< 7D))(3' <( 	./44H11$++xHJ!#))--h7#++-J!"&&
);"<=M$1S$8DmMA:::8=e++H5s()T	T22Q67!;$$ "!3d!:;dUQY!I )2(85(@% S%!@5!H4!OP  &
 (;AGGI::8DD<D//9:?--h7% s(( 5r'   c                    U R                   0 :X  Ga  U R                   GHw  n[        R                  " U R                  U5      nU R
                  S:X  a4  [        [        R                  " [        R                  " U5      5      5      nOU R
                  S:X  a  / nU R                  U   [        R                  ;   ai  [        UR                  S   5       HL  nUR                  [        [        R                  " [        R                  " US S 2U4   5      5      5      5        MN     Oc[        UR                  S   5       HG  nUR                  [        [        R                  " [        R                  " X$   5      5      5      5        MI     WU R                   U'   GMz     [        R!                  S5        U R"                   GH.  n[        R                  " U R                  U5      nUR$                  S:X  a  U R&                  R)                  U5        MR  UR+                  5       n[        [        R                  " [        R                  " U5      5      5      nUS:X  a  SOUnSnXR,                  ;  a  [        S	5      U R,                  U'   US
::  d  M  XS-  nUS-  nSU R.                  S-
  -  S-
  nU R0                  (       a=  [        R2                  " [        R4                  " X&-  U-  5      U* S-
  U5      nX-  U-  n	O8[        R4                  " [        R2                  " USU5      U-  U-  5      U-  U-  n	[        R                  " [        R6                  " U5      [        R6                  " U	5      -
  5      [        R                  " [        R8                  " U5      [        R8                  " U	5      -
  5      -   n
XR,                  U   ::  a  XR,                  U'   X`R                   U'   US
::  a  GM^  GM1     g )Nrd   ra   r   r   zEMD searching stage ...rW  rX  rY  rZ  r[  r\  r   )r   r   r   r]  r}   r   r^  r   r_  r`  r   ra  r0  rb  r    r  r  r   rc  r   r1   rd  r   r   r   re  r]   r   std)r   r   rf  rg  rh  ri  rj  rk  rl  rm  emd_losss              r%   rR  $PostTrainingQuantization._sample_emd   s+   $$* ;;"55dkk8L
--:$)"&&
1C*D$EM//3II$&M--h7 ==> "'z'7'7':!;A)00 %bffRVVJq!t4D-E&F G "<
 "'z'7'7':!;A)00 %bffRVVJM-B&C D "< 7D))(3' <( 	./44H11$++xHJ!#))--h7#++-J!"&&
);"<=M$1S$8DmMA:::8=e++H5s()T	T22Q67!;$$ "!3d!:;dUQY!I )2(85(@% S%!@5!H4!OP  &
 66GGJ'"''2C*DDFF266*-7H0IIJK ::8DD<D//9:?--h7) s(( 5r'   c                    U R                   0 :X  Ga  U R                   GHw  n[        R                  " U R                  U5      nU R
                  S:X  a4  [        [        R                  " [        R                  " U5      5      5      nOU R
                  S:X  a  / nU R                  U   [        R                  ;   ai  [        UR                  S   5       HL  nUR                  [        [        R                  " [        R                  " US S 2U4   5      5      5      5        MN     Oc[        UR                  S   5       HG  nUR                  [        [        R                  " [        R                  " X$   5      5      5      5        MI     WU R                   U'   GMz     U R                   GH$  n[        R                  " U R                  U5      nUR                   S:X  a  U R"                  R%                  U5        MR  [        [        R                  " [        R                  " U5      5      5      nXR&                  ;  a  / U R&                  U'   [        [        R(                  " [        R                  " [        R                  " UR+                  UR                  S   S5      5      SS95      5      nU R&                  U   R                  U5        GM'     g )Nrd   ra   r   r   )axis)r   r   r   r]  r}   r   r^  r   r_  r`  r   ra  r0  rb  r    r   rc  r   r1   r   r   reshape)r   r   rf  rg  rh  abs_avg_values         r%   rO  $PostTrainingQuantization._sample_avgX  s   $$* ;;"55dkk8L
--:$)"&&
1C*D$EM//3II$&M--h7 ==> "'z'7'7':!;A)00 %bffRVVJq!t4D-E&F G "<
 "'z'7'7':!;A)00 %bffRVVJM-B&C D "< 7D))(3' <* 44H11$++xHJ!#))--h7!"&&
);"<=M66646''1!FFz11*2B2B12ErJKM ##H-44]C! 5r'   c                    U R                   0 :X  Ga  U R                   GHw  n[        R                  " U R                  U5      nU R
                  S:X  a4  [        [        R                  " [        R                  " U5      5      5      nOU R
                  S:X  a  / nU R                  U   [        R                  ;   ai  [        UR                  S   5       HL  nUR                  [        [        R                  " [        R                  " US S 2U4   5      5      5      5        MN     Oc[        UR                  S   5       HG  nUR                  [        [        R                  " [        R                  " X$   5      5      5      5        MI     WU R                   U'   GMz     U R                   H  n[        R                  " U R                  U5      nUR                   S:X  a  U R"                  R%                  U5        MQ  [        [        R                  " [        R                  " U5      5      5      nXR                   ;  d  X0R                   U   :  d  M  X0R                   U'   M     g Nrd   ra   r   r   )r   r   r   r]  r}   r   r^  r   r_  r`  r   ra  r0  rb  r    r   rc  r   r1   )r   r   rf  rg  rh  s        r%   rN  (PostTrainingQuantization._sample_abs_max  s   $$* ;;"55dkk8L
--:$)"&&
1C*D$EM//3II$&M--h7 ==> "'z'7'7':!;A)00 %bffRVVJq!t4D-E&F G "<
 "'z'7'7':!;A)00 %bffRVVJM-B&C D "< 7D))(3' <* 44H11$++xHJ!#))--h7!"&&
);"<=M 9 99 9 9( CC6C))(3 5r'   c                    U R                   0 :X  Ga  U R                  0 :X  Ga  U R                   GH  n[        R                  " U R
                  U5      nU R                  S:X  a@  [        [        R                  " U5      5      n[        [        R                  " U5      5      nGO?U R                  S:X  Ga.  / n/ nU R                  U   [        R                  ;   a  [        UR                  S   5       Hm  nUR                  [        [        R                  " US S 2U4   5      5      5        UR                  [        [        R                  " US S 2U4   5      5      5        Mo     O[        UR                  S   5       Hc  nUR                  [        [        R                  " X%   5      5      5        UR                  [        [        R                  " X%   5      5      5        Me     WU R                   U'   WU R                  U'   GM     U R                    H  n[        R                  " U R
                  U5      nUR"                  S:X  a  U R$                  R'                  U5        MQ  [        [        R                  " U5      5      n[        [        R                  " U5      5      nXR                   ;  d  X0R                   U   :  a  X0R                   U'   XR                  ;  d  X@R                  U   :  d  M  X@R                  U'   M     g r{  )r   r   r   r   r]  r}   r   r^  r   minr_  r   ra  r0  rb  r    r   rc  r   r1   )r   r   rf  	min_value	max_valuerh  s         r%   rP  (PostTrainingQuantization._sample_min_max  sS   ""b(T-D-D-J ;;"55dkk8L
--: %bffZ&8 9I %bffZ&8 9I//3II "I "I--h7 ==> "'z'7'7':!;A%,,U266*QT:J3K-LM%,,U266*QT:J3K-LM "< "'z'7'7':!;A%,,U266*-3H-IJ%,,U266*-3H-IJ "< 5>''14=''1) <, 44H11$++xHJ!#))--h7bffZ01IbffZ01I 7 7733H==4=''1 7 7733H==4=''1 5r'   c                    U R                    H  n[        R                  " U R                  U5      nUR                  S:X  d  XR
                  ;  a  U R                  R                  U5        M`  [        R                  " U5      nU R
                  U   S   n[        R                  " X4S9u  pVU R
                  U   S==   U-  ss'   M     g )Nr   r   )rk  )r   r   r]  r}   rc  r   r   r1   r   r`  	histogram)r   r   rf  var_tensor_absrk  re   _s          r%   rT  *PostTrainingQuantization._sample_histogram  s    44H11$++xHJ1$ < <<))--h7VVJ/N//9!<Dll>=GD((215=5 5r'   c                 `   U R                   0 :X  Ga  U R                   GHw  n[        R                  " U R                  U5      nU R
                  S:X  a4  [        [        R                  " [        R                  " U5      5      5      nOU R
                  S:X  a  / nU R                  U   [        R                  ;   ai  [        UR                  S   5       HL  nUR                  [        [        R                  " [        R                  " USS2U4   5      5      5      5        MN     Oc[        UR                  S   5       HG  nUR                  [        [        R                  " [        R                  " X$   5      5      5      5        MI     WU R                   U'   GMz     U R                   GH  n[        R                  " U R                  U5      nUR                   S:X  a  U R"                  R%                  U5        MR  [        [        R                  " [        R                  " U5      5      5      nSU R&                  S-
  -  S-
  nX5-  nUS-  nUS-  nUS-  n	[        R(                  " [        R*                  " X)-  5      SU5      U	-  n
[        R(                  " [        R*                  " X(-  5      SU5      U-  n[        R(                  " [        R*                  " X'-  5      SU5      U-  n[        R(                  " [        R*                  " X&-  5      SU5      U-  n[        R,                  " X*5      n[        R,                  " X+5      n[        R,                  " X,5      n[        R,                  " X-5      nXUU/nSUR/                  [1        U5      5      -  nU	U-  nUU-  nUU R                   U'   GM     g)zR
The following code are modified from:
https://github.com/megvii-research/FQ-ViT/
rd   ra   r   Nr   r   )r   r   r   r]  r}   r   r^  r   r_  r`  r   ra  r0  rb  r    r   rc  r   r1   r   re  r]   l2_lossindexr~  )r   r   rf  rg  rh  q_maxscale8scale4scale2scale1quant_dequant_var_scale1quant_dequant_var_scale2quant_dequant_var_scale4quant_dequant_var_scale8score1score2score4score8scoremaskrj  	thresholds                         r%   rS  $PostTrainingQuantization._sample_ptf  s   
 $$* ;;"55dkk8L
--:$)"&&
1C*D$EM//3II$&M--h7 ==> "'z'7'7':!;A)00 %bffRVVJq!t4D-E&F G "<
 "'z'7'7':!;A)00 %bffRVVJM-B&C D "< 7D))(3' <* 44H11$++xHJ!#))--h7!"&&
);"<=M$//!34q8E"*FaZFaZFaZF!45q%@6I % !45q%@6I % !45q%@6I % !45q%@6I % ]]:HF]]:HF]]:HF]]:HFVV4ECJ//DTMEI2;D%%h/? 5r'   c                    U R                   S:X  d   S5       e[        [        U R                  R                  5      5       GH  nU R                  R                  U   R
                   H  nUR                  U R                  R                  ;   d&  UR                  U R                  R                  ;   d  MM  [        R                  " U5       H{  nX0R                  ;   d   eX0R                  ;   d   eUR                  US-   U R                  U   5        UR                  US-   U R                  U   5        UR                  SS5        M}     M     GM     g)z+
Save input threshold to the quantized op.
ri   z3The algo should be min_max to save input threshold.z.minz.maxwith_quant_attrTN)r   r0  rI   r   r1  r7   r4  r   r8  r   r   r2  r   r   r3  )r   r:  r;  r   s       r%   r   .PostTrainingQuantization._save_input_threshold  s$    zzY& 	
A	
& c$--"6"678Hmm**8488GGt00MMMww((IIJ %*$A$A"$E'+B+BBBB'+B+BBBB$v-t/F/Fx/P $v-t/F/Fx/P %6= %F 9 9r'   c                 T   U R                    GH  n[        R                  " U R                  U5      nUR                  S:X  a  U R
                  R                  U5        MR  [        R                  " U5      n[        [        R                  " U5      5      n[        [        R                  " U5      5      nXR                  ;  a  UU/U R                  U'   M  X0R                  U   S   :  a  X0R                  U   S'   X@R                  U   S   :  d  GM  X@R                  U   S'   GM     g)z
Collect the abs_min and abs_max for all activation. When algo = KL,
get the min and max value, and then calculate the threshold.
r   r   N)r   r   r]  r}   rc  r   r1   r   r`  r^  r~  r_  r   )r   r   rf  r  r  s        r%   r   8PostTrainingQuantization._collect_activation_abs_min_max-  s    
 44H11$++xHJ!#))--h7
+JbffZ01IbffZ01I===<..x8
 ==hGJJBK228<Q?==hGJJBK228<Q?# 5r'   c                 :   U R                    H  nXR                  ;   a  XR                  ;  a  M#  XR                  ;  d  M4  U R                  U   S   nU R                  U   S   n[        R
                  " / U R                  X#4S9u  pEXE/U R                  U'   M     g)z>
Based on the min/max value, init the sampling_act_histogram.
r   r   rk  r0  N)r   r   r   r   r   r  r   )r   r   min_valmax_valre   
hist_edgess         r%   r   5PostTrainingQuantization._init_sampling_act_histogramE  s     44H555 > >>;;;88B1E88B1E#%<<T11'9K$  ;?9K,,X6 5r'   c                    [         R                  SU R                   S35        U R                  S;   d   S5       eU R                   GHw  n[        R
                  " U R                  U5      nU R                  S:X  a4  [        [        R                  " [        R                  " U5      5      5      nOU R                  S:X  a  / nU R                  U   [        R                  ;   ai  [        UR                  S   5       HL  nUR!                  [        [        R                  " [        R                  " USS2U4   5      5      5      5        MN     Oc[        UR                  S	   5       HG  nUR!                  [        [        R                  " [        R                  " X$   5      5      5      5        MI     WU R"                  U'   GMz     U R$                   H  nXR&                  ;   a  XR(                  ;  a  M#  U R(                  U   u  pVU R                  S
:X  a0  US   US	   -
  n[+        XWU R,                  5      U R"                  U'   Mt  U R                  S:X  d  M  U R/                  XV5      U R"                  U'   M     g)z<
Calculate the KL or hist threshold of quantized variables.
z
Calculate z threshold ...r   zThe algo should be KL or hist.rd   ra   r   Nr   r[   re   )r  r  r   r   r   r]  r}   r   r^  r   r_  r`  r   ra  r0  rb  r    r   r   r   r   r   r   _get_hist_scaling_factor)r   r   weight_dataweight_thresholdrh  re   r  	bin_widths           r%   r   5PostTrainingQuantization._calculate_kl_hist_thresholdV  s	    	z$**^<=zz^+M-MM+ 77H224;;IK))Y6#({0C)D#E ++/EE#% ))(399: #;#4#4Q#78(//!"&&AqD0A)B"CD 9
 #;#4#4Q#78(//!"&&)?"@A 9 7GD))(3' 8* 44H555 < <<#;;HEDzzT!&qMJqM9	:JT%:%:;--h7 v%11$C --h7 5r'   c                 >   [         R                  S5        [        [        R                  " U R
                  R                  5      SS9nU R                  (       d`  [        U R                  U R                  U R                  U R                  U R                  U R                  U R                  R                   S9nO_[#        U R                  U R                  U R                  U R                  U R                  U R                  U R                  R                   S9nUR%                  5        H  nSUl        UR)                  U5        M     U R                  (       d4  [+        U R                  U R                  U R                  R,                  S9nO3[/        U R                  U R                  U R                  R,                  S9nUR%                  5        H  nSUl        UR)                  U5        M     U R0                  Gc  U R2                  S;   a  U R4                  nOU R6                  nU R8                  GbV  U R8                   GHE  nSnU H  nSU;   a  UR;                  S5      u  pnXR=                  5       ;  a  M2  U
S	:X  a  [?        XY   5      [?        U5      -  XY'   O!U
S
:X  a  [?        XY   5      [?        U5      -  XY'   Uc  XY   O[A        XuU	   5      nM  XR=                  5       ;  a  M  Uc  XX   O[A        XuU   5      nM     U H}  nSU;   a[  UR;                  S5      u  pnXR=                  5       ;  a  M2  U
S	:X  a  U[?        U5      -  XY'   MJ  U
S
:X  a  U[?        U5      -  XY'   Mb  Md  XR=                  5       ;  a  My  XuU'   M     GMH     XPl        U R0                  RC                  5        H  u  p[D        RF                  " U R                  U R                  US-   [H        RJ                  " U/[H        RL                  S95        [D        RF                  " U R                  U R                  US-   [H        RJ                  " U/[H        RL                  S95        M     U R                  (       d  U RN                  (       a  [Q        U R                  U R                  U RR                  U R                  U RT                  U R                  U R                  U R                  R                   S9nUR%                  5        H  nSUl        UR)                  U5        M     O[W        U R                  U R                  5      nUR%                  5        H  nSUl        UR)                  U5        M     U R                  R                   U R                  R,                  -   U R                  RX                  -   n[[        U R                  U R                  U R                  UU R0                  S9nUR%                  5        H  nSUl        UR)                  U5        M     UR]                  5       U l        g)z
Use QuantizationTransformPass and AddQuantDequantPass to insert
fake_quantize, fake_dequantize and fake_quant_dequant op.
Besides, save all threshold to the scale var node.
zUpdate the program ...Tr   )rM   r   r   r   r   r   rr   )rM   r   rr   Nr   #*/z@scaledtypez.quant_dequant@scale)rM   r   r   r   r   r   r   rr   )rM   r   rs   rr   calibration_range_dict)/r  r  r   r	   r   r   r   r   r   r}   r   r   r   r   r   r   r8  r   all_sub_graphs	_for_testrK   r   r   r   r   r   r   r   r   splitr   r^  r_  itemsr   set_variable_datar   r   float32r   r   rz   rw   r   r9  r   r   )r   r6   transform_pass	sub_graphadd_quant_dequant_passr   tensor_list	max_scaletensor_namereal_tensor_nameoperascalarkeyvalfreeze_passquant_weight_passinfer_pass_quant_op_typesout_scale_infer_passs                     r%   r   (PostTrainingQuantization._update_program  sq    	-.

4==#5#56F   6kkkk -- $ 5 5)-)G)G%)%?%?$($5$5$R$RN 9kkkk -- $ 5 5)-)G)G%)%?%?$($5$5$R$RN --/I #'I  +	 0   %8kkkk$($5$5$V$V&" &;kkkk$($5$5$V$V&" --/I"&I"((3 0
 #zz^+!::
!66
++7#'#?#?K $I'2+->I>O>O #?;,V  07HH ($|?D$.$@@"$)&M@2
 < "'#?D$.$@@"$)&M@2
 <
 $-#4 !+ <%($-:J/K&" &  +//2CC ( $-#4 !+ 7%({4K%L &3 (3> (3+->I>O>O #?;,V  07HH ($|$-f$= !+ < "'#$-f$= !+ < ".
  +//2CC (6?{3% (3C $@h  *((..0HC##h#bjj1	 ##,,#bjj1	 1   !!4++++$($9$9 $ 1 1#//$($9$9)-)C)C(,(9(9(V(V	 "'!5!5!7I*.I'%%i0 "8 !0T[[ I"113	&*	#!''	2 4
 !!>>##DDE##<<= &
 $Ckkkk00$='+'7'7$  #113	&*	#$**95 4 ((*r'   c                   ^ ^^ 0 T l         UU 4S jmUU 4S jn[        [        T R                  R                  5      5       H  nT R                  R                  U   R
                   H  mTR                  T R                  R                  T R                  R                  -   T R                  R                  -   ;   d  MW  [        R                  " T5      nU H  nU" TU5        M     M     M     g)z,
Save output threshold to the quantized op.
c                 \  > UTR                   ;   a  X;  a  [        R                  U S35        g X;   d   SU SU R                   S35       eTR                  (       a%  0 TR
                  U'   UU   TR
                  U   S'   g U R                  X2U   5        U R                  US   [        US   5      -   S-   X!   5        U R                  S	S
5        U R                  TR                  R                  ;   d$  U R                  TR                  R                  ;   a  TR                  SU5        g g )Nz? is zero-size tensor and unable to calibrate, so skip quant it.zThe output (z) of z node does not have threshold.rj  r   r   
_thresholdr  Tquantization_type)r   r  r7  r4  r   _calibration_scalesr3  r  r   r8  r   )r8   rB  threshold_mapout_info_nameargname_indexquantized_typer;  r   s         r%   	save_infoBPostTrainingQuantization._save_output_threshold.<locals>.save_info5  s?     9 991#n$cd #4 "<.gll^Cab4   9;((6BO C((6w? !!-|1LM!!!!$s=+;'<<|K!/ !!"3T:LL((EEF||((IIJ LL!4nEJr'   c                   > [         R                  " X5      nUc
   US-   5       eTR                  S;   a<  T" U UTR                  SUS[	        TR                  5      R                  5       -   5        g TR                  S;   a.  T" U UTR                  SUS[	        TR                  5      -   5        g TR                  S:X  a/  T" U UTR                  SUS5        T" U UTR                  S	US5        g g )
Nz is not the output of the opr   out_thresholdpost_)rf   rd   rg   rh   rj   ri   out_minpost_min_maxout_max)	r   _get_output_name_indexr   r   r  r   r   r   r   )r8   rB  r  r  r   s      r%   analysis_and_save_infoOPostTrainingQuantization._save_output_threshold.<locals>.analysis_and_save_info]  s   !88OM , ==, zz^+ 11#!c$**o3355 FF --#!c$**o- y( ++!"  ++!" )r'   N)r  r0  rI   r   r1  r7   r4  r   r8  r   r9  r   r6  )r   r  r:  out_var_namesr   r;  r  s   `    @@r%   r   /PostTrainingQuantization._save_output_threshold/  s     $& &	FP(	T c$--"6"678Hmm**848877%%BB''HHI''@@A
 %*$B$B2$FM$1.r8< %2 9 9r'   c           	      \   / n[        U R                  R                  5       HR  nU R                  R                  U5      R                   H&  nUR
                  U;   d  M  UR                  U5        M(     MT     [        SU R                  -   5      R                  5       n[        U R                  5      nU H  n[        R                  " U5       H  nXv;   d  M
  [        R                  " U R                  U5      n[        [         R"                  " [         R$                  " U5      5      5      n	[        R&                  " XG5      u  pUR)                  U
[        U5      -   S-   U	5        UR)                  SU5        UR)                  SU R*                  5        UR)                  SS5        M     M     g)z
Collect and save the weight threshold for dynamic quantize ops,
such as lstm and gru.
Args:
    target_ops_type(list): the op type of target ops
Returns:
    None
r  r  r  
bit_lengthr  TN)r0  r   
num_blocksblockr7   r4  r    r  r   r   r&   r   r2  r]  r}   r^  r   r_  r`  _get_input_name_indexr3  r   )r   target_ops_type
target_opsr  r;  r  r#   r   var_datar  argnames              r%   r   ?PostTrainingQuantization._collect_dynamic_quantize_op_threshold  sA    
4==334Emm))%04477o-%%b) 5 5
  $** 45;;= :4== IB!99"=4$77XNH %bffRVVH-=&> ?I%*%@%@%NNGLL3u:!5!DiPLL!46GHLLt/@/@ALL!2D9 > r'   c                     U R                   nU[        [        U5      5      -  nSnSn[        [	        U5      5       H  nXAU   -  nXC:  d  M  US-   n  O   US   US   -
  nUS-
  U-  $ )z2
Using the hist method to get the scaling factor.
r   r   g      ?)r   r^  sumr0  rI   )r   re   r  threshold_ratehist_sum
hist_indexrh  r  s           r%   r  1PostTrainingQuantization._get_hist_scaling_factor  s     ++eCI&&
s4y!AQH)U
	 "
 qMJqM1	S I--r'   )6r   r   r   r   r   r   r   r   rz   r  r   r   ry   r{   r   r   r   r   r   r   rx   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rw   r   r   r   r   r   r   r}   r   rt   rv   ru   r   r   r   r   r   NN)__name__
__module____qualname____firstlineno____doc__r   r   r   r  r   r  r   r   r   r   rQ  rR  rO  rN  rP  rT  rS  r   r   r   r   r   r   r   r  __static_attributes__ r'   r%   rX   rX   p   s     !03#AfrP	`D
, EI"KH
:+"[z'E%&4@l6@p'DR DD&>P>:<x>2L0L"+Zj+Xa=F::.r'   rX   c                   h   ^  \ rS rSrSSSSSSSSSS/ SQSSS	S	S
S
SSS	SS	S	SSSSS4U 4S jjrSrU =r$ )PostTrainingQuantizationProgrami  NrZ   r[   r\   conv2ddepthwise_conv2dmulr]   r^   Fr_   r`   ra   Tc                     > [         TU ]  " UUS S S UUUU	U
UUUUUUUUUUUUUUUUUUUU5        SU l        X l        U R                  b  SU l        Uc   S5       eUc   S5       eX0l        X@l        g )NFTzFeed list should not be None.zFetch list should not be None.)superr   r   r   r   r   ) r   r   r"   	feed_listr   rM   r   r   r   r   r   r   r   rr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                                   r%   r   (PostTrainingQuantizationProgram.__init__  s    B 	$ "=	
@ 	==$DI$E&EE$%G'GG%#%r'   )r   r   r   r   )r  r  r  r  r   r  __classcell__)r   s   @r%   r  r    se    
 A!03#?H& H&r'   r  c                       \ rS rSr/ SQrSS/rSS jrSSSS/S	SS
S4S jrS rS r	S r
S rS rS rS rS rSS jrSrg)WeightQuantizationi  r  ra   rd   Nc                 (    Xl         X l        X0l        g)a  
This class quantizes the weight of some ops to reduce the size of model
or improve the performance.

Args:
    model_dir(str): The path of the fp32 model that will be quantized,
        and the model and params files are under the path.
    model_filename(str, optional): The name of file to load the inference
        program. If it is None, the default filename '__model__' will
        be used. Default is 'None'.
    params_filename(str, optional): The name of file to load all parameters.
        When all parameters were saved in a single binary file, set it
        as the real filename. If parameters were saved in separate files,
        set it as 'None'. Default is 'None'.
Nr~   r   r   )r   r   r   r   s       r%   r   WeightQuantization.__init__  s      $- /r'   r  r  r_   FrW  c	           
         U H  n	XR                   ;   a  M   SU	-   S-   5       e   US;   d   S5       eX`R                  ;   d   SU R                   35       e[        R                  R	                  US5      n
U R                  U
UUUUUSU5        U(       a9  [        R                  R	                  US5      nU R                  UUUUUUS	U5        g
g
)ac  
In order to reduce the size of model, this api quantizes the weight
of some ops from float32 to int8/16. In the inference stage, the
quantized weight will be dequantized to float32 again.

Args:
    save_model_dir(str): The path to save the quantized model.
    save_model_filename(str, optional): The name of file to
        save the inference program. If it is None, the default
        filename '__model__' will be used. Default is 'None'.
    save_params_filename(str, optional): The name of file to
        save all parameters. If it is None, parameters were
        saved in separate files. If it is not None, all
        parameters were saved in a single binary file.
    quantizable_op_type(list[str], optional): The list of ops
        that will be quantized, and the quantized ops should be
        contained in ["conv2d", "depthwise_conv2d", "mul"].
        Default is ["conv2d","mul"].
    weight_bits(int, optional): The bits for the quantized weight,
        and it should be 8 or 16. Default is 8.
    weight_quantize_type(str, optional): quantization type for weights,
        support 'channel_wise_abs_max' and 'abs_max'. Set it as
        'channel_wise_abs_max', the accuracy performs better.
    generate_test_model(bool, optional): If set generate_test_model
        as True, it saves a fake quantized model, in which the weights
        are quantized and dequantized. We can use PaddlePaddle to load
        the fake quantized model and test the accuracy on GPU or CPU.
    threshold_rate(float, optional): This api uses abs_max method to
        quantize the weight from float32 to int8/16, and the abs max
        value is important for quantization diff. When the abs_max
        value is far away from the center of the numerical distribution,
        we can set threshold_rate between 1e-6 and 1e-8, so the abs max
        value will be optimized. Default is 0.0.
zInput error:z* is not supported for weight quantization.)r_      z+Input error: weight_bits should be 8 or 16.z,Input error: weight_quantize_type should in quantized_modelF
test_modelTN)_supported_quantizable_op_type_supported_weight_quantize_typer  r  r	  _quantize_weight_to_int)r   save_model_dirsave_model_filenamesave_params_filenamerr   r   r   generate_test_modelr  r   quantized_model_dirtest_model_dirs               r%   quantize_weight_to_int)WeightQuantization.quantize_weight_to_int$  s   Z +GAAA >?A +  
 
 	9 9	9 
 $'K'KK 	
:4;_;_:`a	
K !ggll>;LM$$  		
 WW\\.,GN((#$#$	 r'   c           	         [         R                  " 5       n[        R                  " U5      n[        R                  " 5       n[        R
                  " U R                  UU R                  U R                  S9u  pVn[        R                  " 5       nUR                  5       n	0 n
UR                  5        GH5  nUR                  [         R                  R                  R                  :X  dS  UR                   (       aB  UR"                  S;   d2  UR$                  [         R                  R                  R&                  :w  a  M  U	R)                  U5      nU R                  b  XUR"                  '   M  [*        R,                  R/                  [*        R,                  R1                  U5      UR"                  5      nU	R3                  SSU/00 [*        R,                  R1                  U5      SS.S9  GM8     U R                  b  / n[5        U
R7                  5       5       H  nUR9                  X   5        M     U	R;                  [         R                  R                  R                  [<        R>                  " S	5      S
9nUR@                  RC                  S5        [*        R,                  R/                  [*        R,                  R1                  U5      U R                  5      nU	R3                  SSU0SU0USS.S9  URE                  5         URG                  U5        U R                  c  SOU R                  n[*        R,                  R/                  U R                  U5      n[*        R,                  R/                  UU5      n[H        RJ                  " UU5        g)z
Convert all persistable vars from fp32 to fp16.
Note that, this api only changes the data type of variables in
__params__ file, and the __model__ file remains unchanged.

Args:
    save_model_dir(str): The path to save the fp16 model.
r  )r   fetchNsaveXT)	file_pathsave_as_fp16)r4  r0   r2   rO   saved_params)r4  r!   save_combineY	__model__)&r	   CPUPlacer   Executorr|   r  r~   r   r   Programr
  r   r4  VarDescVarTypeRAWr   r!   r  FP32_clone_variabler  r  r	  normpath	append_opsortedr   r    
create_varr
   generater   set_persistable_sync_with_cppr   shutilcopyfile)r   r  r   exerM   infer_programr  r   save_program
save_blocksave_var_mapr$   new_varsave_file_pathsave_var_listr!   saved_params_var	save_pathr   	src_model
dest_models                        r%   convert_weight_to_fp16)WeightQuantization.convert_weight_to_fp16x  s    ooe$##%171L1LOO// 11	2
.: ~~'!..0
 **,CT\\11555HH 11II!5!5!:!:: !005G$$0-4W\\*!#GG$$^4gll" $$'+%'WW%5%5n%E(,	 % # -6   ,M|0023$$\%78 4  *44\\))-- )).9  5   !!11$7  0$2G2GI   #]+./$-tD	 !  	##%
 ##+ %% 	
 GGLL.A	WW\\..A
	:.r'   c	                    [         R                  " 5       n	[        R                  " U	5      n
[        R                  " 5       n[        R
                  " U R                  U
U R                  U R                  S9u  pn/ n[        UR                  5       HJ  nUR                  U5      nUR                   H&  nUR                  U;   d  M  UR                  U5        M(     ML     [        U5      nU HZ  nUR                    HG  nUU;   d  M  US:X  a  U R#                  UU	UUUUU5        M*  US:X  d  M2  U R%                  XUUUU5        MI     M\     SnUc  SnO.UR'                  S5      (       a  UR)                  SS5      S	   nOUn[*        R,                  R/                  UU5      nU Vs/ s H"  nUR1                  5       R3                  U5      PM$     nn[        R4                  " UUUU
US
9  gs  snf )z3
Generate quantized model or fake quantized model.
r  rd   ra   Nr  r  rD   r   r   )r   r"   )r	   r!  r   r"  r|   r  r~   r   r   r0  r  r  r7   r4  r    r&   input_arg_names_weight_abs_max_quantization)_weight_channel_wise_abs_max_quantizationr  r  r  r  r	  r
  r$   r  )r   r  r  r  rr   r   r   r   r  r   r2  rM   r"   r  r   quantized_opsr  r  r;  r#   r   r  r  r!   r  s                            r%   r  *WeightQuantization._quantize_weight_to_int  s    ooe$##%+1+F+FOO// 11	,
(Z 7--.EMM%(Eii7711!((,   / !;7 CB..44+y899!!'*$$ .1GGFF!+r8X /  " 
& J ))*55,33C;A>J,Jggll>:>BKL)$W))+//5)	L##	
 Ms   #)G'c                    SUS-
  -  S-
  nUS:X  a  [         R                  O[         R                  n	[        R                  " X5      n
[        U5      S:  a+  [         R                  " [         R
                  " U
5      5      nO U R                  X5      nXX:  '   U* XU* :  '   X-  n[         R                  " X-  5      R                  U	5      nU(       d  [        R                  " XXm5        O8X-  R                  [         R                  5      n[        R                  " XXn5        UR                  SS5        UR                  SU5        UR                  US-   U/5        UR                  SS	5        g
)z(
Use abs_max method to quantize weight.
r   r_   g|=r  post_weight_abs_maxquantize_weight_bits_quant_scaler  TN)r   int8int16r   r]  r`  r_  _calculate_thresholdaroundastyper  r  r3  )r   rM   r   r   r  r;  r   r   quantize_rangesave_weight_dtyper  threshold_valuerj  quantized_weight_datadequantized_weight_datas                  r%   rB  /WeightQuantization._weight_abs_max_quantization  sO    a0A5'2a'7BGGRXX ..u?~& ffRVVK%89O"77O :I56;J:JK&6670 "		+*= > E E!

 ##h (='D&L&L

'# ##h
 	(*?@
+[9
X.8
&-r'   c                 B   SUS-
  -  S-
  nUS:X  a  [         R                  O[         R                  n[        R                  " X5      n	UR
                  S:X  a  U R                  XU5      u  pOHUR
                  S;   a  U R                  XU5      u  n
nO"[        R                  UR
                  S-   5        U(       d  [        R                  " XUW5        OUR
                  S:X  a  U R                  WW
5      nOEUR
                  S;   a  U R                  WW
5      nO"[        R                  UR
                  S-   5        [        R                  " XUW5        UR                  SS5        UR                  SU5        UR                  US	-   W
5        UR                  S
S5        g)z5
Use channel_wise_abs_max method to quantize weight.
r   r_   r  )r  r  z( is not supported by weight quantizationr   post_weight_channel_wise_abs_maxrH  rI  r  TN)r   rJ  rK  r   r]  r4  _mul_channel_wise_quantization_conv_channel_wise_quantizationr  errorr   _mul_channel_wise_dequantization!_conv_channel_wise_dequantizationr3  )r   rM   r   r   r;  r   r   rO  rP  r  scalesrR  rS  s                r%   rC  <WeightQuantization._weight_channel_wise_abs_max_quantization?  s    a0A5'2a'7BGGRXX ..u?77e,0,O,O->-)F) WW66 44->%
 MM"''$NNO ##h(= ww%*.*O*O)6+' ::::-v ( GGHH ##h(?
 	(*LM
+[9
X.7
&-r'   c                 N   / n[         R                  " XS9nUR                  S   n[        U5       Ho  n[         R                  " [         R
                  " X   5      5      U-  nUR                  U5        [         R                  " X   U-  5      R                  U5      XW'   Mq     XE4$ )b
Get channel wise scale for the weights of conv2d and depthwise_conv2d,
and quantize the weights.
r  r   	r   
zeros_likerb  r0  r_  r`  r    rM  rN  	r   r  rO  rP  r\  rR  channel_numrh  rj  s	            r%   rX  2WeightQuantization._conv_channel_wise_quantizationv  s      "!
 "''*{#AFF266+.12^CEMM% ')yy%1G'H'O'O!(!$ $ ,,r'   c                     [         R                  " U[         R                  S9n[        [	        U5      5       H+  nX   X$   -  R                  [         R                  5      X4'   M-     U$ )zB
For conv2d and depthwise_conv2d, dequantize the weights to fp32.
r  r   ra  r  r0  rI   rN  r   rR  r\  rS  rh  s        r%   r[  4WeightQuantization._conv_channel_wise_dequantization  s\     #%--!#
 s6{#A%(694fRZZ  $& $ '&r'   c                 l   / n[         R                  " XS9nUR                  S   n[        U5       H~  n[         R                  " [         R
                  " USS2U4   5      5      U-  nUR                  U5        [         R                  " USS2U4   U-  5      R                  U5      USS2U4'   M     XE4$ )r_  r  ru  Nr`  rb  s	            r%   rW  1WeightQuantization._mul_channel_wise_quantization  s      "!
 "''+{#AFF266+ad"345FEMM% *,))AqD!E)+f&' "!Q$' $ ,,r'   c                     [         R                  " U[         R                  S9n[        [	        U5      5       H5  nUSS2U4   X$   -  R                  [         R                  5      USS2U4'   M7     U$ )z*
For mul, dequantize the weights to fp32.
r  Nrf  rg  s        r%   rZ  3WeightQuantization._mul_channel_wise_dequantization  sh     #%--!#
 s6{#A%ad+fi7fRZZ  $AqD) $ '&r'   c                 @   [         R                  " U5      n[         R                  " XCS[         R                  " U5      4S9u  pVU[	        [        U5      5      -  nSnSn[        [        U5      5       H  n	XuU	   -  nUSU-
  :  d  M  U	S-   n  O   US   US   -
  n
X-  $ )Nr   r  r[  r   )r   r`  r  r_  r^  r  r0  rI   )r   r5  r  histogram_bins	input_absre   r  r  r  rh  r  s              r%   rL  'WeightQuantization._calculate_threshold  s    FF5M	<<1bffY6G2H
 eCI&&
s4y!AQH3//U
	 "
 qMJqM1	%%r'   r  r  )i  )r  r  r  r  r  r  r   r  r>  r  rB  rC  rX  r[  rW  rZ  rL  r  r  r'   r%   r  r    sn    %J"'=y&I#0. !!%u-3!RhU/nC
J).V5.n-('-('&r'   r  )NNF)+loggingr  r0  numpyr   r   r   paddle.base.frameworkr   r    r   	frameworkr	   r
   
log_helperr   rk   r   r   r   r   r   r   r   quantization_passr   r   r   r   r   r   r   r  INFOr  r&   r=   rA   rV   rX   r  r  r  r'   r%   <module>ry     s     	   4     #  " .    gll H
!& BG(M. M.`*I&&> I&Xz& z&k/s   B) )B3