
    ёi9@                    *   S SK r S SKrS SKr S SKJr  S SKrSSKJrJ	r	  SSK
JrJr  SSKJrJrJrJr  SSKJr  S	S
KJr  SSKJr  SSKJrJrJr  \" \\R6                  SS9r/ SQrSS/r/ SQr/ SQr Sr!S r"S r# " S S5      r$ " S S5      r% " S S5      r& " S S5      r' " S S 5      r( " S! S"5      r) " S# S$5      r* " S% S&5      r+ " S' S(\$5      r, " S) S*5      r- " S+ S,5      r. " S- S.5      r/ " S/ S05      r0 " S1 S25      r1g!   SSKJr   N= f)3    N)tqdm      )IrGraphIrNode)_get_paddle_placecore)Programdataprogram_guardscope_guard)unique_name   )
get_logger)utils) SUPPORT_ACT_QUANTIZATION_OP_DICTSUPPORT_QUANTIZATION_OP_DICT#SUPPORT_WEIGHT_QUANTIZATION_OP_DICTz&%(asctime)s-%(levelname)s: %(message)s)fmt)fake_quantize_abs_maxfake_quantize_range_abs_max$fake_quantize_moving_average_abs_max"fake_channel_wise_quantize_abs_maxfake_dequantize_max_abs$fake_channel_wise_dequantize_max_abs)/fake_quantize_dequantize_moving_average_abs_max-fake_channel_wise_quantize_dequantize_abs_max fake_quantize_dequantize_abs_max)conv2ddepthwise_conv2dconv2d_transposegMbP?c                     [        U[        R                  5      (       d   S5       eUc   S5       eUc   S5       eUR                  U R	                  5       5      R                  5       nUR                  X5        g )Nz(The type of value should be numpy array.The scope cannot be set None.The place cannot be set None.)
isinstancenpndarrayvarname
get_tensorset)var_nodevaluescopeplacetensors        l/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/static/quantization/quantization_pass.py_init_var_noder2   B   sr    eRZZ(( 2( ======YYx}}'224F
JJu    c                     Sn[         R                  " U5       H=  nU R                  UR                  U5      nU=(       a    UR	                  5       (       + nM?     U$ )zA
Analyse the real inputs of the op node are all not persistable.
T)r   _get_op_input_var_names_find_node_by_nameinputspersistable)graphop_nodeis_input_all_not_persistablevar_namein_nodes        r1   _is_input_all_not_persistabler>   L   sY     $( 11':**7>>8D'C (
##%% 	% ;
 ('r3   c                       \ rS rSrSrSSSSSSSSS// S	QSSSSSSS4S
 jrS rS rS rS r	S r
S rS rS rS rS rS rS rS rS rS rS rSrg)QuantizationTransformPassY   z_
Quantize the ops that have weights. Add quant and dequant ops for
the quantized ops's inputs.
N   abs_max'  ?
skip_quantr   r    mulc                 Z   Xl         [        U5      U l        X0l        X@l        Xl        Xl        Xl        Xl        Xl	        Xl
        UU l        / SQnUS:w  d   S5       eUU;  a  [        SU S35      eUU;  a  [        SU S35      eXPl        X`l        Xpl        Xl        Xl        U R"                   H/  nU[%        [&        R(                  " 5       5      ;   a  M'   US-   5       e   U R"                   Vs/ s H  nU S	3PM
     snU l        UU l        S
U l        0 U l        0 U l        g
s  snf )a  
Constructor.

Args:
    scope(static.Scope): When activation use 'range_abs_max' as the quantize
        type, this pass will create some new parameters. The scope is used to
        initialize these new parameters.
    place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
        parameters described above. If it's string, It can be ``cpu``, and ``gpu:x``,
        where ``x`` is the index of the GPUs.
    weight_bits(int): quantization bit number for weights,
        the bias is not quantized.
    activation_bits(int): quantization bit number for activation.
    activation_quantize_type(str): quantization type for activation,
        now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
        If use 'abs_max' mode, the quantization scale will be calculated
        dynamically each step in both training and testing period. If use
        'range_abs_max', a static quantization scale will be calculated
        during training and used in inference.
    weight_quantize_type(str): quantization type for weights,
        support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max'
        usually is not used for weight, since weights are fixed once the
        model is well trained.
    window_size(int): the window size for 'range_abs_max' quantization.
    moving_rate(float): the param for 'moving_average_abs_max' quantization.
    skip_pattern(str or str list): The user-defined quantization skip pattern, which
        will be presented in the name scope of an op. When the skip pattern is
        detected in an op's name scope, the corresponding op will not be quantized.
    quantizable_op_type(list[str]): List the type of ops that will be quantized.
        Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
        QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
    weight_quantize_func(function): Function that defines how to quantize weight.
        Using this can quickly test if user's quantization method works or not.
        In this function, user should both define quantization function and
        dequantization function, that is, the function's input is non-quantized
        weight and function returns dequantized weight. If None, will use
        quantization op defined by 'weight_quantize_type'. Default is None.
    act_quantize_func(function): Function that defines how to quantize activation.
        Using this can quickly test if user's quantization method works or not.
        In this function, user should both define quantization and dequantization
        process, that is, the function's input is non-quantized activation and
        function returns dequantized activation. If None, will use quantization
        op defined by 'activation_quantize_type'. Default is None.
    weight_preprocess_func(function): Function that defines how to preprocess
        weight before quantization. Using this can quickly test if user's preprocess
        method works or not. The function's input is non-quantized weight and
        function returns processed weight to be quantized. If None, the weight will
        be quantized directly. Default is None.
    act_preprocess_func(function): Function that defines how to preprocess
        activation before quantization. Using this can quickly test if user's
        preprocess method works or not. The function's input is non-quantized
        activation and function returns processed activation to be quantized.
        If None, the activation will be quantized directly. Default is None.
    optimizer_func(function): Function return a optimizer. When 'is_test' is
        False and user want to use self-defined quantization function and
        preprocess function, this function must be set. Default is None.
    executor(base.Executor): If user want to use self-defined quantization
        function and preprocess function, executor must be set for initialization.
        Default is None.


Examples:
    .. code-block:: python

        >>> # The original graph will be rewrite.
        >>> import paddle.static as static
        >>> from paddle.static.quantization import QuantizationTransformPass
        >>> from paddle.base.framework import IrGraph
        >>> from paddle.framework import core

        >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
        >>> place = paddle.CPUPlace()
        >>> transform_pass = QuantizationTransformPass(static.global_scope(), place)
        >>> transform_pass.apply(graph)
rC   channel_wise_abs_maxrange_abs_maxmoving_average_abs_maxrK   IThe activation quantization type does not support 'channel_wise_abs_max'.$Unknown activation_quantize_type : 'K'. It can only be 'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.Unknown weight_quantize_type: 'e'. It can only be 'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'.# is not supported for quantization._gradN)_scoper   _place_weight_bits_activation_bits_skip_pattern_weight_quantize_func_act_quantize_func_weight_preprocess_func_act_preprocess_func
_optimizer_exe
ValueError_activation_quantize_type_weight_quantize_type_window_size_moving_rate_quantizable_opslistr   keys_quantizable_grad_ops_is_test_global_stepcreate_var_mapcreate_op_mapselfr.   r/   weight_bitsactivation_bitsactivation_quantize_typeweight_quantize_typewindow_sizemoving_rateskip_patternquantizable_op_typeweight_quantize_funcact_quantize_funcweight_preprocess_funcact_preprocess_funcoptimizer_funcexecutoris_test
quant_typeops                       r1   __init__"QuantizationTransformPass.__init___   su   ~ '.' /)%9""3'=$$7!(	

 (+AA 	
W	
A $:567O6P QL L   z112F1G H/ /  *B&%9"'' 3''BAFFHII ::I (
 $(#8#8&
#8Rrd%L#8&
"    &
s   4D(c                 $  ^ ^
^^ [        U[        5      (       d   S5       eT R                  c  UR                  5       T l        [        R
                  " 5       m
UR                  5        Vs/ s H  o"R                  5       PM     snm/ mU 4S jnU
UUU 4S jnU
4S jnU4S jnT R                  (       d  T R                  U5        UR                  5       nU HI  nUR                  5       T R                  ;   d   UR                  5       T R                  ;   d  MA  U" U5        MK     0 Ul        [        [        U5      SSS	9 n	U H\  nUR                  5       T R                  ;   a+  T R                  X5      (       d  U" U5      (       a  U" X5        U	R!                  5         M^     SSS5        U H:  nUR                  5       T R                  ;   d  M#  U" U5      (       d  M2  U" X5        M<     UR#                  5         U$ s  snf ! , (       d  f       Ne= f)

Quantize the graph for training process. According to weight and
activation quantization type, the graph will be added some fake
quantize operators and fake dequantize operators.

Args:
    graph(IrGraph): the applied graph.
Returns:
    None
&graph must be the instance of IrGraph.Nc                 f  >^  Sn[        TR                  [        5      (       aF  T R                  5       R	                  S5      =(       a    [        U 4S jTR                   5       5      nO[        TR                  [        5      (       aa  T R                  5       R	                  S5      =(       a;    T R                  5       R                  S5      R                  TR                  5      S:g  nU(       aA  T R                  5       R                  SS5        T R                  5       R                  SS5        g g )NFop_namescopec              3   h   >#    U  H'  nUTR                  5       R                  S 5      ;   v   M)     g7fr   Nr   attr.0patternr:   s     r1   	<genexpr>MQuantizationTransformPass.apply.<locals>._quant_preprocess.<locals>.<genexpr>	  s/      M#5 wzz|00@@#5   /2rF   Twith_quant_attr
r%   rY   rf   r   has_attranystrr   find	_set_attr)r:   user_skippedrn   s   ` r1   _quant_preprocess:QuantizationTransformPass.apply.<locals>._quant_preprocess  s     L$,,d33&zz|44^D   M#'#5#5M J D..44JJL)).9 

T.)T$,,-  

&&|T:

&&'8$? r3   c           	        > UR                  5       R                  SS5        UR                  5       R                  SS5        UR                  5       R                  S5      nUR                  nU GH  nUR	                  5       UR                  5       ;  a  M(  UR	                  5       T;   a  TUR	                  5          nGO3UR	                  5       nUT;   a  Mi  UR	                  5       T;   a  SOSnU(       a+  TR                  b  TR                  U TR                  XA5      nO1U(       d*  TR                  b  TR                  U TR                  XA5      nU(       a>  TR                  b1  TR                  U TR                  XA5      nTR                  U5        GM)  U(       d>  TR                  b1  TR                  U TR                  XA5      nTR                  U5        GMn  UR	                  5       T;   a  TR                  OTR                  n	U(       a  TR                  OTR                  n
U
S:X  a  UR	                  5       nUS:H  =(       a    UR                  5       R                  S	5      nU(       a  US
-   OUnU[         R"                  ;   a  SOSnTR%                  U UUU	UU5      u  nnTR'                  U UU/U	/UU5      nO-TR)                  U UUU	U
U5      u  pTR+                  U UUU	U5      nUTU'   U R-                  XEU5        GM     g )Nquantization_typeqat_with_weightr   Top_roleFrK   	matmul_v2trans_y_trans_yr   r   )r   r   r   r7   r)   input_arg_namesr\   _insert_funcr]   rZ   appendr[   rW   rX   rb   ra   r   _channelwise_quant_axis1_ops_insert_channel_quant_op_insert_channel_dequant_op_insert_quant_op_insert_dequant_opupdate_input_link)r9   r   r   r7   r,   dequant_var_noder)   	is_weighttarget_out_node
quant_bitsr~   op_typer   
quant_axisquant_var_nodescale_var_nodedequantized_varspersistable_varsprocessed_varsrn   s                   r1   _transform_forward;QuantizationTransformPass.apply.<locals>._transform_forward  s   EEG13DEEEG/6eegll9-GYYF"==?"*<*<*>>==?&66'7'H$#==?D~-  (3C C  !T%A%A%M#'#4#4!4#?#?$ &$*C*C*O#'#4#4!4#<#<h$ !T%?%?%K*.*;*;!4#=#=x+ '--d3 &4+B+B+N*.*;*;!4#:#:H+ '--d3  $==?.>> ))!22  % 22!;;  #&<<"$'')#*k#9 #ruuw||%@ ;B'J"6w  '%*L*LL !" # !99!$ &&#** ,0+J+J!*+,'L&#,( :>9N9N!$ &&#:6 ,0+B+B!**&#,( .>$T*''BGU #r3   c                    > UR                    Hb  nUR                  5       UR                  5       ;  a  M'  UR                  5       T;   d  M=  TUR                  5          nU R                  X#U5        Md     g N)r7   r)   r   r   )r9   r   r,   r   r   s       r1   _transform_backward<QuantizationTransformPass.apply.<locals>._transform_backward  s[    II==?"*<*<*>>==?&66'7'H$++HK &r3   c                    > SnU R                    HO  nUR                  5       U R                  5       ;  a  M'  UR                  5       nUR                  5       T;   d  MM  SnMQ     U$ NFT)r7   r)   r   )r   
has_weightr,   r)   r   s       r1   _has_weight4QuantizationTransformPass.apply.<locals>._has_weight  sW    JII==?"*<*<*>>}}==?&66!%J & r3   7Adding quant op with weight:|{bar}| {n_fmt}/{total_fmt}P   total
bar_formatncols)r%   r   ri   r}   collectionsOrderedDictall_persistable_nodesr)   _create_global_stepall_op_nodesre   rh   out_node_mapping_tabler   len_is_skip_quantupdateresolve_hazard)rn   r9   pr   r   r   r   opsr   tr   r   r   s   `         @@@r1   applyQuantizationTransformPass.apply   s    %)) 	
4	
) == !MMODM&224.3.I.I.KL.KFFH.KL	@(o	H o	Hb	L	 }}$$U+  " B	T222779 : ::!"%  (*$ c(P
 779 5 55..u99k"oo*55
	 
 BwwyD666;r??#E.  	q MR
 
s   *G<?A#H
Hc                    U R                   S:X  d  U R                  S:X  GaT  SnUR                  5        H  nUR                  5       U:X  d  M  X0l        M!     U R                  Gc  UR                  U[        R                  R                  R                  S/[        R                  R                  R                  S9n[        U[        R                  " S/SS9U R                  U R                  5        UR!                  UR#                  5       5      nUR%                  SS[        R&                  R(                  R*                  S	.S
U0SU0S9nUR-                  XF5        UR-                  Xe5        XPl        g g g )NrL   z@STEP_COUNTER@r   r)   var_typeshape	var_dtypeint64dtype	incrementg      ?)stepr   XOutr   attrsr7   outputs)rb   ra   all_var_nodesr)   rj   create_persistable_noder	   VarDescVarTypeDENSE_TENSORINT64r2   r&   zerosrU   rV   create_var_node_from_descr(   create_op_nodeop_proto_and_checker_makerOpRoleForwardlink_to)rn   r9   counter_namenodeglobal_step_inglobal_step_outincrement_ops          r1   r   -QuantizationTransformPass._create_global_step  sX   &&/9--@+L++-99;,.(,% .   (!&!>!>%!\\11>>#"ll2288	 "? " "HHaS0KKKK	 #("A"A"&&(#  %33' ##'#B#B#I#I#Q#Q  0"O4  4   n;l<$3!9 ) Ar3   c                     US:X  a  U R                  XX4U5      $ US:X  a  U R                  XX4U5      $ US:X  a  U R                  XX4U5      $ g)z'
Insert fake_quantize_op in the graph.
rC   rL   rM   N)_insert_quant_abs_max_op_insert_quant_range_abs_max_op'_insert_quant_moving_average_abs_max_op)rn   r9   r,   r)   r   r~   r   s          r1   r   *QuantizationTransformPass._insert_quant_op  st     "007  ?*667  33??7  4r3   c                    UR                  5       (       d   UR                  5        S35       eUR                  U R                  U5      UR	                  5       UR                  5       UR                  5       S9nU R                  U5      nUR                  5       [        R                  :X  a  SnO'UR                  5       [        R                  :X  a  SnOSn [        R                  " U R                  R                  U5      R                  5       5      n	UR#                  UUR	                  5       S/UR                  5       S9n
[%        XU R                  U R&                  5        UR)                  SXES	.S
U0XjS.S9nUR+                  X+5        UR+                  X5        UR+                  X5        Xj4$ !   [        R                   " S/US9n	 N= f)z/
Insert fake_quantize_abs_max op in the graph.
 is not a varr   float64float32float16r   r   r   )
bit_lengthr   r   r   OutScaler   )is_varr)   create_var_node_quantized_var_nametyper   r   _quantized_scale_namepaddler   r  r&   arrayrU   find_varr*   r   r   r2   rV   r   r   )rn   r9   r,   r)   r   r   r   
scale_name	data_typescale_valuer   quant_op_nodes               r1   r   2QuantizationTransformPass._insert_quant_abs_max_op  s      CX]]_$5]"CC ..))$/]]_.."nn&	 / 
 //5
>>v~~-!I^^/!I!I	9(($$Z0;;=K
 66]]_#nn&	 7 
 	~DKKM,,+!+@?*G	 - 
 	h.m4m4--'	9((A3i8Ks   =F2 2Gc                    UR                  5       (       d   UR                  5        S35       eUR                  U R                  U5      UR	                  5       UR                  5       UR                  5       S9nU R                  U5      nUR                  5       [        R                  :X  a  SnO'UR                  5       [        R                  :X  a  SnOSn [        R                  " U R                  R                  U5      R                  5       5      n	UR#                  U[$        R&                  R(                  R*                  S/UR                  5       S9n
[-        XU R                  U R.                  5        UR1                  U
R3                  5       5      nX*S.nXkS	.nU R4                  (       Gd   UR#                  [6        R8                  " S
5      [$        R&                  R(                  R*                  U R:                  /UR                  5       S9nUR                  5       [        R                  :X  a  SnO'UR                  5       [        R                  :X  a  SnOSn[-        U[        R<                  " U R:                  /US9U R                  U R.                  5        U R>                  US'   XS'   U R:                  UU R4                  US.nURA                  SUUUS9nURC                  UU5        URC                  U
U5        URC                  UU5        URC                  UU5        U R4                  (       d.  URC                  U R>                  U5        URC                  UW5        Xk4$ !   [        R                  " [         /US9n	 GNs= f)z2
Insert fake_quantize_range_abs_max on the graph.
r   r   r   r  r  r   r   r   InScaler  scalesIter	OutScales)rs   r  r}   r   r   r   )"r  r)   r  r  r	  r   r   r
  r  r   r  r&   r  rU   r  r*   _SCALE_DEFAULT_VALUEr   r	   r   r   r   r2   rV   r   r(   ri   r   generaterc   r   rj   r   r   )rn   r9   r,   r)   r   r   r   r  r  r  scale_in_nodescale_out_noder7   r   scales_noder   r  s                    r1   r   8QuantizationTransformPass._insert_quant_range_abs_max_op%  s      CX]]_$5]"CC ..))$/]]_.."nn&	 / 
 //5
>>v~~-!I^^/!I!I	L(($$Z0;;=K
 55\\))66#nn&	 6 
 	}4;;L889J9J9LM:(E}}}77 ))(3--::(()"..*	 8 K ~~6>>1%	!V^^3%	%	$++,I>	 "..F6N#.K ,,$}}	
 ,,1	 - 
 	h.m]3m^4m^4}}MM$++];MM-5--w	L(($8#9KKs   =M( (Nc                 Z   UR                  U R                  U5      UR                  5       UR                  5       UR	                  5       S9nU R                  U5      nUR	                  5       [        R                  :X  a  SnO'UR	                  5       [        R                  :X  a  SnOSn [        R                  " U R                  R                  U5      R                  5       5      n	UR                  U[         R"                  R$                  R&                  S/UR	                  5       S9n
[)        XU R                  U R*                  5        UR-                  U
R/                  5       5      nX*S.nXkS.nU R0                  (       Gd  UR                  [2        R4                  " S	5      [         R"                  R$                  R&                  UR	                  5       S/S
9nUR	                  5       [        R                  :X  a  SnO'UR	                  5       [        R                  :X  a  SnOSn[)        U[        R6                  " S/US9U R                  U R*                  5        UR                  [2        R4                  " S5      [         R"                  R$                  R&                  UR	                  5       S/S
9n[)        U[        R6                  " S/US9U R                  U R*                  5        UR-                  UR/                  5       5      nUR-                  UR/                  5       5      nXS'   XS'   UUS'   UUS'   UU R8                  U R0                  US.nUR;                  SUUUS9nUR=                  UU5        UR=                  U
U5        UR=                  UU5        UR=                  UU5        U R0                  (       dH  UR=                  WU5        UR=                  WU5        UR=                  UW5        UR=                  UW5        Xk4$ !   [        R                  " [        /US9n	 GNC= f)z+Insert fake_quantize_moving_average_abs_maxr   r   r  r  r   r   r  r  stater)   r   r   r   accumInStateInAccumOutStateOutAccumr  rt   r}   r   r   r   )r  r  r	  r   r   r
  r  r   r  r&   r  rU   r  r*   r  r   r	   r   r   r   r2   rV   r   r(   ri   r   r  onesrd   r   r   )rn   r9   r,   r)   r   r   r   r  r  r  r  r  insoutsstate_in_nodeaccum_in_nodestate_out_nodeaccum_out_noder   r  s                       r1   r   AQuantizationTransformPass._insert_quant_moving_average_abs_max_op|  s    ..))$/]]_.."nn&	 / 
 //5
>>v~~-!I^^/!I!I	L(($$Z0;;=K
 55\\))66#nn&	 6 
 	}4;;L889J9J9LM7%B}}}!99 ))'2--::"..*c	 : M ~~6>>1%	!V^^3%	%	9-	 "99 ))'2--::"..*c	 : M 9-	 #<<!!#N #<<!!#N +	N*	N-D-D %,,}}	
 ,,:	 - 
 	h.m]3m^4m^4}}MM-7MM-7MM-8MM-8--c	L(($8#9KKs   *=P P*c                    UR                  5       (       d   UR                  5        S35       eUR                  U R                  U5      UR	                  5       UR                  5       UR                  5       S9nU R                  U5      nUR                  5       [        R                  :X  a  Sn	O'UR                  5       [        R                  :X  a  Sn	OSn	 [        R                  " U R                  R                  U5      R                  5       5      n
UR#                  U R                  U5      UR	                  5       UR                  5       U   /UR                  5       S9n[%        XU R                  U R&                  5        UR)                  SUUU R*                  US.S	U0X{S
.S9nUR-                  X,5        UR-                  X5        UR-                  X5        X{4$ !   [        R                   " UR                  5       U   /U	S9n
 N= f)z<
Insert fake_channel_wise_quantize_abs_max op in the graph.
r   r   r   r  r  r   r   )r  r   r}   r   r   r  r   )r  r)   r  r  r	  r   r   r
  r  r   r  r&   r  rU   r  r*   r   r   r2   rV   r   ri   r   )rn   r9   r,   r)   r   r   r   r   r  r  r  r   r  s                r1   r   2QuantizationTransformPass._insert_channel_quant_op  s      CX]]_$5]"CC ..))$/]]_.."nn&	 / 
 //5
>>v~~-!I^^/!I!I	(($$Z0;;=K 66++D1]]_>>#J/0nn&	 7 
 	~DKKM,,8((=="	 ?*G - 

 	h.m4m4--3	((!*-.iKs   =G )H
c                    UR                  5       (       d   UR                  5        S35       eUR                  U R                  UR                  5       5      UR	                  5       UR                  5       UR                  5       S9nSUS-
  -  S-
  nUR                  S[        U5      US.X#S.SU0S9nUR                  X(5        UR                  X85        UR                  X5        U$ )	z)
Insert fake_dequantize_op in the graph.
r   r   r   r   	max_ranger   r   Scaler   r   )
r  r)   r  _dequantized_var_namer	  r   r   r   floatr   )	rn   r9   r,   r   r   r   r   r4  dequant_op_nodes	            r1   r   ,QuantizationTransformPass._insert_dequant_op  s       CX]]_$5]"CC  00++HMMO<]]_.."nn&	 1 
 :>*a/	..- %i 0WE!;,-	 / 
 	h0n6o8r3   c                    UR                  5       (       d   UR                  5        S35       eUR                  U R                  UR                  5       5      UR	                  5       UR                  5       UR                  5       S9nUR                  SUUUS.X#S.SU0S9nUR                  X(5        U H  n	UR                  X5        M     UR                  X5        U$ )z;
Insert fake_channel_wise_dequantize_max_abs in the graph.
r   r   r   )r   r   r   r   Scalesr   r   )	r  r)   r  r7  r	  r   r   r   r   )
rn   r9   r,   scale_var_nodesr   r   r   r   r9  scale_ns
             r1   r   4QuantizationTransformPass._insert_channel_dequant_op2  s       CX]]_$5]"CC  00++HMMO<]]_.."nn&	 1 
  ..:(("
 "=,- / 	
 	h0&GMM'3 'o8r3   c                    SnUR                    H  nX4R                  5       -   nM     X2R                  5       -   nUR                   H  nX4R                  5       -   nM     X0R                  R	                  5       ;   a  U R                  U   nU$ UR                  5       (       a   UR                  5       nXPR                  U'   U$ UR                  UR                  R                  5       5      nXPR                  U'   U$ )z
create a node that same with in_node in graph
Args:
    graph(IrGraph): create node in graph.
    in_node(IrVarNode): create node that same with in_node.
Returns:
    created new node
 )
r7   r)   r   rk   rg   is_ctrl_varcreate_control_dep_varr   r   r(   )rn   r9   r=   keyinpnew_nodes         r1   _create_new_node*QuantizationTransformPass._create_new_nodeP  s     >>C
"C "LLN"??C
"C # %%**,,**3/H    ""335H'/$  66w||7G7G7IJH'/$r3   c                    SnUR                    H  nXER                  5       -   nM     XCR                  5       -   nUR                   H  nXER                  5       -   nM     SnX@R                  R	                  5       ;   a  U R                  U   nSnO7UR                  UR                  R                  5       5      nXpR                  U'   U(       a  gUR                    H%  nU R                  X5      n	UR                  X5        M'     UR                   H%  nU R                  X5      n	UR                  Xy5        M'     UR                   H(  n
U
R                   H  nU R                  XU5        M     M*     g)a  
copy op_node in source_graph to graph. And will run recursively
for next ops that link to op_node's outputs.
Args:
    graph(IrGraph): target graph to copy.
    source_graph(IrGraph): source graph to copy.
    op_node(IrOpNode): op node in source_graph.
Returns:
    None

rB  FTN)r7   r)   r   rl   rg   create_op_node_from_descr   r   rH  r   _copy_graph)rn   r9   source_graphr:   rE  rF  has_creatednew_op_noder=   rG  r,   next_op_nodes               r1   rL  %QuantizationTransformPass._copy_graphj  s8    >>C
"C "LLN"??C
"C #$$))++,,S1KK889JKK&1s#~~G,,U<HMM(0 & G,,U<HMM+0 '  H ( 0 0  lC !1 ( 	r3   c                    [        5       n[        5       n[        XV5         UR                  UR                  5       S-   5         [	        UR                  5       S-   UR                  5       SS9nU" U5      nUR                  5       UR                  UR                  '   [        R                  " U5      n	UR                  (       d@  U R                  (       d   S5       eSUl        U R                  5       n
U
R                  U	5        SSS5        SSS5        [        U R                  5         U R                  R!                  U5        SSS5        [#        [$        R&                  " UR(                  5      UR                  S9nUR+                  UR-                  5       WR                  5      nUR+                  UR-                  5       WR                  5      n/ n/ nUR-                  5        H=  nUR.                  / :X  d  M  UR1                  5       (       d  M,  UR3                  U5        M?     UR5                  5        H&  nUR.                  / :X  d  M  UR3                  U5        M(     UR6                   H  nU R9                  XU5        M     U H(  nUR6                   H  nU R9                  XU5        M     M*     U H  nU R9                  XU5        M     UR+                  UR-                  5       UR                  5       5      nUR+                  UR-                  5       UR                  5       5      nUR+                  UR-                  5       W	R                  5      nUR6                  nU H  nUR;                  UX>5        M     UR;                  UUU5        UR                  (       Gd  UR6                  S	   nUR+                  UR-                  5       UR                  5       S
-   5      nUR6                  S	   nUR+                  UR-                  5       UR                  5       S
-   5      nUR+                  UR-                  5       UR                  5       S
-   5      nUR.                  nUR;                  UUU5        SnUR6                   H,  nUR                  5       S
-   UR                  5       ;   d  M*  UnM.     Ub  UR=                  UUU5        OUR?                  UU5        U H1  nUR;                  UX>5        U(       d  M  UR=                  UUU5        M3     UR.                  S	   nUR.                  S	   nUR.                  S	   nURA                  U5        URA                  U5        URA                  U5        URA                  U5        URA                  UR.                  S	   5        URA                  U5        URA                  U5        U$ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f)aA  
Insert a tmp program that returned by func between var_node and op.

Args:
    graph(IrGraph): target graph to insert tmp program.
    func(Function): function to define a tmp program
    var_node(IrVarNode): node in target graph.
    op(IrOpNode): op in target graph.
Returns:
    op's new input that replaces var_node
_
_tmp_inputr  )r   r   z3optimizer_func must be set when graph is test graphFN)for_testr   @GRAD)!r
   r   switch_name_generator_guardr)   r   r   r   r  mean	_for_testr^   stop_gradientminimizer   rU   r_   runr   r	   Graphdescr6   r   r7   r8   r   r   r   rL  r   update_output_linkr   safe_remove_nodes)rn   r9   funcr,   r   tmp_programstartup_programr=   out_nodeloss	optimizer	tmp_graphin_node_params
in_op_noder   r:   target_in_noder   	loss_noder   op_outop_out_gradop_gradtarget_out_grad_nodein_node_gradin_node_grad_opop_grad_out	mean_gradmean_out_gradfill_constant_nodes                                 r1   r   &QuantizationTransformPass._insert_func  s    i!)+733HMMOc4IJ,.nn&G
 G}H:B--/E((7;;x(D?? I ).% OO-	""4(# K 8& %IIMM/* & JJ{''(5??
	 ..##%w||
 //##%x}}
 
++-D{{b T%5%5%7%7%%d+ . **,D{{b !!$' - OODUt4 $"D<<  7; ( # DUt4  11!7<<>
  22!8==?
 ,,U-@-@-BDIIN	 ((D##NHC /2> ZZ]F22##%v{{}w'>K "))!,G#(#;#;##%';';'='G$  !33##%~':':'<w'FL +11O##HowGK==?W,		;"&K ( &((!5w g';<'''G;,,\;M (
 -33A6I%,,Q/M!.!5!5a!8##I.##M2##$67##L1	 0 0 34	*/W KJ 87& &%s0   #WCWW)W'
W	W
W$'
W6c                     U S3$ z:
Return quantized variable name for the input `var_name`.

.quantized rn   r<   s     r1   r  -QuantizationTransformPass._quantized_var_name       :&&r3   c                     U S3$ z<
Return dequantized variable name for the input `var_name`.
.dequantizedrz  r{  s     r1   r7  /QuantizationTransformPass._dequantized_var_name       <((r3   c                     U S3$ zG
Return the scale name of quantized variable for the input `var_name`.
@scalerz  r{  s     r1   r
  /QuantizationTransformPass._quantized_scale_name       6""r3   c                 |   SnUR                  5       R                  S5      (       a&  UR                  5       R                  S5      (       a  SnUR                  5       S;   a  [	        X5      (       a  SnUR                  5       R                  S5      (       a%  UR                  5       R                  S5      S:X  a  SnU$ )z1
Analyse whether the op node skips quantization.
FrF   T)rH   matmulr   qat_without_weight)r   r   r   r)   r>   )rn   r9   r:   is_skips       r1   r   (QuantizationTransformPass._is_skip_quant  s     ::<  ..7::<3D3D4
 4
 G <<> 
 
 ,E;;GJJL!!"566

!!"56:NNGr3   )r]   r[   rX   ra   r_   rj   ri   rd   r^   rV   rh   re   rU   rY   rW   r\   rZ   rb   rc   rl   rk   )__name__
__module____qualname____firstlineno____doc__r   r   r   r   r   r   r   r   r   r   rH  rL  r   r  r7  r
  r   __static_attributes__rz  r3   r1   r@   r@   Y   s     !*&"^A!# %P dJX%4N&,.\U.nf.P2.h 4 <4$L{z')#r3   r@   c                   f    \ rS rSr      SS jrS rS rS rS rS r	S	 r
S
 rS rS rS rSrg)QuantizationFreezePassi7  Nc	                 z   Uc   S5       eUc   S5       eXl         X0l        [        U5      U l        X@l        XPl        X`l        Xpl        [        U l	        [        U l        [        R                  " 5       U l        [        R                  " 5       U l        [        R                  " 5       U l        [#        5       U l        g)a   
The freeze pass is used to adjust the quantize operator order, for example:
    1) `activation -> quant -> dequant -> conv2d` will be frozen into
    `activation -> quant -> conv2d -> dequant`
    2) `weight -> quant -> dequant -> conv2d` will be frozen into `weight -> conv2d`,
    and weight will be scaled offline.

Args:
    scope(static.Scope): scope is used to get the weight tensor values.
    place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the weight tensors.
        If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
    bias_correction(bool): whether use bias correction for post-training quantization.
         https://arxiv.org/abs/1810.05723.
    weight_bits(int): quantization bit number for weights.
    activation_bits(int): quantization bit number for activation.
    round_type(str, optional): The method of converting the quantized weights
        value float->int. Currently supports ['round', 'adaround'] methods.
        Default is `round`, which is rounding nearest to the integer.
        'adaround' is refer to https://arxiv.org/abs/2004.10568.
    weight_quantize_type(str): quantization type for weights, support 'abs_max' and
        'channel_wise_abs_max'. The 'range_abs_max' usually is not used for weight,
        since weights are fixed once the model is well trained.
    quantizable_op_type(list[str]): This input param will be removed latter. The pass
        will process all quantized op, so it is not necessary to set the input param.
Nr#   r$   )rU   _bias_correctionr   rV   rW   rX   _round_typerb   _fake_quant_op_list_fake_quant_op_names_fake_dequant_op_list_fake_dequant_op_namesr   r   _op_input_rename_map_op_output_rename_map_quant_var_scale_mapr+   _quantized_ops)	rn   r.   r/   bias_correctionro   rp   
round_typerr   rv   s	            r1   r   QuantizationFreezePass.__init__8  s    H  A"AA  A"AA  /'.' /%%9"$7!&;#$/$;$;$=!%0%<%<%>"$/$;$;$=!!er3   c           	         UR                  5        Vs/ s H  o"R                  5       PM     nnUR                  5       nU GH  nUR                  5       nX`R                  ;   d  M%  UR	                  S5      S   n[        US5      (       a,  XqR                  R                  5       ;   a  UR                  U   nXs;  a>  UR                  UR                  UR                  S5      S   5      nXR                  U'   M  U R                  UR                  S5      S   5      nUR                  S;   d   S5       eUR                  S:X  a  US   nUR                  S:X  a  U R                  S	:X  a  US   nOUR!                  5       nXR                  U'   U R"                  S
:X  Ga  U R                  U5      n	Sn
UR%                  5       R'                  S5      (       a  UR%                  5       R)                  S5      n
XpR*                  ;  a  U R*                  R-                  U5        [.        R0                  " U	R3                  5       UU
U R4                  5      n[6        R8                  " U5      nU R:                  SL a8  [.        R<                  " U	UUU
U R4                  S9n[6        R8                  " U5      nU R?                  X{5        U RA                  X5        GM     UR                  5       nU H5  nUR                  5       nX`RB                  ;   d  M$  U RA                  X5        M7     UR                  5       nU H  nUR%                  5       nUR'                  S5      (       d  M+  UR)                  S5      S:X  d  MB  U R                  S:X  a:  UR                  5       [.        RD                  ;   a  SOSn
U RG                  XU
5        M  U RI                  X5        M     U H_  nURJ                   HL  nURL                  U RN                  ;   d  M  UnU RN                  URL                     nURQ                  XU5        MN     Ma     U RS                  U5        URU                  5         U$ s  snf )z
Adjust quantize/dequantize operators order for the inference process.

Args:
    graph(IrGraph): the applied graph.
Returns:
    None
r   r   r   r  r   r   #the dim of scale_v should be 1 or 2r   r   rC   roundr   Tro   r   r   rK   )+r   r)   r   r  inputhasattrr   rg   r6   r   outputr  	_load_varndimsizerb   tolistr  r   r   r   r  addr   quant_tensorcopyrW   r&   r  r  bias_correction_w_restore_var!_remove_fake_quant_and_dequant_opr  r   _insert_post_channel_dequant_op_insert_post_dequant_opr7   r   r  r   _remove_unused_var_nodesr   )rn   r9   r   r   r   r:   op_nameinput_arg_namescale_vparam_vr   quantized_param_vop_node_descr,   old_innew_ins                   r1   r   QuantizationFreezePass.applyl  s    /4.I.I.KL.KFFH.KL  "GllnG333!(s!3A!65":;;%)E)E)J)J)LL).)E)E** "9#66
)CA)FG AH--n= #nnW^^J-G-JKG"<< ,  = ==  ||q(")!*) 66)C")!*").."2@G--n=''72"&.."@%&
"::<00>>)0):):<)HJ)1D1DD //33NC050B0B ' ' * $ 1 1	1- 139J0K-#44<494K4K$+$5$+$.040A0A5" 1 57HH=N4O 1 --nP::5Ju z   "GllnG55566uF    "G"::<L%%&9:: %%&9:>OO--1GG #<<>U-O-OO  
 88
 00@! & G#NN==D$>$>>%F!77FF++FGD	 +  	%%e,I Ms   Qc                    UR                  UR                  UR                  S5      S   5      nUR                  UR                  UR	                  S5      S   5      nUR
                  U R                  ;  a  X@R                  UR
                  '   O0U R                  UR
                     U R                  UR
                  '   UR                  U5        g )Nr   r   r   )r6   r   r  r7   r  r   r  r`  )rn   r9   r:   kvs        r1   r  8QuantizationFreezePass._remove_fake_quant_and_dequant_op  s    $$W__gnnU6KA6NO$$W^^W]]35G5JK6622201%%aff-040I0I1D%%aff- 	(r3   c                 F   UR                  5        Vs/ s H  oDR                  5       PM     nnUR                   GH  nUR                  5       nXrR                  5       ;  a  M)  UR                  U R
                  ;   a=  UnU R
                  UR                     n	U	R                  5         UR                  XU5        U R                  U5      n
U R                  U
   nX;   a8  [        U[        5      (       d   SU
 S35       e[        R                  " U5      nM  [        U[        5      (       d   eU R                  U
   nGM     [        UR!                  5       5      S:w  a  [#        SUR                  5        S35      eUR%                  UR&                  UR!                  5       S   5      nUR)                  [*        R,                  " S5      [.        R0                  R2                  R4                  WR6                  S   /UR9                  5       S9nUR9                  5       [:        R<                  :X  a  S	nO'UR9                  5       [:        R>                  :X  a  S
nOSn[A        UURC                  U5      U RD                  U RF                  5        URI                  U RK                  UR                  5       5      URM                  5       UR7                  5       UR9                  5       S9nSnUR                  5       S;   a)  [        UR&                  S   R7                  5       5      S-
  nURO                  5       RQ                  S5      (       a  URO                  5       RS                  S5      nURU                  SU RV                  U RX                  /U[.        RZ                  R\                  R^                  US.UUW/S.SU0S9nURa                  UU5        URa                  UU5        URa                  UU5        URa                  UU5        UU Rb                  UR                  '   U$ s  snf )NThe scale of parameter z is not a list.r    Only support one output, but op  has more than one output.r   channel_scaler   r   r  r  )r  r   rH   x_num_col_dimsr   )r   r   r   r  r<  r   r   )2r   r)   r7   r   r   r  clear_outputsr   _original_var_namer  r%   rf   r&   r  r   r   output_arg_namesr`   r6   r   r   r   r  r	   r   r   r   r   r   r  r   r  r2   astyperU   rV   r  r7  r	  r   r   r   r   rW   rX   r   r   r   r   r  )rn   r9   r:   r   r   r   r,   r)   r  r  original_var_namer  r  r   output_var_nodeweight_scale_noder  r   r  r9  s                       r1   r  6QuantizationFreezePass._insert_post_channel_dequant_op  s   .3.I.I.KL.KFFH.KLH==?D2244}} 9 99!228==A$$&''@ $ 7 7 =//0ABG 4!'400 -.?-@P0 !# 1!'62222!%!:!:;L!M% '( w'')*a/27<<>2B C) ) 
  22OOW557:
 "99%%o6\\))66 &&q)*%++-	 : 
   "fnn4!I""$6!I!I  +KKKK		
 !00++O,@,@,BC$))+!'')%++-	 1 
 <<>;; !3!9!9!;<q@N::<  !122$ZZ\../?@N..:#00$2G2GH(::AAII"0	 %,n= ,- / 
 	o7no6'9o'78;K""?#7#78_ Ms   Pc                    UR                  5        Vs/ s H  o3R                  5       PM     nnSnSU R                  S-
  -  S-
  nSU R                  S-
  -  S-
  nUR                   GH  nUR                  5       n	XR                  5       ;  a  M)  UR                  U R                  ;   a=  Un
U R                  UR                     nUR                  5         UR                  XU5        U R                  U	5      nU R                  U   nX;   a4  U R                  U5      (       d   SU S35       eUS:X  a  SOUnXVU-  -  nM  XW-  n[        U[        5      (       d   eU R                  U   nGM     [        UR!                  5       5      S:w  a  [#        SUR                  5        S35      eUR%                  UR&                  UR!                  5       S   5      nUR)                  U R+                  UR                  5       5      UR-                  5       UR/                  5       UR1                  5       S	9nUR3                  S
[5        U5      [6        R8                  R:                  R<                  S.UWS.SU0S9nUR?                  UU5        UR?                  UU5        UR?                  UU5        UU R@                  UR                  '   U$ s  snf )Nr   r  z is not a float.g        g:0yE>r  r  r   r   r   r3  r5  r   r   )!r   r)   rW   rX   r7   r   r   r  r  r   r  r  	_is_floatr%   r   r   r  r`   r6   r   r  r7  r	  r   r   r   r8  r	   r   r   r   r   r  )rn   r9   r:   r   r   r4  param_range	act_ranger,   r)   r  r  r  r  r   r  r   r9  s                     r1   r  .QuantizationFreezePass._insert_post_dequant_op9  s   .3.I.I.KL.KFFH.KL	T..23q8400145:	H==?D2244}} 9 99!228==A$$&''@ $ 7 7 =//0ABG 4~~g.. -.?-@@PQ. #*S.$g722	&	!'62222!%!:!:;L!M) ', w'')*a/27<<>2B C) ) 
  22OOW557:
 !00++O,@,@,BC$))+!'')%++-	 1 
  ..-"9-::AAII )>B,- / 
 	o7no6o'78;K""?#7#78m Ms   Kc                 |    [         R                  " U R                  R                  U5      R	                  5       5      $ r   r&   r  rU   r  r*   rn   r)   s     r1   r   QuantizationFreezePass._load_varr  )    xx,,T2==?@@r3   c                     U R                   R                  U5      R                  5       nUR                  X R                  5        g r   rU   r  r*   r+   rV   rn   r)   r  r0   s       r1   r  #QuantizationFreezePass._restore_varu  /    %%d+668

5++&r3   c                   ^ [        5       mUR                  5       nU HK  nUR                   H  nTR                  U5        M     UR                   H  nTR                  U5        M     MM     T Vs1 s H  ofR
                  iM     snm[        [        U4S jUR                  5       5      5      nUR                  U5        g s  snf )Nc                 "   > U R                   T;  $ r   r   r   all_used_varss    r1   <lambda>AQuantizationFreezePass._remove_unused_var_nodes.<locals>.<lambda>      TYYm;r3   	r+   r   r7   r  r   r   filterr   r`  	rn   r9   r   r:   
input_nodeoutput_nodenall_unused_varsr  s	           @r1   r  /QuantizationFreezePass._remove_unused_var_nodesy        "G%nn
!!*- -&!!+.  /  *77A7;##%
 	0 8   1Cc                 .   UR                  S5      (       a  US[        S5      *  $ UR                  S5      (       a  US[        S5      *  $ UR                  S5      (       a  US[        S5      *  $ UR                  S5      (       a  US[        S5      *  $ U$ )z$
Return the original variable name.
z.quantized.dequantizedNry  r  r  )endswithr   r{  s     r1   r  )QuantizationFreezePass._original_var_name  s     566<s#;<<==\**0s<0011^,,2s>2233X&&,s8}n--Or3   c                     U S3$ r  rz  r{  s     r1   r7  ,QuantizationFreezePass._dequantized_var_name  r  r3   c                 ~    [        U[        [        R                  [        R                  [        R
                  45      $ r   )r%   r8  r&   r  r  r   )rn   r  s     r1   r   QuantizationFreezePass._is_float  s$    !eRZZRZZHIIr3   )rX   r  r  r  r  r  rV   r  r  r  rU   rW   rb   )FrB   rB   r  rC   N)r  r  r  r  r   r   r  r  r  r  r  r  r  r7  r  r  rz  r3   r1   r  r  7  sU    
 & 2$hn`	)P d7 rA'1$)Jr3   r  c                   <    \ rS rSrS
S jrS rS rS rS rS r	S	r
g)ConvertToInt8Passi  Nc                 X    Uc   S5       eUc   S5       eXl         [        U5      U l        g)a  
Convert the weights into int8_t type.

Args:
    scope(static.Scope): scope is used to get the weight tensor values.
    place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the
        8bits weight tensors. If it's string, It can be ``cpu``, and ``gpu:x``,
        where ``x`` is the index of the GPUs.
    quantizable_op_type(list[str]): This input param will be removed latter. The pass
        will process all quantized op, so it is not necessary to set the input param.
Nr#   r$   )rU   r   rV   )rn   r.   r/   rv   s       r1   r   ConvertToInt8Pass.__init__  s9      A"AA  A"AA '.r3   c                 *   UR                  5        Vs/ s H  o"R                  5       PM     nnUR                  5       n0 nU H  nUR                  5       R	                  S5      (       d  M)  UR                  5       R                  S5      S:X  d  MN  UR                   HI  nUR                  5       nX;   d  M  X;  a  U R                  X5      n	XU'   UR                  XuU   U5        MK     M     U R                  U5        UR                  5         U$ s  snf )z
Convert weights' type of the graph. After that, the data type of the
graph weights is int8_t.

Args:
    graph(IrGraph): the applied graph.
Returns:
    None
r   r   )r   r)   r   r   r   r   r7   _convert_to_int8r   r  r   )
rn   r9   r   r   r   	input_mapr:   r,   r)   int8_var_nodes
             r1   r   ConvertToInt8Pass.apply  s     /4.I.I.KL.KFFH.KL  "	G

%%&9::JJL%%&9:>OO 'H#==?D/0,0,A,A %-M /<dO//$ow !/ $ 	%%e,/ Ms   Dc                    UR                  5       S-   nUR                  UUR                  5       UR                  5       [        R
                  R                  R                  S9nU R                  UR                  5       5      nU R                  R                  U5        U R                  X5[        R                  5        U$ )Nz.int8r   )r)   r   r	  r   r	   r   r   INT8r  rU   r(   
_store_varr&   int8)rn   r9   r,   int8_var_node_namer  r  s         r1   r   "ConvertToInt8Pass._convert_to_int8  s    %]]_w655#]]_.."ll**//	 6 
 x}}/*+*277;r3   c                 |    [         R                  " U R                  R                  U5      R	                  5       5      $ r   r  r  s     r1   r  ConvertToInt8Pass._load_var  r  r3   c                     U R                   R                  U5      R                  5       nUR                  UR	                  U5      U R
                  5        g r   )rU   r  r*   r+   r  rV   )rn   r)   r  r   r0   s        r1   r  ConvertToInt8Pass._store_var  s:    %%d+668

5<<&4r3   c                   ^ [        5       mUR                  5       nU HK  nUR                   H  nTR                  U5        M     UR                   H  nTR                  U5        M     MM     T Vs1 s H  ofR
                  iM     snm[        [        U4S jUR                  5       5      5      nUR                  U5        g s  snf )Nc                 "   > U R                   T;  $ r   r  r  s    r1   r  <ConvertToInt8Pass._remove_unused_var_nodes.<locals>.<lambda>  r  r3   r  r  s	           @r1   r  *ConvertToInt8Pass._remove_unused_var_nodes  r  r  )rV   rU   r   )r  r  r  r  r   r   r   r  r  r  r  rz  r3   r1   r  r    s"    /"!FA51r3   r  c                        \ rS rSrS rS rSrg)TransformForMobilePassi   c                 0    [         U l        [        U l        g)zL
This pass is used to convert the frozen graph for paddle-mobile execution.
N)r  r  r  r  )rn   s    r1   r   TransformForMobilePass.__init__  s     %8!&;#r3   c                    UR                  5       nU GHF  nUR                  5       nX@R                  ;   a  UR                  S5        UR	                  UR                  5       5      nUR                   H  nUR                  Xe5        M     UR                   H  nUR                  XW5        M     UR                  U5        X@R                  ;   d  M  UR                  S5        UR	                  UR                  5       5      nUR                   H  nUR                  Xh5        M     UR                   H  nUR                  X5        M     UR                  U5        GMI     UR                  5         U$ )z
Because paddle-mobile use `quantize` an `dequantize` as the names of
quantize operator and dequantize operator, the `apply` function just
realize this logic.

Args:
    graph(IrGraph): the graph will be transformed.
Returns:
    None
quantize
dequantize)r   r)   r  set_typerK  r   r7   r   r   r`  r  r   )	rn   r9   r   r:   r)   
quant_noder  r  dequant_nodes	            r1   r   TransformForMobilePass.apply  s      "G<<>D000  ,";;GJJLI
")..JMM*9 #1#*??KMM*: $3''0222  .$==gjjlK")..JMM*; #1#*??KMM,< $3''0# $ 	r3   )r  r  N)r  r  r  r  r   r   r  rz  r3   r1   r  r     s    <r3   r  c                   4    \ rS rSr     SS jrS rS rSrg)OutScaleForTrainingPassi*  Nc                     Xl         [        U5      U l        X0l        X@l        [        [        R                  " 5       5      U l        XPl	        g)a  
This pass is used for calculating output scales of some operators.
These output scales may be used by tensorRT or some other inference engines.

Args:
    scope(static.Scope): The scope is used to initialize these new parameters.
    place(static.CPUPlace|static.CUDAPlace|str): The place is used to initialize new parameters.
        If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the
        index of the GPUs.
    moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
N)
rU   r   rV   rd   ri   rf   r   rg   _teller_set_scale_dict)rn   r.   r/   rt   r}   
scale_dicts         r1   r    OutScaleForTrainingPass.__init__+  s=    & '.' < A A CD%r3   c                 x   [        U[        5      (       d   S5       eU R                  c  UR                  5       U l        / nUR	                  5        H4  nUR                  5       U R                  ;   d  M#  UR                  U5        M6     [        [        U5      SSS9 nU GHl  n[        R                  " U5       GH=  nUR                  UR                  U5      nUR                  5       [        R                   R"                  R$                  [        R                   R"                  R&                  [        R                   R"                  R(                  4;  d  SUR                  5       ;   a  M  UR                  5       [*        R,                  :X  a  SnO'UR                  5       [*        R.                  :X  a  SnOS	n UR                  UR1                  5       U R3                  UR                  5       5      5        GM@     URY                  5         GMo     SSS5        U$ !   UR5                  U R3                  UR                  5       5      [        R                   R"                  R6                  S
/UR                  5       S9nU R8                  bS   [:        R<                  " U R8                  UR                  5          /5      n	 O9!   [:        R>                  " S
/US9n	  O= f[:        R>                  " S
/US9n	 O= f[A        XU RB                  U RD                  5        SU0n
SU0nU R                  (       Gdh  UR5                  [F        RH                  " S5      [        R                   R"                  R6                  UR                  5       S
/S9n[A        U[:        R>                  " S
/US9U RB                  U RD                  5        UR5                  [F        RH                  " S5      [        R                   R"                  R6                  UR                  5       S
/S9n[A        U[:        R>                  " S
/US9U RB                  U RD                  5        URK                  URM                  5       5      nURK                  URM                  5       5      nXS'   XS'   XS'   XS'   U RN                  U R                  URQ                  5       RS                  S5      S.nURU                  SUU
US9nSn[        UR                  5      S:  a  UR                  S   nURW                  UU5        URW                  UU5        U(       a  URW                  UU5        U R                  (       a  GM  URW                  WU5        URW                  WU5        URW                  UW5        URW                  UW5        GM  ! , (       d  f       U$ = f)z
Insert the `moving_average_abs_max_scale` op in order to calculate output scales
of operators in the teller_set.

Args:
    graph(IrGraph): the target graph.
r   Nz.Adding OutScale op:|{bar}| {n_fmt}/{total_fmt}r   r   rV  r   r  r  r   r   r   r   r  zscale_state@r!  zscale_accum@r#  r$  r%  r&  r   )rt   r}   r   moving_average_abs_max_scaler   r   )-r%   r   ri   r}   r   r)   r   r   r   r   r   _get_op_output_var_namesr6   r   r   r	   r   r   FP64FP32FP16r  r   r  r   _scale_namer   r   r!  r&   r  r(  r2   rU   rV   r   r  r   r(   rd   r   r   r   r   r   )rn   r9   
target_opsr   r   output_var_namer=   r  
scale_noder  r)  r*  r+  r,  r-  r.  r   scale_op_noderP  s                      r1   r   OutScaleForTrainingPass.applyE  s    %)) 	
4	
) == !MMODM
$$&BwwyD,,,!!"% ' j/G
  ','E'Eb'IO#66

OG   LL0055 LL0055 LL0055  #glln4 }}&..8$-	 FNN:$-	$-	H00!//1 ,,W\\^< !7 (Jb 
e !
p mH%*%B%B!%!1!1',,.!A%)\\%9%9%F%F#$#&-mmo	 &C &
  ++7L.0hh%)%5%5glln%E$F/"L.0ggqc.K*,''1#Y*GK""dkk .C&
3D===(-(E(E!,!5!5n!E%)\\%9%9%F%F&-mmo#$#	 )F ) ')GGQCy9 KK KK	 ).(E(E!,!5!5n!E%)\\%9%9%F%F&-mmo#$#	 )F ) ')GGQCy9 KK KK	 */)H)H)--/* */)H)H)--/* *7I)6I+9Z(+9Z( (,'8'8#'==#%557<<	#:E
 %*$8$8 >#" $	 %9 %M $(L7??+a/'.q'9MM'=9MM-<#j,?===m]Cm]Cm^Dm^Dm
 
p sR   DV*:=H7V*A/K5	2J?=V*?KK5	V*K5	3I'V*AV**
V9c                     U S3$ 5
Return the scale name for the var named `var_name`.
r  rz  r{  s     r1   r*  #OutScaleForTrainingPass._scale_name  r  r3   )ri   rd   rV   r!  rU   r   )NNrE   NNr  r  r  r  r   r   r*  r  rz  r3   r1   r  r  *  s$     &4IV#r3   r  c                   *    \ rS rSrSS jrS rS rSrg)OutScaleForInferencePassi  Nc                 V    Xl         [        [        R                  " 5       5      U l        g)z
This pass is used for setting output scales of some operators.
These output scales may be used by tensorRT or some other inference engines.

Args:
    scope(static.Scope): The scope is used to initialize these new parameters.
N)rU   rf   r   rg   r   )rn   r.   s     r1   r   !OutScaleForInferencePass.__init__  s       < A A CDr3   c           	         [        U[        5      (       d   S5       eUR                  5       nU GH  nUR                  5       U R                  ;   d  M$  [
        R                  " U5      nU GH  nUR                  UR                  U5      nUR                  R                  5       b}  UR                  5       [        R                  R                  R                  [        R                  R                  R                   [        R                  R                  R"                  4;  a  M  U R%                  U5      nU R&                  R)                  U5      nUc   SU S35       e[*        R,                  " UR/                  5       5      S   n	UR1                  5       R3                  S[5        U	5      5        [
        R6                  " X55      n
U
c
   US-   5       eUR1                  5       R3                  U
S   [9        U
S   5      -   S-   [5        U	5      5        UR1                  5       R3                  S	S
5        GM     GM	     UR;                  5         U$ )z
Get output scales from the scope and set these scales in op_descs
of operators in the teller_set.

Args:
    graph(IrGraph): the target graph.
r   zCan not find z variable in the scoper   out_thresholdz is not the output of the opr   
_thresholdr   T)r%   r   r   r)   r   r   r&  r6   r   r   r(   r   r	   r   r   r'  r(  r)  r*  rU   r  r&   r  r*   r   r   r8  _get_output_name_indexr   r   )rn   r9   op_nodesr:   	var_namesr<   r=   r  	scale_varr  argname_indexs              r1   r   OutScaleForInferencePass.apply  s     %)) 	
4	
) %%'G||~!1!11!::7C	 )H#66G  ((*2 LL0055 LL0055 LL0055  !!%!1!1(!;J $ 4 4Z @I$0 '
|3IJ0 #%((9+?+?+A"B1"EK JJL**?E+<NO$)$@$@%M )4  #AA4 JJL**%a(3}Q/?+@@<Ok* JJL**+<dCE !*  L 	r3   c                     U S3$ r1  rz  r{  s     r1   r*  $OutScaleForInferencePass._scale_name  r  r3   )rU   r   r   r4  rz  r3   r1   r6  r6    s    	E3j#r3   r6  c                   L    \ rS rSrSr/ SQrSSSSS/SS	/SS4S
 jrS rS rSr	g)AddQuantDequantPassi  ze
Quantize the ops that do not have weights, and add quant_dequant op for the
quantized ops's inputs.
relurelu6
leaky_relutanhswishNrE   rB   rF   elementwise_addpool2dc	                    Xl         [        U5      U l        X0l        X@l        Xpl        XPl        Xl        X`l        U R                   H/  n	U	[        [        R                  " 5       5      ;   a  M'   U	S-   5       e   U R                   V
s/ s H  o S3PM	     sn
U l        U R                   c   S5       eU R                  c   S5       egs  sn
f )a  
Constructor.

Args:
    scope(static.Scope): The scope is used to initialize these new parameters.
    place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
        parameters described above. If ``place`` is string, it can be It can be ``cpu``
        or ``gpu:x``, where ``x`` is the index of the GPUs.
    moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
        quantization. Default is 0.9.
    quant_bits(int, optional): quantization bit number for activation. Default is 8.
    skip_pattern(str, optional): The user-defined quantization skip pattern, which
        will be presented in the name scope of an op. When the skip pattern is
        detected in an op's name scope, the corresponding op will not be quantized.
        Default is 'skip_quant'.
    quantizable_op_type(list[str], optional): List the type of ops that will be
        quantized. Default is ["elementwise_add", "pool2d"].
rS   rT   Nscope must not be None.place must not be None.)rU   r   rV   rd   _quant_bitsri   rY   r!  _quantizable_op_typerf   r   rg   _quantizable_grad_op_typern   r.   r/   rt   r   ru   rv   r}   r"  r   r   s              r1   r   AddQuantDequantPass.__init__(  s    : '.'%)%$7!00Gd#C#H#H#JKK ??K 1
 $(#<#<*
#<Rd%L#<*
& {{&A(AA&{{&A(AA&*
s   Cc                   ^ [        U[        5      (       d   S5       eU R                  c  UR                  5       U l        [        R
                  " 5       nUR                  5       n[        [        U5      SSS9 nU GH  mTR                  5       U R                  ;   Gas  Sn[        U R                  [        5      (       aF  TR                  5       R                  S5      =(       a    [        U4S jU R                   5       5      nO[        U R                  [         5      (       aa  TR                  5       R                  S5      =(       a;    TR                  5       R#                  S5      R%                  U R                  5      S	:g  nTR                  5       R                  S
5      =(       a"    TR                  5       R#                  S
5      S:H  nU(       d  U(       d  ['        UT5      (       d  GMt  TR                  5       R)                  S
S5        TR                  5       R)                  SU R*                  5        TR                  5       R)                  SS5        [,        R.                  " T5      nSnU H  n	SU	;   d  M  Sn  O   U(       a  GM  U H}  n	UR1                  TR2                  U	5      n
X;   a  X)   nOBU R5                  UU
U R*                  TR                  5       R#                  S5      5      u  nnXU	'   UR7                  XT5        M     UR9                  5         GM     SSS5        U Hs  mTR                  5       U R:                  ;   d  M#  TR=                  5        H<  nX;   d  M
  UR1                  TR2                  U5      n
X-   nUR7                  XT5        M>     Mu     UR?                  5         U$ ! , (       d  f       N= f)
Add quant_dequant before some ops, such as the 'elementwise_add' and
'pool2d' op.

Args:
    graph(IrGraph): the target graph.
Returns:
    None
r   N6Adding quant activation op:|{bar}| {n_fmt}/{total_fmt}r   r   Fr   c              3   h   >#    U  H'  nUTR                  5       R                  S 5      ;   v   M)     g7fr   r   r   s     r1   r   ,AddQuantDequantPass.apply.<locals>.<genexpr>u  /      P+= $wzz|'8'8'HH+=r   r   r   r   r  rp   r   Tquantized.dequantizedr   ) r%   r   ri   r}   r   r   r   r   r   r)   rR  rY   rf   r   r   r   r   r   r   r>   r   rQ  r   r5   r6   r7   /_insert_quant_dequant_moving_average_abs_max_opr   r   rS  r   r   )rn   r9   dequantized_vars_mapr   r   r  is_quantized	arg_namesrF   arg_namer=   r   rS  
input_namer   r:   s                  @r1   r   AddQuantDequantPass.applyY  sO    %)) 	
4	
) == !MMODM*668 ))+l#O
 '<<>T%>%>>#G!$"4"4d;;")**,"7"7"G #C P+/+=+=P M $D$6$6<<#JJL11.A " '

!T.1!T$"4"45!!"    

--.AB -#JJL--.AB,- !  ' =eW M M JJL**+-A JJL**+<d>N>NOJJL**+<dC % = =g FI!&J$-2h>)-J! %. " $-"'":":#NNH# $;-A-KN
 !% T T % ' $ 0 0 '

 1 1) <	! . ! >L://#W# %.( 
 (
N $G||~!?!??")"9"9";J!9"'":":#NNJ# ,@+K(//#w #< $ 	i
 
s   9HN:?B(N::
Oc                 X	   UR                  UR                  5        S3UR                  5       UR                  5       UR	                  5       S9nUR                  5        S3nUR	                  5       [
        R                  :X  a  SnO'UR	                  5       [
        R                  :X  a  SnOSn U R                  b^  UR                  5       U R                  R                  5       ;   a2  [        R                  " U R                  UR                  5          /US9nO=[        R                  " U R                  R                  U5      R                  5       US9n UR!                  UR                  5        S3["        R$                  R&                  R(                  S/UR	                  5       S9n	[+        XU R                  U R,                  5        UR/                  U	R1                  5       5      n
X)S	.nXZS
.nU R2                  (       Gd  UR!                  [4        R6                  " S5      ["        R$                  R&                  R(                  UR	                  5       S/S9nUR	                  5       [
        R                  :X  a  SnO'UR	                  5       [
        R                  :X  a  SnOSn[+        U[        R8                  " S/US9U R                  U R,                  5        UR!                  [4        R6                  " S5      ["        R$                  R&                  R(                  UR	                  5       S/S9n[+        U[        R8                  " S/US9U R                  U R,                  5        UR/                  UR1                  5       5      nUR/                  UR1                  5       5      nXS'   XS'   XS'   UUS'   UU R:                  U R2                  US.nUR=                  SUUUS9nUR?                  UU5        UR?                  U	U5        UR?                  UU5        UR?                  UU
5        U R2                  (       dH  UR?                  WU5        UR?                  WU5        UR?                  UW5        UR?                  UW5        XZ4$ !   [        R                  " [        /US9n GNS= f)z:Insert fake_quantize_dequantize_moving_average_abs_max op.z.quant_dequantr   z.quant_dequant@scaler   r  r  r   r   r  r  zquant_dequant.stater!  zquant_dequant.accumr#  r$  r%  r&  r'  r   r   ) r  r)   r	  r   r   r  r   r  r!  rg   r&   r  rU   r  r*   r  r   r	   r   r   r   r2   rV   r   r(   ri   r   r  r(  rd   r   r   )rn   r9   r,   r   r   r   r  r  r  r  r  r)  r*  r+  r,  r-  r.  r   r  s                      r1   r]  CAddQuantDequantPass._insert_quant_dequant_moving_average_abs_max_op  s    ..MMO$N3]]_.."nn&	 / 
 !((<=
>>v~~-!I^^/!I!I	L  ,MMOt'7'7'<'<'>> hh%%hmmo67y !hhKK((4??A# 55MMO$$89\\))66#nn&	 6 
 	}4;;L889J9J9LM7%B}}}!99 ))*?@--::"..*c	 : M ~~6>>1%	!V^^3%	%	9-	 "99 ))*?@--::"..*c	 : M 9-	 #<<!!#N #<<!!#N +	N*	N--D %,,}}	
 ,,E	 - 
 	h.m]3m^4m^4}}MM-7MM-7MM-8MM-8--e	L(($8#9KKs   .A*R
 <R
 
R))	ri   rd   rV   rQ  rS  rR  r!  rU   rY   )
r  r  r  r  r  _activation_typer   r   r]  r  rz  r3   r1   rE  rE    sE     H "^.9/BbgRp.r3   rE  c                       \ rS rSrSr      SS jrSS\R                  R                  R                  4S jr
S rS rS rS	 rS
 rSrg)InsertQuantizeLineari5  a  
Insert quantize_linear and dequantize_linear op before ops.

Args:
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
        If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
    scope(paddle.Scope): scope is used to get the weight tensor values.
    quant_bits(int, optional): quantization bit number for weight. Default is 8.
    quant_axis(int, optional): quantization dimension of channels. When it is greater than or
        equal to 0, it will quantization with per channel, else quantization with per layer.
        Default is -1.
    channel_wise(bool, optional): Whether quantization with per channel or not. Default is False.
    moving_rate(float): the rate for 'moving average' method.
    is_test(bool, optional): Whether quantization with training or not. Default is True.
    scale_dict(dict, optional): calibration ranges of tensors output.
Nc	                 d    Xl         X l        X0l        X@l        XPl        Xpl        X`l        Xl        g r   )rV   rU   r   r   channel_wiseri   rd   r!  )	rn   r/   r.   r   r   rj  rt   r}   r"  s	            r1   r   InsertQuantizeLinear.__init__G  s/     $$('%r3   c                    UR                  5       (       d   UR                  5        S35       eU(       d  UR                  5       OUnUR                  U R                  U5      UR	                  5       UR                  5       UR                  5       S9nU(       Gd  UR                  5       [        R                  :X  a  SnO'UR                  5       [        R                  :X  a  SnOSnU R                  U5      nU R                  (       a]  UR                  5       U R                     n	[        R                  R                  R                   n
["        R$                  " XS9[&        -  nO,Sn	UR	                  5       n
["        R(                  " [&        /US9nU R*                  b]  UR                  5       U R*                  R-                  5       ;   a1  ["        R(                  " U R*                  UR                  5          /US9nUR/                  UU
U	/UR                  5       S9n[1        XKU R2                  U R4                  5        S nUc  UR/                  U R7                  UR                  5       5      [        R                  R                  R                   UR                  5       [        R                  R                  R8                  S9n[1        U["        R:                  " UR                  5       SS9U R2                  U R4                  5        X$S	.nUb  XS
'   U R                  U R<                  S.nX^S'   SU0nU R>                  (       Gd  URA                  URC                  5       5      nUR/                  [D        RF                  " S5      [        R                  R                  R                   UR                  5       S/S9nUR                  5       [        R                  :X  a  SnO'UR                  5       [        R                  :X  a  SnOSn[1        U["        R$                  " S/US9U R2                  U R4                  5        UR/                  [D        RF                  " S5      [        R                  R                  R                   UR                  5       S/S9n[1        U["        R$                  " S/US9U R2                  U R4                  5        URA                  URC                  5       5      nURA                  URC                  5       5      nUUS'   UUS'   UUS'   UUS'   UUS'   U R>                  US'   U RH                  US'   URK                  SUUUS9nURM                  UU5        URM                  UU5        Ub  URM                  UU5        URM                  UU5        U R>                  (       dZ  URM                  WU5        URM                  WU5        URM                  UW5        URM                  UW5        URM                  UW5        Xd4$ )Nr   r   r   r  r  r   r   int32r5  	ZeroPointr   r  r   Yr   r!  r"  r  r#  r$  r%  r&  r}   rt   quantize_linearr   )'r  r)   r  r  r	  r   r   r  r   r  r
  rj  r   r	   r   r   r   r&   r(  r  r  r!  rg   r   r2   rU   rV   _zero_point_nameINT32r   r   ri   r   r(   r   r  rd   r   r   )rn   r9   r,   r<   r   r   r   r  r  scale_var_shapescale_var_typeinit_scale_valuezero_point_noder7   r   r   r  r+  r,  r-  r.  r  s                         r1   insert_quant_op$InsertQuantizeLinear.insert_quant_op[  s-      CX]]_$5]"CC *28==?..))(3]]_.."nn&	 / 
 ~~6>>1%	!V^^3%	%	33H=J  "*.."24??"C!%!5!5!B!BGGO=*+ !
 #$!)#%88)*)$ 
   ,MMOt'7'7'<'<'>>#%88%%hmmo67y$  #::'&'"..*	 ; N $++t{{ "#;;**>+>+>+@A--::$**,,,..44	 < O --/w?	  9&"1;#dooN"i'}}}"<<""$N "99 ))'2--::"..*c	 : M ~~6>>1%	!V^^3%	%	9-	 "99 ))'2--::"..*c	 : M 9-	 #<<!!#N #<<!!#N #1GJ -F9 -F9"0GJ"0GJ#}}E)#'#4#4E- ,,%	 - 
 	h.nm4&MM/=9m^4}}MM-7MM-7MM-8MM-8MM-8--r3   c                    UR                  5       (       d   UR                  5        S35       eUR                  U R                  UR                  5       5      UR	                  5       UR                  5       UR                  5       S9nS nUc  UR                  U R                  UR                  5       5      [        R                  R                  R                  UR                  5       [        R                  R                  R                  S9n[        U[        R                   " UR                  5       SS9U R"                  U R$                  5        X#S.nUb  XgS'   U R&                  U R(                  S.nXHS'   UR+                  S	UUS
U0S9n	UR-                  X)5        UR-                  X95        Ub  UR-                  Xi5        UR-                  X5        U$ )Nr   r   rm  r   r5  rn  ro  r   dequantize_linearrp  r   )r  r)   r  r7  r	  r   r   r   rr  r	   r   r   r   rs  r2   r&   r   rU   rV   r   r   r   r   )
rn   r9   r,   r   r   r   rw  r7   r   r  s
             r1   insert_dequant_op&InsertQuantizeLinear.insert_dequant_op  s     CX]]_$5]"CC  00++HMMO<]]_.."nn&	 1 
 "#;;**+;+@+@+BC--::$**,,,..44	 < O --/w?	  9&"1;#dooN"i,,'*+	 - 
 	h.n4&MM/9m6r3   c                     U S3$ rx  rz  r{  s     r1   r  (InsertQuantizeLinear._quantized_var_name	  r}  r3   c                     U S3$ r  rz  r{  s     r1   r7  *InsertQuantizeLinear._dequantized_var_name!	  r  r3   c                     U S3$ r  rz  r{  s     r1   r
  *InsertQuantizeLinear._quantized_scale_name'	  r  r3   c                     U S3$ r2  @zero_pointrz  r{  s     r1   rr  %InsertQuantizeLinear._zero_point_name-	       ;''r3   )ri   rd   rV   r!  rU   rj  r   r   )rB   r   FrE   TN)r  r  r  r  r  r   r	   r   r   r   rx  r|  r  r7  r
  rr  r  rz  r3   r1   rh  rh  5  s^    * &0 //66>>P.d, \')#(r3   rh  c                   n    \ rS rSrSrSSSSSSSSS// S	QSSSSSSS4S
 jrS rS rS rS r	S r
S rSrg)QuantizationTransformPassV2i4	  z
Quantize the ops that have weights. Add quant and dequant ops for
the quantized ops's inputs. It is used in the new format of quantization.
NrB   rC   rD   rE   rF   rG   c                 h   Xl         [        U5      U l        X0l        X@l        Xl        Xl        Xl        Xl        Xl	        Xl
        UU l        SU l        / SQnUS:w  d   S5       eUU;  a  [        SU S35      eUU;  a  [        SU S35      eXPl        X`l        Xpl        Xl        Xl        U R$                   H/  nU['        [(        R*                  " 5       5      ;   a  M'   US	-   5       e   U R$                   Vs/ s H  nU S
3PM
     snU l        UU l        SU l        0 U l        0 U l        gs  snf )a  
Args:
    scope(paddle.Scope): When activation use 'range_abs_max' as the quantize
        type, this pass will create some new parameters. The scope is used to
        initialize these new parameters.
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to initialize new
        parameters described above. If it's string, It can be ``cpu``, and ``gpu:x``,
        where ``x`` is the index of the GPUs.
    weight_bits(int): quantization bit number for weights,
        the bias is not quantized.
    activation_bits(int): quantization bit number for activation.
    activation_quantize_type(str): quantization type for activation,
        now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
        If use 'abs_max' mode, the quantization scale will be calculated
        dynamically each step in both training and testing period. If use
        'range_abs_max', a static quantization scale will be calculated
        during training and used in inference.
    weight_quantize_type(str): quantization type for weights,
        support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max'
        usually is not used for weight, since weights are fixed once the
        model is well trained.
    window_size(int): the window size for 'range_abs_max' quantization.
    moving_rate(float): the param for 'moving_average_abs_max' quantization.
    skip_pattern(str or str list): The user-defined quantization skip pattern, which
        will be presented in the name scope of an op. When the skip pattern is
        detected in an op's name scope, the corresponding op will not be quantized.
    quantizable_op_type(list[str]): List the type of ops that will be quantized.
        Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
        QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
    weight_quantize_func(function): Function that defines how to quantize weight.
        Using this can quickly test if user's quantization method works or not.
        In this function, user should both define quantization function and
        dequantization function, that is, the function's input is non-quantized
        weight and function returns dequantized weight. If None, will use
        quantization op defined by 'weight_quantize_type'. Default is None.
    act_quantize_func(function): Function that defines how to quantize activation.
        Using this can quickly test if user's quantization method works or not.
        In this function, user should both define quantization and dequantization
        process, that is, the function's input is non-quantized activation and
        function returns dequantized activation. If None, will use quantization
        op defined by 'activation_quantize_type'. Default is None.
    weight_preprocess_func(function): Function that defines how to preprocess
        weight before quantization. Using this can quickly test if user's preprocess
        method works or not. The function's input is non-quantized weight and
        function returns processed weight to be quantized. If None, the weight will
        be quantized directly. Default is None.
    act_preprocess_func(function): Function that defines how to preprocess
        activation before quantization. Using this can quickly test if user's
        preprocess method works or not. The function's input is non-quantized
        activation and function returns processed activation to be quantized.
        If None, the activation will be quantized directly. Default is None.
    optimizer_func(function): Function return a optimizer. When 'is_test' is
        False and user want to use self-defined quantization function and
        preprocess function, this function must be set. Default is None.
    executor(paddle.Executor): If user want to use self-defined quantization
        function and preprocess function, executor must be set for initialization.
        Default is None.

Examples:
    .. code-block:: python

        >>> # The original graph will be rewrite.
        >>> import paddle
        >>> import paddle.static as static
        >>> from paddle.static.quantization import QuantizationTransformPassV2
        >>> from paddle.base.framework import IrGraph
        >>> from paddle.framework import core

        >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
        >>> place = paddle.CPUPlace()
        >>> scope = paddle.static.global_scope()
        >>> transform_pass = QuantizationTransformPassV2(scope, place)
        >>> transform_pass.apply(graph)
FrJ   rK   rN   rO   rP   rQ   rR   rS   rT   N)rU   r   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   _conv1dtranspose_flagr`   ra   rb   rc   rd   re   rf   r   rg   rh   ri   rj   rk   rl   rm   s                       r1   r   $QuantizationTransformPassV2.__init__:	  s}   | '.' /)%9""3'=$$7!(	%*"

 (+AA 	
W	
A $:567O6P QL L   z112F1G H/ /  *B&%9"'' 3''BAFFHII ::I (
 $(#8#8&
#8Rrd%L#8&
"    &
s   ;D/c                 d  ^ Sn[        U R                  [        5      (       aF  TR                  5       R	                  S5      =(       a    [        U4S jU R                   5       5      nO[        U R                  [        5      (       aa  TR                  5       R	                  S5      =(       a;    TR                  5       R                  S5      R                  U R                  5      S:g  nU(       aA  TR                  5       R                  SS5        TR                  5       R                  SS5        g g )NFr   c              3   h   >#    U  H'  nUTR                  5       R                  S 5      ;   v   M)     g7fr   r   r   s     r1   r   @QuantizationTransformPassV2._quant_preprocess.<locals>.<genexpr>	  s/      I1G 7::<,,^<<1r   r   rF   Tr   r   )rn   r:   r   s    ` r1   r   -QuantizationTransformPassV2._quant_preprocess	  s    d(($//"::<00@ S I#11I FL **C00

%%n5 JJL%%n5::4;M;MN  JJL""<6JJL""#4d; r3   c                    UR                  5       R                  SS5        UR                  5       R                  S5      nS nUR                  nU GH5  nUR	                  5       UR                  5       ;  a  M(  UR	                  5       U R                  ;   a  U R                  UR	                  5          nGOUR	                  5       nXR                  ;   a  M  UR	                  5       U R                  ;   d  UR	                  5       U R                  ;   a  SOSn	U	(       a*  U R                  b  U R                  XR                  Xb5      nO0U	(       d)  U R                  b  U R                  XR                  Xb5      nU	(       aG  U R                  b:  U R                  XR                  Xb5      n
U R                  R                  U5        GMu  U	(       dG  U R                  b:  U R                  XR                  Xb5      n
U R                  R                  U5        GM  UR	                  5       U R                  ;   a  U R                   OU R"                  nU	(       a  U R$                  OU R&                  nSnSnUS:X  a  SnUR	                  5       nUS:H  =(       a    UR                  5       R                  S	5      nU(       a  US
-   OUnU R(                  (       a
  SnSU l        O&UR	                  5       [*        R,                  ;   a  SOSn[/        U R0                  U R2                  UUUU R4                  U R6                  S9nUR9                  XXS9u  nnUR;                  UUUU5      nXpR                  U'   U	(       a  UnUR=                  XgU5        GM8     U$ )Nr   r   r   TFr   rK   r   r   r   r   r   r   r   rj  rt   r}   r<   r   )r   r   r   r7   r)   r   r   r   r   persistable_cast_output_varsr\   r   r]   rZ   r   r[   rW   rX   rb   ra   r  r   r   rh  rV   rU   rd   ri   rx  r|  r   )rn   r9   r   r   r  r7   r,   r   r)   r   r   r   r~   r   rj  r   r   insert_quant_passr   r   s                       r1   r   .QuantizationTransformPassV2._transform_forward	  sA   
-/@A%%',,y) H}}b&8&8&::}}$"7"77#'#8#8#I }}...  }}$*?*??}}$*K*KK  	  !=!=!I#00;;X H #t'@'@'L#0088( H !;!;!G&*&7&7998'O ''..t4"t'>'>'J&*&7&766'O ''..t4  }}$*?*?? %%..  ! ..77 
  
$!77#'L ggiG&+5 2557<<!<G 7>g
27G11%&
5:2  "wwyE,N,NN !" #
 %9KKKK))!- $ 1 1 MM%! &55d 6 "" $5#F#F>>7$  /?%%d+(6%##HCE F ! r3   c                    UR                    Hv  nUR                  5       UR                  5       ;  a  M'  UR                  5       U R                  ;   d  MG  U R                  UR                  5          nUR	                  X4U5        Mx     g r   )r7   r)   r   r   r   )rn   r9   r   r,   r   s        r1   r   /QuantizationTransformPassV2._transform_backwardH
  se    		H}}b&8&8&::}}$"7"77#'#8#8#I ''BG "r3   c                     SnUR                    Hg  nUR                  5       UR                  5       ;  a  M'  UR                  5       U R                  ;   d   UR                  5       U R                  ;   d  Me  SnMi     U$ r   )r7   r)   r   r   r  )rn   r   r   r,   s       r1   r   'QuantizationTransformPassV2._has_weightP
  sd    
		H}}b&8&8&::4#8#88==?d&G&GG!
 " r3   c                 <   SUR                  5       ;  d  SUR                  S5      S   ;  a  g UR                  S5      S   nS nUR                  5        H  n[        R                  " U5      nX6;   d  M   U R                  U5      (       d  M8  UR                  5       S:X  a+  U R                  X5      (       d  U R                  X5      nMu  Mw  U R                  X5      nM     UR                   GH  nUR                  5       U:X  a  U R                  OU R                  nUR                  5       U:X  a  U R                  OU R                  n	Sn
SnU	S:X  a[  S	nUR                  5       [        R                  ;   a  S
OSn
S[        R                  ;   a  [        R                  R                  S5        U R                  X5      (       a    g [        U R                   U R"                  UU
UU R$                  U R&                  S9nUR                  5       U:X  a  UOS nUR)                  UUUR                  5       UUR+                  5       R-                  S5      S9u  nnUR/                  UUUUR+                  5       R-                  S5      5      nUR1                  XU5        GM     g )Nr   
unsqueeze2Filterr   r!   r   FrK   Tr   r  r   )r<   r   r   )r)   r  r   r   r&  r   r   r   r7   rW   rX   rb   ra   r   removerh  rV   rU   rd   ri   rx  r   r   r|  r   )rn   r9   r   conv_weight_var_namer  _opr>  r,   r   r~   r   rj  r  r   r   r   s                   r1   _quant_conv1d)QuantizationTransformPassV2._quant_conv1d\
  sm   BGGI% 21 55!xx1!4 %%'C66s;I#0T5E5Ec5J5J779 22..u::,0,C,CE,O) ; )-(?(?(K% ( 		H ==?&:: !!**  ==?&:: **33 
 J L33#e&H&HHAa   5#E#EE66==lK""5-- 4%%) --! ==?&:: "  "11!-Y/ 2   1BBY'	  ##HCi "r3   c                 Z   [        U[        5      (       d   S5       eU R                  c  UR                  5       U l        [        R
                  " 5       U l        / U l        / U l        UR                  5        Vs/ s H  o"R                  5       PM     snU l        UR                  5       n/ U l        UR                  5        H|  nUR                  5       S:X  d  M  UR                  S   R                  5       U R                  ;   d  MF  U R                  R                  UR                  S   R                  5       5        M~     U HR  nUR                  5       U R                   ;   d   UR                  5       U R"                  ;   d  MA  U R%                  U5        MT     0 Ul        [)        [+        U5      SSS9 nU H  nUR                  5       U R                   ;   aO  U R-                  X5      (       d(  U R/                  U5      (       a  U R1                  X5        OU R3                  X5        UR5                  5         M     SSS5        U HL  nUR                  5       U R"                  ;   d  M#  U R/                  U5      (       d  M;  U R7                  X5        MN     U$ s  snf ! , (       d  f       Ng= f)r   r   Ncastr   r   r   r   )r%   r   ri   r}   r   r   r   r   r   r   r)   r   r  r7   r   r   re   rh   r   r   r   r   r   r   r   r  r   r   )rn   r9   r   r   r   r   s         r1   r   !QuantizationTransformPassV2.apply
  s6    %)) 	
4	
) == !MMODM + 7 7 9 "  $99;!
;FFH;!
   " -/)$$&B	V#IIaL%%'4+@+@@1188A9K9K9MN ' B	T222779 : ::&&r*  (*$ c(P
 779 5 55..u99d>N>N? ? //: **55
 
  BwwyD6664;K;KB;O;O((3  ]!
6
 
s   9J4BJ
J*)r]   r[   rX   ra   r  r_   rj   ri   rd   r^   rV   rh   re   rU   rY   rW   r\   rZ   rb   rc   rl   rk   r   r  r   r   )r  r  r  r  r  r   r   r   r   r   r  r   r  rz  r3   r1   r  r  4	  sj     !*&"^A!# %P d<$h!TH
GDRCr3   r  c                   F    \ rS rSrSr/ SQrSSSSS/SS	/SS4S
 jrS rSrg)AddQuantDequantPassV2i
  z
Quantize the ops that do not have weights, and add quant_linear and dequant_linear
op for the quantized ops's inputs. It is used in the new format of quantization.
rF  NrE   rB   rF   rL  rM  c	                    Xl         [        U5      U l        X0l        X@l        Xpl        XPl        Xl        X`l        U R                   H/  n	U	[        [        R                  " 5       5      ;   a  M'   U	S-   5       e   U R                   V
s/ s H  o S3PM	     sn
U l        U R                   c   S5       eU R                  c   S5       e/ U l        gs  sn
f )ab  
Args:
    scope(paddle.Scope): The scope is used to initialize these new parameters.
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to initialize new
        parameters described above. If ``place`` is string, it can be It can be ``cpu``
        or ``gpu:x``, where ``x`` is the index of the GPUs.
    moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
        quantization. Default is 0.9.
    quant_bits(int, optional): quantization bit number for activation. Default is 8.
    skip_pattern(str, optional): The user-defined quantization skip pattern, which
        will be presented in the name scope of an op. When the skip pattern is
        detected in an op's name scope, the corresponding op will not be quantized.
        Default is 'skip_quant'.
    quantizable_op_type(list[str], optional): List the type of ops that will be
        quantized. Default is ["elementwise_add", "pool2d"].
    scale_dict(dict, optional): calibration ranges of tensors output.

Examples:
    .. code-block:: python

        >>> # The original graph will be rewrite.
        >>> import paddle
        >>> import paddle.static as static
        >>> from paddle.static.quantization import AddQuantDequantPassV2
        >>> from paddle.base.framework import IrGraph
        >>> from paddle.framework import core

        >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
        >>> place = paddle.CPUPlace()
        >>> scope = paddle.static.global_scope()
        >>> add_quant_dequant_pass = AddQuantDequantPassV2(scope, place)
        >>> add_quant_dequant_pass.apply(graph)
rS   rT   NrO  rP  )rU   r   rV   rd   rQ  ri   rY   r!  rR  rf   r   rg   rS  r   rT  s              r1   r   AddQuantDequantPassV2.__init__
  s    X '.'%)%$7!00Gd#C#H#H#JKK ??K 1
 $(#<#<*
#<Rd%L#<*
& {{&A(AA&{{&A(AA& "*
s   Cc                   ^ [        U[        5      (       d   S5       eU R                  c  UR                  5       U l        [        R
                  " 5       nUR                  5        Vs/ s H  o3R                  5       PM     snU l        UR                  5       n[        [        U5      SSS9 nU GH  mTR                  5       U R                  ;   Ga  Sn[        U R                  [        5      (       aF  TR                  5       R!                  S5      =(       a    [#        U4S jU R                   5       5      nO[        U R                  [$        5      (       aa  TR                  5       R!                  S5      =(       a;    TR                  5       R'                  S5      R)                  U R                  5      S	:g  nTR                  5       R!                  S
5      =(       a"    TR                  5       R'                  S
5      S:H  nU(       d  U(       a  GMc  [*        R,                  " T5      nSn	U H  n
SU
;   d  M  Sn	  O   U	(       a  GM  U GHo  n
UR/                  TR0                  U
5      nUR3                  5       (       a  M7  UR5                  5       [6        R8                  [6        R:                  [6        R<                  4;  a)  [>        RA                  STR                  5        S35          OX;   a  X*   nO[C        U RD                  U RF                  U RH                  S	SU RJ                  U R                  U RL                  S9nURO                  UUTR                  5       R'                  S5      S9u  nnURQ                  UUUTR                  5       R'                  S5      5      nXU
'   URS                  XT5        GMr     URU                  5         GM!     SSS5        U Hu  mTR                  5       U RV                  ;   d  M#  TRY                  5        H>  nUU;   d  M  UR/                  TR0                  U5      nUU   nURS                  XT5        M@     Mw     U$ s  snf ! , (       d  f       N= f)rW  r   NrX  r   r   Fr   c              3   h   >#    U  H'  nUTR                  5       R                  S 5      ;   v   M)     g7fr   r   r   s     r1   r   .AddQuantDequantPassV2.apply.<locals>.<genexpr>U  r[  r   r   r   r   r\  Tz
Since the zJ contains an input of type INT, the quantization of this layer is skipped.)r   r   rj  rt   r}   r"  r   )r   )-r%   r   ri   r}   r   r   r   r)   r   r   r   r   rR  rY   rf   r   r   r   r   r   r   r   r5   r6   r7   r8   r   r  r   r  r  _loggerwarningrh  rV   rU   rQ  rd   r!  rx  r|  r   r   rS  r   )rn   r9   r^  r   r   r   r  r_  r`  rF   ra  r=   r   r  r   r   rb  r:   s                    @r1   r   AddQuantDequantPassV2.apply5  s    %)) 	
4	
) == !MMODM*668 $99;!
;FFH;!

 ))+l#O
 '<<>T%>%>>#G!$"4"4d;;")**,"7"7"G #C P+/+=+=P M $D$6$6<<#JJL11.A " '

!T.1!T$"4"45!!"    

--.AB -#JJL--.AB,- !
 ,  % = =g FI!&J$-2h>)-J! %. " $-"'":":#NNH# #..00$"==?"NN"NN"NN3 
 $OO",W\\^,<  =G  !H "#;/C/M,0D $ $+/+;+;+--2,0,=,=(,+/+;+;	1- !2 A A % '(/

(9(9)(D !B ! . . !2 C C$)$2$2$+JJL$5$5i$@	!" - >N://#w_ %.d 
i (
x $G||~!?!??")"9"9";J!%99"'":":#NNJ# ,@
+K(//#w #< $ ]!

 
s   'Q,FQ7FQ
Q*)
ri   rd   rV   rQ  rS  rR  r!  rU   rY   r   )	r  r  r  r  r  rf  r   r   r  rz  r3   r1   r  r  
  s=     H "^.9?#Br3   r  c                   :    \ rS rSrSrS
S jrS rS rS rS r	Sr
g	)ReplaceFakeQuantDequantPassi  zK
replace quant-dequant ops with quantize_linear and dequantize_linear ops.
c                     [        U5      U l        Xl        X0l        U R                  c   S5       eU R                  c   S5       eg)a  
Args:
    scope(paddle.Scope): The scope is used to initialize these new parameters.
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to initialize new
        parameters described above. If ``place`` is string, it can be It can be ``cpu``
        or ``gpu:x``, where ``x`` is the index of the GPUs.
    quant_bits(int, optional): quantization bit number for activation. Default is 8.

Examples:
    .. code-block:: python

        >>> # The original graph will be rewrite.
        >>> import paddle
        >>> import paddle.static as static
        >>> from paddle.static.quantization import ReplaceFakeQuantDequantPass
        >>> from paddle.base.framework import IrGraph
        >>> from paddle.framework import core

        >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
        >>> place = paddle.CPUPlace()
        >>> scope = paddle.static.global_scope()
        >>> replace_pass = ReplaceFakeQuantDequantPass(scope, place)
        >>> replace_pass.apply(graph)
NrO  rP  )r   rV   rU   rQ  )rn   r.   r/   r   s       r1   r   $ReplaceFakeQuantDequantPass.__init__  sG    2 (.%{{&A(AA&{{&A(AA&r3   c                 x   [        U[        5      (       d   S5       e/ n/ n/ nUR                  5        H<  nUR                  5       S:X  d  M  UR	                  UR                  S5      S   5        M>     UR                  5        Hn  nUR                  5       [        ;   d  UR                  5       S:X  d  M1  UR                  S5      S   nXd;   a  UR	                  U5        M]  UR	                  U5        Mp     U H  nUR                  UR                  UR                  S5      S   5      nUR                  UR                  UR                  S5      S   5      n	U	R                   H  n
UR                  XU
5        M     M     U H%  nU R                  X5        UR                  U5        M'     UR                  5         U$ )Nr   r%  r   r   r   )r%   r   r   r)   r   r  _fake_quant_dequant_op_listr  r6   r7   r   r   _replace_opr`  r   )rn   r9   fake_quant_dequant_opsremove_fake_quant_opsobserver_out_node_namesr   r<   r  x_noderd  rP  s              r1   r   !ReplaceFakeQuantDequantPass.apply  s   %)) 	
4	
) "$ ""$$$&Bwwy::'..ryy/?/BC '
 $$&B	88779 >>88C=+6)004*11"5 ' )C--cjj#))C.:KLF//SZZ.q1H !) 0 0'',G !1 ) *CU(##C( * 	r3   c                 >   UR                  UR                  UR                  S5      S   5      nUR                  UR                  UR	                  S5      S   5      nUR                  UR                  UR	                  S5      S   5      nUR                  5       R                  S5      (       a  UR                  5       R                  S5      OSnUR                  5       R                  S5      (       a  UR                  5       R                  S5      OU R                  nSUS-
  -  S-
  nSU-  S-
  n	S n
UnU
c  UR                  U R                  UR                  5       5      [        R                  R                  R                  UR!                  5       [        R                  R                  R"                  S	9n
[%        U
[&        R(                  " UR!                  5       S
S9U R*                  U R,                  5        UR/                  U R1                  UR                  5       5      UR3                  5       UR!                  5       UR5                  5       S	9nUR7                  SUUU	US.UUU
S.SU0S9nUR9                  X=5        UR9                  X]5        U
b  UR9                  X5        UR9                  X5        UR7                  SUUU	US.UUU
S.SU0S9nUR9                  X5        UR9                  X^5        U
b  UR9                  X5        UR9                  X5        g )Nr   r   r   r  r   r   r  r   r   rm  r   rq  )r   r  qminqmax)r   r6  rn  rp  r   r{  )r6   r7   r  r   r  r   r   r   rQ  r   rr  r)   r	   r   r   r   r   rs  r2   r&   r   rU   rV   r  r  r	  r   r   r   )rn   r9   r   r  rd  r-  r   r  r  r  rw  quanted_noder   r  r9  s                  r1   r  'ReplaceFakeQuantDequantPass._replace_op  s   ))"))RXXc]15EF++BJJ		%8H8KL--JJ		*-a0


 +-%%'*:*:<*H*HBEEGLL&b 	
 uuw-- EEGLL&!! 	
 j1n%*Dy1}"#;;**<+<+<+>?--:: &&(,,..44	 < O ))+7;	 ..))&++-8[[],,.lln	 / 
 ,,%((	 #,
 .) - 
 	f,j0&MM/9m4..'((	 $#,
 (O / 
 	n6j2&MM/;o0r3   c                     U S3$ rx  rz  r{  s     r1   r  /ReplaceFakeQuantDequantPass._quantized_var_nameR  r}  r3   c                     U S3$ r  rz  r{  s     r1   rr  ,ReplaceFakeQuantDequantPass._zero_point_nameX  r  r3   )rV   rQ  rU   N)rB   )r  r  r  r  r  r   r   r  r  rr  r  rz  r3   r1   r  r    s$    B>$LO1b'(r3   r  c                   @    \ rS rSrSr   S
S jrS rS rS rS r	Sr
g	)QuantWeightPassi_  ae  
quant weights and remove weights input quantize_linear node. for example:
`weight -> quant -> dequant -> conv2d` will be frozen into `weight -> dequant -> conv2d`,
and weight will be scaled offline.

Args:
    scope(paddle.Scope): scope is used to get the weight tensor values.
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
        If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
    bias_correction(bool): whether use bias correction for post-training quantization.
         https://arxiv.org/abs/1810.05723.
    quant_bits(int, optional): quantization bit number for weight. Default is 8.
    save_int_weight(bool, optional): Whether the type saving the weight is int. Default is True.

Examples:
    .. code-block:: python

        >>> # The original graph will be rewrite.
        >>> import paddle
        >>> import paddle.static as static
        >>> from paddle.static.quantization import QuantWeightPass
        >>> from paddle.base.framework import IrGraph
        >>> from paddle.framework import core

        >>> graph = IrGraph(core.Graph(paddle.static.Program().desc), for_test=False)
        >>> place = paddle.CPUPlace()
        >>> scope = paddle.static.global_scope()
        >>> quant_weight_pass = QuantWeightPass(scope, place)
        >>> quant_weight_pass.apply(graph)
c                     [        U5      U l        Xl        X0l        X@l        XPl        U R                  c   S5       eU R                  c   S5       e[        5       U l        g )NrO  rP  )r   rV   rU   r  rQ  _save_int_weightr+   r  )rn   r.   r/   r  r   save_int_weights         r1   r   QuantWeightPass.__init__  s]     (. /% /{{&A(AA&{{&A(AA&!er3   c           	         [        U[        5      (       d   S5       e/ nUR                  5        Vs/ s H  o3R                  5       S:X  d  M  UPM     nnU GH  nUR	                  UR
                  UR                  S5      S   5      nUR                  5       (       d  MI  UR	                  UR
                  UR                  S5      S   5      nUR	                  UR
                  UR                  S5      S   5      nUR	                  UR                  UR                  S5      S   5      n	U R                  UR                  5       5      n
U
R                  S;   d   S	5       eU
R                  S
:X  a  U
S   n
U
R                  S:X  a  UR                  5       S:X  a  U
S   n
OU
R                  5       n
U R                  UR                  5       5      nUR                  5       R                  S5      nUR                  5       R                  S5      nUR                  5       U R                   ;  a  U R                   R#                  UR                  5       5        [$        R&                  " UR)                  5       U
UUSS9nU R*                  SL a  [$        R,                  " UUU
UUS9nU R.                  (       a1  U R0                  S:X  a  [2        R4                  nUR7                  W5      nU R9                  UR                  5       U5        U	R                   H  nUR;                  XU5        M     UR=                  U5        GM     U R?                  U5        g s  snf )Nr   rq  r   r   r6  rn  rp  r  r  r   r   rC   r   r  T)onnx_formatr  rB   ) r%   r   r   r)   r6   r7   r  r8   r   r  r  r  r  r  r   r   r  r  r   r  r  r  r  r  rQ  r&   r  r  r  r   r`  r  )rn   r9   fake_quant_ops_for_weightr   fake_quant_opsr  r  r-  rw  rd  r  r  r   bits_lengthr  save_weight_dtyperP  s                    r1   r   QuantWeightPass.apply  s   %)) 	
4	
) %'! ++-
-2>O1OB- 	 
 "C--cjj#))C.:KLF!!##"55JJ		' 21 5
 #(":":JJ		+ 6q 9# !33KKC!3 ..):;|| (  9 99  <<1$%ajG<<1$y)@%ajG%nn.G..7 VVX]]<8
!ffhmmL9;;=(;(;;''++FKKM:(-(:(:"#$()% ,,4,1,C,C#-#&(3-) ,,++q002-,=,D,D--) %%fkkm5FG$,$4$4L++HlK %5'',o "p 	%%e,w
s   MMc                   ^ [        5       mUR                  5       nU HK  nUR                   H  nTR                  U5        M     UR                   H  nTR                  U5        M     MM     T Vs1 s H  ofR
                  iM     snm[        [        U4S jUR                  5       5      5      nUR                  U5        g s  snf )Nc                 "   > U R                   T;  $ r   r  r  s    r1   r  :QuantWeightPass._remove_unused_var_nodes.<locals>.<lambda>  r  r3   r  r  s	           @r1   r  (QuantWeightPass._remove_unused_var_nodes  r  r  c                 |    [         R                  " U R                  R                  U5      R	                  5       5      $ r   r  r  s     r1   r  QuantWeightPass._load_var  r  r3   c                     U R                   R                  U5      R                  5       nUR                  X R                  5        g r   r  r  s       r1   r  QuantWeightPass._restore_var  r  r3   )r  rV   rQ  r  r  rU   N)FrB   T)r  r  r  r  r  r   r   r  r  r  r  rz  r3   r1   r  r  _  s/    F $"A-F1$A'r3   r  c                   <    \ rS rSrSrS/ SS4S jrS rS rS	 rS
r	g)AddQuantDequantForInferencePassi  zy
When export quant model, it will traverse to find the output of each op, and then insert the quant/dequant op after it.
rB   NTc                     Xl         X l        X0l        X`l        U(       a  UO[	        [
        R                  " 5       5      U l        XPl        g)ae  
Args:
    scope(static.Scope): The scope is used to initialize these new parameters.
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
        If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
    quant_bits(int, optional): quantization bit number for weight. Default is 8.
N)	rU   rV   rQ  _only_observerrf   r   rg   r   _calibration_range_dict)rn   r.   r/   r   rv   calibration_range_dictonly_observers          r1   r   (AddQuantDequantForInferencePass.__init__  sF      %+ #  2779: 	
 (>$r3   c                 h   [        U[        5      (       d   S5       e0 n[        R                  " 5       nUR	                  5        H  nUR                  5       U R                  ;   d  M#  [        R                  " U5      nU H  nUR                  UR                  U5      nUR                  5       [        R                  [        R                  [        R                  4;  a  Mb  Xc;   a  X6   nOU R!                  X5      nXU'   XU'   M     M     UR	                  5        H  nUR                  5       S:X  a  UR#                  U5        M*  [        R$                  " U5      nU HL  nUR'                  U5      (       d  M  UR                  UR(                  U5      n	UR+                  XU   U5        MN     M     U$ )-
Args:
    graph(IrGraph): the target graph.
r   r%  )r%   r   r   r   r   r)   r   r   r&  r6   r   r   r  r   r  r  _insert_quant_dequant_opr`  r5   getr7   r   )
rn   r9   dequant_node_mapr^  r:   r>  r<   rd  r   r=   s
             r1   r   %AddQuantDequantForInferencePass.apply  s   
 %)) 	
4	
) *668))+G||~!1!11!::7C	 )H$77 H  ~~'0 
 !7+?+I(+/+H+H!,( :JX61AX.# !* ,. ))+G||~!??''0!99'B	 )H'++H55"'":":#NNH# //#h%? !* , r3   c                     U S3$ r1  rz  r{  s     r1   r*  +AddQuantDequantForInferencePass._scale_name>  r  r3   c           	      T   UR                  5       (       d   UR                  5        S35       eUR                  5       nSnUR                  U S3UR                  5       UR	                  5       UR                  5       S9n UR                  UR                  5       U R                  U5      5      n UR                  UR                  5       UR                  5        S35      n	X&S.n
U	b  XS'   UU R4                  U R6                  S.n[(        R8                  R:                  R<                  US'   SU0nUR?                  SUU
US9nURA                  X-5        URA                  Xm5        U	b  URA                  X5        URA                  X5        UR                  UR                  5        S3UR                  5       UR	                  5       UR                  5       S9nXVS.n
U	b  XS'   SU R4                  U R6                  S.n[(        R8                  R:                  R<                  US'   UR?                  SUU
SU0S9nURA                  X_5        URA                  Xo5        U	b  URA                  X5        URA                  X5        U$ !   U R                  (       a  X0R                  ;   a  U R                  U   nUR                  U R                  U5      UR                  5       S/UR                  5       S9nUR                  5       [        R                  :X  a  SOSn[        U[        R                  " XxS9U R                   U R"                  5         GN[$        R'                  S	U S
35         g = f!   UR                  UR                  5        S3[(        R*                  R,                  R.                  UR	                  5       [(        R*                  R,                  R0                  S9n	[        U	[        R2                  " UR	                  5       SS9U R                   U R"                  5         GNF= f)Nr   r   ry  r   r   r   r  r   zCannot find the target node z% in scope, so skip adding quant node.r  rm  r5  rn  )r   r  r  r   rp  rq  r   r  r{  )!r  r)   r  r	  r   r   r6   r   r*  r  r   r  r   r2   r&   r  rU   rV   r  r  r	   r   r   r   rs  r   rQ  r  r   r   r   r   r   )rn   r9   r,   r<   r   r   r   r  r  rw  r7   r   r   r  r   r9  s                   r1   r  8AddQuantDequantForInferencePass._insert_quant_dequant_opD  s     CX]]_$5]"CC ==?
..:Z(]]_.."nn&	 / 
	"55++-t/?/?/IN>	#66++-!&&()5O$  9&"1; %**!00

  ::AAIIi',,%	 - 
 	h.n4&MM/9m4 !00"'')*,7#((* &&($**,	 1 
 &?&"1; **!00

  ::AAIIi..'*+	 / 
 	n6n6&MM/;o8O	,, < <<"::8D!&!>!>))(3%]]_#&nn.	 "? "  ~~'6>>9 " 
 "HH[:KKKK	 28*<ab 	#;;&++-.k:--::$**,,,..44	 < O --/w?	s     /J  01M*  C	M'M'*B:P')r  r  rV   rQ  rU   r   )
r  r  r  r  r  r   r   r*  r  r  rz  r3   r1   r  r    s+     #>6/b#v r3   r  c                   >    \ rS rSrSr  S
S jrS rS rS rS r	Sr
g	)AddQuantDequantForResiduali  zW
Quantize the residual connections. Add quant and dequant ops for the residual inputs.
c                     [        U5      U l        Xl        X0l        X@l        U R                  c   S5       eU R                  c   S5       eg)a  
Args:
    scope(static.Scope): The scope is used to initialize these new parameters.
    place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
        If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
    quant_bits(int, optional): quantization bit number for weight. Default is 8.
    is_test(bool, optional): Whether quantization with training or not. Default is True.
NrO  rP  )r   rV   rU   rQ  ri   )rn   r.   r/   r   r}   s        r1   r   #AddQuantDequantForResidual.__init__  sL     (.%{{&A(AA&{{&A(AA&r3   c                    [        U[        5      (       d   S5       eU R                  U5      nU R                  U5      nUR	                  5        H  nUR                  5       S:w  a  M  UR                  S   R                  5       nUR                  S   R                  5       nXR;   d  Xb;   a  M_  X5   X6   :  a  UR                  S   OUR                  S   nU R                  XU5        M     g)r  r   rL  r   r   N)r%   r   _all_weight_node_names_var_name_orderr   r)   r7   _insert_quant_dequant)rn   r9   weight_var_namesvar_node_names_with_orderr   first_input_namesecond_input_name	skip_nodes           r1   r    AddQuantDequantForResidual.apply  s    
 %)) 	
4	
)  66u=$($8$8$?!$$&Bwwy--!yy|002 "		! 1 1 3 4$8 ->+>? 		! YYq\	  &&u<! 'r3   c                 d   UR                  5        Vs/ s H  o"R                  5       PM     nnUR                  5        Hi  nUR                  5       S:X  d  M  UR                  S   R	                  5       (       d  M=  UR                  UR                  S   R                  5       5        Mk     U$ s  snf )z=
Return a list of weight variables (including casted weight)
r  r   )r   r)   r   r7   r8   r   r   )rn   r9   r   r  r   s        r1   r  1AddQuantDequantForResidual._all_weight_node_names  s    
 %*$?$?$A
$ADIIK$A 	 
 $$&BwwyF"ryy|'?'?'A'A ''

1(:(:(<= '  
s   B-c                     UR                  5       n0 n[        U5       H@  u  pEUR                   H+  nUR                  5       nUR	                  U5      b  M'  XCU'   M-     MB     U$ )zI
Return a dictionary with variable names as key and their order as value
)topology_sort	enumerater7   r)   r  )rn   r9   ordered_opsr  idxr:   in_var_nodein_var_names           r1   r  *AddQuantDequantForResidual._var_name_order  sf     ))+$&!%k2LC&~~)..0,00=E=@k:  . 3 )(r3   c           	      B   [        U R                  U R                  U R                  SSU R                  S9nUR                  5       S-   nUR                  5       R                  S5      nUR                  XXVS9u  nnUR                  XX5      n	UR                  X)U5        g)zV
Insert per tensor quantize_linear and dequantize_linear node between var_node and op
r   F)r   r   rj  r}   z.skipr   r  N)rh  rV   rU   rQ  ri   r)   r   r   rx  r|  r   )
rn   r9   r,   r   r  quant_var_namer   r   r   r   s
             r1   r  0AddQuantDequantForResidual._insert_quant_dequant  s     1KKKK''MM
 "72%%',,y) --n . 
	
 ->>>
 	B?r3   )ri   rV   rQ  rU   N)rB   T)r  r  r  r  r  r   r   r  r  r  r  rz  r3   r1   r  r    s*     B,=8 )@r3   r  )2r   loggingnumpyr&   r   r   r  base.frameworkr   r   	frameworkr   r	   staticr
   r   r   r   r   
log_helperr   rB  quant_configr   r   r   r  INFOr  r  r  r  	_conv_opsr  r2   r>   r@   r  r  r  r  r6  rE  rh  r  r  r  r  r  r  rz  r3   r1   <module>r     sQ       - 0 ? ?   #   gll H  * 
  ?	 
([ [|jJ jJZY1 Y1x' 'Tj# j#ZE# E#PS. S.l|( |(~t"; tnI IXe( e(PK' K'\M  M `i@ i@Oms   D D