
    ёiI                        S r SSKJr  SSKrSSKrSSKJrJr  SSKJr  SSK	J
r
Jr  SSKJr  SS	 jrSS
 jr " S S\5      r " S S\5      r " S S\5      r " S S\\R&                  S9rg)zEDefine some layers used to export quantization model with ONNX style.    )annotationsN)_C_ops_legacy_C_ops)unique_name)in_dynamic_modein_pir_mode   )Layerc                   US:  a<  S/[        U R                  5      -  nUR                  5       XB'   UR                  U5      nU R	                  S5      nUS:X  aF  [
        R                  " US-  U-  R                  SS5      S5      R	                  U R                  5      $ US:X  aF  [
        R                  " US	-  U-  R                  S
S	5      S5      R	                  U R                  5      $ [        S5      e)Nr      float32e4m3  @float8_e4m3fne5m2     float8_e5m2only support e4m3 or e5m2 now)
lenshapenumelreshapeastypepaddlecastclipdtypeNotImplementedError)inputscaleaxistyper   inps         V/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/nn/quant/format.pyfake_fp8_quantr'      s    qyc%++&&kkme$
,,y
!Cv~{{3Y$$T3/

&
	 
{{5[5 &&vu5}

&
	 ""ABB    c                t   US:  a<  S/[        U R                  5      -  nUR                  5       XB'   UR                  U5      nUS:X  a0  U R	                  S5      S-  U-  R	                  U R
                  5      $ US:X  a0  U R	                  S5      S-  U-  R	                  U R
                  5      $ [        S5      e)	Nr   r   r   r   r   r   r   r   )r   r   r   r   r   r   r    )r!   r"   r#   r$   r   s        r&   fake_fp8_dequantr*   0   s    qyc%++&&kkme$v~Y'#-5==ekkJJ	Y'%/%7??LL!"ABBr(   c                  >   ^  \ rS rSrU 4S jrS r\S 5       rSrU =r	$ )LinearQuanterDequanter>   c                :   > [         TU ]  5         Xl        X l        g N)super__init___quanter
_dequanter)selfquanter	dequanter	__class__s      r&   r1   LinearQuanterDequanter.__init__?   s    #r(   c                    UnU R                   b  U R                  U5      nU R                  b  U R                  U5      nU$ r/   )r2   r3   )r4   r!   outs      r&   forwardLinearQuanterDequanter.forwardD   s<    ==$--$C??&//#&C
r(   c                p    U c   e[        [        R                  U 5      [        R                  U 5      5      $ r/   )r,   LinearQuanterfrom_quanterLinearDequanterr5   s    r&   r?   #LinearQuanterDequanter.from_quanterL   s7    """%&&w/((1
 	
r(   )r3   r2   
__name__
__module____qualname____firstlineno__r1   r;   staticmethodr?   __static_attributes____classcell__r7   s   @r&   r,   r,   >   s!    $
 
 
r(   r,   c                  J   ^  \ rS rSr    SU 4S jjrS r\S 5       rSrU =r	$ )r>   U   c                8  > [         TU ]  5         [        R                  " USS9n[        R                  R                  [        R                  R                  R                  S5      [        R                  R                  R                  S5      SS9nU R                  UR                  USS9U l        U R                  R                  U5        [        R                  " SSS9U l        [        R                  " SSS9U l        Ub  UOUn[        R                  " USS9n[        R                  R                  [        R                  R                  R                  S	5      [        R                  R                  R                  S5      SS9nU R                  UR                  USS9U l        U R$                  R                  U5        Uc  S
OUU l        X@l        XPl        [-        U R(                  [.        5      (       a  U R(                  S   S:X  a;  U R(                  S   S:X  a(  [1        U R(                  5      S:X  a  SU l        SU l        OU R(                  S   S:X  a;  U R(                  S   S:X  a(  [1        U R(                  5      S:X  a  SU l        SU l        O<[7        S5      eSU R(                  S-
  -  S-
  U l        S
U R4                  -  S-
  U l        [-        U R(                  [.        5      (       a(  U R(                  S   U R(                  S   -   S-   U l        g g Nr   )r   zquant_dequant.scaleg      ?F)nameinitializer	trainable)r   attrr           zquant_dequant.zero_pointr      r      r	   r   r      r   r   zCurrently, only float8_e4m3 and float8_e5m2 formats are supported. Please set quant_bits to (4,3) or (5,2) for the corresponding format.r0   r1   r   	to_tensor	framework	ParamAttrutilsr   generatennrQ   Constantcreate_parameterr   _scales	set_valuein_accumin_state_zero_point_quant_axis_bit_length_group_size
isinstancetupler   _qmin_qmaxr    	r4   scales
zero_point
quant_axis
bit_length
group_size
scale_attrzp_attrr7   s	           r&   r1   LinearQuanter.__init__V       	!!&	:%%//))223HI		--66s; 0 


 ,,,,Zy - 
 	v&((I>((I>#-#9Zz
%%j	B
"",,))223MN		--66s; - 

  00""	 1 
 	"":.!+!32%%d&&..  #q($$Q'1,(()Q.%
 
  #q($$Q'1,(()Q.'
"
) _   0 01 45:DJdjj1,DJd&&..#//2T5E5Ea5HH1LD /r(   c                   [        5       (       Gac  U R                  S:X  a  [        XR                  U R                  SS9$ U R                  S:X  a  [        XR                  U R                  SS9$ [        U R                  R                  5      S:  Ga]  U R                  R                  5       S:w  aj  [        R                  " [        R                  " UR                  S5      U R                  -  5      U R                  -   U R                  U R                  5      nO[        R                  " U R                  U R                  S5      n[        R                  " U R                  U R                  S5      n[        R                  " [        R                  " UR                  S5      U-  U R                  -  5      U-   U R                  U R                  5      nUR                  UR                   5      $ ["        R$                  " UR                  S5      U R                  U R                  S	U R                  S
U R&                  SU R                  SU R                  5      R                  UR                   5      $ [)        5       (       a  SUl        [        R,                  R.                  R1                  SUR                  [2        R4                  " S5      [        R6                  R8                  R;                  S5      SS9n[<        R$                  " UU R                  U R                  U R>                  U R@                  U R                  U R&                  U R                  U R                  SSS5      u  pVpxU$ U RB                  RE                  UR                   5      n	U RB                  RG                  SUU R                  U R                  S.SU	0U R                  U R&                  U R                  U R                  S.S9  U	$ )Nr   r   r$   r   r   r   r   r   rq   rr   qminqmaxT	quant_outrT   r   r   rP   rQ   stop_gradientFquantize_linearXScale	ZeroPointYrq   rr   rz   r{   r$   inputsoutputsattrs)$r   rm   r'   rb   rg   r   r   rf   sumr   r   roundr   rl   repeat_interleaveri   r   r   r   rh   r   r~   pircorecreate_persistable_valuer   r^   r_   rQ   r`   r   rd   re   _helper"create_variable_for_type_inference	append_op)
r4   r!   quant_weightnew_snew_zpr|   	out_state	out_accum	out_scaler:   s
             r&   r;   LinearQuanter.forward   s7   zzS %<<)9)9  u$%<<)9)9  T\\''(1,##'')Q.#);;UZZ	%:T\\%IJ**+



	$L #44d&6&6E $55(($*:*:AF $*;;UZZ	%:U%BTZZ%OP !



	$L $((55 00

9%      



 d5;;  =="&E

@@kk ))+6"II11::3?" A I :@9O9O      



:6I) ,,AA%++NCLL""&!\\!%!1!1
 c
"&"2"2"&"2"2 JJ JJ	 #  Jr(   c                    [        U R                  5       U R                  5       U R                  5       U R	                  5       S9$ N)rp   rq   rr   )r>   ro   zero_pointsrq   rr   rA   s    r&   r?   LinearQuanter.from_quanter   s=    NN**,))+))+	
 	
r(   	rh   ri   rm   rl   rg   rb   rf   rd   re   NN      rC   rK   s   @r&   r>   r>   U   s3     :MxXt 
 
r(   r>   c                  J   ^  \ rS rSr    SU 4S jjrS r\S 5       rSrU =r	$ )r@      c                8  > [         TU ]  5         [        R                  " USS9n[        R                  R                  [        R                  R                  R                  S5      [        R                  R                  R                  S5      SS9nU R                  UR                  USS9U l        U R                  R                  U5        [        R                  " SSS9U l        [        R                  " SSS9U l        Ub  UOUn[        R                  " USS9n[        R                  R                  [        R                  R                  R                  S	5      [        R                  R                  R                  S5      SS9nU R                  UR                  USS9U l        U R$                  R                  U5        Uc  S
OUU l        X@l        XPl        [-        U R(                  [.        5      (       a  U R(                  S   S:X  a;  U R(                  S   S:X  a(  [1        U R(                  5      S:X  a  SU l        SU l        OU R(                  S   S:X  a;  U R(                  S   S:X  a(  [1        U R(                  5      S:X  a  SU l        SU l        O<[7        S5      eSU R(                  S-
  -  S-
  U l        S
U R4                  -  S-
  U l        [-        U R(                  [.        5      (       a(  U R(                  S   U R(                  S   -   S-   U l        g g rO   rY   rn   s	           r&   r1   LinearDequanter.__init__   rw   r(   c                   [        5       (       Ga  U R                  S:X  a  [        XR                  U R                  SS9$ U R                  S:X  a  [        XR                  U R                  SS9$ [        U R                  R                  5      S:  a  U R                  R                  5       S:w  a,  UR                  S5      U R                  -
  U R                  -  nO|[        R                  " U R                  U R                  S5      n[        R                  " U R                  U R                  S5      nUR                  S5      U-
  U R                  -  U-  nUR                  UR                  5      $ [        R                  " UR                  S5      U R                  U R                  S	U R                  S
U R                   SU R"                  SU R                  5      R                  UR                  5      $ [%        5       (       a  SUl        [        R(                  R*                  R-                  SUR                  [.        R0                  " S5      [        R2                  R4                  R7                  S5      SS9n[8        R                  " UU R                  U R                  U R:                  U R<                  U R                  U R                   U R"                  U R                  SSS5      u  pVpxU$ U R>                  RA                  UR                  5      n	U R>                  RC                  SUU R                  U R                  S.SU	0U R                  U R                   U R"                  U R                  S.S9  U	$ )Nr   r   ry   r   r   r   r   r   rq   rr   rz   r{   Tr|   rT   r}   Fdequantize_linearr   r   r   r   )"r   rm   r*   rb   rg   r   r   rf   r   r   r   r   ri   r   r   r   rh   rl   r   r~   r   r   r   r   r^   r_   rQ   r`   r   rd   re   r   r   r   )
r4   r!   quant_dequant_weightr   r   dequant_outr   r   r   r:   s
             r&   r;   LinearDequanter.forward3  s   zzS '<<)9)9  u$'<<)9)9  T\\''(1,##'')Q.

9-0@0@@,%( #44d&6&6E $55(($*:*:AF I.74::EM ) ,00== 22

9%      



 d5;;  =="&E **//BBkk ))+6"II11::3?" C K ((LL$$MMMM$$$$JJJJ 9KI  ,,AA%++NCLL""(!\\!%!1!1
 c
"&"2"2"&"2"2 JJ JJ	 #  Jr(   c                    [        U R                  5       U R                  5       U R                  5       U R	                  5       S9$ r   )r@   ro   r   rq   rr   rA   s    r&   r?   LinearDequanter.from_quanter  s=    NN**,))+))+	
 	
r(   r   r   rC   rK   s   @r&   r@   r@      s3     :MxTl 
 
r(   r@   c                     ^  \ rS rSrSrU 4S jr\R                  S
S j5       r\R                  SS j5       r	SS jr
S rSS jrS	rU =r$ )ConvertibleQuantedLayeri  a1  Abstract class to help convert quantized layer to inference model.
It defines some functions to convert quantizers and observers to quantize
or dequantize operators that maintain the quantization parameters used
during inference.

Examples:
    .. code-block:: python

        >>> # Given codes in ./customized_quanter.py
        >>> class CustomizedQuantedLayer(ConvertibleQuantedLayer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
        ...         self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
        ...         self.quanter_for_weight_a = None
        ...         self.activation_weight = None
        ...
        ...     def forward(self, input):
        ...         qweight_a = self.quanter_for_weight_a(self.weight_a)
        ...         weight_b = self.weight_b
        ...         qinput = self.activation_weight(input)
        ...         # compute with qweight_a, weight_b and qinput.
        ...         return qweight * qinput + weight_b
        ...
        ...     def weights_to_quanters(self):
        ...         return [('weight_a', 'quanter_for_weight_a')]
        ...
        ...     def activation_quanters(self):
        ...         return ['activation_weight']
c                0   > [         TU ]  5         SU l        g )NF)r0   r1   	converted)r4   r7   s    r&   r1    ConvertibleQuantedLayer.__init__  s    r(   c                    g)up  Get the name pairs of weights to be quantized and their corresponding
quantizers. In the convert function of this abstract class, it will call
the ‘weights_to_quanters’ function and do something as follows:
For each pair, the quantizer will be converted to a quantize operator and
a dequantize operator. Then, the weight will be quantized by the quantize
operator. Finally, the quantize operator will be removed and the weights
will be stored in integer data type.

Returns: A list of name pairs. Each pair contains two names. The first is name of weight
to be quantized and the second is name of corresponding quanter.
N r4   s    r&   weights_to_quanters+ConvertibleQuantedLayer.weights_to_quanters  s     	r(   c                    g)zGet the names of quanters used to quantize activations.
All the quanters or observers returned by this function will be converted to quantize
and dequantize operators for deployment.
Returns: A list of quanter names.
Nr   r   s    r&   activation_quanters+ConvertibleQuantedLayer.activation_quanters  s     	r(   c                    [        X5      (       d  g[        X5      nUc  g[        R                  U5      n[	        XU5        X R
                  U'   U$ )z9Convert quanter to an instance of LinearQuanterDequanter.N)hasattrgetattrr,   r?   setattr_sub_layers)r4   quanter_namer5   s      r&   _convert_quanter_to_qdq/ConvertibleQuantedLayer._convert_quanter_to_qdq  sO    t**$-?(55g>G,)0&r(   c                L    [        X5      nU" U5      nUR                  U5        g)z%Quantize the weight by given quanter.N)r   rc   )r4   weight_namer5   weightqweights        r&   _quant_weights&ConvertibleQuantedLayer._quant_weights  s#    +&/!r(   c                `   U R                   (       a   S5       eU R                  5        HS  u  p#U R                  U5      nUc  M  USL d  M"  U R                  X$R                  5        SUl        SUR
                  S'   MU     U R                  5        H  nU R                  U5        M     SU l         g)z2Convert current layer to onnx style for inference.z(The model should be converted only once.NFr2   T)r   r   r   r   r2   r   r   )r4   remain_weightr   r   qdqs        r&   _convert ConvertibleQuantedLayer._convert  s    >>M#MM!)-)A)A)C%K..|<C=E#9##K>#.2
+ *D !446L((6 7 r(   )r   )returnzlist[tuple[str, str]])r   z	list[str])r   r,   )F)rD   rE   rF   rG   __doc__r1   abcabstractmethodr   r   r   r   r   rI   rJ   rK   s   @r&   r   r     sU    > 	  	 
" r(   r   )	metaclass)rU   r   )r   
__future__r   r   r   r   r   paddle.baser   paddle.frameworkr   r   layer.layersr
   r'   r*   r,   r>   r@   ABCMetar   r   r(   r&   <module>r      sj    L " 
  ( # 9  C(C
U 
.^
E ^
BZ
e Z
z[es{{ [r(   