
    ёi=                    J   % S SK Jr  S SKrS SKJrJrJr  S SKrS SKJrJ	r	J
r
  S SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJr  S S
KJr  S SKJrJr  S SKJr  SSKJr  \(       a   S SK J!r!J"r"  S SKJ#r#  S SK$J%r%J&r&  \S   r'S\(S'   / SQr)\" \*\RV                  SS9r, " S S\5      r- " S S\5      r. " S S\5      r/ " S S\5      r0\0r1 " S  S!\5      r2 " S" S#\5      r3 " S$ S%\5      r4 " S& S'\5      r5 " S( S)\5      r6 " S* S+\5      r7 " S, S-\5      r8 " S. S/\5      r9S0 r:g)1    )annotationsN)TYPE_CHECKINGAnyLiteral)_C_ops_legacy_C_opsin_dynamic_mode)check_variable_and_dtype)_create_tensor)
get_logger)	ParamAttrcore)
functional)Constant)FakeQuantActLSQPlusFakeQuantWeightLSQPlus)unique_name   )Layer)Never	TypeAlias)Tensor)	DTypeLikeSize2)abs_maxmoving_average_abs_maxchannel_wise_abs_max
lsq_weightchannel_wise_lsq_weightlsq_actr   
_QuantType)FakeQuantAbsMaxFakeQuantMovingAverageAbsMaxFakeQuantChannelWiseAbsMaxQuantizedConv2DQuantizedConv2DTransposeQuantizedLinearMovingAverageAbsMaxScaleMAOutputScaleLayerFakeQuantMAOutputScaleLayer	QuantStubQuantizedRowParallelLinearQuantizedColumnParallelLinearQuantizedMatmulz&%(asctime)s-%(levelname)s: %(message)s)fmtc                  ^   ^  \ rS rSrSr     S           SU 4S jjjrSS jrSrU =r$ )	r"   E   z
FakeQuantAbsMax layer does the abs_max quant and then dequant.
Its computational formula is described as below:

:math:`scale = max(abs(X))`
:math:`range = 2^{bit\_length - 1} - 1`
:math:`Out = round(X / scale * range) * scale / range`
c                T  > [         TU ]  5         X l        Xl        XPl        U(       a  U S3OSn[
        R                  " U5      U l        U(       aP  [        U R                  [        S5      SS9nU R                  S/XpR                  S9U l        SU R                  l        g S U l        g )	N.scalequant_dequant.scaleMbP?Fnameinitializer	trainable   shapeattrdtypeT)super__init___quant_bits_name_reduce_typer   generate_scale_namer   r   create_parameter_dtype_scalestop_gradient)	selfr7   
quant_bitsr>   quant_on_weightreduce_typescale_prefix
scale_attr	__class__s	           \/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/nn/quant/quant_layers.pyr@   FakeQuantAbsMax.__init__O   s     	%
'*.$v4I&//="%%$UOJ
 //c
++ 0 DK )-DKK%DK    c                   [        5       (       GaF  SU R                  4n[        UR                  UR                   S3UR
                  UR                  SS9nU R                  nU R                  S:X  a@  [        R                  R                  U[        R                  R                  R                  S9  U(       dK  [        [        R                  R                   R"                  U R$                  S/U R&                  SS9nSUl        [*        R,                  " XR                  S5      u  nn[*        R.                  " XS5        [*        R.                  " Xd5        U$ [1        US	S
/S5        SU R                  0nSU/0nU R2                  R5                  UR                   S3UR                  [        R                  R                   R"                  SSS9nU R                  nU(       dS  U R2                  R5                  U R$                  U R&                  [        R                  R                   R"                  SSS9nU/U/S.nU R2                  R7                  SUUUS9  U$ )N
bit_length.quantized.dequantizedFtyper7   r<   r>   persistablemaxopr:   Tinputfloat32r"   Xr7   r>   rX   rY   rI   OutOutScale fake_quantize_dequantize_abs_maxrX   inputsoutputsattrs)r	   rA   r   rX   r7   r<   r>   rH   rC   paddledistributed
all_reduceReduceOpMAXr   VarDescVarTypeDENSE_TENSORrE   rG   rI   r   rd   assign_out_r
   _helpercreate_variable	append_op)	rJ   r]   rh   	quant_out	out_scaleout1out2rf   rg   s	            rQ   forwardFakeQuantAbsMax.forwardj   s"   !4#3#34E&ZZ

|#9:kkkk!I I  E)""--&"4"4"="="A"A .  *--::))#++ %	 +/	' 77'' t/t/ )>OPt//0wLL00JJ<56++%%22 1 
	 KK	44%%kk\\))66!" 5 I %+I;?3	 	 	
 rS   )rB   rA   rC   rH   rE   )N   r^   FN)r7   
str | NonerK   intr>   r   rL   boolrM   Literal['max'] | NonereturnNoner]   r   r   r   	__name__
__module____qualname____firstlineno____doc__r@   ry   __static_attributes____classcell__rP   s   @rQ   r"   r"   E   sg      $ %-1  	
  + 
 6? ?rS   r"   c                  ^   ^  \ rS rSrSr     S           SU 4S jjjrSS jrSrU =r$ )	r#      a5  
FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
Its computational formula is described as below:

:math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
:math:`range = 2^{bit\_length - 1} - 1`
:math:`Out = round(X / scale * range) * scale / range`
c                t  > [         TU ]  5         X l        X0l        XPl        U(       a  U S3OSn[        [        R                  " U5      [        S5      SS9nU R                  S/XtS9U l
        SU R                  l        U(       a  U S	3OS
n[        [        R                  " U5      [        S5      SS9n	U R                  S/XS9U l        SU R                  l        U(       a  U S3OSn
[        [        R                  " U
5      [        S5      SS9nU R                  S/XS9U l        SU R                  l        g )Nr3   r4   r5   Fr6   r:   r;   T.statezquant_dequant.state.accumzquant_dequant.accum)r?   r@   _moving_raterA   rC   r   r   rD   r   rF   rH   rI   _state_accum)rJ   r7   moving_raterK   r>   rM   rN   rO   state_prefix
state_attraccum_prefix
accum_attrrP   s               rQ   r@   %FakeQuantMovingAverageAbsMax.__init__   sB    	'%'*.$v4I%%l3 


 ++#J , 
 %)!*.$v4I%%l3 


 ++#J , 
 %)!*.$v4I%%l3 


 ++#J , 
 %)!rS   c           
        [        5       (       Ga  SU R                  SU R                  SU R                  (       + 4n[	        UR
                  UR                   S3UR                  UR                  SS9nU R                  S:X  aJ  [        R                  R                  U R                  [        R                  R                  R                  S9  U R                  (       a  U R                   OS nU R                  (       a  U R"                  OS n[$        R&                  " UU R                  UUU R                  U R                  U R                  (       + S	5      u  nnnn	[$        R(                  " Xc5        UR+                  5       (       a   [$        R(                  " XpR                  5        U(       a  [$        R(                  " X5        U(       a  [$        R(                  " X5        U$ [-        US
S/S5        U R                  U R                  U R                  (       + S.nU/U R                  /S.n
U R.                  R1                  UR                   S3UR                  [2        R4                  R6                  R8                  SSS9nU/U R                  /S.nU R                  (       a@  U R                   /U
S'   U R"                  /U
S'   U R                   /US'   U R"                  /US'   U R.                  R;                  SU
UUS9  U$ )Nr   rU   is_testrV   FrW   rZ   r[   r:   r]   r^   r#   )r   rU   r   )r_   InScaler`   ra   InStateInAccumOutStateOutAccum/fake_quantize_dequantize_moving_average_abs_maxre   )r	   r   rA   trainingr   rX   r7   r<   r>   rC   ri   rj   rk   rH   rl   rm   r   r   r   r   rq   _is_initializedr
   rr   rs   r   rn   ro   rp   rt   )rJ   r]   rh   ru   stateaccumrw   rx   out3out4rf   rg   s               rQ   ry   $FakeQuantMovingAverageAbsMax.forward   s   !!  MM!E 'ZZ

|#9:kkkk!I   E)""--KKF$6$6$?$?$C$C .  $(==DKKdE#'==DKKdE FF!!  MM!	 t/##%%""45""4/""4/ 7YK)G	
  ,,**==(

 wDKK=9LL00JJ<56++%%22 1 
	 %+DKK=A==!%F9!%F9#';;-GJ#';;-GJB	 	 	
 rS   )r   r   rA   rC   rH   r   )N?r{   r^   N)r7   r|   r   floatrK   r}   r>   r   rM   r   r   r   r   r   r   s   @rQ   r#   r#      sj       $-1+)+) +) 	+)
 +) ++) 
+) +)ZP PrS   r#   c                  f   ^  \ rS rSr       S               SU 4S jjjrSS jrSrU =r$ )r$   i6  c                  > U(       d   S5       e[         T
U ]  5         X0l        X@l        XPl        Xl        X l        Xpl        U(       a  U S3OSn[        R                  " U5      U l
        U(       aZ  [        U R                  [        S5      SS9n	U R                  U R                  /XR                  S9U l        SU R                  l        g S U l        g )	Nz5Channel_wise only can be used on weight quantization.r3   r4   g        Fr6   r;   T)r?   r@   rA   _quant_axisrG   rB   _channel_numrC   r   rD   rE   r   r   rF   rH   rI   )rJ   r7   channel_numrK   
quant_axisr>   rL   rM   rN   rO   rP   s             rQ   r@   #FakeQuantChannelWiseAbsMax.__init__7  s      	
C	
 	%%
''*.$v4I&//="%%$SMJ
 //(()
++ 0 DK )-DKK%DKrS   c                   [        5       (       Gac  SU R                  SU R                  4n[        UR                  UR
                   S3UR                  UR                  SS9nU R                  nU R                  S:X  a@  [        R                  R                  U[        R                  R                  R                  S9  UcU  [        [        R                   R"                  R$                  U R&                  U R(                  /U R*                  SS9nSUl        [.        R0                  " XR                  S	U R                  5      u  nn[.        R2                  " XS5        [.        R2                  " Xd5        U$ [5        US
S/S5        U R                  S	U R                  S.nSU/0nU R6                  R9                  UR
                   S3UR                  [        R                   R"                  R$                  SSS9nU R                  nU(       dS  U R6                  R9                  U R&                  U R*                  [        R                   R"                  R$                  SSS9nU/U/S.nU R6                  R;                  SUUUS9  U$ )NrU   r   rV   FrW   rZ   r[   Tr:   r]   r^   r$   )rU   
round_typer   r_   r`   ra   -fake_channel_wise_quantize_dequantize_abs_maxre   )r	   rA   r   r   rX   r7   r<   r>   rH   rC   ri   rj   rk   rl   rm   r   rn   ro   rp   rE   r   rG   rI   r   r   rq   r
   rr   rs   rt   )	rJ   r]   rh   ru   rv   outscalerf   rg   s	            rQ   ry   "FakeQuantChannelWiseAbsMax.forwardZ  sX       	E 'ZZ

|#9:kkkk!I I  E)""--&"4"4"="="A"A .   *--::)),,-++ %	 +/	'
 DD''D,<,< s.u0 7YK)E	
 ****

 wLL00JJ<56++%%22 1 
	 KK	44%%kk\\))66!" 5 I %+I;?@	 	 	
 rS   )r   rG   rB   r   rA   rC   rH   rE   )NNr{   r   r^   FN)r7   r|   r   z
int | NonerK   r}   r   r}   r>   r   rL   r~   rM   r   r   r   r   r   r   r   r   r@   ry   r   r   r   s   @rQ   r$   r$   6  s      "&$ %-1!!  ! 	!
 ! ! ! +! 
! !FK KrS   r$   c                  T   ^  \ rS rSr    S         SU 4S jjjrSS jrSrU =r$ )r(   i  c                l  > [         TU ]  5         X l        X@l        U(       a  U S3OSn[        R
                  " U5      n[        U[        S5      SS9nU R                  S/XsS9U l	        SU R                  l
        U(       a  U S	3OS
n[        [        R
                  " U5      [        S5      SS9n	U R                  S/XS9U l        SU R                  l
        U(       a  U S3OSn
[        [        R
                  " U
5      [        S5      SS9nU R                  S/XS9U l        SU R                  l
        g)z
MovingAverageMaxScale layer is used to calculating the output quantization
scale of Layer. Its computational formula is described as below:

:math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
:math:`Out = X`
r3   zoutscale.scaler   Fr6   r:   r;   Tr   zoutscale.stater   zoutscale.accumN)r?   r@   r   rC   r   rD   r   r   rF   rH   rI   r   r   )rJ   r7   r   r>   rM   rN   
scale_namerO   r   r   r   r   rP   s               rQ   r@   !MovingAverageAbsMaxScale.__init__  s?    	''*.$v4D )),7
!

 ++#J , 
 %)!*.$v4D%%l3 


 ++#J , 
 %)!*.$v4D%%l3 


 ++#J , 
 %)!rS   c           	     n   [        5       (       Ga  SU R                  SU R                  (       + 4n[        UR                  UR
                   S3UR                  UR                  SS9nU R                  S:X  aJ  [        R                  R                  U R                  [        R                  R                  R                  S9  U R                  (       a  U R                  OS nU R                  (       a  U R                   OS n["        R$                  " UUUUU R                  UU/UQ76 u  n    nU$ ['        USS	S
/S5        U R                  U R                  (       + S.nSU/0nU R(                  R+                  UR
                   S3UR                  [,        R.                  R0                  R2                  SSS9nU/U R                  /S.n	U R                  (       a@  U R                  /US'   U R                   /US'   U R                  /U	S'   U R                   /U	S'   U R(                  R5                  SUU	US9  U$ )Nr   r   z.tmpFrW   rZ   r[   r]   r^   float64r(   )r   r   r_   r`   ra   r   r   r   r   moving_average_abs_max_scalere   )r	   r   r   r   rX   r7   r<   r>   rC   ri   rj   rk   rH   rl   rm   r   r   r   r   r
   rr   rs   r   rn   ro   rp   rt   )
rJ   r]   rh   ru   r   r   r   _rf   rg   s
             rQ   ry    MovingAverageAbsMaxScale.forward  s   !!MM!	E 'ZZ

|4(kkkk!I   E)""--KKF$6$6$?$?$C$C .  $(==DKKdE#'==DKKdE(EE	 	LCAq J 7Y	24N	
 !% 1 1$-->OPwLL00JJ<t$++%%22 1 
	 %+DKK=A==!%F9!%F9#';;-GJ#';;-GJ/	 	 	
 rS   )r   r   rC   rH   r   )Nr   r^   N)
r7   r|   r   r   r>   r   rM   r   r   r   r   r   r   s   @rQ   r(   r(     sX       $-1/)/) /) 	/)
 +/) 
/) /)b@ @rS   r(   c                     ^  \ rS rSr% SrS\S'   S\S'            S	                     S
U 4S jjjrSS jrSrU =r	$ )r%   i   z
The computational logic of QuantizedConv2D is the same with Conv2D.
The only difference is that its inputs are all fake quantized.
r   weightbiasc                $  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        U R
                  S:w  a  UR                  U l        UR                  U l        UR                  U l        UR                  U l	        UR                  U l
        SU l        U	b  U	" 5       U l        O^[        UU R                  R                  UUU R                  SU R                  R                   U R                     U R                  S9U l        U
b  U
" 5       U l        O+[        UUR%                  5       UUU R                  SS9U l        Ub  U" 5       OS U l        Ub  U" 5       U l        g S U l        g )Nzerosr   Tr7   r   rK   r>   rL   r   r   Fr7   r   rK   r>   rL   )r?   r@   _groups_stride_padding_padding_mode _reversed_padding_repeated_twice	_dilation_data_formatr   r   _conv2d_quant_axis_fake_quant_weight_get_fake_quant_typer7   rG   r<   _fake_quant_input	full_name_act_preprocess_weight_preprocessrJ   layerweight_bitsactivation_bitsr   weight_quantize_typeactivation_quantize_typeweight_pre_layeract_pre_layerweight_quant_layeract_quant_layerrP   s              rQ   r@   QuantizedConv2D.__init__)  sl    	}}}}"00(66 1 !..llJJ	 #$)&8&:D#&:$[[%%'&kk $ KK--d.E.EF22	'D# &%4%6D"%9(__&'*kk %&D"  -8MOd 	 #3"> 	DH 	rS   c                0   U R                   b  U R                  U5      nU R                  U5      nU R                  nU R                  b  U R                  U R                  5      nU R	                  U5      nU R
                  S:w  a<  [        R                  " UU R                  U R
                  U R                  S9nSU l
        [        R                  " UUU R                  U R                  U R                  U R                  U R                  U R                  S9$ )Nr   )modedata_formatr   )r   paddingstridedilationgroupsr   )r   r   r   r   r   r   Fpadr   r   r   conv2dr   r   r   r   )rJ   r]   quant_inputr   quant_weights        rQ   ry   QuantizedConv2D.forwardg  s    +((/E,,U3"".,,T[[9F..v6(%%55'' --	K DMxxMM<<^^<<))	
 		
rS   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   	r{   r{   r   r   r   NNNNr   r   r   r}   r   r}   r   r   r   r!   r   r!   r   Layer | Noner   r   r   r   r   r   r   r   r   
r   r   r   r   r   __annotations__r@   ry   r   r   r   s   @rQ   r%   r%      s    
 N
L
   +4/8)-&*+/(,<
<
 <
 	<

 <
 )<
 #-<
 '<
 $<
 )<
 &<
 
<
 <
|
 
rS   r%   c                     ^  \ rS rSr% SrS\S'   S\S'            S	                     S
U 4S jjjr S     SS jjrSrU =r	$ )r&   i  a  

The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose.
The only difference is that its inputs are all fake quantized.

Examples:
    .. code-block:: pycon

        >>> import paddle
        >>> import paddle.nn as nn
        >>> from paddle.nn.quant.quant_layers import (
        ...     QuantizedConv2DTranspose,
        ... )

        >>> x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)
        >>> conv = nn.Conv2DTranspose(4, 6, (3, 3))
        >>> conv_quantized = QuantizedConv2DTranspose(conv)
        >>> y_quantized = conv_quantized(x_var)
        >>> y_var = conv(x_var)
        >>> print(y_var.shape)
        paddle.Size([2, 6, 10, 10])
        >>> print(y_quantized.shape)
        paddle.Size([2, 6, 10, 10])

r   r   r   c                  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l	        UR                  U l
        SU l        U	b  U	" 5       U l        O^[        UU R                  R                  UUU R                  SU R                  R                   U R                     U R                  S9U l        U
b  U
" 5       U l        O+[        UUR%                  5       UUU R                  SS9U l        Ub  U" 5       OSU l        Ub  U" 5       U l        gSU l        g)zC
Constructor.

The arguments are the same as ImperativeQuantAware.
r:   NTr   Fr   )r?   r@   r   r   r   output_padding_output_paddingr   r   r   r   _conv2d_transpose_quant_axisr   r   r7   rG   r<   r   r   r   r   r   s              rQ   r@   !QuantizedConv2DTranspose.__init__  sN   $ 	}}}}$33!..llJJ	,-))&8&:D#&:$[[%%'&kk $ KK--55  <<'D# &%4%6D"%9(__&'*kk %&D"  -8MOd 	 #3"> 	DH 	rS   c                   U R                   b  U R                  U5      nU R                  U5      nU R                  nU R                  b  U R                  U R                  5      nU R	                  U5      nUc  U R
                  nOSn[        R                  " UUU R                  U R                  UU R                  U R                  U R                  UU R                  S9
$ )Nr   )r   r   r   r   r   r   output_sizer   )r   r   r   r   r   r   r   conv2d_transposer   r   r   r   r   r   )rJ   r]   r   r   r   r   r   s          rQ   ry    QuantizedConv2DTranspose.forward  s     +((/E,,U3"".,,T[[9F..v6!11NN!!MM)<<^^<<#))
 	
rS   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   N)r]   r   r   zSize2 | Noner   r   r   r   s   @rQ   r&   r&     s    4 N
L
   +4/8)-&*+/(,>
>
 >
 	>

 >
 )>
 #->
 '>
 $>
 )>
 &>
 
>
 >
B :>

*6
	
 
rS   r&   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'            S                     SU 4S jjjrSS	 jrS
rU =r	$ )r'   i  z
The computational logic of QuantizedLinear is the same with Linear.
The only difference is that its inputs are all fake quantized.
r   r   r   strr7   c                :  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        SU l        U	b  U	" 5       U l        O_[        UU R                  R                  UUU R                  SU R                  R                  U R
                     U R
                  SS9	U l        U
b  U
" 5       U l
        O+[        UUR                  5       UUU R                  SS9U l
        Ub  U" 5       OS U l        Ub  U" 5       U l        g S U l        g )Nr:   T)r7   r   rK   r>   rL   r   r   quant_linearFr   )r?   r@   r   r   r7   _linear_quant_axisr   r   rG   r<   r   r   r   r   r   s              rQ   r@   QuantizedLinear.__init__  s    	llJJ	JJ	"#)&8&:D#&:$[[%%'&kk $ KK--d.E.EF22!
'D# &%4%6D"%9(__&'*kk %&D"  -8MOd 	 #3"> 	DH 	rS   c                B   U R                   b  U R                  U5      nU R                  U5      nU R                  nU R                  b  U R                  U R                  5      nU R	                  U5      n[
        R                  " X$U R                  U R                  S9nU$ )Nxr   r   r7   )	r   r   r   r   r   r   linearr   r7   )rJ   r]   r   r   r   r   s         rQ   ry   QuantizedLinear.forwardD  s    +((/E,,U3"".,,T[[9F..v6hhTYYTYY
 
rS   )r   r   r   r  r   r   r7   r   r   r   r   r   r   s   @rQ   r'   r'     s    
 N
L
I
   +4/8)-&*+/(,5
5
 5
 	5

 5
 )5
 #-5
 '5
 $5
 )5
 &5
 
5
 5
n rS   r'   c                     ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'            S                     SU 4S jjjrSS jrSrU =r$ )r-   iT  r   r   r   r   r7   r~   is_mp#paddle.distributed.collective.Groupmodel_parallel_groupgather_outputc                  > [         TU ]  5          U	b   S5       eU
b   S5       eUR                  U l        UR                  U l        UR                  U l        SU l        UR                  U l        UR                  U l        UR                  U l	        [        UU R                  R
                  UUU R                  SU R                  R                  U R                     U R                  [        R                  R                  5       S:  a  SOS S9	U l        [        UUR#                  5       UUU R                  SS S9U l        Ub  U" 5       OS U l        Ub  U" 5       U l        g S U l        g )	NzHWhen quantizing ColumnParallelLinear, weight_quant_layer should be None.zEWhen quantizing ColumnParallelLinear, act_quant_layer should be None.r:   TrZ   r7   r   rK   r>   rL   r   r   rM   Fr7   r   rK   r>   rL   rM   )r?   r@   r   r   rB   r7   r  r  r  r  r   rG   r<   ri   rj   get_world_sizer   r   r   r   r   r   s              rQ   r@   &QuantizedColumnParallelLinear.__init__\  sf    		 ") 	
V	
) & 	
S	
& llJJ	KK	"#[[
$)$>$>!"00"6 !!#"++ ))$*A*AB..++::<q@d#
 "6$"#&++!"
  -8MOd 	 #3"> 	DH 	rS   c                z   U R                   (       a2  [        R                  R                  R	                  XR
                  S9nOUnU R                  b  U R                  U5      nU R                  U5      nU R                  nU R                  b  U R                  U R                  5      nU R                  U5      n[        R                  " X5U R                  U R                  S9nU R                  (       aD  U R                   (       a3  [        R                  R                  R!                  X`R
                  S9nU$ UnU$ )Ngroupr  )r  ri   rj   
collective_c_identityr  r   r   r   r   r   r   r  r   r7   r  	_c_concat)rJ   r]   input_parallelr   r   r   output_paralleloutputs           rQ   ry   %QuantizedColumnParallelLinear.forward  s   ::#//::FF66 G N #N+!11.AN,,^<"".,,T[[9F..v6((TYYTYY
 $**''22<<'@'@ = F
  %FrS   )r   r   r   r  r   r   r  r  r  r7   r   r   r   r   r   r}   r   r}   r   r   r   r!   r   r!   r   r   r   r   r   r   r   r   r   r   r   	r   r   r   r   r   r@   ry   r   r   r   s   @rQ   r-   r-   T  s    N
L
IK==
   +4/8)-&*#' $?
?
 ?
 	?

 ?
 )?
 #-?
 '?
 $?
 !?
 ?
 
?
 ?
B rS   r-   c                     ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'            S                     SU 4S jjjrSS jrSrU =r$ )r,   i  r   r   r   r   r7   r~   r  r  r  r  c                  > [         TU ]  5         U	b   S5       eU
b   S5       eUR                  U l        UR                  U l        UR                  U l        SU l        UR                  U l        UR                  U l        UR                  U l	        [        UU R                  R
                  UUU R                  SU R                  R                  U R                     U R                  [        R                  R                  5       S:  a  SOS S9	U l        [        UUR#                  5       UUU R                  S[        R                  R                  5       S:  a  SOS S9U l        Ub  U" 5       OS U l        Ub  U" 5       U l        g S U l        g )	NzQWhen quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself.zNWhen quantizing RowParallelLinear, act_quant_layer cannot defined by yourself.r:   TrZ   r  Fr  )r?   r@   r   r   rB   r7   r  input_is_parallelr  r  r   rG   r<   ri   rj   r  r   r   r   r   r   r   s              rQ   r@   #QuantizedRowParallelLinear.__init__  sw    	!) 	
_	
) & 	
\	
&
 llJJ	KK	"#!&!8!8[[
$)$>$>!"6 !!#"++ ))$*A*AB..++::<q@d#
 "6$"#&++!++::<q@d
"
  -8MOd 	 #3"> 	DH 	rS   c                   U R                   (       d  U R                  (       d  UnO1[        R                  R                  R                  XR                  S9nU R                  b  U R                  U5      nU R                  U5      nU R                  nU R                  b  U R                  U R                  5      nU R                  U5      n[        R                  " X5U R                  S9nU R                  (       a5  [        R                  R                  R                  UU R                  SSS9nOUnU R                   b  XpR                   -   nU$ UnU$ )Nr  )r  r   r7   T)r  use_calc_streamuse_model_parallel)r"  r  ri   rj   r  _c_splitr  r   r   r   r   r   r   r  r7   _mp_allreducer   )	rJ   r]   r  r   r   r   r  output_r  s	            rQ   ry   "QuantizedRowParallelLinear.forward  s-   !!$**"N $//::CC66 D N +!11.AN,,^<"".,,T[[9F..v6((TYY
 ::((33AA// $#'	 B G &G(,		(=99$ DKrS   )r   r   r   r  r   r   r"  r  r  r7   r   r   r  r   r  r   s   @rQ   r,   r,     s    N
L
IK==
   +4/8)-&*#' $?
?
 ?
 	?

 ?
 )?
 #-?
 '?
 $?
 !?
 ?
 
?
 ?
B rS   r,   c                     ^  \ rS rSrSr          S                     SU 4S jjjr   S           S	S jjrSrU =r$ )
r.   i&  z
The computational logic of QuantizedMatmul is the same with Matmul.
The only difference is that its inputs are all fake quantized.
c                   > [         TU ]  5         U
b  U
" 5       U l        U
" 5       U l        O"[	        UUUSS9U l        [	        UUUSS9U l        Ub  U" 5       OS U l        Ub  U" 5       U l        g S U l        g )NF)r   rK   rL   )r?   r@   _fake_quant_x_fake_quant_yr   _act_preprocess_x_act_preprocess_yr   s              rQ   r@   QuantizedMatmul.__init__,  s     	 &!0!2D!0!2D!5('* %	"D "6('* %	"D  -8MOd 	  -8MO 	>B 	rS   c                    U R                   b  U R                  U5      nU R                  U5      nU R                  b  U R                  U5      nU R                  U5      n[        R
                  " XgX4U5      nU$ r   )r/  r-  r0  r.  ri   matmul)	rJ   r  ytranspose_xtranspose_yr7   quant_xquant_yr   s	            rQ   ry   QuantizedMatmul.forwardT  ss     !!-&&q)A$$Q'!!-&&q)A$$Q'mmGkM
rS   )r/  r0  r-  r.  )
Nr{   r{   r   r   r   NNNN)r   r   r   r}   r   r}   r   r   r   r!   r   r!   r   r   r   r   r   r   r   r   r   r   )FFN)r  r   r4  r   r5  r~   r6  r~   r7   r|   r   r   r   r   s   @rQ   r.   r.   &  s     #  +4/8)-&*+/(,&
&
 &
 	&

 &
 )&
 #-&
 '&
 $&
 )&
 &&
 
&
 &
X "!  	
   
 rS   r.   c                  ^   ^  \ rS rSrSr     S           SU 4S jjjrSS jrSrU =r$ )	r)   ih  z
Add MovingAverageMaxScale layer to the behind of the input layer.
Calculate the scale (moving average abs max) for the output of the input layer.
c                v   > [         TU ]  5         Xl        Uc  UR                  5       n[	        X2XE5      U l        g)z
Construct
N)r?   r@   _layerr   r(   _ma_output_scale)rJ   r   r   r7   r>   rM   rP   s         rQ   r@   MAOutputScaleLayer.__init__n  s8     	<??$D 8u!
rS   c                    U R                   " U0 UD6n[        U[        [        [        45      (       a  U$ U R                  U5      $ r   )r<  
isinstancelisttupledictr=  rJ   rf   kwargsr   s       rQ   ry   MAOutputScaleLayer.forward  s?    kk6,V,cD%.//J((--rS   )r<  r=  )Nr   Nr^   N)r   r   r   r   r7   r|   r>   r   rM   r   r   r   rf   r   rE  r   r   r   r   r   s   @rQ   r)   r)   h  sg     # $-1

 
 	

 
 +
 

 
&. .rS   r)   c                  j   ^  \ rS rSrSr     S                 SU 4S jjjrSS jrSrU =r$ )	r*   i  zJ
Add FakeQuantMovingAverageAbsMax layer to the behind of the input layer.
c           
        > [         T	U ]  5         Xl        [        SUc  UR	                  5       OUUUU R
                  SUS9U l        g )Nr   Fr  )r?   r@   r<  r   r   rG   _fake_quant_output)
rJ   r   r   r   r   r7   rM   argsrE  rP   s
            rQ   r@   $FakeQuantMAOutputScaleLayer.__init__  sH     	"6$&*l"#&++!##
rS   c                    U R                   " U0 UD6n[        U[        [        45      (       a  [	        U5      S:  a  U$ U R                  U5      $ )Nr:   )r<  r@  rA  rB  lenrJ  rD  s       rQ   ry   #FakeQuantMAOutputScaleLayer.forward  sF    kk6,V,sT5M**C1J**3//rS   )rJ  r<  )r{   r{   r   NN)r   r   r   r}   r   r}   r   r   r7   r|   rM   r   rK  r   rE  r   r   r   rG  r   r   s   @rQ   r*   r*     s       -1

 
 	

 
 
 +
 
 
 

 
.0 0rS   r*   c                   UR                  SS 5      UR                  SS5      UR                  SS5      UR                  SS 5      S.nU S:X  a  UR                  S	S
5      US	'   GO5U S:X  a  UR                  SS5      US'   GOU S:X  aM  UR                  S	S
5      US	'   UR                  SS 5      US'   UR                  SS5      US'   US   c   S5       eOU S:X  a5  UR                  SS
5      US'   S
US'   SUS'   UR                  SS
5      US'   OU S:X  aT  Sn UR                  SS
5      US'   SUS'   UR                  SS 5      US'   UR                  SS
5      US'   US   c   S5       eO0U S:X  a*  UR                  SS
5      US'   UR                  SS5      US'   [        [        [        [        [
        S.nX0   " S0 UD6$ )Nr7   rK   r{   r>   r^   rM   )r7   rK   r>   rM   r   rL   Fr   r   r   r   r   r   r   zHYou need to input channel_numwhen you use channel_wise_abs_max strategy.r   all_positiveper_channelr:   r  r   Tr    	symmetric)r   r   r   r   r     )getr"   r#   r$   r   r   )
quant_typerE  	call_argsfake_quant_maps       rQ   r   r     s    

64(jjq1GY/zz-6	I Y'-zz2CU'K	#$	/	/#)::mS#A	- 	-	-'-zz2CU'K	#$#)::mT#B	- "(**\1"=	,'3 	
:	
3 
|	#$*JJ~u$E	.!#(	- #$	- $*JJ~u$E	.!	0	0!
$*JJ~u$E	.!#'	- #)::mT#B	- $*JJ~u$E	.!'3 	
:	
3 
y	 $*JJ~u$E	.!!'K!>	+""> :,&N %2	22rS   );
__future__r   loggingtypingr   r   r   ri   r   r   r	   paddle.base.data_feederr
   paddle.base.frameworkr   paddle.base.log_helperr   paddle.frameworkr   r   	paddle.nnr   r   paddle.nn.initializerr   paddle.nn.quant.lsqr   r   paddle.utilsr   layer.layersr   typing_extensionsr   r   r   paddle._typingr   r   r!   r   __all__r   INFO_loggerr"   r#   r$   r(   r+   r%   r&   r'   r-   r,   r.   r)   r*   r   rT  rS   rQ   <module>rj     s2   #  . .  9 9 < 0 - , % * K $  2/#	J	   gll H
de dNG5 GTo odru rj %	c
e c
Lz
u z
zNe NbdE dNh hV?e ?D. .D"0% "0J.3rS   