
    {-j=                       U d dl mZ d dlZd dlmZmZmZ d dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ ddlmZ er#d dl m!Z!m"Z" d dlm#Z# d dl$m%Z%m&Z& ed         Z'de(d<   g dZ) ee*ej+        d          Z, G d de          Z- G d de          Z. G d de          Z/ G d de          Z0e0Z1 G d  d!e          Z2 G d" d#e          Z3 G d$ d%e          Z4 G d& d'e          Z5 G d( d)e          Z6 G d* d+e          Z7 G d, d-e          Z8 G d. d/e          Z9d0 Z:dS )1    )annotationsN)TYPE_CHECKINGAnyLiteral)_C_ops_legacy_C_opsin_dynamic_mode)check_variable_and_dtype)_create_tensor)
get_logger)	ParamAttrcore)
functional)Constant)FakeQuantActLSQPlusFakeQuantWeightLSQPlus)unique_name   )Layer)Never	TypeAlias)Tensor)	DTypeLikeSize2)abs_maxmoving_average_abs_maxchannel_wise_abs_max
lsq_weightchannel_wise_lsq_weightlsq_actr   
_QuantType)FakeQuantAbsMaxFakeQuantMovingAverageAbsMaxFakeQuantChannelWiseAbsMaxQuantizedConv2DQuantizedConv2DTransposeQuantizedLinearMovingAverageAbsMaxScaleMAOutputScaleLayerFakeQuantMAOutputScaleLayer	QuantStubQuantizedRowParallelLinearQuantizedColumnParallelLinearQuantizedMatmulz&%(asctime)s-%(levelname)s: %(message)s)fmtc                  8     e Zd ZdZ	 	 	 	 	 dd fdZddZ xZS )r"   a  
    FakeQuantAbsMax layer does the abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = max(abs(X))`
    :math:`range = 2^{bit\_length - 1} - 1`
    :math:`Out = round(X / scale * range) * scale / range`
    N   float32Fname
str | None
quant_bitsintdtyper   quant_on_weightboolreduce_typeLiteral['max'] | NonereturnNonec                r   t                                                       || _        || _        || _        |r| dnd}t          j        |          | _        |rUt          | j        t          d          d          }| 
                    dg|| j                  | _        d| j        _        d S d | _        d S )	N.scalequant_dequant.scaleMbP?Fr3   initializer	trainable   shapeattrr7   T)super__init___quant_bits_name_reduce_typer   generate_scale_namer   r   create_parameter_dtype_scalestop_gradient)	selfr3   r5   r7   r8   r:   scale_prefix
scale_attr	__class__s	           \/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/nn/quant/quant_layers.pyrJ   zFakeQuantAbsMax.__init__O   s     	%
'*.I$4I&/== 	"%$UOO  J
 //c
$+ 0  DK )-DK%%%DKKK    inputr   c                   t                      rd| j        f}t          |j        |j         d|j        |j        d          }| j        }| j        dk    r5t          j
                            |t          j
        j        j                   |s:t          t          j        j        j        | j        dg| j        d          }d|_        t+          j        || j        d          \  }}t+          j        ||           t+          j        ||           |S t1          |d	d
gd           d| j        i}d|gi}| j                            |j         d|j        t          j        j        j        dd          }| j        }|s=| j                            | j        | j        t          j        j        j        dd          }|g|gd}| j                            d|||           |S )N
bit_length.quantized.dequantizedFtyper3   rG   r7   persistablemaxoprE   TrZ   r2   r"   Xr3   r7   r_   r`   rS   OutOutScale fake_quantize_dequantize_abs_maxr_   inputsoutputsattrs)r	   rK   r   r_   r3   rG   r7   rR   rM   paddledistributed
all_reduceReduceOpMAXr   VarDescVarTypeDENSE_TENSORrO   rQ   rS   r   ri   assign_out_r
   _helpercreate_variable	append_op)	rT   rZ   rm   	quant_out	out_scaleout1out2rk   rl   s	            rX   forwardzFakeQuantAbsMax.forwardj   s/     	!4#34E&Z
:::kk!  I I E))"--&"4"="A .     /*-:)#+ %  	 +/	' 7t'  tY///tY/// )>OPPPt/0wL00J666+%2 1 
 
	 K	 	44%k\)6!" 5  I %+I;??3	 	 	
 	
 	
 rY   )Nr1   r2   FN)r3   r4   r5   r6   r7   r   r8   r9   r:   r;   r<   r=   rZ   r   r<   r   __name__
__module____qualname____doc__rJ   r~   __classcell__rW   s   @rX   r"   r"   E   sq           $ %-1      6? ? ? ? ? ? ? ?rY   r"   c                  8     e Zd ZdZ	 	 	 	 	 dd fdZddZ xZS )r#   aM  
    FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
    :math:`range = 2^{bit\_length - 1} - 1`
    :math:`Out = round(X / scale * range) * scale / range`
    N?r1   r2   r3   r4   moving_ratefloatr5   r6   r7   r   r:   r;   r<   r=   c                   t                                                       || _        || _        || _        |r| dnd}t          t          j        |          t          d          d          }| 	                    dg||          | _
        d| j
        _        |r| d	nd
}t          t          j        |          t          d          d          }	| 	                    dg|	|          | _        d| j        _        |r| dnd}
t          t          j        |
          t          d          d          }| 	                    dg||          | _        d| j        _        d S )Nr?   r@   rA   FrB   rE   rF   T.statezquant_dequant.state.accumzquant_dequant.accum)rI   rJ   _moving_raterK   rM   r   r   rN   r   rP   rR   rS   _state_accum)rT   r3   r   r5   r7   r:   rU   rV   state_prefix
state_attraccum_prefix
accum_attrrW   s               rX   rJ   z%FakeQuantMovingAverageAbsMax.__init__   s    	'%'*.I$4I%l33 
 
 


 ++#Je , 
 
 %)!*.I$4I%l33 
 
 


 ++#Je , 
 
 %)!*.I$4I%l33 
 
 


 ++#Je , 
 
 %)!!!rY   rZ   r   c           
     h   t                      rPd| j        d| j        d| j         f}t	          |j        |j         d|j        |j        d          }| j	        dk    r:t          j                            | j        t          j        j        j                   | j        r| j        nd }| j        r| j        nd }t%          j        || j        ||| j        | j        | j         d	          \  }}}}	t%          j        ||           |                                rt%          j        || j                   |rt%          j        ||           |rt%          j        |	|           |S t-          |d
dgd           | j        | j        | j         d}|g| j        gd}
| j                            |j         d|j        t2          j        j        j        dd          }|g| j        gd}| j        r,| j        g|
d<   | j        g|
d<   | j        g|d<   | j        g|d<   | j                            d|
||           |S )Nr   r\   is_testr]   Fr^   ra   rb   rE   rZ   r2   r#   )r   r\   r   )rd   InScalere   rf   InStateInAccumOutStateOutAccum/fake_quantize_dequantize_moving_average_abs_maxrj   )r	   r   rK   trainingr   r_   r3   rG   r7   rM   rn   ro   rp   rR   rq   rr   r   r   r   r   rv   _is_initializedr
   rw   rx   r   rs   rt   ru   ry   )rT   rZ   rm   rz   stateaccumr|   r}   out3out4rk   rl   s               rX   r~   z$FakeQuantMovingAverageAbsMax.forward   s    .	! M!E 'Z
:::kk!  I  E))"--KF$6$?$C .    $(=:DKKdE#'=:DKKdE F! M!	 	 tY///##%% 6"4555 0"4/// 0"4/// 7YK)G	
 	
 	
  ,*=(
 

 wDK=99L00J666+%2 1 
 
	 %+DK=AA= 	0!%F9!%F9#';-GJ#';-GJB	 	 	
 	
 	
 rY   )Nr   r1   r2   N)r3   r4   r   r   r5   r6   r7   r   r:   r;   r<   r=   r   r   r   s   @rX   r#   r#      sz            $-1+) +) +) +) +) +) +)ZP P P P P P P PrY   r#   c                  8     e Zd Z	 	 	 	 	 	 	 dd fdZddZ xZS )r$   Nr1   r   r2   Fr3   r4   channel_num
int | Noner5   r6   
quant_axisr7   r   r8   r9   r:   r;   r<   r=   c                   |s
J d            t                                                       || _        || _        || _        || _        || _        || _        |r| dnd}t          j	        |          | _
        |rZt          | j
        t          d          d          }	|                     | j        g|	| j                  | _        d| j        _        d S d | _        d S )	Nz5Channel_wise only can be used on weight quantization.r?   r@   g        FrB   rF   T)rI   rJ   rK   _quant_axisrQ   rL   _channel_numrM   r   rN   rO   r   r   rP   rR   rS   )rT   r3   r   r5   r   r7   r8   r:   rU   rV   rW   s             rX   rJ   z#FakeQuantChannelWiseAbsMax.__init__7  s     	
 	
C	
 	
 	%%
''*.I$4I&/== 	"%$SMM  J
 //()
$+ 0  DK )-DK%%%DKKKrY   rZ   r   c                   t                      rd| j        d| j        f}t          |j        |j         d|j        |j        d          }| j        }| j	        dk    r5t          j                            |t          j        j        j                   |?t          t          j        j        j        | j        | j        g| j        d          }d|_        t/          j        || j        d	| j                  \  }}t/          j        ||           t/          j        ||           |S t5          |d
dgd           | j        d	| j        d}d|gi}| j                            |j         d|j        t          j        j        j        dd          }| j        }|s=| j                            | j        | j        t          j        j        j        dd          }|g|gd}| j                            d|||           |S )Nr\   r   r]   Fr^   ra   rb   TrE   rZ   r2   r$   )r\   
round_typer   rd   re   rf   -fake_channel_wise_quantize_dequantize_abs_maxrj   )r	   rK   r   r   r_   r3   rG   r7   rR   rM   rn   ro   rp   rq   rr   r   rs   rt   ru   rO   r   rQ   rS   r   r   rv   r
   rw   rx   ry   )	rT   rZ   rm   rz   r{   outscalerk   rl   s	            rX   r~   z"FakeQuantChannelWiseAbsMax.forwardZ  s]    &	  	E 'Z
:::kk!  I I E))"--&"4"="A .     *-:),-+ %  	 +/	'
 Dt'D,<  sI...ui000 7YK)E	
 	
 	
 **
 

 wL00J666+%2 1 
 
	 K	 	44%k\)6!" 5  I %+I;??@	 	 	
 	
 	
 rY   )NNr1   r   r2   FN)r3   r4   r   r   r5   r6   r   r6   r7   r   r8   r9   r:   r;   r<   r=   r   r   r   r   rJ   r~   r   r   s   @rX   r$   r$   6  sv          "&$ %-1! ! ! ! ! ! !FK K K K K K K KrY   r$   c                  2     e Zd Z	 	 	 	 dd fdZddZ xZS )r(   Nr   r2   r3   r4   r   r   r7   r   r:   r;   r<   r=   c                   t                                                       || _        || _        |r| dnd}t	          j        |          }t          |t          d          d          }|                     dg||          | _	        d| j	        _
        |r| d	nd
}t          t	          j        |          t          d          d          }	|                     dg|	|          | _        d| j        _
        |r| dnd}
t          t	          j        |
          t          d          d          }|                     dg||          | _        d| j        _
        dS )a  
        MovingAverageMaxScale layer is used to calculating the output quantization
        scale of Layer. Its computational formula is described as below:

        :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
        :math:`Out = X`
        r?   zoutscale.scaler   FrB   rE   rF   Tr   zoutscale.stater   zoutscale.accumN)rI   rJ   r   rM   r   rN   r   r   rP   rR   rS   r   r   )rT   r3   r   r7   r:   rU   
scale_namerV   r   r   r   r   rW   s               rX   rJ   z!MovingAverageAbsMaxScale.__init__  s    	''*.D$4D ),77
!
 
 

 ++#Je , 
 
 %)!*.D$4D%l33 
 
 


 ++#Je , 
 
 %)!*.D$4D%l33 
 
 


 ++#Je , 
 
 %)!!!rY   rZ   r   c           	     8   t                      rd| j        d| j         f}t          |j        |j         d|j        |j        d          }| j        dk    r:t          j
                            | j        t          j
        j        j                   | j        r| j        nd }| j        r| j        nd }t#          j        ||||| j        ||g|R  \  }}}}|S t'          |dd	d
gd           | j        | j         d}d|gi}| j                            |j         d|j        t,          j        j        j        dd          }|g| j        gd}	| j        r,| j        g|d<   | j        g|d<   | j        g|	d<   | j        g|	d<   | j                            d||	|           |S )Nr   r   z.tmpFr^   ra   rb   rZ   r2   float64r(   )r   r   rd   re   rf   r   r   r   r   moving_average_abs_max_scalerj   )r	   r   r   r   r_   r3   rG   r7   rM   rn   ro   rp   rR   rq   rr   r   r   r   r   r
   rw   rx   r   rs   rt   ru   ry   )
rT   rZ   rm   rz   r   r   r   _rk   rl   s
             rX   r~   z MovingAverageAbsMaxScale.forward  s    !	!M!	E 'Z
(((kk!  I  E))"--KF$6$?$C .    $(=:DKKdE#'=:DKKdE(E	 	 	 	LCAq J 7Y	24N	
 	
 	
 !% 1$->OPPwL00J$$$+%2 1 
 
	 %+DK=AA= 	0!%F9!%F9#';-GJ#';-GJ/	 	 	
 	
 	
 rY   )Nr   r2   N)
r3   r4   r   r   r7   r   r:   r;   r<   r=   r   r   r   s   @rX   r(   r(     sm           $-1/) /) /) /) /) /) /)b@ @ @ @ @ @ @ @rY   r(   c                  V     e Zd ZU dZded<   ded<   	 	 	 	 	 	 	 	 	 dd fdZddZ xZS ) r%   z
    The computational logic of QuantizedConv2D is the same with Conv2D.
    The only difference is that its inputs are all fake quantized.
    r   weightbiasr1   r   r   Nlayerr   weight_bitsr6   activation_bitsr   r   weight_quantize_typer!   activation_quantize_typeweight_pre_layerLayer | Noneact_pre_layerweight_quant_layeract_quant_layerr<   r=   c           
        t                                                       |j        | _        |j        | _        |j        | _        |j        | _        | j        dk    r|j        | _        |j        | _        |j        | _        |j	        | _	        |j
        | _
        d| _        |	 |	            | _        nEt          || j	        j        ||| j        d| j	        j        | j                 | j                  | _        |
 |
            | _        n1t          ||                                ||| j        d          | _        |
 |            nd | _        |
 |            nd | _        d S )Nzerosr   Tr3   r   r5   r7   r8   r   r   Fr3   r   r5   r7   r8   )rI   rJ   _groups_stride_padding_padding_mode _reversed_padding_repeated_twice	_dilation_data_formatr   r   _conv2d_quant_axis_fake_quant_weight_get_fake_quant_typer3   rQ   rG   _fake_quant_input	full_name_act_preprocess_weight_preprocessrT   r   r   r   r   r   r   r   r   r   r   rW   s              rX   rJ   zQuantizedConv2D.__init__)  s    	}}"0((6 1 !.lJ	 #$)&8&8&:&:D##&:$[%'&k $ K-d.EF2	' 	' 	'D# &%4_%6%6D""%9(__&&'*k %& & &D"  -8MMOOOd 	 #3">D 	rY   rZ   c           
        | j         |                      |          }|                     |          }| j        }| j        |                     | j                  }|                     |          }| j        dk    r.t          j        || j        | j        | j	                  }d| _
        t          j        ||| j        | j
        | j        | j        | j        | j	                  S )Nr   )modedata_formatr   )r   paddingstridedilationgroupsr   )r   r   r   r   r   r   Fpadr   r   r   conv2dr   r   r   r   )rT   rZ   quant_inputr   quant_weights        rX   r~   zQuantizedConv2D.forwardg  s    +((//E,,U33".,,T[99F..v66((%5' -	  K DMxM<^<)	
 	
 	
 		
rY   	r1   r1   r   r   r   NNNNr   r   r   r6   r   r6   r   r   r   r!   r   r!   r   r   r   r   r   r   r   r   r<   r=   r   r   r   r   r   __annotations__rJ   r~   r   r   s   @rX   r%   r%      s          
 NNNLLL
   +4/8)-&*+/(,<
 <
 <
 <
 <
 <
 <
|
 
 
 
 
 
 
 
rY   r%   c                  Z     e Zd ZU dZded<   ded<   	 	 	 	 	 	 	 	 	 dd  fdZ	 d!d"dZ xZS )#r&   aX  

    The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose.
    The only difference is that its inputs are all fake quantized.

    Examples:
        .. code-block:: pycon

            >>> import paddle
            >>> import paddle.nn as nn
            >>> from paddle.nn.quant.quant_layers import (
            ...     QuantizedConv2DTranspose,
            ... )

            >>> x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)
            >>> conv = nn.Conv2DTranspose(4, 6, (3, 3))
            >>> conv_quantized = QuantizedConv2DTranspose(conv)
            >>> y_quantized = conv_quantized(x_var)
            >>> y_var = conv(x_var)
            >>> print(y_var.shape)
            paddle.Size([2, 6, 10, 10])
            >>> print(y_quantized.shape)
            paddle.Size([2, 6, 10, 10])

    r   r   r   r1   r   r   Nr   r   r   r6   r   r   r   r   r!   r   r   r   r   r   r   r<   r=   c           
        t                                                       |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j	        | _	        |j
        | _
        d| _        |	 |	            | _        nEt          || j	        j        ||| j        d| j	        j        | j                 | j                  | _        |
 |
            | _        n1t          ||                                ||| j        d          | _        |
 |            nd| _        |
 |            nd| _        dS )z[
        Constructor.

        The arguments are the same as ImperativeQuantAware.
        rE   NTr   Fr   )rI   rJ   r   r   r   output_padding_output_paddingr   r   r   r   _conv2d_transpose_quant_axisr   r   r3   rQ   rG   r   r   r   r   r   s              rX   rJ   z!QuantizedConv2DTranspose.__init__  sg   $ 	}}$3!.lJ	,-))&8&8&:&:D##&:$[%'&k $ K-5  <' ' 'D# &%4_%6%6D""%9(__&&'*k %& & &D"  -8MMOOOd 	 #3">D 	rY   rZ   output_sizeSize2 | Nonec                n   | j         |                      |          }|                     |          }| j        }| j        |                     | j                  }|                     |          }|| j        }nd}t          j        ||| j        | j	        || j
        | j        | j        || j        
  
        S )Nr   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   conv2d_transposer   r   r   r   r   r   )rT   rZ   r   r   r   r   r   s          rX   r~   z QuantizedConv2DTranspose.forward  s     +((//E,,U33".,,T[99F..v66!1NNN!M)<^<#)
 
 
 	
rY   r   r   N)rZ   r   r   r   r<   r   r   r   s   @rX   r&   r&     s          4 NNNLLL
   +4/8)-&*+/(,>
 >
 >
 >
 >
 >
 >
B :>
 
 
 
 
 
 
 
 
rY   r&   c                  `     e Zd ZU dZded<   ded<   ded<   	 	 	 	 	 	 	 	 	 dd  fdZd!dZ xZS )"r'   z
    The computational logic of QuantizedLinear is the same with Linear.
    The only difference is that its inputs are all fake quantized.
    r   r   r   strr3   r1   r   r   Nr   r   r   r6   r   r   r   r   r!   r   r   r   r   r   r   r<   r=   c                    t                                                       |j        | _        |j        | _        |j        | _        d| _        |	 |	            | _        nFt          || j        j        ||| j        d| j        j	        | j                 | j        d	  	        | _        |
 |
            | _
        n1t          ||                                ||| j        d          | _
        |
 |            nd | _        |
 |            nd | _        d S )NrE   T)r3   r   r5   r7   r8   r   r   quant_linearFr   )rI   rJ   r   r   r3   _linear_quant_axisr   r   rQ   rG   r   r   r   r   r   s              rX   rJ   zQuantizedLinear.__init__  s4    	lJ	J	"#)&8&8&:&:D##&:$[%'&k $ K-d.EF2!
' 
' 
'D# &%4_%6%6D""%9(__&&'*k %& & &D"  -8MMOOOd 	 #3">D 	rY   rZ   c                &   | j         |                      |          }|                     |          }| j        }| j        |                     | j                  }|                     |          }t          j        ||| j        | j                  }|S )Nxr   r   r3   )	r   r   r   r   r   r   linearr   r3   )rT   rZ   r   r   r   r   s         rX   r~   zQuantizedLinear.forwardD  s    +((//E,,U33".,,T[99F..v66h,TYTY
 
 
 
rY   r   r   r   r   r   s   @rX   r'   r'     s          
 NNNLLLIII
   +4/8)-&*+/(,5
 5
 5
 5
 5
 5
 5
n       rY   r'   c                  z     e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   	 	 	 	 	 	 	 	 	 d#d$ fd Zd%d"Z xZS )&r-   r   r   r   r   r3   r9   is_mp#paddle.distributed.collective.Groupmodel_parallel_groupgather_outputr1   r   r   Nr   r   r   r6   r   r   r   r   r!   r   r   r   r   r   r=   r   r<   c                   t                                                       	 |	
J d            |

J d            |j        | _        |j        | _        |j        | _        d| _        |j        | _        |j        | _        |j	        | _	        t          || j        j        ||| j        d| j        j        | j                 | j        t          j                                        dk    rdnd 	  	        | _        t          ||                                ||| j        dd           | _        |
 |            nd | _        |
 |            nd | _        d S )	NzHWhen quantizing ColumnParallelLinear, weight_quant_layer should be None.zEWhen quantizing ColumnParallelLinear, act_quant_layer should be None.rE   Tra   r3   r   r5   r7   r8   r   r   r:   Fr3   r   r5   r7   r8   r:   )rI   rJ   r   r   rL   r3   r   r   r   r   r   rQ   rG   rn   ro   get_world_sizer   r   r   r   r   r   s              rX   rJ   z&QuantizedColumnParallelLinear.__init__\  sy    		 "))V *)) &&S '&& lJ	K	"#[
$)$>!"0"6 !#"+ )$*AB.+::<<q@@d#
 #
 #
 "6$""#&+!"
 "
 "
  -8MMOOOd 	 #3">D 	rY   rZ   c                   | j         r,t          j        j                            || j                  }n|}| j        |                     |          }|                     |          }| j        }| j	        | 	                    | j                  }| 
                    |          }t          j        ||| j        | j                  }| j        r3| j         r,t          j        j                            || j                  }n|}|S )Ngroupr   )r   rn   ro   
collective_c_identityr   r   r   r   r   r   r   r   r   r3   r   	_c_concat)rT   rZ   input_parallelr   r   r   output_paralleloutputs           rX   r~   z%QuantizedColumnParallelLinear.forward  s   : 	##/:FFT6 G  NN #N+!11.AAN,,^<<".,,T[99F..v66(,TYTY
 
 
  	%$* 	%'2<<t'@ =  FF %FrY   r   r   r   r   r6   r   r6   r   r   r   r!   r   r!   r   r   r   r   r   r=   r   r=   r<   r=   r   r   r   r   r   rJ   r~   r   r   s   @rX   r-   r-   T  s         NNNLLLIIIKKK====
   +4/8)-&*#' $?
 ?
 ?
 ?
 ?
 ?
 ?
B       rY   r-   c                  z     e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   	 	 	 	 	 	 	 	 	 d#d$ fd Zd%d"Z xZS )&r,   r   r   r   r   r3   r9   r   r   r   r   r1   r   r   Nr   r   r   r6   r   r   r   r   r!   r   r   r   r   r   r=   r   r<   c                   t                                                       |	
J d            |

J d            |j        | _        |j        | _        |j        | _        d| _        |j        | _        |j        | _        |j	        | _	        t          || j        j        ||| j        d| j        j        | j                 | j        t          j                                        dk    rdnd 	  	        | _        t          ||                                ||| j        dt          j                                        dk    rdnd           | _        |
 |            nd | _        |
 |            nd | _        d S )	NzQWhen quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself.zNWhen quantizing RowParallelLinear, act_quant_layer cannot defined by yourself.rE   Tra   r   Fr  )rI   rJ   r   r   rL   r3   r   input_is_parallelr   r   r   rQ   rG   rn   ro   r  r   r   r   r   r   r   s              rX   rJ   z#QuantizedRowParallelLinear.__init__  s    	!))_ *)) &&\ '&&
 lJ	K	"#!&!8[
$)$>!"6 !#"+ )$*AB.+::<<q@@d#
 #
 #
 "6$""#&+!+::<<q@@d
"
 
"
 
"
  -8MMOOOd 	 #3">D 	rY   rZ   c                &   | j         s| j        s|}n+t          j        j                            || j                  }| j        |                     |          }|                     |          }| j	        }| j
        | 
                    | j	                  }|                     |          }t          j        ||| j                  }| j        r.t          j        j                            || j        dd          }n|}| j        
|| j        z   n|}|S )Nr  )r   r   r3   T)r  use_calc_streamuse_model_parallel)r  r   rn   ro   r  _c_splitr   r   r   r   r   r   r   r   r3   _mp_allreducer   )	rT   rZ   r	  r   r   r   r
  output_r  s	            rX   r~   z"QuantizedRowParallelLinear.forward  s2   ! 	$* 	"NN $/:CCT6 D  N +!11.AAN,,^<<".,,T[99F..v66(,TY
 
 
 : 	&(3AA/ $#'	 B  GG &G(,	(=49$$7rY   r   r  r   r  r   s   @rX   r,   r,     s         NNNLLLIIIKKK====
   +4/8)-&*#' $?
 ?
 ?
 ?
 ?
 ?
 ?
B       rY   r,   c                  J     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d!d" fdZ	 	 	 d#d$d Z xZS )%r.   z
    The computational logic of QuantizedMatmul is the same with Matmul.
    The only difference is that its inputs are all fake quantized.
    Nr1   r   r   r   r   r   r6   r   r   r   r   r!   r   r   r   r   r   r<   r=   c                6   t                                                       |
 |
            | _         |
            | _        n0t	          |||d          | _        t	          |||d          | _        |
 |            nd | _        |
 |            nd | _        d S )NF)r   r5   r8   )rI   rJ   _fake_quant_x_fake_quant_yr   _act_preprocess_x_act_preprocess_yr   s              rX   rJ   zQuantizedMatmul.__init__,  s     	 &!0!2!2D!0!2!2D!5('* %	" " "D "6('* %	" " "D  -8MMOOOd 	  -8MMOOOd 	rY   Fr   r   ytranspose_xr9   transpose_yr3   r4   c                    | j         |                      |          }|                     |          }| j        |                     |          }|                     |          }t	          j        |||||          }|S r   )r  r  r  r  rn   matmul)	rT   r   r  r  r  r3   quant_xquant_yr   s	            rX   r~   zQuantizedMatmul.forwardT  s~     !-&&q))A$$Q''!-&&q))A$$Q''mGWk;MM
rY   )
Nr1   r1   r   r   r   NNNN)r   r   r   r6   r   r6   r   r   r   r!   r   r!   r   r   r   r   r   r   r   r   r<   r=   )FFN)r   r   r  r   r  r9   r  r9   r3   r4   r<   r   r   r   s   @rX   r.   r.   &  s          #  +4/8)-&*+/(,&
 &
 &
 &
 &
 &
 &
X "!        rY   r.   c                  8     e Zd ZdZ	 	 	 	 	 dd fdZddZ xZS )r)   z
    Add MovingAverageMaxScale layer to the behind of the input layer.
    Calculate the scale (moving average abs max) for the output of the input layer.
    Nr   r2   r   r   r   r   r3   r4   r7   r   r:   r;   r<   r=   c                    t                                                       || _        ||                                }t	          ||||          | _        dS )z
        Construct
        N)rI   rJ   _layerr   r(   _ma_output_scale)rT   r   r   r3   r7   r:   rW   s         rX   rJ   zMAOutputScaleLayer.__init__n  sW     	<??$$D 8+uk!
 !
rY   rk   r   kwargsr   c                     | j         |i |}t          |t          t          t          f          r|S |                     |          S r   )r&  
isinstancelisttupledictr'  rT   rk   r(  r   s       rX   r~   zMAOutputScaleLayer.forward  sJ    dk6,V,,cD%.// 	.J((---rY   )Nr   Nr2   N)r   r   r   r   r3   r4   r7   r   r:   r;   r<   r=   rk   r   r(  r   r<   r   r   r   s   @rX   r)   r)   h  sq          # $-1
 
 
 
 
 
 
&. . . . . . . .rY   r)   c                  8     e Zd ZdZ	 	 	 	 	 dd fdZddZ xZS )r*   zR
    Add FakeQuantMovingAverageAbsMax layer to the behind of the input layer.
    r1   r   Nr   r   r   r6   r   r   r   r3   r4   r:   r;   argsr   r(  r<   r=   c           	         t                                                       || _        t          d||                                n|||| j        d|          | _        d S )Nr   Fr  )rI   rJ   r&  r   r   rQ   _fake_quant_output)
rT   r   r   r   r   r3   r:   r1  r(  rW   s
            rX   rJ   z$FakeQuantMAOutputScaleLayer.__init__  si     	"6$&*l"""#&+!##
 #
 #
rY   rk   r   r   c                     | j         |i |}t          |t          t          f          rt	          |          dk    r|S |                     |          S )NrE   )r&  r*  r+  r,  lenr3  r.  s       rX   r~   z#FakeQuantMAOutputScaleLayer.forward  sU    dk6,V,,sT5M** 	0C1J**3///rY   )r1   r1   r   NN)r   r   r   r6   r   r6   r   r   r3   r4   r:   r;   r1  r   r(  r   r<   r=   r/  r   r   s   @rX   r*   r*     sq            -1
 
 
 
 
 
 
.0 0 0 0 0 0 0 0rY   r*   c                &   |                     dd           |                     dd          |                     dd          |                     dd           d}| dk    r|                     d	d
          |d	<   nk| dk    r|                     dd          |d<   nJ| dk    r^|                     d	d
          |d	<   |                     dd           |d<   |                     dd          |d<   |d         
J d            n| dk    r=|                     dd
          |d<   d
|d<   d|d<   |                     dd
          |d<   n| dk    red} |                     dd
          |d<   d|d<   |                     dd           |d<   |                     dd
          |d<   |d         
J d            n8| dk    r2|                     dd
          |d<   |                     dd          |d<   t          t          t          t          t
          d} ||          di |S )Nr3   r5   r1   r7   r2   r:   )r3   r5   r7   r:   r   r8   Fr   r   r   r   r   r   r   zHYou need to input channel_numwhen you use channel_wise_abs_max strategy.r   all_positiveper_channelrE   r   r   Tr    	symmetric)r   r   r   r   r     )getr"   r#   r$   r   r   )
quant_typer(  	call_argsfake_quant_maps       rX   r   r     sg   

64((jjq11GY//zz-66	 I Y'-zz2CU'K'K	#$$	/	/	/#)::mS#A#A	-  	-	-	-'-zz2CU'K'K	#$#)::mT#B#B	- "(**\1"="=	,'33: 4333 
|	#	#$*JJ~u$E$E	.!#(	- #$	- $*JJ~u$E$E	.!!	0	0	0!
$*JJ~u$E$E	.!#'	- #)::mT#B#B	- $*JJ~u$E$E	.!'33: 4333 
y	 	 $*JJ~u$E$E	.!!'K!>!>	+""> :,& N &>*%22	222rY   );
__future__r   loggingtypingr   r   r   rn   r   r   r	   paddle.base.data_feederr
   paddle.base.frameworkr   paddle.base.log_helperr   paddle.frameworkr   r   	paddle.nnr   r   paddle.nn.initializerr   paddle.nn.quant.lsqr   r   paddle.utilsr   layer.layersr   typing_extensionsr   r   r   paddle._typingr   r   r!   r   __all__r   INFO_loggerr"   r#   r$   r(   r+   r%   r&   r'   r-   r,   r.   r)   r*   r   r:  rY   rX   <module>rP     s$   # " " " " " "  . . . . . . . . . .  9 9 9 9 9 9 9 9 9 9 < < < < < < 0 0 0 0 0 0 - - - - - - , , , , , , , , % % % % % % * * * * * * K K K K K K K K $ $ $ $ $ $             22222222////////#	J        *gl H  
d d d d de d d dNG G G G G5 G G GTo o o o o o o odr r r r ru r r rj %	c
 c
 c
 c
 c
e c
 c
 c
Lz
 z
 z
 z
 z
u z
 z
 z
zN N N N Ne N N Nbd d d d dE d d dNh h h h h h h hV? ? ? ? ?e ? ? ?D. . . . . . . .D"0 "0 "0 "0 "0% "0 "0 "0J.3 .3 .3 .3 .3rY   