
    }-j                         d dl mZ d dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZ dd	lmZmZ  e            rd d
lZ ej        e          ZdZdZ G d de          Zd
S )    )metadata)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinNz1.4.3z1.24.0c                   ~     e Zd ZU dZdZded<   def fdZd Zdd
Z	d Z
ddZddZed	efd            Zd Z xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    the GPT-QModel package (Python import name `gptqmodel`). Quantization is done under the hood for users if they
    load a non-prequantized model.
    Fr   quantization_configc                      t                      j        |fi | t                      st          d          ddlm} |                    | j                                                  | _	        d S )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       f/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__1   s~    ,77777#%% 	ighhh......!.!8!89Q9a9a9c9c!d!d    c                 V   t                      st          d          t                      }|s-t          j                                        st          d          t                      st          d          t                      rt          j        t          j        d                    t          j        t                    k     sAt          j        t          j        d                    t          j        t                    k     r!t          dt           dt                     d S d S )Nr   z2GPU is required to quantize or run quantize model.zTLoading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) library.	gptqmodeloptimumz#The gptqmodel version should be >= z, optimum version should >= )r   r   r
   torchcudais_availableRuntimeErrorr   parser   MIN_GPTQ_VERSIONMIN_OPTIMUM_VERSION)r   argsr   gptq_supports_cpus       r    validate_environmentz$GptqHfQuantizer.validate_environment:   s   #%% 	ighhh244  
	)@)@)B)B 
	STTT')) 	tuuu#%% 	M(*;77887=IY;Z;ZZZ}X-i8899GMJ]<^<^^^y6Fyydwyy  		 	^^r!   dtypetorch.dtypereturnc                 Z    |t           j        k    rt                              d           |S )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r%   float16loggerinfo)r   r/   s     r    update_dtypezGptqHfQuantizer.update_dtypeK   s'    EM!!KKfgggr!   c                 6    |dt          j        d          i}|S )N cpu)r%   device)r   
device_maps     r    update_device_mapz!GptqHfQuantizer.update_device_mapP   s"    el5112Jr!   modelr	   c                 6   |j         j        dk    rt          d          | j        rrt	          j        t          j        d                    t	          j        t                    k     r| j        	                    |          }d S  | j        j	        |fi |}d S d S )N	input_idsz%We can only quantize pure text model.r$   )
r   main_input_namer(   pre_quantizedr   r)   r   r+   r   convert_modelr   r=   r   s      r    $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingU   s    ?*k99FGGG 	N}X-i8899GMJ]<^<^^^.<<UCC<.<UMMfMM	N 	Nr!   c                 :   | j         r| j                            |          }d S | j        j        |j        | j        _        | j                            || j        j                   t          j        | j        	                                          |j
        _        d S )N)rA   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrC   s      r    #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loading`   s     	f*::5AAEEE'195:5G(2"11%9Q9[\\\/9/CDDZDbDbDdDd/e/eEL,,,r!   c                     dS NT r   s    r    is_trainablezGptqHfQuantizer.is_trainablej   s    tr!   c                     dS rN   rO   rP   s    r    is_serializablezGptqHfQuantizer.is_serializablen   s    tr!   )r/   r0   r1   r0   )r=   r	   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r   r.   r6   r<   rD   rL   propertyboolrQ   rS   __classcell__)r   s   @r    r   r   '   s          !%%%%e,C e e e e e e  "   
  
	N 	N 	N 	Nf f f f d    X      r!   r   )	importlibr   typingr   	packagingr   baser   modeling_utilsr	   utilsr
   r   r   r   utils.quantization_configr   r   r%   
get_loggerrT   r4   r*   r+   r   rO   r!   r    <module>re      s!                                  1000000 ] ] ] ] ] ] ] ] ] ] ] ] K K K K K K K K  LLL		H	%	%   H H H H Hk H H H H Hr!   