
    }-j                         d dl Zd dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZ dd	lmZmZmZmZ dd
l
mZ  e            rd dlZ ej        e          Z G d de          ZdS )    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)	AwqConfig)is_accelerate_availableis_gptqmodel_availableis_torch_availablelogging)
AwqBackendc                   h     e Zd ZU dZdZded<    fdZd Zd Zdd
Z	d Z
d Zed             Z xZS )AwqQuantizerzu
    4-bit quantization for Activation-aware Weight Quantization(AWQ) (https://huggingface.co/papers/2306.00978)
    Tr	   quantization_configc                 <     t                      j        |fi | d S )N)super__init__)selfr   kwargs	__class__s      e/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.pyr   zAwqQuantizer.__init__-   s)    ,7777777    c                 z    t                      st          d          t                      st          d          d S )NzaLoading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`zMLoading an AWQ quantized model requires accelerate (`pip install accelerate`))r   ImportErrorr
   )r   r   s     r   validate_environmentz!AwqQuantizer.validate_environment0   sQ    %'' 	s   '(( 	omnnn	o 	or   c                    |t           j        k    rct           j                                        st           j                                        r't
                              d           t           j        }nf|t           j        k    rVt           j                                        st           j                                        rt
                              d           |S )Nz[`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`.zWWe suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.)torchbfloat16cudais_availablexpuloggerwarningfloat16)r   dtypes     r   update_dtypezAwqQuantizer.update_dtype9   s    EN""
(?(?(A(A"UYE[E[E]E]"NNm   MEEem##)@)@)B)B#eiF\F\F^F^#NNtuuur   modelr   c                     ddl m}m} |                     || j        j        |j        d          | _         ||| j        | j        |                    d                    } |||j        j	                  }d S )Nr   )replace_quantization_scalesreplace_with_awq_linearT)add_default_skips
device_map)r   modules_to_not_convertr-   )
integrationsr*   r+   get_modules_to_not_convertr   r.   _keep_in_fp32_modulesgetconfig
model_type)r   r(   r   r*   r+   s        r   $_process_model_before_weight_loadingz1AwqQuantizer._process_model_before_weight_loadingC   s    WWWWWWWW&*&E&E4+BED_sw 'F '
 '
# (' $ 8#'#>zz,//	
 
 
 ,+E5<3JKKr   c                 @    ddl m}  ||| j        j                   d S )Nr   )hf_gptqmodel_post_init)use_act_order)gptqmodel.utils.modelr7   r   desc_act)r   r(   r   r7   s       r   #_process_model_after_weight_loadingz0AwqQuantizer._process_model_after_weight_loadingS   s6    @@@@@@uD4L4UVVVVVVr   c                     | j         j        t          j        t          j        fv rt
                              d           dS dS )Nz7You cannot save an AWQ model that uses Exllama backend!FT)r   backendr   
EXLLAMA_V1
EXLLAMA_V2r#   r$   r   s    r   is_serializablezAwqQuantizer.is_serializableX   s;    #+
0EzG\/]]]NNTUUU5tr   c                     t          j        t          j                             d                    t          j        d          k    S )N	gptqmodelz5.0.0)r   parse	importlibmetadatar@   s    r   is_trainablezAwqQuantizer.is_trainable_   s3    }Y/77DDEEW^I_I___r   )r(   r   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r   r'   r5   r;   rA   propertyrG   __classcell__)r   s   @r   r   r   $   s          
  $$$$8 8 8 8 8o o o  L L L L W W W
   ` ` X` ` ` ` `r   r   )importlib.metadatarE   typingr   	packagingr   baser   modeling_utilsr   utils.quantization_configr	   utilsr
   r   r   r   r   r   
get_loggerrH   r#   r    r   r   <module>rY      s                                6000000555555 ` ` ` ` ` ` ` ` ` ` ` ` 2 2 2 2 2 2  LLL		H	%	%=` =` =` =` =`; =` =` =` =` =`r   