
    }-j                         d dl mZ ddlmZ erddlmZ ddlmZ ddlm	Z	m
Z
mZ  e
            rd dlZ ej        e          Z G d	 d
e          ZdS )    )TYPE_CHECKING   )HfQuantizer   )PreTrainedModel)BitNetQuantConfig)is_accelerate_availableis_torch_availableloggingNc                        e Zd ZU dZdZded<    fdZd Z	 	 dd	Zd
e	e
ee
z  f         de	e
ee
z  f         fdZd Zedefd            Zedefd            Zd Z xZS )BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    Tr   quantization_configc                 <     t                      j        |fi | d S )N)super__init__)selfr   kwargs	__class__s      h/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_bitnet.pyr   zBitNetHfQuantizer.__init__,   s)    ,7777777    c                    t                      st          d          t          j                                        st
                              d           d S |                    d          }|t
                              d           d S t          |t                    rNt          |          dk    rd|                                v sd|                                v rt          d          d S d S )	NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.r   cpudiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r	   ImportErrortorchcudais_availableloggerwarning_onceget
isinstancedictlenvalues
ValueError)r   argsr   r   s       r   validate_environmentz&BitNetHfQuantizer.validate_environment/   s   &(( 	qopppz&&(( 	z   FZZ--
I     
D)) 	:""u
0A0A0C0C'C'CvQ[QbQbQdQdGdGd g  	 	GdGdr   modelr   c                     ddl m} |                     || j        j        |j                  | _         ||| j        | j                  }d S )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   )integrationsr+   get_modules_to_not_convertr   r,   _keep_in_fp32_modules)r   r)   r   r+   s       r   $_process_model_before_weight_loadingz6BitNetHfQuantizer._process_model_before_weight_loadingF   sj    
 	>=====&*&E&E4+BED_'
 '
# +*#'#> $ 8
 
 
r   
max_memoryreturnc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g? ).0keyvals      r   
<dictcomp>z7BitNetHfQuantizer.adjust_max_memory.<locals>.<dictcomp>X   s"    III(#sc3:IIIr   )items)r   r1   s     r   adjust_max_memoryz#BitNetHfQuantizer.adjust_max_memoryW   s'    IIj6F6F6H6HIII
r   c                     dS )NTr5   r   s    r   is_serializablez!BitNetHfQuantizer.is_serializable[   s    tr   c                 B    | j         j        dk    o| j         j        dk    S )Nautobitlinearonliner   linear_classquantization_moder=   s    r   is_trainablezBitNetHfQuantizer.is_trainable^   s+     $1_D G(:hF	
r   c                 B    | j         j        dk    o| j         j        dk    S )zUFlag indicating whether the quantized model can carry out quantization aware trainingr@   rA   rB   r=   s    r   is_qat_trainablez"BitNetHfQuantizer.is_qat_trainablee   s+     $1_D G(:hF	
r   c                     ddl m} ddlm} | j        j        dk    r+| j        j        dk    r |dgdg ||           g          gS g S )Nr   )WeightConverter)BitNetDeserializer@   offlineweight)source_patternstarget_patterns
operations)core_model_loadingrI   integrations.bitnetrJ   r   rC   rD   )r   rI   rJ   s      r   get_weight_conversionsz(BitNetHfQuantizer.get_weight_conversionsm   s    888888;;;;;; $1_DD(:iGG  %-J%-J 1 1$ 7 78    	r   )r)   r   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r(   r0   r#   strintr;   r>   propertyboolrE   rG   rR   __classcell__)r   s   @r   r   r   !   s,           ,,,,8 8 8 8 8  .
 
 
 
 
"DcCi,@ T#sUXy.EY       
d 
 
 
 X
 
$ 
 
 
 X
      r   r   )typingr   baser   modeling_utilsr   utils.quantization_configr   utilsr	   r
   r   r   
get_loggerrS   r   r   r5   r   r   <module>rd      s    !                  >000000====== H H H H H H H H H H  LLL 
	H	%	%[ [ [ [ [ [ [ [ [ [r   