
    }-j                         d dl mZ ddlmZ erddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZ  e            r
d d	lZdd
lmZ  ej        e          Z G d de          Zd	S )    )TYPE_CHECKING   )HfQuantizer   )PreTrainedModel)BitsAndBytesConfig)	ACCELERATE_MIN_VERSIONBITSANDBYTES_MIN_VERSIONis_accelerate_availableis_bitsandbytes_availableis_torch_availableis_torch_hpu_availableis_torch_npu_availableis_torch_xpu_availablelogging)get_module_from_nameN)WeightConverterc                        e Zd ZU dZdZded<    fdZd Zdee	e
e	z  f         dee	e
e	z  f         fd	Zd
 Zddde	dddef fdZddde	defdZddZ	 	 ddZd Zedefd            ZddZd Zd Z xZS )Bnb8BitHfQuantizerzB
    8-bit quantization from bitsandbytes quantization method
    Fr   quantization_configc                 <     t                      j        |fi | d S N)super__init__)selfr   kwargs	__class__s      j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_bnb_8bit.pyr   zBnb8BitHfQuantizer.__init__5   s)    ,7777777    c                    t                      st          dt           d          t                      st          dt           d          ddlm}  |d           |                    d	          }| j        j	        sTt          |t                    rAt          |                                          }|d
hk    rd
|v sd|v rt          d          d S d S d S d S )NzWUsing `bitsandbytes` 8-bit quantization requires accelerate: `pip install 'accelerate>=z'`z]Using `bitsandbytes` 8-bit quantization requires bitsandbytes: `pip install -U bitsandbytes>=`r   )!validate_bnb_backend_availabilityT)raise_exception
device_mapcpudiska  Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. )r   ImportErrorr	   r   r
   integrationsr"   getr    llm_int8_enable_fp32_cpu_offload
isinstancedictsetvalues
ValueError)r   argsr   r"   r$   r.   s         r   validate_environmentz'Bnb8BitHfQuantizer.validate_environment8   sL   &(( 	 E  kA  E  E  E   )** 	 L  qI  L  L  L   	EDDDDD))$????ZZ--
'H 
	ZXbdhMiMi 
	**,,--F%  evoo69I9I )  
	 
	 
	 
	  9I9Ir   
max_memoryreturnc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g? ).0keyvals      r   
<dictcomp>z8Bnb8BitHfQuantizer.adjust_max_memory.<locals>.<dictcomp>U   s"    III(#sc3:IIIr   )items)r   r2   s     r   adjust_max_memoryz$Bnb8BitHfQuantizer.adjust_max_memoryS   s'    IIj6F6F6H6HIII
r   c                 H   |t           j                                        r!dt           j                                        i}nt	                      r9t          t           d          r$ddt           j                                         i}nzt                      r9t          t           d          r$ddt           j                                         i}n3t                      r!dt           j
                                        i}nddi}t                              d| d           |S )	N npuznpu:hpuzhpu:r%   z:The device_map was not initialized. Setting device_map to zL. If you want to use the model for inference, please set device_map ='auto' )torchcudais_availablecurrent_devicer   hasattrr?   r   r@   r   xpuloggerinfo)r   r$   s     r   update_device_mapz$Bnb8BitHfQuantizer.update_device_mapX   s&   z&&(( 	) %*";";"="=>

')) )geU.C.C ) "E)A)A)C)C"E"EF

')) )geU.C.C ) "E)A)A)C)C"E"EF

')) ) %)":":"<"<=

 %[
KK])3] ] ]  
 r   modelr   
param_nameparamztorch.Tensorc                 z    |                      ||          rdS t                                          |||          S )z4Return the element size (in bytes) for `param_name`.r   )param_needs_quantizationr   param_element_size)r   rJ   rK   rL   r   s       r   rO   z%Bnb8BitHfQuantizer.param_element_sizek   s<    ((
;; 	1ww))%UCCCr   c                 p    dd l }t          ||          \  }}t          ||j        j                  o|dk    S )Nr   bias)bitsandbytesr   r+   nnLinear8bitLt)r   rJ   rK   r   bnbmodulenames          r   rN   z+Bnb8BitHfQuantizer.param_needs_quantizationr   s?    """"+E:>>&#&"566I46>Ir   c                 Z    t          |dd           |                                 |_        |S )Nis_loaded_in_8bitT)setattris_serializableis_8bit_serializable)r   rJ   r   s      r   #_process_model_after_weight_loadingz6Bnb8BitHfQuantizer._process_model_after_weight_loadingx   s.    *D111%)%9%9%;%;"r   c                 V   ddl m} |                     || j        j        |j                  | _        | j        j        rMt          |t                    r8d |
                                D             }| j                            |            ||| j        | j        | j                  }d S )Nr   )replace_with_bnb_linearc                      g | ]\  }}|d v 	|S ))r&   r%   r6   )r7   r8   values      r   
<listcomp>zKBnb8BitHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>   s'    dddzsE5TcKcKcsKcKcKcr   )modules_to_not_convertr   pre_quantized)r(   r_   get_modules_to_not_convertr   llm_int8_skip_modules_keep_in_fp32_modulesrc   r*   r+   r,   r;   extendrd   )r   rJ   r$   r   r_   keys_on_cpus         r   $_process_model_before_weight_loadingz7Bnb8BitHfQuantizer._process_model_before_weight_loading}   s     	;:::::&*&E&E4+A5C^'
 '
# #D 	@*d++ @ddZ5E5E5G5Gddd+22;???''#'#> $ 8,	
 
 
r   c                     dS NTr6   r   s    r   r[   z"Bnb8BitHfQuantizer.is_serializable   s    tr   c                     dS rl   r6   rm   s    r   is_trainablezBnb8BitHfQuantizer.is_trainable   s    tr   Nc                 8    ddl m}  ||| j        |          }|S )Nr   )dequantize_and_replace)r   dtype)r(   rq   r   )r   rJ   rr   rq   s       r   _dequantizezBnb8BitHfQuantizer._dequantize   s4    999999&&u$BZbghhhr   c                 $    ddl m}  ||           S )Nr   )Bnb8bitQuantize)integrations.bitsandbytesru   )r   ru   s     r   get_quantize_opsz#Bnb8BitHfQuantizer.get_quantize_ops   s$    ??????t$$$r   c                 ^    ddl m} | j        rt          g dd ||           g          gS g S )Nr   )Bnb8bitDeserialize)SCBweight_formatweightr|   )source_patternstarget_patterns
operations)rv   ry   rd   r   )r   ry   s     r   get_weight_conversionsz)Bnb8BitHfQuantizer.get_weight_conversions   s_    BBBBBB 	$F$F$F$, 2 24 8 89    	r   )rJ   r   r   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r1   r,   strintr<   rI   floatrO   boolrN   r]   rj   r[   propertyro   rs   rw   r   __classcell__)r   s   @r   r   r   -   s          !----8 8 8 8 8  6DcCi,@ T#sUXy.EY    
  &D(9 Ds DSa Dfk D D D D D DJ.? JS J_c J J J J   

 
 
 
 
0   d    X   % % %
      r   r   )typingr   baser   modeling_utilsr   utils.quantization_configr   utilsr	   r
   r   r   r   r   r   r   r   quantizers_utilsr   rA   core_model_loadingr   
get_loggerr   rG   r   r6   r   r   <module>r      sY   !                  ?000000>>>>>>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 3 2 2 2 2 2  5LLL444444		H	%	%E E E E E E E E E Er   