
    }-jf                         d dl mZ ddlmZ ddlmZ erddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZ  e            r
d d	lZdd
lmZ  ej        e          Z G d de          Zd	S )    )TYPE_CHECKING   )HfQuantizer)get_module_from_name   )PreTrainedModel)BitsAndBytesConfig)	ACCELERATE_MIN_VERSIONBITSANDBYTES_MIN_VERSIONis_accelerate_availableis_bitsandbytes_availableis_torch_availableis_torch_hpu_availableis_torch_npu_availableis_torch_xpu_availableloggingN)WeightConverterc                        e Zd ZU dZdZded<    fdZd Zddd	ed
dde	f fdZ
ddd	edefdZdeeeez  f         deeeez  f         fdZd Z	 	 ddZddZd Zedefd            ZddZd Zd Z xZS )Bnb4BitHfQuantizerzB
    4-bit quantization from bitsandbytes quantization method
    Fr	   quantization_configc                 <     t                      j        |fi | d S N)super__init__)selfr   kwargs	__class__s      j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_bnb_4bit.pyr   zBnb4BitHfQuantizer.__init__5   s)    ,7777777    c                    t                      st          dt           d          t                      st          dt           d          ddlm}  |d           |                    d	          }| j        j	        sTt          |t                    rAt          |                                          }|d
hk    rd
|v sd|v rt          d          d S d S d S d S )NzWUsing `bitsandbytes` 4-bit quantization requires accelerate: `pip install 'accelerate>=z'`z]Using `bitsandbytes` 4-bit quantization requires bitsandbytes: `pip install -U bitsandbytes>=`r   )!validate_bnb_backend_availabilityT)raise_exception
device_mapcpudiska  Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. )r   ImportErrorr
   r   r   integrationsr"   getr    llm_int8_enable_fp32_cpu_offload
isinstancedictsetvalues
ValueError)r   argsr   r"   r$   r.   s         r   validate_environmentz'Bnb4BitHfQuantizer.validate_environment8   sL   &(( 	 E  kA  E  E  E   )** 	 L  qI  L  L  L   	EDDDDD))$????ZZ--
'H 
	ZXbdhMiMi 
	**,,--F%  evoo69I9I )  
	 
	 
	 
	  9I9Ir   modelr   
param_nameparamztorch.Tensorreturnc                 z    |                      ||          rdS t                                          |||          S )z4Return the element size (in bytes) for `param_name`.g      ?)param_needs_quantizationr   param_element_size)r   r2   r3   r4   r   s       r   r8   z%Bnb4BitHfQuantizer.param_element_sizeS   s<    ((
;; 	3ww))%UCCCr   c                 p    dd l }t          ||          \  }}t          ||j        j                  o|dk    S )Nr   bias)bitsandbytesr   r+   nn
Linear4bit)r   r2   r3   r   bnbmodulenames          r   r7   z+Bnb4BitHfQuantizer.param_needs_quantization[   s?    """"+E:>>&#&"344GGr   
max_memoryc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g? ).0keyvals      r   
<dictcomp>z8Bnb4BitHfQuantizer.adjust_max_memory.<locals>.<dictcomp>c   s"    III(#sc3:IIIr   )items)r   rA   s     r   adjust_max_memoryz$Bnb4BitHfQuantizer.adjust_max_memorya   s'    IIj6F6F6H6HIII
r   c                 H   |t           j                                        r!dt           j                                        i}nt	                      r9t          t           d          r$ddt           j                                         i}nzt                      r9t          t           d          r$ddt           j                                         i}n3t                      r!dt           j
                                        i}nddi}t                              d| d           |S )	N npuznpu:hpuzhpu:r%   z:The device_map was not initialized. Setting device_map to zL. If you want to use the model for inference, please set device_map ='auto' )torchcudais_availablecurrent_devicer   hasattrrM   r   rN   r   xpuloggerinfo)r   r$   s     r   update_device_mapz$Bnb4BitHfQuantizer.update_device_mapf   s&   z&&(( 	) %*";";"="=>

')) )geU.C.C ) "E)A)A)C)C"E"EF

')) )geU.C.C ) "E)A)A)C)C"E"EF

')) ) %)":":"<"<=

 %[
KK])3] ] ]  
 r   c                 V   ddl m} |                     || j        j        |j                  | _        | j        j        rMt          |t                    r8d |
                                D             }| j                            |            ||| j        | j        | j                  }d S )Nr   )replace_with_bnb_linearc                      g | ]\  }}|d v 	|S ))r&   r%   rD   )rE   rF   values      r   
<listcomp>zKBnb4BitHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>   s'    dddzsE5TcKcKcsKcKcKcr   )modules_to_not_convertr   pre_quantized)r(   rY   get_modules_to_not_convertr   llm_int8_skip_modules_keep_in_fp32_modulesr]   r*   r+   r,   rI   extendr^   )r   r2   r$   r   rY   keys_on_cpus         r   $_process_model_before_weight_loadingz7Bnb4BitHfQuantizer._process_model_before_weight_loadingy   s     	;:::::&*&E&E4+A5C^'
 '
# #D 	@*d++ @ddZ5E5E5G5Gddd+22;???''#'#> $ 8,	
 
 
r   c                 n    t          |dd           t          |d|                                            |S )Nis_loaded_in_4bitTis_4bit_serializable)setattris_serializable)r   r2   r   s      r   #_process_model_after_weight_loadingz6Bnb4BitHfQuantizer._process_model_after_weight_loading   s9    *D111-t/C/C/E/EFFFr   c                     dS NTrD   r   s    r   ri   z"Bnb4BitHfQuantizer.is_serializable   s    tr   c                     dS rl   rD   rm   s    r   is_trainablezBnb4BitHfQuantizer.is_trainable   s    tr   Nc                 8    ddl m}  ||| j        |          }|S )Nr   )dequantize_and_replace)r   dtype)r(   rq   r   )r   r2   rr   rq   s       r   _dequantizezBnb4BitHfQuantizer._dequantize   s4    999999&&u$BZbghhhr   c                 $    ddl m}  ||           S )Nr   )Bnb4bitQuantize)integrations.bitsandbytesru   )r   ru   s     r   get_quantize_opsz#Bnb4BitHfQuantizer.get_quantize_ops   s$    ??????t$$$r   c                 ^    ddl m} | j        rt          g dd ||           g          gS g S )Nr   )Bnb4bitDeserialize)zweight.nested_absmaxzweight.nested_quant_mapzweight.quant_mapzweight.absmaxz$weight.quant_state.bitsandbytes__nf4z$weight.quant_state.bitsandbytes__fp4weightrz   )source_patternstarget_patterns
operations)rv   ry   r^   r   )r   ry   s     r   get_weight_conversionsz)Bnb4BitHfQuantizer.get_weight_conversions   sg    BBBBBB 	% % % %- 2 24 8 89    	r   )r2   r   r   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r1   strfloatr8   boolr7   r,   intrJ   rW   rd   rj   ri   propertyro   rs   rw   r~   __classcell__)r   s   @r   r   r   -   s          !----8 8 8 8 8  6D(9 Ds DSa Dfk D D D D D DH.? HS H_c H H H HDcCi,@ T#sUXy.EY    
  &
 
 
 
 
0   
   d    X   % % %
      r   r   )typingr   baser   quantizers_utilsr   modeling_utilsr   utils.quantization_configr	   utilsr
   r   r   r   r   r   r   r   r   rO   core_model_loadingr   
get_loggerr   rU   r   rD   r   r   <module>r      sW   !                 2 2 2 2 2 2  ?000000>>>>>>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  5LLL444444		H	%	%N N N N N N N N N Nr   