
    }-j                         d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
 ddlmZmZmZmZmZ dd	l	mZ  e            rd d
lZ ej        e          Z G d de          Zd
S )    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)FPQuantConfig)is_fp_quant_availableis_qutlass_availableis_torch_availableis_torch_xpu_availablelogging)QuantizationConfigMixinNc                        e Zd ZU dZdZdZded<   def fdZd Z	ddZ
ddded
efdZ	 	 ddZedded         fd            Zd Zd Zd Z xZS )FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTr
   quantization_configc                 <     t                      j        |fi | d S N)super__init__)selfr   kwargs	__class__s      j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   zFPQuantHfQuantizer.__init__+   s)    ,7777777    c                 R   t           j                                        st                      st	          d          t                      s| j        j        st          d          | j        j        re| j        j	        dk    rUt           j                                        r7t           j        
                                d         dk     rt          d          | j        j        rt                              d           t                      st          d          || j        j        st          d	          t          |t                     rZ| j        j        s)t#          |          d
k    rd|                                v sd|                                v rt          d          d S d S )Nz]FPQuant quantization is only supported on GPU or Intel XPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.nvfp4r   	   zNVFP4 pseudoquantization requires a GPU with compute capability >= 9.0 (Hopper or newer) because the Triton kernel uses the `fp8e4nv` type. Please use `forward_dtype='mxfp4'` instead, or use a GPU with compute capability >= 9.0.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.r   cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availabler   NotImplementedErrorr   r   pseudoquantizationImportErrorforward_dtypeget_device_capability
ValueErrorloggerwarningr   
isinstancedictlenvalues)r   
device_mapr   s      r   validate_environmentz'FPQuantHfQuantizer.validate_environment.   s   z&&(( 	1G1I1I 	%o   $%% 	d.F.Y 	 S  
 $7
	(6'AA
'')) B
00221599?   #6 	NN ]   %&& 	ighhhd&>&QF   
D)) 
	,?	
OOa''Z..0000Z..0000 h  
	 
	
 10r   dtypetorch.dtypereturnc                 z    |t           j        k    r*t                              d| d           t           j        }|S )NzSetting dtype to zP, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r"   bfloat16r+   warning_once)r   r3   s     r   update_dtypezFPQuantHfQuantizer.update_dtype^   sC    EN""{E{{{   NEr   modelr	   
param_namec                 d    ddl m} t          ||          \  }}t          ||          r|dv rdS dS )Nr   )FPQuantLinear)weightqweightdqweightTF)fp_quantr=   r   r-   )r   r:   r;   r   r=   moduletensor_names          r   param_needs_quantizationz+FPQuantHfQuantizer.param_needs_quantizationf   sO    ******25*EEfm,, 	@a1a1a45r   c                 T    ddl m} ddlm}  || || j                             d S )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)rA   rF   integrations.fp_quantrG   r   )r   r:   r   rF   rG   s        r   $_process_model_before_weight_loadingz7FPQuantHfQuantizer._process_model_before_weight_loadingp   s`    
 	:99999AAAAAA$$#8#89Q#R#R	
 	
 	
 	
 	
 	
r   Nc                 V    | j         j        }|st                              d           |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr+   r,   )r   r:   	trainables      r   is_trainablezFPQuantHfQuantizer.is_trainable~   s9    ,A	 	NN E   r   c                     dS )NT )r   s    r   is_serializablez"FPQuantHfQuantizer.is_serializable   s    tr   c                 $    ddl m}  ||           S )Nr   )FpQuantQuantize)rI   rS   )r   rS   s     r   get_quantize_opsz#FPQuantHfQuantizer.get_quantize_ops   s$    ;;;;;;t$$$r   c                     ddl m} ddlm} | j        r@| j        j        r |dgd ||           g          gS  |dgd ||           g          gS g S )Nr   )WeightConverter)FpQuantDeserializez	.dqweight)source_patternstarget_patterns
operationsz.qweight)core_model_loadingrV   rI   rW   pre_quantizedr   r&   )r   rV   rW   s      r   get_weight_conversionsz)FPQuantHfQuantizer.get_weight_conversions   s    888888>>>>>> 	': #O)4(3$6$6t$<$<#=    $O)3(2$6$6t$<$<#=    	r   )r3   r4   r5   r4   )r:   r	   r   )__name__
__module____qualname____doc__requires_calibrationis_qat_trainable__annotations__r   r   r2   r9   strboolrD   rJ   propertyr   rN   rQ   rT   r]   __classcell__)r   s   @r   r   r   "   s5          !((((8,C 8 8 8 8 8 8. . .`   .? S _c    
 
 
 
 
  (+<"=    X  % % %
      r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utils.quantization_configr
   utilsr   r   r   r   r   r   r"   
get_loggerr^   r+   r   rP   r   r   <module>rp      s   + * * * * * * *       2 2 2 2 2 2  :000000999999 t t t t t t t t t t t t t t ? ? ? ? ? ?  LLL		H	%	%B B B B B B B B B Br   