
    }-j                      $   d dl Z d dlmZ ddlmZ ddlmZmZ erddlm	Z	 ddl
mZ d d	lmZ dd
lmZmZmZ dZ e            rddlmZ  e            rd dlZ e            rd dlmZ  ej        e          Zdededz  fdZ G d de          ZdS )    N)TYPE_CHECKING   )HfQuantizer)get_module_from_nameshould_convert_module   )PreTrainedModel)TorchAoConfig)	safe_open)is_torch_availableis_torchao_availableloggingz2.5.0)WeightConverter)flatten_tensor_state_dictconfig_namereturnc                     t          j        d|                                           }|r|                    d          ndS )z
    Extract the size digit from torchao config class names like "Int4WeightOnlyConfig", "Int8WeightOnlyConfig".
    Returns the digit as a string if found, otherwise None.
    z
(\d)weightr   N)researchlowergroup)r   matchs     i/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_torchao.py_fuzzy_match_sizer   1   s:    
 Im[%6%6%8%899E",5;;q>>>,    c                       e Zd ZU dZdZded<    fdZd Zd Zdd	d
e	ddde
f fdZdee	ee	z  f         dee	ee	z  f         fdZdddZdd	d
e	defdZdefdZedefd            Zedefd            Zdee	         fdZd Zd Z xZS )TorchAoHfQuantizerz?
    Quantizer for torchao: https://github.com/pytorch/ao/
    Fr
   quantization_configc                      t                      j        |fi | t          t          | j        j                  j                  }|dk    rdnd| _        d S )N4g      ?r   )super__init__r   typer   
quant_type__name__quantized_param_size)selfr   kwargs
size_digit	__class__s       r   r"   zTorchAoHfQuantizer.__init__B   s]    ,77777&tD,D,O'P'P'YZZ
+5+<+<CC!!!!r   c                    t                      st          d          |                    d          }d| _        t	          |t
                    rd|                                v sd|                                v r^t          |          dk    rId|                                v | _        | j        r+d|                                v rt          d          d S d S d S d S d S )NzSLoading an torchao quantized model requires torchao library (`pip install torchao`)
device_mapFdiskcpur   zYou are attempting to perform disk offload with a pre-quantized torchao model This is not supported yet . Please remove the disk device from the device_map.)
r   ImportErrorgetoffload_to_cpu
isinstancedictvalueslenpre_quantized
ValueError)r'   argsr(   r,   s       r   validate_environmentz'TorchAoHfQuantizer.validate_environmentH   s   #%% 	ustttZZ--
#j$'' 	*++----*:K:K:M:M1M1MSVWaSbSbefSfSf&+z/@/@/B/B&B#% &J4E4E4G4G*G*G$i  		 	SfSf1M1M *G*Gr   c                 D    t          |                                          S )zv
        We flatten the state dict of tensor subclasses so that it is compatible with the safetensors format.
        )r   
state_dict)r'   models     r   get_state_dict_and_metadataz.TorchAoHfQuantizer.get_state_dict_and_metadataW   s     ))9)9););<<<r   r<   r	   
param_nameparamztorch.Tensorr   c                     |                      ||          r| j        | j        S t                                          |||          S )z4Return the element size (in bytes) for `param_name`.)param_needs_quantizationr&   r!   param_element_size)r'   r<   r>   r?   r*   s       r   rB   z%TorchAoHfQuantizer.param_element_size]   sH    ((
;; 	-@Y@e,,ww))%UCCCr   
max_memoryc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g? ).0keyvals      r   
<dictcomp>z8TorchAoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>f   s"    HHHcc39HHHr   )items)r'   rC   s     r   adjust_max_memoryz$TorchAoHfQuantizer.adjust_max_memoryd   s'    HHZ5E5E5G5GHHH
r   Nc                    |                      || j        j        |j                  | _        | j        j        r|                                fd|                                D             |                                fd|                                D             fd| j        D             | _        ||                     |           d S d S )Nc                 Z    g | ]'\  }}t          |          t                    k    %|(S rF   id)rG   namemodule	input_embs      r   
<listcomp>zKTorchAoHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>o   s;    mmmfQSTZQ[Q[_abk_l_lQlQltQlQlQlr   c                 Z    g | ]'\  }}t          |          t                    k    %|(S rF   rO   )rG   rQ   rR   
output_embs      r   rT   zKTorchAoHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>q   s;    ooovRTU[R\R\`bcm`n`nRnRnRnRnRnr   c                 "    g | ]}|z   v	|S rF   rF   )rG   xinput_emb_namesoutput_emb_namess     r   rT   zKTorchAoHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>r   s0     + + +!?UeCe:e:e:e:e:er   )	get_modules_to_not_convertr   modules_to_not_convert_keep_in_fp32_modulesinclude_input_output_embeddingsget_input_embeddingsnamed_modulesget_output_embeddingsset_metadata)r'   r<   checkpoint_filesr(   rS   rY   rV   rZ   s       @@@@r   $_process_model_before_weight_loadingz7TorchAoHfQuantizer._process_model_before_weight_loadingi   s	   &*&E&E4+BED_'
 '
# #C 	2244Immmm8K8K8M8MmmmO4466Joooo9L9L9N9Nooo+ + + + +6+ + +D' './//// ('r   c                 R   t          || j                  sdS t          ||          \  }}t          j        j        g}| j        j        r$|                    t          j        j	                   ddl
m}m} t          | j        j        |          rw|                    dd          \  }	}
 ||	| j        j                  sF ||| j        j                  s0d| j        j        j        v rt          |t#          |                    rdS t          |t#          |                    o|dk    S )	NFr   )FqnToConfigfqn_matches_fqn_config.r   _defaultTweight)r   r\   r   torchnnLinearr   r^   append	Embeddingtorchao.quantizationrf   rg   r2   r$   rsplitfqn_to_configtuple)r'   r<   r>   r(   rR   tensor_name_QUANTIZABLErf   rg   
module_fqn_s              r   rA   z+TorchAoHfQuantizer.param_needs_quantizationy   s>   $Z1LMM 	5 35*EE(#C 	4 2333LLLLLLLLd.9;GG 
	&--c155MJ&&z43K3VWW))*d6N6YZZ $":"E"SSS"65+>+>?? T t&%"5"566R;(;RRr   c                     dS NTrF   r'   s    r   is_serializablez"TorchAoHfQuantizer.is_serializable   s    tr   c                 `    t          t          | j        j                  j                  dk    S )N8)r   r#   r   r$   r%   rz   s    r   is_trainablezTorchAoHfQuantizer.is_trainable   s)     !d&>&I!J!J!STTX[[[r   c                     dS ry   rF   rz   s    r   is_compileablez!TorchAoHfQuantizer.is_compileable   s    tr   rc   c                    |d                              d          rdi }|D ]V}t          |d          5 }|                                pi }|                    |           d d d            n# 1 swxY w Y   W|| _        d S d S )Nr   z.safetensorspt)	framework)endswithr   metadataupdate)r'   rc   r   
checkpointf	metadata_s         r   rb   zTorchAoHfQuantizer.set_metadata   s    A''77 	%H. / /
zT::: /a !

 2IOOI.../ / / / / / / / / / / / / / / %DMMM	% 	%s   ,A++A/	2A/	c                 $    ddl m}  ||           S )Nr   )TorchAoQuantize)integrations.torchaor   )r'   r   s     r   get_quantize_opsz#TorchAoHfQuantizer.get_quantize_ops   s$    ::::::t$$$r   c                 ^    ddl m} | j        rt          g dd ||           g          gS g S )Nr   )TorchAoDeserialize)_weight_qdata_weight_scale_and_zero_weight_per_tensor_scale_weight_scale_weight_zero_point_weight_act_pre_scalerj   )source_patternstarget_patterns
operations)r   r   r6   r   )r'   r   s     r   get_weight_conversionsz)TorchAoHfQuantizer.get_weight_conversions   sg    ====== 	% % % %- 2 24 8 89   " 	r   )N)r<   r	   )r%   
__module____qualname____doc__requires_calibration__annotations__r"   r9   r=   strfloatrB   r3   intrL   rd   boolrA   r{   propertyr~   r   listrb   r   r   __classcell__)r*   s   @r   r   r   :   s          !((((D D D D D  = = =D(9 Ds DSa Dfk D D D D D DDcCi,@ T#sUXy.EY    
0 0 0 0 0 S.? SS S_c S S S S6     \d \ \ \ X\     X%T#Y % % % %% % %
      r   r   )r   typingr   baser   quantizers_utilsr   r   modeling_utilsr	   utils.quantization_configr
   safetensorsr   utilsr   r   r   MIN_TORCH_VERSIONcore_model_loadingr   rk   1torchao.prototype.safetensors.safetensors_supportr   
get_loggerr%   loggerr   r   r   rF   r   r   <module>r      s   
			                   I I I I I I I I  :000000999999 ! ! ! ! ! ! E E E E E E E E E E    5444444  LLL      
 
	H	%	%-3 -3: - - - -J J J J J J J J J Jr   