
    |-j+                        U d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%  e            rddl&m'Z' ndZ' e            rddl(m)Z) ndZ) ej*        e+          Z,i Z-e.e/e0e         f         e1d<   i Z2e.e/e0e         f         e1d<    ee/e/dz  f         g d e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd  e            rdndfd! e            rd"ndfd# e            rd$ndfd%d& e            rdndfd' e            rd(ndfd)d*d+ e            rd,ndfd- e            rd.ndfd/d0 e            rd1ndfd2d3 e            rdndfd4 e            rd5ndfd6d7 e            rdndfd8d9 e            rd:ndfd;d< e            rdndfd=d> e            rdndfd? e            rdndfd@dA e            rdBndfdC e            rd5ndfdD e            rdndfdE e            rdndfdF e            rdndfdG e            rdndfdH e            rdIndfdJdKdLdMdN e            rd5ndfdO e            rdPndfdQ e            rdRndfdSdT e            rdndfdU e            rdVndfdW e            rdndfdX e            rd5ndfdY e            rdndfdZd[ e            rd\ndfd] e            rd^ndfd_d` e            rdndfda e            rd5ndfdb e            rdcndfdd e            rdendfdfdg e            rdhndfdi e            rdjndfdk e            rdjndfdl e            rdjndfdm e            rdjndfdn e            rdjndfdo e            rdjndfdp e            rdndfdq e            rdrndfds e            rdrndfdt e            rdrndfdu e            rdrndfdv e            rdrndfdw e            rdrndfdx e            rdrndfdy e            rdrndfdz e            rdrndfd{ e            rd|ndfd} e            rd5ndfd~ e            rd5ndfd e            rd5ndfd e            rd\ndfdd e            rd5ndfd e            rdrndfd e            rdrndfd e            rdrndfd e            rdrndfd e            rdndfd e            rdndfd e            rdndfddd e            rdndfd e            rdndfd e            rd5ndfd e            rd5ndfd e            rdndfd e            rd5ndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rd"ndfd e            rd"ndfdd e            rdndfd e            rdndfd e            rd\ndfd e            rd\ndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfdd e            rdndfd e            rdndfdd e            rdrndfd e
            rdn e            rdrndfd e
            rdn e            rdrndfd e
            rdn e            rdrndfd e
            rdn e            rdrndfd e
            rdn e            rdrndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rd\ndfdɑd e            rdndfd e            rdndfd e            rdndfd e            rdndfdБd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rd\ndfd e            rd\ndfd e            rdrndfd e            rdrndfd e            rd\ndfd e            rdndfd e            rdndfd e            rdndfd e            rd5ndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rd.ndfd e            rd.ndfdd e            rd5ndfdd e            rdndfd e
            rdn e            rdrndfd e            rdndfd e            rdndfdd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd  e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfdd e            rdndfd e            rdjndfd e            rd	ndfd
 e            rdndfd e            rdndfdddd e            rdndfd e            rd\ndfd e            rdndfd e            rdndfd e            rdndfd e            rdndfd e            rdjndfd e            rdndfd e            rdndfd e            rdndfd e            rd ndfd!d" e            rdndfd# e            rd\ndfd$ e            rd5ndfd% e            rdndfd& e            rdndfd' e            rdjndfd(d) e            rdndfd* e            rdndfd+ e            rd,ndfd- e            rdndfd.d/d0 e            rdndfd1 e            rdndfd2d3 e
            rdn e            rdrndfd4 e
            rdn e            rdrndfd5d6d7d8d9 e            rd:ndfd; e            rdndfd< e            rd=ndfd>d? e            rdndfd@ e            rdndfdA e            rdBndfdC e            rd\ndfdD e            rdndfdE e            rdndf          Z3h dFZ4e5e/         e1dG<   e4D ]Z6e6e3vr e            rdrnde3e6<    ee!e3          Z7dH  e!j8                    D             Z9dI Z:dJ Z;dKe/dLe0e         dz  fdMZ<	 	 	 	 	 	 	 d\dPe/ej=        e/         z  dQe/ej=        e/         z  dz  dRe>dSe.e/e/f         dz  dTe>e/z  dz  dUe/dz  dVe>dWe/dLe.e/ef         fdXZ? G dY dZ          Z@d[dZgZAdS (]  zAuto Tokenizer class.    N)OrderedDict)Any)is_mistral_common_available   )PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)TOKENIZER_CONFIG_FILE)extract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging)cached_file   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)TokenizersBackend)SentencePieceBackendREGISTERED_TOKENIZER_CLASSESREGISTERED_FAST_ALIASESaimv2CLIPTokenizeralbertAlbertTokenizeralignBertTokenizeraudioflamingo3Qwen2Tokenizer
aya_visionCohereTokenizerbarkbartRobertaTokenizerbarthezBarthezTokenizer)bartphoBartphoTokenizerbertzbert-generationBertGenerationTokenizer)zbert-japaneseBertJapaneseTokenizer)bertweetBertweetTokenizerbig_birdBigBirdTokenizerbigbird_pegasusPegasusTokenizer)biogptBioGptTokenizer
blenderbotBlenderbotTokenizer)zblenderbot-smallBlenderbotSmallTokenizerblipzblip-2GPT2Tokenizer)bridgetowerr+   bros)byt5ByT5Tokenizer	camembertCamembertTokenizer)canineCanineTokenizerchinese_clip)clapr+   clipclipseg)clvpClvpTokenizer
code_llamaCodeLlamaTokenizercodegencoherecohere2colqwen2convbertcpmCpmTokenizer)cpmantCpmAntTokenizer)ctrlCTRLTokenizer)zdata2vec-audioWav2Vec2CTCTokenizer)zdata2vec-textr+   dbrxdebertaDebertaTokenizerz
deberta-v2DebertaV2Tokenizer)diaDiaTokenizer
distilbertdprDPRQuestionEncoderTokenizerelectraemu3ernie)esmEsmTokenizerfalcon_mambaGPTNeoXTokenizerfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubertFlaubertTokenizerflava	flex_olmo	florence2BartTokenizerfnetFNetTokenizer)fsmtFSMTTokenizerfunnelFunnelTokenizergemmaGemmaTokenizergemma2gemma3gemma3_textgemma3ngemma3n_textgitglmr   glm4glm4_moeglm4_moe_liteglm4v	glm4v_moe	glm_imageglmasrgot_ocr2zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japaneseGPTNeoXJapaneseTokenizergptjgranite
granitemoegranitemoehybridgranitemoesharedzgrounding-dinogroupvitherbertHerbertTokenizer)hubertr[   )ibertr+   ideficsLlamaTokenizeridefics2instructblipinstructblipvideointernvljais2jina_embeddings_v3XLMRobertaTokenizerzkosmos-2lasr_ctcLasrTokenizerlasr_encoderlayoutlm
layoutlmv2LayoutLMv2Tokenizer
layoutlmv3LayoutLMv3Tokenizer	layoutxlmLayoutXLMTokenizerledLEDTokenizerlighton_ocrQwen2TokenizerFastlilt
longformer)lukeLukeTokenizerlxmertLxmertTokenizerm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermarkuplmMarkupLMTokenizermbartMBartTokenizermbart50MBart50Tokenizer)megar+   zmegatron-bert
metaclip_2)zmgp-strMgpstrTokenizerminicpmv4_6	ministralMistralCommonBackend
ministral3mistralmistral3mixtralmlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizermpnetMPNetTokenizermpt)mrar+   mt5T5Tokenizermusicgenmusicgen_melodymvpMvpTokenizer)myt5MyT5TokenizernezhanllbNllbTokenizerznllb-moe
nomic_bertnougatNougatTokenizernystromformerolmoolmo2olmo3olmo_hybridolmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizeroptovis2owlv2owlvitparakeet_ctcParakeetTokenizerparakeet_tdtpegasus	pegasus_x)	perceiverPerceiverTokenizerphi)phobertPhobertTokenizer
pix2structpixtralplbartPLBartTokenizerpp_formulanet)
prophetnetProphetNetTokenizerqdqbertqianfan_ocrqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3qwen3_5Qwen3_5Tokenizerqwen3_5_moe	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)ragRagTokenizerrealmrecurrent_gemmareformerReformerTokenizerrembertRemBertTokenizer	retribert)robertar+   )zroberta-prelayernormr+   )roc_bertRoCBertTokenizerroformerRoFormerTokenizerrwkvsam3
sam3_videoseamless_m4tSeamlessM4TTokenizerseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2Siglip2Tokenizerspeech_to_textSpeech2TextTokenizerspeecht5SpeechT5Tokenizer)splinterSplinterTokenizersqueezebertstablelm
starcoder2switch_transformerst5t5gemma)tapasTapasTokenizertrocrtvpudopUdopTokenizerumt5)	unispeechr[   )zunispeech-satr[   viltvisual_bert)vitsVitsTokenizervoxtralvoxtral_realtime)wav2vec2r[   )zwav2vec2-bertr[   )zwav2vec2-conformerr[   )wav2vec2_phonemeWav2Vec2PhonemeCTCTokenizerwhisperWhisperTokenizerxclipxglmXGLMTokenizer)xlmXLMTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerxlstmxmodyoso>%   fuyuphi3jambajanusllavamolmonvfp4r  arcticchatlmmolmo2phi3_vphimoeopencuaopenvlastep3p5minicpmvnemotronstep3_vlvipllava	chameleon	internlm2
cohere_asr
h2ovl_chat
llava_next
minimax_m2
modernbertdeepseek_v2deepseek_v3deepseek_v4deepseek_vldeepseek_ocrdeepseek_ocr2internvl_chatdeepseek_vl_v2hyperclovax_vlmdeepseek_vl_hybrid)MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASSc                     i | ]\  }}||	S  r{  ).0kvs      j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py
<dictcomp>r    s    @@@41a!Q@@@    c                     t          | dd          5 }t          j        |          cddd           S # 1 swxY w Y   dS )z*Loads a vocabulary file into a dictionary.rutf-8encodingN)openjsonload)
vocab_filereaders     r  
load_vocabr    s    	j#	0	0	0 !Fy  ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !s   488c                 (   g }t          | dd          5 }|D ]a}|                                }|rI|                    d          s4|                    t	          |                                                     b	 ddd           n# 1 swxY w Y   |S )z Loads a merges file into a list.r  r  r  #N)r  strip
startswithappendtuplesplit)merges_filemergesr  lines       r  load_mergesr    s    F	k3	1	1	1 3V 	3 	3D::<<D 3DOOC00 3eDJJLL11222	33 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 Ms   A%BBB
class_namereturnc                    | dv rt           S | t          v rt          |          S | t          v rt          |          S | dk    rt           S t                                          D ]\  }}|| k    rt          |          }|dv r| dk    rt          j        dd          }nt          j        d| d          }	 t          ||           }t          |d	d           x}r\|t          j
        v rNt          j
        |         }t          ||j        d
z   |           t          j
                            |dz   |           |c S # t          $ r Y w xY wt          j                                        D ]}t          |dd           | k    r|c S t          j        d          }t%          ||           rt          ||           S |                     d
          rt)          | d d                   S d S )N>   BloomTokenizerBloomTokenizerFastr   )r   r   r   r   r   r   rD  r   z.tokenization_mistral_commontransformers.ztransformers.models
__module__Fast_fast__name__)r   r   r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrsysmodulessetattrr  
setdefaultAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattrendswithtokenizer_class_from_name)	r  module_nametokenizer_classmoduleresultsubmodbase_mod	tokenizermain_modules	            r  r  r    s4   ===  ,,,&z22111+J77(((   )@(E(E(G(G  $_j((3K@@Krrr"888"01OQ_``"01B[1B1BDYZZ	 44%flDAAAF GvQTQ\G\G\"{62HHfo&>GGGK**6G+;XFFF!   # )( '5<<>>  	9j$//:== > ).99K{J'' 0{J/// 6"" :(CRC9994s   1B D44
E EF pretrained_model_name_or_path	cache_dirforce_downloadproxiestokenrevisionlocal_files_only	subfolderc                 R   |                     d          }	t          | t          |||||||ddd|	          }
|
t                              d           i S t          |
|	          }	t          |
d          5 }t          j        |          }ddd           n# 1 swxY w Y   |	|d<   |S )aY  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```_commit_hashF)r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  Nz\Could not locate the tokenizer configuration file, will try to use the model config instead.r  r  )	getr   r   loggerinfor   r  r  r  )r  r  r  r  r  r  r  r  kwargscommit_hashresolved_config_filer  r  s                r  get_tokenizer_configr    s   J **^,,K&%%))..305    #rsss	%&:KHHK	"W	5	5	5 #6""# # # # # # # # # # # # # # #(F>Ms   6BBBc                   v    e Zd ZdZd Ze ee          dee	z  fd                        Z
e	 dd            ZdS )	AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                      t          d          )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    r  __init__zAutoTokenizer.__init__>  s    _
 
 	
r  r  c           	      D   |                     dd          }d|d<   |                     dd          }|                     dd          }|                     dd          }|                    d          }|t                              |d          }	|	8t          d	| d
d                    d t          D                        d          t          |	          }
|
t          d|	 d           |
j        |g|R i |S |r7t          ||fi |}t          |d          d         }t          j
        d+i |}nA|?	 t          j        |fd|i|}n)# t          t          f$ r t          j        |fi |}Y nw xY w|j        }t          |fi |}|                    dd          }d}d|v rGt          |d         t           t"          f          r	|d         }n|d                             dd          }| |||dk    rt                              |          t                              |                              d          |                    d          k    rt                              |                              d          }|dvr8|t&          v r|n|}t          |          }
|
|
j        dvr |
j        |g|R i |S t*          t+          j        |g|R i |S t          d| d          d|v r|d         |d<   |r|                    d          r
|dd         }|du}t/          |          t0          v p(|duo$t          |          dupt          |dz             du}|oSt/          |          t0          vo=|duo9t          |          pt          |dz             j                            d           }|r|t&          v rd}d}|rN|d         	|d         }n|d         }d|v r|                    d          d         }nd}t9          |||||          }|rs|rq|so|r"t          |                    d                     t;          ||fi |}
|                     d d          }|
                                  |
j        |g|R d|i|S |h|}t          |          }
|
'|                    d          st          |dz             }
|
|
j        d!k    rt*          }
|
t*          }
 |
j        |g|R i |S t?          |dd          rJ|j         }d"|vr|                    d          r
|dd         }t          |          }
 |
j        |g|R i |S t          |tB                    rdt/          |j"                  t/          |j#                  ur5tH          %                    d#|j#        j&         d$|j"        j&         d%           |j#        }tO          t/          |          j                  pt?          |d&d          }|@t0                              t/          |          t*                    }
|
 |
j        |g|R i |S |                    dd          }||d'k    r|                    d          r
|dd         }t          |          }
|
'|                    d          st          |dz             }
|
|
j        d!k    rt*          }
|
t*          }
 |
j        |g|R i |S t          d(|j&         d)d                    d* t0          D                        d          ),a  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - a path to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PreTrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            backend (`str`, *optional*, defaults to `"tokenizers"`):
                Backend to use for tokenization. Valid options are:
                - `"tokenizers"`: Use the HuggingFace tokenizers library backend (default)
                - `"sentencepiece"`: Use the SentencePiece backend
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)

        >>> # Explicitly use the tokenizers backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="tokenizers")

        >>> # Explicitly use the sentencepiece backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
        ```configNT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3      K   | ]}|V  d S Nr{  r|  cs     r  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s"       D Dq D D D D D Dr  r  zTokenizer class z is not currently imported.F)return_tensorsr  auto_mapr  r  r  )r   PythonBackendPreTrainedTokenizerFastzTokenizer class 'zf' specified in the tokenizer config was not found. The tokenizer may need to be converted or re-saved.r  r  ztransformers.r   r   z--code_revisionr  r  z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.
model_typer   z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   $   K   | ]}|j         V  d S r  )r  r  s     r  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>Y  s$      4[4[AQZ4[4[4[4[4[4[r  r{  )(popr  r  
ValueErrorjoinr  from_pretrainedr   r
   r   	for_modelr  r   r  r  
isinstancer  listremovesuffixry  r  r   r  typer  r  r  r  r	   r   register_for_auto_classr  r  r   decoderencoderr  warning	__class__r   )clsr  inputsr  r  _r  r  r  tokenizer_class_namer  	gguf_pathconfig_dictconfig_model_typetokenizer_configtokenizer_config_classtokenizer_auto_mapregistered_class_namer  has_remote_codehas_local_codeexplicit_local_code	class_refupstream_repotokenizer_class_candidate_classr  s                              r  r  zAutoTokenizer.from_pretrainedD  s	   d Hd++#| JJz4(($4d;;"JJ':DAAJJ{++	 %#:#>#>~t#T#T #+ H~ H Hyy D D,C D D DDDH H H  
 88LMMO& !e4H!e!e!efff2?23PdSYddd]cddd 
	c#$A9WWPVWWI.yOOOPXYK)88K88FF^c#31 EVZ`  ( c c c)9:Wbb[abbc #- 00MXXQWXX!1!5!56G!N!N ")))*:6FF ]%5j%A""%5j%A%E%EoW[%\%\"
 &&2!-!R'''++,=>>J(,,->??LLVTT&33F;;= = %<$?$?@Q$R$R$_$_`f$g$g!$,mmm ),UUU *)/ 
 #<J"G"G".?3K T 4 4
 ;?:;Xl[allleklll ,(89VjY_jjjcijjjG$: G G G  
 ---%5n%EF>"! 	A&<&E&Ef&M&M 	A%;CRC%@",D8f):: 
"$. )*@AAM Z,-Cf-LMMUYY	 	  V$55 'd2 9-.DEE R01G&1PQQZZ889 	  	&04]]]#O!% 	!!$0.q1		.q1	y   ) 5 5a 8 $ 9!#@.Racp! !  	e0 	e9L 	e% W)*@*M*Mf*U*UVVV;IGdoohnooO

?D11A335552?2-06  J[_e   $/(>%78QRRO&/H/Q/QRX/Y/Y&";<UX^<^"_"_*/G?/Z/Z"3&"32?23PdSYddd]cdddV.55 	e+F(666??6;R;R67??O2?23PdSYddd]cddd f233 	$FN##4+?+???2v~7O 2 2%+^%=2 2 2   ^F/V0EFFm'RXZfhlJmJm
!/33DLLBSTTO*667ThW]hhhaghhh "2!5!56G!N!N!-%)<<<AWA`A`agAhAh<)?)D&78NOOO&/E/N/Nv/V/V&";<RU[<["\"\*/G?/Z/Z"3&"32?23PdSYddd]cddd_0@ _ _+/994[4[IZ4[4[4[+[+[_ _ _
 
 	
s   ;E #E65E6NFc                     |||}n||}nt          d          |||fD ]}||t          |j        <   |||t          |j        <   t                              | ||           dS )a  
        Register a new tokenizer in this mapping.

        Args:
            config_class ([`PreTrainedConfig`]):
                The configuration corresponding to the model to register.
            tokenizer_class: The tokenizer class to register (V5 - preferred parameter).
            slow_tokenizer_class: (Deprecated) The slow tokenizer to register.
            fast_tokenizer_class: (Deprecated) The fast tokenizer to register.
        Nz$You need to pass a `tokenizer_class`)exist_ok)r  r   r  r   r  register)config_classr  slow_tokenizer_classfast_tokenizer_classr  	candidates         r  r  zAutoTokenizer.register\  s     "#/"6%1"6 !GHHH.0DoV 	M 	MI$CL,Y-?@+0D0PEY#$8$AB""<8"TTTTTr  )NNNF)r  r  __qualname____doc__r  classmethodr   r  r   r   r  staticmethodr  r{  r  r  r  r  6  s         
 
 
 &&'>??T
	1	1T
 T
 T
 @? [T
l kpU U U \U U Ur  r  r  )NFNNNFr  )Br  r  r  osr  collectionsr   typingr   transformers.utils.import_utilsr   configuration_utilsr   dynamic_module_utilsr   r	   modeling_gguf_pytorch_utilsr
   tokenization_utils_baser   utilsr   r   r   r   r   	utils.hubr   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_tokenizersr    tokenization_utils_sentencepiecer   
get_loggerr  r  r   dictstrr  __annotations__r   r  ry  setr  r  r  CONFIG_TO_TYPEr  r  r  PathLikeboolr  r  __all__r{  r  r  <module>r-     st$           				 



 # # # # # #       G G G G G G 3 3 3 3 3 3 \ \ \ \ \ \ \ \ ? ? ? ? ? ? < < < < < <              % $ $ $ $ $ 2 2 2 2 2 2 * * * * * *               BBBBBBB  HHHHHHH		H	%	% 68 d3S	>2 7 7 702 c49n- 2 2 26+c3:o6X	%<%<%>%>H//DIX	(?(?(A(AK$$tLX 
%<%<%>%>H//DIX 
/F/F/H/HR++dS	X
 
,C,C,E,EO((4PX 
$;$;$=$=G4HX 
'>'>'@'@J##dKX 
*A*A*C*CM&&NX 	(X 
$;$;$=$=G4HX 
9S9S9U9U_55[_`X 	3X 	*X 
+B+B+D+DN''$OX 
2I2I2K2KU..QUVX  	&!X" 
0G0G0I0IS,,tT#X$ 	9%X& 
$;$;$=$=G4H'X( 
&=&=&?&?I??TJ)X* 	,+X, 
$;$;$=$=G4H-X. 	"/X0 
.E.E.G.GQ**TR1X2 	&3X4 
,C,C,E,EO4P5X6 	%7X8 
$;$;$=$=G4H9X: 
'>'>'@'@JOOdK;X< 	"=X> 
/F/F/H/HR++dS?X@ 
'>'>'@'@JOOdKAXB 
(?(?(A(AK$$tLCXD 
)@)@)B)BL%%MEXF 
)@)@)B)BL%%MGXH 
(?(?(A(AK__tLIXJ 
"9"9";";EFKXL 	&MXN 	"OXP 	3QXR 	.SXT 
$;$;$=$=G4HUXV 
*A*A*C*CM&&NWXX 
/F/F/H/HR++dSYXZ 	 [X\ 
*A*A*C*CMN]X^ 
1H1H1J1JT--PTU_X` 
'>'>'@'@JOOdKaXb 
$;$;$=$=G4HcXd 
%<%<%>%>H//DIeXf 	 gXh 
/F/F/H/HR++dSiXj 
!EXEXEZEZ"d"A"A`dekXl 	*mXn 
%<%<%>%>H//DIoXp 
)@)@)B)BLooMqXr 
)@)@)B)BLooMsXt 
$;$;$=$=G4HuXv 	"wXx 
(?(?(A(AK$$tLyXz 
&=&=&?&?I""TJ{X| 
'>'>'@'@J##dK}X~ 
'>'>'@'@J##dKX@ 
,C,C,E,EO((4PAXB 
(?(?(A(AK$$tLCXD 
-D-D-F-FP))DQEXF 
#:#:#<#<F$GGXH 
'>'>'@'@J##dKIXJ 
(?(?(A(AK$$tLKXL 
,C,C,E,EO((4PMXN 
1H1H1J1JT--PTUOXP 
)@)@)B)BL%%MQXR 
-D-D-F-FP))DQSXT 
-D-D-F-FP))DQUXV 
*A*A*C*CM&&NWXX 
,C,C,E,EO((4PYXZ 
)C)C)E)EO%%4P[X\ 
$;$;$=$=G4H]X^ 
+B+B+D+DN$O_X` 
'>'>'@'@JOOdKaXb 
+B+B+D+DN''$OcXd 	:eXf 
$;$;$=$=G4HgXh 
+B+B+D+DN''$OiXj 
.E.E.G.GQ**TRkXl 
4K4K4M4MW00SWXmXn 
4K4K4M4MW00SWXoXp 
.E.E.G.GQ??TRqXr 
(?(?(A(AK__tLsXt 
*A*A*C*CM&&NuXv 	+wXx 	&yXz 
(?(?(A(AK$$tL{X| 
)@)@)B)BL%%M}X~ 
,C,C,E,EO4PX@ 
1H1H1J1JTooPTUAXB 
)@)@)B)BL%%MCXD 
%<%<%>%>H//DIEXF 
8O8O8Q8Q[44W[\GXH 
.E.E.G.GQ**TRIXJ 
(?(?(A(AK__tLKXL 
,C,C,E,EO4PMXN 
(?(?(A(AK__tLOXP 
0G0G0I0IS,,tTQXR 
0G0G0I0IS,,tTSXT 
.E.E.G.GQ**TRUXV 
"9"9";";EFWXX 
0G0G0I0IS,,tTYXZ 
'>'>'@'@J##dK[X\ 
-D-D-F-FP))DQ]X^ 	"_X` 
(?(?(A(AK$$tLaXb 
)C)C)E)EO%%4PcXd 
(?(?(A(AK$$tLeXf 
)@)@)B)BL%%MgXh 
(B(B(D(DN$$$OiXj 
,C,C,E,EO((4PkXl 
&=&=&?&?I""TJmXn 
*A*A*C*CM&&NoXp 	%qXr 
-D-D-F-FP//DQsXt 
0G0G0I0IS,,tTuXv 	'wXx 
/F/F/H/HR++dSyX| **,,N"")@)@)B)BL%%		
{XH **,,N"")@)@)B)BL%%		
GXT **,,N"")@)@)B)BL%%		
SX` **,,N"")@)@)B)BL%%		
_Xl **,,N"")@)@)B)BL%%		
kXv 
&@&@&B&BL""MwXx 
1H1H1J1JTooPTUyXz 
0G0G0I0IS,,tT{X| 
&=&=&?&?I""TJ}X~ 
&=&=&?&?I""TJX@ 	$AXB 
!8!8!:!:DECXD 
&=&=&?&?I]]TJEXF 
-D-D-F-FPMMDQGXH 
"9"9";";EFIXJ 	"KXL 
%<%<%>%>H//DIMXN 
$;$;$=$=G4HOXP 
(?(?(A(AK__tLQXR 
*A*A*C*CMNSXT 
(?(?(A(AK$$tLUXV 
/F/F/H/HR++dSWXX 
'>'>'@'@J##dKYXZ 
(?(?(A(AK$$tL[X\ 
)@)@)B)BL%%M]X^ 
/F/F/H/HR++dS_X` 
(?(?(A(AK$$tLaXb 
+B+B+D+DN$OcXd 
)@)@)B)BLooMeXf 
/F/F/H/HR++dSgXh 
#:#:#<#<F$GiXj 
&=&=&?&?I""TJkXl 
%<%<%>%>H//DImXn 
&=&=&?&?I??TJoXp 
0G0G0I0IS,,tTqXr 
0G0G0I0IS,,tTsXt 
*A*A*C*CM&&NuXv 
,C,C,E,EO((4PwXx 	,yXz 
#:#:#<#<F$G{X| 	(}X~ 
(?(?(A(AK}}tLXB **,,N"")@)@)B)BL%%		
AXL 
(?(?(A(AK$$tLMXN 
/F/F/H/HR++dSOXP 	.QXR 
'>'>'@'@JOOdKSXT 
,C,C,E,EO((4PUXV 
&=&=&?&?I""TJWXX 
-D-D-F-FP))DQYXZ 
+B+B+D+DN''$O[X\ 
,C,C,E,EO((4P]X^ 
*A*A*C*CM&&N_X` 
)@)@)B)BL%%MaXb 
&=&=&?&?I""TJcXd 
*A*A*C*CM&&NeXf 
.E.E.G.GQ**TRgXh 
*A*A*C*CM&&NiXj 
+B+B+D+DN''$OkXl 
/F/F/H/HR++dSmXn 
)@)@)B)BL%%MoXp 
-D-D-F-FP))DQqXr 	 sXt 
%<%<%>%>H//DIuXv 
0G0G0I0IS,,tTwXx 
,C,C,E,EO((4PyXz 
*A*A*C*CM&&N{X| 
)@)@)B)BLooM}X~ 	(X@ 	5AXB 	)CXD 
,C,C,E,EO((4PEXF 
'>'>'@'@J##dKGXH 
$;$;$=$=G4HIXJ 
*A*A*C*CMNKXL 
3J3J3L3LV//RVWMXN 
6M6M6O6OY22UYZOXP 
-D-D-F-FP))DQQXR 
(B(B(D(DN$$$OSXT 
*A*A*C*CM&&NUXV 
5O5O5Q5Q[11W[\WXX 
,F,F,H,HR((dSYXZ 	*[X\ 
+B+B+D+DN$O]X^ 
+B+B+D+DN''$O_X` 
*A*A*C*CMNaXb 
1H1H1J1J TPTUcXd 
 7 7 9 9C}}tDeXf 
(?(?(A(AK$$tLgXh 	$iXj 
+B+B+D+DN''$OkXl 
#:#:#<#<F$GmXn 
$;$;$=$=G4HoXp 
"9"9";";EFqXr 	.sXt 	2uXv 
$;$;$=$=G4HwXx 
+B+B+D+DN$OyXz 	"{X~ **,,N"")@)@)B)BL%%		
}XJ **,,N"")@)@)B)BL%%		
IXT 	-UXV 	2WXX 	7YXZ 	<[X\ 
*A*A*C*CM&&N]X^ 
%<%<%>%>H//DI_X` 
$;$;$=$=G4HaXb 	 cXd 
1H1H1J1JT--PTUeXf 
4K4K4M4MW00SWXgXh 
&=&=&?&?I""TJiXj 
(?(?(A(AK$$tLkXl 
*A*A*C*CM&&NmXn 
&=&=&?&?I""TJoXZ Z |&7 &7 &7 )3s8 & & &P < i iJ000E\E\E^E^.h.A.Adh
+$$%9;RSS @@#=#7#=#?#?@@@! ! !  3# 3$s)d2B 3 3 3 3p 04 %)#"] ]#&S)9#9]R[%%,] ] #s(^d"	]
 #:] Dj] ] ] 
#s(^] ] ] ]@DU DU DU DU DU DU DU DUN
 
0r  