
    Ƒi*                        % S r SSKrSSKrSSKrSSKrSSKrSSKJr  Sr	\
\S'   Sq\\\4   S-  \S'   \R                   " 5       rSq\\\\\S-  \\4      4   S-  \S'   \R                   " 5       rSq\\\4   S-  \S'   \R                   " 5       r0 r\\\4   \S	'   \R4                  " 5        H\  r\" \R:                  5      S
:X  d  M  \R:                  S   \\R<                  '   \R>                   H  r \R:                  S   \\ '   M     M^     S\\\4   4S jr!S\\\\\S-  \\4      4   4S jr"S\S\S-  4S jr#S\S\$4S jr%S\\\4   4S jr& " S S5      r' SS\'S\S\S\4S jjr( SS\)S\S\'S-  S\\\S-  4   4S jjr*g)zModel loading and bigram scoring utilities.

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    N)REGISTRY   NON_ASCII_BIGRAM_WEIGHT_MODEL_CACHE
_ENC_INDEX_MODEL_NORMS_SINGLE_LANG_MAP   returnc                     [         b  [         $ [           0 n [        R                  R	                  S5      R                  S5      nUR                  5       nU(       d'  [        R                  " S[        SS9  U q U sSSS5        $  Sn[        R                  " SX#5      u  nUS	-  nUS
:  a  SU S3n[        U5      e[        U5       H  n[        R                  " SX#5      u  nUS	-  nUS:  a  SU S3n[        U5      eX#X7-    R                  S5      nX7-  n[        R                  " SX#5      u  n	US	-  nU	S:  a  SU	 S3n[        U5      e[        S5      n
[        U	5       H,  n[        R                  " SX#5      u  pnUS-  nXUS-  U-  '   M.     XU'   M     U q U sSSS5        $ ! [        R                   ["        4 a  nSU 3n[        U5      UeSnAff = f! , (       d  f       g= f)zLoad all bigram models from the bundled models.bin file.

Each model is a bytearray of length 65536 (256*256).
Index: (b1 << 8) | b2 -> weight (0-255).

:returns: A dict mapping model key strings to 65536-byte lookup tables.
Nzchardet.modelsz
models.binuX   chardet models.bin is empty — statistical detection disabled; reinstall chardet to fix   )
stacklevelr   z!I   i'  z"corrupt models.bin: num_encodings=z exceeds limit   zcorrupt models.bin: name_len=z exceeds 256zutf-8   z corrupt models.bin: num_entries=z exceeds 65536z!BBB   r   zcorrupt models.bin: )r   _MODEL_CACHE_LOCK	importlib	resourcesfilesjoinpath
read_byteswarningswarnRuntimeWarningstructunpack_from
ValueErrorrangedecode	bytearrayerrorUnicodeDecodeError)modelsrefdataoffsetnum_encodingsmsg_name_lennamenum_entriestableb1b2weightes                  W/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/chardet/models/__init__.pyload_modelsr4   &   s    	 (*!!''(89BB<P~~MM+	 "L! 
	$	)F%11$E]aKFv%:=/X o%=)$00tD!c>9(<PC$S/)V%67>>wG"!'!3!3D$!G!&<[MXC$S/)!%({+A%+%7%7%MNBFaKF,227b.) ,  %t' *0 i 
	^ 01 	)(,CS/q(	)_ 
	s1   A%GDF'G'GGGG
G(c                     [         b  [         $ [           [        5       n 0 nU R                  5        H<  u  p#UR	                  SS5      u  pEUR                  U/ 5      R                  XCU45        M>     0 n[        R                  " 5        H$  nUR                   H  nUR                  Xh'   M     M&     UR                  5        H  u  pX;   d  M  X;  d  M  X   X'   M     Uq UsSSS5        $ ! , (       d  f       g= f)zTReturn a pre-grouped index mapping encoding name -> [(lang, model, model_key), ...].N/r
   )r   _ENC_INDEX_LOCKr4   itemssplit
setdefaultappendr   valuesaliasesr,   )
r$   indexkeymodellangencalias_to_primaryentryaliasprimarys
             r3   get_enc_indexrG   i   s     	 DF ,,.JC		#q)IDS"%,,d3-?@ ) ,.__&E*/** ' ' ' /446NE~'"6!& 7 
) 
s   B4C*C*C**
C8encodingc                 ,    [         R                  U 5      $ )zReturn the language for a single-language encoding, or None.

:param encoding: The canonical encoding name.
:returns: An ISO 639-1 language code, or ``None`` if the encoding is
    multi-language.
)r	   getrH   s    r3   infer_languagerL      s     ))    c                     U [        5       ;   $ )zReturn True if the encoding has language variants in the model index.

:param encoding: The canonical encoding name.
:returns: ``True`` if bigram models exist for this encoding.
)rG   rK   s    r3   has_model_variantsrO      s     }&&rM   c                  0   [         b  [         $ [           [        5       n 0 nU R                  5        HE  u  p#Sn[	        S5       H  nX5   nU(       d  M  XFU-  -  nM     [
        R                  " U5      X'   MG     Uq UsSSS5        $ ! , (       d  f       g= f)zAReturn cached L2 norms for all models, keyed by model key string.Nr   r   )r   _MODEL_NORMS_LOCKr4   r8   r   mathsqrt)r$   normsr?   r@   sq_sumivs          r3   _get_model_normsrX      s     	 "$ ,,.JCF5\H1!eOF " 6*EJ )  
		s   ;B*B
Bc                   T    \ rS rSrSrSrS\SS4S jr\S\	\
\
4   SS 4S	 j5       rSrg)
BigramProfile   a  Pre-computed bigram frequency distribution for a data sample.

Computing this once and reusing it across all models reduces per-model
scoring from O(n) to O(distinct_bigrams).

Stores a single ``weighted_freq`` dict mapping bigram index to
*count * weight* (weight is 8 for non-ASCII bigrams, 1 otherwise).
This pre-multiplies the weight during construction so the scoring
inner loop only needs a single dict traversal with no branching.
)
input_norm
weight_sumweighted_freqr&   r   Nc                    [        U5      S-
  nUS::  a  0 U l        SU l        SU l        g0 nSn[        nUR
                  n[        U5       HI  nX   nXS-      n	US-  U	-  n
US:  d  U	S:  a  U" U
S5      U-   X:'   XE-  nM6  U" U
S5      S-   X:'   US-  nMK     X0l        X@l        [        R                  " [        S UR                  5        5       5      5      U l        g)zbCompute the bigram frequency distribution for *data*.

:param data: The raw byte data to profile.
r
   r           Nr      c              3   *   #    U  H	  oU-  v   M     g 7fN .0rW   s     r3   	<genexpr>)BigramProfile.__init__.<locals>.<genexpr>   s     'E}!A}   )lenr^   r]   r\   r   rJ   r   rR   rS   sumr<   )selfr&   total_bigramsfreqw_sumhi_w_getrV   r/   r0   idxs              r3   __init__BigramProfile.__init__   s    
 D	AA13D#$DO%(DO!&xx}%AB!eB7b.CDyBI aL4/	 aL1,	
 & "))C'Et{{}'E$EFrM   r^   c                     U " S5      nXl         [        UR                  5       5      Ul        [        R
                  " [        S UR                  5        5       5      5      Ul        U$ )a#  Create a BigramProfile from pre-computed weighted frequencies.

Computes ``weight_sum`` and ``input_norm`` from *weighted_freq* to
ensure consistency between the three fields.

:param weighted_freq: Mapping of bigram index to weighted count.
:returns: A new :class:`BigramProfile` instance.
rM   c              3   *   #    U  H	  oU-  v   M     g 7frc   rd   re   s     r3   rg   3BigramProfile.from_weighted_freq.<locals>.<genexpr>   s     *Q:PQq5:Pri   )r^   rk   r<   r]   rR   rS   r\   )clsr^   profiles      r3   from_weighted_freq BigramProfile.from_weighted_freq   sS     c( - !5!5!78!YYs*Q-:N:N:P*Q'QRrM   )__name__
__module____qualname____firstlineno____doc__	__slots__bytesrs   classmethoddictintrz   __static_attributes__rd   rM   r3   rZ   rZ      sN    	 >IGU Gt G< tCH~ /  rM   rZ   ry   r@   	model_keyc                 |   U R                   S:X  a  g[        5       nU(       a  UR                  U5      OSnUc>  Sn[        S5       H  nX   nU(       d  M  XWU-  -  nM     [        R
                  " U5      nUS:X  a  gSnU R                  R                  5        H  u  pXU	   U
-  -  nM     XU R                   -  -  $ )zSScore a pre-computed bigram profile against a single model using cosine similarity.r`   Nr   r   )r\   rX   rJ   r   rR   rS   r^   r8   )ry   r@   r   rT   
model_normrU   rV   rW   dotrr   wcounts              r3   score_with_profiler      s     S E)29%JuAAqa%  YYv&
S
C,,224SzF"" 5w11122rM   r&   c                     U (       d  Uc  g[        5       nUR                  U5      nUc  gUc  [        U 5      nSnSnU H  u  pxn	[        X(U	5      n
X:  d  M  U
nUnM     XV4$ )ah  Score data against all language variants of an encoding.

Returns (best_score, best_language). Uses a pre-grouped index for O(L)
lookup where L is the number of language variants for the encoding.

If *profile* is provided, it is reused instead of recomputing the bigram
frequency distribution from *data*.

:param data: The raw byte data to score.
:param encoding: The canonical encoding name to match against.
:param profile: Optional pre-computed :class:`BigramProfile` to reuse.
:returns: A ``(score, language)`` tuple with the best cosine-similarity
    score and the corresponding language code (or ``None``).
N)r`   Nr`   )rG   rJ   rZ   r   )r&   rH   ry   r>   variants
best_score	best_langrA   r@   r   ss              r3   score_best_languager     sz    & GOOEyy"H%J I"*Ywy9>JI	 #+   rM   ) rc   )+r   importlib.resourcesr   rR   r   	threadingr   chardet.registryr   r   r   __annotations__r   r   strr!   Lockr   r   listtupler7   r   floatrQ   r	   r<   _encrj   	languagesr,   r=   _aliasr4   rG   rL   boolrO   rX   rZ   r   r   r   rd   rM   r3   <module>r      s        %  !   ,0d3	>"T) 0NN$ HL
Dd5tY!;<==>E L.."(,d3:% ,NN$  $& $sCx. %OOD
4>>a&*nnQ&7#llF'+~~a'8V$ # @T#y.) @FtCeC$J	3,F&G!HHI 8*S *S4Z *' ' '$sEz* *: :| @B33#,39<3
34 %)&!
&!&! T!&! 5#*	&!rM   