
    Ƒi*                       % S r SSKJr  SSKrSSKrSSKJr  SRS jr0 S\" SS15      _S	\" S
S15      _S
\" S15      _S\" S15      _S\" S15      _S\" SS15      _S\" SS15      _S\" S15      _S\" S15      _S\" S15      _S\" S15      _S\" S15      _S\" 1 Sk5      _S\" S S!15      _S"\" S#15      _S$\" S15      _S%\" S&15      _\" S'15      \" S(15      \" S)15      \" S*15      \" S+15      \" S,15      S-.Er	S.\
S/'   S0S1S0S2S3S4S5S6S7S8S9S8S:.rS;\
S<'       SSS= jrS>rS?\
S@'   SArS?\
SB'   STSC jr\" 5       rS.\
SD'   SUSE jr\	R%                  5        V Vs0 s H  u  p\" U 5      \" SF U 5       5      _M     snn rS.\
SG'   STSH jr\" 5       rS.\
SI'   SVSJ jrSWSK jr\" SLSM15      rSN\
SO'   SXSP jr        SYSQ jrgs  snn f )ZaA  Encoding equivalences and legacy name remapping.

This module defines:

1. **Directional supersets** for accuracy evaluation: detecting a superset
   encoding when the expected encoding is a subset is correct (e.g., detecting
   utf-8 when expected is ascii), but not the reverse.

2. **Bidirectional equivalents**: groups of encodings where detecting any
   member when another member was expected is considered correct.  This
   includes UTF-16/UTF-32 endian variants (which encode the same text with
   different byte order) and ISO-2022-JP branch variants (which are
   compatible extensions of the same base encoding).

3. **Preferred superset mapping** for the ``should_rename_legacy`` API option:
   replaces detected ISO/subset encoding names with their Windows/CP superset
   equivalents that modern software actually uses.
    )annotationsN)DetectionDictc                     [         R                  " U 5      R                  $ ! [         a3    U R	                  5       R                  SS5      R                  SS5      s $ f = f)zNormalize encoding name for comparison.

:param name: The encoding name to normalize.
:returns: The canonical codec name, or a lowered/stripped fallback.
- _)codecslookupnameLookupErrorlowerreplace)r   s    T/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/chardet/equivalences.pynormalize_encoding_namer      sQ    >}}T"''' >zz|##C,44S"==>s   " :AAasciizutf-8zwindows-1252tis-620iso-8859-11cp874gb2312gb18030gbkbig5	big5hkscscp950	shift_jiscp932shift_jis_2004zshift-jisx0213zeuc-jpzeuc-jis-2004zeuc-jisx0213euc-krcp949cp037cp1140ziso-2022-jp>   iso2022-jp-2iso2022-jp-extiso2022-jp-2004ziso2022-jp-1r"   r#   ziso2022-jp-3r$   
iso-8859-1
iso-8859-2zwindows-1250zwindows-1251zwindows-1256zwindows-1253zwindows-1255zwindows-1254zwindows-1257)
iso-8859-5
iso-8859-6
iso-8859-7
iso-8859-8
iso-8859-9iso-8859-13dict[str, frozenset[str]]	SUPERSETSzWindows-1252CP949zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254CP874zWindows-1257)r   r   r%   r&   r'   r(   r)   r*   r+   r   r,   r   zdict[str, str]PREFERRED_SUPERSETc                    U R                  S5      n[        U[        5      (       a'  [        R                  UR	                  5       U5      U S'   U $ )a#  Replace the encoding name with its preferred Windows/CP superset.

Modifies the ``"encoding"`` value in *result* in-place and returns *result*
for fluent chaining.

:param result: A detection result dict containing an ``"encoding"`` key.
:returns: The same *result* dict, modified in-place.
encoding)get
isinstancestrr1   r   )resultencs     r   apply_legacy_renamer9   b   sA     **Z
 C#s/33CIIKEzM    ))zutf-16z	utf-16-lez	utf-16-be)zutf-32z	utf-32-lez	utf-32-be)r"   r$   r#   ztuple[tuple[str, ...], ...]BIDIRECTIONAL_GROUPS))skcs)ukrubgbe)msid)nodasvLANGUAGE_EQUIVALENCESc                 T    0 n [          H  n[        U5      nU H  nX U'   M	     M     U $ )zBBuild a lookup: ISO code -> frozenset of all equivalent ISO codes.)rG   	frozenset)r7   group	group_setcodes       r   _build_language_equiv_indexrM      s3    (*F&e$	D$4L  ' Mr:   _LANGUAGE_EQUIVc                V    X:X  a  g[         R                  U 5      nUSL=(       a    X;   $ )av  Check whether *detected* is an acceptable language for *expected*.

Returns ``True`` when *expected* and *detected* are the same ISO 639-1
code, or belong to the same equivalence group in
:data:`LANGUAGE_EQUIVALENCES`.

:param expected: Expected ISO 639-1 language code.
:param detected: Detected ISO 639-1 language code.
:returns: ``True`` if the languages are equivalent.
TN)rN   r4   )expecteddetectedrJ   s      r   is_language_equivalentrR      s0     )E2!22r:   c              3  8   #    U  H  n[        U5      v   M     g 7fNr   ).0ss     r   	<genexpr>rX      s      /,5q""I   _NORMALIZED_SUPERSETSc                 t    0 n [          H+  n[        S U 5       5      nU H  nX [        U5      '   M     M-     U $ )z1Build the bidirectional equivalence lookup index.c              3  8   #    U  H  n[        U5      v   M     g 7frT   rU   )rV   ns     r   rX   %_build_bidir_index.<locals>.<genexpr>   s     Eu!2155urY   )r;   rI   r   )r7   rJ   normedr   s       r   _build_bidir_indexr`      s@    (*F%EuEED4:*401  & Mr:   _NORMALIZED_BIDIRc                    U c  USL $ Uc  g[        U 5      n[        U5      nX#:X  a  gU[        ;   a  U[        U   ;   a  gU[        ;   =(       a    U[        U   ;   $ )a  Check whether *detected* is an acceptable answer for *expected*.

Acceptable means:

1. Exact match (after normalization), OR
2. Both belong to the same bidirectional byte-order group, OR
3. *detected* is a known superset of *expected*.

:param expected: The expected encoding name, or ``None`` for binary files.
:param detected: The detected encoding name, or ``None``.
:returns: ``True`` if the detection is acceptable.
NFT)r   ra   rZ   )rP   rQ   norm_expnorm_dets       r   
is_correctre      s{     4&x0H&x0H  $$5Fx5P)P 	)) 	8-h77r:   c                `    [         R                  " SU 5      nSR                  S U 5       5      $ )z4NFKD-normalize *text* and strip all combining marks.NFKDr   c              3  `   #    U  H$  n[         R                  " U5      (       a  M   Uv   M&     g 7frT   )unicodedata	combining)rV   cs     r   rX   #_strip_combining.<locals>.<genexpr>   s     Cd+*?*?*B11ds   .	.)ri   	normalizejoin)textnfkds     r   _strip_combiningrq      s)      .D77CdCCCr:   )   ¤   €)rs   rr   zfrozenset[tuple[str, str]]_EQUIVALENT_SYMBOL_PAIRSc                T    X:X  a  gX4[         ;   a  g[        U 5      [        U5      :H  $ )u   Return True if characters *a* and *b* are functionally equivalent.

Equivalent means:
- Same character, OR
- Same base letter after stripping combining marks, OR
- An explicitly listed symbol equivalence (e.g. ¤ ↔ €)
T)rt   rq   )abs     r   _chars_equivalentrx      s0     	v	v))A"21"555r:   c           	     4   Uc  USL $ Uc  g[        U5      n[        U5      nX4:X  a  g U R                  U5      nU R                  U5      nXV:X  a  g[	        U5      [	        U5      :w  a  g[        S [        XVSS9 5       5      $ ! [        [        4 a     gf = f)uU  Check whether *detected* produces functionally identical text to *expected*.

Returns ``True`` when:

1. *detected* is not ``None`` and both encoding names normalize to the same
   codec, OR
2. Decoding *data* with both encodings yields identical strings, OR
3. Every differing character pair is functionally equivalent: same base
   letter after stripping combining marks, or an explicitly listed symbol
   equivalence (e.g. ¤ ↔ €).

Returns ``False`` if *detected* is ``None``, either encoding is unknown,
or either encoding cannot decode *data*.

:param data: The raw byte data that was detected.
:param expected: The expected encoding name, or ``None`` for binary files.
:param detected: The detected encoding name, or ``None``.
:returns: ``True`` if decoding with *detected* yields functionally identical
    text to decoding with *expected*.
NFTc              3  <   #    U  H  u  p[        X5      v   M     g 7frT   )rx   )rV   rv   rw   s      r   rX   *is_equivalent_detection.<locals>.<genexpr>4  s     X3W41 &&3Ws   )strict)r   decodeUnicodeDecodeErrorr   lenallzip)datarP   rQ   rc   rd   text_exptext_dets          r   is_equivalent_detectionr     s    . 4&x0H&x0H;;x(;;x( 
8}H%X3xRV3WXXX , s   "B BB)r   r6   returnr6   )r7   r   r   r   )r   r-   )rP   r6   rQ   r6   r   bool)rP   
str | NonerQ   r   r   r   )ro   r6   r   r6   )rv   r6   rw   r6   r   r   )r   bytesrP   r   rQ   r   r   r   )__doc__
__future__r   r	   ri   chardet.pipeliner   r   rI   r.   __annotations__r1   r9   r;   rG   rM   rN   rR   itemsrZ   r`   ra   re   rq   rt   rx   r   )subset	supersetss   00r   <module>r      s  & #   *	>*(Y01(y-12( 9gY'( i$	(
 
9i[!( I{G,-( G%567( i!1 23( i()( I~./( i	"( Yz"( 9RS( I~/?@A(  I012!($ )^,-%(& )^,-'(( ^,-^,-^,-^,-^,-n-.3(	$ D        !& N  $5 1 6 2 " .I-J* J3, '__.	4 / F#Y /,5/ &  /	4 0  0B/C , C FD 8A8 4 6 .Y
.Y%.Y1;.Y	.Yq4s   "G