
    Ƒi
                       S r SSKJr  SSKrSSKrSSKJrJr  Sr\R                  " S\R                  5      r\R                  " S\R                  5      r\R                  " S\R                  5      rSS	 jrSS
 jrSS jrg)z2Stage 1b: HTML/XML charset declaration extraction.    )annotationsN)DETERMINISTIC_CONFIDENCEDetectionResulti   s*   <\?xml[^>]+encoding\s*=\s*['"]([^'"]+)['"]s,   <meta[^>]+charset\s*=\s*['"]?\s*([^\s'">;]+)s6   <meta[^>]+content\s*=\s*['"][^'"]*charset=([^\s'">;]+)c                     U R                  S5      R                  5       R                  5       n[        R                  " U5        U$ ! [
        [        [        4 a     gf = f)a=  Validate encoding name via codecs and return the lowercased original name.

We use ``codecs.lookup()`` to verify the encoding is recognized by Python,
but return the original (lowercased) name rather than the codec's canonical
name so that common aliases like ``iso-8859-1`` and ``windows-1252`` are
preserved as-is.
asciiN)decodestriplowercodecslookupLookupErrorUnicodeDecodeError
ValueError)nametexts     W/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/chardet/pipeline/markup.py_normalize_encodingr      sV    {{7#))+113d  +Z8 s   AA AAc                
   U (       d  gU S[          n[        [        [        4 H]  nUR	                  U5      nU(       d  M  [        UR                  S5      5      nUc  M<  [        X5      (       d  MN  [        U[        SS9s  $    g)aD  Scan the first bytes of *data* for an HTML/XML charset declaration.

Checks for:

1. ``<?xml ... encoding="..."?>``
2. ``<meta charset="...">``
3. ``<meta http-equiv="Content-Type" content="...; charset=...">``

:param data: The raw byte data to scan.
:returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
N   )encoding
confidencelanguage)
_SCAN_LIMIT_XML_ENCODING_RE_HTML5_CHARSET_RE_HTML4_CONTENT_TYPE_REsearchr   group_validate_bytesr   r   )dataheadpatternmatchr   s        r   detect_markup_charsetr$   (   sz     D$&79OPt$5*5;;q>:H#(G(G&%7!  Q     c                b     U S[          R                  U5        g! [        [        4 a     gf = f)zCheck that *data* can be decoded under *encoding* without errors.

Only validates the first ``_SCAN_LIMIT`` bytes to avoid decoding a
full 200 kB input just to verify a charset declaration found in the
header.
NFT)r   r   r   r   )r    r   s     r   r   r   G   s:    \k!!(+  , s    ..)r   bytesreturnz
str | None)r    r'   r(   zDetectionResult | None)r    r'   r   strr(   bool)__doc__
__future__r   r   rechardet.pipeliner   r   r   compile
IGNORECASEr   r   r   r   r$   r    r%   r   <module>r2      st    8 "  	 F::6  JJ8"--  BBMM 
">r%   