
    iF4                       S r SSKJr  SSKJr  SSKJr  SSKJrJ	r	  SSK
J
r  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJr  \(       a  SS	KJr  S
r " S S\5      r\" SS9SS j5       r\" SS9SS j5       r \" SS9SS j5       r!\" SS9SS j5       r"\" SS9SS j5       r# " S S\	5      r$\" SS9SS j5       r%            S S jr&  S!       S"S jjr'S#S jr(S$S jr)  S!       S"S jjr*g)%z
Grapheme cluster segmentation following Unicode Standard Annex #29.

This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
defined in UAX #29: Unicode Text Segmentation.

https://www.unicode.org/reports/tr29/
    )annotations)IntEnum)	lru_cache)TYPE_CHECKING
NamedTuple   )bisearch)
GRAPHEME_L
GRAPHEME_T
GRAPHEME_VGRAPHEME_LVINCB_EXTENDINCB_LINKERGRAPHEME_LVTINCB_CONSONANTGRAPHEME_EXTENDGRAPHEME_CONTROLGRAPHEME_PREPENDGRAPHEME_SPACINGMARKEXTENDED_PICTOGRAPHICGRAPHEME_REGIONAL_INDICATOR)Iterator    c                  P    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrSrSrSrSrSrSrg)GCB,   z'Grapheme Cluster Break property values.r   r                        	   
             N)__name__
__module____qualname____firstlineno____doc__OTHERCRLFCONTROLEXTENDZWJREGIONAL_INDICATORPREPENDSPACING_MARKLVTLVLVT__static_attributes__r)       P/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/wcwidth/grapheme.pyr   r   ,   sL    1E	
B	
BGF
CGL	A
A
A	B
Cr>   r   i   )maxsizec                   U S:X  a  [         R                  $ U S:X  a  [         R                  $ U S:X  a  [         R                  $ [	        U [
        5      (       a  [         R                  $ [	        U [        5      (       a  [         R                  $ [	        U [        5      (       a  [         R                  $ [	        U [        5      (       a  [         R                  $ [	        U [        5      (       a  [         R                  $ [	        U [        5      (       a  [         R                   $ [	        U ["        5      (       a  [         R$                  $ [	        U [&        5      (       a  [         R(                  $ [	        U [*        5      (       a  [         R,                  $ [	        U [.        5      (       a  [         R0                  $ [         R2                  $ )z;Return the Grapheme_Cluster_Break property for a codepoint.r(   r%   i   )r   r0   r1   r4   	_bisearchr   r2   r   r3   r   r5   r   r6   r   r7   r
   r8   r   r9   r   r:   r   r;   r   r<   r/   ucss    r?   _grapheme_cluster_breakrE   B   s'   
 f}vv
f}vv
f}ww&''{{o&&zz122%%%&''{{*++j!!uuj!!uuj!!uuk""vvl##ww99r>   c                4    [        [        U [        5      5      $ )z6Check if codepoint has Extended_Pictographic property.)boolrB   r   rC   s    r?   _is_extended_pictographicrH   e   s     	#4566r>   c                4    [        [        U [        5      5      $ )z,Check if codepoint has InCB=Linker property.)rG   rB   r   rC   s    r?   _is_incb_linkerrJ   k        	#{+,,r>   c                4    [        [        U [        5      5      $ )z/Check if codepoint has InCB=Consonant property.)rG   rB   r   rC   s    r?   _is_incb_consonantrM   q   s     	#~.//r>   c                4    [        [        U [        5      5      $ )z,Check if codepoint has InCB=Extend property.)rG   rB   r   rC   s    r?   _is_incb_extendrO   w   rK   r>   c                  .    \ rS rSr% SrS\S'   S\S'   Srg)	BreakResult}   z*Result of grapheme cluster break decision.rG   should_breakintri_countr)   N)r*   r+   r,   r-   r.   __annotations__r=   r)   r>   r?   rQ   rQ   }   s    4Mr>   rQ   c                   U [         R                  :X  a  U[         R                  :X  a
  [        SSS9$ U [         R                  [         R                  [         R                  4;   a
  [        SSS9$ U[         R                  [         R                  [         R                  4;   a
  [        SSS9$ U [         R
                  :X  aL  U[         R
                  [         R                  [         R                  [         R                  4;   a
  [        SSS9$ U [         R                  [         R                  4;   a.  U[         R                  [         R                  4;   a
  [        SSS9$ U [         R                  [         R                  4;   a  U[         R                  :X  a
  [        SSS9$ U[         R                  :X  a
  [        SSS9$ U[         R                  :X  a
  [        SSS9$ U [         R                  :X  a
  [        SSS9$ g)z
Check simple GCB-pair-based break rules (cacheable).

Returns BreakResult for rules that can be determined from GCB properties alone, or None if
complex lookback rules (GB9c, GB11) need to be checked.
Fr   rS   rU   TN)r   r0   r1   rQ   r2   r8   r9   r;   r<   r:   r3   r7   r6   )prev_gcbcurr_gcbs     r?   _simple_break_checkr[      st    366h#&&0:: CKK00q99 CKK00q99 355X#%%)HH:: CFFCEE?"xCEE355>'A:: CGGSUU##CEE(9:: 3:::: 3###:: 3;;:: r>   c                Z   [        X5      nUb  U$ U[        R                  :X  a
  [        SSS9$ [	        X#   5      n[        U5      (       as  SnUS-
  nUS:  af  [	        X(   5      n	[        U	5      (       a  SnUS-  nO9[        U	5      (       a  US-  nO#[        U	5      (       a  U(       a
  [        SSS9$ O	OUS:  a  Mf  U [        R                  :X  ap  [        U5      (       a`  US-
  nUS:  aU  [	        X(   5      n	[        U	5      n
U
[        R                  :X  a  US-  nO[        U	5      (       a
  [        SSS9$ OUS:  a  MU  U [        R                  :X  a4  U[        R                  :X  a   US-  S:X  a  [        SUS-   S9$ [        SSS9$ U[        R                  :X  a  SOSn[        SUS9$ )z
Determine if there should be a grapheme cluster break between prev and curr.

Implements UAX #29 grapheme cluster boundary rules.
Fr   rX   r   Tr   )r[   r   r4   rQ   ordrM   rJ   rO   rH   rE   r3   r5   )rY   rZ   textcurr_idxrU   resultcurr_ucs
has_linkeriprev_ucs	prev_props              r?   _should_breakrf      s    !4F 377::
 4>"H(##
qL1f47|Hx((!
Q **Q#H--&EAFF 1f 3778BBqL1f47|H/9ICJJ&Q*844"BB 1f 3)))h#:P:P.Pa<1EHqLIIq99  6 66qAHD8<<r>   Nc              #    #    U (       d  g[        U 5      nUc  UnX:  d  X:  a  g[        X#5      nUnSn[        [        X   5      5      nU[        R
                  :X  a  Sn[        US-   U5       HM  n[        [        X   5      5      n[        XhXU5      n	U	R                  nU	R                  (       a  XU v   UnUnMO     XU v   g7f)a  
Iterate over grapheme clusters in a Unicode string.

Grapheme clusters are "user-perceived characters" - what a user would
consider a single character, which may consist of multiple Unicode
codepoints (e.g., a base character with combining marks, emoji sequences).

:param unistr: The Unicode string to segment.
:param start: Starting index (default 0).
:param end: Ending index (default len(unistr)).
:yields: Grapheme cluster substrings.

Example::

    >>> list(iter_graphemes('cafe\u0301'))
    ['c', 'a', 'f', 'e\u0301']
    >>> list(iter_graphemes('\U0001F468\u200D\U0001F469\u200D\U0001F467'))
    ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
    >>> list(iter_graphemes('\U0001F1FA\U0001F1F8'))
    ['o', 'k', '\U0001F1FA\U0001F1F8']

.. versionadded:: 0.3.0
Nr   r   )
lenminrE   r]   r   r5   rangerf   rU   rS   )
unistrstartendlengthcluster_startrU   rY   idxrZ   r`   s
             r?   iter_graphemesrq      s     8 [F
{|u
c
C MH 's6='9:H 3)))UQY$*3v{+;<x6I??s++M % s
##s   CCc                   [        XS-
     5      nUS:X  a  US:  a  XS-
     S:X  a  US-
  $ US:  aR  US:  aG  US:  aA  [        XS-
     5      nUS:  a+  [        U5      [        R                  :X  a  [	        XS-
  5      $ US-
  $ US-
  nUS:  aa  X-
  [
        :  aU  [        X   5      nSUs=::  a  S:  a  O  OO7[        U5      [        R                  :X  a  OUS-  nUS:  a  X-
  [
        :  a  MU  Un[        [        X   5      5      nU[        R                  :X  a  SOSn[        US-   U5       HG  n	[        [        X	   5      5      n
[        XzX	U5      nUR                  nUR                  (       a  U	nU
nMI     U$ )ac  
Find the start of the grapheme cluster containing the character before pos.

Scans backwards from pos to find a safe starting point, then iterates forward using standard
break rules to find the actual cluster boundary.

:param text: The Unicode string.
:param pos: Position to search before (exclusive).
:returns: Start position of the grapheme cluster.
r   r%   r      r   r   )r]   rE   r   r6   _find_cluster_startMAX_GRAPHEME_SCANr2   r5   rj   rf   rU   rS   )r^   pos	target_cpprev_cp
safe_startcpro   left_gcbrU   rc   	right_gcbr`   s               r?   ru   ru   <  s    DqM"I DSAX$Qw-4*?Qw 4!8	T)$Qw-(G$#:7#Cs{{#R*4q99Qw qJ
q.c.2CC!"2"2&#++5a
 q.c.2CC M&s4+;'<=H 6 66qAH:>3'+CL9	xDXF??M ( r>   c           	     N    US::  a  g[        U [        U[        U 5      5      5      $ )a  
Find the grapheme cluster boundary immediately before a position.

:param unistr: The Unicode string to search.
:param pos: Position in the string (0 < pos <= len(unistr)).
:returns: Start index of the grapheme cluster containing the character at pos-1.

Example::

    >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
    6
    >>> grapheme_boundary_before('a\r\nb', 3)
    1

.. versionadded:: 0.3.6
r   )ru   ri   rh   )rk   rw   s     r?   grapheme_boundary_beforer   p  s&    " axvs3F'<==r>   c              #     #    U (       d  g[        U 5      nUc  UO
[        X#5      n[        US5      nX:  d  X:  a  gUnXA:  a!  [        X5      nXQ:  a  gXU v   UnXA:  a  M   gg7f)ay  
Iterate over grapheme clusters in reverse order (last to first).

:param unistr: The Unicode string to segment.
:param start: Starting index (default 0).
:param end: Ending index (default len(unistr)).
:yields: Grapheme cluster substrings in reverse order.

Example::

    >>> list(iter_graphemes_reverse('cafe\u0301'))
    ['e\u0301', 'f', 'a', 'c']

.. versionadded:: 0.3.6
Nr   )rh   ri   maxru   )rk   rl   rm   rn   rw   ro   s         r?   iter_graphemes_reverser     sq     ( [FK&S%5CqME|u
C
++F8 3'' +s   A A&$A&)rD   rT   returnr   )rD   rT   r   rG   )rY   r   rZ   r   r   zBreakResult | None)rY   r   rZ   r   r^   strr_   rT   rU   rT   r   rQ   )r   N)rk   r   rl   rT   rm   z
int | Noner   zIterator[str])r^   r   rw   rT   r   rT   )rk   r   rw   rT   r   rT   )+r.   
__future__r   enumr   	functoolsr   typingr   r   r	   rB   table_graphemer
   r   r   r   r   r   r   r   r   r   r   r   r   r   collections.abcr   rv   r   rE   rH   rJ   rM   rO   rQ   r[   rf   rq   ru   r   r   r)   r>   r?   <module>r      s   #   , ,: : : : (  ' , 4 D 47 7
 4- -
 40 0
 4- -
*  4- -`@=@=@= @= 	@=
 @= @=J A$A$A$ 
A$ 	A$H1h>0 &&& 
& 	&r>   