
    TAi                       S r SSKJr  SSKrSSKJr  SSKrSSKJ	r	  SSKJ
r
  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SrSr " S S5      rS r      S$S jr        S%S jrS&S jr           S'S jr! " S S5      r" " S  S!\5      r# " S" S#\5      r$g)(z'Implementation of network table parser.    )annotationsN)Any)LTTextLineHorizontal)LTTextLineVertical   )ALL_ALIGNMENTS)HORIZONTAL_ALIGNMENTS)VERTICAL_ALIGNMENTS)TextAlignments)bbox_from_str)bbox_from_textlines)boundaries_to_split_lines)find_columns_boundaries)text_in_bbox)text_in_bbox_per_axis)textlines_overlapping_bbox   )TextBaseParser      c                  &    \ rS rSrSrSrSS jrSrg)TextLine!   ap  A placeholder class to represent a text line with bounding box attributes.

Attributes
----------
x0 : float
    The x-coordinate of the left edge of the text line.
x1 : float
    The x-coordinate of the right edge of the text line.
y0 : float
    The y-coordinate of the bottom edge of the text line.
y1 : float
    The y-coordinate of the top edge of the text line.
x0y0x1y1c                4    Xl         X l        X0l        X@l        g Nr   )selfr   r   r   r   s        S/var/www/html/land-ocr/venv/lib/python3.13/site-packages/camelot/parsers/network.py__init__TextLine.__init__2   s        )r   r   r   r   N)r   floatr   r&   r   r&   r   r&   )__name__
__module____qualname____firstlineno____doc__	__slots__r#   __static_attributes__ r%   r"   r   r   !   s     )Ir%   r   c                    SnU[        U5      :  a&  X#   U :  a  US-  nU[        U5      :  a
  X#   U :  a  M  UnU[        U5      :  a&  X$   U:  a  US-  nU[        U5      :  a
  X$   U:  a  M  XC-
  $ )z=Get the number of columns crossed by a segment [left, right].r   r   len)leftrightcol_anchors
index_leftindex_rights        r"   column_spreadr7   9   s    J
s;'
'K,Cd,Ja
 s;'
'K,Cd,JK
K(
([-E-Mq K(
([-E-M ##r%   c                   SnSnSnSnU u  pgpU GH8  n
U
R                   U:  aE  U
R                  U	:  d  U
R                  U:  a  M6  Ub  UR                   U
R                   :  a  U
nMW  MY  XR                  :  aE  U
R                  U	:  d  U
R                  U:  a  M  Ub  UR                  U
R                  :  a  U
nM  M  U
R                  U:  d  U
R                   U:  a  M  U
R                  U:  a%  Ub  UR                  U
R                  :  a  U
nGM  GM  XR                  :  d  GM  Ub  UR                  U
R                  :  d  GM6  U
nGM;     UUUUS.$ )a  Search for textlines that are closest to the bounding box but outside in all four directions.

Parameters
----------
bbox : list of float
    A list containing the coordinates of the bounding box in the order
    [left, bottom, right, top].
tls : list of TextLine
    A list of textline objects to search for the closest lines.

Returns
-------
dict
    A dictionary with keys "left", "right", "top", and "bottom",
    each mapping to the closest textline object in that direction or None if not found.
N)r2   r3   topbottom)r   r   r   r   )bboxtlsr2   r3   r9   r:   	bbox_leftbbox_bottom
bbox_rightbbox_toptextlines              r"   find_closest_tlsrB   E   s4   & >BD>BE<@C?CF592YZ;;"{{X%{)B|tww4  5++%{{X%{)B}8;; 6  !7 {{Z'8;;+B{{[(>VYY%<%F &=KK';#&&8;;"6"C1 6 	 r%   c                b   / nSnU(       av  SnU R                  5        HW  nUR                  U:  d  M  UR                  U5        U R                  U5        UR                  U:  d  MI  UR                  nSnMY     U(       a  Mv  U Vs/ s H  oUR
                  UR                  /PM     snU4$ s  snf )aW  Extract zones from the textlines above the body bbox.

Parameters
----------
all_above : List[Any]
    Textlines that are above the bounding box.
max_v_gap : float
    The maximum vertical gap allowed.
top : float
    The current top boundary.

Returns
-------
Tuple[List[List[float]], float]
    The extracted zones and the new top boundary.
TF)copyr   appendremover   r   r   )	all_above	max_v_gapr9   tls_in_new_row	pushed_uprA   s         r"   _extract_zonesrK      s    & NI
	 ^^ {{S %%h/  *;;$ #++C $I 	 )  8FF~8[[(++&~FKKFs   !B,c                    U R                  S S9  / nU  HG  nU(       a  US   S   US   :  a  UR                  U5        M,  [        US   S   US   5      US   S'   MI     U$ )zMerge overlapping zones into consolidated zones.

Parameters
----------
zones : List[List[float]]
    A list of zones defined by their x-coordinates.

Returns
-------
List[List[float]]
    A list of merged zones.
c                    U S   $ Nr   r.   )zs    r"   <lambda>_merge_zones.<locals>.<lambda>   s    QqTr%   keyr   r   )sortrE   max)zonesmerged_zoneszones      r"   _merge_zonesrZ      ss     
JJ>J"&(L|B/2T!W<%"%l2&6q&947"CLQ	  r%   c           	       ^ U nU u  pVpx/ n	Sn
U
(       Ga\  Sn
Sn/ nU H  nSUR                   UR                  -   -  nUR                  U:  d  M1  X^s=:  a  U:  d  M?  O  MC  UR                  U5        SnU Vs/ s HD  nUR                  U:  d  M  USUR                   UR                  -   -  s=:  a  U:  d  M>  O  MB  UPMF     nnU(       d  M  [	        US S9nM     U(       a  UR                  X-   :  ar  [        XUR                  5      u  p[        U	5      n[        U4S jU 5       5      nU[	        [        [        R                  " [        T5      S-  5      5      ::  a  XVXx4nSn
U
(       a  GM\  U$ s  snf )	a  Expand a bounding box (bbox) vertically by looking for plausible headers.

The core algorithm is based on fairly strict alignment of text. It works
for the table body but might fail on table headers since they tend to be
in a different font, alignment (e.g., vertical), etc. This method evaluates
the area above the table body's bbox for characteristics of a table header:
close to the top of the body, with cells that fit within the horizontal bounds identified.

Parameters
----------
body_bbox : Tuple[float, float, float, float]
    The bounding box of the body in the format (left, bottom, right, top).
textlines : List[Any]
    A list of textline objects, each with properties x0, x1, y0, and y1.
col_anchors : List[float]
    A list of x-coordinates representing column anchors.
max_v_gap : float
    The maximum vertical gap allowed to consider a header plausible.

Returns
-------
Tuple[float, float, float, float]
    The expanded bounding box in the format (left, bottom, right, top).
TFNg      ?c                    U R                   $ r    r   )tls    r"   rP   .search_header_from_body_bbox.<locals>.<lambda>   s    "%%r%   rR   c              3  J   >#    U  H  n[        US    US   T5      v   M     g7f)r   r   N)r7   ).0rY   r4   s     r"   	<genexpr>/search_header_from_body_bbox.<locals>.<genexpr>  s(      IUd1gtAw<<s    #r   )r   r   r   rE   minrK   r   rZ   rV   MAX_COL_SPREAD_IN_HEADERmathceilr1   )	body_bbox	textlinesr4   rH   new_bboxr2   r:   r3   r9   rW   keep_searchingclosest_aboverG   rA   textline_centerr^   rX   
max_spreads     `               r"   search_header_from_body_bboxro      s   < H!*T5!EN
 	!H!X[[8;;%>?O{{S T%Ce%C%C  * $ ('uus{ '+cRUURUU].C'Ke'K 'K '   9$'	7G$HM " ]--? (	m>N>NOJE'.L IU J S($))C4Dq4H*I  !%5 "&g .h OQs   7E3%E38E3<E3c                  R    \ rS rSrSrS rS rS rSS jrS r	S	 r
S
 rS rS rSrg)AlignmentCounteri  z
For a given textline, represent all other textlines aligned with it.

A textline can be vertically aligned with others if their bbox match on
left, right, or middle coord, and horizontally aligned if they match top,
bottom, or center coord.

c                J    0 U l         [         H  n/ U R                   U'   M     g r    )alignment_to_occurrencesr   )r!   	alignments     r"   r#   AlignmentCounter.__init__'  s$    (*%'I79D)))4 (r%   c                     U R                   U   $ )z/Get the value of a property to the given value.rs   )r!   rS   s     r"   __getitem__AlignmentCounter.__getitem__,  s    ,,S11r%   c                "    X R                   U'   U$ )z/Set the value of a property to the given value.rw   )r!   rS   values      r"   __setitem__AlignmentCounter.__setitem__0  s    -2%%c*r%   Nc                   ^  U=(       d    T R                   R                  5       n[        U 4S jU5      n[        US S9$ )z=Get the alignment dimension with the max number of textlines.c                &   > U TR                   U    4$ r    rw   )alignment_idr!   s    r"   rP   1AlignmentCounter.max_alignments.<locals>.<lambda>9  s    --l;"r%   c                    [        U S   5      $ Nr   r0   )items    r"   rP   r   ?  s    Sa\r%   rR   )rs   keysmaprV   )r!   alignment_idsalignment_itemss   `  r"   max_alignmentsAlignmentCounter.max_alignments5  s@    %M)F)F)K)K)M 
 ?(ABBr%   c                ,    U R                  [        5      $ )z8Tuple (alignment_id, textlines) of largest vertical row.)r   r	   r!   s    r"   max_vAlignmentCounter.max_vA  s    
 ""#899r%   c                ,    U R                  [        5      $ )z:Tuple (alignment_id, textlines) of largest horizontal col.)r   r
   r   s    r"   max_hAlignmentCounter.max_hH  s    ""#677r%   c                :    [        U R                  5       S   5      $ )zuMaximum vertical count.

Return the maximum number of alignments along
one of the vertical axis (left/right/middle).
r   )r1   r   r   s    r"   max_v_countAlignmentCounter.max_v_countL       4::<?##r%   c                :    [        U R                  5       S   5      $ )zyMaximum horizontal count.

Return the maximum number of alignments along
one of the horizontal axis (bottom/top/center).
r   )r1   r   r   s    r"   max_h_countAlignmentCounter.max_h_countT  r   r%   c                P    U R                  5       S-
  U R                  5       S-
  -  $ )zReturn the alignment score.

We define the alignment score of a textline as the product of the
number of aligned elements - 1. The -1 is to avoid favoring
singletons on a long line.
r   )r   r   r   s    r"   alignment_score AlignmentCounter.alignment_score\  s+       "Q&4+;+;+=+ABBr%   rw   r    )r'   r(   r)   r*   r+   r#   rx   r|   r   r   r   r   r   r   r-   r.   r%   r"   rq   rq     s6    :
2

C:8$$Cr%   rq   c                     ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 r      SS
 jr              SS jr        SS jr        SS jr        SS jr      SS jrSS jrSrU =r$ )TextNetworksif  zText elements connected by vertical AND horizontal alignments.

The alignment dict has six keys based on the hor/vert alignments,
and each key's value is a list of camelot.core.TextAlignment objects.
c                :   > [         TU ]  [        5        0 U l        g r    )superr#   r   _textline_to_alignments)r!   	__class__s    r"   r#   TextNetworks.__init__m  s    ( (*$r%   c                &    UR                  X25        g r    )register_aligned_textline)r!   rt   coordrA   s       r"   _update_alignmentTextNetworks._update_alignments  s    ++H<r%   c                    U HA  n[        UR                  5       R                  5       5      S:  d  M0  U R                  U5        MC     g)zCAdd all textlines to our network repository to identify alignments.r   N)r1   get_textstrip_register_textline)r!   ri   rA   s      r"   _register_all_text_lines%TextNetworks._register_all_text_linesv  s;     "H8$$&,,./!3''1 "r%   c                   U R                   R                  5        Hf  u  pU H[  nUR                   HH  nU R                  R	                  US5      nUc  [        5       nXPR                  U'   UR                  XQ'   MJ     M]     Mh     g)z0Build a dictionary textline -> alignment object.N)_text_alignmentsitemsri   r   getrq   )r!   align_id	textedgestextedgerA   
alignmentss         r"   _compute_alignment_counts&TextNetworks._compute_alignment_counts}  s{    #'#8#8#>#>#@H% ( 2 2H!%!=!=!A!A(D!QJ!)%5%7
AK44X>+3+=+=J( !3 & $Ar%   c                   SnU(       a  SnU R                   R                  5        H  nU H  n/ n[        [        UR                  5      5       H\  nUR                  U   nU R
                  U   nUR                  5       S::  d  UR                  5       S::  d  MK  UR                  U5        M^     [        U5       H  nUR                  U	 SnM     M     M     0 U l        U R                  5         U(       a  M  gg)zRemove elements which are only connected on one dimension.

Elements should be connected to others both vertically
and horizontally.
TFr   N)r   valuesranger1   ri   r   r   r   rE   reversedr   )	r!   removed_singletonstext_alignmentstext_alignment	to_removeirA   r   indexs	            r"   remove_unconnected_edges%TextNetworks.remove_unconnected_edges  s     " !&#'#8#8#?#?#A '6N "I"3~'?'?#@A#1#;#;A#>%)%A%A(%K
 '2249)5571<%,,Q/ B "*)!4*44U;-1* "5! '6 $B0 ,.D(**,9 ! r%   c                R   ^  [        T R                  R                  5       U 4S jSS9$ )z-Retrieve the textline that is most connected.c                p   > TR                   U    R                  5       U R                  * U R                  * 4$ r    )r   r   r   r   )rA   r!   s    r"   rP   6TextNetworks.most_connected_textline.<locals>.<lambda>  s1    ,,X6FFH"r%   N)rS   default)rV   r   r   r   s   `r"   most_connected_textline$TextNetworks.most_connected_textline  s.     ((--/
 
 	
r%   c           	     <   U R                  5       nUc  gU R                  R                  U5      nUc  gUR                  5       u  p4UR	                  5       u  p5[        U5      S::  d  [        U5      S::  a  g[        US S9n[        US S9n[        R                  " [        S[        U5      5       Vs/ s H#  nXh   R                  XhS-
     R                  -
  PM%     sn5      n	[        R                  " [        S[        U5      5       Vs/ s H#  nXx   R                  XxS-
     R                  -
  PM%     sn5      n
U	R                  S:X  d  U
R                  S:X  a  gS[        R                  " U	S5      -  S[        R                  " U
S5      -  4nU$ s  snf s  snf )	zEvaluate plausible gaps between cells.

Both horizontally and vertically
based on the textlines aligned with the most connected textline.

Returns
-------
gaps_hv : tuple
    (horizontal_gap, vertical_gap) in pdf coordinate space.
Nr   c                    U R                   $ r    )r   rA   s    r"   rP   5TextNetworks.compute_plausible_gaps.<locals>.<lambda>      8;;r%   rR   c                    U R                   $ r    r]   r   s    r"   rP   r     r   r%   r   g       @K   )r   r   r   r   r   r1   sortednparrayr   r   r   size
percentile)r!   most_aligned_tlbest_alignment__ref_h_textlinesref_v_textlinesh_textlinesv_textlinesr   h_gapsv_gapsgaps_hvs               r"   compute_plausible_gaps#TextNetworks.compute_plausible_gaps  s    668" 5599/J! -224,224 1$O(<(A _2NO_2NO  q#k"233A !!KA$6$9$993
  q#k"233A !!KA$6$9$993
 ;;!v{{a/ "--++"--++

 -s   4*F*Fc                    U R                  5       nUu  pESnUb  UU/ S.nUR                  U5        UR                  UR                  UR                  UR
                  /n[        U R                  R                  5       5      nUR                  U5        U/n	Sn
UR                  UR                  4/nX:w  aD  Ub  US   R                  U5        Un
[        Xx5      nU R                  UUUUUU	5      u  p{pX:w  a  MD  [        U	5      [        :  a  U$ g)a  Build a candidate bounding box for the body of a table using network algorithm.

Parameters
----------
gaps_hv : tuple of float
    The maximum distance allowed to consider surrounding lines/columns
    as part of the same table.
parse_details : list
    Optional parameter list, in which to store extra information
    to help later visualization of the table creation.

Returns
-------
list of float or None
    The bounding box of the table body as a list of four floats
    [x0, y0, x1, y1] or None if not enough textlines are found.
N)	max_h_gaprH   
iterationsr   )r   rE   r   r   r   r   listr   r   rF   rB   expand_bboxr1   MINIMUM_TEXTLINES_IN_TABLE)r!   r   parse_detailsr   r   rH   parse_details_searchr;   tls_search_spacetls_in_bbox	last_bboxlast_cols_boundsclosest_tlss                r"   search_table_bodyTextNetworks.search_table_body  s<   , 668&	6:$&& $ 
   !56 	
   < < A A CD0&'	,//1C1CDE#/$\299$?I*4BKDHDTDT  EADK  {99Kr%   c                   UR                  5       nUR                  5        H  u  pU	b  U R                  XyXH5      (       d  M!  U R                  XyU5      n
[	        X5      nX-   nU R                  XU5      (       d  M[  [        [        U5      5      =p[        U5      nUR                  U5        U R                  X;5        M     XXc4$ )aL  Expand the bounding box based on closest textlines.

Parameters
----------
bbox : list of float
    The current bounding box.
closest_tls : dict
    The closest textlines found.
tls_search_space : list
    The list of textlines available for searching.
gaps_hv : tuple of float
    The maximum allowed horizontal and vertical gaps.
last_cols_bounds : list of tuple
    The boundaries of the last found columns.
tls_in_bbox : list
    The textlines currently in the bounding box.

Returns
-------
tuple
    The updated bounding box, column boundaries, textlines in bbox, and search space.
)rD   r   can_expand_bboxget_expanded_bboxr   is_valid_expansionr   r   r   extendupdate_search_space)r!   r;   r   r   r   r   r   	cand_bbox	directionrA   expanded_cand_bboxnew_tlstls_in_new_boxs                r"   r   TextNetworks.expand_bbox=  s    > IIK	#.#4#4#6It';';W( ( !%!7!7	Y!W"#5HG$2N**9FVWW#$7$GHHD6~Fw'$$%5?! $7$ {DDr%   c                    US:X  a  US   UR                   -
  US   :*  $ US:X  a  UR                  US   -
  US   :*  $ US:X  a  US   UR                  -
  US   :*  $ US:X  a  UR                  US   -
  US   :*  $ g	)
a  Check if the bounding box can be expanded in the given direction.

Parameters
----------
cand_bbox : list of float
    The candidate bounding box.
textline : Any
    The textline to check against.
gaps_hv : tuple of float
    The maximum allowed horizontal and vertical gaps.
direction : str
    The direction to check for expansion.

Returns
-------
bool
    True if the bounding box can be expanded, otherwise False.
r2   r   r3   r   r:   r   r9   r   F)r   r   r   r   )r!   r   rA   r   r   s        r"   r   TextNetworks.can_expand_bboxr  s    6 Q<(++-;;'!;;1-;;("Q<(++-;;%;;1-;;r%   c                    UR                  5       nUS:X  a  UR                  US'   U$ US:X  a  UR                  US'   U$ US:X  a  UR                  US'   U$ US:X  a  UR                  US'   U$ )	aM  Get the expanded bounding box based on the textline in the specified direction.

Parameters
----------
cand_bbox : list of float
    The candidate bounding box.
textline : Any
    The textline to expand the bounding box with.
direction : str
    The direction to expand.

Returns
-------
list of float
    The expanded bounding box.
r2   r   r3   r   r:   r   r9   r   )rD   r   r   r   r   )r!   r   rA   r   r   s        r"   r   TextNetworks.get_expanded_bbox  s    & '^^-$,KKq! "! '!$,KKq!
 "!	 ("$,KKq! "! %$,KKq!!!r%   c                h    [        U5      nUS;   =(       a    [        U5      [        U5      :  (       + $ )aG  Check if the new expansion is valid.

Parameters
----------
direction : str
    The direction of expansion.
tls_in_new_box : list
    The textlines in the new bounding box.
last_cols_bounds : list of tuple
    The boundaries of the last found columns.

Returns
-------
bool
    True if the expansion is valid, otherwise False.
)r:   r9   )r   r1   )r!   r   r   r   cols_boundss        r"   r   TextNetworks.is_valid_expansion  s6    , .n=**Ws;/?#FVBW/W
 	
r%   c                ^    [        [        U5      S-
  SS5       H  nX   nXB;   d  M  X	 M     g)zUpdate the search space by removing textlines in the new bounding box.

Parameters
----------
tls_search_space : list
    The current search space of textlines.
new_tls : list
    The new textlines added to the bounding box.
r   rT   N)r   r1   )r!   r   r   r   rA   s        r"   r    TextNetworks.update_search_space  s6     s+,q0"b9A'*H"$' :r%   c                F    U R                  U5        U R                  5         g)zGenerate the text edge dictionaries based on the input textlines.

Parameters
----------
textlines : list
    List of textline objects to be processed.
N)r   r   )r!   ri   s     r"   generateTextNetworks.generate  s     	%%i0&&(r%   )r   )r   tuple[float, float]r   zlist[Any] | Nonereturnzlist[float] | None)r;   list[float]r   zdict[str, Any]r   	list[Any]r   r	  r   r  r   r  r
  z3tuple[list[float], list[Any], list[Any], list[Any]])r   r  rA   r   r   r	  r   str)r   r  rA   r   r   r  r
  r  )r   r  r   r  r   r  r
  bool)r   r  r   r  r
  None)ri   r  r
  r  )r'   r(   r)   r*   r+   r#   r   r   r   r   r   r   r   r   r   r   r   r   r  r-   __classcell__r   s   @r"   r   r   f  sH   *=2	>%-N
 :x@$@ (@ 
	@D3E3E $3E $	3E
 %3E $3E 3E 
=3Ej## # %	#
 #J"$"03"@C"	"<

 "
 $	

 

6( )(4=(	("	) 	)r%   r   c                  h   ^  \ rS rSrSr          SU 4S jjrS rS rS rS r	S r
S	 rS
rU =r$ )Networki  a  Network method looks for spaces between text to parse the table.

If you want to specify columns when specifying multiple table
areas, make sure that the length of both lists are equal.

Parameters
----------
table_regions : list, optional (default: None)
    List of page regions that may contain tables of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
table_areas : list, optional (default: None)
    List of table area strings of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
columns : list, optional (default: None)
    List of column x-coordinates strings where the coordinates
    are comma-separated.
split_text : bool, optional (default: False)
    Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
    Flag text based on font size. Useful to detect
    super/subscripts. Adds <s></s> around flagged text.
strip_text : str, optional (default: '')
    Characters that should be stripped from a string before
    assigning it to a cell.
edge_tol : int, optional (default: 50)
    Tolerance parameter for extending textedges vertically.
row_tol : int, optional (default: 2)
    Tolerance parameter used to combine text vertically,
    to generate rows.
column_tol : int, optional (default: 0)
    Tolerance parameter used to combine text horizontally,
    to generate columns.

c                4   > [         TU ]  SUUUUUUUUU	U
S9  g )Nnetwork)
table_regionstable_areascolumns	flag_size
split_text
strip_textedge_tolrow_tol
column_toldebug)r   r#   )r!   r  r  r  r  r  r  r  r  r  r  kwargsr   s               r"   r#   Network.__init__  s8     	'#!!! 	 	
r%   c                   U R                  5       n[        U R                  5       5      nU(       a  [        U5      O/ n[        5       n0 U l        U R
                  b-  / U R
                  S'   / U R
                  S'   / U R
                  S'   U(       a  U R                  X5      u  pVUc  g [        XS5      nU(       d  g [        U5      n[        U5      n	U R                  UUUUU	U5      n
[        U
[        5      (       a  [        U
5      n
UUU	U
S.nXR                  U
'   U R
                  b  U R
                  S   R                  U5        UR                  U5        U Vs/ s H  oU;  d  M
  UPM     nnU(       d  g U(       a  M  g g s  snf )Nnetwork_searchesbbox_searchescol_searches)	bbox_bodycols_boundariescols_anchors	bbox_full)_get_user_provided_bboxesr   _get_filtered_textlinessettable_bbox_parsesr   _get_bbox_bodyr   r   r   _get_full_bbox
isinstancetuplerE   update)r!   user_provided_bboxesfiltered_textlinesri   textlines_processedr&  r   r   r'  r(  r)  table_parser^   s                r"   _generate_table_bboxNetwork._generate_table_bbox/  s   #==?!((*
 1CD+,	 E 	 "$)57D1224D/13D~.!%!4!45I!UI 4YJK5kBO4_EL++$I )T**!),	 '#2 ,&	K 1<""9-!!-"">299+F  &&{3&/Qi=P3PiIQ Y iP Rs   	F(Fc                   Ub!  [        U5      S:  a  UR                  5       S 4$ [        5       nUR                  U5        UR	                  5         UR                  5       nUc  gUS   U R                  c  US   OU R                  4nUR                  UU R                  (       a  U R                  S   OS S9nU R                  b  U R                  S   R                  U5        Xd4$ )Nr   )NNr   r$  )r   r#  )
r1   popr   r  r   r   r  r   r   rE   )r!   r3  ri   text_networkr   edge_tol_hvr&  s          r"   r.  Network._get_bbox_bodyn  s    +'(1,(,,. 
 $~i(--/557? AJ--/GAJT]]
 !227;7I7I""?3t 3 
	 )1299,G!!r%   c                n    Ub  [        U5      S:  a  U$  U$ [        U5      nUb  [        XtXVS   5      $ U$ )Nr   r   )r1   r   ro   )r!   r3  r&  r   ri   r(  r   bbox_body_from_tlss           r"   r/  Network._get_full_bbox  s\      +'(1,   -  "5[!A!-3&<  r%   c                    U R                   U R                  -    Vs/ s H2  n[        UR                  5       R	                  5       5      S:  d  M0  UPM4     nnU R                  U5      $ s  snf rN   )horizontal_textvertical_textr1   r   r   _apply_regions_filter)r!   tall_textliness      r"   r+  Network._get_filtered_textlines  si     ))D,>,>>
>1::<%%'(1, > 	 

 ))-88
s   /A(A(c                t    U R                   b&  U R                    Vs/ s H  n[        U5      PM     sn$ g s  snf r    )r  r   )r!   area_strs     r"   r*  !Network._get_user_provided_bboxes  s9    '<@<L<LM<LM(+<LMM Ns   5c                  ^ [        XR                  U R                  5      U l        [	        [        [        S U R                  S   U R                  S   -   5      S S95      n[        U5      u  pEpgU R                  X0R                  S9nU R                  XU5      n	Ub<  U/U-   U/-   n
[        S[        U
5      S-
  5       Vs/ s H  oU   XS-      4PM     n
nOhXR                  ;   aD  U R                  U   nUS	   m[	        [        U4S
 j[        S[        T5      S-
  5      5      5      n
O[        SU S35        / / / / 4$ XS S 4$ s  snf )Nc                V    [        U R                  5       R                  5       5      S:  $ rN   )r1   r   r   r   s    r"   rP   4Network._generate_columns_and_rows.<locals>.<lambda>  s     S):):)<)B)B)D%E%Ir%   
horizontalverticalc                4    U R                   * U R                  4$ r    )r   r   r   s    r"   rP   rM    s    x{{lHKK%@r%   rR   )r  r   r   r(  c                   > TU    TU S-      /$ r   r.   )idxr4   s    r"   rP   rM    s    [%5{377K$Lr%   zWarning: Bounding box z  not found in table_bbox_parses.)r   rB  rC  t_bboxr   r   filterr   _group_rowsr  
_join_rowsr   r1   r-  r   print)r!   r;   	user_colsall_tls
text_x_min
text_y_min
text_x_max
text_y_maxrows_groupedrowscolsr   r   r4   s                @r"   _generate_columns_and_rows"Network._generate_columns_and_rows  sn   +&&(:(:
 IKK-J0GG A
 :MW9U6

 '''F|D <)+zl:D49!SY]4KL4Kq!Wdq5k*4KDLD --- $ 6 6t <+N;La[!1A!56 .tf4TUV2r2~%4%%' Ms   E)rS  r-  )
NNNFF Nr   r   F)r'   r(   r)   r*   r+   r#   r7  r.  r/  r+  r*  ra  r-   r  r  s   @r"   r  r    sS    #N 
8=~"@(9
-& -&r%   r  )r;   r  r<   z/list[LTTextLineHorizontal | LTTextLineVertical]r
  z;dict[str, LTTextLineHorizontal | LTTextLineVertical | None])rG   r  rH   r&   r9   r&   r
  ztuple[list[list[float]], float])rW   list[list[float]]r
  rd  )
rh   !tuple[float, float, float, float]ri   r  r4   r  rH   r&   r
  re  )%r+   
__future__r   rf   typingr   numpyr   pdfminer.layoutr   r   corer   r	   r
   r   utilsr   r   r   r   r   r   r   baser   re   r   r   r7   rB   rK   rZ   ro   rq   r   r  r.   r%   r"   <module>rm     s   - "    0 . ! ( & ! ! ' - +   ) .       0	$9
9K9@9x&L&L%*&L16&L$&LR2W0WW W 	W
 'WtFC FCRD)> D)No&n o&r%   