
    TAi                     n    S r SSKrSSKJr  SSKJr  SSKJr  SSKJr  SSKJr  S	S
K	J
r
   " S S\
5      rg)z*Implementation of the Stream table parser.    N   )	TextEdges)bbox_from_str)bbox_from_textlines)text_in_bbox)text_in_bbox_per_axis   )TextBaseParserc                   `   ^  \ rS rSrSr         S	U 4S jjrS rU 4S jrS rS r	Sr
U =r$ )
Stream   a  Stream method of parsing looks for spaces between text to parse the table.

If you want to specify columns when specifying multiple table
areas, make sure that the length of both lists are equal.

Parameters
----------
table_regions : list, optional (default: None)
    List of page regions that may contain tables of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
table_areas : list, optional (default: None)
    List of table area strings of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
columns : list, optional (default: None)
    List of column x-coordinates strings where the coordinates
    are comma-separated.
split_text : bool, optional (default: False)
    Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
    Flag text based on font size. Useful to detect
    super/subscripts. Adds <s></s> around flagged text.
strip_text : str, optional (default: '')
    Characters that should be stripped from a string before
    assigning it to a cell.
edge_tol : int, optional (default: 50)
    Tolerance parameter for extending textedges vertically.
row_tol : int, optional (default: 2)
    Tolerance parameter used to combine text vertically,
    to generate rows.
column_tol : int, optional (default: 0)
    Tolerance parameter used to combine text horizontally,
    to generate columns.

c
                 @   > [         TU ]  SUUUUUUUUU	S9
  / U l        g )Nstream)	table_regionstable_areascolumns
split_text	flag_size
strip_textedge_tolrow_tol
column_tol)super__init__	textedges)selfr   r   r   r   r   r   r   r   r   kwargs	__class__s              R/var/www/html/land-ocr/venv/lib/python3.13/site-packages/camelot/parsers/stream.pyr   Stream.__init__3   s>     	'#!!! 	 	
     c                 ,   UR                  S S9  [        U R                  S9nUR                  U5        UR	                  5       nU R
                  R                  U5        UR                  X5      nU(       d  SSU R                  U R                  4S0nU$ )a6  Anssi Nurminen's Table detection algorithm.

A general implementation of the table detection algorithm
described by Anssi Nurminen's master's thesis.
Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3

Assumes that tables are situated relatively far apart
vertically.
c                 4    U R                   * U R                  4$ N)y0x0xs    r   <lambda>2Stream._nurminen_table_detection.<locals>.<lambda>Z   s    qtteQTT]r!   key)r   r   N)
sortr   r   generateget_relevantr   extendget_table_areas	pdf_width
pdf_height)r   	textlinesr   relevant_textedges
table_bboxs        r   _nurminen_table_detection Stream._nurminen_table_detectionO   s     	23t}}5	9%&33501..yM
aA4HJr!   c                 F   > [         TU ]  U5        U R                  Ul        g)z*Record data about the origin of the table.N)r   record_parse_metadatar   
_textedges)r   tabler   s     r   r:   Stream.record_parse_metadatai   s    %e,>>r!   c                 P   U R                   cp  U R                  nU R                  bE  / nU R                   H"  n[        [	        U5      U R                  5      nM$     UR                  W5        U R                  U5      nO#0 nU R                    H  nS U[	        U5      '   M     X@l        g r$   )r   horizontal_textr   r   r   r0   r7   table_bbox_parses)r   hor_text
region_strregion_textr@   area_strs         r   _generate_table_bboxStream._generate_table_bboxn   s    #++H!!-"&"4"4J".%j143G3G#K #5 , $ > >x H " ,,=A!-"9: -!2r!   c           
      f   [        XR                  U R                  5      U l        [	        U R                  S   U R                  S   -   5      u  p4pVU R                  U R                  S   U R                  S9nU R                  XvU5      nU V	s/ s H  n	[        U	5      PM     n
n	Ub=  U/U-   U/-   n[        S[        U5      S-
  5       Vs/ s H  oU   XS-      4PM     nnGO-[        U
5      (       d  X54/nGO[        [        U
5      U
R                  S9nUS:X  aS  [        [        S U
5      5      n
U
(       a  [        [        U
5      U
R                  S9nO[        R                   " SU 3S	S
9  U V	Vs/ s H5  n	[        U	5      U:X  d  M  U	  H  oR"                  UR$                  4PM     M7     nn	nU R'                  [)        U5      U R*                  S9n/ n[        S[        U5      5       H}  nXS-
     S   nX   S   nUR-                  U R                   VVs/ s H@  nU R                  U     H)  nUR"                  U:  d  M  UR$                  U:  d  M'  UPM+     MB     snn5        M     U R                   VVs/ s HJ  nU R                  U     H3  nUR"                  US   S   :  d  UR$                  US   S   :  d  M1  UPM5     ML     nnnUR-                  U5        U R/                  XU R                  5      nU R1                  XU5      nXS S 4$ s  sn	f s  snf s  snn	f s  snnf s  snnf )N
horizontalvertical)r   r   r	   r+   c                     U S:g  $ )Nr	    r'   s    r   r)   3Stream._generate_columns_and_rows.<locals>.<lambda>   s    Q!Vr!   zNo tables found in table area r   )
stacklevel)r   )r   r?   vertical_textt_bboxr   _group_rowsr   
_join_rowslenrangemaxsetcountlistfilterwarningswarnr&   x1_merge_columnssortedr   r0   _add_columns_join_columns)r   bbox	user_cols
text_x_min
text_y_min
text_x_max
text_y_maxrows_groupedrowsrelementscolsincolst
inner_textleftright	direction
outer_texts                       r   _generate_columns_and_rows!Stream._generate_columns_and_rows   s   +&&(:(:
 :MKK%J(??:
6

 ''L(A4<<'X|D$01LqCFL1 <)+zl:D49!SY]4KL4Kq!Wdq5k*4KDLD x==#01CMx~~>A:  $F+;X$FGH #CMx~~ F <TFCPQ +7*6Q#a&E/LSTaTT144LSTL,   **6$<DOO*T
q#d),AA;q>D GAJE%% .2[[-8	%)[[%; ttd{  01tte| %; -8 - &*[[%0	![[3ttd2hqk)QTTDGAJ-> 3 %0   !!*-((4<<H))$JG4%%g 2 M*s7   LL6L!%L!'L'L'
L'?AL-
L-)rP   r@   r   )	NNNFF 2   r   r   )__name__
__module____qualname____firstlineno____doc__r   r7   r:   rE   rt   __static_attributes____classcell__)r   s   @r   r   r      sD    #N 84*
3&?& ?&r!   r   )r|   rZ   corer   utilsr   r   r   r   baser
   r   rK   r!   r   <module>r      s+    0   ! '   )  s&^ s&r!   