
    TAi(                     b    S r SSKrSSKJr  SSKJr  SSKJr  SSKJ	r	  SS	K
Jr   " S
 S\5      rg)z&Implementation of hybrid table parser.    N   )bboxes_overlap)boundaries_to_split_lines   )
BaseParser)Lattice)Networkc                   |   ^  \ rS rSrSr          SU 4S jjrU 4S jrS rS r\	SS j5       r
S rS	 rS
rU =r$ )Hybrid   a6  Defines a hybrid parser, leveraging both network and lattice parsers.

Parameters
----------
table_regions : list, optional (default: None)
    List of page regions that may contain tables of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
table_areas : list, optional (default: None)
    List of table area strings of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
columns : list, optional (default: None)
    List of column x-coordinates strings where the coordinates
    are comma-separated.
split_text : bool, optional (default: False)
    Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
    Flag text based on font size. Useful to detect
    super/subscripts. Adds <s></s> around flagged text.
strip_text : str, optional (default: '')
    Characters that should be stripped from a string before
    assigning it to a cell.
edge_tol : int, optional (default: 50)
    Tolerance parameter for extending textedges vertically.
row_tol : int, optional (default: 2)
    Tolerance parameter used to combine text vertically,
    to generate rows.
column_tol : int, optional (default: 0)
    Tolerance parameter used to combine text horizontally,
    to generate columns.

c                    > [         TU ]  SUUUUUU
S9  X0l        [        UUUUUUUUU	U
S9
U l        [        UUUUUUUU	U
S9	U l        g )Nhybrid)table_regionstable_areas	flag_size
split_text
strip_textdebug)
r   r   columnsr   r   r   edge_tolrow_tol
column_tolr   )	r   r   r   r   r   r   r   r   r   )super__init__r   r	   network_parserr   lattice_parser)selfr   r   r   r   r   r   r   r   r   r   kwargs	__class__s               R/var/www/html/land-ocr/venv/lib/python3.13/site-packages/camelot/parsers/hybrid.pyr   Hybrid.__init__/   s     	'#!! 	 	
 %'#!!!
 &'#!!!

    c	           
         > [         T	U ]  UUUUUUUU5        U R                  R                  UUUUUUUU5        U R                  R                  UUUUUUUU5        g)zCall this method to prepare the page parsing .

Parameters
----------
filename : [type]
    [description]
layout : [type]
    [description]
dimensions : [type]
    [description]
page_idx : [type]
    [description]
layout_kwargs : [type]
    [description]
N)r   prepare_page_parser   r   )
r   filenamelayout
dimensionspage_idximageshorizontal_textvertical_textlayout_kwargsr   s
            r    r$   Hybrid.prepare_page_parse_   s    4 	"		
 	..		
 	..		
r"   c                 B    U R                   U   nUR                  X5      $ )N)table_bbox_parses_generate_columns_and_rows)r   bbox	table_idxparsers       r    r0   !Hybrid._generate_columns_and_rows   s#    ''-00AAr"   c                    U R                   U   nUR                  " XX440 UD6nUR                  R                  S[        R
                  5      Ul        UR                  R                  SSS9Ul        UR                  R                  SSS9Ul        UR                  R                  [        R
                  S5      Ul        UR                  R                  Ul        U$ )N r   all)axishowr   )r/   _generate_tabledfreplacenpnandropnashape)r   r2   r1   colsrowsr   r3   tables           r    r:   Hybrid._generate_table   s    ''-&&yMfM 88##B/88??u?588??u?588##BFFB/hhnnr"   c                 |   [        U 5      S-
  n[        U5      S-
  nSn US:  a   U $ X   nUS:  a!  X`S   S   /nU R                  SU5        US-
  nOhX   nUS   Xb-   :  a  XhS'   Ub  XeS'   US-
  nOHUS   Xb-
  :  a  US-
  nUnUS:  a	  XhS'   US-
  nO&XhS   /nU R                  US-   U5        XhS'   UnUS-
  nM  )zAugment existing boundaries using provided hard splits.

Boundaries:   |---|    |-| |---------|  #noqa RST305
Splits:     |       |     |       |  #noqa RST305
Augmented:  |-------|-----|-------|--|  #noqa RST305
r   Nr   )leninsert)	
boundariessplits	toleranceidx_boundaries
idx_splitsprevious_boundarysplitnew_boundaryboundarys	            r    _augment_boundaries_with_splits&Hybrid._augment_boundaries_with_splits   s*    Z1,[1_
 A~F E &E! %!}Q'78!!!\2'!^
%5A;!22 #(QK )4/4!,!+aJa[5#44%3a%7N(0%%) ',%/!^
 %*A;#7L%%nq&8,G"'QK(4%!+aJK r"   c                    U R                   R                  U   nUS   nU R                  R                  U   nUS   nUc  U R                   U R                  U'   gU R                  XdU R                   R                  5      nUS   S   [        US   US   5      US   S   [        US   US   5      4n[        U5      US'   U R                  R                  U	 XPR                  R                  U'   U R                  U R                  U'   g)	zAIdentify splits that were only detected by lattice or by network.col_anchorscols_boundariesNr   r      cols_anchors)r   r/   r   rQ   	joint_tolminmaxr   )r   lattice_bboxnetwork_bboxlattice_parselattice_colsnetwork_bbox_datanetwork_cols_boundariesaugmented_bboxs           r    _merge_bbox_analysisHybrid._merge_bbox_analysis   s"   ++==lK$]3 //AA,O"34E"F #*373F3FD""<0&*&J&J't7J7J7T7T'# (*1-LO\!_5'+A.LO\!_5	N 1J'1n- ##55lCDU11.A595H5HD"">2r"   c                    U R                   R                  5         [        U R                   R                  S S9nU R                  R                  5         [        U R                  R                  S S9nU Hr  nSn[        [        U5      S-
  SS5       H.  nX%   n[        X65      (       d  M  U R                  X65        X%	 SnM0     U(       a  MY  U R                   U R                  U'   Mt     U H  nU R                  U R                  U'   M     g )Nc                     U S   U S   * 4$ Nr   r    r1   s    r    <lambda>-Hybrid._generate_table_bbox.<locals>.<lambda>       T!WtTUwhDWr"   )keyc                     U S   U S   * 4$ rg   rh   ri   s    r    rj   rk     rl   r"   Fr   rV   T)	r   _generate_table_bboxsortedr/   r   rangerF   r   rc   )r   _lattice_bboxes_network_bboxesr\   mergedidxr]   s          r    ro   Hybrid._generate_table_bbox   s    002 117W
 	002 117W

 ,LFS1A5r2>.3%lAA)),E#( ? 67;7J7J&&|4 , ,L373F3FD""<0 ,r"   )r   r   r   )
NNNFFr6   Nr   r   F)r   )__name__
__module____qualname____firstlineno____doc__r   r$   r0   r:   staticmethodrQ   rc   ro   __static_attributes____classcell__)r   s   @r    r   r      sg     H .
`7
rB
 0 0dI<G Gr"   r   )r{   numpyr=   utilsr   r   baser   latticer   networkr	   r   rh   r"   r    <module>r      s*    ,  " -   IGZ IGr"   