
    TAi98                        S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	  SSKJ
r
  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr   " S S\5      rg)z+Implementation of the Lattice table parser.    )annotationsN)Any   )ImageConversionBackend)adaptive_threshold)find_contours)find_joints)
find_lines)build_file_path_in_temp_dir)merge_close_lines)scale_image)	scale_pdf)segments_in_bbox)text_in_bbox_per_axis   )
BaseParserc                     ^  \ rS rSrSr                 SU 4S jjr\          SS j5       r\        SS j5       rU 4S jr	S r
S rS	 rS
rU =r$ )Lattice   aB  Lattice method looks for lines between text to parse the table.

Parameters
----------
table_regions : list, optional (default: None)
    List of page regions that may contain tables of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
table_areas : list, optional (default: None)
    List of table area strings of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
process_background : bool, optional (default: False)
    Process background lines.
line_scale : int, optional (default: 15)
    Line size scaling factor. The larger the value the smaller
    the detected lines. Making it very large will lead to text
    being detected as lines.
copy_text : list, optional (default: None)
    {'h', 'v'}
    Direction in which text in a spanning cell will be copied
    over.
shift_text : list, optional (default: ['l', 't'])
    {'l', 'r', 't', 'b'}
    Direction in which text in a spanning cell will flow.
split_text : bool, optional (default: False)
    Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
    Flag text based on font size. Useful to detect
    super/subscripts. Adds <s></s> around flagged text.
strip_text : str, optional (default: '')
    Characters that should be stripped from a string before
    assigning it to a cell.
line_tol : int, optional (default: 2)
    Tolerance parameter used to merge close vertical and horizontal
    lines.
joint_tol : int, optional (default: 2)
    Tolerance parameter used to decide whether the detected lines
    and points lie close to each other.
threshold_blocksize : int, optional (default: 15)
    Size of a pixel neighborhood that is used to calculate a
    threshold value for the pixel: 3, 5, 7, and so on.

    For more information, refer `OpenCV's adaptiveThreshold
    <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
threshold_constant : int, optional (default: -2)
    Constant subtracted from the mean or weighted mean.
    Normally, it is positive but may be zero or negative as well.

    For more information, refer `OpenCV's adaptiveThreshold
    <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
iterations : int, optional (default: 0)
    Number of times for erosion/dilation is applied.

    For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
backend* : str, optional by default "pdfium"
    The backend to use for converting the PDF to an image so it can be processed by OpenCV.
use_fallback* : bool, optional
    Fallback to another backend if unavailable, by default True
resolution : int, optional (default: 300)
    Resolution used for PDF to PNG conversion.

c                8  > [         TU ]  S5        Xl        X l        X0l        X@l        XPl        U=(       d    SS/U l        Xpl        Xl	        Xl
        Xl        Xl        Xl        Xl        Xl        Xl        UU l        [%        UUS9U l        S U l        S U l        g )Nlatticelt)use_fallbackbackend)super__init__table_regionstable_areasprocess_background
line_scale	copy_text
shift_text
split_text	flag_size
strip_textline_tol	joint_tolthreshold_blocksizethreshold_constant
iterations
resolutionr   r   icb
image_path	pdf_image)selfr   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r   r   kwargs	__class__s                      S/var/www/html/land-ocr/venv/lib/python3.13/site-packages/camelot/parsers/lattice.pyr   Lattice.__init__W   s    * 	#*&"4$"$2c
$"$ "#6 "4$$()|WU    c                   US:X  aX  US:  aO  U R                   U   U   R                  (       d.  US-  nUS:  a#  U R                   U   U   R                  (       d  M.  X4$ US:X  a  U[        U R                   U   5      S-
  :  ah  U R                   U   U   R                  (       dG  US-  nU[        U R                   U   5      S-
  :  a#  U R                   U   U   R                  (       d  MG  X4$ US:X  aX  US:  aO  U R                   U   U   R                  (       d.  US-  nUS:  a#  U R                   U   U   R                  (       d  M.  X4$ US:X  a  U[        U R                   5      S-
  :  ae  U R                   U   U   R
                  (       dD  US-  nU[        U R                   5      S-
  :  a#  U R                   U   U   R
                  (       d  MD  X4$ )a  
Shift the index based on the specified direction.

Parameters
----------
table : camelot.core.Table
    The table structure containing rows and columns.
r_idx : int
    Row index of the cell.
c_idx : int
    Column index of the cell.
direction : str
    Direction in which to shift the index ('l', 'r', 't', 'b').

Returns
-------
tuple
    New row and column indices after the shift.
r   r   r   rr   b)cellsleftlenrighttopbottom)tabler_idxc_idx	directions       r3   _shift_indexLattice._shift_index   s   . !)EKK$6u$=$B$B
 !)EKK$6u$=$B$B$B | #EKK./!33E*5177
 EKK./!33E*51777 | #!)EKK$6u$=$A$A
 !)EKK$6u$=$A$A$A |	 ##ekk*Q..u{{57I%7P7W7W
 #ekk*Q..u{{57I%7P7W7W7W |r5   c                    / nU H:  u  pEnU H  n[         R                  XXW5      u  pEM     UR                  XEU45        M<     U$ )a  
Reduces the index of a text object if it lies within a spanning cell.

Parameters
----------
table : camelot.core.Table
    The table structure containing rows and columns.
idx : list of tuples
    List of tuples of the form (r_idx, c_idx, text) where r_idx
    is the row index, c_idx is the column index, and text is the
    associated text for that index.
shift_text : list of str
    A list containing one or more of the following strings:
    {'l', 'r', 't', 'b'} to specify the direction in which the
    text in a spanning cell should flow. 'l' for left, 'r' for right,
    't' for top, 'b' for bottom.

Returns
-------
list of tuples
    List of tuples of the form (r_idx, c_idx, text) where r_idx
    and c_idx are the new row and column indices for the text after
    adjustment.
)r   rC   append)r?   idxr#   indicesr@   rA   textrB   s           r3   _reduce_indexLattice._reduce_index   sR    8 "%E$'	&33E%Su ( NNE$/0 #& r5   c                   > [         TU ]  U5        U R                  Ul        U R                  U R
                  4Ul        g)z*Record data about the origin of the table.N)r   record_parse_metadatar/   _imagevertical_segmentshorizontal_segments	_segments)r0   r?   r2   s     r3   rM   Lattice.record_parse_metadata   s3    %e,~~1143K3KLr5   c                d  ^ U4S jn[        [        R                  R                  U R                  5      S5      U l        U R                  R                  U R                  U R
                  5        [        U R
                  U R                  U R                  U R                  S9u  U l        U l        U R                  R                  S   nU R                  R                  S   nU[        U R                   5      -  nU[        U R"                  5      -  nU R                   [        U5      -  nU R"                  [        U5      -  nXEU R"                  4mXgU4nU R$                  c  S n	U R&                  b  U" U R&                  5      n	[)        U R                  U	SU R*                  U R,                  S9u  p[)        U R                  U	SU R*                  U R,                  S9u  p[/        X5      n[1        XU5      nOv[)        U R                  SU R*                  U R,                  S	9u  p[)        U R                  SU R*                  U R,                  S	9u  pU" U R$                  5      n[1        UX5      n[3        XX5      u  U l        U l        U l        U R4                  R;                  5        GH  u  nnUS
   nU R<                  n[?        [A        S [C        US S95      5      n[E        S[G        U5      5       H7  nUUS-
     S   UU   S   nnUU-
  Us=::  a  UU-   ::  d  M+  O  M/  UUU   S'   M9     [C        US S9n[E        S[G        U5      5       H7  nUUS-
     S   UU   S   nnUU-
  Us=::  a  UU-   ::  d  M+  O  M/  UUU   S'   M9     UUS'   [?        [A        S U5      5      nURI                  US   US   /5        [?        [A        S U5      5      nURI                  US   US   /5        [K        [C        U5      U R<                  S9n[K        [C        USS9U R<                  S9nUUS'   UUS'   GM     g )Nc           
       > / nU  H~  nUR                  S5      u  p4pV[        U5      n[        U5      n[        U5      n[        U5      n[        X4XV4T5      u  p4pVUR                  X4[	        XS-
  5      [	        Xd-
  5      45        M     U$ )N,)splitfloatr   rF   abs)areasscaled_areasareax1y1x2y2image_scalerss          r3   scale_areas1Lattice._generate_table_bbox.<locals>.scale_areas   s    L!%C2Y2Y2Y2Y!*BB+;]!K##RS\3rw<$HI   r5   z.png)r    	blocksizecr   r   vertical)regionsrB   r!   r+   
horizontal)rB   r!   r+   jointsc                    [        U 5      $ )N)list)xs    r3   <lambda>.Lattice._generate_table_bbox.<locals>.<lambda>,  s    d1gr5   c                    U S   * $ Nr    js    r3   rl   rm   ,  s
    QqTEr5   )keyc                    U S   * $ Nr   rp   rq   s    r3   rl   rm   7  s
    1r5   joints_normalizedc                    U S   $ ro   rp   coordss    r3   rl   rm   D      6!9r5   r   c                    U S   $ ru   rp   rx   s    r3   rl   rm   F  rz   r5      )r'   T)reversecol_anchorsrow_anchors)&r   ospathbasenamefilenamer.   r-   convertr   r    r)   r*   r/   	thresholdshaperW   	pdf_width
pdf_heightr   r   r
   r!   r+   r   r	   r   table_bbox_parsesrO   rP   itemsr'   rj   mapsortedranger;   extendr   )r0   ra   image_widthimage_heightimage_width_scalerimage_height_scalerpdf_width_scalerpdf_height_scalerpdf_scalersrf   vertical_maskrO   horizontal_maskrP   contours
table_bboxrY   bboxparserh   r'   rv   rG   x_leftx_righty_bottomy_topcolsrowsr`   s                                @r3   _generate_table_bboxLattice._generate_table_bbox   s   
	  6GGT]]+V
 	8);OO#66..%%	*
& nn**1-~~++A.(5+@@*U4??-CC>>E+,>> OOeL.AA+$//R'LI#G!!-%d&8&89/9$????0,M 4>&????40O %]DH$XoNJ/9$????	0,M 4>&????	40O   0 01E$UMKJ 
7JX 	S	!79Q  11779KD%8_F }}H $%vf/'JK! Q$5 67%cAg.q1%c*1-   H$D6H3DDD06%c*1- 8 !''8o NQ$5 67%cAg.q1%c*1-   h&%F8h3FFF08%c*1- 8 *;E%&4f=>DKKa$q'*+4f=>DKKa$q'*+ %VD\DMMJD$VD$%?$--XD#'E- #'E- Q :r5   c                   [        XR                  U R                  5      u  p4[        XR                  U R
                  5      U l        U R                  U   n[        S[        US   5      S-
  5       Vs/ s H  nUS   U   US   US-      4PM     nn[        S[        US   5      S-
  5       Vs/ s H  nUS   U   US   US-      4PM     nnXxX44$ s  snf s  snf )Nr   r~   r   r   )
r   rO   rP   r   horizontal_textvertical_textt_bboxr   r   r;   )	r0   r   	user_colsv_sh_sr   ir   r   s	            r3   _generate_columns_and_rows"Lattice._generate_columns_and_rowsO  s   #(($*B*B
 ,&&(:(:
 &&t, 1c%"67!;<
< =!!$eM&:1q5&AB< 	 
 1c%"67!;<
< =!!$eM&:1q5&AB< 	 
 3##

s   4C0Cc                    UR                  S5      nUR                  S5      nUb  Uc  [        SU R                   35      eU R                  XX45      nUR	                  XgU R
                  S9nUR                  5       nU R                  U5        U$ )Nr   r   zNo segments found on )r(   )get
ValueErrorrootname_initialize_new_table	set_edgesr(   
set_borderrM   )	r0   	table_idxr   r   r   r1   r   r   r?   s	            r3   _generate_tableLattice._generate_tablec  s    jjjj;#+4T]]ODEE**9DGDNNC  """5)r5   )r"   r%   rP   r-   r.   r+   r(   r!   r'   r/   r    r,   r#   r$   r&   r   r   r   r   r   r)   r*   r   rO   )NNF   NNFF r   r   r   r   i,  Tpdfium)
r?   r   r@   intrA   r   rB   strreturnztuple[int, int])r?   r   rG   list[tuple[int, int, str]]r#   z	list[str]r   r   )__name__
__module____qualname____firstlineno____doc__r   staticmethodrC   rJ   rM   r   r   r   __static_attributes____classcell__)r2   s   @r3   r   r      s    >D  %(T &&&'*&7:&	& &P $$3$AJ$	#$ $LMu(n$( r5   r   )r   
__future__r   r   typingr   backendsr   image_processingr   r   r	   r
   utilsr   r   r   r   r   r   baser   r   rp   r5   r3   <module>r      sC    1 " 	  - 1 , * ) / %   $ ) Zj Zr5   