
    TAi9                         S r SSKrSSKJr  SSKJr  SSKJr  SSKJ	r	  SSK
Jr  SS	K
Jr         SS
\\\4   4S jjrg)zBIO related functions to Read the PDF and returns extracted tables.    N)Path)Union)StrByteType   )
PDFHandler)remove_extra)validate_inputfilepathc           	      2   Uc  0 nUS;  a  [        S5      e[        R                  " 5          U(       a  [        R                  " S5        [	        XS9  [        XX'S9n	[        XS9nU	R                  " SUUUUS.UD6n
U
sSSS5        $ ! , (       d  f       g= f)	a  Read PDF and return extracted tables.

Note: kwargs annotated with ^ can only be used with flavor='stream' or flavor='network'
and kwargs annotated with * can only be used with flavor='lattice'.
The hybrid parser accepts kwargs with both annotations.

Parameters
----------
filepath : str, Path, IO
    Filepath or URL of the PDF file.
pages : str, optional (default: '1')
    Comma-separated page numbers.
    Example: '1,3,4' or '1,4-end' or 'all'.
password : str, optional (default: None)
    Password for decryption.
flavor : str (default: 'lattice')
    The parsing method to use ('lattice', 'stream', 'network' or 'hybrid').
    Lattice is used by default.
suppress_stdout : bool, optional (default: False)
    Print all logs and warnings.
parallel : bool, optional (default: False)
    Process pages in parallel using all available cpu cores.
layout_kwargs : dict, optional (default: {})
    A dict of `pdfminer.layout.LAParams
    <https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
table_areas : list, optional (default: None)
    List of table area strings of the form x1,y1,x2,y2
    where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    in PDF coordinate space.
columns^ : list, optional (default: None)
    List of column x-coordinates strings where the coordinates
    are comma-separated.
split_text : bool, optional (default: False)
    Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
    Flag text based on font size. Useful to detect
    super/subscripts. Adds <s></s> around flagged text.
strip_text : str, optional (default: '')
    Characters that should be stripped from a string before
    assigning it to a cell.
row_tol^ : int, optional (default: 2)
    Tolerance parameter used to combine text vertically,
    to generate rows.
column_tol^ : int, optional (default: 0)
    Tolerance parameter used to combine text horizontally,
    to generate columns.
process_background* : bool, optional (default: False)
    Process background lines.
line_scale* : int, optional (default: 40)
    Line size scaling factor. The larger the value the smaller
    the detected lines. Making it very large will lead to text
    being detected as lines.
copy_text* : list, optional (default: None)
    {'h', 'v'}
    Direction in which text in a spanning cell will be copied
    over.
shift_text* : list, optional (default: ['l', 't'])
    {'l', 'r', 't', 'b'}
    Direction in which text in a spanning cell will flow.
line_tol* : int, optional (default: 2)
    Tolerance parameter used to merge close vertical and horizontal
    lines.
joint_tol* : int, optional (default: 2)
    Tolerance parameter used to decide whether the detected lines
    and points lie close to each other.
threshold_blocksize* : int, optional (default: 15)
    Size of a pixel neighborhood that is used to calculate a
    threshold value for the pixel: 3, 5, 7, and so on.

    For more information, refer `OpenCV's adaptiveThreshold
    <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
threshold_constant* : int, optional (default: -2)
    Constant subtracted from the mean or weighted mean.
    Normally, it is positive but may be zero or negative as well.

    For more information, refer `OpenCV's adaptiveThreshold
    <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
iterations* : int, optional (default: 0)
    Number of times for erosion/dilation is applied.

    For more information, refer `OpenCV's dilate
    <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
backend* : str, optional by default "pdfium"
    The backend to use for converting the PDF to an image so it can be processed by OpenCV.
use_fallback* : bool, optional
    Fallback to another backend if unavailable, by default True
resolution* : int, optional (default: 300)
    Resolution used for PDF to PNG conversion.

Returns
-------
tables : camelot.core.TableList

N)latticestreamnetworkhybridzOUnknown flavor specified. Use either 'lattice', 'stream', 'network' or 'hybrid'ignore)flavor)pagespassworddebug)r   suppress_stdoutparallellayout_kwargs )NotImplementedErrorwarningscatch_warningssimplefilterr	   r   r   parse)r
   r   r   r   r   r   r   r   kwargsptabless              F/var/www/html/land-ocr/venv/lib/python3.13/site-packages/camelot/io.pyread_pdfr"      s    R ??!E
 	

 
	 	 	"!!(+v-xxMf4 
+'	

 
  
#	"	"s   AB
B)1Nr   FFNF)__doc__r   pathlibr   typingr   pypdf._utilsr   handlersr   utilsr   r	   r"   r       r!   <module>r+      sM    H    $    !
 
K%&r*   