
    SAig              #          S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
Jr  SSKJrJrJrJrJr  SSKJr  SSKJrJr  SSKJrJr  SS	KJr  SS
KJrJr  SSKJ r   SSK!J"r"J#r#J$r$               S#S\S\"S\%S\%S\
\   S\&S\
\\&      S\%S\'S\&S\%S\
\%   S\(S\(S\(S\SS4"S jjr)      S$S\#S\%S\
\\&      S\&S \(S\%S\
\   S\%4S! jjr*     S%S\#S\%S\
\\&      S\&S \(S\
\   S\	\   4S" jjr+g)&zIFunctions that can be used for the most common use-cases for pdfminer.six    N)StringIO)AnyBinaryIO	ContainerIteratorOptionalcast)HOCRConverterHTMLConverterPDFPageAggregatorTextConverterXMLConverter)ImageWriter)LAParamsLTPage)	PDFDeviceTagExtractor)PDFValueError)PDFPageInterpreterPDFResourceManager)PDFPage)AnyIO
FileOrNameopen_filenameinfoutfpoutput_typecodeclaparamsmaxpagespage_numberspasswordscalerotation
layoutmode
output_dirstrip_controldebugdisable_cachingkwargsreturnc           
         U(       a2  [         R                  " 5       R                  [         R                  5        SnU(       a  [	        U5      n[        U(       + S9nSnUS:w  a.  U[        R                  :X  a  [        R                  R                  nUS:X  a  [        UUUUUS9nOoUS:X  a  [        UUUUUUS9nOZUS:X  a  [        UUUUU
UUS9nODUS	:X  a  [        UUUUUS
9nO0US:X  a  [        U[        [        U5      US9nOSU 3n[!        U5      eUc   e[#        UU5      n[$        R&                  " U UUUU(       + S9 H+  nUR(                  U	-   S-  Ul        UR+                  U5        M-     UR-                  5         g)a  Parses text from inf-file and writes to outfp file-like object.

Takes loads of optional arguments but the defaults are somewhat sane.
Beware laparams: Including an empty LAParams is not the same as passing
None!

:param inf: a file-like object to read PDF structure from, such as a
    file handler (using the builtin `open()` function) or a `BytesIO`.
:param outfp: a file-like object to write the text to.
:param output_type: May be 'text', 'xml', 'html', 'hocr', 'tag'.
    Only 'text' works properly.
:param codec: Text decoding codec
:param laparams: An LAParams object from pdfminer.layout. Default is None
    but may not layout correctly.
:param maxpages: How many pages to stop parsing after
:param page_numbers: zero-indexed page numbers to operate on.
:param password: For encrypted PDFs, the password to decrypt.
:param scale: Scale factor
:param rotation: Rotation factor
:param layoutmode: Default is 'normal', see
    pdfminer.converter.HTMLConverter
:param output_dir: If given, creates an ImageWriter for extracted images.
:param strip_control: Does what it says on the tin
:param debug: Output more logging data
:param disable_caching: Does what it says on the tin
:param other:
:return: nothing, acting as it does on two streams. Use StringIO to get
    strings.
Ncachingtext)r   r   imagewriterxml)r   r   r0   stripcontrolhtml)r   r#   r%   r   r0   hocr)r   r   r2   tag)r   z1Output type can be text, html, xml or tag but is r    r"   r.   ih  )logging	getLoggersetLevelDEBUGr   r   sysstdoutbufferr   r   r   r
   r   r	   r   r   r   r   	get_pagesrotateprocess_pageclose)r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r0   rsrcmgrdevicemsginterpreterpages                         O/var/www/html/land-ocr/venv/lib/python3.13/site-packages/pdfminer/high_level.pyextract_text_to_fprH      s   ^ $$W]]3K!*- _)<=G"&Ff#**!4

!!f#
 
	#&
 
	!#
 
	&
 
	gtHe'<EJ B+OC  $Wf5K!!## {{X-4  & LLN    pdf_filer.   c           
         Uc
  [        5       n[        U S5       n[        5        n[        [        U5      n[        US9n	[        XXVS9n
[        X5      n[        R                  " UUUUUS9 H  nUR                  U5        M     UR                  5       sSSS5        sSSS5        $ ! , (       d  f       O= fSSS5        g! , (       d  f       g= f)aK  Parse and return the text contained in a PDF file.

:param pdf_file: Either a file path or a file-like object for the PDF file
    to be worked on.
:param password: For encrypted PDFs, the password to decrypt.
:param page_numbers: List of zero-indexed page numbers to extract.
:param maxpages: The maximum number of pages to parse
:param caching: If resources should be cached
:param codec: Text decoding codec
:param laparams: An LAParams object from pdfminer.layout. If None, uses
    some default settings that often work well.
:return: a string containing all of the text extracted.
Nrbr-   )r   r   r6   )r   r   r   r	   r   r   r   r   r   r>   r@   getvalue)rJ   r"   r!   r    r.   r   r   fpoutput_stringrB   rC   rE   rF   s                rG   extract_textrP      s    , :	x	&"hjM(B$W5wUV(9%%
D $$T*
 %%' /9j	&	&jj	&	&	&s#   C A.B&	C &
B4	0C  
Cc           	   #   T  #    Uc
  [        5       n[        U S5       n[        [        U5      n[	        US9n[        XuS9n[        Xx5      n	[        R                  " UUUUUS9 H(  n
U	R                  U
5        UR                  5       nUv   M*     SSS5        g! , (       d  f       g= f7f)a  Extract and yield LTPage objects

:param pdf_file: Either a file path or a file-like object for the PDF file
    to be worked on.
:param password: For encrypted PDFs, the password to decrypt.
:param page_numbers: List of zero-indexed page numbers to extract.
:param maxpages: The maximum number of pages to parse
:param caching: If resources should be cached
:param laparams: An LAParams object from pdfminer.layout. If None, uses
    some default settings that often work well.
:return: LTPage objects
NrL   r-   )r   r6   )r   r   r	   r   r   r   r   r   r>   r@   
get_result)rJ   r"   r!   r    r.   r   rN   resource_managerrC   rE   rF   layouts               rG   extract_pagesrU      s     ( :	x	&"(B-g>"#3G()9B%%
D $$T*&&(FL
 
'	&	&s   B(A2B	B(
B%!B()r/   utf-8Nr   N g      ?r   normalNFFF)rW   Nr   TrV   N)rW   Nr   TN),__doc__r7   r;   ior   typingr   r   r   r   r   r	   pdfminer.converterr
   r   r   r   r   pdfminer.imager   pdfminer.layoutr   r   pdfminer.pdfdevicer   r   pdfminer.pdfexceptionsr   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.utilsr   r   r   strintfloatboolrH   rP   rU    rI   rG   <module>ri      s   O  
  E E  ' , 6 0 E $ ; ; #'-1 $!w	ww w 	w
 x w w 9S>*w w w w w w w w w  !w" 
#wx -1#'(((((( 9S>*(( 	((
 (( (( x (( 	((Z -1#'%%% 9S>*% 	%
 % x % f%rI   