
    TAi0                       S r SSKJr  SSKrSSKJrJr  SSKJr  SSK	J
r
  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKrSSKrSSKrSSKrSSKrSSKJr  SSKJr  SSKJrJrJrJ r   SSK!J"r"  SSKJ#r#  SSK$J%r%  SSK&J'r'J(r(J)r)J*r*  SSKJ+r+J,r,J-r-  SSK.J/r/  SSK0J1r1J2r2J3r3  SSK4J5r5J6r6  \Rn                  " S5      r8\)Rr                  r:        S*S jr;\<" \)S\;5        S\Rz                  R|                  l?        S r@S rAS rBS+S jrC " S S\5      rD " S  S!\5      rE\S,S" j5       rF\" S#S$S%9        S-S& j5       rG " S' S(5      rHS.S) jrIg)/zEDetailed text position and layout analysis, building on pdfminer.six.    )annotationsN)IteratorMapping)contextmanager)copysign)PathLike)Path)Any)patch)
deprecated)PDFLayoutAnalyzer)LAParamsLTCharLTPage	LTTextBox)PDFColorSpace)
PDFTextSeq)PDFTextExtractionNotAllowed)FontWidthDictPDFFontPDFSimpleFontPDFUnicodeNotDefined)PDFGraphicStatePDFResourceManagerPDFTextState)PDFPage)Matrixbbox2str
matrix2str)EncryptedPdfErrorInputFileErrorz[0-9]+c                X    [        XX#5        U R                  (       d  SU;  a  0 U l        g)zMonkeypatch pdfminer.six PDFSimpleFont.__init__.

If there is no ToUnicode and no Encoding, pdfminer.six assumes that Unicode
conversion is possible. This is incorrect, according to PDF Reference Manual
9.10.2. This patch fixes that.
EncodingN)original_pdfsimplefont_initunicode_mapcid2unicode)self
descriptorwidthsspecs       S/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/pdfinfo/layout.pypdfsimplefont__init__r,   (   s)      &?
$ 6
    __init__i   c                    U R                   S   U R                   S   -
  nUS:X  a  U R                  U R                  -
  nU[        SU R                  5      -  $ )zMonkeypatch for PScript5.dll PDFs.

The height of Type3 fonts is known to be incorrect in PScript5.dll
generated PDFs. This patch attempts to correct the height by
using the bbox height if it is available, otherwise using the
ascent and descent.
      r         ?)bboxascentdescentr   vscale)r'   hs     r+   !pdftype3font__pscript5_get_heightr8   E   sM     			!tyy|#AAvKK$,,&xT[[)))r-   c                H    U R                   [        SU R                  5      -  $ )zMonkeypatch for PScript5.dll PDFs.

The descent of Type3 fonts is known to be incorrect in PScript5.dll
generated PDFs. This patch attempts to correct the descent by
using the vscale.
r2   )r5   r   r6   r'   s    r+   "pdftype3font__pscript5_get_descentr;   S   s     <<(3444r-   c                H    U R                   [        SU R                  5      -  $ )zMonkeypatch for PScript5.dll PDFs.

The ascent of Type3 fonts is known to be incorrect in PScript5.dll
generated PDFs. This patch attempts to correct the ascent by
using the vscale.
r2   )r4   r   r6   r:   s    r+   !pdftype3font__pscript5_get_ascentr=   ]   s     ;;#t{{333r-   c                T    U R                  S5      =(       a    U R                  S5      $ )z,Check if a string is an undefined character.z(cid:))
startswithendswith)ss    r+   _is_undefined_charrC   g   s    << 4QZZ_4r-   c                     ^  \ rS rSrSrSr                        S	U 4S jjrS
S jrSS jrSS jr	Sr
U =r$ )LTStateAwareCharl   zEA subclass of LTChar that tracks text render mode at time of drawing.)
rendermode_textmatrixfontnameadvuprightsizewidthheightr3   x0x1y0y1c                X   > [         TU ]  UUUUUUUUU	U
5
        UR                  U l        g)zInitialize.N)superr.   renderrG   )r'   rI   fontfontsizescalingrisetext	textwidthtextdispncsgraphicstate	textstate	__class__s               r+   r.   LTStateAwareChar.__init__   s?     		
 $**r-   c                V   [        U[        5      (       d  g[        U R                  5      (       + =(       a    [        UR                  5      (       + nU(       a  U R                  UR                  :H  $ U R
                  UR
                  :H  =(       a    U R                  UR                  :H  $ )zCheck if characters can be combined into a textline.

We consider characters compatible if:
    - the Unicode mapping is known, and both have the same render mode
    - the Unicode mapping is unknown but both are part of the same font
F)
isinstancerE   rC   rH   rG   rJ   )r'   objboth_unicode_mappeds      r+   is_compatibleLTStateAwareChar.is_compatible   s     #/00"4JJ#
 
 0$SYY// 	 ??cnn44}},RCNN1RRr-   c                P    [        U R                  5      (       a  gU R                  $ )zGet text from this character.u   �)rC   rH   r:   s    r+   get_textLTStateAwareChar.get_text   s    djj))zzr-   c                   SU R                   R                   S[        U R                  5       S[	        U R
                  5       SU R                  < SU R                  < SU R                   SU R                  5       < S3$ )	z.Return a string representation of this object.< z matrix=z rendermode=z font=z adv=z text=>)
ra   __name__r   r3   r   rI   rG   rJ   rK   rj   r:   s    r+   __repr__LTStateAwareChar.__repr__   s     ''(		"# $ -. ///, -MM$ %88* MMO&a)	
r-   )rG   )rI   r   rW   r   rX   floatrY   rs   rZ   rs   r[   strr\   rs   r]   z"float | tuple[float | None, float]r^   r   r_   r   r`   r   returnNone)re   objectru   bool)ru   rt   )rp   
__module____qualname____firstlineno____doc__	__slots__r.   rg   rj   rq   __static_attributes____classcell__ra   s   @r+   rE   rE   l   s    OI"++ + 	+
 + + + + 5+ + &+  + 
+:S"

 

r-   rE   c                     ^  \ rS rSr% SrS\S'     S     SU 4S jjjrSU 4S jjrSS jr          SU 4S jjr	                  SS	 jr
SS
 jrSS jrSrU =r$ )TextPositionTracker   z>A page layout analyzer that pays attention to text visibility.r   r`   c                4   > [         TU ]  XU5        SU l        g)zInitialize the layout analyzer.N)rU   r.   result)r'   rsrcmgrpagenolaparamsra   s       r+   r.   TextPositionTracker.__init__   s     	(3%)r-   c                n   > [         TU ]  X5        [        U R                  UR                  5      U l        g)zBegin processing of a page.N)rU   
begin_pager   r   mediaboxcur_item)r'   pagectmra   s      r+   r   TextPositionTracker.begin_page   s&    4%t{{DMM:r-   c                   U R                   (       a#   [        [        U R                   5      5      5       e[        U R                  [
        5      (       d#   [        [        U R                  5      5      5       eU R                  b%  U R                  R                  U R                  5        U =R                  S-  sl	        U R                  U R                  5        g)zEnd processing of a page.Nr1   )_stackrt   lenrd   r   r   typer   analyzer   receive_layout)r'   r   s     r+   end_pageTextPositionTracker.end_page   s    ;;5C$4 55$--00J#d4==6I2JJ0==$MM!!$--0qDMM*r-   c                f   > UR                  5       U l        [        TU ]  U R                  X#U5        g)z6Respond to render string event by updating text state.N)copyr`   rU   render_string)r'   r`   seqr^   r_   ra   s        r+   r   !TextPositionTracker.render_string   s(     #)dnncEr-   c	                    UR                  U5      n	[        U	[        5      (       d   [        [        U	5      5      5       e UR                  U5      n
UR                  U5      n[        UUUUUU	U
UUUU R                  5      nU R                  R                  U5        UR                  $ ! [         a    U R                  X&5      n	 Nf = f)z4Respond to render char event by updating text state.)	to_unichrrd   rt   r   r   handle_undefined_char
char_width	char_disprE   r`   r   addrK   )r'   rI   rW   rX   rY   rZ   cidr^   r_   r[   r\   r]   items                r+   render_charTextPositionTracker.render_char   s    	9>>#&DdC((9#d4j/9( OOC(	>>#&NN
 	$xx% $ 	9--d8D	9s   ?B* *CCc                    Xl         g)zReceive layout handler.Nr   )r'   ltpages     r+   r   "TextPositionTracker.receive_layout  s    r-   c                    U R                   $ )zGet the result of the analysis.r   r:   s    r+   
get_resultTextPositionTracker.get_result  s    {{r-   )r   r   r`   )r1   N)r   r   r   intr   zLAParams | None)r   r   r   r   ru   rv   )r   r   ru   rv   )
r`   r   r   r   r^   r   r_   r   ru   rv   )rI   r   rW   r   rX   rs   rY   rs   rZ   rs   r   r   r^   r   r_   r   ru   rs   )r   r   ru   rv   )ru   LTPage | None)rp   ry   rz   r{   r|   __annotations__r.   r   r   r   r   r   r   r~   r   r   s   @r+   r   r      s    H
 $(	*#* * "	* *;
+	F	F 	F 		F
 &	F 
	F!! ! 	!
 ! ! ! ! &! 
!F r-   r   c              #     #    U (       a2  [         R                  " SS[        [        [        S9   Sv   SSS5        gSv   g! , (       d  f       g= f7f)zCPatch pdfminer.six to work around bugs in PDFs created by PScript5.zpdfminer.pdffont.PDFType3FontT)r*   
get_ascentget_descent
get_heightN)r   multipler=   r;   r8   )pscript5_modes    r+   patch_pdfminerr     sC      ^^+8:8
 
 
 	
 
s   ,AAA
AAz16.6.0zUse PdfMinerState instead.)deprecated_indetailsc           	     Z   [         R                  R                  SS9nSn[        U[	        SSUS9S9n[         R                  R                  X55      n[        U5          [        U 5      R                  S5       n[        R                  " Xq/SS9n[        US5      n	U	c  [        S	U S
35      eUR                  U	5        SSS5        SSS5        UR!                  5       $ ! , (       d  f       N&= f! [         a  n
[        5       U
eSn
A
ff = f! , (       d  f       UR!                  5       $ = f)'Get the page analysis for a given page.TcachingN	all_textsdetect_vertical
boxes_flowr   rbr   )pagenosmaxpages pdfminer could not process page  (counting from 0).)pdfminer	pdfinterpr   r   r   PDFPageInterpreterr   r	   openr   	get_pagesnextr!   process_pager   r    r   )infiler   r   rmandisable_boxes_flowdevinterpf	page_iterr   es              r+   get_page_analysisr   %  s   
 000>D
D=O
C 224=F		&
	-f""4(A#--aAN	It,<(:6(BUV  ##D) ) 
' >> )( + 	-#%1,	- 
'	& >>sO   DC09AC?C0
C-	)C0,D-C00
D
:DD

D
D*c                  8    \ rS rSrSrS	S jrS rS rS
S jrSr	g)PdfMinerStateiD  zProvide a context manager for using pdfminer.six.

This ensures that the file is closed. It also provides a cache of pages
from the PDF so that they can be reused if needed, to improve performance.
c                    Xl         [        R                  R                  SS9U l        SU l        SU l        / U l        X l        SU l	        g)zInitialize the context manager.

Args:
    infile: The path to the PDF file to be analyzed.
    pscript5_mode: Whether the PDF was generated by PScript5.dll.
Tr   N)
r   r   r   r   r   r   r   
page_cacher   file)r'   r   r   s      r+   r.   PdfMinerState.__init__K  sG     &&99$9G	"&)+*	r-   c                    [        U R                  5      R                  S5      U l        [        R
                  " U R                  5      U l        U $ )zEnter the context manager.r   )r	   r   r   r   r   r   r   r:   s    r+   	__enter__PdfMinerState.__enter__Z  s8    %**40	 **4995r-   c                Z    U R                   (       a  U R                   R                  5         g)zExit the context manager.T)r   close)r'   exc_type	exc_value	tracebacks       r+   __exit__PdfMinerState.__exit__`  s    99IIOOr-   c           	        [        U R                  5      U::  aJ   U R                  R                  [        U R                  5      5        [        U R                  5      U::  a  MJ  U R                  U   nU(       d  [        SU S35      e[        U R                  [        SSU R                  S9S9n[        R                  R                  U R                  U5      n[        U R                  5         UR!                  U5        SSS5        UR#                  5       $ ! [
         a    [        SU S35      ef = f! , (       d  f       UR#                  5       $ = f)	r   zpdfminer did not find page z in the input file.r   r   Tr   r   N)r   r   appendr   r   StopIterationr!   r   r   r   r   r   r   r   r   r   r   r   )r'   r   r   r   r   s        r+   r   PdfMinerState.get_page_analysisf  s,   $//"f,&&tDNN';< $//"f, v& 26(:MN  "IIAXAX
 ##66tyy#FD../% 0 ~~) ! $1&9LM " 0/ ~~s   .D /D6D36
E)r   r   r   r   r   r   r   N)r   r	   r   rx   ru   rv   )r   r   )
rp   ry   rz   r{   r|   r.   r   r   r   r~    r-   r+   r   r   D  s     r-   r   c              #     #    U  H2  n[        U[        5      (       a  Uv   M   [        U5       Sh  vN   M4     g N	! [         a     MF  f = f7f)z0Get the text boxes attached to the current node.N)rd   r   get_text_boxes	TypeError)re   childs     r+   r   r     sJ     ei))K)%000 
 1 s1   !A><>A>
AAAA)r(   Mapping[str, Any]r)   r   r*   r   ru   rv   )rB   rt   ru   rx   )r   rx   )r   r   r   r   r   rx   ru   r   )ru   zIterator[LTTextBox])Jr|   
__future__r   recollections.abcr   r   
contextlibr   mathr   osr   pathlibr	   typingr
   unittest.mockr   r   pdfminer.encodingdbpdfminer.pdfdevicepdfminer.pdfinterppdfminer.psparserdeprecationr   pdfminer.converterr   pdfminer.layoutr   r   r   r   pdfminer.pdfcolorr   r   pdfminer.pdfdocumentr   pdfminer.pdffontr   r   r   r   r   r   r   pdfminer.pdfpager   pdfminer.utilsr   r   r   ocrmypdf.exceptionsr    r!   compile
STRIP_NAMEr.   r$   r,   setattrpsparserPSBaseParserBUFSIZr8   r;   r=   rC   rE   r   r   r   r   r   r   r-   r+   <module>r     sb   L " 	 - %           " 0 ? ? + ) < X X P P $ 7 7 AZZ	"
 ,44 !  	
 
* z#8 9
 ):     %*545
R
v R
jQ+ Qh   (,HI!26 J<;  ; |	r-   