
    SAit                         S SK r S SKrS SKJrJrJrJrJrJrJ	r	J
r
Jr  S SKJr  S SKJrJrJr  S SKJrJr  S SKJr  S SKJrJrJrJr  S SKJr  S S	KJrJ r   \RB                  " \"5      r#\" S
5      r$\" S5      r% " S S5      r&g)    N)	AnyBinaryIO	ContainerDictIteratorListOptionalSetTuple)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectPagePagesc                      \ rS rSrSrS\S\S\S\\   SS4
S	 jr	S\4S
 jr
1 Skr\S\S\S    4S j5       r\     SS\S\\\      S\S\S\S\S\S    4S jj5       rS\S\4S jrS\S\S\4S jrS\S\\   4S jrSrg)PDFPage   a2  An object that holds the information about a page.

A PDFPage object is merely a convenience class that has a set
of keys and values, which describe the properties of a page
and point to its contents.

Attributes
----------
  doc: a PDFDocument object.
  pageid: any Python object that can uniquely identify the page.
  attrs: a dictionary of page attributes.
  contents: a list of PDFStream objects that represents the page content.
  lastmod: the last modified time of the page.
  resources: a dictionary of resources used by the page.
  mediabox: the physical size of the page.
  cropbox: the crop rectangle of the page.
  rotate: the page rotation (in degree).
  annots: the page annotations.
  beads: a chain that represents natural reading order.
  label: the page's label (typically, the logical page number).

docpageidattrslabelreturnNc                    Xl         X l        [        U5      U l        X@l        [        U R                  R                  S5      5      U l        [        U R                  R                  S[        5       5      5      U l	        U R                  U R                  R                  S5      5      U l        U R                  U R                  R                  S5      U R                  5      U l        U R                  U R                  R                  S5      5      U l        [!        U R                  R                  SS5      5      S-   S-  U l        U R                  R                  S	5      U l        U R                  R                  S
5      U l        g)zInitialize a page object.

doc: a PDFDocument object.
pageid: any Python object that can uniquely identify the page.
attrs: a dictionary of page attributes.
label: page label string.
LastModified	ResourcesMediaBoxCropBoxContentsRotater   ih  AnnotsBN)r   r    r   r!   r"   r   getlastmoddict	resources_parse_mediaboxmediabox_parse_cropboxcropbox_parse_contentscontentsr   rotateannotsbeads)selfr   r    r!   r"   s        L/var/www/html/land-ocr/venv/lib/python3.13/site-packages/pdfminer/pdfpage.py__init__PDFPage.__init__0   s
    &



~ >?/7JJNN;/0
 ,,TZZ^^J-GH**4::>>)+DdmmT,,TZZ^^J-GH !!<=CsJjjnnX.ZZ^^C(
    c                 @    SU R                   < SU R                  < S3$ )Nz<PDFPage: Resources=z, MediaBox=>)r0   r2   )r:   s    r;   __repr__PDFPage.__repr__O   s"    %dnn%7{4==BSSTUUr>   >   r*   r(   r'   r&   documentc              #   <  ^ ^^	#     S
S[         S[        [        [         4   S[        [        [               S[
        [        [        [        [         [        [         [         4   4   4      4U U	U4S jjjm	 TR                  5       nSnSTR                  ;   aB  T	" TR                  S   TR                  5      nU H  u  pVT " TXV[        U5      5      v   SnM     U(       d  TR                   Hr  nUR                  5        H[  n TR!                  U5      n[#        U[$        5      (       a1  UR'                  S	5      [(        L a  T " TXX[        U5      5      v   MY  M[  M]     Mt     g g ! [         a    [        R                  " S 5      n GNf = f! [*         a     M  f = f7f)Nobjparentvisitedr#   c              3     >#    [        U [        5      (       a+  U n[        TR                  U5      5      R	                  5       nO%U R
                  n[        U 5      R	                  5       nUc
  [        5       nX2;   a  g UR                  U5        UR                  5        H"  u  pVUT	R                  ;   d  M  XT;  d  M  XdU'   M$     UR                  S5      nUc&  [        R                  (       d  UR                  S5      nU[        L aF  SU;   a@  [        R                  SUS   5        [!        US   5       H  nT
" XU5       S h  vN   M     g U["        L a  [        R                  SU5        X44v   g g  N/7f)NTypetypeKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsINHERITABLE_ATTRSr-   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)rE   rF   rG   	object_idobject_propertieskvobject_typechildclsdepth_first_searchrC   s            r;   ra   0PDFPage.create_pages.<locals>.depth_first_searchV   sK    
 #s##	$.xy/I$J$O$O$Q!  II	$.sO$8$8$:! %#KK	"---!2L+,a( ' ,//7K"8??/33F;m+:K0K		*,=f,EF'(9&(ABE1%GTTT C ,		*&78 44 - Us   B-E44E4;BE4E20E4Fr   TrI   N)r   r   strr	   r
   r   r   rM   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrN   rL   r/   r-   rY   r   )
r`   rC   page_labelspagesobjectsrP   treexrefrE   ra   s
   ``       @r;   create_pagesPDFPage.create_pagesT   s~    
 +/$	5$	5cN$	5 c#h'$	5 eCc4S>&9!::;<	$	5 $	5L	13;3K3K3MK h&&&()9)9')BHDTDTUG&(Ek1BCC  '  !__.E&ooe4%c400SWWV_5T"%hD<M"NN 6U0 / '   	1#**40K	1" - sP   A/F5E$ A=FAFF$ FFFF
FFFFfppagenosmaxpagespasswordcachingcheck_extractablec              #   J  #    [        U5      n[        XtUS9nUR                  (       d1  U(       a  SU-  n	[        U	5      eSU-  n
[        R                  U
5        [        U R                  U5      5       H*  u  pU(       a  X;  a  M  Uv   U(       d  M   X;S-   ::  d  M*    g    g 7f)N)rv   rw   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   is_extractabler   rW   warning	enumeraterq   )r`   rs   rt   ru   rv   rw   rx   parserr   	error_msgwarning_msgpagenopages                r;   	get_pagesPDFPage.get_pages   s      2&WE !! @2E	1)<<A DF	F  K(%c&6&6s&;<LFF1JxH
2 =s   BB#B#B#valuec                     SnUc  [         R                  S5        U$  [        S [        U5       5       5      $ ! [         a    [         R                  S5        Us $ f = f)N)        r   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc              3   8   #    U  H  n[        U5      v   M     g 7frc   r   .0vals     r;   	<genexpr>*PDFPage._parse_mediabox.<locals>.<genexpr>        Ghsmm   z2Invalid MediaBox in /Page, defaulting to US Letter)rW   r|   r   r   r   )r:   r   	us_letters      r;   r1   PDFPage._parse_mediabox   sa    ,	=KK* 	GxGGG 	KKLM	s   9 !AAr2   c                     Uc  U$  [        S [        U5       5       5      $ ! [         a    [        R	                  S5        Us $ f = f)Nc              3   8   #    U  H  n[        U5      v   M     g 7frc   r   r   s     r;   r   )PDFPage._parse_cropbox.<locals>.<genexpr>   r   r   z0Invalid CropBox in /Page, defaulting to MediaBox)r   r   r   rW   r|   )r:   r   r2   s      r;   r3   PDFPage._parse_cropbox   sG    =O	GxGGG 	KKJKO	s   " !AAc                 V    / nUb#  [        U5      n[        U[        5      (       d  U/nU$ rc   )r   rL   list)r:   r   r6   s      r;   r5   PDFPage._parse_contents   s/     Hh--$:r>   )r8   r!   r9   r6   r4   r   r"   r.   r2   r    r0   r7   )Nr    TF)__name__
__module____qualname____firstlineno____doc__r   objectr	   rd   r<   rA   rT   classmethodr   rq   r   r   rM   boolr   r   r   r1   r3   r   r5   __static_attributes__ r>   r;   r   r      s9   .)) ) 	)
 }) 
)>V# V G;K ;HY4G ; ;z  -1"'"" )C.)" 	"
 " "  " 
)	" "HS T "
C 
4 
D 
S T#Y r>   r   )'rf   loggingtypingr   r   r   r   r   r   r	   r
   r   pdfminerr   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   r   	getLoggerr   rW   rY   rV   r   r   r>   r;   <module>r      sf      W W W  
 D ( I I ! +! 6{GB Br>   