
    SAi/c                        S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	J
r
JrJrJrJr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJr  S SKJrJ r   S S	K!J"r"  S S
K#J$r$  SSK%J&r&  SSK'J(r(J)r)J*r*J+r+  SSK,J-r-  SSK.J/r/J0r0  SSK1J2r2J3r3J4r4J5r5  SSK&J6r6J7r7J8r8  SSK9J:r:J;r;  SSK<J=r=  \R|                  " S5      r?\@" / SQ5      rA\(       a  SSKBJCrC  SSKDJErE  SSSSSS.rFS\GS\H4S  jrIS!\\H\4   S\\H\4   4S" jrJ " S# S$\5      rKS5S%\(S&\)S\(4S' jjrLS%\(S(\)S\(4S) jrM " S* S+\-5      rN " S, S-\N5      rOS.\(S/\(SS4S0 jrP " S1 S2\O5      rQ " S3 S4\O5      rRg)6    N)	lru_cache)
TYPE_CHECKINGAnyCallableDict	GeneratorListOptionalPatternTupleUnion)	normalize)warn)PDFPageAggregator)LTCharLTComponentLTContainerLTCurveLTItemLTPageLTTextContainer)PDFPageInterpreter	PDFStackT)PDFPage)	PSLiteral   )utils)T_bboxT_numT_obj
T_obj_list)	Container)PDFStructTreeStructTreeMissing)T_table_settingsTableTableFinderTableSettings)decode_textresolve_allresolve_and_decode)MalformedPDFExceptionPdfminerException)TextMapz^LT)advheight	linewidthptssizesrcsizewidthx0x1y0y1bitsmatrixuprightfontnametext	imagemask
colorspaceevenoddfillnon_stroking_colorstrokestroking_colorstreamnamemcidtag)	PageImage)PDFzSimSun,RegularzSimHei,RegularzSimKai,RegularzSimFang,RegularzSimLi,Regular)s   s   s   _GB2312s   _GB2312s   r=   returnc                     SU ;   a  U R                  S5      S-   nU S U XS  p2OSU p2[        R                  U[        U5      SS 5      n[        U5      SS U-   $ )N   +r          )indexCP936_FONTNAMESgetstr)r=   split_atprefixsuffix
suffix_news        K/var/www/html/land-ocr/venv/lib/python3.13/site-packages/pdfplumber/page.pyfix_fontname_bytesr[   \   sj    x>>$'!+!)8,hy.Ah $$VS[2->?Jv;qz))rO   kwargsc           	          U R                  5        VVs0 s H(  u  pU[        U[        5      (       a  [        U5      OU_M*     snn$ s  snnf N)items
isinstancelisttuple)r\   keyvalues      rZ   tuplify_list_kwargsre   g   sH     !,,.(JC 	j55eEl5@(  s   /Ac                      ^  \ rS rSr% SrSr\\   \S'   Sr	\\
   \S'   SS\S\\   SS4S	 jjrSS
 jrSS jrS\4U 4S jjrSU 4S jjrSU 4S jjrSrU =r$ )"PDFPageAggregatorWithMarkedContentn   zVExtract layout from a specific page, adding marked-content IDs to
objects where found.Ncur_mcidcur_tagrI   propsrL   c                     [        UR                  5      U l        [        U[        5      (       a  SU;   a  US   U l        gSU l        g)z5Handle beginning of tag, setting current MCID if any.MCIDN)r)   rG   rj   r`   dictri   )selfrI   rk   s      rZ   	begin_tag,PDFPageAggregatorWithMarkedContent.begin_tagu   s7    "388,eT""v!&MDM DMrO   c                      SU l         SU l        g)z/Handle beginning of tag, clearing current MCID.N)rj   ri   ro   s    rZ   end_tag*PDFPageAggregatorWithMarkedContent.end_tag}   s    rO   c                     U R                   R                  (       a<  U R                   R                  S   nU R                  Ul        U R                  Ul        gg)zVAdd current MCID to what we hope to be the most recent object created
by pdfminer.six.rQ   N)cur_item_objsri   rH   rj   rI   )ro   cur_objs     rZ   tag_cur_item/PDFPageAggregatorWithMarkedContent.tag_cur_item   s@     ==mm))"-G==GL,,GK rO   c                 H   > [         TU ]  " U0 UD6nU R                  5         U$ )z;Hook for rendering characters, adding the `mcid` attribute.)superrender_charrz   )ro   argsr\   r/   	__class__s       rZ   r~   .PDFPageAggregatorWithMarkedContent.render_char   s(    g!4262
rO   c                 F   > [         TU ]  " U0 UD6  U R                  5         g)z7Hook for rendering images, adding the `mcid` attribute.N)r}   render_imagerz   ro   r   r\   r   s      rZ   r   /PDFPageAggregatorWithMarkedContent.render_image   s!    d-f-rO   c                 F   > [         TU ]  " U0 UD6  U R                  5         g)zAHook for rendering lines and curves, adding the `mcid` attribute.N)r}   
paint_pathrz   r   s      rZ   r   -PDFPageAggregatorWithMarkedContent.paint_path   s!    D+F+rO   )ri   rj   r^   rL   N)__name__
__module____qualname____firstlineno____doc__ri   r
   int__annotations__rj   rU   r   r   rp   rt   rz   floatr~   r   r   __static_attributes____classcell__r   s   @rZ   rg   rg   n   sj     #Hhsm"!GXc]!!Y !x	/B !d !
'e 
 rO   rg   box_rawrotationc                     [        S U  5       5      (       d  [        SU  35      e[        U S   U S   45      u  p#[        U S   U S   45      u  pEUS;   a  XBXS4$ X$X54$ )Nc              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7fr^   )r`   numbersNumber.0xs     rZ   	<genexpr>!_normalize_box.<locals>.<genexpr>   s     >gz!W^^,,gs   ')z0Bounding box contains non-number coordinate(s): r   rP   r      )Z   i  )allr,   sorted)r   r   r6   r7   r8   r9   s         rZ   _normalize_boxr      s~    
 >g>>>#>wiH
 	
 WQZ,-FBWQZ,-FB9rO   	mb_heightc                      U u  p#pEX!U-
  XAU-
  4$ r^    )r   r   r6   r8   r7   r9   s         rZ   _invert_boxr      s    NBBBN33rO   c                   t   \ rS rSr% \R
                  S/-   r\\   \S'   Sr	\
\S'   Sr SGSSS	\S
\S\4S jjrSHS jr\S\4S j5       r\S\4S j5       r\S\\\\4      4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\\\4   4S j5       rS\\\4   S\\\4   4S jrS\S\ 4S jr!S\\"   S\#\ SS4   4S jr$S\\\4   4S jr% SIS\&\'   S\(4S jjr) SIS\&\'   S\\*   4S jjr+ SIS\&\'   S\&\*   4S  jjr, SIS\&\'   S\\\\&\            4S! jjr- SIS\&\'   S\&\\\&\            4S" jjr.S#\S\/4S$ jr0     SJS%\1\\2\   4   S&\
S'\
S(\S)\
S*\
S#\S\\\\4      4S+ jjr3S#\S\4S, jr4S#\S\4S- jr5S#\S\4S. jr6 SKS/\
S)\
S#\S\4S0 jjr7 SLS1\8S2\
S3\
SS44S5 jjr9 SLS1\8S2\
S3\
SS44S6 jjr: SLS1\8S2\
S3\
SS44S7 jjr;S8\<\ /\
4   SS94S: jr=S#\SS94S; jr>     SMS<\&\1\\?4      S=\&\1\\?4      S>\&\1\\?4      S?\
S@\
SSA4SB jjr@SISC\&\\      S\\\4   4SD jjrAS\4SE jrBSFrCg)NPage   _layoutcached_propertiesTis_originalNpdfrK   page_objpage_numberinitial_doctopc                 h  ^ Xl         X l        TU l        X0l        X@l        SS[
        S[        S[        4U4S jjjnU" SS5      nUS-  U l        [        U" S5      U R                  5      nUS	   US
   -
  n[        Xx5      U l
        S HU  n	U	TR                  ;   d  M  [        [        U" U	5      U R                  5      U5      n
[        X	R                  5       U
5        MW     STR                  ;  a  U R                  U l        U R                  U l        [!        5       " U R"                  5      U l        g )Nrc   defaultrL   c                 Z   > [        TR                  R                  U 5      5      nUc  U$ U$ r^   )r*   attrsrT   )rc   r   rd   r   s      rZ   get_attrPage.__init__.<locals>.get_attr   s+     2 23 78E#m766rO   Rotater   ih  MediaBoxr   r   )CropBoxTrimBoxBleedBoxArtBoxr   r^   )r   	root_pager   r   r   rU   r   r   r   r   mediaboxr   setattrlowercropboxbboxr   _get_textmapget_textmap)ro   r   r   r   r   r   	_rotationmb_rawr   box_namebox_normalizeds     `        rZ   __init__Page.__init__   s     &,	7# 	7 	7s 	7 	7 Xq)	!C 4dmmD1Iq	)	#F6DH8>>)!,"8H#5t}}Ey" nn.? E HNN*==DL MM	 %;t'8'89rO   rL   c                 X    U R                  5         U R                  R                  5         g r^   )flush_cacher   cache_clearrs   s    rZ   close
Page.close   s     $$&rO   c                 @    U R                   S   U R                   S   -
  $ )NrP   r   r   rs   s    rZ   r5   
Page.width       yy|diil**rO   c                 @    U R                   S   U R                   S   -
  $ )Nr   r   r   rs   s    rZ   r0   Page.height   r   rO   c                      [        U R                  U 5       Vs/ s H  oR                  5       PM     sn$ s  snf ! [         a    / s $ f = f)z-Return the structure tree for a page, if any.)r#   r   to_dictr$   )ro   elems     rZ   structure_treePage.structure_tree   sD    	/<TXXt/LM/LtLLN/LMMM  	I	s   ; 6; ; A
	A
c                    [        U S5      (       a  U R                  $ [        U R                  R                  U R
                  U R                  R                  S9n[        U R                  R                  U5      n UR                  U R                  5        UR                  5       U l        U R                  $ ! [         a  n[        U5      eS nAff = f)Nr   )pagenolaparams)hasattrr   rg   r   rsrcmgrr   r   r   process_pager   	Exceptionr-   
get_result)ro   deviceinterpreteres       rZ   layoutPage.layout   s    4##<<3HH##XX&&

 ))9)96B	'$$T]]3  &002||  	'#A&&	's   <B8 8
CCCc                 `  ^ ^ S[         [        [        4   S[        S[         [        [        4   4U 4S jjmS[        S[        4UU 4S jjn[	        T R
                  R                  5      =(       d    / n[        [        X5      5      n[        T [        5      (       a  T R                  U5      $ U$ )NptrrL   c                    > US-  n[        U5       H-  nU u  pEX2S-  :X  a  TR                  OTR                  nXVU-
  4n M/     U $ )Nr   rP   )ranger5   r0   )r   r   turnsir   ycompro   s          rZ   rotate_point!Page.annots.<locals>.rotate_point  sJ    GE5\%&!)^tzz(_ " IrO   annotc                 "  > U S   u  pp4T" X4TR                   5      nT" X44TR                   5      nTR                  R                  n[        [	        / UQUQ75      U5      u  ppU R                  S0 5      nUR                  S5      U R                  S5      U R                  S5      S.nUR                  5        H  u  pUc  M
   UR                  S5      X'   M      TR                  SUX{-
  U
Xy-
  TR                  U	-   U	UX-
  X-
  S.nUR                  U5        SU ;   a  TU S'   U US'   U$ ! [         aX     UR                  S5      X'    M  ! [         a3    TR                  R                  (       a  e [        S	U S
U S35          M  f = ff = f)NRectAURITContents)urititlecontentszutf-8zutf-16zCould not decode z of annotation. z will be missing.r   )r   object_typer6   r8   r7   r9   doctoptopbottomr5   r0   Pdata)r   r   r0   r   r   rT   r_   decodeUnicodeDecodeErrorr   raise_unicode_errorsr   r   r   update)r   _a_b_c_dpt0pt1rhr6   r  r7   r  aextraskvparsedr   ro   s                    rZ   parsePage.annots.<locals>.parse  s   "6]NBBx7Cx7C&&B"-n\s\S\.JB"OBR		#r"AuuU|3!IIj1F
 =$%HHW$5	 '   $//&kh--3  ,F MM&! e|!c
"F6NM= . 	()(:FI1 #xx<< % "3A3 7$$%3&7!9 	s*   D,,
F7E8F
F	F

F)r   r   r   r    r*   r   annotsra   mapr`   CroppedPage_crop_fn)ro   r  rawr  r   s   `   @rZ   r  Page.annots  s    	U5%<0 	S 	U5%<=P 	/	 /	5 /	 /	b $--../52c%o&dK((==((MrO   c                 T    U R                    Vs/ s H  oS   c  M
  UPM     sn$ s  snf )Nr   )r  )ro   r  s     rZ   
hyperlinksPage.hyperlinksR  s"    ;;?;aE(;???s   	%%c                 ~    [        U S5      (       a  U R                  $ U R                  5       U l        U R                  $ N_objects)r   r"  parse_objectsrs   s    rZ   objectsPage.objectsV  s3    4$$== /3/A/A/C}}rO   r   c                 p    U R                   S   US   -   U R                   S   U R                  -   US   -
  4$ )Nr   r   )r   r0   )ro   r   s     rZ   point2coordPage.point2coord]  s:    a 2a5($--*:T[[*H2a5*PQQrO   objc           
      P   [         R                  " [        SUR                  R                  5      R                  5       nS[        [        [        4   S[        [        [        [        4      4S jn[        [        S [        X1R                  R                  5       5      5      5      nX$S'   U R                  US'   S H5  n[!        X5      (       d  M  [#        [%        X5      R&                  5      XE'   M7     [)        U[*        [,        45      (       aL  UR/                  5       nU R0                  R2                  b   [5        U R0                  R2                  U5      OUUS'   [)        U[*        5      (       a  UR6                  n[)        UR8                  [:        5      (       a  UR8                  OUR8                  4US	'   [)        UR<                  [:        5      (       a  UR<                  OUR<                  4US
'   [)        US   [>        5      (       a  [A        US   5      US'   O[)        U[B        45      (       ao  [E        [        U RF                  US   5      5      US'   URH                   VV	s/ s H  tpU/[        U RF                  U	5      Q7PM      sn	nUS'   URJ                  US'   U RL                  S S u  pSU;   aE  U RN                  US   -
  U-   US'   U RN                  US   -
  U-   US'   U RP                  US   -   US'   SU;   a  U
S:w  a  US   U
-   US'   US   U
-   US'   U$ s  sn	nf )N itemrL   c                 <    U u  pU[         ;   a  [        U5      nX4$ g r^   )	ALL_ATTRSr*   )r,  r  r  ress       rZ   process_attr)Page.process_object.<locals>.process_attrd  s#    DAI~!!nxrO   r   r   )ncsscsr>   rE   rC   r=   r2   pathdashrP   r8   r9   r  r  r   r6   r   r7   ))resublt_patr   r   r   r   rU   r   r
   rn   filterr  __dict__r_   r   r   r+   getattrrG   r`   r   r   get_textr   unicode_normnormalize_unicodegraphicstatescolorrb   ncolorbytesr[   r   ra   r'  original_pathdashing_styler   r0   r   )ro   r)  kindr0  attrcsr>   gscmdr2   mb_x0mb_tops               rZ   process_objectPage.process_objecta  s   vvfb#--"8"89??A	uS#X 	8E#s(O3L 	 F4\<<3E3E3G!HIJ"]"..] B s-gc.>.C.CD ! cFO455<<>D 88((4 "$(("7"7> L c6"" !!B'		599				| !" (		599				| %&
 $z*E22#5d:6F#GZ gZ((s4#3#3T%[ABDK QTPaPabPa93S>3t'7'7#=>PabDL,,DL
 bq)4<;;d3v=DK"kkDJ6&@DN!004;>DN4<EQJde+DJde+DJ% cs   '%L"layout_objectsc              #     #    U Hz  n[        U[        5      (       aO  U R                  R                  b  U R	                  U5      v   U R                  UR                  5       S h  vN   Mg  U R	                  U5      v   M|     g  N7fr^   )r`   r   r   r   rL  iter_layout_objectsrx   )ro   rN  r)  s      rZ   rP  Page.iter_layout_objects  sm      "C#{++88$$0--c2233CII>>>))#.. " ?s   A#B%B&Bc                     0 nU R                  U R                  R                  5       H9  nUS   nUS;   a  M  UR                  U5      c  / X'   X   R	                  U5        M;     U$ )Nr   )anno)rP  r   rx   rT   append)ro   r$  r)  rE  s       rZ   r#  Page.parse_objects  sh    )+++DKK,=,=>C}%Dx{{4 ( "M  % ? rO   table_settingsc                 D    [         R                  " U5      n[        X5      $ r^   )r(   resolver'   ro   rV  tsets      rZ   debug_tablefinderPage.debug_tablefinder  s     $$^44&&rO   c                 X    [         R                  " U5      n[        X5      R                  $ r^   )r(   rX  r'   tablesrY  s      rZ   find_tablesPage.find_tables  s%     $$^44&---rO   c                     [         R                  " U5      nU R                  U5      n[        U5      S:X  a  g S[        S[
        [        [        [        4   4S jn[        [        X4S95      S   nU$ )Nr   r   rL   c                 h    [        U R                  5      * U R                  S   U R                  S   4$ )Nr   r   )lencellsr   r   s    rZ   sorterPage.find_table.<locals>.sorter  s)    \M166!9affQi88rO   )rc   )
r(   rX  r_  rc  r&   r   r   r   ra   r   )ro   rV  rZ  r^  rf  largests         rZ   
find_tablePage.find_table  sm     $$^4!!$'v;!	9e 	9c5%&7 8 	9 vf1215rO   c           	          [         R                  " U5      nU R                  U5      nU Vs/ s H'  oDR                  " S0 UR                  =(       d    0 D6PM)     sn$ s  snf Nr   )r(   rX  r_  extracttext_settings)ro   rV  rZ  r^  tables        rZ   extract_tablesPage.extract_tables  sQ     $$^4!!$'IOP;!3!3!9r;PPPs   .Ac                     [         R                  " U5      nU R                  U5      nUc  g UR                  " S0 UR                  =(       d    0 D6$ rl  )r(   rX  ri  rm  rn  )ro   rV  rZ  ro  s       rZ   extract_tablePage.extract_table  sG     $$^4%===>D$6$6$<">>rO   r\   c                    [        U R                  S9nSU;  a  UR                  SU R                  05        SU;  a  UR                  SU R                  05        0 UEUEn[
        R                  " U R                  40 UD6$ )N)layout_bboxlayout_width_charslayout_widthlayout_height_charslayout_height)rn   r   r  r5   r0   r   chars_to_textmapchars)ro   r\   defaultsfull_kwargss       rZ   r   Page._get_textmap  sx    #'		$
  v-OO^TZZ89 .OO_dkk:;&<&<V&<%%djj@K@@rO   patternregexcase
main_groupreturn_charsreturn_groupsc           	      `    U R                   " S0 [        U5      D6nUR                  UUUUUUS9$ )N)r  r  r  r  r  r   )r   re   search)	ro   r  r  r  r  r  r  r\   textmaps	            rZ   r  Page.search  sE     ""A%8%@A~~!%'  
 	
rO   c                 L    U R                   " S0 [        U5      D6R                  $ rl  )r   re   	as_stringro   r\   s     rZ   extract_textPage.extract_text  s"    >"5f"=>HHHrO   c                 D    [         R                  " U R                  40 UD6$ r^   )r   extract_text_simpler|  r  s     rZ   r  Page.extract_text_simple  s    ((>v>>rO   c                 D    [         R                  " U R                  40 UD6$ r^   )r   extract_wordsr|  r  s     rZ   r  Page.extract_words  s    ""4::888rO   stripc                 R    U R                   " S0 [        U5      D6R                  XS9$ )N)r  r  r   )r   re   extract_text_lines)ro   r  r  r\   s       rZ   r  Page.extract_text_lines  s4     >"5f"=>QQ R 
 	
rO   r   relativestrictr  c                     [        XX#S9$ )N)r  r  )r  ro   r   r  r  s       rZ   crop	Page.crop!  s     4HHrO   c                 4    [        XX#[        R                  S9$ zC
Same as .crop, except only includes objects fully within the bbox
)r  r  crop_fn)r  r   within_bboxr  s       rZ   r  Page.within_bbox&  s     %BSBS
 	
rO   c                 4    [        XX#[        R                  S9$ r  )r  r   outside_bboxr  s       rZ   r  Page.outside_bbox0  s     %BTBT
 	
rO   test_functionFilteredPagec                     [        X5      $ r^   )r  )ro   r  s     rZ   r9  Page.filter:  s    D00rO   c                     [        U S 5      nU R                  R                  5        VVs0 s H  u  p4X4_M	     snnUl        [        R
                  " U R                  40 UD6UR                  S'   U$ s  snnf )u   
Removes duplicate chars — those sharing the same text and positioning
(within `tolerance`) as other characters in the set. Adjust extra_args
to be more/less restrictive with the properties checked.
c                     g)NTr   re  s    rZ   <lambda>#Page.dedupe_chars.<locals>.<lambda>C  s    rO   char)r  r$  r_   r"  r   dedupe_charsr|  )ro   r\   prE  objss        rZ   r  Page.dedupe_chars=  sf     ~.37<<3E3E3GH3GZTdj3GH
"//

EfE

6 Is   A2
resolutionr5   r0   	antialiasforce_mediaboxrJ   c                     SSK JnJn  [        S XU4 5       5      nUS:  a  [	        SU 35      eUb  SU-  U R
                  -  nOUb  SU-  U R                  -  nU" U U=(       d    UUUS9$ )z
You can pass a maximum of 1 of the following:
- resolution: The desired number pixels per inch. Defaults to 72.
- width: The desired image width in pixels.
- height: The desired image width in pixels.
r   )DEFAULT_RESOLUTIONrJ   c              3   (   #    U  H  oS Lv   M
     g 7fr^   r   r   s     rZ   r    Page.to_image.<locals>.<genexpr>X  s     K/J!/Js   zUOnly one of these arguments can be provided: resolution, width, height. You provided H   )r  r  r  )displayr  rJ   sum
ValueErrorr5   r0   )	ro   r  r5   r0   r  r  r  rJ   	num_specss	            rZ   to_imagePage.to_imageH  s     	;K
6/JKK	q=ghqgrs  edjj0Jft{{2J!7%7)	
 	
rO   object_typesc           	      N   Uc(  [        U R                  R                  5       5      S/-   nOUnU R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S.nU H  n[        XS-   5      X4S-   '   M     U$ )Nr   )r   r   r   r   r   r   r5   r0   s)ra   r$  keysr   r   r   r   r   r   r5   r0   r;  )ro   r  _object_typesdts        rZ   r   Page.to_dicti  s     !2!2!45	AM(M++"11||IIZZkk	
 A 3w/A#gJ rO   c                 "    SU R                    S3$ )Nz<Page:>)r   rs   s    rZ   __repr__Page.__repr__|  s    (()++rO   )r   r"  r   r   r   r   r   r   r   r   r   r   r   r   r^   )TTr   TT)TT)FT)NNNFF)Dr   r   r   r   r"   r   r	   rU   r   r   boolpagesr   r   r   r   r   propertyr5   r0   r   r   r   r   r   r!   r  r  r$  r   r'  r   r    rL  r   r   rP  r#  r
   r%   r'   r[  r&   r_  ri  rp  rs  r.   r   r   r   r  r  r  r  r  r   r  r  r  r   r9  r  r   r  r   r  r   r   rO   rZ   r   r      s   #,#>#>)#LtCyLKE !"*:*: *: 	*:
 *:X' +u + + + + + T#s(^ 4       ?
 ? ?B @J @ @ c:o.  ReE5L1 ReE5L6I RH& HU HT/";//	5$$	%/	tCO4 	 <@'&'78'	' <@.&'78.	e. <@&'78	%$ <@Q&'78Q	d4&'	(Q <@?&'78?	$tHSM*+	,?	AS 	AW 	A !"
sGCL()
 
 	

 
 
 
 
 
d38n	
(IS IS I?C ?C ?9c 9j 9 8<

04
GJ
	
 DHII&*I<@I	I DH

&*
<@
	
 DH

&*
<@
	
1HeWd]$; 1 1	S 	^ 	 37-1.2$
U3:./
 c5j)*
 sEz*+	

 
 
 

BHT#Y$7 4S> &,# ,rO   r   c                   2    \ rS rSr% Sr\\S'   S\4S jrSr	g)DerivedPagei  Fr   parent_pagec                    Xl         UR                  U l        UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l        U R                  [        R                  5        [        5       " U R                  5      U l        g r^   )r  r   r   r   r   r   r   r   r   r   r"   r   r   r   r   )ro   r  s     rZ   r   DerivedPage.__init__  s    &$..??#,,&22)88#,,#,,"**445$;t'8'89rO   )
r   r   r   r   r   r   r  r   r   r   N)
r   r   r   r   r   r  r   r   r   r   r   rO   rZ   r  r    s    K:D :rO   r  r   parent_bboxc                    [         R                  " U 5      nUS:X  a  [        SU  S35      e[         R                  " X5      nUc  [        SU  SU 35      e[         R                  " U5      nXB:  a  [        SU  SU 35      eg )Nr   zBounding box z has an area of zero.z. is entirely outside parent page bounding box z. is not fully within parent page bounding box )r   calculate_arear  get_bbox_overlap)r   r  	bbox_areaoverlapoverlap_areas        rZ   test_proposed_bboxr    s    $$T*IA~=.CDEE$$T7GD6 "((3}6
 	

 ''0LD6 "((3}6
 	
  rO   c                      ^  \ rS rSr\R
                  SS4S\S\S\\	\/\	4   S\
S\
4
U 4S	 jjjr\S
\\\	4   4S j5       rSrU =r$ )r  i  FTr  	crop_bboxr  r  r  c                 D  >^^ U(       a#  UR                   u  pg  nTu  ppX-   X-   X-   X-   4mU(       a  [        TUR                   5        S[        S[        4UU4S jjn[        TU ]  U5        Xl        T[        R                  L a  UR                   U l         g TU l         g )Nr  rL   c                    > T" U T5      $ r^   r   )r  r  r  s    rZ   r  &CroppedPage.__init__.<locals>._crop_fn  s    4++rO   )r   r  r!   r}   r   r  r   r  )ro   r  r  r  r  r  o_x0o_top_r6   r  r7   r  r  r   s     ``          rZ   r   CroppedPage.__init__  s      + 0 0DA"+BRCKFNKIy+*:*:;	,: 	,* 	, 	, 	%  e(((#((DI!DIrO   rL   c                     [        U S5      (       a  U R                  $ U R                  R                  R	                  5        VVs0 s H  u  pXR                  U5      _M     snnU l        U R                  $ s  snnf r!  )r   r"  r  r$  r_   r  ro   r  r  s      rZ   r$  CroppedPage.objects  si    4$$== ,0,<,<,D,D,J,J,L0
,LDAA}}Q,L0
 }}0
s   A7)r  r"  r   )r   r   r   r   r   crop_to_bboxr   r   r   r!   r  r   r  r   rU   r$  r   r   r   s   @rZ   r  r    s    
 ?D>P>P"" " :v.
:;	"
 " " ": c:o.  rO   r  c                   b   ^  \ rS rSrS\S\\/\4   4U 4S jjr\	S\
\\4   4S j5       rSrU =r$ )r  i  r  	filter_fnc                 R   > UR                   U l         X l        [        TU ]  U5        g r^   )r   r  r}   r   )ro   r  r  r   s      rZ   r   FilteredPage.__init__  s"    $$	"%rO   rL   c                    [        U S5      (       a  U R                  $ U R                  R                  R	                  5        VVs0 s H%  u  pU[        [        U R                  U5      5      _M'     snnU l        U R                  $ s  snnf r!  )r   r"  r  r$  r_   ra   r9  r  r  s      rZ   r$  FilteredPage.objects  sw    4$$==  ((006680
8 tF4>>1-..80
 }}	0
s   ,B)r"  r   r  )r   r   r   r   r   r   r    r  r   r  r   rU   r!   r$  r   r   r   s   @rZ   r  r    sE    &D &Xugtm5L &
 c:o.  rO   r  r  )Sr   r6  	functoolsr   typingr   r   r   r   r   r	   r
   r   r   r   unicodedatar   r>  warningsr   pdfminer.converterr   pdfminer.layoutr   r   r   r   r   r   r   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.psparserr   r+  r   _typingr   r   r    r!   	containerr"   	structurer#   r$   ro  r%   r&   r'   r(   r)   r*   r+   utils.exceptionsr,   r-   
utils.textr.   compiler8  setr.  r  rJ   r   rK   rS   rB  rU   r[   re   rg   r   r   r   r  r  r  r  r   rO   rZ   <module>r     sj    	    7  0   = $ '  5 5   7 F F ? ? F 	F		B "
 *) 0 1(* *3 *S#X 4S> 0): 0f F  e  F  (4 4E 4f 4
C,9 C,L:$ :"
V 
& 
T 
(%+ %P; rO   