
    TAi                    H   S r SSKJr  SSKrSSKrSSKJrJr  SSKJr  SSK	J
r
  SSKJrJrJr  SSKJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  \R<                  " \5      r SS jr!S r"S r#SS jr$SS jr%SS jr&SS jr' " S S5      r(        SS jr)g)z,OCRmyPDF page processing pipeline functions.    )annotationsN)datetimetimezone)Path)Any)
DictionaryNamePdf)__version__)PdfMetadataencode_pdf_date)remove_broken_goto_annotations)PROGRAM_NAME)
PdfContext)iso_639_2_from_3c                n  ^  UR                   nU 4S jnS Vs0 s H  nXC" U5      _M     nnUR                  (       a  UR                  US'   UR                  (       a  UR                  US'   UR                  (       a  UR                  US'   UR                  (       a  UR                  US'   UR
                  R                  R                  5       R                  U5      n[         S[         SU 3US	'   S
[         3US'   [        [        R                  " [        R                   5      5      US'   U$ s  snf )z4Read the document info and store it in a dictionary.c                f   >  TR                   U    n[        U5      $ ! [        [        4 a     gf = f)N )docinfostrKeyError	TypeError)keysbase_pdfs     N/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/_metadata.pyfrom_document_info'get_docinfo.<locals>.from_document_info   s8    	  %Aq6M)$ 		s    00)/Title/Author	/Keywords/Subjectz/CreationDater   r    r!   r"    z / z/Creatorzpikepdf z	/Producerz/ModDate)optionstitleauthorkeywordssubjectplugin_managerhookget_ocr_enginecreator_tagr   OCRMYPF_VERSIONPIKEPDF_VERSIONr   r   nowr   utc)r   contextr$   r   kpdfmarkr,   s   `      r   get_docinfor4      s   ooG QPA 	
a  P   }}#MM~~$^^	&//%oo
((--<<>JJ7SK)N!O+<C}MGJ%o%67GK)(,,x||*DEGJN%s   D2c                   U(       d  g U R                   R                  S5      (       a,  [        R                  S5        [        R	                  SU5        g [        R                  S5        [        R                  SU5        g )NpdfazSome input metadata could not be copied because it is not permitted in PDF/A. You may wish to examine the output PDF's XMP metadata.z1The following metadata fields were not copied: %rz^Some input metadata could not be copied.You may wish to examine the output PDF's XMP metadata.)output_type
startswithlogwarningdebugerrorinfo)r$   missings     r   report_on_metadatar?   ;   sd    %%f--"	

 			EwO		E	
 	DgN    c                   Sn [        U R                  [        5      (       d  [        S5      eU R                  R	                  5        HW  u  p#[        U[
        5      (       d  M  S[        U5      ;   d  M-  [        U5      R                  SS5      U R                  U'   SnMY     U$ ! [         a    [        R                  S5         U$ f = f)zIf the DocumentInfo block contains NUL characters, remove them.

If the DocumentInfo block is malformed, log an error and continue.
Fz DocumentInfo is not a dictionary    r@   TzAFile contains a malformed DocumentInfo block - continuing anyway.)

isinstancer   r   r   itemsr   bytesreplacer9   r<   )pdfmodifiedr2   vs       r   repair_docinfo_nulsrJ   M   s    
 H	W#++z22>??KK%%'DA!S!!gq&9!&q!1!1'3!?A ( O  W		UVOWs   AB% $B% 5.B% %CCc                ~    [         R                  " U 5      R                  nX!R                  R                  S-  :  a  gg)zkDetermine whether the PDF should be linearized.

For smaller files, linearization is not worth the effort.
i@B TF)osstatst_sizer$   fast_web_view)working_filer1   filesizes      r   should_linearizerR   `   s3    
 ww|$,,H??009<=r@   c                z    SU;  a  UR                  SS5      US'   UR                  S5      S:X  a  SU ;  a  US	 g g g )Nzxmp:CreateDatezxmp:ModifyDater   dc:titleUntitled)get)meta_originalmeta_pdfs     r   _fix_metadatarY   k   sP     x'%-\\2BB%G!"||J:- ]*$ +	 .r@   c                    UR                   S:X  a	  SU ;   a  U S	 UR                  S:X  a  SU ;   a  U S	 SU ;   a  U S	 UR                  S:X  a  SU ;   a  U S	 SU ;   a  U S	 UR                  S:X  a  SU ;   a  U S	 ggg)	a  Unset metadata fields that were explicitly set to empty strings.

If the user explicitly specified an empty string for any of the
following, they should be unset and not reported as missing in
the output pdf. Note that some metadata fields use differing names
between PDF/A and PDF.
r   rT   z
dc:creatorz
pdf:Authorzdc:descriptionz
dc:subjectzpdf:KeywordsN)r%   r&   r(   r'   )metar$   s     r   _unset_empty_metadatar\   x   s     }}zT1~~4\"4\""t#%&4\"2.D"8  #9r@   c                    [         R                  U R                  ;   d  U(       d  gUS   nU(       d  g[        U5      nU(       d  gX0R                  l        g)zSet the language of the PDF.Nr   )r	   LangRootr   )rG   	languagesprimary_language_iso639_3iso639_2s       r   _set_languagerc      sA    yyCHHI )!$ 9:HHHMr@   c                  8    \ rS rSrSS	S jjrS rS rS
S jrSrg)MetadataProgress   c                H    Xl         U R                  SSSU(       + S9U l        g )Nd   Linearizing%)totaldescunitdisable)progressbar_classprogressbar)selfro   enables      r   __init__MetadataProgress.__init__   s+    !211MZ 2 
r@   c                :    U R                   R                  5         U $ N)rp   	__enter__)rq   s    r   rw   MetadataProgress.__enter__   s    ""$r@   c                :    U R                   R                  XU5      $ rv   )rp   __exit__)rq   exc_type	exc_value	tracebacks       r   rz   MetadataProgress.__exit__   s    ((iHHr@   c                Z    U R                   (       d  g U R                  R                  US9  g )N)	completed)ro   rp   update)rq   percents     r   __call__MetadataProgress.__call__   s%    %%'2r@   )rp   ro   N)T)rr   bool)r   int)	__name__
__module____qualname____firstlineno__rs   rw   rz   r   __static_attributes__ r@   r   re   re      s    
I3r@   re   c                   UR                  S5      nUR                  nUR                  R                  R	                  5       n[
        R                  " UR                  5       n[
        R                  " U 5       n[        XTR                  5       n[        Xa5      n	UR                  SSSS9 n
UR                  5        nUR                  U	SSS9  [        X5        [        X5        [        X5        [        U
R!                  5       5      [        UR!                  5       5      -
  n[#        XL5        SSS5        SSS5        [%        XtR&                  5        UR(                  " U4SU0UD6  SSS5        SSS5        SSS5        U$ ! , (       d  f       NZ= f! , (       d  f       Nc= f! , (       d  f       NB= f! , (       d  f       NK= f! , (       d  f       U$ = f)zFix certain metadata fields whether PDF or PDF/A.

Override some of Ghostscript's metadata choices.

Also report on metadata in the input file that was not retained during
conversion.
zmetafix.pdfF)set_pikepdf_as_editorupdate_docinfostrict)delete_missingraise_failureNprogress)get_pathr$   r)   r*   get_progressbar_classr
   openoriginre   progress_barr4   open_metadataload_from_docinforY   r\   setkeysr?   rc   r`   save)rP   r1   pdf_save_settingsoutput_filer$   
pbar_classoriginalrG   pbarr   rW   rX   meta_missings                r   metadata_fixupr      sz    ""=1KooG'',,BBDJ H#%9%9:dh0""&+E% # 8&&U '  -2!-9!(4}1134s8==?7KKLw5   	c,,-AtA/@A' 	; 	 	!.    	 	;: 	 	! . sm   "G9F5F$,F	=A1F.F	62F$(F50G
FF	
F!F$$
F2.F55
G	?G
G)r   r
   r1   r   returnzdict[str, str])rP   r   r1   r   r   r   )rW   r   rX   r   )r[   r   )rG   r
   r`   z	list[str])rP   r   r1   r   r   zdict[str, Any]r   r   )*__doc__
__future__r   loggingrL   r   r   pathlibr   typingr   pikepdfr   r	   r
   r   r.   pikepdf.models.metadatar   r   ocrmypdf._annotsr   ocrmypdf._defaultsr   ocrmypdf._jobcontextr   ocrmypdf._versionr-   ocrmypdf.languagesr   	getLoggerr   r9   r4   r?   rJ   rR   rY   r\   rc   re   r   r   r@   r   <module>r      s    3 "  	 '   ) ) 2 @ ; + + < /!@O$&
%!0
3 3(&&!+&@N&	&r@   