
    TAic>                       S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
JrJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  S SKrS S
KJr  S SKJr  S SKJ r J!r!  S SK"J#r#J$r$  S SK%J&r&  S SK'J(r(  S SK)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9  S SK:J;r;  S SK<J=r=  S SK>J?r?J@r@  S SKAJBrBJCrCJDrDJErEJFrFJGrG  S SKHJIrI  S SKJJKrK  \R                  " \M5      rN\	R                  " 5       rPS\PlQ        S rR\R" \P5        S.S jrS " S S\5      rT " S S\R                  5      rV " S S\R                  5      rX\ " S S 5      5       rY S/     S0S! jjrZS1S" jr[\    S2S# j5       r\S3S$ jr]\S4S% j5       r^        S5S& jr_      S6S' jr`        S7S( jra            S8S) jrb      S9S* jrcS:S+ jrd        S;S, jreS<S- jrfg)=    )annotationsN)CallableSequence)BrokenProcessPool)BrokenThreadPool)contextmanager)	dataclassPath)
NamedTuplecast)Pdf)remove_broken_goto_annotations)Executorsetup_executor)PageContext
PdfContext)PageNumberFilter)metadata_fixup)convert_to_pdfacreate_ocr_imagecreate_pdf_page_from_imagecreate_visible_page_jpggenerate_postscript_stubget_orientation_correctionget_pdf_save_settingsget_pdfinfooptimize_pdfpreprocess_cleanpreprocess_deskewpreprocess_remove_background	rasterizerasterize_previewshould_linearize!should_visible_page_image_use_jpg)OcrmypdfPluginManager)report_output_file_size)ExitCodeExitCodeException)available_cpu_count	check_pdfpikepdf_enable_mmaprunning_in_dockerrunning_in_snapsamefile)file_claims_pdfa)PdfInfoc                l   ^ ^ [         R                  " 5       mUU 4S jn[         R                  " U5        g)z=Inject current page number (when available) into log records.c                 \   > T" U 0 UD6n[        TS5      (       a  TR                  Ul        U$ )Npageno)hasattrr4   )argskwargsrecordold_factorytlss      W/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/_pipelines/_common.pywrapper!_set_logging_tls.<locals>.wrapperI   s/    d-f-3!!JJFM    N)logginggetLogRecordFactorysetLogRecordFactory)r:   r<   r9   s   ` @r;   _set_logging_tlsrB   E   s&    --/K (r>   c                    U [         l        g)z@Set page number (1-based) that the current thread is processing.N)r:   r4   )r4   s    r;   set_thread_pagenorD   U   s	    CJr>   c                  d    \ rS rSr% SrS\S'    SrS\S'    SrS\S'    SrS\S	'    S
r	S\S'   Sr
g)
PageResultZ   z*Result when a page is finished processing.intr4   NPath | Nonepdf_page_from_imageocrtextr   orientation_correction )__name__
__module____qualname____firstlineno____doc____annotations__rJ   rK   rL   rM   __static_attributes__rN   r>   r;   rF   rF   Z   sC    4K'++%CD+ "#C#,r>   rF   c                  (   ^  \ rS rSrU 4S jrSrU =r$ )HOCRResultEncoderm   c                f   > [        U[        5      (       a  S[        U5      0$ [        TU ]  U5      $ Nr   )
isinstancer   strsuperdefault)selfobj	__class__s     r;   r^   HOCRResultEncoder.defaultn   s.    c4  CH%%ws##r>   rN   )rO   rP   rQ   rR   r^   rU   __classcell__ra   s   @r;   rW   rW   m   s    $ $r>   rW   c                  .   ^  \ rS rSrU 4S jrS rSrU =r$ )HOCRResultDecodert   c                >   > [         TU ]  " USU R                  0UD6  g )Nobject_hook)r]   __init__dict_to_object)r_   r6   r7   ra   s      r;   rj   HOCRResultDecoder.__init__u   s     4JT%8%8J6Jr>   c                .    SU;   a  [        US   5      $ U$ rZ   r
   )r_   ds     r;   rk    HOCRResultDecoder.dict_to_objectx   s    Q;&	?"r>   rN   )rO   rP   rQ   rR   rj   rk   rU   rc   rd   s   @r;   rf   rf   t   s    K r>   rf   c                      \ rS rSr% SrS\S'    SrS\S'    SrS\S'    SrS\S	'    S
r	S\S'    \
SS j5       rSS jrSrg)
HOCRResult~   z(Result when hOCR is finished processing.rH   r4   NrI   rJ   hocrtextpdfr   rM   c           	     @    U " S0 [         R                  " U[        S9D6$ )zCreate an instance from a dict.clsrN   )jsonloadsrf   )rw   json_strs     r;   	from_jsonHOCRResult.from_json   s     ATZZ.?@AAr>   c                H    [         R                  " U R                  [        S9$ )zSerialize to a JSON string.rv   )rx   dumps__dict__rW   )r_   s    r;   to_jsonHOCRResult.to_json   s    zz$--->??r>   rN   )rz   r\   returnrq   )r   r\   )rO   rP   rQ   rR   rS   rT   rJ   rs   rt   rM   classmethodr{   r   rU   rN   r>   r;   rq   rq   ~   s]    2K'++%D+ G[,"#C#,B B@r>   rq   c                N  ^^ [         R                  " U SS9mTR                  [         R                  5        [         R                  " S5      nTR                  U5        TR                  [        5       5        [         R                  " T5      R                  T5        UU4S jnTU4$ )zCreate a debug log file at a specified location.

Returns the log handler, and a function to remove the handler.

Args:
    log_filename: Where to the put the log file.
    prefix: The logging domain prefix that should be sent to the log.
T)delayzA[%(asctime)s] - %(name)s - %(levelname)7s -%(pageno)s %(message)sc                    >  [         R                  " T5      R                  T5        TR                  5         g ! [         a"  n [        U [        R                  S9   S n A g S n A ff = f)Nfile)r?   	getLoggerremoveHandlercloseOSErrorprintsysstderr)elog_file_handlerprefixs    r;   remover(configure_debug_logging.<locals>.remover   sL    	&f%334DE""$ 	&!#**%	&s   59 
A%A  A%)
r?   FileHandlersetLevelDEBUG	FormattersetFormatter	addFilterr   r   
addHandler)log_filenamer   	formatterr   r   s    `  @r;   configure_debug_loggingr      s     **<tDgmm,!!KI !!),/12f(()9:& W$$r>   c                B    U [         R                  l        [        5         g)z&Initialize a worker thread or process.N)PILImageMAX_IMAGE_PIXELSr,   )
max_pixelss    r;   worker_initr      s    
 ",CIIr>   c              #  
  #    S nU R                   (       d  U R                  S:  a4  [        R                  R	                  SS5      (       d  [        US-  SS9u  p2 S v   U(       a  U" 5         g g ! U(       a  U" 5         f f = f7f)N   PYTEST_CURRENT_TEST z	debug.log)r   )keep_temporary_filesverboseosenvirongetr   )optionswork_folderr   _debug_log_handlers       r;   manage_debug_log_handlerr      sw      G$$1(<bjjnnrG G '>+%b'
#I 7I s   ABA/ B/B  Bc                    SU  3/n[        5       (       a  UR                  S5        O [        5       (       a  UR                  S5        [        SR	                  U5      [
        R                  S9  g)z0Print the location of the temporary work folder.z%Temporary working files retained at:
zUOCRmyPDF is running in a Docker container, so the files will be inside the container.zSOCRmyPDF is running in a Snap container, so the files will be inside the container.
r   N)r-   appendr.   r   joinr   r   )r   msgss     r;   _print_temp_folder_locationr      s_    4[MBCD9	
 
		9	
 
$))D/

+r>   c              #     #     U v   U(       a  U(       a  [        U 5        g g [        R                  " U SS9  g ! U(       a  U(       a  [        U 5        f f [        R                  " U SS9  f = f7f)NT)ignore_errors)r   shutilrmtree)r   retainprint_locations      r;   manage_work_folderr      sY     ;+K8  MM+T:	 +K8  MM+T:s   A.9 1A.2A++A.c                    U " X5      $ ! [          aN    UR                  S:  a  [        R                  S5        O[        R	                  S5        [
        R                  s $ [         a  n[        [        U5      nUR                  S:  a  [        R                  S5        Ol[        U5      (       a4  [        R	                  S[        U5      R                  [        U5      5        O([        R	                  [        U5      R                  5        UR                  s SnA$ SnAf[        R                  R                   a(    [        R                  S5        [
        R                   s $ ["        [$        4 a(    [        R                  S5        [
        R&                  s $ [(         a(    [        R                  S5        [
        R                   s $ f = f)	aE  Convert exceptions into command line error messages and exit codes.

When known exceptions are raised, the exception message is printed to stderr
and the program exits with a non-zero exit code. When unknown exceptions are
raised, the exception traceback is printed to stderr and the program exits
with a non-zero exit code.
r   KeyboardInterruptr)   z%s: %sNzA decompression bomb error was encountered while executing the pipeline. Use the argument --max-image-mpixels to raise the maximum image pixel limit.zA worker process was terminated unexpectedly. This is known to occur if processing your file takes all available swap space and RAM. It may help to try again with a smaller number of jobs, using the --jobs argument.z2An exception occurred while executing the pipeline)r   r   log	exceptionerrorr(   ctrl_cr)   r   r\   typerO   	exit_coder   r   DecompressionBombErrorother_errorr   r   child_process_error	Exception)fnr   plugin_managerr   s       r;   cli_exception_handlerr      sL   ($ '** ??aMM-.II)* "A&??aMM-.VVIIhQ 0 0#a&9IId1g&&'{{99++ $!	

 ### 
, 		
 +++ $JK###$s6   
 AG!	G*B-DGAG%4G.GGc                p    U R                   (       d  [        5       U l         [        5         [        U5      nU$ )N)jobsr*   r,   r   )r   r   executors      r;   setup_pipeliner   ,  s*     <<*,n-HOr>   c           
         [        U UUR                  UR                  UR                  UR                  UR
                  S9$ )N)r   detailed_analysisprogbarmax_workersuse_threadscheck_pages)r   redo_ocrprogress_barr   r   pages)pdf_pathr   r   s      r;   do_get_pdfinfor   ;  s@     !**$$LL''MM r>   c                r    U(       a  [        X5      nU(       a  [        X5      nU(       a  [        X5      nU$ )zPreprocess an image.)r!   r    r   )page_contextimageremove_backgrounddeskewcleans        r;   
preprocessr   I  s0     ,UA!%6 5Lr>   c                x   U R                   nS=p4[        U R                  U USS9n[        UR                  UR
                  UR                  /5      (       d%  [        U UUR                  UR                  SS9=p4X44$ UR                  (       d+  [        U UUR                  UR                  UR
                  S9nUR                  (       a  [        U R                  U USSS9nOUnU(       a$  Xe:X  a  UR                  UR
                  :X  a  UnX44$ [        U UUR                  UR                  UR                  S9nX44$ )z4Create intermediate and preprocessed images for OCR.NF)
correctionremove_vectors)r   T_ocr)r   r   
output_tag)r   r"   originanyr   clean_finalr   r   r   r   lossless_reconstruction)r   rM   r   	ocr_imagepreprocess_outrasterize_outrasterize_ocr_outs          r;   make_intermediate_imagesr   Z  sV    ""G!%%I)	M w22G4J4JKLL%/%%NN&
 	
	T $$E ..'))))N !! )##1#!! !. !2!4!44 'I $$ #!))mmI $$r>   c                   U R                   nSnUR                  (       a!  [        U R                  U 5      n[	        X05      n[        X5      u  pE[        X@5      nSnUR                  (       dd  U(       d   eUn[        U R                  5      (       a  [        X5      nU R                  R                  R                  XS9n	U	b  U	n[        XU5      nXgU4$ )zEProcess page to create OCR image, visible page image and orientation.r   N)pageimage_filename)r   rotate_pagesr#   r   r   r   r   r   r%   pageinfor   r   hookfilter_page_imager   )
r   r   rM   rasterize_preview_outr   r   ocr_image_outpdf_page_from_image_outvisible_image_outfiltered_images
             r;   process_pager     s    ""G 1,2E2E| T!;!"
 !9!I %Y=M"**~*,\-B-BCC 78I X%4499KK L 
 % ."<-C#
 3IIIr>   c                <   [         R                  " U 5       nUR                  S5      n[        U5      (       a  UR	                  U5        UnOU nSSS5        UR
                  R                  R                  S5      (       a  [        U5      n[        WXa5      nUR                  R                  R                  US9n[        UR
                  R                  5      nU(       + =(       a    [        WU5      US'   [        WXS9n[!        XQU5      $ ! , (       d  f       N= f)zPostprocess the PDF file.zfix_annots.pdfNpdfa)context	linearize)pdf_save_settings)r   openget_pathr   saver   output_type
startswithr   r   r   r   is_optimization_enabledr   r$   r   r   )	pdf_filer  r   pdf
fix_annotspdf_outps_stub_out
optimizingsave_settingss	            r;   postprocessr    s    
 
(	s%%&67
)#..HHZ  GG 
 ""--f55.w7!';@'',,DDWDUJ)'//*E*EFM%/!V4DWg4VM+WgOG(33! 
	s   8D
Dc                   U R                   S:X  a%  [        R                  S5        [        R$                  $ [        U R                   S5      (       aD  U R                   R	                  5       (       a%  [        R                  S5        [        R$                  $ [        U R                   [        [        R                  5      5      (       a   [        R$                  $ U R                  R                  S5      (       ab  [        U R                   5      nUS   (       a  [        R                  SUS   5        O)[        R                  S	US   5        [        R                  $ [        U R                   5      (       d%  [        R                  S
5        [        R                   $ [#        XU R                   U5        [        R$                  $ )N-zOutput sent to stdoutwritablezOutput written to streamr   passz!Output file is a %s (as expected)conformancezOOutput file is a valid PDF, but conversion to PDF/A did not succeed (issue: %s)z)Output file: The generated PDF is INVALID)output_filer   infor5   r  r/   r   r   devnullr  r  r0   warningr(   pdfa_conversion_failedr+   invalid_output_pdfr'   ok)r   start_input_fileoptimize_messages	pdfa_infos       r;   report_output_pdfr!    sP   c!(). ;;- 
$$j	1	1g6I6I6R6R6T6T+,* ;;) 
'%%tBJJ'7	8	8& ;;# ))&11()<)<=I <i>VW*m,
  666,,--KKCD...w':':<M	
 ;;r>   )r4   
int | None)r   )r   r   r   r\   r   z.tuple[logging.FileHandler, Callable[[], None]])r   r"  r   None)r   argparse.Namespacer   r   )r   r   )r   r   r   boolr   r%  )r   z?Callable[[argparse.Namespace, OcrmypdfPluginManager], ExitCode]r   r$  r   r&   r   r(   )r   r$  r   r&   r   r   )r   r   r   r   r   r$  r   r1   )r   r   r   r   r   r%  r   r%  r   r%  r   r   )r   r   rM   rH   r   ztuple[Path, Path | None])r   r   r   ztuple[Path, Path | None, int])r
  r   r  r   r   r   r   ztuple[Path, Sequence[str]])r   r(   )g
__future__r   argparserx   r?   logging.handlersr   r   r   	threadingcollections.abcr   r   concurrent.futures.processr   concurrent.futures.threadr   
contextlibr   dataclassesr	   pathlibr   typingr   r   r   pikepdfr   ocrmypdf._annotsr   ocrmypdf._concurrentr   r   ocrmypdf._jobcontextr   r   ocrmypdf._loggingr   ocrmypdf._metadatar   ocrmypdf._pipeliner   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   ocrmypdf._plugin_managerr&   ocrmypdf._validationr'   ocrmypdf.exceptionsr(   r)   ocrmypdf.helpersr*   r+   r,   r-   r.   r/   ocrmypdf.pdfar0   ocrmypdf.pdfinfor1   r   rO   r   localr:   r4   rB   rD   rF   JSONEncoderrW   JSONDecoderrf   rq   r   r   r   r   r   r   r   r   r   r   r   r  r!  rN   r>   r;   <module>rA     si   #     	  
  . 8 6 % !  # 
  ; 9 8 . -    $ ; <  + $!oo

)  
- -&$(( $((  @ @ @: ')%% #%3%<   ,,  ; ;4$G4$4$ *4$ 	4$n) &1C  	
  
"9%9%7:9%9%xJB44'43;440r>   