
    TAi                       S r SSKJr  SSKrSSKrSSKrSSKJr  SSKJ	r	  SSK
Jr  SSKJr  SSKrSSKJr  SS	KJr  SS
KJrJr  SSKJrJrJrJrJrJrJrJr  SSKJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*  SSK+J,r,  SSK-J.r.  SSK/J0r0J1r1  SSK2J3r3  \Rh                  " \55      r6      SS jr7SS jr8SS jr9      SS jr:      SS jr;      SS jr<g)zEImplements the concurrent and page synchronous parts of the pipeline.    )annotationsN)Sequence)partial)Path)mkdtemp)Executor)
OcrGrafter)PageContext
PdfContext)
copy_finalis_ocr_requiredmerge_sidecarsocr_engine_hocrocr_engine_textonly_pdfrender_hocr_pagetriagevalidate_pdfinfo_options)
PageResultcli_exception_handlerdo_get_pdfinfomanage_debug_log_handlermanage_work_folderpostprocessprocess_pagereport_output_pdfset_thread_pagenosetup_pipelineworker_init)OcrmypdfPluginManager)ProgressBar)check_requested_output_filecreate_input_file)ExitCodec                    U R                   nUR                  R                  S5      (       a  [        X5      u  p4[	        X05      nXT4$ UR                  S:X  a  [        X5      u  pTXT4$ [        SUR                   35      e)z8Run OCR engine on image to create OCR PDF and text file.hocrsandwichzpdf_renderer )optionspdf_renderer
startswithr   r   r   NotImplementedError)page_contextocr_image_outr'   hocr_outtext_outocr_outs         S/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/_pipelines/ocr.py_image_to_ocr_textr1   8   s     ""G&&v..,]I"8:
 	 
			+3MP  "M'2F2F1G"HII    c                    [        U R                  S-   5        [        U 5      (       d  [        U R                  S9$ [	        U 5      u  pn[        X5      u  pE[        U R                  UUUUS9$ )z3Execute a pipeline for a single page synchronously.   )pageno)r5   pdf_page_from_imageocrtextorientation_correction)r   r5   r   r   r   r1   )r+   r,   pdf_page_from_image_outr9   r/   r.   s         r0   _exec_page_syncr;   G   sv    l))A-.<((!4!455EQFBM,B +<GG""35 r2   c                b  ^^	 U R                   n[        [        U R                  5      UR                  5      nUS:  a  [
        R                  SU5        S/[        U R                  5      -  m	[        U 5      mSUU	4S jjnU" UR                  U[        [        U R                  5      UR                  S:  a  SOSSUR                  (       + S	9[        [        [        R                  R                   5      ["        U R%                  5       US
9  UR&                  (       a,  [)        T	U 5      n[+        XRR&                  UR,                  5        TR/                  5       n/ nUR0                  S:w  aC  [
        R                  S5        [3        X`U5      u  pg[+        XbR4                  UR,                  5        U$ )z&Execute the OCR pipeline concurrently.r4   z&Start processing %d pages concurrentlyNc                X  >  [        U R                  S-   5        U R                  TU R                  '   UR                  S5        TR	                  U R                  U R
                  U R                  U R                  S9  UR                  S5        [        S5        g! [        S5        f = f)z1After OCR is complete for a page, update the PDF.r4   g      ?)r5   imagetextpdfautorotate_correctionN)r   r5   r8   update
graft_pager6   r7   r9   )resultpbarocrgraftsidecarss     r0   update_page$exec_concurrent.<locals>.update_pagee   s    	$fmma/0&,kkHV]]#KK}}00

&,&C&C	    KKd#d#s   BB B)r   OCRzImage processingpage)totaldescunitdisable)use_threadsmax_workersprogress_kwargsworker_initializertasktask_argumentstask_finishednonezPostprocessing...)rC   r   rD   r    )r'   minlenpdfinfojobsloginfor	   rO   dicttesseract_timeoutprogress_barr   r   PILImageMAX_IMAGE_PIXELSr;   get_page_context_argssidecarr   r   
input_filefinalizeoutput_typer   output_file)
contextexecutorr'   rP   rG   r8   pdfmessagesrE   rF   s
           @@r0   exec_concurrentrm   [   s\   ooGc'//*GLL9KQ9;G#'&3w+?"?H'"H$ $  ''goo&!33a7=O,,,	
 #;		0J0JK446!  h04'*<*<= 


C Hf$$%#C(; 	3++W-?-?@Or2   c           	        [        [        [        SS95      U R                  U R                  S9 n[	        XS9   [        X5      n[        U 5        [        X5      u  pE[        XTUS-  U 5      n[        XcU 5      n[        XXgU5      n[        U5        [        X5      n	[        XU	5      n
U
sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        g ! , (       d  f       g = f)Nzocrmypdf.io.)prefix)work_folderretainprint_location)r'   rp   z
origin.pdf)r   r   r   keep_temporary_filesr   r   r!   r"   r   r   r   r   rm   r   )r'   plugin_managerrp   rj   start_input_fileoriginal_filename
origin_pdfrY   ri   optimize_messagesexitcodes              r0   _run_pipelinerz      s    
 	WN;<//"77	
  J!':#G,.?.U+ |1KW


 !w?W:W 	!) ,G>$W@QR- 	KJ	
 	

 	KJJ	
 	
 	
s#   
CA0B=*	C=
C	C
C&c               "    [        [        X5      $ )zRun the OCR pipeline with command line exception handling.

Args:
    options: The parsed command line options.
    plugin_manager: The plugin manager to use. If not provided, one will be
        created.
)r   rz   r'   rt   s     r0   run_pipeline_clir}      s     !HHr2   c                   [        X5      $ )zRun the OCR pipeline without command line exception handling.

Args:
    options: The parsed command line options.
    plugin_manager: The plugin manager to use. If not provided, one will be
        created.
)rz   r|   s     r0   run_pipeliner      s     11r2   )r+   r
   r,   r   returnztuple[Path, Path])r+   r
   r   r   )ri   r   rj   r   r   zSequence[str])r'   zargparse.Namespacert   r   r   r#   )=__doc__
__future__r   argparselogginglogging.handlerscollections.abcr   	functoolsr   pathlibr   tempfiler   r`   ocrmypdf._concurrentr   ocrmypdf._graftr	   ocrmypdf._jobcontextr
   r   ocrmypdf._pipeliner   r   r   r   r   r   r   r   ocrmypdf._pipelines._commonr   r   r   r   r   r   r   r   r   r   r   ocrmypdf._plugin_managerr   ocrmypdf._progressbarr    ocrmypdf._validationr!   r"   ocrmypdf.exceptionsr#   	getLogger__name__r[   r1   r;   rm   rz   r}   r    r2   r0   <module>r      s  
 L "    $    
 ) & 8	 	 	    ; - )!.2(:z  )   FII *I 	I22 *2 	2r2   