
    TAi                       S r SSKJr  SSKrSSKrSSKrSSKrSSKJr  SSK	r	SSK
Jr  SSKJrJr  SSKJrJrJr  SSKJrJrJrJrJrJrJr  SS	KJr  SS
KJr  \R>                  " \ 5      r!SS jr"SS jr#      SS jr$g)zEImplements the concurrent and page synchronous parts of the pipeline.    )annotationsN)partial)Executor)PageContext
PdfContext)is_ocr_requiredocr_engine_hocrvalidate_pdfinfo_options)
HOCRResultdo_get_pdfinfomanage_work_folderprocess_pageset_thread_pagenosetup_pipelineworker_init)OcrmypdfPluginManager)set_lossless_reconstructionc                :   [        U R                  S-   5        [        U 5      (       d  [        U R                  S9$ [	        U 5      u  pn[        X5      u  pE[        U R                  UUUS9nU R                  S5      R                  UR                  5       5        U$ )z*Execute a pipeline for a single page hOCR.   )pageno)r   pdf_page_from_imagehocrorientation_correctionz	hocr.json)	r   r   r   r   r   r	   get_path
write_textto_json)page_contextocr_image_outpdf_page_from_image_outr   hocr_out_results          [/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/_pipelines/pdf_to_hocr.py_exec_page_hocr_syncr$   )   s    l))A-.<((!4!455EQFBM,B "->KH""35	F +&11&..2BCM    c                   U R                   n[        [        U R                  5      UR                  5      nUS:  a  [
        R                  SU5        U" UR                  U[        S[        U R                  5      -  SSSUR                  (       + S9[        [        [        R                  R                  5      [        U R!                  5       S9  g	)
z6Execute the OCR pipeline concurrently and output hOCR.r   z&Start processing %d pages concurrently   hOCRpageg      ?)totaldescunit
unit_scaledisable)use_threadsmax_workersprogress_kwargsworker_initializertasktask_argumentsN)optionsminlenpdfinfojobsloginfor/   dictprogress_barr   r   PILImageMAX_IMAGE_PIXELSr$   get_page_context_args)contextexecutorr5   r0   s       r#   exec_pdf_to_hocrrD   ?   s     ooGc'//*GLL9KQ9;G''s7??++,,,
 #;		0J0JK!446r%   c               L   [        U R                  SSS9 n[        X5      nUS-  n[        R                  " U R
                  U5        [        XCU 5      n[        XU R
                  XQ5      n[        U 5        [        U5        [        Xc5        SSS5        g! , (       d  f       g= f)zRun pipeline to output hOCR.TF)work_folderretainprint_locationz
origin.pdfN)r   output_folderr   shutilcopy2
input_filer   r   r   r
   rD   )r5   plugin_managerrF   rC   
origin_pdfr8   rB   s          r#   run_hocr_pipelinerO   W   s     
))$u
	!': </
W''4 !w?'"4"4g
 	$G, )+
 
 
s   A6B
B#)r   r   returnr   )rB   r   rC   r   rP   None)r5   zargparse.NamespacerM   r   rP   rQ   )%__doc__
__future__r   argparselogginglogging.handlersrJ   	functoolsr   r>   ocrmypdf._concurrentr   ocrmypdf._jobcontextr   r   ocrmypdf._pipeliner   r	   r
   ocrmypdf._pipelines._commonr   r   r   r   r   r   r   ocrmypdf._plugin_managerr   ocrmypdf._validationr   	getLogger__name__r:   r$   rD   rO    r%   r#   <module>ra      s   
 L "      
 ) 8 
   ; !,0,, *, 
	,r%   