
    TAi03                       S r SSKJr  SSKrSSKrSSKrSSKrSSKrSSKJ	r	  SSK
Jr  SSKJr  SSKJr  SSKrSSKrSSKJr  SS	KJrJr  SS
KJr  SSKJrJrJrJr  SSKJrJ r J!r!J"r"J#r#  SSK$J%r%  \RL                  " \'5      r(SS jr)      SS jr*S S jr+S S jr,S S jr-S S jr.S!S jr/S S jr0S S jr1S S jr2S S jr3S"S jr4S"S jr5S#S jr6S S jr7   S$             S%S jjr8g)&z/Validate a work order from API or command line.    )annotationsN)	Namespace)Sequence)Path)copyfileobj)PluginManager)DEFAULT_LANGUAGEDEFAULT_ROTATE_PAGES_THRESHOLD)unpaper)BadArgsErrorInputFileErrorMissingDependencyErrorOutputFileAccessError)is_file_writable	monotonicrunning_in_dockerrunning_in_snapsafe_symlink)check_external_programc                 X    [         R                  S::  a  [        R                  S5        g g )Nl        zYou are running OCRmyPDF in a 32-bit (x86) Python interpreter. This is not supported. 32-bit does not have enough address space to process large files. Please use a 64-bit (x86-64) version of Python.)sysmaxsizelogwarning     P/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/_validation.pycheck_platformr   +   s$    
{{e>	
 r   c                   U R                   (       d[  [        /U l         [        R                  " 5       S   nU(       a0  UR	                  S5      (       d  [
        R                  S[        5        U(       d  g [        U R                   5      [        U5      -
  nU(       a)  SR                  S U 5       5      nSU S3n[        U5      eg )Nr   enz-No language specified; assuming --language %s
c              3  $   #    U  H  ov   M     g 7fNr   ).0langs     r   	<genexpr>*check_options_languages.<locals>.<genexpr>B   s     A/@td/@s   zOOCR engine does not have language data for the following requested languages: 
a  
Please install the appropriate language data for your OCR engine.

See the online documentation for instructions:
    https://ocrmypdf.readthedocs.io/en/latest/languages.html

Note: most languages are identified by a 3-letter ISO 639-2 Code.
For example, English is 'eng', German is 'deu', and Spanish is 'spa'.
Simplified Chinese is 'chi_sim' and Traditional Chinese is 'chi_tra'.
)
	languagesr	   locale	getlocale
startswithr   debugsetjoinr   )optionsocr_engine_languagessystem_langmissing_languages	lang_textmsgs         r   check_options_languagesr5   5   s     -.&&(+{55d;;IIEGWXG--.5I1JJIIA/@AA	&k 	 	 %S))! r   c                    U R                   S:X  a:  U R                  [        R                  S4;  a  [	        SU R                   S35      eg g )Nnone-z:Since you specified `--output-type none`, the output file zI cannot be produced. Set the output file to `-` to suppress this message.)output_typeoutput_fileosdevnullr   r/   s    r   check_options_outputr>   T   sV    f$)<)<RZZQTDU)UH""# $,-
 	
 *V$r   c                    Sn[        U R                  U R                  U R                  U R                  45      (       d  SnXl        U R
                  (       d  U R                  (       a  [        S5      eg g )NFTz\--redo-ocr is not currently compatible with --deskew, --clean-final, and --remove-background)anydeskewclean_final	force_ocrremove_backgroundlossless_reconstructionredo_ocrr   )r/   rE   s     r   set_lossless_reconstructionrG   ]   sv    #NN%%		
  #'&=#**w/?/?5
 	
 0@*r   c                R   U R                   S:X  aX  U R                  S:X  a  [        S5      eU R                  [        R                  :X  a  [        S5      eU R                  S-   U l         U R                   U R
                  :X  d  U R                   U R                  :X  a  [        S5      eg )N r8   z5--sidecar filename needed when output file is stdout.z?--sidecar filename needed when output file is /dev/null or NUL.z.txtz@--sidecar file must be different from the input and output files)sidecarr:   r   r;   r<   
input_filer=   s    r   check_options_sidecarrL   q   s    $#%VWW  BJJ.Q  "--6',,,7CVCV0VN
 	
 1Wr   c                   U R                   (       a  SU l        U R                  (       a  U R                  (       d  [        S5      eU R                  [
        :w  a  U R                  (       d  [        S5      eU R                  (       aT  [        SS[        R                  SSS9   U R                  (       a&  [        R                  " U R                  5      U l        g g g ! [         a  n[        S[        U5      -   5      UeS nAff = f)	NTz&--clean is required for --unpaper-argsz7--rotate-pages is required for --rotate-pages-thresholdr   z6.1z--clean, --clean-final)programpackageversion_checkerneed_versionrequired_forz--unpaper-args: )rB   cleanunpaper_argsr   rotate_pages_thresholdr
   rotate_pagesr   r   versionvalidate_custom_args	Exceptionstr)r/   es     r   check_options_preprocessingr\      s    GMMCDD&&*HH$$TUU}}#OO1	
	C##'.'C'C((($ $   	C1CF:;B	Cs   #6C 
D&C>>Dc           	        / nU R                  SS5      R                  S5      nU Hv  nU(       d  M   UR                  S5      u  pE [        [        [	        U5      S-
  [	        U5      5      5      nU(       d  [        SU SU S35      S eUR                  U5        Mx     U(       d  [        SU  S	35      e[        U5      (       d  [        R                  S
5        [        S U 5       5      (       a  [        S5      e[        R                  SU5        [        U5      $ ! [         a    [        SU S35      S ef = f! [         a"    UR                  [	        U5      S-
  5         GMA  f = f)N  ,r8      zinvalid page subrange ''zThe string of page ranges 'z/' did not contain any recognizable page ranges.zQList of pages to process contains duplicate pages, or pages that are out of orderc              3  *   #    U  H	  oS :  v   M     g7f)r   Nr   )r$   pages     r   r&   %_pages_from_ranges.<locals>.<genexpr>   s     
&!8s   z)pages refers to a page number less than 1zOCRing only these pages: %s)replacesplitlistrangeintr   extend
ValueErrorappendr   r   r   r@   r,   r-   )rangespagespage_groupsgroupstartend	new_pagess          r   _pages_from_rangesru      sh   E..b)//4K	QS)JEQ s5zA~s3x!@A	 &1%#a@  Y' $ )& 2 
 	

 U	
 
&
&&&FGGII+U3u:%  Q"%<UG1#EFDPQ  	)LLUa((	)s   D<ADD9<'E('E(c                    [        S U R                  U R                  U R                  4 5       5      nUS:  a  [	        S5      eU R
                  (       a  [        U R
                  5      U l        g g )Nc              3  8   #    U  H  nU(       a  S OSv   M     g7f)ra   r   Nr   )r$   opts     r   r&   -check_options_ocr_behavior.<locals>.<genexpr>   s      KC qKs      z8Choose only one of --force-ocr, --skip-text, --redo-ocr.)sumrC   	skip_textrF   r   ro   ru   )r/   exclusive_optionss     r   check_options_ocr_behaviorr~      se     %%w'8'8':J:JK  AUVV}}*7==9 r   c           	     R   U R                   U R                  U R                  U R                  /nS U 5        Hl  nU Hc  n[        R
                  " U5      S:X  d  [        U5      S:  d  M.  [        [        U5      5      SS  R                  5       n[        SU SU S35      e   Mn     g )Nc              3  6   #    U  H  o(       d  M  Uv   M     g 7fr#   r   )r$   ms     r   r&   )check_options_metadata.<locals>.<genexpr>   s     &AAaas   
	Coi   rz   zGOne of the metadata strings contains an unsupported Unicode character: z (U+))
titleauthorkeywordssubjectunicodedatacategoryordhexupperrl   )r/   docinfoscharhexchars        r   check_options_metadatar      s    }}gnng.>.>PG&&D##D)T1SY'5Ic$i.,224 9fD	,   'r   c                    [        U R                  S-  5      [        R                  l        [        R                  R                  S:X  a  S [        R                  l        g g )Ni@B r   )rj   max_image_mpixelsPILImageMAX_IMAGE_PIXELSr=   s    r   check_options_pillowr      sA    !$W%>%>%J!KCII
yy!!Q&%)		" 'r   c                    [        5         [        U 5        [        U 5        [        U 5        [	        U 5        [        U 5        [        U 5        [        U 5        g r#   )r   r   r>   rG   rL   r\   r~   r   r=   s    r   _check_plugin_invariant_optionsr      s@    7#!('"(w'!r   c                    UR                   R                  U S9  UR                   R                  5       R                  U 5      n[	        X5        g )Nr=   )hookcheck_optionsget_ocr_enginer(   r5   )r/   plugin_managerr0   s      r   _check_plugin_optionsr      sA    %%g%6)..==?II'RG:r   c                0    [        U 5        [        X5        g r#   )r   r   )r/   r   s     r   r   r      s    #G,'2r   c                d   U R                   S:X  aW  [        R                  S5        US-  n[        US5       n[	        [
        R                  R                  U5        S S S 5        US4$ [        U R                   S5      (       as  U R                   R                  5       (       d  [        S5      e[        R                  S5        US-  n[        US5       n[	        U R                   U5        S S S 5        US4$  US	-  n[        U R                   U5        U[        R                  " U R                   5      4$ ! , (       d  f       US4$ = f! , (       d  f       US4$ = f! [         aI  nS
U R                    3n[        5       (       a  US-  nO[!        5       (       a  US-  n[        U5      UeS nAff = f)Nr8   z reading file from standard inputstdinwbreadablez!Input file stream is not readablezreading file from input streamstreamoriginzFile not found - z
Docker cannot access your working directory unless you explicitly share it with the Docker container and set uppermissions correctly.
You may find it easier to use stdin/stdout:
	docker run -i --rm jbarlow83/ocrmypdf - - <input.pdf >output.pdf
z
Snap applications cannot access files outside of your home directory unless you explicitly allow it. You may find it easier to use stdin/stdout:
	snap run ocrmypdf - - <input.pdf >output.pdf
)rK   r   infoopenr   r   r   bufferhasattrr   r   r   r;   fspathFileNotFoundErrorr   r   )r/   work_foldertargetstream_bufferr[   r4   s         r   create_input_filer      s   S 34w&&$=		((-8  w	##Z	0	0!!**,, !DEE12x'&$=**M:  x	- 8+F++V4299W%7%7888  w  x ! 	-%g&8&8%9:C "" !"" !%1,+	-s0   %D4E7<E 4
E
E
F/&AF**F/c                t   U R                   S:X  a/  [        R                  R                  5       (       a  [	        S5      eg [        U R                   S5      (       a+  U R                   R                  5       (       d  [        S5      eg [        U R                   5      (       d  [        SU R                    S35      eg )Nr8   ztOutput was set to stdout '-' but it looks like stdout is connected to a terminal.  Please redirect stdout to a file.writablezOutput stream is not writablezOutput file location (z) is not a writable file.)	r:   r   stdoutisattyr   r   r   r   r   r=   s    r   check_requested_output_filer   #  s    c!::   
$$j	1	1""++--'(GHH .g1122#$W%8%8$99RS
 	
 3r   c                R   Uc  / n [        U5      R                  5       R                  n[        U5      R                  5       R                  n[        R
                  " U5       nXE[        UR                  5      -  -   n	S S S 5        Xg-  n
XgW	-   -  nUS:  d  US:  a  g / n1 SknU H;  n[        XS5      (       d  M  UR                  SUR                  SS5       S35        M=     UR                  U5        U R                  R                  S	5      (       a  UR                  S
5        U R                  (       a  UR                  S5        U(       a  SSR                  U5      -   S-   nOSn[         R#                  SU
S SU 35        g ! [         a     g f = f! , (       d  f       GN = f)Ng?ia  >   rA   rC   
oversamplerB   rD   Fz--_r8   z! was issued, causing transcoding.pdfaz8PDF/A conversion was enabled. (Try `--output-type pdf`.)zPlugins were used.z#Possible reasons for this include:
r!   z@No reason for this increase is known.  Please report this issue.zThe output file size is z.2fu   × larger than the input file.
)r   statst_sizer   pikepdfr   lenro   getattrrm   rf   rk   r9   r+   pluginsr.   r   r   )r/   rK   r:   optimize_messagesfile_overheadpage_overheadoutput_size
input_sizepreasonable_overheadratioreasonable_ratioreasonsimage_preprocargexplanations                   r   report_output_file_sizer   4  s     ;',,.66*%**,44
 
k	"a ,c!''l.JJ 
# $E"3F&FG$*u"4GM 7''NNS[[c*++LM  NN$%%%f--QR+,<tyy?QQTXXXKK
"5+-M-	K  	"	"s   AF #F
FF
F&)returnNone)r/   r   r0   z	list[str]r   r   )r/   r   r   r   )rn   rZ   r   zset[int])r/   r   r   r   r   r   )r/   r   r   r   r   ztuple[Path, str])Ni  i  )r/   r   rK   r   r:   r   r   zSequence[str] | Noner   rj   r   rj   r   r   )9__doc__
__future__r   r)   loggingr;   r   r   argparser   collections.abcr   pathlibr   shutilr   r   r   pluggyr   ocrmypdf._defaultsr	   r
   ocrmypdf._execr   ocrmypdf.exceptionsr   r   r   r   ocrmypdf.helpersr   r   r   r   r   ocrmypdf.subprocessr   	getLogger__name__r   r   r5   r>   rG   rL   r\   ru   r~   r   r   r   r   r   r   r   r   r   r   r   <module>r      s  
 6 "   	 
   $    
   O "   7!
**.7*	*>

(
C6$N:
*";3
*-Z
* /3555 5 ,	5
 5 5 
5r   