ó
    TAiâ  ã                  ó<   • S r SSKJr  SSKrSSKJr   " S S5      rg)z!Utilities to measure OCR quality.é    )ÚannotationsN)ÚIterablec                  ó,   • \ rS rSrSrSS jrSS jrSrg)	ÚOcrQualityDictionaryé   z3Manages a dictionary for simple OCR quality checks.c               ó$   • [        U5      U l        g)zÄConstruct a dictionary from a list of words.

Words for which capitalization is important should be capitalized in the
dictionary. Words that contain spaces or other punctuation will never match.
N)ÚsetÚ
dictionary)ÚselfÚwordlists     ÚL/var/www/html/land-ocr/venv/lib/python3.13/site-packages/ocrmypdf/quality.pyÚ__init__ÚOcrQualityDictionary.__init__   s   € ô ˜h›-ˆó    c                ó¼  • [         R                  " SSU5      n[         R                  " SSU5      n[         R                  " SU5      nU Vs1 s H  n[        U5      S:¼  d  M  UiM     nnSnU HL  nX@R                  ;   d5  XDR                  5       :w  d  M'  UR                  5       U R                  ;   d  MG  US-  nMN     US:”  a  U[        U5      -  nU$ SnU$ s  snf )	zÚCheck how many unique words in the OCR text match a dictionary.

Words with mixed capitalized are only considered a match if the test word
matches that capitalization.

Returns:
    number of words that match / number
z[0-9_]+Ú z\W+z\s+é   r   é   g        )ÚreÚsubÚsplitÚlenr
   Úlower)r   Úocr_textÚtextÚtext_words_listÚwÚ
text_wordsÚmatchesÚ	hit_ratios           r   Úmeasure_words_matchedÚ*OcrQualityDictionary.measure_words_matched   sÆ   € ô vŠvj # xÓ0ˆÜvŠvf˜c 4Ó(ˆÜŸ(š( 6¨4Ó0ˆÙ!0Ó@¢˜A´C¸³F¸a±K—a¡ˆ
Ð@àˆÛˆAØ—O‘OÓ#Ø—W‘W“Y• 1§7¡7£9°·±Õ#?à˜1‘’ñ	 ð
 Q‹;Ø¤# j£/Ñ1ˆIð Ðð ˆIØÐùò As   ÁCÁ#C)r
   N)r   zIterable[str])r   ÚstrÚreturnÚfloat)Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__r   r!   Ú__static_attributes__© r   r   r   r      s   † Ù=ô(÷r   r   )r*   Ú
__future__r   r   Úcollections.abcr   r   r,   r   r   Ú<module>r/      s   ðñ (å "ã 	Ý $÷#ò #r   