
    c7i                     V   S SK JrJr  S SKJr  S SKrS SKrS SKrS SKrS SK	r	S SK
JrJrJr  S SKJr  \" 5       r\	R"                  " \	R$                  S9  S rS rS	 rS
 rS rS r " S S\5      rS r\R7                  S5      S\4S j5       r\R=                  S5      S\4S j5       rg)    )FastAPIHTTPException)	BaseModelNtransliterateBENGALIITRANS)BytesIO)levelc                     U (       d  g[         R                  " SU 5      n U R                  SS5      n U R                  5       $ )N NFCz^@)unicodedata	normalizereplacestrip)ts    "/var/www/html/banglarbhumi/main.py
clean_textr      s6    eQ'A			$A779    c                     U (       d  g U R                  5       n / SQnU H
  nX ;   d  M
    g    [        R                  " SU 5      (       a  g [        U 5      S:  a  g U $ )N)   খতিয়ানu   রায়েতরu	   নামu   পিতাu   স্বামীClickRemarksNil\d+\.\d+   )r   researchlen)nameheader_wordshs      r   rule_lm_validater$      sW    ::<DL
 9  
yyd##
4y1}Kr   c                 @     [        U [        [        5      $ !   U s $ = f)Nr   )texts    r   bengali_to_englishr'   :   s"    T7F33s    c                     0 nU  H8  n[        US   S-  5      S-  nUR                  U/ 5      R                  U5        M:     U[        U5      4$ )Ntop   )round
setdefaultappendsorted)wordsrowswys       r   
group_rowsr3   D   sR    D!E(Q,!#2%%a(  r   c                 d    [         R                  " SU 5      n[        U5      S:  a
  US   US   4$ g)Nr      r      )NN)r   findallr    )r&   numss     r   detect_share_rowr9   R   s3    ::k4(D
4yA~AwQr   c                 f   / n [         R                  " [        U 5      5       nUR                   GH  nUR	                  SS9=(       d    / n[        U5      u  pg[        U5       GHo  u  p[        Xi   S S9n
SR                  S U
 5       5      n[        U5      u  pU(       d  MA  S nU
 H1  n[        US   5      n[        R                  " SU5      (       d  M/  Un  O   U(       d  M  S	nUS
-
  nUS:  a  [        XgU      S S9nSR                  S U 5       5      nSU;   d  SU;   a  Ot[        R                  " SU5      (       a  US
-  nM^  SU;   a  US
-  nMk  / nU H*  n[        US   5      nUS;  d  M  UR                  U5        M,     SR                  U5      n [        U5      nU(       d  GMN  UR                  UUUUU[!        U5      S.5        GMr     GM     S S S 5        U$ ! , (       d  f       U$ = f! ["         a-  n[$        R&                  " S[)        U5       35         S nAU$ S nAff = f)NT)use_text_flowc                     U S   $ Nx0 xs    r   <lambda>"extract_pdf_data.<locals>.<lambda>n   s    agr   )key c              3   >   #    U  H  n[        US    5      v   M     g7fr&   Nr   .0r1   s     r   	<genexpr>#extract_pdf_data.<locals>.<genexpr>o   s     'Qy!
1V9(=(=y   r&   z\d{1,5}r   r6   r   c                     U S   $ r=   r?   r@   s    r   rB   rC      s    PQRVPWr   c              3   >   #    U  H  n[        US    5      v   M     g7frG   rH   rI   s     r   rK   rL      s     ,WJqZ&	-B-BJrM   u   রায়েতr   r   u   ব্যা)r   r   z--)filekhatianraiyoter_namesharearearaiyoter_name_enzPDF parsing failed: )
pdfplumberopenr
   pagesextract_wordsr3   	enumerater.   joinr9   r   r   	fullmatchr   r-   r$   r'   	Exceptionloggingerrorstr)	pdf_bytesfilenamerecordspdfpager/   r0   sorted_rowsir2   	row_wordsrow_textrS   rT   rQ   r1   r   rR   j
prev_words	prev_text
name_partses                          r   extract_pdf_dataro   _   s-   GS7__WY/0C		***>D"$.u$5!%k2DA &tw4E FI"xx'Qy'QQH"28"<KE  
 #G&&qy1<<
A66&'G! ' # 
 %'MAAq&%+DQ,@FW%X
$'HH,WJ,W$W	,	9=QU^=^!99[)<<FA$)Y6FA$%'
!+A *1V9 5A (@@ * 1 1! 4	 ", ),(<$4]$CM( NN (#*)6!& $,>},M$ A 3 " 1h Ni 10h N  7,SVH566N7sC   G9 B1G'BG'6A'G'G9 '
G61G9 6G9 9
H0"H++H0c                        \ rS rSr% \\S'   Srg)
PdfRequest   pdf_urlr?   N)__name__
__module____qualname____firstlineno__r`   __annotations____static_attributes__r?   r   r   rq   rq      s    Lr   rq   c                 f    [         R                  " U SS9nUR                  S:w  a
  [        SSS9eSUR                  R                  SS	5      ;  a
  [        SS
S9eUR
                  $ ! [         R                  R                   a    [        SSS9e[         a  n[        S[        U5      S9eS nAff = f)N   )timeout   i  zFailed to fetch PDF)status_codedetailzapplication/pdfzContent-Typer   zURL is not a PDFi  zRequest timeouti  )
requestsgetr~   r   headerscontent
exceptionsTimeoutr]   r`   )urlresponsern   s      r   	fetch_pdfr      s    <<<R03&C8MNNH$4$4$8$8$LLC8JKK&& G4EFF <CF;;<s   A$A' '1B0B++B0z/extractrs   c                 t    [        U 5      nU R                  S5      S   n[        X5      nS[        U5      US.$ )N/success)statustotal_recordsdata)r   splitro   r    )rs   ra   rb   rc   s       r   extract_pdfr      sD     '"I}}S!"%Hy3G W r   z/farmers-above-sharereqc                    [        U R                  5      nU R                  R                  S5      S   n[        X5      n/ nU H)  n [	        US   5      S:  a  UR                  U5        M)  M+     S[        U5      US.$ !    MA  = f)Nr   r   rS   gMbP?r   )r   total_farmersfarmers)r   rs   r   ro   floatr-   r    )r   ra   rb   rc   filteredrs         r   farmers_above_sharer      s     #++&I{{  %b)Hy3G H	QwZ 5(" )  X 	s   #A??B) fastapir   r   pydanticr   rV   r   r   r   r^   indic_transliteration.sanscriptr   r   r	   ior
   appbasicConfigINFOr   r$   r'   r3   r9   ro   rq   r   r   r`   r   postr   r?   r   r   <module>r      s    *    	   J J i   ',, '8Y~ <.   $ 
 !Z  "r   