
    $j                         d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z
 dZdZdZd Zd	 Zd
 Zd Zd Zd Zedk    rddlZddlZ eej                  dk     r+ ed            ed            ed            e             ej        dd         Zg ZeD ]?Z ej        e          Zere                    e           *e                     e           @ e! e"e                    Z ee          dk    rqed         Z# ee#          Z$ ed            ed            ed            e ej%        e$dd                      ee#          j&        dz   Z' ee$e'           dS  ee          Z( ed            ed            ed            e ej%        e(dd                      ee(d           dS dS )u&  
main_orchestrator.py

MASTER ORCHESTRATOR
===================

Pipeline:

PDF
 ↓
DL Pipeline
 ↓
Confidence Score
 ↓
if score < threshold:
    DLM Engine
 ↓
Merge Results
 ↓
Final Output

This file should remain CLEAN.
All heavy logic stays inside:
- dl_pipeline.py
- extract_land.py
    N)Path)extract_land_record_dlmerge_results)extract_land_recordF   Tc                 6    t           rt          |            d S d S )N)ENABLE_LOGSprint)msgs    //var/www/html/banglarbhumi/main_orchestrator.pylogr   ?   s#     c




     c                    g }g d}|D ]/}|                      |          s|                    d|            0|                      dg           }t          |          dk    r|                    d           t          |          D ]a\  }}|                     d          s|                    d| d           |                     d	          s|                    d| d
           b|S )N)jl_nodaag_nomouzamissing_khatian_entriesr   
no_entries
khatian_noentry__missing_khatian
owner_name_missing_owner)getappendlen	enumerate)resultissuesrequired_headersfieldentriesidxes          r   validate_resultr&   H   s   F   " . .zz%   	.MM,U,,--- jj*B//G
7||ql### G$$ 8 8Quu\"" 	:MM83888999uu\"" 	8MM63666777Mr   c                 R    t          |           j        |||t          |d          dS )N   )source_filepipeline_useddl_confidencer    processing_time_seconds)r   nameroundpdf_pathdl_scorer*   r    processing_times        r   build_metadatar3   m   s5     H~~*&!#(!#<#<  r   c                    t          j                     }t          d           t          d           t          d           t          d           t          |           }|                    dd          }t          d|            t	          |          }|rt          d|            |t
          k    rEt          d	           d
}t          j                     |z
  }t          | ||||          }i |d|i}|S t          rt          d           t          d           t          |           }	t          d           t          ||	          }
d}t          j                     |z
  }t          | ||||          }||
d<   t          d           |
S t          d           d}t          j                     |z
  }t          | ||||          }||d<   |S )N
==============================zSTARTING LAND RECORD PIPELINEz==============================z
[1] Running DL pipeline...r+   r   zDL Confidence Score: zValidation Issues: z
DL result accepted.DL_ONLYr/   metadataz
DL confidence LOW.zRunning DLM fallback engine...z
Merging DL + DLM results...DL_PLUS_DLMz!
Pipeline completed successfully.z
DLM fallback disabled.DL_ONLY_LOW_CONFIDENCE)
timer   r   r   r&   DL_THRESHOLDr3   ENABLE_DLM_FALLBACKextract_land_record_dlmr   )r0   
start_time	dl_resultr1   r    r*   
total_timer7   final_result
dlm_resultmerged_results              r   process_land_recordrD      s;   J*+++'(((())) &'''&x00I}}_a00H***+++ Y''F ,*&**+++ <#$$$!Y[[:-
!'&
 
 



 

   ""###,---,X66
 	+,,,%
 

 &Y[[:-
!'&
 
 
 %-j!0111 "###,Mz)J#"  H %Ijr   c           	         g }t          |           }t          d| d           t          | d          D ]\  }}t          d           t          d| d|            t          |            t          d           	 t          |          }|                    |d	|d
           r# t
          $ rO}|                    |dt          |          d           t          dt          |                      Y d }~d }~ww xY w|S )Nz
Processing z	 PDFs...
   )startz$
===================================zFILE /z#===================================T)filesuccessr   F)rI   rJ   errorzERROR: )r   r   r   rD   r   	Exceptionstr)	pdf_filesresultstotalr$   r0   r   r%   s          r   process_multiple_pdfsrQ      s^   G	NNE))))***"9A666 $ $X4555!C!!%!!"""xM1222	$(22FNN         	$ 	$ 	$NN  Q     "#a&&""########	$ Ns   ?(B((
D2AC<<Dc                     t          |dd          5 }t          j        | |dd           d d d            n# 1 swxY w Y   t          d|            d S )Nwzutf-8)encodingFr(   ensure_asciiindentz
Saved JSON: )openjsondumpr   )r   output_pathfs      r   save_result_jsonr]   $  s    	k3	1	1	1 
Q			
 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 &&&'''''s   8<<__main__r(   z
Usage:z$python main_orchestrator.py file.pdfz!python main_orchestrator.py *.pdfrF   r5   zFINAL RESULTz==============================
FrU   z_result.jsonzBATCH PROCESSING COMPLETEzbatch_results.json))__doc__rY   r:   pathlibr   dl_pipeliner   r   extract_landr   r=   r;   r<   r	   r   r&   r3   rD   rQ   r]   __name__sysglobr   argvr
   exittargetsrN   texpandedextendr   listsetr0   r   dumpsstemoutput_jsonrO    r   r   <module>rr      s`   6                          J  (t t tt# # #R( ( (" zJJJKKK
s38}}qj45551222hqrrlGI     49Q<< 	 X&&&&Q SS^^$$I s9~~Q<$$X..0111n0111DJ"  	
 	
 	
 DNN 	
 		
 	
 	
 	
 	
 ('	220111)***0111DJ"  	
 	
 	
 	 	
 	
 	
 	
 	
g r   