# ################# PURE DLM ########################################
# import json
# import requests
# import tempfile
# import os
# from concurrent.futures import ThreadPoolExecutor, as_completed
# from datetime import datetime

# from extract_land import extract_land_record, name_match_score

# # ==============================
# # CONFIG
# # ==============================
# INPUT_FILE = "formatted_output.json"
# MAX_WORKERS = 8

# session = requests.Session()


# # ==============================
# # UTILS
# # ==============================
# def safe_float(val):
#     try:
#         return float(val)
#     except:
#         return 0.0


# # ==============================
# # DOWNLOAD PDF
# # ==============================
# def download_pdf(url):
#     try:
#         r = session.get(url, timeout=30)
#         if r.status_code != 200:
#             return None

#         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
#         tmp.write(r.content)
#         tmp.close()
#         return tmp.name

#     except Exception:
#         return None


# # ==============================
# # PROCESS ONE RECORD
# # ==============================
# def process_record(row, index, total):
#     try:
#         print(f"\n🔄 Processing {index+1}/{total}")

#         all_jl = set(str(x).strip() for x in row.get("JL_No", []))
#         all_daag = set(str(x).strip() for x in row.get("Daag_No", []))
#         all_farmers = [x.strip() for x in row.get("co-farmers", []) if x.strip()]

#         total_land_acre = 0.0
#         matched_daags = set()
#         valid_pdf_data_list = []

#         # ==============================
#         # STEP 1: PROCESS ALL PDFs
#         # ==============================
#         for pdf_url in row.get("pdf_urls", []):

#             pdf_path = download_pdf(pdf_url)
#             if not pdf_path:
#                 continue

#             try:
#                 pdf_data = extract_land_record(pdf_path)
#             except Exception as e:
#                 print("❌ Extraction error:", e)
#                 os.unlink(pdf_path)
#                 continue

#             os.unlink(pdf_path)

#             jl_pdf = {str(pdf_data.get("jl_no", "")).strip()}

#             daag_pdf = set(str(pdf_data.get("daag_no", "")).split(","))
#             daag_pdf = {x.strip() for x in daag_pdf if x.strip()}

#             # ✅ MATCH JL + PARTIAL DAAG
#             if jl_pdf == all_jl:
#                 overlap = daag_pdf & all_daag
#                 new_daags = overlap - matched_daags

#                 if new_daags:
#                     matched_daags.update(new_daags)
#                     valid_pdf_data_list.append(pdf_data)

#                     tla = safe_float(pdf_data.get("total_land_acre", ""))
#                     total_land_acre += tla

#                     print(f"✅ Matched PDF | New Daags: {new_daags} | Land added: {tla}")

#         # ==============================
#         # 🚫 STRICT BLOCK: NO JL/DAAG → NO FARMER MATCH
#         # ==============================
#         if not valid_pdf_data_list:
#             return {
#                 "rel_id": row.get("rel_id"),
#                 "status": "REJECTED",
#                 "missing": ["JL/Daag mismatch"],
#                 "co_farmer_match_score": 0,
#                 "farmer_match_details": [],
#                 "total_land_acre": 0.0,
#                 "extra_co_farmers_from_pdf": []
#             }

#         # OPTIONAL stricter check
#         missing_daags = all_daag - matched_daags

#         if missing_daags:
#             return {
#                 "rel_id": row.get("rel_id"),
#                 "status": "REJECTED",
#                 "missing": [f"Missing daags: {', '.join(missing_daags)}"],
#                 "co_farmer_match_score": 0,
#                 "farmer_match_details": [],
#                 "total_land_acre": round(total_land_acre, 4),
#                 "extra_co_farmers_from_pdf": []
#             }

#         # ==============================
#         # STEP 2: FARMER MATCHING
#         # ==============================
#         pdf_farmer_details = []

#         for pdf_data in valid_pdf_data_list:
#             for e in pdf_data.get("khatian_entries", []):
#                 name = e.get("owner_name", "").strip()
#                 ansha = safe_float(e.get("ansha", ""))
#                 area = safe_float(e.get("area_acres", ""))

#                 if name:
#                     pdf_farmer_details.append({
#                         "name": name,
#                         "ansha": ansha,
#                         "area": area
#                     })

#         pdf_farmers = [f["name"] for f in pdf_farmer_details]

#         farmer_scores = []
#         farmer_details = []
#         matched_pdf_names = set()

#         for api_name in all_farmers:

#             best_score = 0
#             best_match_name = ""

#             for pdf_name in pdf_farmers:
#                 score = name_match_score(api_name, pdf_name)

#                 if score > best_score:
#                     best_score = score
#                     best_match_name = pdf_name

#                 if score >= 0.7:
#                     matched_pdf_names.add(pdf_name)
#                     break

#             farmer_scores.append(best_score)

#             farmer_details.append({
#                 "api_name": api_name,
#                 "matched_name": best_match_name,
#                 "score": round(best_score, 2)
#             })

#         # ==============================
#         # STEP 3: EXTRA FARMERS
#         # ==============================
#         extra_co_farmers = []

#         for f in pdf_farmer_details:
#             name = f["name"]
#             area = f["area"]

#             is_matched = any(
#                 name_match_score(name, m) >= 0.7
#                 for m in matched_pdf_names
#             )

#             if not is_matched and area > 0.05:
#                 extra_co_farmers.append(name)

#         extra_co_farmers = list(set(extra_co_farmers))

#         # ==============================
#         # FINAL SCORE
#         # ==============================
#         co_farmer_score = round(sum(farmer_scores) / len(farmer_scores), 2) if farmer_scores else 0

#         # ==============================
#         # FAILURE CHECK
#         # ==============================
#         missing = []

#         if missing_daags:
#             missing.append(f"Missing daags: {', '.join(missing_daags)}")

#         low_score_farmers = [
#             f["api_name"] for f in farmer_details if f["score"] < 0.7
#         ]

#         if low_score_farmers:
#             missing.append(f"Farmer mismatch: {', '.join(low_score_farmers)}")

#         if extra_co_farmers:
#             missing.append(f"Extra farmers from PDF: {', '.join(extra_co_farmers)}")

#         # ==============================
#         # LAND VALIDATION
#         # ==============================
#         api_total_land = safe_float(row.get("land_convert_size", 0))
#         land_mismatch = False

#         if api_total_land > 0:
#             percent_diff = abs(total_land_acre - api_total_land) / api_total_land

#             if percent_diff > 0.12:
#                 land_mismatch = True
#                 missing.append(f"Land mismatch: API={api_total_land}, PDF={round(total_land_acre,4)}")

#         # ==============================
#         # FINAL STATUS
#         # ==============================
#         is_accepted = (
#             not missing_daags
#             and len(farmer_scores) == len(all_farmers)
#             and all(score >= 0.7 for score in farmer_scores)
#             and len(extra_co_farmers) == 0
#             and not land_mismatch
#         )

#         return {
#             "rel_id": row.get("rel_id"),
#             "status": "ACCEPTED" if is_accepted else "REJECTED",
#             "missing": missing,
#             "co_farmer_match_score": co_farmer_score,
#             "farmer_match_details": farmer_details,
#             "total_land_acre": round(total_land_acre, 4),
#             "extra_co_farmers_from_pdf": extra_co_farmers
#         }

#     except Exception as e:
#         print("❌ Critical error:", e)

#         return {
#             "rel_id": row.get("rel_id"),
#             "status": "REJECTED",
#             "missing": ["Internal error"],
#             "co_farmer_match_score": 0,
#             "farmer_match_details": [],
#             "total_land_acre": 0.0,
#             "extra_co_farmers_from_pdf": []
#         }
    
# # ==============================
# # MAIN PARALLEL
# # ==============================
# def process_all():

#     with open(INPUT_FILE, "r", encoding="utf-8") as f:
#         data = json.load(f)

#     results = []
#     total = len(data)

#     with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
#         futures = [
#             executor.submit(process_record, row, i, total)
#             for i, row in enumerate(data)
#         ]

#         for future in as_completed(futures):
#             result = future.result()

#             if result and isinstance(result, dict):
#                 results.append(result)

#     return results


# # ==============================
# # SAVE OUTPUT
# # ==============================

# def save_results(results):
#     ts = datetime.now().strftime("%Y%m%d_%H%M%S")

#     grouped = {}

#     for item in results:
#         rel_id = item.get("rel_id")

#         if not rel_id:
#             continue

#         if rel_id not in grouped:
#             grouped[rel_id] = item

#         else:
#             grouped[rel_id]["total_land_acre"] += item.get(
#                 "total_land_acre", 0.0
#             )

#             if item["status"] == "REJECTED":
#                 grouped[rel_id]["status"] = "REJECTED"

#     # ==============================
#     # ROUND LAND
#     # ==============================
#     for rel_id in grouped:
#         grouped[rel_id]["total_land_acre"] = round(
#             grouped[rel_id]["total_land_acre"], 4
#         )

#     # ==============================
#     # ADD AI RESPONSE FIELDS
#     # ==============================
#     for rel_id, item in grouped.items():

#         score = item.get("co_farmer_match_score", 0)
#         status = item.get("status")

#         item["status_pecentage"] = int(score * 100)

#         item["ai_status"] = (
#             "APPROVED"
#             if status == "ACCEPTED"
#             else "REJECTED"
#         )

#         missing = item.get("missing", [])

#         item["ai_status_comment"] = (
#             "All matched"
#             if not missing
#             else ", ".join(missing)
#         )

#     # ==============================
#     # SAVE FILE
#     # ==============================
#     filename = f"grouped_result_{ts}.json"

#     with open(filename, "w", encoding="utf-8") as f:
#         json.dump(grouped, f, indent=2, ensure_ascii=False)

#     print(f"\n💾 Saved: {filename}")


# # def save_results(results):
# #     ts = datetime.now().strftime("%Y%m%d_%H%M%S")

# #     grouped = {}

# #     for item in results:
# #         rel_id = item.get("rel_id")
# #         if not rel_id:
# #             continue

# #         if rel_id not in grouped:
# #             grouped[rel_id] = item
# #         else:
# #             grouped[rel_id]["total_land_acre"] += item.get("total_land_acre", 0.0)

# #             if item["status"] == "REJECTED":
# #                 grouped[rel_id]["status"] = "REJECTED"

# #     for rel_id in grouped:
# #         grouped[rel_id]["total_land_acre"] = round(grouped[rel_id]["total_land_acre"], 4)

    
    

# #     filename = f"grouped_result_{ts}.json"

# #     with open(filename, "w", encoding="utf-8") as f:
# #         json.dump(grouped, f, indent=2, ensure_ascii=False)

# #     print(f"\n💾 Saved: {filename}")


# # ==============================
# # RUN (THIS WAS MISSING)
# # ==============================
# if __name__ == "__main__":
#     results = process_all()

#     if not results:
#         print("❌ No results generated")
#     else:
#         save_results(results)
#         print("✅ DONE")      




################# PURE DLM ########################################
import json
import requests
import tempfile
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

from extract_land import extract_land_record, name_match_score

# ==============================
# CONFIG
# ==============================
# Path of the JSON file that process_all() loads the records from.
INPUT_FILE = "formatted_output.json"
# Number of worker threads process_all() gives to its ThreadPoolExecutor.
MAX_WORKERS = 8

# Module-level HTTP session; download_pdf() issues every PDF request through it.
session = requests.Session()


# ==============================
# UTILS
# ==============================
def safe_float(val):
    """Convert *val* to ``float``, falling back to ``0.0`` when it cannot be converted.

    Args:
        val: Any value to pass to ``float()`` (number, numeric string, ...).

    Returns:
        ``float(val)``, or ``0.0`` when the conversion raises ``TypeError``
        (e.g. ``None``), ``ValueError`` (e.g. ``""`` or ``"abc"``) or
        ``OverflowError`` (an int too large for a float).
    """
    try:
        return float(val)
    # Only conversion failures are treated as "no value"; a bare ``except``
    # would also swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return 0.0


# ==============================
# DOWNLOAD PDF
# ==============================
def download_pdf(url):
    """Download *url* into a temporary ``.pdf`` file and return the file's path.

    The request goes through the module-level ``session`` with a 30 second
    timeout.

    Args:
        url: Address of the PDF to fetch.

    Returns:
        Path of the temporary file holding the response body, or ``None`` when
        the request raises, the status code is not 200, or the body cannot be
        written. The file is created with ``delete=False``, so the caller is
        responsible for removing it.
    """
    try:
        r = session.get(url, timeout=30)
        if r.status_code != 200:
            return None
    except Exception:
        # Best-effort download: any request failure is reported as "no file".
        return None

    tmp_path = None
    try:
        # The context manager closes the handle even when write() fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name
            tmp.write(r.content)
        return tmp_path

    except Exception:
        # Do not leave a partially written file behind on failure.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Nothing more can be done if the cleanup itself fails.
                pass
        return None


# ==============================
# PROCESS ONE RECORD
# ==============================
def process_record(row, index, total):
    """Validate one API land record against the PDFs listed in ``row["pdf_urls"]``.

    Steps:
      1. Download every PDF, extract it with ``extract_land_record`` and keep
         the ones whose JL number equals the record's JL set and which
         contribute at least one not-yet-matched daag number. The
         ``total_land_acre`` of each kept PDF is summed.
      2. Reject early when no PDF was kept or when some daag numbers of the
         record were not found in any kept PDF.
      3. Score every API co-farmer against the owner names of the kept PDFs
         with ``name_match_score`` and keep the best score per co-farmer.
      4. List PDF owners that were not matched and hold more than 0.05 acres.
      5. Compare the summed PDF land with ``row["land_convert_size"]``
         (more than 12% relative difference is a mismatch).

    The record is ACCEPTED when the average co-farmer score is above 0.6 and
    there is no land mismatch. Extra PDF owners are reported in ``missing``
    but do not block acceptance.

    Args:
        row: Dict for one record; reads ``rel_id``, ``JL_No``, ``Daag_No``,
            ``co-farmers``, ``pdf_urls`` (list of dicts with ``pdf_url``,
            ``file_id``, ``term_id``, ``term_type``) and ``land_convert_size``.
        index: Zero-based position of the record, used only for the progress line.
        total: Total number of records, used only for the progress line.

    Returns:
        Dict with the keys ``rel_id``, ``status`` ("ACCEPTED"/"REJECTED"),
        ``missing``, ``pdf_issues``, ``co_farmer_match_score``,
        ``farmer_match_details``, ``total_land_acre`` and
        ``extra_co_farmers_from_pdf``. Any unexpected exception is turned into
        a REJECTED result whose ``missing`` is ``["Internal error"]``.
    """
    # Created before the try block so the error handler can always report the
    # PDF issues collected up to the point of failure.
    pdf_issues = []

    try:
        print(f"\n🔄 Processing {index+1}/{total}")

        all_jl = {str(x).strip() for x in row.get("JL_No", [])}
        all_daag = {str(x).strip() for x in row.get("Daag_No", [])}
        all_farmers = [x.strip() for x in row.get("co-farmers", []) if x.strip()]

        total_land_acre = 0.0
        matched_daags = set()
        valid_pdf_data_list = []

        # ==============================
        # STEP 1: PROCESS ALL PDFs
        # ==============================
        for pdf_info in row.get("pdf_urls", []):

            pdf_url = pdf_info.get("pdf_url")

            # Identification fields copied into every issue raised for this PDF.
            pdf_issue_info = {
                "pdf_url": pdf_url,
                "file_id": pdf_info.get("file_id"),
                "term_id": pdf_info.get("term_id"),
                "term_type": pdf_info.get("term_type")
            }

            pdf_path = download_pdf(pdf_url)

            if not pdf_path:
                pdf_issues.append({
                    **pdf_issue_info,
                    "reason": "PDF download failed"
                })
                continue

            try:
                pdf_data = extract_land_record(pdf_path)

            except Exception as e:
                print("❌ Extraction error:", e)

                pdf_issues.append({
                    **pdf_issue_info,
                    "reason": f"Extraction error: {str(e)}"
                })
                continue

            finally:
                # The temp file is only needed during extraction; remove it
                # whether extraction succeeded or failed.
                os.unlink(pdf_path)

            # A falsy result (e.g. None) is treated like an empty extraction
            # instead of failing the whole record.
            if not pdf_data or not (
                pdf_data.get("jl_no")
                or pdf_data.get("daag_no")
                or pdf_data.get("khatian_entries")
            ):
                pdf_issues.append({
                    **pdf_issue_info,
                    "reason": "No data extracted"
                })
                continue

            jl_pdf = {str(pdf_data.get("jl_no", "")).strip()}

            # The PDF daag field is a comma-separated string.
            daag_pdf = {
                x.strip()
                for x in str(pdf_data.get("daag_no", "")).split(",")
                if x.strip()
            }

            # ==============================
            # MATCH JL + PARTIAL DAAG
            # ==============================
            if jl_pdf != all_jl:
                pdf_issues.append({
                    **pdf_issue_info,
                    "reason": "JL mismatch",
                    "pdf_jl": sorted(jl_pdf),
                    "api_jl": sorted(all_jl)
                })
                continue

            overlap = daag_pdf & all_daag

            if not overlap:
                pdf_issues.append({
                    **pdf_issue_info,
                    "reason": "Daag mismatch",
                    "pdf_daag": sorted(daag_pdf),
                    "api_daag": sorted(all_daag)
                })
                continue

            # Only daags not already covered by an earlier PDF count, so a
            # duplicate PDF does not add its land a second time.
            new_daags = overlap - matched_daags

            if new_daags:
                matched_daags.update(new_daags)
                valid_pdf_data_list.append(pdf_data)

                tla = safe_float(pdf_data.get("total_land_acre", ""))
                total_land_acre += tla

                print(
                    f"✅ Matched PDF | New Daags: {new_daags} | Land added: {tla}"
                )

        # ==============================
        # STRICT BLOCK
        # ==============================
        if not valid_pdf_data_list:
            return _rejected_result(row, ["JL/Daag mismatch"], pdf_issues)

        missing_daags = all_daag - matched_daags

        if missing_daags:
            return _rejected_result(
                row,
                [f"Missing daags: {', '.join(sorted(missing_daags))}"],
                pdf_issues,
                round(total_land_acre, 4)
            )

        # ==============================
        # STEP 2: FARMER MATCHING
        # ==============================
        pdf_farmer_details = []

        for pdf_data in valid_pdf_data_list:
            for e in pdf_data.get("khatian_entries") or []:

                # A missing/None owner name is skipped rather than crashing.
                name = (e.get("owner_name") or "").strip()

                if name:
                    pdf_farmer_details.append({
                        "name": name,
                        "ansha": safe_float(e.get("ansha", "")),
                        "area": safe_float(e.get("area_acres", ""))
                    })

        pdf_farmers = [f["name"] for f in pdf_farmer_details]

        farmer_scores = []
        farmer_details = []
        matched_pdf_farmers = set()

        for api_name in all_farmers:

            # Every PDF owner is scored so the reported score is the true best
            # match, not merely the first one that reaches the threshold.
            best_match_name, best_score = _best_name_match(api_name, pdf_farmers)

            if best_score >= 0.5:
                matched_pdf_farmers.add(best_match_name)

            farmer_scores.append(best_score)

            farmer_details.append({
                "api_name": api_name,
                "matched_name": best_match_name,
                "score": round(best_score, 2)
            })

        # ==============================
        # STEP 3: EXTRA FARMERS
        # ==============================
        extra_co_farmers = []
        seen_extra = set()

        for f in pdf_farmer_details:

            name = f["name"]

            if name in matched_pdf_farmers or name in seen_extra:
                continue

            # Owners holding 0.05 acres or less are not reported as extra.
            if f["area"] > 0.05:
                extra_co_farmers.append(name)
                seen_extra.add(name)

        # ==============================
        # FINAL SCORE
        # ==============================
        co_farmer_score = (
            round(sum(farmer_scores) / len(farmer_scores), 2)
            if farmer_scores else 0
        )
        score_ok = co_farmer_score > 0.6

        # ==============================
        # FAILURE CHECK
        # ==============================
        missing = []

        low_score_farmers = [
            f["api_name"]
            for f in farmer_details
            if f["score"] < 0.5
        ]

        if low_score_farmers:
            missing.append(
                f"Farmer mismatch: {', '.join(low_score_farmers)}"
            )

        if not score_ok:
            # Without this entry a record rejected only for its average score
            # would carry no rejection reason at all.
            missing.append(
                f"Co-farmer match score too low: {co_farmer_score}"
            )

        if extra_co_farmers:
            missing.append(
                f"Extra farmers from PDF: {', '.join(extra_co_farmers)}"
            )

        # ==============================
        # LAND VALIDATION
        # ==============================
        api_total_land = safe_float(
            row.get("land_convert_size", 0)
        )

        land_mismatch = False

        if api_total_land > 0:

            percent_diff = (
                abs(total_land_acre - api_total_land)
                / api_total_land
            )

            if percent_diff > 0.12:
                land_mismatch = True

                missing.append(
                    f"Land mismatch: API={api_total_land}, "
                    f"PDF={round(total_land_acre,4)}"
                )

        # ==============================
        # FINAL STATUS
        # ==============================
        # Missing daags were already rejected above, and extra PDF farmers are
        # informational only, so the decision rests on score and land.
        is_accepted = score_ok and not land_mismatch

        return {
            "rel_id": row.get("rel_id"),
            "status": "ACCEPTED" if is_accepted else "REJECTED",
            "missing": missing,
            "pdf_issues": pdf_issues,
            "co_farmer_match_score": co_farmer_score,
            "farmer_match_details": farmer_details,
            "total_land_acre": round(total_land_acre, 4),
            "extra_co_farmers_from_pdf": extra_co_farmers
        }

    except Exception as e:
        print("❌ Critical error:", e)

        return _rejected_result(row, ["Internal error"], pdf_issues)


def _rejected_result(row, missing, pdf_issues, total_land_acre=0.0):
    """Build the REJECTED result dict used when no farmer matching was done."""
    return {
        "rel_id": row.get("rel_id"),
        "status": "REJECTED",
        "missing": missing,
        "pdf_issues": pdf_issues,
        "co_farmer_match_score": 0,
        "farmer_match_details": [],
        "total_land_acre": total_land_acre,
        "extra_co_farmers_from_pdf": []
    }


def _best_name_match(api_name, pdf_names):
    """Return ``(pdf_name, score)`` for the highest ``name_match_score``; ``("", 0)`` if none scores above 0."""
    best_name = ""
    best_score = 0

    for pdf_name in pdf_names:
        score = name_match_score(api_name, pdf_name)

        # Strict comparison keeps the first name when several tie.
        if score > best_score:
            best_score = score
            best_name = pdf_name

    return best_name, best_score
    
# ==============================
# MAIN PARALLEL
# ==============================
def process_all():
    """Load ``INPUT_FILE`` and run ``process_record`` on every row in a thread pool.

    Returns:
        List of the result dicts returned by ``process_record``, in the same
        order as the rows of the input file. Falsy or non-dict results are
        left out.
    """
    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        data = json.load(f)

    total = len(data)

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(process_record, row, i, total)
            for i, row in enumerate(data)
        ]

        # Collect in submission order rather than completion order: the
        # grouping in save_results keeps the first record per rel_id, so a
        # timing-dependent order would make the saved output vary between
        # runs. All tasks are already running, so this costs no extra time.
        outcomes = [future.result() for future in futures]

    return [
        result
        for result in outcomes
        if result and isinstance(result, dict)
    ]


# ==============================
# SAVE OUTPUT
# ==============================

def save_results(results):
    """Group *results* by ``rel_id``, add the AI summary fields and write them to JSON.

    The first result seen for a ``rel_id`` becomes the group entry (the dict
    itself is reused and modified in place). Later results with the same
    ``rel_id`` add their ``total_land_acre``, force the status to "REJECTED"
    when they are rejected, and contribute their ``missing`` reasons and
    ``pdf_issues`` so the merged entry explains its own status. Results without
    a truthy ``rel_id`` are skipped.

    Each group entry then receives:
      * ``status_pecentage``: ``co_farmer_match_score`` as a whole percentage.
      * ``ai_status``: "APPROVED" when the status is "ACCEPTED", else "REJECTED".
      * ``ai_status_comment``: "All matched" or the joined ``missing`` reasons.

    The grouped dict is written to ``grouped_result_<timestamp>.json`` in the
    current working directory.

    Args:
        results: Iterable of result dicts as produced by ``process_record``.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")

    grouped = {}

    for item in results:
        rel_id = item.get("rel_id")

        if not rel_id:
            continue

        if rel_id not in grouped:
            grouped[rel_id] = item
            continue

        merged = grouped[rel_id]

        merged["total_land_acre"] += item.get("total_land_acre", 0.0)

        if item["status"] == "REJECTED":
            merged["status"] = "REJECTED"

        # Carry over the later record's reasons; otherwise a group rejected
        # because of a later record would still be commented "All matched".
        extra_reasons = item.get("missing")
        if extra_reasons:
            reasons = merged.setdefault("missing", [])
            for reason in extra_reasons:
                if reason not in reasons:
                    reasons.append(reason)

        extra_issues = item.get("pdf_issues")
        if extra_issues:
            merged.setdefault("pdf_issues", []).extend(extra_issues)

    # ==============================
    # ROUND LAND
    # ==============================
    for item in grouped.values():
        item["total_land_acre"] = round(item["total_land_acre"], 4)

    # ==============================
    # ADD AI RESPONSE FIELDS
    # ==============================
    for item in grouped.values():

        score = item.get("co_farmer_match_score", 0)

        # Round before truncating: 0.57 * 100 is 56.99999999999999 in binary
        # floating point and a bare int() would report 56.
        # NOTE: the key spelling "status_pecentage" is kept as-is because it
        # is part of the emitted JSON.
        item["status_pecentage"] = int(round(score * 100))

        item["ai_status"] = (
            "APPROVED"
            if item.get("status") == "ACCEPTED"
            else "REJECTED"
        )

        missing = item.get("missing", [])

        item["ai_status_comment"] = (
            ", ".join(missing)
            if missing
            else "All matched"
        )

    # ==============================
    # SAVE FILE
    # ==============================
    filename = f"grouped_result_{ts}.json"

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(grouped, f, indent=2, ensure_ascii=False)

    print(f"\n💾 Saved: {filename}")


# def save_results(results):
#     ts = datetime.now().strftime("%Y%m%d_%H%M%S")

#     grouped = {}

#     for item in results:
#         rel_id = item.get("rel_id")
#         if not rel_id:
#             continue

#         if rel_id not in grouped:
#             grouped[rel_id] = item
#         else:
#             grouped[rel_id]["total_land_acre"] += item.get("total_land_acre", 0.0)

#             if item["status"] == "REJECTED":
#                 grouped[rel_id]["status"] = "REJECTED"

#     for rel_id in grouped:
#         grouped[rel_id]["total_land_acre"] = round(grouped[rel_id]["total_land_acre"], 4)

    
    

#     filename = f"grouped_result_{ts}.json"

#     with open(filename, "w", encoding="utf-8") as f:
#         json.dump(grouped, f, indent=2, ensure_ascii=False)

#     print(f"\n💾 Saved: {filename}")


# ==============================
# RUN (script entry point)
# ==============================
if __name__ == "__main__":
    # Script entry point: process every record, then save whatever came back.
    results = process_all()

    if results:
        save_results(results)
        print("✅ DONE")
    else:
        print("❌ No results generated")