import json
import requests
import tempfile
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

from extract_land import extract_land_record, name_match_score

# ==============================
# CONFIG
# ==============================
# Path to the JSON array of records to validate.
INPUT_FILE = "formatted_output.json"
# Size of the thread pool (records processed concurrently).
MAX_WORKERS = 8

# Shared HTTP session so PDF downloads reuse connections across worker threads.
session = requests.Session()


# ==============================
# UTILS
# ==============================
def safe_float(val):
    """Coerce *val* to ``float``, returning 0.0 for anything unparseable.

    Accepts numbers and numeric strings; ``None``, empty strings and
    malformed values all fall back to 0.0 instead of raising.
    """
    try:
        return float(val)
    # Narrowed from a bare ``except:`` which also swallowed
    # KeyboardInterrupt/SystemExit. float() raises exactly these two.
    except (TypeError, ValueError):
        return 0.0


# ==============================
# DOWNLOAD PDF
# ==============================
def download_pdf(url):
    """Download *url* into a temporary ``.pdf`` file.

    Returns the temp-file path on success, or ``None`` on any network or
    disk failure. The caller is responsible for deleting the returned file.
    """
    try:
        r = session.get(url, timeout=30)
        if r.status_code != 200:
            return None
    except Exception:
        # Network errors (timeout, DNS, connection reset) → treat as "no PDF".
        return None

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        tmp.write(r.content)
    except OSError:
        # BUG FIX: previously a failed write left the temp file on disk
        # (the broad except returned None without unlinking it).
        tmp.close()
        os.unlink(tmp.name)
        return None
    tmp.close()
    return tmp.name


# ==============================
# PROCESS ONE RECORD
# ==============================
def process_record(row: dict, index: int, total: int) -> dict:
    """Validate one API record against the land-record PDFs it links to.

    Downloads every PDF in ``row["pdf_urls"]``, extracts JL/Daag numbers
    and khatian owner entries via ``extract_land_record``, and cross-checks
    them against the record's own JL/Daag lists, co-farmer names and
    declared land size (``land_convert_size``).

    Always returns a result dict with keys: ``rel_id``, ``status``
    ("ACCEPTED"/"REJECTED"), ``missing`` (human-readable failure reasons),
    ``co_farmer_match_score``, ``farmer_match_details``,
    ``total_land_acre`` and ``extra_co_farmers_from_pdf``.
    """
    try:
        print(f"\n🔄 Processing {index+1}/{total}")

        # API-side identifiers, normalised to stripped strings.
        all_jl = set(str(x).strip() for x in row.get("JL_No", []))
        all_daag = set(str(x).strip() for x in row.get("Daag_No", []))
        all_farmers = [x.strip() for x in row.get("co-farmers", []) if x.strip()]

        total_land_acre = 0.0      # acreage summed from matched PDFs
        matched_daags = set()      # daag numbers already accounted for
        valid_pdf_data_list = []   # extraction results of matched PDFs

        # ==============================
        # STEP 1: PROCESS ALL PDFs
        # ==============================
        for pdf_url in row.get("pdf_urls", []):

            pdf_path = download_pdf(pdf_url)
            if not pdf_path:
                continue  # download failed; skip this PDF

            try:
                pdf_data = extract_land_record(pdf_path)
            except Exception as e:
                print("❌ Extraction error:", e)
                os.unlink(pdf_path)
                continue

            # Temp file no longer needed once extraction succeeded.
            os.unlink(pdf_path)

            # The PDF carries a single JL number; wrap it in a set so it
            # can be compared against the API's JL set below.
            jl_pdf = {str(pdf_data.get("jl_no", "")).strip()}

            # A PDF may list several comma-separated daag numbers.
            daag_pdf = set(str(pdf_data.get("daag_no", "")).split(","))
            daag_pdf = {x.strip() for x in daag_pdf if x.strip()}

            # ✅ MATCH JL + PARTIAL DAAG
            # NOTE(review): this is set *equality*, so a record with more
            # than one JL number can never match a single-JL PDF — confirm
            # this is intended rather than `jl_pdf <= all_jl`.
            if jl_pdf == all_jl:
                overlap = daag_pdf & all_daag
                # Only count daags not already credited by an earlier PDF.
                new_daags = overlap - matched_daags

                if new_daags:
                    matched_daags.update(new_daags)
                    valid_pdf_data_list.append(pdf_data)

                    tla = safe_float(pdf_data.get("total_land_acre", ""))
                    total_land_acre += tla

                    print(f"✅ Matched PDF | New Daags: {new_daags} | Land added: {tla}")

        # ==============================
        # 🚫 STRICT BLOCK: NO JL/DAAG → NO FARMER MATCH
        # ==============================
        # No PDF matched at all → reject without attempting farmer matching.
        if not valid_pdf_data_list:
            return {
                "rel_id": row.get("rel_id"),
                "status": "REJECTED",
                "missing": ["JL/Daag mismatch"],
                "co_farmer_match_score": 0,
                "farmer_match_details": [],
                "total_land_acre": 0.0,
                "extra_co_farmers_from_pdf": []
            }

        # OPTIONAL stricter check
        # Every API-side daag must be covered by some matched PDF.
        missing_daags = all_daag - matched_daags

        if missing_daags:
            return {
                "rel_id": row.get("rel_id"),
                "status": "REJECTED",
                "missing": [f"Missing daags: {', '.join(missing_daags)}"],
                "co_farmer_match_score": 0,
                "farmer_match_details": [],
                "total_land_acre": round(total_land_acre, 4),
                "extra_co_farmers_from_pdf": []
            }

        # ==============================
        # STEP 2: FARMER MATCHING
        # ==============================
        # Flatten every khatian owner entry from all matched PDFs.
        pdf_farmer_details = []

        for pdf_data in valid_pdf_data_list:
            for e in pdf_data.get("khatian_entries", []):
                name = e.get("owner_name", "").strip()
                ansha = safe_float(e.get("ansha", ""))
                area = safe_float(e.get("area_acres", ""))

                if name:
                    pdf_farmer_details.append({
                        "name": name,
                        "ansha": ansha,
                        "area": area
                    })

        pdf_farmers = [f["name"] for f in pdf_farmer_details]

        farmer_scores = []
        farmer_details = []
        matched_pdf_names = set()

        # For each API co-farmer, find the best fuzzy match among PDF owners.
        for api_name in all_farmers:

            best_score = 0
            best_match_name = ""

            for pdf_name in pdf_farmers:
                score = name_match_score(api_name, pdf_name)

                if score > best_score:
                    best_score = score
                    best_match_name = pdf_name

                # 0.7 is the acceptance threshold; stop at the first hit.
                if score >= 0.7:
                    matched_pdf_names.add(pdf_name)
                    break

            farmer_scores.append(best_score)

            farmer_details.append({
                "api_name": api_name,
                "matched_name": best_match_name,
                "score": round(best_score, 2)
            })

        # ==============================
        # STEP 3: EXTRA FARMERS
        # ==============================
        # PDF owners with meaningful land area (> 0.05 acre) that were not
        # matched to any API co-farmer are flagged as "extra".
        extra_co_farmers = []

        for f in pdf_farmer_details:
            name = f["name"]
            area = f["area"]

            is_matched = any(
                name_match_score(name, m) >= 0.7
                for m in matched_pdf_names
            )

            if not is_matched and area > 0.05:
                extra_co_farmers.append(name)

        # De-duplicate (order is not preserved).
        extra_co_farmers = list(set(extra_co_farmers))

        # ==============================
        # FINAL SCORE
        # ==============================
        # Mean of per-farmer best scores; 0 when there are no co-farmers.
        co_farmer_score = round(sum(farmer_scores) / len(farmer_scores), 2) if farmer_scores else 0

        # ==============================
        # FAILURE CHECK
        # ==============================
        missing = []

        # NOTE(review): missing_daags is always empty here — a non-empty set
        # already triggered the early return above, so this branch is dead.
        if missing_daags:
            missing.append(f"Missing daags: {', '.join(missing_daags)}")

        low_score_farmers = [
            f["api_name"] for f in farmer_details if f["score"] < 0.7
        ]

        if low_score_farmers:
            missing.append(f"Farmer mismatch: {', '.join(low_score_farmers)}")

        if extra_co_farmers:
            missing.append(f"Extra farmers from PDF: {', '.join(extra_co_farmers)}")

        # ==============================
        # LAND VALIDATION
        # ==============================
        # Compare PDF-derived acreage to the API's declared size; allow a
        # 12% relative tolerance before flagging a mismatch.
        api_total_land = safe_float(row.get("land_convert_size", 0))
        land_mismatch = False

        if api_total_land > 0:
            percent_diff = abs(total_land_acre - api_total_land) / api_total_land

            if percent_diff > 0.12:
                land_mismatch = True
                missing.append(f"Land mismatch: API={api_total_land}, PDF={round(total_land_acre,4)}")

        # ==============================
        # FINAL STATUS
        # ==============================
        # Accepted only if every check passed: all daags covered, every
        # co-farmer matched at >= 0.7, no unexplained PDF owners, land OK.
        is_accepted = (
            not missing_daags
            and len(farmer_scores) == len(all_farmers)
            and all(score >= 0.7 for score in farmer_scores)
            and len(extra_co_farmers) == 0
            and not land_mismatch
        )

        return {
            "rel_id": row.get("rel_id"),
            "status": "ACCEPTED" if is_accepted else "REJECTED",
            "missing": missing,
            "co_farmer_match_score": co_farmer_score,
            "farmer_match_details": farmer_details,
            "total_land_acre": round(total_land_acre, 4),
            "extra_co_farmers_from_pdf": extra_co_farmers
        }

    except Exception as e:
        # Catch-all so one bad record never kills the worker thread; the
        # record is reported as rejected with a generic reason.
        print("❌ Critical error:", e)

        return {
            "rel_id": row.get("rel_id"),
            "status": "REJECTED",
            "missing": ["Internal error"],
            "co_farmer_match_score": 0,
            "farmer_match_details": [],
            "total_land_acre": 0.0,
            "extra_co_farmers_from_pdf": []
        }
    
# ==============================
# MAIN PARALLEL
# ==============================
def process_all():
    """Load INPUT_FILE and run process_record over every row in parallel.

    Returns the list of per-record result dicts (completion order, not
    input order); non-dict or falsy results are discarded.
    """
    with open(INPUT_FILE, "r", encoding="utf-8") as fh:
        records = json.load(fh)

    count = len(records)
    collected = []

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        pending = [
            pool.submit(process_record, record, position, count)
            for position, record in enumerate(records)
        ]

        for done in as_completed(pending):
            outcome = done.result()
            if outcome and isinstance(outcome, dict):
                collected.append(outcome)

    return collected


# ==============================
# SAVE OUTPUT
# ==============================
def save_results(results):
    """Group results by rel_id and write them to a timestamped JSON file.

    Acreage is summed across duplicate rel_ids, and a single REJECTED
    entry marks the whole rel_id REJECTED. Entries without a rel_id are
    dropped. The output file is ``grouped_result_<timestamp>.json`` in the
    current working directory.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")

    grouped = {}

    for item in results:
        rel_id = item.get("rel_id")
        if not rel_id:
            continue  # cannot group without an identifier

        if rel_id not in grouped:
            grouped[rel_id] = item
        else:
            # Accumulate acreage across all results for the same rel_id.
            grouped[rel_id]["total_land_acre"] += item.get("total_land_acre", 0.0)

            # A single rejection taints the whole rel_id.
            if item["status"] == "REJECTED":
                grouped[rel_id]["status"] = "REJECTED"

    for rel_id in grouped:
        grouped[rel_id]["total_land_acre"] = round(grouped[rel_id]["total_land_acre"], 4)

    filename = f"grouped_result_{ts}.json"

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(grouped, f, indent=2, ensure_ascii=False)

    # BUG FIX: previously printed the literal text "(unknown)" instead of
    # the actual output path.
    print(f"\n💾 Saved: {filename}")


# ==============================
# RUN (THIS WAS MISSING)
# ==============================
# Script entry point: run the full pipeline, then persist whatever came back.
if __name__ == "__main__":
    all_results = process_all()

    if all_results:
        save_results(all_results)
        print("✅ DONE")
    else:
        print("❌ No results generated")