import requests
import math
import time
import json
from typing import List, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed


# ==============================
# CONFIGURATION
# ==============================
BASE_URL = "https://mygreenqube.com:4003/landRoute/plotListWithDocs"

# Header expected by the API; "information" is assumed to carry a
# project/API identifier.
HEADERS = {
    "information": "paddy_carbon_pr_53158"
}

PAGE_LIMIT = 50     # rows requested per page
MAX_WORKERS = 10    # concurrent page-fetch threads
MAX_RETRIES = 3     # attempts per page before giving up
OUTPUT_FILE = "formatted_output.json"


# ==============================
# SAFE SPLIT (JL / DAAG)
# ==============================
def safe_split(value):
    """Split a comma-separated string into a list of trimmed, non-empty parts.

    None/empty values yield []; non-string values are stringified into a
    single-element list.
    """
    if not value:
        return []
    if isinstance(value, str):
        return [x.strip() for x in value.split(",") if x.strip()]
    return [str(value).strip()]
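
# Illustrative behaviour (example values made up):
#   safe_split("101, 102 ,")  -> ["101", "102"]
#   safe_split(None)          -> []
#   safe_split(205)           -> ["205"]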


# ==============================
# EXTRACT CO-FARMERS (STRICT)
# ==============================
def extract_cofarmers(row):
    """Collect the main land user's name plus co-farmer names.

    Blank names, the "N.A. N.A." placeholder, and duplicates are skipped;
    original order is preserved.
    """
    co_farmers = []

    # ---- Main user ----
    land_user = row.get("landUsers")
    if isinstance(land_user, dict):
        name = (land_user.get("userName") or "").strip()
        if name and name.upper() != "N.A. N.A.":
            co_farmers.append(name)

    # ---- Co-farmers ----
    cof_list = row.get("landCofarmers")
    if isinstance(cof_list, list):
        for co in cof_list:
            if not isinstance(co, dict):
                continue

            # Skip blank / invalid co-farmer info
            cof_info = co.get("coFInfo")
            if not isinstance(cof_info, dict):
                continue

            name = (cof_info.get("userName") or "").strip()
            if not name or name.upper() == "N.A. N.A.":
                continue

            co_farmers.append(name)

    # Remove duplicates while preserving insertion order
    return list(dict.fromkeys(co_farmers))
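
# Illustrative row shape (field names taken from the code above, values made up):
#   {"landUsers": {"userName": "A. Roy"},
#    "landCofarmers": [{"coFInfo": {"userName": "B. Sen"}},
#                      {"coFInfo": {"userName": "N.A. N.A."}}]}
# -> ["A. Roy", "B. Sen"]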


# ==============================
# FETCH SINGLE PAGE
# ==============================
def fetch_page(page_no: int) -> List[Dict]:
    """Fetch one page of plot rows, retrying up to MAX_RETRIES times.

    Returns an empty list if every attempt fails.
    """
    params = {
        "pageNo": page_no,
        "pageLimit": PAGE_LIMIT,
        "orderBy": 1
    }

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = requests.get(BASE_URL, headers=HEADERS, params=params, timeout=30)
            response.raise_for_status()

            data = response.json()
            rows = data.get("data", {}).get("rows", [])

            print(f"✅ Page {page_no} fetched ({len(rows)} rows)")
            return rows

        # Broad catch on purpose: network errors, HTTP errors, and bad JSON
        # are all retried the same way.
        except Exception as e:
            print(f"[Retry {attempt}] Page {page_no} error: {e}")
            time.sleep(1)

    print(f"❌ Failed page {page_no}")
    return []
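
# A jittered exponential backoff (a sketch, not used above) could replace the
# fixed one-second sleep if the server throttles bursts:
#   time.sleep(min(2 ** attempt, 10) + random.random())  # needs `import random`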


# ==============================
# TRANSFORM ROW
# ==============================
def transform_row(row: Dict) -> Dict:
    """Flatten one raw API row into the output record."""

    # ---- JL_No ----
    jl_list = safe_split(row.get("jl_no"))

    # ---- Daag_No ----
    daag_list = safe_split(row.get("daag_no"))

    # Debug missing
    if not jl_list or not daag_list:
        print(f"⚠️ Missing JL/Daag → jl: {row.get('jl_no')} | daag: {row.get('daag_no')}")

    # ---- PDF URLs ----
    pdf_urls = [
        img.get("file_path")
        for img in row.get("plot_img", [])
        if img.get("file_path")
    ]

    # ---- Co-farmers ----
    co_farmers = extract_cofarmers(row)

    return {
        "rel_id": row.get("rel_id"),
        "land_convert_size": row.get("land_convert_size"),
        "JL_No": jl_list,
        "Daag_No": daag_list,
        "pdf_urls": pdf_urls,
        "co-farmers": co_farmers
    }
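
# Illustrative output record (values made up):
#   {"rel_id": 123, "land_convert_size": "0.42", "JL_No": ["101"],
#    "Daag_No": ["205", "206"], "pdf_urls": ["https://…/doc.pdf"],
#    "co-farmers": ["A. Roy"]}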


# ==============================
# FETCH ALL DATA (PARALLEL)
# ==============================
def fetch_all_data_parallel() -> List[Dict]:
    """Fetch every page of results.

    Page 1 is fetched synchronously (and without retries, so a failure here
    aborts the run) to learn the total record count; the remaining pages are
    fetched concurrently.
    """
    all_rows = []

    print("Fetching first page...")

    response = requests.get(
        BASE_URL,
        headers=HEADERS,
        params={"pageNo": 1, 
                "pageLimit": PAGE_LIMIT, 
                "orderBy": 1},
        timeout=30
    )
    response.raise_for_status()

    first_json = response.json()
    data = first_json.get("data", {})

    count = data.get("count", 0)

    if count == 0:
        print("No data found.")
        return []

    total_pages = math.ceil(count / PAGE_LIMIT)

    print(f"Total records: {count}")
    print(f"Total pages: {total_pages}")
    print(f"Using {MAX_WORKERS} threads...\n")

    # First page
    all_rows.extend(data.get("rows", []))

    # Parallel fetch
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = {
            executor.submit(fetch_page, page_no): page_no
            for page_no in range(2, total_pages + 1)
        }

        for future in as_completed(futures):
            page_no = futures[future]
            try:
                rows = future.result()
                if rows:
                    all_rows.extend(rows)
            except Exception as e:
                print(f"❌ Error on page {page_no}: {e}")

    return all_rows
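
# Note: pages are collected with as_completed, so all_rows is not guaranteed
# to be ordered by page number; sort on a row field afterwards if the overall
# order matters.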


# ==============================
# MAIN
# ==============================
if __name__ == "__main__":
    start_time = time.time()

    # Step 1: Fetch raw data
    raw_data = fetch_all_data_parallel()

    print("\nTransforming data...")

    # Step 2: Transform
    transformed_data = [transform_row(row) for row in raw_data]

    # Sample output check (guarded: an empty fetch would otherwise raise IndexError)
    if transformed_data:
        print("\nSample Output:")
        print(json.dumps(transformed_data[0], indent=2, ensure_ascii=False))

    # Step 3: Save final JSON
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(transformed_data, f, indent=2, ensure_ascii=False)

    print(f"\n💾 Saved {len(transformed_data)} records to {OUTPUT_FILE}")

    end_time = time.time()
    print(f"\n⏱ Execution time: {round(end_time - start_time, 2)} seconds")