# from fastapi import FastAPI, Response
# from pydantic import BaseModel
# import easyocr
# import requests
# import re
# import cv2
# import numpy as np
# from rapidfuzz import fuzz
# from pdf2image import convert_from_path
# import io

# app = FastAPI()

# # Initialize EasyOCR reader with English + Bengali
# reader = easyocr.Reader(['en', 'bn'])

# class InputData(BaseModel):
#     input_name: str
#     file: str  # URL to Aadhaar/Voter card image or PDF

# # --- Helpers ---
# def download_file(url: str):
#     resp = requests.get(url)
#     if resp.status_code != 200:
#         return None
#     return resp.content

# def convert_pdf_to_image(pdf_bytes):
#     with open("temp.pdf", "wb") as f:
#         f.write(pdf_bytes)
#     pages = convert_from_path("temp.pdf", dpi=300)
#     img_bytes = io.BytesIO()
#     pages[0].save(img_bytes, format="JPEG")
#     return img_bytes.getvalue()

# def deskew_image(img):
#     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     edges = cv2.Canny(gray, 50, 150)
#     contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#     if not contours:
#         return img
#     contours = sorted(contours, key=cv2.contourArea, reverse=True)
#     card_contour = contours[0]
#     epsilon = 0.02 * cv2.arcLength(card_contour, True)
#     approx = cv2.approxPolyDP(card_contour, epsilon, True)
#     if len(approx) == 4:
#         pts = approx.reshape(4, 2)
#         rect = np.zeros((4, 2), dtype="float32")
#         s = pts.sum(axis=1)
#         rect[0] = pts[np.argmin(s)]
#         rect[2] = pts[np.argmax(s)]
#         diff = np.diff(pts, axis=1)
#         rect[1] = pts[np.argmin(diff)]
#         rect[3] = pts[np.argmax(diff)]
#         (tl, tr, br, bl) = rect
#         width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
#         height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))
#         dst = np.array([[0, 0], [width-1, 0], [width-1, height-1], [0, height-1]], dtype="float32")
#         M = cv2.getPerspectiveTransform(rect, dst)
#         warped = cv2.warpPerspective(img, M, (width, height))
#         return warped
#     return img

# def correct_rotation(img):
#     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     coords = np.column_stack(np.where(gray > 0))
#     angle = cv2.minAreaRect(coords)[-1]
#     if angle < -45:
#         angle = -(90 + angle)
#     else:
#         angle = -angle
#     (h, w) = img.shape[:2]
#     M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.0)
#     rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
#     return rotated

# def extract_name_from_image(image_bytes):
#     np_arr = np.frombuffer(image_bytes, np.uint8)
#     img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

#     # ✅ Preprocess: deskew + rotation correction
#     img = deskew_image(img)
#     img = correct_rotation(img)

#     # OCR full card
#     results = reader.readtext(img, detail=0)
#     print("🔍 OCR Results:", results)

#     candidates = []
#     for line in results:
#         line = line.strip()
#         # Reject DOB/date lines
#         if re.search(r"DOB|জন্মতারিখ|\d{2}/\d{2}/\d{4}", line, re.IGNORECASE):
#             continue
#         # Reject gender/government lines
#         if line.lower() in ["male", "female", "transgender", "government of india"]:
#             continue
#         # Accept alphabetic lines, reject numeric garbage
#         if re.search(r"[A-Za-z]", line) and not re.match(r"^[0-9;=|`]+$", line):
#             candidates.append(line)

#     if not candidates:
#         return None

#     # Pick the longest candidate (most likely full name)
#     return max(candidates, key=len)

# def clean_name(name: str) -> str:
#     if not name:
#         return None
#     name = re.sub(r"DOB.*", "", name, flags=re.IGNORECASE)
#     name = name.replace("\n", " ").strip()
#     name = re.sub(r"\s+", " ", name)
#     tokens = [t for t in name.split() if re.match(r"^[A-Za-z]+$", t)]
#     return " ".join(tokens[:3])

# # --- API ---
# @app.post("/extract_name")
# async def extract_name(data: InputData, response: Response):
#     file_bytes = download_file(data.file)
#     if not file_bytes:
#         response.status_code = 400
#         return {
#             "input_name": data.input_name,
#             "extracted_name": None,
#             "status": "error",
#             "message": "Could not download file"
#         }

#     if data.file.lower().endswith(".pdf"):
#         file_bytes = convert_pdf_to_image(file_bytes)

#     extracted_raw = extract_name_from_image(file_bytes)
#     cleaned_name = clean_name(extracted_raw)

#     if not cleaned_name:
#         response.status_code = 422
#         return {
#             "input_name": data.input_name,
#             "extracted_name": None,
#             "status": "not_detected",
#             "message": "Name could not be detected"
#         }

#     similarity = fuzz.ratio(data.input_name.strip().lower(), cleaned_name.strip().lower())

#     if similarity >= 80:
#         response.status_code = 200
#         status = "matched"
#     else:
#         response.status_code = 422
#         status = "not_matched"

#     return {
#         "input_name": data.input_name,
#         "extracted_name": cleaned_name,
#         "status": status,
#         "similarity": similarity
#     }



import io
import os
import re
import tempfile

import cv2
import easyocr
import numpy as np
import requests
from fastapi import FastAPI, Response
from pdf2image import convert_from_path
from pydantic import BaseModel
from rapidfuzz import fuzz

# FastAPI application instance; the /extract_name route below registers on it.
app = FastAPI()

# Initialize EasyOCR reader once, at import time, so every request reuses the
# same instance. Only the English ('en') language is requested here.
reader = easyocr.Reader(['en'])

class InputData(BaseModel):
    # Request body accepted by the /extract_name endpoint.

    # Name supplied by the caller; it is fuzzy-compared against the name
    # that OCR reads from the card.
    input_name: str
    # Location the service downloads the document from.
    file: str  # URL to Aadhaar/Voter card image or PDF

# --- Helpers ---
def download_file(url: str, timeout: float = 15.0):
    """Fetch *url* over HTTP(S) and return the response body.

    Args:
        url: Location of the document to download.
        timeout: Seconds to wait for the connection and for each read before
            giving up. Without a timeout ``requests`` waits indefinitely, so
            one unresponsive host could stall the service.

    Returns:
        The raw response bytes when the server answers with status 200;
        ``None`` for any other status or when the request itself fails
        (unreachable host, timeout, malformed or unsupported URL).
    """
    # NOTE(security): `url` comes straight from the request body, so this
    # function will contact whatever host the client names (SSRF exposure).
    # Restrict the allowed hosts/networks upstream if that matters for the
    # deployment.
    try:
        resp = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Transport-level failures are reported the same way as a non-200
        # response so the caller's existing "could not download" branch
        # handles them instead of the request crashing.
        return None
    if resp.status_code != 200:
        return None
    return resp.content

def convert_pdf_to_image(pdf_bytes):
    """Render the first page of a PDF document as JPEG bytes.

    Args:
        pdf_bytes: Raw contents of the PDF file.

    Returns:
        The first page, rendered at 300 dpi and encoded as JPEG.

    Raises:
        ValueError: If the renderer produced no pages.
    """
    # A private temporary directory replaces the former fixed "temp.pdf":
    # processes sharing a working directory can no longer overwrite each
    # other's file, and the file is removed once conversion is done.
    with tempfile.TemporaryDirectory() as workdir:
        pdf_path = os.path.join(workdir, "input.pdf")
        with open(pdf_path, "wb") as f:
            f.write(pdf_bytes)

        # Only page 1 is ever used, so do not rasterise the whole document.
        pages = convert_from_path(pdf_path, dpi=300, first_page=1, last_page=1)
        if not pages:
            raise ValueError("PDF contains no renderable pages")

        img_bytes = io.BytesIO()
        pages[0].save(img_bytes, format="JPEG")
        return img_bytes.getvalue()

def crop_aadhaar_region(img, debug_path=None):
    """Cut out the band of an Aadhaar card image that is OCR'd for the name.

    The band covers 28%-38% of the image height and 15%-85% of its width
    (per the original author's note: a narrow strip above the DOB line that
    excludes the gender line).

    Args:
        img: Image array whose first two dimensions are height and width.
        debug_path: Optional file path. When given, the cropped region is
            also written there with ``cv2.imwrite`` for manual inspection.
            The default writes nothing: dumping every crop to a fixed file
            persisted card data to disk on each request.

    Returns:
        The cropped region, a slice of ``img``.
    """
    h, w = img.shape[:2]
    # Crop narrower band above DOB line, excluding gender
    roi = img[int(h * 0.28):int(h * 0.38), int(w * 0.15):int(w * 0.85)]
    if debug_path is not None:
        cv2.imwrite(debug_path, roi)  # Debug: inspect cropped region
    return roi

def detect_card_type(text: str) -> str:
    """Guess which kind of ID card the OCR text came from.

    Args:
        text: OCR output for the whole card.

    Returns:
        ``"aadhaar"`` if the text contains "government of india" or the
        standalone token "VID"; otherwise ``"voter"`` if it contains
        "election commission of india" or "elector"; otherwise ``"auto"``.
        Matching is case-insensitive.
    """
    text_lower = text.lower()
    # "vid" must match as a whole word: a plain substring test also fires on
    # ordinary words and names such as "David" or "provided", which would
    # misclassify a voter card as Aadhaar.
    if "government of india" in text_lower or re.search(r"\bvid\b", text_lower):
        return "aadhaar"
    if "election commission of india" in text_lower or "elector" in text_lower:
        return "voter"
    return "auto"

def extract_name_from_image(image_bytes):
    """OCR an ID-card image and return the best guess at the holder's name.

    The whole card is OCR'd once to classify it with ``detect_card_type``.
    For an Aadhaar card the name is then searched only in the band returned
    by ``crop_aadhaar_region``; for anything else the full-card text is used.
    The name is looked for, in order, after a "Name" label, after an
    "Elector's Name" label, and finally as the first line made only of
    letters and spaces that has at least two words.

    Args:
        image_bytes: Encoded image data (e.g. JPEG or PNG bytes).

    Returns:
        The raw candidate name, or ``None`` when the bytes cannot be decoded
        as an image or no candidate is found.
    """
    non_name_lines = ("male", "female", "transgender", "government of india")

    np_arr = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode returns None for data it cannot decode; report
        # "no name" instead of crashing inside the OCR call.
        return None

    # First OCR pass on full card
    results_full = reader.readtext(img, detail=0)
    text_full = "\n".join(results_full)

    card_type = detect_card_type(text_full)

    # Aadhaar → crop ROI
    if card_type == "aadhaar":
        roi = crop_aadhaar_region(img)
        results = reader.readtext(roi, detail=0)
    else:
        results = results_full

    text = "\n".join(results)

    # The captured name is limited to a single line (letters, spaces, tabs).
    # Allowing newlines in the capture made it run on into the next OCR line,
    # so label words such as "DOB", "Father" or "Male" became part of the
    # name. Requiring a leading letter also stops a whitespace-only capture
    # from short-circuiting the fallbacks below.

    # Aadhaar: "Name : <English Name>"
    match = re.search(r"Name[^A-Za-z]*([A-Za-z][A-Za-z \t]*)", text, re.IGNORECASE)
    if match:
        candidate = match.group(1).strip()
        if candidate.lower() not in non_name_lines:
            return candidate

    # Voter ID: "Elector's Name : <English Name>"
    match = re.search(
        r"Elector'?s Name[^A-Za-z]*([A-Za-z][A-Za-z \t]*)", text, re.IGNORECASE
    )
    if match:
        return match.group(1).strip()

    # Fallback: first clean English line
    for line in results:
        line = line.strip()
        if re.match(r"^[A-Za-z\s]+$", line) and len(line.split()) >= 2:
            if line.lower() not in non_name_lines:
                # Drop a gender word that OCR merged onto the name line.
                line = re.sub(
                    r"\b(MALE|FEMALE|Transgender)\b", "", line, flags=re.IGNORECASE
                ).strip()
                if len(line.split()) >= 2:
                    return line
    return None

def clean_name(name: str) -> str:
    """Normalise a raw OCR name candidate.

    Newlines and runs of whitespace are collapsed, words containing anything
    other than ASCII letters are discarded, and at most the first three
    remaining words are kept.

    Returns ``None`` when *name* is empty or ``None``; otherwise the kept
    words joined by single spaces (possibly an empty string).
    """
    if not name:
        return None

    single_line = name.replace("\n", " ").strip()
    collapsed = re.sub(r"\s+", " ", single_line)

    alphabetic_words = []
    for word in collapsed.split():
        if re.match(r"^[A-Za-z]+$", word):
            alphabetic_words.append(word)

    return " ".join(alphabetic_words[:3])

# --- API ---
@app.post("/extract_name")
async def extract_name(data: InputData, response: Response):
    """Download an ID document, OCR the holder's name and compare it to the input.

    Responses:
        400, status "error": the file could not be downloaded.
        422, status "not_detected": no name could be read from the document.
        200, status "matched": similarity to ``input_name`` is at least 80.
        422, status "not_matched": similarity is below 80.

    The "matched"/"not_matched" bodies also carry the extracted name and the
    ``fuzz.ratio`` similarity score.
    """
    file_bytes = download_file(data.file)
    if not file_bytes:
        response.status_code = 400
        return {
            "input_name": data.input_name,
            "extracted_name": None,
            "status": "error",
            "message": "Could not download file"
        }

    # The URL suffix alone is not a reliable PDF test: URLs with a query
    # string ("card.pdf?token=...") or without an extension would send PDF
    # bytes to the image decoder. Also accept content that starts with the
    # PDF magic number.
    is_pdf = (
        data.file.lower().endswith(".pdf")
        or file_bytes[:1024].lstrip().startswith(b"%PDF-")
    )
    if is_pdf:
        file_bytes = convert_pdf_to_image(file_bytes)

    # ✅ Auto-detection, no card_type argument needed
    extracted_raw = extract_name_from_image(file_bytes)
    cleaned_name = clean_name(extracted_raw)

    if not cleaned_name:
        response.status_code = 422
        return {
            "input_name": data.input_name,
            "extracted_name": None,
            "status": "not_detected",
            "message": "Name could not be detected"
        }

    # Case-insensitive comparison on trimmed strings.
    similarity = fuzz.ratio(data.input_name.strip().lower(), cleaned_name.strip().lower())

    if similarity >= 80:
        response.status_code = 200
        status = "matched"
    else:
        response.status_code = 422
        status = "not_matched"

    return {
        "input_name": data.input_name,
        "extracted_name": cleaned_name,
        "status": status,
        "similarity": similarity
    }