import gradio as gr
from transformers import BertTokenizerFast, BertForSequenceClassification
import torch
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import io

# -------------------------------
# 1️⃣ Load Hugging Face model
# -------------------------------
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# -------------------------------
# 2️⃣ Text extraction from files
# -------------------------------
def extract_text_from_file(file):
    extracted_text = ""
    try:
        # Gradio may hand us either a file-like object or a filesystem path
        if hasattr(file, "read"):
            file_bytes = file.read()
        else:
            with open(file, "rb") as f:
                file_bytes = f.read()

        # Work out the filename whether `file` is an object with .name or a plain path string
        filename = str(getattr(file, "name", file)).lower()

        if filename.endswith(".pdf"):
            # Render each PDF page to an image, then OCR it
            pages = convert_from_bytes(file_bytes)
            for page in pages:
                extracted_text += pytesseract.image_to_string(page)
        elif filename.endswith((".png", ".jpg", ".jpeg")):
            img = Image.open(io.BytesIO(file_bytes))
            extracted_text = pytesseract.image_to_string(img)
        else:
            # Fall back to treating the upload as plain text
            extracted_text = file_bytes.decode(errors="ignore")
    except Exception as e:
        return f"Error reading file: {e}"
    return extracted_text

# -------------------------------
# 3️⃣ Detection function
# -------------------------------
def detect_job(text, file):
    extracted_text = ""
    if file:
        extracted_text = extract_text_from_file(file)
    if text:
        extracted_text += " " + text

    if not extracted_text.strip():
        return "No text found to classify."

    # Tokenize and truncate for BERT
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    ).to(device)

    # Model prediction
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()

    return "Fake" if prediction == 1 else "Legitimate"

# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
css = """
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f7f9fc;
    color: #333;
}
h1, h2 {
    color: #1a73e8;
    text-align: center;
    margin-bottom: 20px;
}
input, textarea {
    width: 100%;
    padding: 12px 15px;
    margin: 10px 0 20px 0;
    border: 1px solid #ccc;
    border-radius: 8px;
    font-size: 16px;
}
button {
    background-color: #1a73e8;
    color: #fff;
    border: none;
    padding: 12px 25px;
    font-size: 16px;
    border-radius: 8px;
    cursor: pointer;
    transition: 0.3s ease;
}
button:hover {
    background-color: #155ab6;
}
.output {
    background-color: #f1f3f5;
    border-left: 4px solid #1a73e8;
    padding: 15px 20px;
    border-radius: 8px;
    font-size: 16px;
    line-height: 1.5;
    margin-top: 20px;
    white-space: pre-wrap;
}
"""

iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(label="Paste Job Description Here", placeholder="Type or paste job text..."),
        gr.File(label="Upload PDF/Image/Text file")
    ],
    outputs=gr.Textbox(label="Prediction"),
    title="AI Fake Job Detector",
    description="Detect whether a job posting is potentially fake or a scam using a Hugging Face AI model.",
    css=css
)

# -------------------------------
# 5️⃣ Launch app
# -------------------------------
if __name__ == "__main__":
    iface.launch()
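
# -------------------------------
# (Optional) Quick sanity check without the UI
# -------------------------------
# A minimal sketch of exercising the classifier directly, assuming the model and
# tokenizer above loaded successfully. The sample posting below is invented for
# illustration only; the actual label depends on the model's training data.
#
#   sample = "Earn $5000/week from home! No experience needed, just pay a small signup fee."
#   print(detect_job(sample, None))  # prints "Fake" or "Legitimate"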