Spaces:
Sleeping
Sleeping
File size: 3,619 Bytes
1849ee0 cb1ca41 a57db7e cb1ca41 ff9adfe cb1ca41 a57db7e cb1ca41 1849ee0 cb1ca41 a57db7e cb1ca41 1849ee0 cb1ca41 fb39cec cb1ca41 1849ee0 d0f094c cb1ca41 d0f094c cb1ca41 1849ee0 cb1ca41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
from transformers import BertTokenizerFast, BertForSequenceClassification
import torch
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import io
# -------------------------------
# 1️⃣ Load Hugging Face model
# -------------------------------
# Fine-tuned BERT binary classifier (per model card name: spam/fake job-posting
# detection). Loaded once at import time so the Gradio handler reuses it.
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
model.eval()  # inference mode: disables dropout
# Prefer GPU when available; `device` is also used by detect_job() below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# -------------------------------
# 2️⃣ Text extraction from files
# -------------------------------
def extract_text_from_file(file):
    """Extract text from an uploaded PDF, image, or plain-text file.

    Accepts either a file-like object (anything with ``.read()``) or a
    filesystem path. PDFs are rasterized with pdf2image and OCR'd page by
    page; PNG/JPEG images are OCR'd directly with pytesseract; anything
    else is decoded as text (undecodable bytes are ignored).

    Returns the extracted text, or an ``"Error reading file: ..."`` string
    on failure (callers treat the return value as plain text either way).
    """
    extracted_text = ""
    try:
        if hasattr(file, "read"):
            file_bytes = file.read()
            # File-like objects (e.g. Gradio uploads) carry the name attribute.
            filename = str(getattr(file, "name", ""))
        else:
            # BUG FIX: the original unconditionally used `file.name`, which
            # raises AttributeError for plain path strings — the filename must
            # come from the path itself in this branch.
            filename = str(file)
            with open(file, "rb") as f:
                file_bytes = f.read()

        lower_name = filename.lower()
        if lower_name.endswith(".pdf"):
            # OCR every rendered page and concatenate the results.
            pages = convert_from_bytes(file_bytes)
            for page in pages:
                extracted_text += pytesseract.image_to_string(page)
        elif lower_name.endswith((".png", ".jpg", ".jpeg")):
            img = Image.open(io.BytesIO(file_bytes))
            extracted_text = pytesseract.image_to_string(img)
        else:
            # Fallback: treat unknown formats as plain text.
            extracted_text = file_bytes.decode(errors="ignore")
    except Exception as e:
        # Best-effort contract: surface the error as a string, never raise.
        return f"Error reading file: {e}"
    return extracted_text
# -------------------------------
# 3️⃣ Detection function
# -------------------------------
def detect_job(text, file):
    """Classify a job posting as "Fake" or "Legitimate".

    Combines text extracted from an optional uploaded file with any pasted
    text, runs the BERT classifier, and maps class index 1 -> "Fake",
    0 -> "Legitimate". Returns a notice string when no text is available.
    """
    combined = ""
    if file:
        combined = extract_text_from_file(file)
    if text:
        combined += " " + text
    if not combined.strip():
        return "No text found to classify."

    # BERT accepts at most 512 tokens; 128 keeps inference fast.
    encoded = tokenizer(
        combined,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    ).to(device)

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(**encoded).logits
    predicted_class = torch.argmax(logits, dim=-1).item()

    return "Fake" if predicted_class == 1 else "Legitimate"
# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
# Custom CSS injected into the Gradio page (theming only; no behavior).
css = """
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background-color: #f7f9fc; color: #333; }
h1, h2 { color: #1a73e8; text-align: center; margin-bottom: 20px; }
input, textarea { width: 100%; padding: 12px 15px; margin: 10px 0 20px 0; border: 1px solid #ccc; border-radius: 8px; font-size: 16px; }
button { background-color: #1a73e8; color: #fff; border: none; padding: 12px 25px; font-size: 16px; border-radius: 8px; cursor: pointer; transition: 0.3s ease; }
button:hover { background-color: #155ab6; }
.output { background-color: #f1f3f5; border-left: 4px solid #1a73e8; padding: 15px 20px; border-radius: 8px; font-size: 16px; line-height: 1.5; margin-top: 20px; white-space: pre-wrap; }
"""

# Two inputs (pasted text and/or an uploaded file) map positionally onto
# detect_job(text, file); the single Textbox output shows the verdict string.
iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(label="Paste Job Description Here", placeholder="Type or paste job text..."),
        gr.File(label="Upload PDF/Image/Text file")
    ],
    outputs=gr.Textbox(label="Prediction"),
    title="AI Fake Job Detector",
    description="Detect if a job posting is potentially fake or scam using Hugging Face AI model.",
    css=css
)

# -------------------------------
# 5️⃣ Launch app
# -------------------------------
# Guard lets the module be imported (e.g. by tests) without starting a server.
if __name__ == "__main__":
    iface.launch()
|