# Infinity-1995's picture
# Update app.py
# cb1ca41 verified
import gradio as gr
from transformers import BertTokenizerFast, BertForSequenceClassification
import torch
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import io
# -------------------------------
# 1️⃣ Load Hugging Face model
# -------------------------------
# Checkpoint identifier handed to both from_pretrained() calls below, so the
# tokenizer and the classifier weights come from the same source.
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"

# Use the GPU when torch reports one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Inference only: move the weights to the chosen device and switch the
# module to evaluation mode.
model.to(device)
model.eval()
# -------------------------------
# 2️⃣ Text extraction from files
# -------------------------------
def extract_text_from_file(file):
    """Return the text content of an uploaded PDF, image, or plain-text file.

    PDFs are converted to page images and each page is OCR'd, PNG/JPEG
    images are OCR'd directly, and anything else is decoded as text with
    undecodable bytes dropped.

    Args:
        file: Either an object with a ``read()`` method that yields bytes,
            or a filesystem path that ``open()`` accepts. The file type is
            chosen from the extension of ``file.name`` when that attribute
            exists; for a bare path the path itself is used, and a nameless
            stream is treated as plain text.

    Returns:
        The extracted text. On any failure the string
        ``"Error reading file: <reason>"`` is returned instead of raising.
    """
    try:
        if hasattr(file, "read"):
            file_bytes = file.read()
            # In-memory streams (e.g. BytesIO) have no name; an empty name
            # matches no extension, so they fall through to the text branch.
            source_name = getattr(file, "name", "")
        else:
            with open(file, "rb") as f:
                file_bytes = f.read()
            # A plain ``str`` path has no ``.name`` attribute, so fall back
            # to the path itself for extension sniffing.
            source_name = getattr(file, "name", file)

        lowered_name = str(source_name).lower()

        if lowered_name.endswith(".pdf"):
            pages = convert_from_bytes(file_bytes)
            return "".join(pytesseract.image_to_string(page) for page in pages)

        if lowered_name.endswith((".png", ".jpg", ".jpeg")):
            # Close the decoded image once OCR is done.
            with Image.open(io.BytesIO(file_bytes)) as img:
                return pytesseract.image_to_string(img)

        return file_bytes.decode(errors="ignore")
    except Exception as e:
        # Callers expect a string back, so every failure is reported in-band.
        return f"Error reading file: {e}"
# -------------------------------
# 3️⃣ Detection function
# -------------------------------
def detect_job(text, file):
    """Classify a job posting as ``"Fake"`` or ``"Legitimate"``.

    Text extracted from the optional uploaded file and the optional pasted
    text are joined (file text first, separated by a space) and scored by
    the module-level BERT sequence classifier.

    Args:
        text: Pasted job description; may be empty or ``None``.
        file: Uploaded file forwarded to ``extract_text_from_file``; may be
            ``None``.

    Returns:
        ``"Fake"`` when the model's arg-max class is 1, ``"Legitimate"``
        otherwise; ``"No text found to classify."`` when neither input
        yields any non-whitespace text; or the ``"Error reading file: ..."``
        message when the upload could not be read.
    """
    error_prefix = "Error reading file:"

    extracted_text = ""
    if file:
        extracted_text = extract_text_from_file(file)
        # extract_text_from_file reports failures in-band as a string.
        # Surface that message instead of classifying the error text as if
        # it were a job posting.
        if extracted_text.startswith(error_prefix):
            return extracted_text
    if text:
        extracted_text += " " + text

    if not extracted_text.strip():
        return "No text found to classify."

    # Tokenize; anything beyond 128 tokens is truncated before scoring.
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    ).to(device)

    # Forward pass without gradient tracking; pick the highest-scoring class.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()

    return "Fake" if prediction == 1 else "Legitimate"
# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
# Custom stylesheet passed to the Gradio interface below.
css = """
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background-color: #f7f9fc; color: #333; }
h1, h2 { color: #1a73e8; text-align: center; margin-bottom: 20px; }
input, textarea { width: 100%; padding: 12px 15px; margin: 10px 0 20px 0; border: 1px solid #ccc; border-radius: 8px; font-size: 16px; }
button { background-color: #1a73e8; color: #fff; border: none; padding: 12px 25px; font-size: 16px; border-radius: 8px; cursor: pointer; transition: 0.3s ease; }
button:hover { background-color: #155ab6; }
.output { background-color: #f1f3f5; border-left: 4px solid #1a73e8; padding: 15px 20px; border-radius: 8px; font-size: 16px; line-height: 1.5; margin-top: 20px; white-space: pre-wrap; }
"""

# Input widgets, listed in the positional order detect_job(text, file) takes.
job_text_input = gr.Textbox(
    label="Paste Job Description Here",
    placeholder="Type or paste job text...",
)
job_file_input = gr.File(label="Upload PDF/Image/Text file")

# Single text field that shows the string returned by detect_job.
prediction_output = gr.Textbox(label="Prediction")

iface = gr.Interface(
    fn=detect_job,
    inputs=[job_text_input, job_file_input],
    outputs=prediction_output,
    title="AI Fake Job Detector",
    description="Detect if a job posting is potentially fake or scam using Hugging Face AI model.",
    css=css,
)
# -------------------------------
# 5️⃣ Launch app
# -------------------------------
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the interface.
if __name__ == "__main__":
    iface.launch()