import gradio as gr
from transformers import BertTokenizerFast, BertForSequenceClassification
import torch
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import io

# -------------------------------
# 1️⃣ Load Hugging Face model
# -------------------------------
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# -------------------------------
# 2️⃣ Text extraction from files
# -------------------------------
def extract_text_from_file(file):
    extracted_text = ""
    try:
        # Gradio may hand us either a file-like object or a filesystem path
        if hasattr(file, "read"):
            file_bytes = file.read()
        else:
            with open(file, "rb") as f:
                file_bytes = f.read()

        # Work out the filename whether `file` is an object with .name or a plain path string
        filename = str(getattr(file, "name", file)).lower()

        if filename.endswith(".pdf"):
            # Render each PDF page to an image, then OCR it
            pages = convert_from_bytes(file_bytes)
            for page in pages:
                extracted_text += pytesseract.image_to_string(page)
        elif filename.endswith((".png", ".jpg", ".jpeg")):
            img = Image.open(io.BytesIO(file_bytes))
            extracted_text = pytesseract.image_to_string(img)
        else:
            # Fall back to treating the upload as plain text
            extracted_text = file_bytes.decode(errors="ignore")
    except Exception as e:
        return f"Error reading file: {e}"
    return extracted_text

# -------------------------------
# 3️⃣ Detection function
# -------------------------------
def detect_job(text, file):
    extracted_text = ""
    if file:
        extracted_text = extract_text_from_file(file)
    if text:
        extracted_text += " " + text

    if not extracted_text.strip():
        return "No text found to classify."

    # Tokenize and truncate for BERT
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    ).to(device)

    # Model prediction
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()

    return "Fake" if prediction == 1 else "Legitimate"

# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
css = """
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f7f9fc;
    color: #333;
}
h1, h2 {
    color: #1a73e8;
    text-align: center;
    margin-bottom: 20px;
}
input, textarea {
    width: 100%;
    padding: 12px 15px;
    margin: 10px 0 20px 0;
    border: 1px solid #ccc;
    border-radius: 8px;
    font-size: 16px;
}
button {
    background-color: #1a73e8;
    color: #fff;
    border: none;
    padding: 12px 25px;
    font-size: 16px;
    border-radius: 8px;
    cursor: pointer;
    transition: 0.3s ease;
}
button:hover {
    background-color: #155ab6;
}
.output {
    background-color: #f1f3f5;
    border-left: 4px solid #1a73e8;
    padding: 15px 20px;
    border-radius: 8px;
    font-size: 16px;
    line-height: 1.5;
    margin-top: 20px;
    white-space: pre-wrap;
}
"""

iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(label="Paste Job Description Here", placeholder="Type or paste job text..."),
        gr.File(label="Upload PDF/Image/Text file")
    ],
    outputs=gr.Textbox(label="Prediction"),
    title="AI Fake Job Detector",
    description="Detect whether a job posting is potentially fake or a scam using a Hugging Face AI model.",
    css=css
)

# -------------------------------
# 5️⃣ Launch app
# -------------------------------
if __name__ == "__main__":
    iface.launch()
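
# -------------------------------
# (Optional) Quick sanity check without the UI
# -------------------------------
# A minimal sketch of exercising the classifier directly, assuming the model and
# tokenizer above loaded successfully. The sample posting below is invented for
# illustration only; the actual label depends on the model's training data.
#
#   sample = "Earn $5000/week from home! No experience needed, just pay a small signup fee."
#   print(detect_job(sample, None))  # prints "Fake" or "Legitimate"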