import gradio as gr
from transformers import BertTokenizerFast, BertForSequenceClassification
import torch
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import io
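
# Note: pytesseract and pdf2image wrap external binaries, so the Tesseract OCR
# engine and Poppler usually need to be installed on the system as well (on a
# Hugging Face Space this would typically mean listing tesseract-ocr and
# poppler-utils in packages.txt; adjust to your environment).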
# -------------------------------
# 1️⃣ Load Hugging Face model
# -------------------------------
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
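
# The label order assumed below (1 -> "Fake", 0 -> "Legitimate") follows the
# original app; if in doubt, it can be verified against model.config.id2label.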
# -------------------------------
# 2️⃣ Text extraction from files
# -------------------------------
def extract_text_from_file(file):
    extracted_text = ""
    # Gradio may pass either a file-like object or a plain file path, so handle
    # both when reading the bytes and when resolving the filename/extension.
    filename = getattr(file, "name", str(file))
    try:
        if hasattr(file, "read"):
            file_bytes = file.read()
        else:
            with open(file, "rb") as f:
                file_bytes = f.read()
        if filename.lower().endswith(".pdf"):
            # Render each PDF page to an image, then OCR it.
            pages = convert_from_bytes(file_bytes)
            for page in pages:
                extracted_text += pytesseract.image_to_string(page)
        elif filename.lower().endswith((".png", ".jpg", ".jpeg")):
            img = Image.open(io.BytesIO(file_bytes))
            extracted_text = pytesseract.image_to_string(img)
        else:
            # Fall back to treating the upload as plain text.
            extracted_text = file_bytes.decode(errors="ignore")
    except Exception as e:
        return f"Error reading file: {e}"
    return extracted_text
# -------------------------------
# 3️⃣ Detection function
# -------------------------------
def detect_job(text, file):
    extracted_text = ""
    if file:
        extracted_text = extract_text_from_file(file)
    if text:
        extracted_text += " " + text
    if not extracted_text.strip():
        return "No text found to classify."
    # Tokenize and truncate for BERT
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    ).to(device)
    # Model prediction
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()
    return "Fake" if prediction == 1 else "Legitimate"
# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
css = """
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background-color: #f7f9fc; color: #333; }
h1, h2 { color: #1a73e8; text-align: center; margin-bottom: 20px; }
input, textarea { width: 100%; padding: 12px 15px; margin: 10px 0 20px 0; border: 1px solid #ccc; border-radius: 8px; font-size: 16px; }
button { background-color: #1a73e8; color: #fff; border: none; padding: 12px 25px; font-size: 16px; border-radius: 8px; cursor: pointer; transition: 0.3s ease; }
button:hover { background-color: #155ab6; }
.output { background-color: #f1f3f5; border-left: 4px solid #1a73e8; padding: 15px 20px; border-radius: 8px; font-size: 16px; line-height: 1.5; margin-top: 20px; white-space: pre-wrap; }
"""
iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(label="Paste Job Description Here", placeholder="Type or paste job text..."),
        gr.File(label="Upload PDF/Image/Text file")
    ],
    outputs=gr.Textbox(label="Prediction"),
    title="AI Fake Job Detector",
    description="Detect whether a job posting is potentially fake or a scam, using a Hugging Face AI model.",
    css=css
)
# -------------------------------
# 5️⃣ Launch app
# -------------------------------
if __name__ == "__main__":
    iface.launch()