Spaces:

Infinity-1995
/

Fake-Job-detection

Sleeping

App Files Files Community

Infinity-1995 committed about 1 month ago

Commit

cb1ca41

verified ·

1 Parent(s): fb39cec

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -25

app.py CHANGED Viewed

@@ -1,42 +1,103 @@
 import gradio as gr
-from transformers import pipeline
 from PIL import Image
 import pytesseract
 from pdf2image import convert_from_bytes
-# Load classifier
-classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
-def detect_job(text, file):
     extracted_text = ""
-    if file:
-        filename = file.name if hasattr(file, "name") else "uploaded_file"
-        if filename.endswith(".pdf"):
-            # file is bytes, convert PDF to images
-            images = convert_from_bytes(file.read() if hasattr(file, "read") else file)
-            for img in images:
-                extracted_text += pytesseract.image_to_string(img) + "\n"
         else:
-            img = Image.open(file if hasattr(file, "read") else open(file, "rb"))
             extracted_text = pytesseract.image_to_string(img)
-    full_text = text + "\n" + extracted_text
-    if full_text.strip() == "":
-        return "No text provided!"
-    result = classifier(full_text)
-    label = "Legitimate" if result[0]['label'] == "POSITIVE" else "Suspicious / Fake"
-    score = result[0]['score']
-    return f"Prediction: {label} (Confidence: {score:.2f})"
-# Gradio UI
 iface = gr.Interface(
     fn=detect_job,
     inputs=[
-        gr.Textbox(lines=10, placeholder="Paste job description here..."),
-        gr.File(label="Upload PDF/Image", file_types=[".pdf", ".png", ".jpg", ".jpeg"], type="binary")
     ],
-    outputs="text",
-    title="Fake Job Detector"
 )
-iface.launch()

 import gradio as gr
+from transformers import BertTokenizerFast, BertForSequenceClassification
+import torch
 from PIL import Image
 import pytesseract
 from pdf2image import convert_from_bytes
+import io
+# -------------------------------
+# 1️⃣ Load Hugging Face model
+# -------------------------------
# Checkpoint identifier shared by the tokenizer and the classifier.
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# The app only runs inference: put the model in eval mode and move it to
# the selected device once, at import time.
model.eval()
model.to(device)
+# -------------------------------
+# 2️⃣ Text extraction from files
+# -------------------------------
def extract_text_from_file(file):
    """Extract text from an uploaded PDF, image, or plain-text file.

    Args:
        file: Either a file-like object exposing ``read()`` (its ``name``
            attribute, when present, is used to detect the file type) or a
            filesystem path that can be passed to ``open``.

    Returns:
        The extracted text. PDFs are rasterised page by page and OCR'd,
        ``.png``/``.jpg``/``.jpeg`` images are OCR'd directly, and anything
        else is decoded as text with undecodable bytes dropped. If anything
        goes wrong, a message of the form ``"Error reading file: <reason>"``
        is returned instead of raising.
    """
    try:
        if hasattr(file, "read"):
            file_bytes = file.read()
        else:
            with open(file, "rb") as f:
                file_bytes = f.read()

        # A plain path has no ``.name`` attribute; fall back to the path
        # itself so the extension check works for both input kinds.
        filename = str(getattr(file, "name", file)).lower()

        if filename.endswith(".pdf"):
            pages = convert_from_bytes(file_bytes)
            # Separate pages with a newline so the last word of one page
            # does not fuse with the first word of the next.
            return "\n".join(pytesseract.image_to_string(page) for page in pages)

        if filename.endswith((".png", ".jpg", ".jpeg")):
            img = Image.open(io.BytesIO(file_bytes))
            return pytesseract.image_to_string(img)

        # Unknown extension: treat the payload as text.
        return file_bytes.decode(errors="ignore")
    except Exception as e:
        # Boundary handler: callers expect a string, so report the failure
        # as text rather than propagating the exception.
        return f"Error reading file: {e}"
+# -------------------------------
+# 3️⃣ Detection function
+# -------------------------------
def detect_job(text, file):
    """Classify a job posting as ``"Fake"`` or ``"Legitimate"``.

    Args:
        text: Job description pasted by the user; may be empty or ``None``.
        file: Optional upload handed to ``extract_text_from_file``; may be
            ``None``.

    Returns:
        ``"Fake"`` or ``"Legitimate"`` for classifiable input,
        ``"No text found to classify."`` when neither input yields text, or
        the ``"Error reading file: ..."`` message when the upload could not
        be read.
    """
    extracted_text = ""
    if file:
        extracted_text = extract_text_from_file(file)
        # extract_text_from_file reports failures as a message string.
        # Show that message to the user instead of classifying it as if it
        # were the job posting.
        if extracted_text.startswith("Error reading file:"):
            return extracted_text
    if text:
        extracted_text += " " + text
    if not extracted_text.strip():
        return "No text found to classify."

    # Tokenize; input longer than 128 tokens is truncated, so only the
    # beginning of a long posting is seen by the model.
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    ).to(device)

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    prediction = torch.argmax(outputs.logits, dim=-1).item()
    # NOTE(review): assumes class index 1 is the fake/spam label — confirm
    # against the checkpoint's id2label mapping.
    return "Fake" if prediction == 1 else "Legitimate"
+# -------------------------------
+# 4️⃣ Gradio Interface
+# -------------------------------
# Stylesheet passed to Gradio through the ``css`` argument of the interface.
css = """
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background-color: #f7f9fc; color: #333; }
h1, h2 { color: #1a73e8; text-align: center; margin-bottom: 20px; }
input, textarea { width: 100%; padding: 12px 15px; margin: 10px 0 20px 0; border: 1px solid #ccc; border-radius: 8px; font-size: 16px; }
button { background-color: #1a73e8; color: #fff; border: none; padding: 12px 25px; font-size: 16px; border-radius: 8px; cursor: pointer; transition: 0.3s ease; }
button:hover { background-color: #155ab6; }
.output { background-color: #f1f3f5; border-left: 4px solid #1a73e8; padding: 15px 20px; border-radius: 8px; font-size: 16px; line-height: 1.5; margin-top: 20px; white-space: pre-wrap; }
"""

# Components are listed in the order detect_job(text, file) takes them.
_text_input = gr.Textbox(
    label="Paste Job Description Here",
    placeholder="Type or paste job text...",
)
_file_input = gr.File(label="Upload PDF/Image/Text file")
_prediction_box = gr.Textbox(label="Prediction")

iface = gr.Interface(
    fn=detect_job,
    inputs=[_text_input, _file_input],
    outputs=_prediction_box,
    title="AI Fake Job Detector",
    description="Detect if a job posting is potentially fake or scam using Hugging Face AI model.",
    css=css,
)

# -------------------------------
# 5️⃣ Launch app
# -------------------------------
# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()