# Page header captured together with this file (not part of the program):
#   Infinity-1995's picture
#   Update app.py
#   d0f094c verified
#   raw
#   history blame
#   1.3 kB
import gradio as gr
from transformers import pipeline
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
# Build the text-classification pipeline once, at import time, so that
# detect_job() can reuse the same loaded model on every call.
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
def _extract_text_from_upload(file):
    """Return the OCR text of an uploaded PDF or image.

    ``file`` may be a filesystem path string or an object that exposes the
    path of the uploaded file as ``.name``.
    """
    path = str(getattr(file, "name", file))
    # Compare case-insensitively so "SCAN.PDF" is not mistaken for an image.
    if path.lower().endswith(".pdf"):
        # Read from the path instead of the upload object so the result does
        # not depend on the object's current read position.
        with open(path, "rb") as handle:
            pages = convert_from_bytes(handle.read())
        # One OCR pass per rendered page, each page followed by a newline.
        return "".join(
            pytesseract.image_to_string(page) + "\n" for page in pages
        )
    # Anything that is not a PDF is handed to PIL; close the image afterwards.
    with Image.open(path) as img:
        return pytesseract.image_to_string(img)


def detect_job(text, file) -> str:
    """Classify a job posting given as pasted text and/or an uploaded file.

    Args:
        text: Job description typed or pasted by the user. ``None`` is
            treated like an empty string.
        file: Optional uploaded PDF or image (path string or object with a
            ``.name`` path). Falsy values mean "no upload".

    Returns:
        ``"No text provided!"`` when neither input yields any non-blank
        text, otherwise a ``"Prediction: <label> (Confidence: <score>)"``
        string built from the classifier's first result.
    """
    extracted_text = _extract_text_from_upload(file) if file else ""

    full_text = f"{text or ''}\n{extracted_text}"
    if not full_text.strip():
        return "No text provided!"

    # Ask the tokenizer to truncate: a long posting (or multi-page OCR output)
    # can exceed the model's maximum sequence length, which would make the
    # pipeline call raise instead of returning a prediction.
    result = classifier(full_text, truncation=True)
    top = result[0]

    # NOTE(review): the label test below maps "POSITIVE" to "Legitimate";
    # confirm that the model loaded as `classifier` was actually trained to
    # separate legitimate from fake job postings.
    label = "Legitimate" if top["label"] == "POSITIVE" else "Suspicious / Fake"
    return f"Prediction: {label} (Confidence: {top['score']:.2f})"
# Gradio UI: a multi-line text box plus an optional PDF/image upload, both
# passed to detect_job(); the returned string is shown as plain text.
iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(lines=10, placeholder="Paste job description here..."),
        # No explicit `type` argument: the value "file" is only accepted by
        # older Gradio releases (where it is also the default), while newer
        # releases reject it when the component is constructed. Relying on
        # the component default keeps the app starting on both.
        gr.File(
            label="Upload PDF/Image",
            file_types=[".pdf", ".png", ".jpg", ".jpeg"],
        ),
    ],
    outputs="text",
    title="Fake Job Detector",
)
iface.launch()