import gradio as gr
from transformers import BertTokenizerFast, BertForSequenceClassification
import torch
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import io
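
# Note: pytesseract and pdf2image wrap external binaries, so the Tesseract OCR
# engine and Poppler usually need to be installed on the system as well (on a
# Hugging Face Space this would typically mean listing tesseract-ocr and
# poppler-utils in packages.txt; adjust to your environment).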
# -------------------------------
# 1️⃣ Load Hugging Face model
# -------------------------------
model_name = "AventIQ-AI/BERT-Spam-Job-Posting-Detection-Model"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
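
# The label order assumed below (1 -> "Fake", 0 -> "Legitimate") follows the
# original app; if in doubt, it can be verified against model.config.id2label.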
# -------------------------------
# 2️⃣ Text extraction from files
# -------------------------------
def extract_text_from_file(file):
    extracted_text = ""
    # Gradio may pass either a file-like object or a plain file path, so handle
    # both when reading the bytes and when resolving the filename/extension.
    filename = getattr(file, "name", str(file))
    try:
        if hasattr(file, "read"):
            file_bytes = file.read()
        else:
            with open(file, "rb") as f:
                file_bytes = f.read()
        if filename.lower().endswith(".pdf"):
            # Render each PDF page to an image, then OCR it.
            pages = convert_from_bytes(file_bytes)
            for page in pages:
                extracted_text += pytesseract.image_to_string(page)
        elif filename.lower().endswith((".png", ".jpg", ".jpeg")):
            img = Image.open(io.BytesIO(file_bytes))
            extracted_text = pytesseract.image_to_string(img)
        else:
            # Fall back to treating the upload as plain text.
            extracted_text = file_bytes.decode(errors="ignore")
    except Exception as e:
        return f"Error reading file: {e}"
    return extracted_text
# -------------------------------
# 3️⃣ Detection function
# -------------------------------
def detect_job(text, file):
    extracted_text = ""
    if file:
        extracted_text = extract_text_from_file(file)
    if text:
        extracted_text += " " + text
    if not extracted_text.strip():
        return "No text found to classify."
    # Tokenize and truncate for BERT
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    ).to(device)
    # Model prediction
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()
    return "Fake" if prediction == 1 else "Legitimate"
# -------------------------------
# 4️⃣ Gradio Interface
# -------------------------------
css = """
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background-color: #f7f9fc; color: #333; }
h1, h2 { color: #1a73e8; text-align: center; margin-bottom: 20px; }
input, textarea { width: 100%; padding: 12px 15px; margin: 10px 0 20px 0; border: 1px solid #ccc; border-radius: 8px; font-size: 16px; }
button { background-color: #1a73e8; color: #fff; border: none; padding: 12px 25px; font-size: 16px; border-radius: 8px; cursor: pointer; transition: 0.3s ease; }
button:hover { background-color: #155ab6; }
.output { background-color: #f1f3f5; border-left: 4px solid #1a73e8; padding: 15px 20px; border-radius: 8px; font-size: 16px; line-height: 1.5; margin-top: 20px; white-space: pre-wrap; }
"""
iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(label="Paste Job Description Here", placeholder="Type or paste job text..."),
        gr.File(label="Upload PDF/Image/Text file")
    ],
    outputs=gr.Textbox(label="Prediction"),
    title="AI Fake Job Detector",
    description="Detect whether a job posting is potentially fake or a scam, using a Hugging Face AI model.",
    css=css
)
# -------------------------------
# 5️⃣ Launch app
# -------------------------------
if __name__ == "__main__":
    iface.launch()