jasonxie-rblx's picture
Update app.py
c232d71 verified
raw
history blame
3.43 kB
import os
import logging
from typing import Dict, Any
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
def predict(text: str) -> Dict[str, Any]:
    """Classify text for PII (personally identifiable information) intent.

    Args:
        text: Raw user text to analyze. Empty/whitespace-only (or None)
            input is rejected without running the model.

    Returns:
        Mapping of label name to score: "Asking for PII" and "Giving PII".
        Scores come from independent sigmoids (not a softmax), so they do
        not sum to 1. On empty input returns {"No input provided": 0.0};
        on any failure returns {"Error": <message>} instead of raising,
        so the Gradio UI never crashes.
    """
    # Guard clause: nothing to classify.
    if not text or text.strip() == "":
        return {"No input provided": 0.0}
    # Lazy %-style args: the string is only formatted if INFO is enabled.
    # NOTE(review): logging raw user text may itself persist PII — confirm
    # this is acceptable for this demo.
    logging.info("User input: %s", text)
    try:
        # Tokenize a single input, padded/truncated to a fixed 512 tokens.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            max_length=512,
            truncation=True,
        )
        # Inference only — no gradient bookkeeping needed.
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        # Independent binary heads, hence sigmoid rather than softmax.
        probs = torch.sigmoid(logits).squeeze().tolist()
        # squeeze() on a single-label model yields a bare float; normalize
        # to a list so the indexing below fails loudly into the except arm
        # rather than with an opaque TypeError.
        if isinstance(probs, float):
            probs = [probs]
        return {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1]),
        }
    except Exception as e:
        # Surface the error in the UI label instead of crashing the Space.
        return {"Error": str(e)}
# Example prompts covering both label directions (asking for vs. giving
# personal info), including obfuscated spellings the model should still catch.
_example_texts = (
    "what's your blue app id?",
    "I live at 901 Roosevelt St, Redwood City",
    "what's you ph0ne rebmun?",
    "yellow gh>>ost app id? let's chat there",
    "let's z0000m?",
    "Let’s meet at the Starbuck close to Stanford",
)
# Gradio expects one row (a list) per example input.
examples = [[sample] for sample in _example_texts]
if __name__ == "__main__":
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[logging.StreamHandler()]
)
# Model configuration
model_id = "Roblox/roblox-pii-classifier"
# Get HF token from Hugging Face Space secrets
# In Spaces, set HF_TOKEN in Settings > Repository secrets
HF_TOKEN = os.getenv("HF_TOKEN")
# Load model and tokenizer
print(f"Loading model: {model_id}")
try:
# Use token if available (required for private models)
if HF_TOKEN:
print("Using HF_TOKEN from environment/secrets")
model = AutoModelForSequenceClassification.from_pretrained(model_id, token=HF_TOKEN)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
else:
print("No HF_TOKEN found, attempting without authentication...")
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model.eval()
print("Model loaded successfully!")
except Exception as e:
print(f"Failed to load model: {e}")
if not HF_TOKEN:
print("\n⚠️ For private models, you need to set HF_TOKEN as a Space secret:")
print(" 1. Go to your Space Settings")
print(" 2. Add a new secret named 'HF_TOKEN'")
print(" 3. Set your Hugging Face token as the value")
exit(1)
# Create Gradio interface
demo = gr.Interface(
fn=predict,
inputs=gr.Textbox(
lines=3,
placeholder="Enter text to analyze for PII content...",
label="Input Text"
),
outputs=gr.Label(
num_top_classes=2,
label="Classification Results"
),
title="PII Detection Demo",
description="This model detects whether text is asking for or giving personal information (PII).",
examples=examples,
flagging_mode="never",
)
demo.launch()