import sys
from typing import Dict, Any

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def predict(text: str) -> Dict[str, Any]:
    """Classify text for PII detection."""
    if not text or not text.strip():
        return {"No input provided": 0.0}

    try:
        # Tokenize input
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            max_length=512,
            truncation=True,
        )

        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            # Sigmoid (rather than softmax): the two labels are scored
            # independently, so a text can both ask for and give PII.
            probabilities = torch.sigmoid(logits)

        probs = probabilities.squeeze().tolist()

        # Create results dictionary
        results = {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1]),
        }
        return results
    except Exception as e:
        return {"Error": str(e)}
# Example test cases
examples = [
    ["Do you have the blue app?"],
    ["I live at 901 Roosevelt St, Redwood City"],
]
if __name__ == "__main__":
    # Model configuration
    model_id = "Roblox/PII-OSS-Private-Not-Public"

    # Load model and tokenizer.
    # When deployed as a Hugging Face Space in the same organization,
    # authentication is handled automatically.
    print(f"Loading model: {model_id}")
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model.eval()
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Failed to load model: {e}")
        print("If running locally, you may need to log in with: huggingface-cli login")
        sys.exit(1)
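
    # Note (assumption, for local runs outside the Space): authentication to
    # the private repo can also be provided by setting the HF_TOKEN
    # environment variable, or programmatically by calling
    # huggingface_hub.login() before from_pretrained().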
    # Create Gradio interface
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(
            lines=3,
            placeholder="Enter text to analyze for PII content...",
            label="Input Text",
        ),
        outputs=gr.Label(
            num_top_classes=2,
            label="Classification Results",
        ),
        title="PII Detection Demo",
        description="This model detects whether text is asking for or giving personal information (PII).",
        examples=examples,
        flagging_mode="never",
    )
    demo.launch()
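
    # Local-debugging variant (assumption; not needed on Spaces, where the
    # default launch() is picked up automatically): a temporary public URL
    # can be requested with
    #   demo.launch(share=True)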