"""Gradio demo for the Roblox PII classifier.

Loads a multi-label sequence-classification model and exposes a text box
that reports the probability that the input is asking for, or giving,
personally identifiable information (PII).
"""

import os
import sys
from typing import Any, Dict

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


def predict(text: str) -> Dict[str, Any]:
    """Classify text for PII detection.

    Args:
        text: Raw user input from the Gradio textbox.

    Returns:
        A mapping of label name -> probability, suitable for ``gr.Label``.
        On empty input or inference failure, returns a single-entry dict
        describing the problem instead of raising, so the UI always has
        something to render.
    """
    if not text or not text.strip():
        return {"No input provided": 0.0}

    try:
        # Single-item batch: truncate to the model's window. No padding is
        # needed for a batch of one — the attention mask makes padded
        # positions a no-op, so dropping padding="max_length" only saves
        # compute; the scores are unchanged.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )

        # Inference only: disable autograd bookkeeping.
        with torch.no_grad():
            logits = model(**inputs).logits

        # Multi-label head: an independent sigmoid per class (not softmax),
        # so the two probabilities need not sum to 1.
        probs = torch.sigmoid(logits).squeeze().tolist()

        return {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1]),
        }
    except Exception as e:
        # Surface the error in the UI rather than crash the worker.
        return {"Error": str(e)}


# Example test cases shown under the input box.
examples = [
    ["Do you have the blue app?"],
    ["I live at 901 Roosevelt St, Redwood City"],
]


if __name__ == "__main__":
    # Model configuration
    model_id = "Roblox/Roblox-PII-Classifier"

    # Get HF token from Hugging Face Space secrets.
    # In Spaces, set HF_TOKEN in Settings > Repository secrets
    # (required for private models).
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Load model and tokenizer
    print(f"Loading model: {model_id}")
    try:
        if HF_TOKEN:
            print("Using HF_TOKEN from environment/secrets")
        else:
            print("No HF_TOKEN found, attempting without authentication...")
        # token=None falls back to anonymous / locally cached credentials,
        # so a single call path covers both branches.
        model = AutoModelForSequenceClassification.from_pretrained(
            model_id, token=HF_TOKEN
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
        model.eval()
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Failed to load model: {e}")
        if not HF_TOKEN:
            print("\n⚠️ For private models, you need to set HF_TOKEN as a Space secret:")
            print(" 1. Go to your Space Settings")
            print(" 2. Add a new secret named 'HF_TOKEN'")
            print(" 3. Set your Hugging Face token as the value")
        # sys.exit instead of the `exit` builtin: `exit` is injected by the
        # site module and is not guaranteed to exist in every run mode.
        sys.exit(1)

    # Create Gradio interface
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(
            lines=3,
            placeholder="Enter text to analyze for PII content...",
            label="Input Text",
        ),
        outputs=gr.Label(
            num_top_classes=2,
            label="Classification Results",
        ),
        title="PII Detection Demo",
        description="This model detects whether text is asking for or giving personal information (PII).",
        examples=examples,
        flagging_mode="never",
    )

    demo.launch()