"""Gradio demo: binary multi-label PII detection ("asking for" vs "giving" PII)."""

import logging
import os
import sys
from typing import Any, Dict

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

logger = logging.getLogger(__name__)


def predict(text: str) -> Dict[str, Any]:
    """Classify *text* for PII intent.

    Returns a label->probability dict with two independently-scored
    (sigmoid, not softmax) labels: "Asking for PII" and "Giving PII".
    On empty input or inference failure, returns a single-entry dict
    describing the problem instead of raising, so the Gradio UI always
    has something to display.

    Relies on the module-level ``model`` and ``tokenizer`` loaded in the
    ``__main__`` block below.
    """
    if not text or not text.strip():
        return {"No input provided": 0.0}

    # NOTE(review): this logs the raw user text; in a PII-detection demo that
    # text is likely to contain PII itself — consider redacting or removing.
    # Lazy %-args (not an f-string) so formatting is skipped if INFO is off.
    logger.info("User input: %s", text)

    try:
        # Tokenize to a fixed-length (512-token) padded batch of size 1.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            max_length=512,
            truncation=True,
        )

        # Inference only: disable autograd bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            # Sigmoid because the two labels are scored independently.
            probabilities = torch.sigmoid(logits)
            probs = probabilities.squeeze().tolist()

        return {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1]),
        }
    except Exception as e:  # boundary handler: surface the error in the UI
        logger.exception("Inference failed")
        return {"Error": str(e)}


# Example inputs shown in the Gradio UI, including obfuscated / leet-speak
# phrasings the classifier is expected to catch.
examples = [
    ["what's your blue app id?"],
    ["I live at 901 Roosevelt St, Redwood City"],
    ["what's you ph0ne rebmun?"],
    ["yellow gh>>ost app id? let's chat there"],
    ["let's z0000m?"],
    ["Let’s meet at the Starbuck close to Stanford"],
]


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()],
    )

    # Model configuration
    model_id = "Roblox/roblox-pii-classifier"

    # Get HF token from Hugging Face Space secrets.
    # In Spaces, set HF_TOKEN in Settings > Repository secrets.
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Load model and tokenizer; predict() reads these as module globals.
    print(f"Loading model: {model_id}")
    try:
        # Use token if available (required for private models).
        if HF_TOKEN:
            print("Using HF_TOKEN from environment/secrets")
            model = AutoModelForSequenceClassification.from_pretrained(model_id, token=HF_TOKEN)
            tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
        else:
            print("No HF_TOKEN found, attempting without authentication...")
            model = AutoModelForSequenceClassification.from_pretrained(model_id)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
        model.eval()  # inference mode: disables dropout etc.
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Failed to load model: {e}")
        if not HF_TOKEN:
            print("\n⚠️ For private models, you need to set HF_TOKEN as a Space secret:")
            print(" 1. Go to your Space Settings")
            print(" 2. Add a new secret named 'HF_TOKEN'")
            print(" 3. Set your Hugging Face token as the value")
        # Was bare exit(1): exit() is an interactive-site helper and may not
        # exist under all launch modes; sys.exit is the correct script API.
        sys.exit(1)

    # Create Gradio interface
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(
            lines=3,
            placeholder="Enter text to analyze for PII content...",
            label="Input Text",
        ),
        outputs=gr.Label(
            num_top_classes=2,
            label="Classification Results",
        ),
        title="PII Detection Demo",
        description="This model detects whether text is asking for or giving personal information (PII).",
        examples=examples,
        flagging_mode="never",
    )

    demo.launch()