import os
import sys
import logging
from typing import Dict, Any

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
def predict(text: str) -> Dict[str, Any]:
    """Classify text for PII detection.

    Relies on the module-level `tokenizer` and `model` loaded in the
    `__main__` block below.
    """
    if not text or text.strip() == "":
        return {"No input provided": 0.0}

    logging.info(f"User input: {text}")

    try:
        # Tokenize input
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            max_length=512,
            truncation=True
        )

        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            # Sigmoid (not softmax): the two labels are scored independently,
            # so the probabilities need not sum to 1
            probabilities = torch.sigmoid(logits)
            probs = probabilities.squeeze().tolist()

        # Create results dictionary
        results = {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1])
        }
        return results
    except Exception as e:
        return {"Error": str(e)}

# Example test cases
examples = [
    ["what's your blue app id?"],
    ["I live at 901 Roosevelt St, Redwood City"],
    ["what's you ph0ne rebmun?"],
    ["yellow gh>>ost app id? let's chat there"],
    ["let's z0000m?"],
    ["Let's meet at the Starbuck close to Stanford"],
]
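# Note: several examples are deliberately obfuscated (leetspeak "ph0ne",
# reversed "rebmun", inserted symbols in "gh>>ost") to probe whether the
# classifier still flags disguised PII requests.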

if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()]
    )

    # Model configuration
    model_id = "Roblox/roblox-pii-classifier"

    # Get HF token from Hugging Face Space secrets
    # In Spaces, set HF_TOKEN in Settings > Repository secrets
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Load model and tokenizer
    print(f"Loading model: {model_id}")
    try:
        # Use token if available (required for private models)
        if HF_TOKEN:
            print("Using HF_TOKEN from environment/secrets")
            model = AutoModelForSequenceClassification.from_pretrained(model_id, token=HF_TOKEN)
            tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
        else:
            print("No HF_TOKEN found, attempting without authentication...")
            model = AutoModelForSequenceClassification.from_pretrained(model_id)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
        model.eval()
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Failed to load model: {e}")
        if not HF_TOKEN:
            print("\n⚠️ For private models, you need to set HF_TOKEN as a Space secret:")
            print(" 1. Go to your Space Settings")
            print(" 2. Add a new secret named 'HF_TOKEN'")
            print(" 3. Set your Hugging Face token as the value")
        sys.exit(1)
    # Create Gradio interface
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(
            lines=3,
            placeholder="Enter text to analyze for PII content...",
            label="Input Text"
        ),
        outputs=gr.Label(
            num_top_classes=2,
            label="Classification Results"
        ),
        title="PII Detection Demo",
        description="This model detects whether text is asking for or giving personal information (PII).",
        examples=examples,
        flagging_mode="never",
    )

    demo.launch()
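
# To query the deployed Space programmatically, something like the sketch
# below should work (gradio_client usage; the Space ID is a placeholder
# assumption, not taken from this file):
#
#   from gradio_client import Client
#
#   client = Client("your-username/your-space-name")  # hypothetical Space ID
#   result = client.predict("what's your phone number?", api_name="/predict")
#   print(result)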