import os
import sys
import logging
from typing import Dict, Any

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
def predict(text: str) -> Dict[str, Any]:
    """Classify text for PII detection.

    Relies on the module-level `tokenizer` and `model` loaded in the
    `__main__` block below.
    """
    if not text or text.strip() == "":
        return {"No input provided": 0.0}

    logging.info(f"User input: {text}")

    try:
        # Tokenize input
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            max_length=512,
            truncation=True
        )

        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            # Sigmoid (not softmax): the two labels are scored independently,
            # so the probabilities need not sum to 1
            probabilities = torch.sigmoid(logits)
            probs = probabilities.squeeze().tolist()

        # Create results dictionary
        results = {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1])
        }
        return results
    except Exception as e:
        return {"Error": str(e)}

# Example test cases
examples = [
    ["what's your blue app id?"],
    ["I live at 901 Roosevelt St, Redwood City"],
    ["what's you ph0ne rebmun?"],
    ["yellow gh>>ost app id? let's chat there"],
    ["let's z0000m?"],
    ["Let's meet at the Starbuck close to Stanford"],
]
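# Note: several examples are deliberately obfuscated (leetspeak "ph0ne",
# reversed "rebmun", inserted symbols in "gh>>ost") to probe whether the
# classifier still flags disguised PII requests.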

if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()]
    )

    # Model configuration
    model_id = "Roblox/roblox-pii-classifier"

    # Get HF token from Hugging Face Space secrets
    # In Spaces, set HF_TOKEN in Settings > Repository secrets
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Load model and tokenizer
    print(f"Loading model: {model_id}")
    try:
        # Use token if available (required for private models)
        if HF_TOKEN:
            print("Using HF_TOKEN from environment/secrets")
            model = AutoModelForSequenceClassification.from_pretrained(model_id, token=HF_TOKEN)
            tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
        else:
            print("No HF_TOKEN found, attempting without authentication...")
            model = AutoModelForSequenceClassification.from_pretrained(model_id)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
        model.eval()
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Failed to load model: {e}")
        if not HF_TOKEN:
            print("\n⚠️ For private models, you need to set HF_TOKEN as a Space secret:")
            print(" 1. Go to your Space Settings")
            print(" 2. Add a new secret named 'HF_TOKEN'")
            print(" 3. Set your Hugging Face token as the value")
        sys.exit(1)
    # Create Gradio interface
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(
            lines=3,
            placeholder="Enter text to analyze for PII content...",
            label="Input Text"
        ),
        outputs=gr.Label(
            num_top_classes=2,
            label="Classification Results"
        ),
        title="PII Detection Demo",
        description="This model detects whether text is asking for or giving personal information (PII).",
        examples=examples,
        flagging_mode="never",
    )

    demo.launch()
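
# To query the deployed Space programmatically, something like the sketch
# below should work (gradio_client usage; the Space ID is a placeholder
# assumption, not taken from this file):
#
#   from gradio_client import Client
#
#   client = Client("your-username/your-space-name")  # hypothetical Space ID
#   result = client.predict("what's your phone number?", api_name="/predict")
#   print(result)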