| |
| """ |
| Hugging Face compatible inference for content moderation |
| """ |
| import pickle |
| from huggingface_hub import hf_hub_download |
| from enum import Enum |
|
|
class AgeMode(Enum):
    """Age bracket that selects which moderation rule set is applied."""
    UNDER_13 = "under_13"    # strict mode: every non-safe label is blocked
    TEEN_PLUS = "teen_plus"  # laxed mode: reaction swearing is permitted
|
|
class ContentLabel(Enum):
    """Integer class labels emitted by the pickled moderation pipeline.

    The numeric values must match the integers returned by
    ``pipeline.predict`` (see ``DualModeFilter.predict``).
    """
    SAFE = 0
    HARASSMENT = 1
    SWEARING_REACTION = 2   # e.g. "shit that sucks" — swearing as a reaction
    SWEARING_AGGRESSIVE = 3  # swearing directed at a person
    HATE_SPEECH = 4
    SPAM = 5
|
|
class DualModeFilter:
    """
    Dual-mode content filter for Hugging Face.

    Downloads a pickled sklearn-style pipeline (``predict`` /
    ``predict_proba``) from the Hub and applies age-dependent
    blocklists on top of its predictions.

    Usage:
        filter = DualModeFilter("darwinkernelpanic/moderat")
        result = filter.check("text here", age=15)
    """

    def __init__(self, repo_id="darwinkernelpanic/moderat", token=None):
        """Download and unpickle the moderation pipeline.

        Args:
            repo_id: Hugging Face Hub repository containing the model.
            token: Optional Hub auth token for private repositories.
        """
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="moderation_model.pkl",
            token=token,
        )

        # SECURITY: pickle.load executes arbitrary code embedded in the
        # file. Only load models from repositories you trust.
        with open(model_path, 'rb') as f:
            self.pipeline = pickle.load(f)

        # Blocked ContentLabel values per age bracket. Under-13 blocks
        # everything non-safe; teens are additionally allowed reaction
        # swearing (label 2).
        self.under_13_blocked = [1, 2, 3, 4, 5]
        self.teen_plus_blocked = [1, 3, 4, 5]
        self.label_names = [l.name for l in ContentLabel]

    def predict(self, text):
        """Classify ``text``.

        Returns:
            (ContentLabel, confidence) where confidence is the maximum
            class probability reported by the pipeline.
        """
        prediction = self.pipeline.predict([text])[0]
        probs = self.pipeline.predict_proba([text])[0]
        # Cast so callers get a plain Python float (e.g. JSON-friendly)
        # rather than a numpy scalar.
        confidence = float(max(probs))
        return ContentLabel(prediction), confidence

    def check(self, text, age):
        """
        Check content against age-appropriate filters.

        Args:
            text: Text to check.
            age: User age (determines strict vs laxed mode).

        Returns:
            dict with 'allowed', 'label', 'confidence', 'mode', 'reason'
        """
        label, confidence = self.predict(text)
        mode = AgeMode.TEEN_PLUS if age >= 13 else AgeMode.UNDER_13

        # Low-confidence predictions are allowed but flagged for review.
        if confidence < 0.5:
            return {
                "allowed": True,
                "label": "UNCERTAIN",
                "confidence": confidence,
                "mode": mode.value,
                "reason": "Low confidence - manual review recommended"
            }

        if mode is AgeMode.TEEN_PLUS:
            blocked = self.teen_plus_blocked
        else:
            blocked = self.under_13_blocked
        allowed = label.value not in blocked

        # FIX: the original placed the reaction-swearing message inside the
        # blocked branch, where it was unreachable (SWEARING_REACTION is
        # never in teen_plus_blocked, so teens were already allowed).
        # Report the intended reason on the allowed path instead.
        if not allowed:
            reason = f"{label.name} detected"
        elif label == ContentLabel.SWEARING_REACTION and mode is AgeMode.TEEN_PLUS:
            reason = "Swearing permitted as reaction (13+)"
        else:
            reason = "Safe"

        return {
            "allowed": allowed,
            "label": label.name,
            "confidence": confidence,
            "mode": mode.value,
            "reason": reason
        }
|
|
| |
if __name__ == "__main__":
    # Smoke-test: run a few age/text combinations through the filter
    # and print the verdicts.
    print("Testing Dual-Mode Content Filter")
    print("=" * 50)

    moderation = DualModeFilter()

    cases = [
        ("that was a great game", 10),
        ("that was a great game", 15),
        ("shit that sucks", 10),
        ("shit that sucks", 15),
        ("you're a piece of shit", 15),
        ("kill yourself", 15),
    ]

    for message, user_age in cases:
        verdict = moderation.check(message, user_age)
        outcome = "✅ ALLOWED" if verdict["allowed"] else "❌ BLOCKED"
        print(f"\nAge {user_age}: '{message}'")
        print(f" {outcome} - {verdict['reason']}")
        print(f" Confidence: {verdict['confidence']:.2f}")
|