| |
| """ |
| Hugging Face compatible inference for content moderation |
| """ |
| import pickle |
| from huggingface_hub import hf_hub_download |
| from enum import Enum |
|
|
class AgeMode(Enum):
    """Age bracket that selects which moderation rule set is applied."""
    UNDER_13 = "under_13"    # strict mode: every non-safe label is blocked
    TEEN_PLUS = "teen_plus"  # laxed mode: reaction swearing is permitted
|
|
class ContentLabel(Enum):
    """Integer class labels emitted by the pickled moderation pipeline.

    The numeric values must match the integers returned by
    ``pipeline.predict`` (see ``DualModeFilter.predict``).
    """
    SAFE = 0
    HARASSMENT = 1
    SWEARING_REACTION = 2   # e.g. "shit that sucks" — swearing as a reaction
    SWEARING_AGGRESSIVE = 3  # swearing directed at a person
    HATE_SPEECH = 4
    SPAM = 5
|
|
class DualModeFilter:
    """
    Dual-mode content filter for Hugging Face.

    Downloads a pickled sklearn-style pipeline (``predict`` /
    ``predict_proba``) from the Hub and applies age-dependent
    blocklists on top of its predictions.

    Usage:
        filter = DualModeFilter("darwinkernelpanic/moderat")
        result = filter.check("text here", age=15)
    """

    def __init__(self, repo_id="darwinkernelpanic/moderat", token=None):
        """Download and unpickle the moderation pipeline.

        Args:
            repo_id: Hugging Face Hub repository containing the model.
            token: Optional Hub auth token for private repositories.
        """
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="moderation_model.pkl",
            token=token,
        )

        # SECURITY: pickle.load executes arbitrary code embedded in the
        # file. Only load models from repositories you trust.
        with open(model_path, 'rb') as f:
            self.pipeline = pickle.load(f)

        # Blocked ContentLabel values per age bracket. Under-13 blocks
        # everything non-safe; teens are additionally allowed reaction
        # swearing (label 2).
        self.under_13_blocked = [1, 2, 3, 4, 5]
        self.teen_plus_blocked = [1, 3, 4, 5]
        self.label_names = [l.name for l in ContentLabel]

    def predict(self, text):
        """Classify ``text``.

        Returns:
            (ContentLabel, confidence) where confidence is the maximum
            class probability reported by the pipeline.
        """
        prediction = self.pipeline.predict([text])[0]
        probs = self.pipeline.predict_proba([text])[0]
        # Cast so callers get a plain Python float (e.g. JSON-friendly)
        # rather than a numpy scalar.
        confidence = float(max(probs))
        return ContentLabel(prediction), confidence

    def check(self, text, age):
        """
        Check content against age-appropriate filters.

        Args:
            text: Text to check.
            age: User age (determines strict vs laxed mode).

        Returns:
            dict with 'allowed', 'label', 'confidence', 'mode', 'reason'
        """
        label, confidence = self.predict(text)
        mode = AgeMode.TEEN_PLUS if age >= 13 else AgeMode.UNDER_13

        # Low-confidence predictions are allowed but flagged for review.
        if confidence < 0.5:
            return {
                "allowed": True,
                "label": "UNCERTAIN",
                "confidence": confidence,
                "mode": mode.value,
                "reason": "Low confidence - manual review recommended"
            }

        if mode is AgeMode.TEEN_PLUS:
            blocked = self.teen_plus_blocked
        else:
            blocked = self.under_13_blocked
        allowed = label.value not in blocked

        # FIX: the original placed the reaction-swearing message inside the
        # blocked branch, where it was unreachable (SWEARING_REACTION is
        # never in teen_plus_blocked, so teens were already allowed).
        # Report the intended reason on the allowed path instead.
        if not allowed:
            reason = f"{label.name} detected"
        elif label == ContentLabel.SWEARING_REACTION and mode is AgeMode.TEEN_PLUS:
            reason = "Swearing permitted as reaction (13+)"
        else:
            reason = "Safe"

        return {
            "allowed": allowed,
            "label": label.name,
            "confidence": confidence,
            "mode": mode.value,
            "reason": reason
        }
|
|
| |
if __name__ == "__main__":
    # Smoke-test: run a few age/text combinations through the filter
    # and print the verdicts.
    print("Testing Dual-Mode Content Filter")
    print("=" * 50)

    moderation = DualModeFilter()

    cases = [
        ("that was a great game", 10),
        ("that was a great game", 15),
        ("shit that sucks", 10),
        ("shit that sucks", 15),
        ("you're a piece of shit", 15),
        ("kill yourself", 15),
    ]

    for message, user_age in cases:
        verdict = moderation.check(message, user_age)
        outcome = "✅ ALLOWED" if verdict["allowed"] else "❌ BLOCKED"
        print(f"\nAge {user_age}: '{message}'")
        print(f" {outcome} - {verdict['reason']}")
        print(f" Confidence: {verdict['confidence']:.2f}")
|