| | |
| |
|
| | import gradio as gr |
| | import torch |
| | import torch.nn.functional as F |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| | import plotly.graph_objects as go |
| | import numpy as np |
| | import os |
| |
|
class HateSpeechDetector:
    """Multilingual hate-speech classifier with attention-based explanations.

    Wraps a Hugging Face sequence-classification checkpoint and exposes:
    - category prediction with a confidence score,
    - a Plotly bar chart of per-category confidence,
    - an HTML rendering of word importance derived from attention weights.
    """

    def __init__(self, model_path: str = "sadjava/multilingual-hate-speech-xlm-roberta"):
        """Initialize the hate speech detector with a trained model.

        Loads tokenizer + model from *model_path*, moving the model to GPU
        when available.  On any load failure, falls back to a public binary
        toxicity checkpoint so the app still starts.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"🔧 Using device: {self.device}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
            self.model.to(self.device)
            self.model.eval()
            print(f"✅ Model loaded successfully from {model_path}")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            print("🔄 Falling back to default multilingual model...")
            # FIX: the fallback tokenizer must match the fallback checkpoint.
            # The original paired an "xlm-roberta-base" tokenizer with
            # "unitary/toxic-bert" (a BERT model), so the model received token
            # ids from the wrong vocabulary and predictions were meaningless.
            self.tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
            self.model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
            self.model.to(self.device)
            self.model.eval()

        # Expected label order of the 8-way model; last entry is the
        # non-hate class.  NOTE(review): assumed to match the checkpoint's
        # id2label mapping — confirm against the model config.
        self.categories = [
            "Race", "Sexual Orientation", "Gender", "Physical Appearance",
            "Religion", "Class", "Disability", "Appropriate"
        ]

    def predict_with_context(self, text: str) -> tuple:
        """Predict hate speech category with contextual analysis.

        Returns a 4-tuple:
            (category label, confidence float, Plotly figure or None,
             HTML string with word-importance highlighting).
        """
        if not text.strip():
            # Empty input: None (not {}) is the value gr.Plot accepts to
            # clear/skip the chart.
            return "Please enter some text", 0.0, None, ""

        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512,
                return_attention_mask=True
            )
            # Move every tensor to the model's device.
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                # output_attentions=True so we can explain the prediction.
                outputs = self.model(**inputs, output_attentions=True)
                logits = outputs.logits
                attentions = outputs.attentions

            probabilities = F.softmax(logits, dim=-1)
            predicted_class = torch.argmax(probabilities, dim=-1).item()

            if probabilities.shape[-1] == len(self.categories):
                # Native 8-way model: use its distribution directly.
                predicted_category = self.categories[predicted_class]
                confidence = float(probabilities[0, predicted_class])
            else:
                # Binary fallback model: synthesize an 8-way distribution so
                # the chart still renders (hate mass spread evenly across the
                # 7 hate categories, remainder on "Appropriate").
                predicted_category = "Inappropriate" if predicted_class == 1 else "Appropriate"
                prob_inappropriate = float(probabilities[0][1]) if probabilities.shape[-1] > 1 else 0.5
                # FIX: report the probability of the *predicted* side.  The
                # original took max() over the synthesized distribution, which
                # could disagree with the reported category (e.g. p=0.6
                # "Inappropriate" was reported with confidence 0.4).
                if predicted_category == "Inappropriate":
                    confidence = prob_inappropriate
                else:
                    confidence = 1.0 - prob_inappropriate
                fake_probs = torch.zeros(len(self.categories))
                fake_probs[-1] = 1 - prob_inappropriate
                fake_probs[:-1] = prob_inappropriate / (len(self.categories) - 1)
                probabilities = fake_probs.unsqueeze(0)

            confidence_chart = self.create_confidence_chart(probabilities[0])
            highlighted_html = self.create_word_highlighting(text, inputs, attentions)

            return predicted_category, confidence, confidence_chart, highlighted_html

        except Exception as e:
            print(f"Error in prediction: {e}")
            # None (not {}) for the plot slot so gr.Plot clears cleanly.
            return f"Error: {str(e)}", 0.0, None, ""

    def create_confidence_chart(self, probabilities):
        """Create a Plotly bar chart of per-category confidence scores.

        Hate categories are drawn red, "Appropriate" green.
        """
        scores = [float(prob) for prob in probabilities]
        colors = ['#ff6b6b' if cat != 'Appropriate' else '#51cf66' for cat in self.categories]

        fig = go.Figure(data=[
            go.Bar(
                x=self.categories,
                y=scores,
                marker_color=colors,
                text=[f'{score:.1%}' for score in scores],
                textposition='auto',
            )
        ])

        fig.update_layout(
            title="Confidence Scores by Category",
            xaxis_title="Categories",
            yaxis_title="Confidence",
            yaxis_range=[0, 1],
            height=400,
            xaxis_tickangle=-45
        )

        return fig

    def create_word_highlighting(self, text, inputs, attentions):
        """Create word-level importance highlighting as an HTML string.

        Uses the last attention layer, averaged over heads; a token's
        importance is the total attention it *receives*.  Word scores are the
        mean of their sub-token scores (re-tokenizing each word to align —
        approximate for tokenizers whose tokenization is context-dependent).
        """
        try:
            # Last layer, first (only) batch item: (heads, seq, seq).
            last_layer_attention = attentions[-1][0]
            avg_attention = torch.mean(last_layer_attention, dim=0)

            # Column sum = attention received by each token.
            token_importance = torch.sum(avg_attention, dim=0).cpu().numpy()
            tokens = self.tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

            # Drop the special BOS/EOS tokens when present.
            content_tokens = tokens[1:-1] if len(tokens) > 2 else tokens
            content_importance = token_importance[1:-1] if len(token_importance) > 2 else token_importance

            # Min-max normalize; sqrt flattens the curve so mid-importance
            # words still get visible color.
            if len(content_importance) > 1:
                importance_norm = (content_importance - content_importance.min()) / (content_importance.max() - content_importance.min() + 1e-8)
                importance_norm = np.power(importance_norm, 0.5)
            else:
                importance_norm = np.array([0.5])

            words = text.split()
            word_scores = []

            # Walk sub-tokens in order, assigning each word the mean score of
            # its pieces.
            token_idx = 0
            for word in words:
                word_importance_scores = []
                word_tokens = self.tokenizer.tokenize(word)

                for _ in word_tokens:
                    if token_idx < len(importance_norm):
                        word_importance_scores.append(importance_norm[token_idx])
                        token_idx += 1

                if word_importance_scores:
                    word_score = np.mean(word_importance_scores)
                else:
                    # Word produced no tokens (or we ran out): low default.
                    word_score = 0.2

                word_scores.append(word_score)

            # Bucket scores into four highlight colors (red > orange > yellow
            # > near-transparent gray).
            html_parts = []
            for word, score in zip(words, word_scores):
                if score > 0.7:
                    color = "rgba(220, 53, 69, 0.8)"
                elif score > 0.5:
                    color = "rgba(255, 193, 7, 0.8)"
                elif score > 0.3:
                    color = "rgba(255, 235, 59, 0.6)"
                else:
                    color = "rgba(248, 249, 250, 0.3)"

                html_parts.append(
                    f'<span style="background-color: {color}; padding: 3px 6px; margin: 2px; '
                    f'border-radius: 4px; font-weight: 500; border: 1px solid rgba(0,0,0,0.1);" '
                    f'title="Importance: {score:.3f}">{word}</span>'
                )

            return '<div style="line-height: 2.5; font-size: 16px; padding: 10px;">' + ' '.join(html_parts) + '</div>'

        except Exception as e:
            # Highlighting is best-effort; surface the error in the HTML slot
            # rather than failing the whole prediction.
            return f'<div>Error in highlighting: {str(e)}</div>'
| |
|
| | |
# Single shared detector, created at import time (loads the model once;
# reused by every Gradio request).
detector = HateSpeechDetector()
| |
|
def analyze_text(text: str):
    """Classify *text* and format the result for the Gradio UI.

    Returns (markdown result string, Plotly figure or None, HTML string) —
    one value per output component wired to the Analyze button.
    """
    try:
        category, confidence, chart, highlighted = detector.predict_with_context(text)

        if category == "Appropriate":
            result = f"✅ **No hate speech detected**\n\nCategory: {category}\nConfidence: {confidence:.1%}"
        else:
            result = f"⚠️ **Hate speech detected**\n\nCategory: {category}\nConfidence: {confidence:.1%}"

        return result, chart, highlighted

    except Exception as e:
        # FIX: return None (not {}) for the gr.Plot output — None clears the
        # chart, while a bare dict is not a valid Plot value and can raise
        # inside Gradio's postprocessing.
        return f"❌ Error: {str(e)}", None, ""
| |
|
def provide_feedback(text: str, rating: int):
    """Acknowledge a user's star rating for the most recent analysis.

    Returns a prompt to analyze text first when *text* is blank, otherwise a
    thank-you message echoing the rating.
    """
    if text.strip():
        return f"✅ Thanks for rating {rating}/5 stars! Feedback helps improve the model."
    return "Please analyze some text first!"
| |
|
| | |
# ---------------------------------------------------------------------------
# Gradio UI: declarative layout plus event wiring.  `demo` is launched from
# the __main__ guard at the bottom of the file.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Multilingual Hate Speech Detector", theme=gr.themes.Soft()) as demo:
    # Page header / feature overview.
    gr.Markdown("""
    # 🛡️ Multilingual Hate Speech Detector

    **Advanced AI system for detecting hate speech in English and Serbian text**

    🔬 **Key Innovations:**
    - **Contextual Analysis**: See which words influenced the AI's decision
    - **Confidence Visualization**: Interactive charts showing prediction confidence across all categories
    - **Word-Level Highlighting**: Visual explanation of model attention and focus
    - **Multilingual Support**: Trained on English and Serbian hate speech datasets
    - **Real-time Processing**: Instant classification with detailed explanations

    📋 **Categories detected:** Race, Sexual Orientation, Gender, Physical Appearance, Religion, Class, Disability, or Appropriate (no hate speech)
    """)

    with gr.Row():
        # Left column: input, trigger button, and clickable examples.
        with gr.Column():
            text_input = gr.Textbox(
                label="🔍 Enter text to analyze (English/Serbian)",
                placeholder="Type or paste text here for hate speech analysis...",
                lines=4,
                max_lines=10
            )

            analyze_btn = gr.Button("🚀 Analyze Text", variant="primary", size="lg")

            gr.Markdown("### 📝 Example Texts")
            # Mixed English/Serbian examples covering hate and benign text.
            gr.Examples(
                examples=[
                    ["I really enjoyed that movie last night! Great acting and storyline."],
                    ["You people are all the same, always causing problems everywhere you go."],
                    ["Women just can't drive as well as men, it's basic biology."],
                    ["That's so gay, this is stupid and makes no sense at all."],
                    ["Ovaj film je bio odličan, preporučujem svima da ga pogledaju!"],
                    ["Ti ljudi ne zaslužuju da žive ovde u našoj zemlji."],
                    ["Hello world! This is a test message for the AI system."],
                    ["People with disabilities contribute so much to our society."]
                ],
                inputs=text_input,
                label="Click any example to test the system"
            )

        # Right column: textual result plus a how-it-works explainer.
        with gr.Column():
            result_output = gr.Markdown(label="🎯 Classification Result")

            gr.Markdown("### ℹ️ How it works")
            gr.Markdown("""
            1. **Input Processing**: Text is tokenized and processed by XLM-RoBERTa
            2. **Classification**: AI predicts hate speech category with confidence scores
            3. **Attention Analysis**: Model attention weights show word importance
            4. **Visual Explanation**: Color highlighting reveals decision factors
            """)

    # Confidence bar chart (filled by analyze_text's second return value).
    gr.Markdown("### 📊 **Innovation 1**: Confidence Visualization")
    gr.Markdown("*Interactive chart showing model confidence across all hate speech categories*")
    confidence_plot = gr.Plot(label="Confidence Distribution")

    # Attention-based word highlighting (third return value).
    gr.Markdown("### 🌈 **Innovation 2**: Contextual Word Analysis")
    gr.Markdown("*Words are highlighted based on their influence on the classification decision*")
    gr.Markdown("🔴 **Red**: High influence | 🟠 **Orange**: Medium influence | 🟡 **Yellow**: Low influence | ⚪ **Gray**: Minimal influence")
    highlighted_text = gr.HTML(label="Word Importance Analysis")

    # Collapsible feedback widget (rating is acknowledged, not persisted).
    with gr.Accordion("💬 **Innovation 3**: Interactive Feedback System", open=False):
        gr.Markdown("**Help improve the AI model by providing your feedback!**")
        with gr.Row():
            feedback_rating = gr.Slider(1, 5, step=1, value=3, label="Rate analysis quality (1-5 stars)")
            feedback_btn = gr.Button("📝 Submit Feedback")
        feedback_output = gr.Textbox(label="Feedback Status", interactive=False)

    # Collapsible technical summary.
    with gr.Accordion("🔧 Technical Details", open=False):
        gr.Markdown("""
        **Model Architecture**: XLM-RoBERTa (Cross-lingual Language Model)
        **Training Data**: Multilingual hate speech datasets (English + Serbian)
        **Categories**: 8 classes including 7 hate speech types + appropriate content
        **Attention Mechanism**: Transformer attention weights for explainability
        **Deployment**: Hugging Face Spaces with GPU acceleration
        """)

    # Event wiring: Analyze button → (result text, chart, highlighted HTML).
    analyze_btn.click(
        fn=analyze_text,
        inputs=[text_input],
        outputs=[result_output, confidence_plot, highlighted_text]
    )

    # Feedback button → status textbox.
    feedback_btn.click(
        fn=provide_feedback,
        inputs=[text_input, feedback_rating],
        outputs=[feedback_output]
    )

    # Footer / disclaimer.
    gr.Markdown("""
    ---
    **⚡ Powered by**: Transformer Neural Networks | **🌍 Languages**: English, Serbian | **🎯 Accuracy**: High-confidence predictions

    *This AI system is designed for research and educational purposes. Results should be interpreted carefully and human judgment should always be applied for critical decisions.*
    """)
| |
|
if __name__ == "__main__":
    # Start the Gradio server (blocking) when run as a script.
    demo.launch()