| import pandas as pd |
| import nltk |
| from nltk.sentiment import SentimentIntensityAnalyzer |
| from textblob import TextBlob |
| from transformers import pipeline |
|
|
class SentimentAnalyzer:
    """Score text with three sentiment back-ends and combine the results.

    The back-ends are NLTK's ``SentimentIntensityAnalyzer`` (VADER lexicon),
    TextBlob's polarity, and a Hugging Face ``sentiment-analysis`` pipeline.
    """

    def __init__(self):
        """Load the VADER lexicon (downloading it if absent) and both models."""
        # Only hit the network when the lexicon is not installed yet, so that
        # constructing the analyzer also works offline after the first run.
        try:
            nltk.data.find('sentiment/vader_lexicon.zip')
        except LookupError:
            nltk.download('vader_lexicon')
        self.sia = SentimentIntensityAnalyzer()
        # No model name is given, so the library selects its default model.
        self.transformer_model = pipeline("sentiment-analysis")

    @staticmethod
    def _signed_score(result):
        """Return the pipeline score, negated unless the label is 'POSITIVE'."""
        # NOTE(review): assumes the loaded model labels its classes
        # 'POSITIVE' / 'NEGATIVE' -- confirm if a different model is used.
        if result['label'] == 'POSITIVE':
            return result['score']
        return -result['score']

    @staticmethod
    def _as_float_column(values):
        """Convert a list of scores to a float array (also when it is empty)."""
        return pd.Series(values, dtype=float).to_numpy()

    def analyze(self, data, text_column):
        """Add per-row sentiment columns to ``data`` and return it.

        The frame is modified in place; the columns ``vader_sentiment``,
        ``textblob_sentiment``, ``transformer_sentiment`` and
        ``aggregate_sentiment`` (the mean of the other three) are written.

        Args:
            data: DataFrame holding the texts to score.
            text_column: Name of the column in ``data`` that holds the text.

        Returns:
            The same ``data`` object, with the four columns added.

        Rows whose text is missing (``None``/NaN) receive NaN in every
        sentiment column instead of raising. Non-string values are converted
        with ``str`` before scoring.
        """
        column = data[text_column]
        nan = float('nan')

        # ``None`` marks rows that have no text to score.
        texts = [
            None if is_missing else str(value)
            for value, is_missing in zip(column.tolist(), column.isna().tolist())
        ]
        present = [text for text in texts if text is not None]

        vader_scores = [
            nan if text is None else self.sia.polarity_scores(text)['compound']
            for text in texts
        ]
        textblob_scores = [
            nan if text is None else TextBlob(text).sentiment.polarity
            for text in texts
        ]

        # Skip the model entirely when there is nothing to classify.
        # truncation=True asks the tokenizer to cut over-long inputs rather
        # than fail on texts that exceed the model's maximum sequence length.
        if present:
            raw_results = self.transformer_model(present, truncation=True)
        else:
            raw_results = []
        remaining = iter(raw_results)
        # Results come back in the order of ``present``; re-expand them to
        # one value per row, leaving NaN where the text was missing.
        transformer_scores = [
            nan if text is None else self._signed_score(next(remaining))
            for text in texts
        ]

        # Arrays are assigned positionally, so the frame's index is irrelevant.
        data['vader_sentiment'] = self._as_float_column(vader_scores)
        data['textblob_sentiment'] = self._as_float_column(textblob_scores)
        data['transformer_sentiment'] = self._as_float_column(transformer_scores)

        data['aggregate_sentiment'] = (
            data['vader_sentiment']
            + data['textblob_sentiment']
            + data['transformer_sentiment']
        ) / 3

        return data

    def get_sentiment_summary(self, data, threshold=0.05):
        """Count rows by sentiment class based on ``aggregate_sentiment``.

        Args:
            data: DataFrame containing an ``aggregate_sentiment`` column.
            threshold: Half-width of the neutral band. Scores strictly above
                ``threshold`` are positive, strictly below ``-threshold``
                negative, and everything in between (bounds included) neutral.

        Returns:
            Dict with integer counts under 'positive', 'neutral', 'negative'.
            NaN scores satisfy none of the comparisons and are not counted.
        """
        scores = data['aggregate_sentiment']
        is_positive = scores > threshold
        is_negative = scores < -threshold
        is_neutral = (scores >= -threshold) & (scores <= threshold)
        # int() yields plain Python integers rather than numpy scalars.
        return {
            'positive': int(is_positive.sum()),
            'neutral': int(is_neutral.sum()),
            'negative': int(is_negative.sum()),
        }

    def plot_sentiment_distribution(self, data):
        """Draw a histogram with KDE overlay of ``aggregate_sentiment``.

        Args:
            data: DataFrame containing an ``aggregate_sentiment`` column.

        Returns:
            The matplotlib figure holding the plot; it is not shown or saved
            here.
        """
        # Imported here so the plotting libraries are only required when a
        # plot is actually requested.
        import matplotlib.pyplot as plt
        import seaborn as sns

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data['aggregate_sentiment'], kde=True, ax=ax)
        ax.set_title('Distribution of Sentiment Scores')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Frequency')

        return fig