# ReliabilityPulse/pipeline/05_evaluation.py
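"""Model evaluation stage: loads the preprocessed test split and all trained
models, prints per-model classification reports, and saves ROC, confusion-
matrix, feature-importance, and anomaly-score plots to the outputs directory."""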
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import joblib
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, f1_score
# Add the project root to sys.path to import path_utils
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import path_utils
def evaluate_models():
    # Load the preprocessed test split produced by the earlier pipeline stage.
    preprocessed_path = path_utils.get_processed_data_path('preprocessed_data.pkl')
    if not os.path.exists(preprocessed_path):
        print(f"Error: Preprocessed data not found at {preprocessed_path}")
        return

    data = joblib.load(preprocessed_path)
    X_test = data['X_test']
    y_test = data['y_test']
    feature_names = data['feature_names']
    print("Data loaded for evaluation.")
    # Load all trained model artifacts.
    models_to_eval = {
        'Logistic Regression': joblib.load(path_utils.get_model_path('logistic_regression.pkl')),
        'SVM': joblib.load(path_utils.get_model_path('svm_model.pkl')),
        'Random Forest': joblib.load(path_utils.get_model_path('random_forest.pkl')),
        'Decision Tree': joblib.load(path_utils.get_model_path('decision_tree.pkl')),
        'XGBoost': joblib.load(path_utils.get_model_path('xgboost_model.pkl')),
        'Isolation Forest': joblib.load(path_utils.get_model_path('isolation_forest.pkl')),
    }
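    # NOTE: joblib.load raises FileNotFoundError if any model artifact is
    # missing; a per-model existence check (like the one above for the
    # preprocessed data) could be added if partial evaluation is desired.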
    # Keep an explicit handle on the ROC figure: other figures are created
    # and closed inside the loop below, so relying on pyplot's implicit
    # "current figure" would send later ROC curves to the wrong figure.
    roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
    results = []
    for name, model in models_to_eval.items():
        print(f"\nEvaluating {name}...")

        if name == 'Isolation Forest':
            # Isolation Forest predicts -1 for anomalies and 1 for normal
            # points; remap to the supervised convention (1 = anomaly).
            preds_raw = model.predict(X_test)
            y_pred = np.where(preds_raw == -1, 1, 0)

            # decision_function is higher for inliers, so negate it to get
            # an anomaly score where higher means more anomalous.
            scores = -model.decision_function(X_test)
            fig, ax = plt.subplots(figsize=(8, 6))
            sns.histplot(scores, bins=50, kde=True, color='purple', ax=ax)
            ax.set_title('Anomaly Scores (Isolation Forest)')
            fig.savefig(path_utils.get_output_path('anomaly_scores.png'))
            plt.close(fig)
        else:
            y_pred = model.predict(X_test)

        # ROC curve (only for models that expose predicted probabilities).
        if hasattr(model, "predict_proba"):
            y_prob = model.predict_proba(X_test)[:, 1]
            fpr, tpr, _ = roc_curve(y_test, y_prob)
            roc_auc = auc(fpr, tpr)
            roc_ax.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')

        # Per-model metrics.
        print(classification_report(y_test, y_pred))
        f1 = f1_score(y_test, y_pred)
        results.append({'Model': name, 'F1-Score': f1})
        # Confusion matrix and feature importances for the best model (XGBoost).
        if name == 'XGBoost':
            cm = confusion_matrix(y_test, y_pred)
            fig, ax = plt.subplots(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
            ax.set_title('Confusion Matrix - XGBoost')
            ax.set_ylabel('Actual')
            ax.set_xlabel('Predicted')
            fig.savefig(path_utils.get_output_path('confusion_matrix_xgboost.png'))
            plt.close(fig)

            importances = model.feature_importances_
            feat_imp = pd.Series(importances, index=feature_names).sort_values(ascending=False).head(10)
            fig, ax = plt.subplots(figsize=(10, 7))
            feat_imp.plot(kind='barh', color='teal', ax=ax)
            ax.set_title('Top 10 Feature Importances (XGBoost)')
            fig.tight_layout()  # avoid clipped feature-name labels
            fig.savefig(path_utils.get_output_path('feature_importance.png'))
            plt.close(fig)
    # Finalize the shared ROC comparison plot.
    roc_ax.plot([0, 1], [0, 1], 'k--')  # chance line
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title('ROC Curve Comparison')
    roc_ax.legend(loc='lower right')
    roc_fig.savefig(path_utils.get_output_path('roc_curve_comparison.png'))
    plt.close(roc_fig)
    # Summary table of F1 scores across models.
    res_df = pd.DataFrame(results)
    print("\nModel Performance Summary (F1-Score):")
    print(res_df.to_string(index=False))
    print("\nEvaluation completed. All plots saved to 'outputs/' directory.")


if __name__ == "__main__":
    evaluate_models()
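# Usage sketch (assuming the repository layout implied by the imports, with
# path_utils.py at the project root):
#   python pipeline/05_evaluation.py
# path_utils is expected to resolve the processed-data, model, and output
# paths relative to the project root, so the working directory should not
# matter.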