| import pandas as pd |
| import numpy as np |
| from sklearn.model_selection import train_test_split |
| from sklearn.preprocessing import StandardScaler |
| from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier |
| from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report |
|
|
class PredictiveAnalytics:
    """Train and evaluate a random-forest model on a tabular dataset.

    The last column of the frame given to :meth:`predict` is the target and
    every other column is a feature. A classifier or a regressor is chosen
    from the target's dtype and number of distinct values.
    """

    # A numeric target with fewer distinct values than this is treated as a
    # set of class labels rather than as a continuous quantity.
    _MAX_DISTINCT_CLASSES = 10

    def __init__(self):
        # Estimator fitted by the most recent successful predict() call.
        self.model = None
        # Re-fitted on the training split every time predict() runs.
        self.scaler = StandardScaler()
        # Label of the column used as target in the last predict() call.
        self.target_column = None
        # Feature column labels in the order the model was trained on them.
        self.feature_names = None

    @classmethod
    def _is_classification_target(cls, y):
        """Return True when the target series *y* should be modelled as labels."""
        # Strings, categoricals and other non-numeric dtypes can only be labels.
        if not pd.api.types.is_numeric_dtype(y):
            return True
        # Fractional values mark a continuous quantity even when only a few
        # distinct values occur; scikit-learn classifiers reject such targets.
        if pd.api.types.is_float_dtype(y) and (y.dropna() % 1 != 0).any():
            return False
        return y.nunique() < cls._MAX_DISTINCT_CLASSES

    def predict(self, data):
        """Fit a model on *data* and report its hold-out performance.

        The frame is split 80/20 with a fixed seed, the features are
        standardised with ``self.scaler`` (fitted on the training part only)
        and a 100-tree random forest is trained. Features are passed to the
        scaler unchanged, so they are expected to be numeric.

        Args:
            data: DataFrame whose last column is the target and whose other
                columns are the features.

        Returns:
            A text report: accuracy plus the classification report for a
            classification target, or mean squared error and R-squared for a
            regression target.

        Raises:
            ValueError: If *data* has fewer than two columns, i.e. there is
                no feature column left once the target is removed.
        """
        if len(data.columns) < 2:
            raise ValueError(
                "data must contain at least one feature column and a target column"
            )

        self.target_column = data.columns[-1]
        X = data.drop(columns=[self.target_column])
        y = data[self.target_column]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Fit the scaler on the training rows only so that no statistics of
        # the hold-out rows leak into training.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        is_classification = self._is_classification_target(y)
        if is_classification:
            estimator = RandomForestClassifier(n_estimators=100, random_state=42)
        else:
            estimator = RandomForestRegressor(n_estimators=100, random_state=42)
        estimator.fit(X_train_scaled, y_train)

        # Publish the model only after a successful fit, together with the
        # column labels: the estimator was trained on the scaler's ndarray
        # output and therefore does not know the feature names itself.
        self.model = estimator
        self.feature_names = list(X.columns)

        y_pred = estimator.predict(X_test_scaled)

        if is_classification:
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred)
            return f"Classification Results:\nAccuracy: {accuracy:.2f}\n\nClassification Report:\n{report}"

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        return f"Regression Results:\nMean Squared Error: {mse:.2f}\nR-squared Score: {r2:.2f}"

    def get_feature_importance(self):
        """Return the trained model's feature importances, highest first.

        Returns:
            A DataFrame with the columns ``feature`` and ``importance`` sorted
            by descending importance, or the string
            ``"Model has not been trained yet."`` when no model is available.
        """
        if self.model is None:
            return "Model has not been trained yet."

        importances = self.model.feature_importances_

        # Prefer the labels recorded by predict(); fall back to names the
        # estimator may carry itself (e.g. a model assigned from outside that
        # was fitted on a DataFrame). getattr() keeps instances working that
        # were created before ``feature_names`` existed.
        candidates = (
            getattr(self, "feature_names", None),
            getattr(self.model, "feature_names_in_", None),
        )
        names = next(
            (
                list(candidate)
                for candidate in candidates
                if candidate is not None and len(candidate) == len(importances)
            ),
            None,
        )
        if names is None:
            # Last resort: positional placeholders instead of a crash.
            names = [f"feature_{i}" for i in range(len(importances))]

        feature_importance = pd.DataFrame(
            {"feature": names, "importance": importances}
        )
        return feature_importance.sort_values("importance", ascending=False)