| | from unittest.mock import patch |
| |
|
| | import pytest |
| | from sklearn.datasets import make_classification |
| | from sklearn.decomposition import PCA |
| | from sklearn.ensemble import RandomForestClassifier |
| | from sklearn.linear_model import LogisticRegression |
| | from sklearn.model_selection import train_test_split |
| |
|
| | from src.classifiers_classic_ml import train_and_evaluate_model, visualize_embeddings |
| |
|
| | |
| | |
| | |
| |
|
| |
|
@pytest.fixture
def sample_embedding_data():
    """
    Provide a small synthetic classification split for the tests in this module.

    The dataset has 20 samples, 6 features (4 of them informative) and 3
    classes; 20% of it is held out as the test portion. Both the generation
    and the split are seeded with 42 so every run sees the same data.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    features, labels = make_classification(
        n_samples=20,
        n_features=6,
        n_informative=4,
        n_classes=3,
        random_state=42,
    )

    # train_test_split yields [X_train, X_test, y_train, y_test]; hand it back
    # as a tuple so consumers can unpack it in that order.
    return tuple(
        train_test_split(features, labels, test_size=0.2, random_state=42)
    )
| |
|
| |
|
@pytest.mark.parametrize(
    "method, plot_type",
    [
        ("PCA", "2D"),
        ("PCA", "3D"),
    ],
)
def test_visualize_embeddings(method, plot_type, sample_embedding_data):
    """
    Check that ``visualize_embeddings`` returns a fitted PCA of the right size.

    For each parametrized ``(method, plot_type)`` pair the returned object must
    be a ``PCA`` instance whose ``n_components_`` equals the plot
    dimensionality (2 for "2D", 3 for "3D").
    """
    options = {"plot_type": plot_type, "method": method}

    # Figure.show is patched out so no plot window/browser is opened.
    with patch("plotly.graph_objs.Figure.show"):
        # The fixture tuple is (X_train, X_test, y_train, y_test), which is
        # exactly the positional order the function is called with.
        reducer = visualize_embeddings(*sample_embedding_data, **options)

    assert isinstance(reducer, PCA), "The model should be an instance of PCA"

    if plot_type == "3D":
        assert reducer.n_components_ == 3, "PCA should reduce data to 3 components"
    elif plot_type == "2D":
        assert reducer.n_components_ == 2, "PCA should reduce data to 2 components"
| |
|
| |
|
def test_train_and_evaluate_model(sample_embedding_data):
    """
    Test the training and evaluation of models (Logistic Regression, Random Forest).

    Calls ``train_and_evaluate_model`` with ``test=False`` on the fixture split
    and verifies that:

    * the result is a list of ``(name, model)`` pairs with at least two entries;
    * a ``LogisticRegression`` and a ``RandomForestClassifier`` are among them;
    * every returned model exposes ``fit``/``predict`` and yields exactly one
      prediction per held-out sample.
    """
    X_train, X_test, y_train, y_test = sample_embedding_data

    trained_models = train_and_evaluate_model(
        X_train, X_test, y_train, y_test, test=False
    )

    # Shape of the return value.
    assert isinstance(trained_models, list), (
        "The output should be a list of trained models"
    )
    assert len(trained_models) >= 2, "At least two models should be trained"

    # Both expected estimator types must be present.
    models_instances = [model for _, model in trained_models]
    assert any(isinstance(model, LogisticRegression) for model in models_instances), (
        "Logistic Regression model not found"
    )
    assert any(
        isinstance(model, RandomForestClassifier) for model in models_instances
    ), "Random Forest model not found"

    # Every model (not just the last one) must be usable for prediction.
    for name, model in trained_models:
        assert hasattr(model, "fit"), f"{name} should have a fit method"
        assert hasattr(model, "predict"), f"{name} should have a predict method"

        y_pred = model.predict(X_test)
        assert y_pred is not None, f"{name} should have successfully made predictions"
        # ``is not None`` can never fail once predict() returns, so also
        # require one prediction per test sample.
        assert len(y_pred) == len(X_test), (
            f"{name} should return one prediction per test sample"
        )
| |
|
| |
|
if __name__ == "__main__":
    # Allow running this module directly as a script. pytest.main() returns
    # the session exit code; raise it as the process exit status so a failing
    # run does not exit with 0.
    raise SystemExit(pytest.main())
| |
|