Spaces:

iBrokeTheCode
/

Multimodal_Product_Classification

Sleeping

App Files Files Community

Multimodal_Product_Classification / tests /test_classifiers_classic_ml.py

iBrokeTheCode

chore: Add tests cases

43fe501 6 months ago

raw

history blame contribute delete

4.22 kB

	from unittest.mock import patch

	import pytest
	from sklearn.datasets import make_classification
	from sklearn.decomposition import PCA
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.model_selection import train_test_split

	from src.classifiers_classic_ml import train_and_evaluate_model, visualize_embeddings

	####################################################################################################
	################################### Test the Classical ML Models ###################################
	####################################################################################################


	@pytest.fixture
	def sample_embedding_data():
	    """
	    Build a tiny synthetic classification dataset, already split for the tests.

	    Returns:
	        tuple: ``(X_train, X_test, y_train, y_test)`` obtained from an 80/20 split
	        of 20 generated samples with 6 features and 3 classes.
	    """
	    # The same fixed seed drives data generation and the split, so every run
	    # sees identical arrays.
	    seed = 42

	    features, labels = make_classification(
	        n_samples=20,
	        n_features=6,
	        n_informative=4,
	        n_classes=3,
	        random_state=seed,
	    )

	    # Hold out 20% of the samples for testing.
	    train_features, test_features, train_labels, test_labels = train_test_split(
	        features, labels, random_state=seed, test_size=0.2
	    )

	    return train_features, test_features, train_labels, test_labels


	@pytest.mark.parametrize(
	    "method, plot_type",
	    [
	        ("PCA", "2D"),  # PCA reduction to 2D
	        ("PCA", "3D"),  # PCA reduction to 3D
	    ],
	)
	def test_visualize_embeddings(method, plot_type, sample_embedding_data):
	    """
	    Verify that ``visualize_embeddings`` returns a PCA model of the right size.

	    For each parametrized plot type the returned object must be a ``PCA``
	    instance whose number of components matches the plot dimensionality
	    (2 for "2D", 3 for "3D").
	    """
	    train_x, test_x, train_y, test_y = sample_embedding_data

	    # Expected component count and failure message for each plot type.
	    expectations = {
	        "2D": (2, "PCA should reduce data to 2 components"),
	        "3D": (3, "PCA should reduce data to 3 components"),
	    }

	    # Figure.show is patched out so nothing is rendered while the test runs.
	    with patch("plotly.graph_objs.Figure.show"):
	        reducer = visualize_embeddings(
	            train_x, test_x, train_y, test_y, method=method, plot_type=plot_type
	        )

	    assert isinstance(reducer, PCA), "The model should be an instance of PCA"

	    expected = expectations.get(plot_type)
	    if expected is not None:
	        component_count, failure_message = expected
	        assert reducer.n_components_ == component_count, failure_message


	def test_train_and_evaluate_model(sample_embedding_data):
	    """
	    Check that ``train_and_evaluate_model`` returns usable, trained classifiers.

	    The result is expected to be a list of ``(name, model)`` pairs containing at
	    least a Logistic Regression and a Random Forest. Every returned model must be
	    able to predict on the test split and yield one prediction per test sample.
	    """
	    X_train, X_test, y_train, y_test = sample_embedding_data

	    # Train and evaluate the models
	    trained_models = train_and_evaluate_model(
	        X_train, X_test, y_train, y_test, test=False
	    )

	    # Verify that trained_models is a list
	    assert isinstance(trained_models, list), (
	        "The output should be a list of trained models"
	    )

	    # Check that at least two models were trained (Logistic Regression, Random Forest)
	    assert len(trained_models) >= 2, "At least two models should be trained"

	    # Check that the models include Logistic Regression and Random Forest
	    models_instances = [model for _, model in trained_models]
	    assert any(isinstance(model, LogisticRegression) for model in models_instances), (
	        "Logistic Regression model not found"
	    )
	    assert any(
	        isinstance(model, RandomForestClassifier) for model in models_instances
	    ), "Random Forest model not found"

	    for name, model in trained_models:
	        # These only confirm the estimator interface; an unfitted estimator
	        # has both attributes too.
	        assert hasattr(model, "fit"), f"{name} should have a fit method"
	        assert hasattr(model, "predict"), f"{name} should have a predict method"

	        # Calling predict() is the actual "is it trained?" check: scikit-learn
	        # estimators raise when asked to predict before being fitted.
	        y_pred = model.predict(X_test)
	        assert y_pred is not None, f"{name} should have successfully made predictions"

	        # `is not None` alone can never fail for a returned array, so also
	        # require exactly one prediction per test sample.
	        assert len(y_pred) == len(X_test), (
	            f"{name} should return one prediction per test sample"
	        )


	if __name__ == "__main__":
	    # pytest.main() returns the run's exit code; propagate it so that executing
	    # this file as a script reports test failures to the shell / CI instead of
	    # always exiting with status 0.
	    raise SystemExit(pytest.main())