# Multimodal_Product_Classification / tests / test_classifiers_classic_ml.py
# Committed by: iBrokeTheCode
# Commit 43fe501: "chore: Add tests cases"
# File size: 4.22 kB
from unittest.mock import patch
import pytest
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from src.classifiers_classic_ml import train_and_evaluate_model, visualize_embeddings
####################################################################################################
################################### Test the Classical ML Models ###################################
####################################################################################################
@pytest.fixture
def sample_embedding_data():
    """
    Build a small synthetic classification dataset, already split for train/test use.

    The data has 20 samples, 6 features (4 of them informative) and 3 classes.
    Both the generation and the split use ``random_state=42``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` from an 80/20 split.
    """
    features, labels = make_classification(
        n_samples=20,
        n_features=6,
        n_informative=4,
        n_classes=3,
        random_state=42,
    )

    # Hold out 20% of the samples as the test partition; train_test_split
    # yields the four arrays in exactly the order documented above.
    return tuple(train_test_split(features, labels, test_size=0.2, random_state=42))
@pytest.mark.parametrize(
    "method, plot_type",
    [
        ("PCA", "2D"),  # PCA projection onto two components
        ("PCA", "3D"),  # PCA projection onto three components
    ],
)
def test_visualize_embeddings(method, plot_type, sample_embedding_data):
    """
    Check that visualize_embeddings returns a PCA reducer of the requested size.

    The returned object must be a ``PCA`` instance whose ``n_components_``
    matches the dimensionality implied by ``plot_type`` (2 for "2D", 3 for "3D").
    """
    X_train, X_test, y_train, y_test = sample_embedding_data

    # Component count and failure message expected for each plot type.
    expectations = {
        "2D": (2, "PCA should reduce data to 2 components"),
        "3D": (3, "PCA should reduce data to 3 components"),
    }

    # Stub out Figure.show so no plot window/browser is opened during the test.
    with patch("plotly.graph_objs.Figure.show"):
        reducer = visualize_embeddings(
            X_train, X_test, y_train, y_test, plot_type=plot_type, method=method
        )

    assert isinstance(reducer, PCA), "The model should be an instance of PCA"

    if plot_type in expectations:
        expected_components, failure_message = expectations[plot_type]
        assert reducer.n_components_ == expected_components, failure_message
def test_train_and_evaluate_model(sample_embedding_data):
    """
    Test the training and evaluation of models (Logistic Regression, Random Forest).

    Verifies that ``train_and_evaluate_model`` returns a list of
    ``(name, model)`` pairs containing at least a ``LogisticRegression`` and a
    ``RandomForestClassifier``, and that every returned model can predict on
    the test split, producing exactly one prediction per test sample.
    """
    X_train, X_test, y_train, y_test = sample_embedding_data

    # Train and evaluate the models. ``test=False`` is forwarded unchanged; its
    # meaning is defined by train_and_evaluate_model itself.
    trained_models = train_and_evaluate_model(
        X_train, X_test, y_train, y_test, test=False
    )

    # Verify that trained_models is a list
    assert isinstance(trained_models, list), (
        "The output should be a list of trained models"
    )

    # Check that at least two models were trained (Logistic Regression, Random Forest)
    assert len(trained_models) >= 2, "At least two models should be trained"

    # Check that the models include Logistic Regression and Random Forest
    models_instances = [model for _, model in trained_models]
    assert any(isinstance(model, LogisticRegression) for model in models_instances), (
        "Logistic Regression model not found"
    )
    assert any(
        isinstance(model, RandomForestClassifier) for model in models_instances
    ), "Random Forest model not found"

    for name, model in trained_models:
        # Interface checks only: these attributes exist on unfitted estimators
        # too, so they do not prove that training happened.
        assert hasattr(model, "fit"), f"{name} should have a fit method"
        assert hasattr(model, "predict"), f"{name} should have a predict method"

        # The real "is it trained" check: scikit-learn estimators raise
        # NotFittedError when predict() is called before fit().
        y_pred = model.predict(X_test)
        assert y_pred is not None, f"{name} should have successfully made predictions"

        # A usable classifier returns one label per input row.
        assert len(y_pred) == len(X_test), (
            f"{name} should return one prediction per test sample"
        )
if __name__ == "__main__":
    # pytest.main() returns the exit code instead of terminating the process.
    # Hand it to the interpreter so a failing run gives a non-zero exit status
    # when this file is executed directly as a script.
    raise SystemExit(pytest.main())