From ad18ddd2dfdf0dcf9413a09e763fe951fb88d957 Mon Sep 17 00:00:00 2001 From: Raghav R V Date: Wed, 10 Jun 2015 19:03:12 +0530 Subject: [PATCH 1/4] ENH/TST Add helpers assert_{same_model|fitted_attributes_equal} TST Add tests for the new assert helpers --- sklearn/utils/testing.py | 218 +++++++++++++++++++++++++++- sklearn/utils/tests/test_testing.py | 110 +++++++++++++- 2 files changed, 316 insertions(+), 12 deletions(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 1035c4e7b9a2b..2001d904bbfdd 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -19,8 +19,11 @@ import platform import struct -import scipy as sp +import scipy import scipy.io +import scipy.sparse as sp +import numpy as np + from functools import wraps from operator import itemgetter try: @@ -71,9 +74,10 @@ __all__ = ["assert_equal", "assert_not_equal", "assert_raises", "assert_raises_regexp", "raises", "with_setup", "assert_true", "assert_false", "assert_almost_equal", "assert_array_equal", - "assert_array_almost_equal", "assert_array_less", - "assert_less", "assert_less_equal", - "assert_greater", "assert_greater_equal", + "assert_allclose", "assert_array_almost_equal", "assert_array_less", + "assert_less", "assert_less_equal", "assert_greater", + "assert_greater_equal", "assert_same_model", + "assert_not_same_model", "assert_fitted_attributes_almost_equal", "assert_approx_equal"] @@ -383,14 +387,52 @@ def __exit__(self, *exc_info): assert_greater = _assert_greater +def _sparse_dense_allclose(val1, val2, rtol=1e-7, atol=0): + """Check if two objects are close up to the preset tolerance. + + The objects can be scalars, lists, tuples, ndarrays or sparse matrices. 
+ """ + if isinstance(val1, (int, float)) and isinstance(val2, (int, float)): + return np.allclose(float(val1), float(val2), rtol, atol) + + if type(val1) is not type(val2): + return False + + comparables = (float, list, tuple, np.ndarray, sp.spmatrix) + + if not (isinstance(val1, comparables) or isinstance(val2, comparables)): + raise ValueError("The objects, %s and %s, are neither scalar nor " + "array-like." % (val1, val2)) + + # list/tuple (or list/tuple of ndarrays/spmatrices) + if isinstance(val1, (tuple, list)): + if (len(val1) == 0) and (len(val2) == 0): + return True + if len(val1) != len(val2): + return False + while isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix)): + return all(_sparse_dense_allclose(val1_i, val2[i], rtol, atol) + for i, val1_i in enumerate(val1)) + # Compare the lists, if they are not nested or singleton + return np.allclose(val1, val2, rtol, atol) + + same_shape = val1.shape == val2.shape + if sp.issparse(val1) or sp.issparse(val2): + return same_shape and np.allclose(val1.toarray(), val2.toarray(), + rtol, atol) + else: + return same_shape and np.allclose(val1, val2, rtol, atol) + + def _assert_allclose(actual, desired, rtol=1e-7, atol=0, err_msg='', verbose=True): actual, desired = np.asanyarray(actual), np.asanyarray(desired) if np.allclose(actual, desired, rtol=rtol, atol=atol): return - msg = ('Array not equal to tolerance rtol=%g, atol=%g: ' - 'actual %s, desired %s') % (rtol, atol, actual, desired) - raise AssertionError(msg) + if err_msg == '': + err_msg = ('Array not equal to tolerance rtol=%g, atol=%g: ' + 'actual %s, desired %s') % (rtol, atol, actual, desired) + raise AssertionError(err_msg) if hasattr(np.testing, 'assert_allclose'): @@ -433,6 +475,166 @@ def assert_raise_message(exceptions, message, function, *args, **kwargs): (names, function.__name__)) +def _assert_same_model_method(method, X, estimator1, estimator2, msg=None): + method_err = '%r\n\nhas %s, but\n\n%r\n\ndoes not.' 
+ # If the method is absent in only one model consider them different + if hasattr(estimator1, method) and not hasattr(estimator2, method): + raise AssertionError(method_err % (estimator1, method, estimator2)) + if hasattr(estimator2, method) and not hasattr(estimator1, method): + raise AssertionError(method_err % estimator2, method, estimator1) + + if not hasattr(estimator1, method): + return + + # Check if the method(X) returns the same for both models. + res1, res2 = getattr(estimator1, method)(X), getattr(estimator2, method)(X) + if not _sparse_dense_allclose(res1, res2): + if msg is None: + msg = ("Models are not equal. \n\n%s method returned different " + "results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s." + % (method, res1, estimator1, res2, estimator2)) + raise AssertionError(msg) + + +def assert_same_model(X, estimator1, estimator2, msg=None): + """Helper function to check if the models are similar. + + The check is done by comparing the outputs of the methods ``predict``, + ``transform``, ``decision_function`` and the ``predict_proba`` provided + they exist in both the models. If any of those methods do not exist in + one model alone, the models are considered different. + + If the outputs from both the models for each of the available above listed + function(s) are similar, a comparison of the attributes of the models + that end with ``_`` is done to ascertain the similarity of the model. + + If the models are different an AssertionError with the given error message + is raised. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Input data, for the fitted models, used for comparing them. + + estimator1 : An estimator object. + The first fitted model to be compared. + + estimator2 : An estimator object. + The second fitted model to be compared. + + msg : str + The error message to be used while raising the AssertionError if the + models are different. 
+ + Notes + ----- + This check is not exhaustive since all attributes of the model are assumed + to end with ``_``. If that is not the case, it could lead to false + positives. + """ + _assert_same_model_method('predict', X, estimator1, estimator2, msg) + _assert_same_model_method('transform', X, estimator1, estimator2, msg) + _assert_same_model_method('decision_function', + X, estimator1, estimator2, msg) + _assert_same_model_method('predict_proba', X, estimator1, estimator2, msg) + assert_fitted_attributes_almost_equal(estimator1, estimator2) + + +def assert_not_same_model(X, estimator1, estimator2, msg=None): + """Helper function to check if the models are different. + + The check is done by comparing the outputs of the methods ``predict``, + ``transform``, ``decision_function`` and the ``predict_proba``, provided + they exist in both the models. If any of those methods do not exist in + one model alone, the models are considered different. + + If the outputs from both the models for each of the available, above listed + function(s) are similar, a comparison of the attributes of the models + that end with ``_`` is done to ascertain the similarity of the model. + + If the models are similar an AssertionError with the given error message + is raised. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Input data, for the fitted models, used for comparing them. + + estimator1 : An estimator object. + The first fitted model to be compared. + + estimator2 : An estimator object. + The second fitted model to be compared. + + msg : str + The error message to be used while raising the AssertionError if the + models are similar. + + Notes + ----- + This check is not exhaustive since all attributes of the model are assumed + to end with ``_``. If that is not the case, it could lead to false + negatives. 
+ """ + try: + assert_same_model(X, estimator1, estimator2) + except AssertionError: + pass + else: + raise AssertionError(msg) + + +def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None): + """Helper function to check if the fitted model attributes are similar. + + This check is done by comparing the attributes from both the models that + end in ``_``. + + If the fitted models attributes are different an AssertionError with the + given error message is raised. + + Parameters + ---------- + estimator1 : An estimator object. + The first fitted model whose attributes are to be compared. + + estimator2 : An estimator object. + The second fitted model whose attributes are to be compared. + + msg : str + The error message to be used while raising the AssertionError, if the + fitted models attributes are different. + + Notes + ----- + This check is not exhaustive since all attributes of the model are assumed + to end with ``_``. If that is not the case, it could lead to false + positives. + """ + est1_dict, est2_dict = estimator1.__dict__, estimator2.__dict__ + assert_array_equal(est1_dict.keys(), est2_dict.keys(), + "The attributes of both the estimators do not match.") + + non_attributes = ("estimators_", "estimator_", "tree_", "base_estimator_", + "random_state_") + for attr in est1_dict: + val1, val2 = est1_dict[attr], est2_dict[attr] + + # Consider keys that end in ``_`` only as attributes. + if (attr.endswith('_') and attr not in non_attributes): + if msg is None: + msg = ("Attributes do not match. \nThe attribute, %s, in " + "estimator1,\n\n%r\n\n is %r and in estimator2," + "\n\n%r\n\n is %r.\n") % (attr, estimator1, val1, + estimator2, val2) + if isinstance(val1, str) and isinstance(val2, str): + attr_similar = val1 == val2 + else: + attr_similar = _sparse_dense_allclose(val1, val2) + if not attr_similar: + raise AssertionError(msg) + + def fake_mldata(columns_dict, dataname, matfile, ordering=None): """Create a fake mldata data set. 
@@ -465,7 +667,7 @@ def fake_mldata(columns_dict, dataname, matfile, ordering=None): ordering = sorted(list(datasets.keys())) # NOTE: setting up this array is tricky, because of the way Matlab # re-packages 1D arrays - datasets['mldata_descr_ordering'] = sp.empty((1, len(ordering)), + datasets['mldata_descr_ordering'] = np.empty((1, len(ordering)), dtype='object') for i, name in enumerate(ordering): datasets['mldata_descr_ordering'][0, i] = name diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index ea76333a6eafc..51e7d7c61d2c5 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -1,9 +1,10 @@ import warnings import unittest import sys +import numpy as np +from scipy import sparse as sp from nose.tools import assert_raises - from sklearn.utils.testing import ( _assert_less, _assert_greater, @@ -14,10 +15,16 @@ assert_equal, set_random_state, assert_raise_message, - ignore_warnings) - + ignore_warnings, + assert_safe_sparse_allclose, + assert_same_model, + assert_not_same_model) from sklearn.tree import DecisionTreeClassifier from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.qda import QDA +from sklearn.datasets import make_blobs +from sklearn.svm import LinearSVC +from sklearn.cluster import KMeans try: from nose.tools import assert_less @@ -189,10 +196,105 @@ def context_manager_no_user_multiple_warning(): assert_warns(DeprecationWarning, context_manager_no_user_multiple_warning) +def test_assert_safe_sparse_allclose(): + x = 1e-3 + y = 1e-9 + assert_safe_sparse_allclose(x, y, atol=1) + assert_raises(AssertionError, assert_safe_sparse_allclose, x, y) + + a = sp.csc_matrix(np.array([x, y, x, y])) + b = sp.coo_matrix(np.array([x, y, x, x])) + assert_safe_sparse_allclose(a, b, atol=1) + assert_raises(AssertionError, assert_safe_sparse_allclose, a, b) + + b[-1] = y * (1 + 1e-8) + assert_safe_sparse_allclose(a, b) + assert_raises(AssertionError, 
assert_safe_sparse_allclose, a, b, + rtol=1e-9) + + assert_safe_sparse_allclose([np.array([(6, 6)]),], [np.array([(10, 10)]),], + rtol=0.5) + assert_raises(AssertionError, assert_safe_sparse_allclose, + [np.array([(6, 6)]),], [np.array([(10, 10)]),], rtol=0.5) + + a = sp.csr_matrix(np.array([np.iinfo(np.int_).min], dtype=np.int_)) + # Should not raise: + assert_allclose(a, a) + + # Test nested lists of scalars + assert_safe_sparse_allclose([(['a', 'bcd'], ['a'])], + [(['a', 'bcd'], ['a'])]) + assert_raises(AssertionError, assert_safe_sparse_allclose, + [(['a', 'bcd'], ['a'])], [(['a', 'bcd'], ['a', 'a'])]) + assert_raises(AssertionError, assert_safe_sparse_allclose, + [(['a', 'bcd'], ['a'])], [(['a', 'bcd'], ['b'])]) + + + # Test the string comparison + assert_safe_sparse_allclose('a', 'a') + assert_safe_sparse_allclose('abcdl', 'abcdl') + assert_raises(AssertionError, assert_safe_sparse_allclose, 'a', 'b') + assert_raises(AssertionError, assert_safe_sparse_allclose, 'aa', 'b') + + # Test numeric comparisons + assert_safe_sparse_allclose(6, np.float64(6)) + assert_safe_sparse_allclose(6, 6.0) + assert_safe_sparse_allclose(7, 7.0) + assert_safe_sparse_allclose(5, np.int32(5)) + + # Make sure you don't get infinite recursion with empty nested lists + x = [] + x.append(x) + assert_safe_sparse_allclose(x, x) + + +def test_assert_same_not_same_model(): + X1, y1 = make_blobs(n_samples=200, n_features=5, center_box=(-200, -150), + centers=2, random_state=0) + X2, y2 = make_blobs(n_samples=100, n_features=5, center_box=(-1, 1), + centers=3, random_state=1) + X3, y3 = make_blobs(n_samples=50, n_features=5, center_box=(-100, -50), + centers=4, random_state=2) + + # Checking both non-transductive and transductive algorithms + # By testing for transductive algorithms we also eventually test + # the assert_fitted_attributes_equal helper. 
+ for Estimator in (LinearSVC, KMeans): + assert_same_model(X3, Estimator(random_state=0).fit(X1, y1), + Estimator(random_state=0).fit(X1, y1)) + assert_raises(AssertionError, assert_not_same_model, X3, + Estimator(random_state=0).fit(X1, y1), + Estimator(random_state=0).fit(X1, y1)) + assert_raises(AssertionError, assert_same_model, X3, + Estimator(random_state=0).fit(X1, y1), + Estimator(random_state=0).fit(X2, y2)) + assert_not_same_model(X3, Estimator(random_state=0).fit(X1, y1), + Estimator(random_state=0).fit(X2, y2)) + + +def test_qda_same_model(): + # NRT to make sure the rotations_ attribute is correctly compared + X = np.array([[0, 0], [-2, -2], [-2, -1], [-1, -1], [-1, -2], + [1, 3], [1, 2], [2, 1], [2, 2]]) + y = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2]) + X1 = np.array([[-3, -1], [-2, 0], [-1, 0], [-11, 0], [0, 0], [1, 0], + [1, 5], [2, 0], [3, 4]]) + y1 = np.array([1, 1, 1, 1, 2, 2, 2, 2, 2]) + X2 = np.array([[-1, -3], [0, -2], [0, -1], [0, -5], [0, 0], [10, 1], + [0, 11], [0, 22], [0, 33]]) + + clf1 = QDA().fit(X, y) + clf2 = QDA().fit(X, y) + assert_same_model(X1, clf1, clf2) + + clf3 = QDA().fit(X1, y1) + assert_not_same_model(X2, clf1, clf3) + + # This class is inspired from numpy 1.7 with an alteration to check # the reset warning filters after calls to assert_warns. # This assert_warns behavior is specific to scikit-learn because -#`clean_warning_registry()` is called internally by assert_warns +# `clean_warning_registry()` is called internally by assert_warns # and clears all previous filters. 
class TestWarns(unittest.TestCase): def test_warn(self): From 64cefb84dcc9fe6a232cfd2c3e58f440723638ea Mon Sep 17 00:00:00 2001 From: Raghav R V Date: Tue, 16 Jun 2015 22:16:10 +0530 Subject: [PATCH 2/4] TST Add test to check if estimators reset upon fit --- sklearn/utils/estimator_checks.py | 46 ++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index aaf174906f960..ab2dee118556d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -30,7 +30,8 @@ from sklearn.utils.testing import SkipTest from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns - +from sklearn.utils.testing import assert_same_model +from sklearn.utils.testing import assert_not_same_model from sklearn.base import (clone, ClassifierMixin, RegressorMixin, TransformerMixin, ClusterMixin, BaseEstimator) @@ -107,6 +108,8 @@ def _yield_non_meta_checks(name, Estimator): # Test that estimators can be pickled, and once pickled # give the same answer as before. 
yield check_estimators_pickle + if name not in ('SpectralEmbedding',): + yield check_estimator_fit_reset def _yield_classifier_checks(name, Classifier): @@ -1553,3 +1556,44 @@ def check_classifiers_regression_target(name, Estimator): e = Estimator() msg = 'Unknown label type: ' assert_raises_regex(ValueError, msg, e.fit, X, y) + + +@ignore_warnings +def check_estimator_fit_reset(name, Estimator): + X1, y1 = make_blobs(n_samples=50, n_features=2, center_box=(-200, -150), + centers=2, random_state=0) + X2, y2 = make_blobs(n_samples=50, n_features=2, center_box=(200, 150), + centers=2, random_state=1) + X3, y3 = make_blobs(n_samples=50, n_features=2, center_box=(-200, 150), + centers=3, random_state=2) + X4, y4 = make_blobs(n_samples=50, n_features=5, center_box=(-200, -150), + centers=2, random_state=0) + X5, y5 = make_blobs(n_samples=50, n_features=5, center_box=(200, 150), + centers=2, random_state=1) + X6, y6 = make_blobs(n_samples=50, n_features=5, center_box=(-200, 150), + centers=3, random_state=2) + + # Some estimators work only on non-negative inputs + if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler', + 'NMF', 'MultinomialNB', 'ProjectedGradientNMF'): + X1 -= X1.min(), X2, X3, X4, X5, X6 = map(np.fabs, (X1, X2, X3, X4, X5, X6)) + + y1, y2, y3, y4, y5, y6 = map(multioutput_estimator_convert_y_2d, + (name,)*6, (y1, y2, y3, y4, y5, y6)) + estimator_1 = Estimator() + estimator_2 = Estimator() + + set_testing_parameters(estimator_1) + set_testing_parameters(estimator_2) + + set_random_state(estimator_1) + set_random_state(estimator_2) + + assert_not_same_model(X3, estimator_1.fit(X1, y1), estimator_2.fit(X2, y2)) + assert_same_model(X3, estimator_1.fit(X2, y2), estimator_2) + assert_same_model(X2, estimator_1.fit(X1, y1), estimator_2.fit(X1, y1)) + + # Fitting new data with 5 features + assert_not_same_model(X6, estimator_1.fit(X4, y4), estimator_2.fit(X5, y5)) + assert_same_model(X6, estimator_1.fit(X5, y5), estimator_2) + assert_same_model(X5, 
estimator_1.fit(X4, y4), estimator_2.fit(X4, y4)) From 61e98d320b1214dbbc4610153f56d2f67e9b9430 Mon Sep 17 00:00:00 2001 From: Raghav R V Date: Mon, 12 Oct 2015 15:04:29 +0200 Subject: [PATCH 3/4] FIX Shift the points instead of taking abs to preserve blobiness --- sklearn/utils/estimator_checks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index ab2dee118556d..504be42eaa230 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1576,7 +1576,12 @@ def check_estimator_fit_reset(name, Estimator): # Some estimators work only on non-negative inputs if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler', 'NMF', 'MultinomialNB', 'ProjectedGradientNMF'): - X1 -= X1.min(), X2, X3, X4, X5, X6 = map(np.fabs, (X1, X2, X3, X4, X5, X6)) + X1 -= X1.min() + X2 -= X2.min() + X3 -= X3.min() + X4 -= X4.min() + X5 -= X5.min() + X6 -= X6.min() y1, y2, y3, y4, y5, y6 = map(multioutput_estimator_convert_y_2d, (name,)*6, (y1, y2, y3, y4, y5, y6)) From 35fdeaaddf8ba3ad795c198313e7b3a85e32cba4 Mon Sep 17 00:00:00 2001 From: Raghav R V Date: Thu, 12 Nov 2015 15:42:02 +0100 Subject: [PATCH 4/4] WIP + SCAFFOLD_REMOVE_BEFORE_MERGE --- sklearn/utils/estimator_checks.py | 16 ++-- sklearn/utils/testing.py | 117 ++++++++++++++++------------ sklearn/utils/tests/test_testing.py | 40 ++++++---- 3 files changed, 102 insertions(+), 71 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 504be42eaa230..02e4850cfc4d0 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -77,6 +77,7 @@ def _yield_non_meta_checks(name, Estimator): + """ yield check_estimators_dtypes yield check_fit_score_takes_y yield check_dtype_object @@ -108,6 +109,7 @@ def _yield_non_meta_checks(name, Estimator): # Test that estimators can be pickled, and once pickled # give the same answer as before. 
yield check_estimators_pickle + """ if name not in ('SpectralEmbedding',): yield check_estimator_fit_reset @@ -202,6 +204,7 @@ def _yield_clustering_checks(name, Clusterer): def _yield_all_checks(name, Estimator): for check in _yield_non_meta_checks(name, Estimator): yield check + """ if issubclass(Estimator, ClassifierMixin): for check in _yield_classifier_checks(name, Estimator): yield check @@ -220,6 +223,7 @@ def _yield_all_checks(name, Estimator): yield check_fit2d_1feature yield check_fit1d_1feature yield check_fit1d_1sample + """ def check_estimator(Estimator): @@ -1574,14 +1578,10 @@ def check_estimator_fit_reset(name, Estimator): centers=3, random_state=2) # Some estimators work only on non-negative inputs - if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler', - 'NMF', 'MultinomialNB', 'ProjectedGradientNMF'): - X1 -= X1.min() - X2 -= X2.min() - X3 -= X3.min() - X4 -= X4.min() - X5 -= X5.min() - X6 -= X6.min() + if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler', 'NMF', + 'MultinomialNB', 'ProjectedGradientNMF',): + X1, X2, X3, X4, X5, X6 = map(lambda X: X - X.min(), + (X1, X2, X3, X4, X5, X6)) y1, y2, y3, y4, y5, y6 = map(multioutput_estimator_convert_y_2d, (name,)*6, (y1, y2, y3, y4, y5, y6)) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 2001d904bbfdd..c67df100292da 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -78,7 +78,7 @@ "assert_less", "assert_less_equal", "assert_greater", "assert_greater_equal", "assert_same_model", "assert_not_same_model", "assert_fitted_attributes_almost_equal", - "assert_approx_equal"] + "assert_approx_equal", "assert_safe_sparse_allclose"] try: @@ -387,41 +387,72 @@ def __exit__(self, *exc_info): assert_greater = _assert_greater -def _sparse_dense_allclose(val1, val2, rtol=1e-7, atol=0): +if hasattr(np.testing, 'assert_allclose'): + assert_allclose = np.testing.assert_allclose +else: + assert_allclose = _assert_allclose + + +def assert_safe_sparse_allclose(val1, val2, 
rtol=1e-7, atol=0, msg=None): """Check if two objects are close up to the preset tolerance. The objects can be scalars, lists, tuples, ndarrays or sparse matrices. """ - if isinstance(val1, (int, float)) and isinstance(val2, (int, float)): - return np.allclose(float(val1), float(val2), rtol, atol) + if msg is None: + msg = ("The val1,\n%s\nand val2,\n%s\nare not all close" + % (val1, val2)) + + if isinstance(val1, str) and isinstance(val2, str): + assert_true(val1 == val2, msg=msg) - if type(val1) is not type(val2): - return False + elif np.isscalar(val1) and np.isscalar(val2): + assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg) - comparables = (float, list, tuple, np.ndarray, sp.spmatrix) + # To allow mixed formats for sparse matrices alone + elif type(val1) is not type(val2) and not ( + sp.issparse(val1) and sp.issparse(val2)): + assert False, msg - if not (isinstance(val1, comparables) or isinstance(val2, comparables)): - raise ValueError("The objects, %s and %s, are neither scalar nor " + elif not (isinstance(val1, (list, tuple, np.ndarray, sp.spmatrix, dict))): + raise ValueError("The objects,\n%s\nand\n%s\n, are neither scalar nor " "array-like." % (val1, val2)) - # list/tuple (or list/tuple of ndarrays/spmatrices) - if isinstance(val1, (tuple, list)): + # list/tuple/dict (of list/tuple/dict...) 
of ndarrays/spmatrices/scalars + elif isinstance(val1, (tuple, list, dict)): + if isinstance(val1, dict): + val1, val2 = tuple(val1.iteritems()), tuple(val2.iteritems()) if (len(val1) == 0) and (len(val2) == 0): - return True - if len(val1) != len(val2): - return False - while isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix)): - return all(_sparse_dense_allclose(val1_i, val2[i], rtol, atol) - for i, val1_i in enumerate(val1)) - # Compare the lists, if they are not nested or singleton - return np.allclose(val1, val2, rtol, atol) - - same_shape = val1.shape == val2.shape - if sp.issparse(val1) or sp.issparse(val2): - return same_shape and np.allclose(val1.toarray(), val2.toarray(), - rtol, atol) + assert True + elif len(val1) != len(val2): + assert False, msg + # nested lists/tuples - [array([5, 6]), array([5, ])] and [[1, 3], ] + # Or ['str',] and ['str',] + elif isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix, str)): + # Compare them recursively + for i, val1_i in enumerate(val1): + assert_safe_sparse_allclose(val1_i, val2[i], + rtol=rtol, atol=atol, msg=msg) + # Compare the lists using np.allclose, if they are neither nested nor + # contain strings + else: + assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg) + + # scipy sparse matrix + elif sp.issparse(val1) or sp.issparse(val2): + # NOTE: ref np.allclose's note for assymetricity in this testing + if val1.shape != val2.shape: + assert False, msg + + diff = abs(val1 - val2) - (rtol * abs(val2)) + assert np.any(diff > atol).size == 0, msg + + # numpy ndarray + elif isinstance(val1, (np.ndarray)): + if val1.shape != val2.shape: + assert False, msg + assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg) else: - return same_shape and np.allclose(val1, val2, rtol, atol) + assert False, msg def _assert_allclose(actual, desired, rtol=1e-7, atol=0, @@ -435,12 +466,6 @@ def _assert_allclose(actual, desired, rtol=1e-7, atol=0, raise AssertionError(err_msg) -if hasattr(np.testing, 
'assert_allclose'): - assert_allclose = np.testing.assert_allclose -else: - assert_allclose = _assert_allclose - - def assert_raise_message(exceptions, message, function, *args, **kwargs): """Helper function to test error messages in exceptions. @@ -488,12 +513,11 @@ def _assert_same_model_method(method, X, estimator1, estimator2, msg=None): # Check if the method(X) returns the same for both models. res1, res2 = getattr(estimator1, method)(X), getattr(estimator2, method)(X) - if not _sparse_dense_allclose(res1, res2): - if msg is None: - msg = ("Models are not equal. \n\n%s method returned different " - "results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s." - % (method, res1, estimator1, res2, estimator2)) - raise AssertionError(msg) + if msg is None: + msg = ("Models are not equal. \n\n%s method returned different " + "results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s." + % (method, res1, estimator1, res2, estimator2)) + assert_safe_sparse_allclose(res1, res2, msg=msg) def assert_same_model(X, estimator1, estimator2, msg=None): @@ -579,9 +603,8 @@ def assert_not_same_model(X, estimator1, estimator2, msg=None): try: assert_same_model(X, estimator1, estimator2) except AssertionError: - pass - else: - raise AssertionError(msg) + return + raise AssertionError(msg) def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None): @@ -616,23 +639,21 @@ def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None): "The attributes of both the estimators do not match.") non_attributes = ("estimators_", "estimator_", "tree_", "base_estimator_", - "random_state_") + "random_state_", "root_", "label_binarizer_", "loss_") + non_attr_suffixes = ("leaf_",) + for attr in est1_dict: val1, val2 = est1_dict[attr], est2_dict[attr] # Consider keys that end in ``_`` only as attributes. 
- if (attr.endswith('_') and attr not in non_attributes): + if (attr.endswith('_') and attr not in non_attributes and + not attr.endswith(non_attr_suffixes)): if msg is None: msg = ("Attributes do not match. \nThe attribute, %s, in " "estimator1,\n\n%r\n\n is %r and in estimator2," "\n\n%r\n\n is %r.\n") % (attr, estimator1, val1, estimator2, val2) - if isinstance(val1, str) and isinstance(val2, str): - attr_similar = val1 == val2 - else: - attr_similar = _sparse_dense_allclose(val1, val2) - if not attr_similar: - raise AssertionError(msg) + assert_safe_sparse_allclose(val1, val2, msg=msg) def fake_mldata(columns_dict, dataname, matfile, ordering=None): diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 51e7d7c61d2c5..00ba84d942b06 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -3,6 +3,7 @@ import sys import numpy as np from scipy import sparse as sp +from numpy.testing import assert_allclose from nose.tools import assert_raises from sklearn.utils.testing import ( @@ -197,29 +198,26 @@ def context_manager_no_user_multiple_warning(): def test_assert_safe_sparse_allclose(): + # Test Scalars x = 1e-3 y = 1e-9 assert_safe_sparse_allclose(x, y, atol=1) assert_raises(AssertionError, assert_safe_sparse_allclose, x, y) - a = sp.csc_matrix(np.array([x, y, x, y])) - b = sp.coo_matrix(np.array([x, y, x, x])) + # Test Sparse matrices + a = sp.coo_matrix(np.array([x, y, x, y])) + b = sp.csr_matrix(np.array([x, y, x, x])) assert_safe_sparse_allclose(a, b, atol=1) assert_raises(AssertionError, assert_safe_sparse_allclose, a, b) - b[-1] = y * (1 + 1e-8) + b[0, 3] = y * (1 + 1e-8) assert_safe_sparse_allclose(a, b) - assert_raises(AssertionError, assert_safe_sparse_allclose, a, b, - rtol=1e-9) + assert_raises(AssertionError, assert_safe_sparse_allclose, a, b, rtol=1e-9) assert_safe_sparse_allclose([np.array([(6, 6)]),], [np.array([(10, 10)]),], rtol=0.5) assert_raises(AssertionError, 
assert_safe_sparse_allclose, - [np.array([(6, 6)]),], [np.array([(10, 10)]),], rtol=0.5) - - a = sp.csr_matrix(np.array([np.iinfo(np.int_).min], dtype=np.int_)) - # Should not raise: - assert_allclose(a, a) + [np.array([(6, 6)]),], [np.array([(10, 10)]),]) # Test nested lists of scalars assert_safe_sparse_allclose([(['a', 'bcd'], ['a'])], @@ -229,6 +227,23 @@ def test_assert_safe_sparse_allclose(): assert_raises(AssertionError, assert_safe_sparse_allclose, [(['a', 'bcd'], ['a'])], [(['a', 'bcd'], ['b'])]) + # Test dicts + assert_safe_sparse_allclose({}, {}) + assert_safe_sparse_allclose({'a':'a'}, {'a':'a'}) + dict_1 = {'a':{'b':{'arr':np.array([1, 2, 3]), 'str':'str', 'int':9}}} + dict_2 = {'a':{'b':{'arr':np.array([1, 2, 3]), 'str':'str', 'int':9}}} + assert_safe_sparse_allclose(dict_1, dict_2) + dict_1['a']['b']['arr'] = np.array([2, 2, 3]) + assert_safe_sparse_allclose(dict_1, dict_2, atol=1) + assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2) + + # Test nested list of dicts of spmatrices and ndarrays + dict_1['a']['b']['arr1'] = [a, np.array([3, 4.])] + assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2, + atol=1) + dict_2['a']['b']['arr1'] = [b, np.array([3, 4.])] + assert_safe_sparse_allclose(dict_1, dict_2, atol=1) + assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2) # Test the string comparison assert_safe_sparse_allclose('a', 'a') @@ -242,11 +257,6 @@ def test_assert_safe_sparse_allclose(): assert_safe_sparse_allclose(7, 7.0) assert_safe_sparse_allclose(5, np.int32(5)) - # Make sure you don't get infinite recursion with empty nested lists - x = [] - x.append(x) - assert_safe_sparse_allclose(x, x) - def test_assert_same_not_same_model(): X1, y1 = make_blobs(n_samples=200, n_features=5, center_box=(-200, -150),