WIP + SCAFFOLD_REMOVE_BEFORE_MERGE · scikit-learn/scikit-learn@35fdeaa · GitHub
[go: up one dir, main page]

Skip to content

Commit 35fdeaa

Browse files
committed
WIP + SCAFFOLD_REMOVE_BEFORE_MERGE
1 parent 61e98d3 commit 35fdeaa

File tree

3 files changed

+102
-71
lines changed

3 files changed

+102
-71
lines changed

sklearn/utils/estimator_checks.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777

7878

7979
def _yield_non_meta_checks(name, Estimator):
80+
"""
8081
yield check_estimators_dtypes
8182
yield check_fit_score_takes_y
8283
yield check_dtype_object
@@ -108,6 +109,7 @@ def _yield_non_meta_checks(name, Estimator):
108109
# Test that estimators can be pickled, and once pickled
109110
# give the same answer as before.
110111
yield check_estimators_pickle
112+
"""
111113
if name not in ('SpectralEmbedding',):
112114
yield check_estimator_fit_reset
113115

@@ -202,6 +204,7 @@ def _yield_clustering_checks(name, Clusterer):
202204
def _yield_all_checks(name, Estimator):
203205
for check in _yield_non_meta_checks(name, Estimator):
204206
yield check
207+
"""
205208
if issubclass(Estimator, ClassifierMixin):
206209
for check in _yield_classifier_checks(name, Estimator):
207210
yield check
@@ -220,6 +223,7 @@ def _yield_all_checks(name, Estimator):
220223
yield check_fit2d_1feature
221224
yield check_fit1d_1feature
222225
yield check_fit1d_1sample
226+
"""
223227

224228

225229
def check_estimator(Estimator):
@@ -1574,14 +1578,10 @@ def check_estimator_fit_reset(name, Estimator):
15741578
centers=3, random_state=2)
15751579

15761580
# Some estimators work only on non-negative inputs
1577-
if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler',
1578-
'NMF', 'MultinomialNB', 'ProjectedGradientNMF'):
1579-
X1 -= X1.min()
1580-
X2 -= X2.min()
1581-
X3 -= X3.min()
1582-
X4 -= X4.min()
1583-
X5 -= X5.min()
1584-
X6 -= X6.min()
1581+
if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler', 'NMF',
1582+
'MultinomialNB', 'ProjectedGradientNMF',):
1583+
X1, X2, X3, X4, X5, X6 = map(lambda X: X - X.min(),
1584+
(X1, X2, X3, X4, X5, X6))
15851585

15861586
y1, y2, y3, y4, y5, y6 = map(multioutput_estimator_convert_y_2d,
15871587
(name,)*6, (y1, y2, y3, y4, y5, y6))

sklearn/utils/testing.py

Lines changed: 69 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
"assert_less", "assert_less_equal", "assert_greater",
7979
"assert_greater_equal", "assert_same_model",
8080
"assert_not_same_model", "assert_fitted_attributes_almost_equal",
81-
"assert_approx_equal"]
81+
"assert_approx_equal", "assert_safe_sparse_allclose"]
8282

8383

8484
try:
@@ -387,41 +387,72 @@ def __exit__(self, *exc_info):
387387
assert_greater = _assert_greater
388388

389389

390-
def _sparse_dense_allclose(val1, val2, rtol=1e-7, atol=0):
390+
if hasattr(np.testing, 'assert_allclose'):
391+
assert_allclose = np.testing.assert_allclose
392+
else:
393+
assert_allclose = _assert_allclose
394+
395+
396+
def assert_safe_sparse_allclose(val1, val2, rtol=1e-7, atol=0, msg=None):
391397
"""Check if two objects are close up to the preset tolerance.
392398
393399
The objects can be scalars, lists, tuples, ndarrays or sparse matrices.
394400
"""
395-
if isinstance(val1, (int, float)) and isinstance(val2, (int, float)):
396-
return np.allclose(float(val1), float(val2), rtol, atol)
401+
if msg is None:
402+
msg = ("The val1,\n%s\nand val2,\n%s\nare not all close"
403+
% (val1, val2))
404+
405+
if isinstance(val1, str) and isinstance(val2, str):
406+
assert_true(val1 == val2, msg=msg)
397407

398-
if type(val1) is not type(val2):
399-
return False
408+
elif np.isscalar(val1) and np.isscalar(val2):
409+
assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg)
400410

401-
comparables = (float, list, tuple, np.ndarray, sp.spmatrix)
411+
# To allow mixed formats for sparse matrices alone
412+
elif type(val1) is not type(val2) and not (
413+
sp.issparse(val1) and sp.issparse(val2)):
414+
assert False, msg
402415

403-
if not (isinstance(val1, comparables) or isinstance(val2, comparables)):
404-
raise ValueError("The objects, %s and %s, are neither scalar nor "
416+
elif not (isinstance(val1, (list, tuple, np.ndarray, sp.spmatrix, dict))):
417+
raise ValueError("The objects,\n%s\nand\n%s\n, are neither scalar nor "
405418
"array-like." % (val1, val2))
406419

407-
# list/tuple (or list/tuple of ndarrays/spmatrices)
408-
if isinstance(val1, (tuple, list)):
420+
# list/tuple/dict (of list/tuple/dict...) of ndarrays/spmatrices/scalars
421+
elif isinstance(val1, (tuple, list, dict)):
422+
if isinstance(val1, dict):
423+
val1, val2 = tuple(val1.iteritems()), tuple(val2.iteritems())
409424
if (len(val1) == 0) and (len(val2) == 0):
410-
return True
411-
if len(val1) != len(val2):
412-
return False
413-
while isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix)):
414-
return all(_sparse_dense_allclose(val1_i, val2[i], rtol, atol)
415-
for i, val1_i in enumerate(val1))
416-
# Compare the lists, if they are not nested or singleton
417-
return np.allclose(val1, val2, rtol, atol)
418-
419-
same_shape = val1.shape == val2.shape
420-
if sp.issparse(val1) or sp.issparse(val2):
421-
return same_shape and np.allclose(val1.toarray(), val2.toarray(),
422-
rtol, atol)
425+
assert True
426+
elif len(val1) != len(val2):
427+
assert False, msg
428+
# nested lists/tuples - [array([5, 6]), array([5, ])] and [[1, 3], ]
429+
# Or ['str',] and ['str',]
430+
elif isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix, str)):
431+
# Compare them recursively
432+
for i, val1_i in enumerate(val1):
433+
assert_safe_sparse_allclose(val1_i, val2[i],
434+
rtol=rtol, atol=atol, msg=msg)
435+
# Compare the lists using assert_allclose, if they are neither nested nor
436+
# contain strings
437+
else:
438+
assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg)
439+
440+
# scipy sparse matrix
441+
elif sp.issparse(val1) or sp.issparse(val2):
442+
# NOTE: see the note in np.allclose's docs on the asymmetry of this test
443+
if val1.shape != val2.shape:
444+
assert False, msg
445+
446+
diff = abs(val1 - val2) - (rtol * abs(val2))
447+
assert np.any(diff > atol).size == 0, msg
448+
449+
# numpy ndarray
450+
elif isinstance(val1, (np.ndarray)):
451+
if val1.shape != val2.shape:
452+
assert False, msg
453+
assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg)
423454
else:
424-
return same_shape and np.allclose(val1, val2, rtol, atol)
455+
assert False, msg
425456

426457

427458
def _assert_allclose(actual, desired, rtol=1e-7, atol=0,
@@ -435,12 +466,6 @@ def _assert_allclose(actual, desired, rtol=1e-7, atol=0,
435466
raise AssertionError(err_msg)
436467

437468

438-
if hasattr(np.testing, 'assert_allclose'):
439-
assert_allclose = np.testing.assert_allclose
440-
else:
441-
assert_allclose = _assert_allclose
442-
443-
444469
def assert_raise_message(exceptions, message, function, *args, **kwargs):
445470
"""Helper function to test error messages in exceptions.
446471
@@ -488,12 +513,11 @@ def _assert_same_model_method(method, X, estimator1, estimator2, msg=None):
488513

489514
# Check if the method(X) returns the same for both models.
490515
res1, res2 = getattr(estimator1, method)(X), getattr(estimator2, method)(X)
491-
if not _sparse_dense_allclose(res1, res2):
492-
if msg is None:
493-
msg = ("Models are not equal. \n\n%s method returned different "
494-
"results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s."
495-
% (method, res1, estimator1, res2, estimator2))
496-
raise AssertionError(msg)
516+
if msg is None:
517+
msg = ("Models are not equal. \n\n%s method returned different "
518+
"results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s."
519+
% (method, res1, estimator1, res2, estimator2))
520+
assert_safe_sparse_allclose(res1, res2, msg=msg)
497521

498522

499523
def assert_same_model(X, estimator1, estimator2, msg=None):
@@ -579,9 +603,8 @@ def assert_not_same_model(X, estimator1, estimator2, msg=None):
579603
try:
580604
assert_same_model(X, estimator1, estimator2)
581605
except AssertionError:
582-
pass
583-
else:
584-
raise AssertionError(msg)
606+
return
607+
raise AssertionError(msg)
585608

586609

587610
def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None):
@@ -616,23 +639,21 @@ def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None):
616639
"The attributes of both the estimators do not match.")
617640

618641
non_attributes = ("estimators_", "estimator_", "tree_", "base_estimator_",
619-
"random_state_")
642+
"random_state_", "root_", "label_binarizer_", "loss_")
643+
non_attr_suffixes = ("leaf_",)
644+
620645
for attr in est1_dict:
621646
val1, val2 = est1_dict[attr], est2_dict[attr]
622647

623648
# Consider keys that end in ``_`` only as attributes.
624-
if (attr.endswith('_') and attr not in non_attributes):
649+
if (attr.endswith('_') and attr not in non_attributes and
650+
not attr.endswith(non_attr_suffixes)):
625651
if msg is None:
626652
msg = ("Attributes do not match. \nThe attribute, %s, in "
627653
"estimator1,\n\n%r\n\n is %r and in estimator2,"
628654
"\n\n%r\n\n is %r.\n") % (attr, estimator1, val1,
629655
estimator2, val2)
630-
if isinstance(val1, str) and isinstance(val2, str):
631-
attr_similar = val1 == val2
632-
else:
633-
attr_similar = _sparse_dense_allclose(val1, val2)
634-
if not attr_similar:
635-
raise AssertionError(msg)
656+
assert_safe_sparse_allclose(val1, val2, msg=msg)
636657

637658

638659
def fake_mldata(columns_dict, dataname, matfile, ordering=None):

sklearn/utils/tests/test_testing.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import sys
44
import numpy as np
55
from scipy import sparse as sp
6+
from numpy.testing import assert_allclose
67

78
from nose.tools import assert_raises
89
from sklearn.utils.testing import (
@@ -197,29 +198,26 @@ def context_manager_no_user_multiple_warning():
197198

198199

199200
def test_assert_safe_sparse_allclose():
201+
# Test Scalars
200202
x = 1e-3
201203
y = 1e-9
202204
assert_safe_sparse_allclose(x, y, atol=1)
203205
assert_raises(AssertionError, assert_safe_sparse_allclose, x, y)
204206

205-
a = sp.csc_matrix(np.array([x, y, x, y]))
206-
b = sp.coo_matrix(np.array([x, y, x, x]))
207+
# Test Sparse matrices
208+
a = sp.coo_matrix(np.array([x, y, x, y]))
209+
b = sp.csr_matrix(np.array([x, y, x, x]))
207210
assert_safe_sparse_allclose(a, b, atol=1)
208211
assert_raises(AssertionError, assert_safe_sparse_allclose, a, b)
209212

210-
b[-1] = y * (1 + 1e-8)
213+
b[0, 3] = y * (1 + 1e-8)
211214
assert_safe_sparse_allclose(a, b)
212-
assert_raises(AssertionError, assert_safe_sparse_allclose, a, b,
213-
rtol=1e-9)
215+
assert_raises(AssertionError, assert_safe_sparse_allclose, a, b, rtol=1e-9)
214216

215217
assert_safe_sparse_allclose([np.array([(6, 6)]),], [np.array([(10, 10)]),],
216218
rtol=0.5)
217219
assert_raises(AssertionError, assert_safe_sparse_allclose,
218-
[np.array([(6, 6)]),], [np.array([(10, 10)]),], rtol=0.5)
219-
220-
a = sp.csr_matrix(np.array([np.iinfo(np.int_).min], dtype=np.int_))
221-
# Should not raise:
222-
assert_allclose(a, a)
220+
[np.array([(6, 6)]),], [np.array([(10, 10)]),])
223221

224222
# Test nested lists of scalars
225223
assert_safe_sparse_allclose([(['a', 'bcd'], ['a'])],
@@ -229,6 +227,23 @@ def test_assert_safe_sparse_allclose():
229227
assert_raises(AssertionError, assert_safe_sparse_allclose,
230228
[(['a', 'bcd'], ['a'])], [(['a', 'bcd'], ['b'])])
231229

230+
# Test dicts
231+
assert_safe_sparse_allclose({}, {})
232+
assert_safe_sparse_allclose({'a':'a'}, {'a':'a'})
233+
dict_1 = {'a':{'b':{'arr':np.array([1, 2, 3]), 'str':'str', 'int':9}}}
234+
dict_2 = {'a':{'b':{'arr':np.array([1, 2, 3]), 'str':'str', 'int':9}}}
235+
assert_safe_sparse_allclose(dict_1, dict_2)
236+
dict_1['a']['b']['arr'] = np.array([2, 2, 3])
237+
assert_safe_sparse_allclose(dict_1, dict_2, atol=1)
238+
assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2)
239+
240+
# Test nested list of dicts of spmatrices and ndarrays
241+
dict_1['a']['b']['arr1'] = [a, np.array([3, 4.])]
242+
assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2,
243+
atol=1)
244+
dict_2['a']['b']['arr1'] = [b, np.array([3, 4.])]
245+
assert_safe_sparse_allclose(dict_1, dict_2, atol=1)
246+
assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2)
232247

233248
# Test the string comparison
234249
assert_safe_sparse_allclose('a', 'a')
@@ -242,11 +257,6 @@ def test_assert_safe_sparse_allclose():
242257
assert_safe_sparse_allclose(7, 7.0)
243258
assert_safe_sparse_allclose(5, np.int32(5))
244259

245-
# Make sure you don't get infinite recursion with empty nested lists
246-
x = []
247-
x.append(x)
248-
assert_safe_sparse_allclose(x, x)
249-
250260

251261
def test_assert_same_not_same_model():
252262
X1, y1 = make_blobs(n_samples=200, n_features=5, center_box=(-200, -150),

0 commit comments

Comments
 (0)
0