WIP + SCAFFOLD_REMOVE_BEFORE_MERGE · scikit-learn/scikit-learn@7f8b5a5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7f8b5a5

Browse files
committed
WIP + SCAFFOLD_REMOVE_BEFORE_MERGE
1 parent 2832fe7 commit 7f8b5a5

File tree

3 files changed

+102
-71
lines changed

3 files changed

+102
-71
lines changed

sklearn/utils/estimator_checks.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@
7676

7777

7878
def _yield_non_meta_checks(name, Estimator):
79+
"""
7980
yield check_estimators_dtypes
8081
yield check_fit_score_takes_y
8182
yield check_dtype_object
@@ -107,6 +108,7 @@ def _yield_non_meta_checks(name, Estimator):
107108
# Test that estimators can be pickled, and once pickled
108109
# give the same answer as before.
109110
yield check_estimators_pickle
111+
"""
110112
if name not in ('SpectralEmbedding',):
111113
yield check_estimator_fit_reset
112114

@@ -199,6 +201,7 @@ def _yield_clustering_checks(name, Clusterer):
199201
def _yield_all_checks(name, Estimator):
200202
for check in _yield_non_meta_checks(name, Estimator):
201203
yield check
204+
"""
202205
if issubclass(Estimator, ClassifierMixin):
203206
for check in _yield_classifier_checks(name, Estimator):
204207
yield check
@@ -217,6 +220,7 @@ def _yield_all_checks(name, Estimator):
217220
yield check_fit2d_1feature
218221
yield check_fit1d_1feature
219222
yield check_fit1d_1sample
223+
"""
220224

221225

222226
def check_estimator(Estimator):
@@ -1566,14 +1570,10 @@ def check_estimator_fit_reset(name, Estimator):
15661570
centers=3, random_state=2)
15671571

15681572
# Some estimators work only on non-negative inputs
1569-
if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler',
1570-
'NMF', 'MultinomialNB', 'ProjectedGradientNMF'):
1571-
X1 -= X1.min()
1572-
X2 -= X2.min()
1573-
X3 -= X3.min()
1574-
X4 -= X4.min()
1575-
X5 -= X5.min()
1576-
X6 -= X6.min()
1573+
if name in ('AdditiveChi2Sampler', 'SkewedChi2Sampler', 'NMF',
1574+
'MultinomialNB', 'ProjectedGradientNMF',):
1575+
X1, X2, X3, X4, X5, X6 = map(lambda X: X - X.min(),
1576+
(X1, X2, X3, X4, X5, X6))
15771577

15781578
y1, y2, y3, y4, y5, y6 = map(multioutput_estimator_convert_y_2d,
15791579
(name,)*6, (y1, y2, y3, y4, y5, y6))

sklearn/utils/testing.py

Lines changed: 69 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@
7777
"assert_less", "assert_less_equal", "assert_greater",
7878
"assert_greater_equal", "assert_same_model",
7979
"assert_not_same_model", "assert_fitted_attributes_almost_equal",
80-
"assert_approx_equal"]
80+
"assert_approx_equal", "assert_safe_sparse_allclose"]
8181

8282

8383
try:
@@ -394,41 +394,72 @@ def __exit__(self, *exc_info):
394394
assert_greater = _assert_greater
395395

396396

397-
def _sparse_dense_allclose(val1, val2, rtol=1e-7, atol=0):
397+
if hasattr(np.testing, 'assert_allclose'):
398+
assert_allclose = np.testing.assert_allclose
399+
else:
400+
assert_allclose = _assert_allclose
401+
402+
403+
def assert_safe_sparse_allclose(val1, val2, rtol=1e-7, atol=0, msg=None):
398404
"""Check if two objects are close up to the preset tolerance.
399405
400406
The objects can be scalars, lists, tuples, ndarrays or sparse matrices.
401407
"""
402-
if isinstance(val1, (int, float)) and isinstance(val2, (int, float)):
403-
return np.allclose(float(val1), float(val2), rtol, atol)
408+
if msg is None:
409+
msg = ("The val1,\n%s\nand val2,\n%s\nare not all close"
410+
% (val1, val2))
411+
412+
if isinstance(val1, str) and isinstance(val2, str):
413+
assert_true(val1 == val2, msg=msg)
404414

405-
if type(val1) is not type(val2):
406-
return False
415+
elif np.isscalar(val1) and np.isscalar(val2):
416+
assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg)
407417

408-
comparables = (float, list, tuple, np.ndarray, sp.spmatrix)
418+
# To allow mixed formats for sparse matrices alone
419+
elif type(val1) is not type(val2) and not (
420+
sp.issparse(val1) and sp.issparse(val2)):
421+
assert False, msg
409422

410-
if not (isinstance(val1, comparables) or isinstance(val2, comparables)):
411-
raise ValueError("The objects, %s and %s, are neither scalar nor "
423+
elif not (isinstance(val1, (list, tuple, np.ndarray, sp.spmatrix, dict))):
424+
raise ValueError("The objects,\n%s\nand\n%s\n, are neither scalar nor "
412425
"array-like." % (val1, val2))
413426

414-
# list/tuple (or list/tuple of ndarrays/spmatrices)
415-
if isinstance(val1, (tuple, list)):
427+
# list/tuple/dict (of list/tuple/dict...) of ndarrays/spmatrices/scalars
428+
elif isinstance(val1, (tuple, list, dict)):
429+
if isinstance(val1, dict):
430+
val1, val2 = tuple(val1.iteritems()), tuple(val2.iteritems())
416431
if (len(val1) == 0) and (len(val2) == 0):
417-
return True
418-
if len(val1) != len(val2):
419-
return False
420-
while isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix)):
421-
return all(_sparse_dense_allclose(val1_i, val2[i], rtol, atol)
422-
for i, val1_i in enumerate(val1))
423-
# Compare the lists, if they are not nested or singleton
424-
return np.allclose(val1, val2, rtol, atol)
425-
426-
same_shape = val1.shape == val2.shape
427-
if sp.issparse(val1) or sp.issparse(val2):
428-
return same_shape and np.allclose(val1.toarray(), val2.toarray(),
429-
rtol, atol)
432+
assert True
433+
elif len(val1) != len(val2):
434+
assert False, msg
435+
# nested lists/tuples - [array([5, 6]), array([5, ])] and [[1, 3], ]
436+
# Or ['str',] and ['str',]
437+
elif isinstance(val1[0], (tuple, list, np.ndarray, sp.spmatrix, str)):
438+
# Compare them recursively
439+
for i, val1_i in enumerate(val1):
440+
assert_safe_sparse_allclose(val1_i, val2[i],
441+
rtol=rtol, atol=atol, msg=msg)
442+
# Compare the lists using np.allclose, if they are neither nested nor
443+
# contain strings
444+
else:
445+
assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg)
446+
447+
# scipy sparse matrix
448+
elif sp.issparse(val1) or sp.issparse(val2):
449+
# NOTE: ref np.allclose's note for assymetricity in this testing
450+
if val1.shape != val2.shape:
451+
assert False, msg
452+
453+
diff = abs(val1 - val2) - (rtol * abs(val2))
454+
assert np.any(diff > atol).size == 0, msg
455+
456+
# numpy ndarray
457+
elif isinstance(val1, (np.ndarray)):
458+
if val1.shape != val2.shape:
459+
assert False, msg
460+
assert_allclose(val1, val2, rtol=rtol, atol=atol, err_msg=msg)
430461
else:
431-
return same_shape and np.allclose(val1, val2, rtol, atol)
462+
assert False, msg
432463

433464

434465
def _assert_allclose(actual, desired, rtol=1e-7, atol=0,
@@ -442,12 +473,6 @@ def _assert_allclose(actual, desired, rtol=1e-7, atol=0,
442473
raise AssertionError(err_msg)
443474

444475

445-
if hasattr(np.testing, 'assert_allclose'):
446-
assert_allclose = np.testing.assert_allclose
447-
else:
448-
assert_allclose = _assert_allclose
449-
450-
451476
def assert_raise_message(exceptions, message, function, *args, **kwargs):
452477
"""Helper function to test error messages in exceptions
453478
@@ -495,12 +520,11 @@ def _assert_same_model_method(method, X, estimator1, estimator2, msg=None):
495520

496521
# Check if the method(X) returns the same for both models.
497522
res1, res2 = getattr(estimator1, method)(X), getattr(estimator2, method)(X)
498-
if not _sparse_dense_allclose(res1, res2):
499-
if msg is None:
500-
msg = ("Models are not equal. \n\n%s method returned different "
501-
"results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s."
502-
% (method, res1, estimator1, res2, estimator2))
503-
raise AssertionError(msg)
523+
if msg is None:
524+
msg = ("Models are not equal. \n\n%s method returned different "
525+
"results:\n\n%s\n\n for :\n\n%s and\n\n%s\n\n for :\n\n%s."
526+
% (method, res1, estimator1, res2, estimator2))
527+
assert_safe_sparse_allclose(res1, res2, msg=msg)
504528

505529

506530
def assert_same_model(X, estimator1, estimator2, msg=None):
@@ -586,9 +610,8 @@ def assert_not_same_model(X, estimator1, estimator2, msg=None):
586610
try:
587611
assert_same_model(X, estimator1, estimator2)
588612
except AssertionError:
589-
pass
590-
else:
591-
raise AssertionError(msg)
613+
return
614+
raise AssertionError(msg)
592615

593616

594617
def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None):
@@ -623,23 +646,21 @@ def assert_fitted_attributes_almost_equal(estimator1, estimator2, msg=None):
623646
"The attributes of both the estimators do not match.")
624647

625648
non_attributes = ("estimators_", "estimator_", "tree_", "base_estimator_",
626-
"random_state_")
649+
"random_state_", "root_", "label_binarizer_", "loss_")
650+
non_attr_suffixes = ("leaf_",)
651+
627652
for attr in est1_dict:
628653
val1, val2 = est1_dict[attr], est2_dict[attr]
629654

630655
# Consider keys that end in ``_`` only as attributes.
631-
if (attr.endswith('_') and attr not in non_attributes):
656+
if (attr.endswith('_') and attr not in non_attributes and
657+
not attr.endswith(non_attr_suffixes)):
632658
if msg is None:
633659
msg = ("Attributes do not match. \nThe attribute, %s, in "
634660
"estimator1,\n\n%r\n\n is %r and in estimator2,"
635661
"\n\n%r\n\n is %r.\n") % (attr, estimator1, val1,
636662
estimator2, val2)
637-
if isinstance(val1, str) and isinstance(val2, str):
638-
attr_similar = val1 == val2
639-
else:
640-
attr_similar = _sparse_dense_allclose(val1, val2)
641-
if not attr_similar:
642-
raise AssertionError(msg)
663+
assert_safe_sparse_allclose(val1, val2, msg=msg)
643664

644665

645666
def fake_mldata(columns_dict, dataname, matfile, ordering=None):

sklearn/utils/tests/test_testing.py

Lines changed: 25 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import sys
44
import numpy as np
55
from scipy import sparse as sp
6+
from numpy.testing import assert_allclose
67

78
from nose.tools import assert_raises
89
from sklearn.utils.testing import (
@@ -104,29 +105,26 @@ def _no_raise():
104105

105106

106107
def test_assert_safe_sparse_allclose():
108+
# Test Scalars
107109
x = 1e-3
108110
y = 1e-9
109111
assert_safe_sparse_allclose(x, y, atol=1)
110112
assert_raises(AssertionError, assert_safe_sparse_allclose, x, y)
111113

112-
a = sp.csc_matrix(np.array([x, y, x, y]))
113-
b = sp.coo_matrix(np.array([x, y, x, x]))
114+
# Test Sparse matrices
115+
a = sp.coo_matrix(np.array([x, y, x, y]))
116+
b = sp.csr_matrix(np.array([x, y, x, x]))
114117
assert_safe_sparse_allclose(a, b, atol=1)
115118
assert_raises(AssertionError, assert_safe_sparse_allclose, a, b)
116119

117-
b[-1] = y * (1 + 1e-8)
120+
b[0, 3] = y * (1 + 1e-8)
118121
assert_safe_sparse_allclose(a, b)
119-
assert_raises(AssertionError, assert_safe_sparse_allclose, a, b,
120-
rtol=1e-9)
122+
assert_raises(AssertionError, assert_safe_sparse_allclose, a, b, rtol=1e-9)
121123

122124
assert_safe_sparse_allclose([np.array([(6, 6)]),], [np.array([(10, 10)]),],
123125
rtol=0.5)
124126
assert_raises(AssertionError, assert_safe_sparse_allclose,
125-
[np.array([(6, 6)]),], [np.array([(10, 10)]),], rtol=0.5)
126-
127-
a = sp.csr_matrix(np.array([np.iinfo(np.int_).min], dtype=np.int_))
128-
# Should not raise:
129-
assert_allclose(a, a)
127+
[np.array([(6, 6)]),], [np.array([(10, 10)]),])
130128

131129
# Test nested lists of scalars
132130
assert_safe_sparse_allclose([(['a', 'bcd'], ['a'])],
@@ -136,6 +134,23 @@ def test_assert_safe_sparse_allclose():
136134
assert_raises(AssertionError, assert_safe_sparse_allclose,
137135
[(['a', 'bcd'], ['a'])], [(['a', 'bcd'], ['b'])])
138136

137+
# Test dicts
138+
assert_safe_sparse_allclose({}, {})
139+
assert_safe_sparse_allclose({'a':'a'}, {'a':'a'})
140+
dict_1 = {'a':{'b':{'arr':np.array([1, 2, 3]), 'str':'str', 'int':9}}}
141+
dict_2 = {'a':{'b':{'arr':np.array([1, 2, 3]), 'str':'str', 'int':9}}}
142+
assert_safe_sparse_allclose(dict_1, dict_2)
143+
dict_1['a']['b']['arr'] = np.array([2, 2, 3])
144+
assert_safe_sparse_allclose(dict_1, dict_2, atol=1)
145+
assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2)
146+
147+
# Test nested list of dicts of spmatrices and ndarrays
148+
dict_1['a']['b']['arr1'] = [a, np.array([3, 4.])]
149+
assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2,
150+
atol=1)
151+
dict_2['a']['b']['arr1'] = [b, np.array([3, 4.])]
152+
assert_safe_sparse_allclose(dict_1, dict_2, atol=1)
153+
assert_raises(AssertionError, assert_safe_sparse_allclose, dict_1, dict_2)
139154

140155
# Test the string comparison
141156
assert_safe_sparse_allclose('a', 'a')
@@ -149,11 +164,6 @@ def test_assert_safe_sparse_allclose():
149164
assert_safe_sparse_allclose(7, 7.0)
150165
assert_safe_sparse_allclose(5, np.int32(5))
151166

152-
# Make sure you don't get infinite recursion with empty nested lists
153-
x = []
154-
x.append(x)
155-
assert_safe_sparse_allclose(x, x)
156-
157167

158168
def test_assert_same_not_same_model():
159169
X1, y1 = make_blobs(n_samples=200, n_features=5, center_box=(-200, -150),

0 commit comments

Comments
 (0)
0