Merge branch 'master' into install_instructions · scikit-learn/scikit-learn@16a5b45 · GitHub
[go: up one dir, main page]

Skip to content

Commit 16a5b45

Browse files
committed
Merge branch 'master' into install_instructions
2 parents d82a101 + 5e4b275 commit 16a5b45

File tree

10 files changed

+119
-53
lines changed

10 files changed

+119
-53
lines changed

doc/whats_new/v0.22.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,4 +631,10 @@ These changes mostly affect library developers.
631631
- Added check that pairwise estimators raise error on non-square data
632632
:pr:`14336` by :user:`Gregory Dexter <gdex1>`.
633633

634+
- Added two common multioutput estimator tests
635+
:func:`~utils.estimator_checks.check_classifier_multioutput` and
636+
:func:`~utils.estimator_checks.check_regressor_multioutput`.
637+
:pr:`13392` by :user:`Rok Mihevc <rok>`.
638+
634639
- |Fix| Added ``check_transformer_data_not_an_array`` to checks where missing
640+

examples/model_selection/plot_roc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@
150150
# Area under ROC for the multiclass problem
151151
# .........................................
152152
# The :func:`sklearn.metrics.roc_auc_score` function can be used for
153-
# multi-class classification. The mutliclass One-vs-One scheme compares every
153+
# multi-class classification. The multi-class One-vs-One scheme compares every
154154
# unique pairwise combination of classes. In this section, we calculate the AUC
155155
# using the OvR and OvO schemes. We report a macro average, and a
156156
# prevalence-weighted average.

sklearn/ensemble/tests/test_forest.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,27 +1294,6 @@ def test_backend_respected():
12941294
assert ba.count == 0
12951295

12961296

1297-
@pytest.mark.parametrize('name', FOREST_CLASSIFIERS)
1298-
@pytest.mark.parametrize('oob_score', (True, False))
1299-
def test_multi_target(name, oob_score):
1300-
ForestClassifier = FOREST_CLASSIFIERS[name]
1301-
1302-
clf = ForestClassifier(bootstrap=True, oob_score=oob_score)
1303-
1304-
X = iris.data
1305-
1306-
# Make multi column mixed type target.
1307-
y = np.vstack([
1308-
iris.target.astype(float),
1309-
iris.target.astype(int),
1310-
iris.target.astype(str),
1311-
]).T
1312-
1313-
# Try to fit and predict.
1314-
clf.fit(X, y)
1315-
clf.predict(X)
1316-
1317-
13181297
def test_forest_feature_importances_sum():
13191298
X, y = make_classification(n_samples=15, n_informative=3, random_state=1,
13201299
n_classes=3)

sklearn/linear_model/coordinate_descent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1389,6 +1389,8 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
13891389
cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive,
13901390
random_state=random_state, selection=selection)
13911391

1392+
def _more_tags(self):
1393+
return {'multioutput': False}
13921394

13931395
class ElasticNetCV(RegressorMixin, LinearModelCV):
13941396
"""Elastic Net model with iterative fitting along a regularization path.
@@ -1594,6 +1596,8 @@ def __init__(self, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
15941596
self.random_state = random_state
15951597
self.selection = selection
15961598

1599+
def _more_tags(self):
1600+
return {'multioutput': False}
15971601

15981602
###############################################################################
15991603
# Multi Task ElasticNet and Lasso models (with joint feature selection)

sklearn/linear_model/least_angle.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,6 +1358,9 @@ def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
13581358
n_nonzero_coefs=500,
13591359
eps=eps, copy_X=copy_X, fit_path=True)
13601360

1361+
def _more_tags(self):
1362+
return {'multioutput': False}
1363+
13611364
def fit(self, X, y):
13621365
"""Fit the model using X, y as training data.
13631366
@@ -1729,6 +1732,9 @@ def __init__(self, criterion='aic', fit_intercept=True, verbose=False,
17291732
self.eps = eps
17301733
self.fit_path = True
17311734

1735+
def _more_tags(self):
1736+
return {'multioutput': False}
1737+
17321738
def fit(self, X, y, copy_X=None):
17331739
"""Fit the model using X, y as training data.
17341740

sklearn/linear_model/ridge.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
521521
return coef
522522

523523

524-
class _BaseRidge(MultiOutputMixin, LinearModel, metaclass=ABCMeta):
524+
class _BaseRidge(LinearModel, metaclass=ABCMeta):
525525
@abstractmethod
526526
def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
527527
copy_X=True, max_iter=None, tol=1e-3, solver="auto",
@@ -602,7 +602,7 @@ def fit(self, X, y, sample_weight=None):
602602
return self
603603

604604

605-
class Ridge(RegressorMixin, _BaseRidge):
605+
class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
606606
"""Linear least squares with l2 regularization.
607607
608608
Minimizes the objective function::
@@ -1506,7 +1506,7 @@ def identity_estimator():
15061506
return self
15071507

15081508

1509-
class _BaseRidgeCV(MultiOutputMixin, LinearModel):
1509+
class _BaseRidgeCV(LinearModel):
15101510
def __init__(self, alphas=(0.1, 1.0, 10.0),
15111511
fit_intercept=True, normalize=False, scoring=None,
15121512
cv=None, gcv_mode=None,
@@ -1578,7 +1578,7 @@ def fit(self, X, y, sample_weight=None):
15781578
return self
15791579

15801580

1581-
class RidgeCV(RegressorMixin, _BaseRidgeCV):
1581+
class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
15821582
"""Ridge regression with built-in cross-validation.
15831583
15841584
See glossary entry for :term:`cross-validation estimator`.

sklearn/neighbors/regression.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ def __init__(self, n_neighbors=5, weights='uniform',
148148
metric_params=metric_params, n_jobs=n_jobs, **kwargs)
149149
self.weights = _check_weights(weights)
150150

151+
@property
152+
def _pairwise(self):
153+
# For cross-validation routines to split data correctly
154+
return self.metric == 'precomputed'
155+
151156
def predict(self, X):
152157
"""Predict the target for the provided data
153158

sklearn/tree/tests/test_tree.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,26 +1823,6 @@ def test_empty_leaf_infinite_threshold():
18231823
assert len(empty_leaf) == 0
18241824

18251825

1826-
@pytest.mark.parametrize('name', CLF_TREES)
1827-
def test_multi_target(name):
1828-
Tree = CLF_TREES[name]
1829-
1830-
clf = Tree()
1831-
1832-
X = iris.data
1833-
1834-
# Make multi column mixed type target.
1835-
y = np.vstack([
1836-
iris.target.astype(float),
1837-
iris.target.astype(int),
1838-
iris.target.astype(str),
1839-
]).T
1840-
1841-
# Try to fit and predict.
1842-
clf.fit(X, y)
1843-
clf.predict(X)
1844-
1845-
18461826
def test_decision_tree_memmap():
18471827
# check that decision trees supports read-only buffer (#13626)
18481828
X = np.random.RandomState(0).random_sample((10, 2)).astype(np.float32)

sklearn/utils/estimator_checks.py

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
BaseEstimator)
3939

4040
from ..metrics import accuracy_score, adjusted_rand_score, f1_score
41-
4241
from ..random_projection import BaseRandomProjection
4342
from ..feature_selection import SelectKBest
4443
from ..pipeline import make_pipeline
@@ -54,13 +53,13 @@
5453
from .import deprecated
5554
from .validation import has_fit_parameter, _num_samples
5655
from ..preprocessing import StandardScaler
57-
from ..datasets import load_iris, load_boston, make_blobs
56+
from ..datasets import (load_iris, load_boston, make_blobs,
57+
make_multilabel_classification, make_regression)
5858

5959

6060
BOSTON = None
6161
CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']
6262

63-
6463
def _safe_tags(estimator, key=None):
6564
# if estimator doesn't have _get_tags, use _DEFAULT_TAGS
6665
# if estimator has tags but not key, use _DEFAULT_TAGS[key]
@@ -125,6 +124,8 @@ def _yield_classifier_checks(name, classifier):
125124
yield check_classifiers_one_label
126125
yield check_classifiers_classes
127126
yield check_estimators_partial_fit_n_features
127+
if tags["multioutput"]:
128+
yield check_classifier_multioutput
128129
# basic consistency testing
129130
yield check_classifiers_train
130131
yield partial(check_classifiers_train, readonly_memmap=True)
@@ -174,6 +175,8 @@ def _yield_regressor_checks(name, regressor):
174175
yield partial(check_regressors_train, readonly_memmap=True)
175176
yield check_regressor_data_not_an_array
176177
yield check_estimators_partial_fit_n_features
178+
if tags["multioutput"]:
179+
yield check_regressor_multioutput
177180
yield check_regressors_no_decision_function
178181
if not tags["no_validation"]:
179182
yield check_supervised_y_2d
@@ -1495,6 +1498,87 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
14951498
estimator.partial_fit(X[:, :-1], y)
14961499

14971500

1501+
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
1502+
def check_classifier_multioutput(name, estimator):
1503+
n_samples, n_labels, n_classes = 42, 5, 3
1504+
tags = _safe_tags(estimator)
1505+
estimator = clone(estimator)
1506+
X, y = make_multilabel_classification(random_state=42,
1507+
n_samples=n_samples,
1508+
n_labels=n_labels,
1509+
n_classes=n_classes)
1510+
estimator.fit(X, y)
1511+
y_pred = estimator.predict(X)
1512+
1513+
assert y_pred.shape == (n_samples, n_classes), (
1514+
"The shape of the prediction for multioutput data is "
1515+
"incorrect. Expected {}, got {}."
1516+
.format((n_samples, n_labels), y_pred.shape))
1517+
assert y_pred.dtype.kind == 'i'
1518+
1519+
if hasattr(estimator, "decision_function"):
1520+
decision = estimator.decision_function(X)
1521+
assert isinstance(decision, np.ndarray)
1522+
assert decision.shape == (n_samples, n_classes), (
1523+
"The shape of the decision function output for "
1524+
"multioutput data is incorrect. Expected {}, got {}."
1525+
.format((n_samples, n_classes), decision.shape))
1526+
1527+
dec_pred = (decision > 0).astype(np.int)
1528+
dec_exp = estimator.classes_[dec_pred]
1529+
assert_array_equal(dec_exp, y_pred)
1530+
1531+
if hasattr(estimator, "predict_proba"):
1532+
y_prob = estimator.predict_proba(X)
1533+
1534+
if isinstance(y_prob, list) and not tags['poor_score']:
1535+
for i in range(n_classes):
1536+
assert y_prob[i].shape == (n_samples, 2), (
1537+
"The shape of the probability for multioutput data is"
1538+
" incorrect. Expected {}, got {}."
1539+
.format((n_samples, 2), y_prob[i].shape))
1540+
assert_array_equal(
1541+
np.argmax(y_prob[i], axis=1).astype(np.int),
1542+
y_pred[:, i]
1543+
)
1544+
elif not tags['poor_score']:
1545+
assert y_prob.shape == (n_samples, n_classes), (
1546+
"The shape of the probability for multioutput data is"
1547+
" incorrect. Expected {}, got {}."
1548+
.format((n_samples, n_classes), y_prob.shape))
1549+
assert_array_equal(y_prob.round().astype(int), y_pred)
1550+
1551+
if (hasattr(estimator, "decision_function") and
1552+
hasattr(estimator, "predict_proba")):
1553+
for i in range(n_classes):
1554+
y_proba = estimator.predict_proba(X)[:, i]
1555+
y_decision = estimator.decision_function(X)
1556+
assert_array_equal(rankdata(y_proba), rankdata(y_decision[:, i]))
1557+
1558+
1559+
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
1560+
def check_regressor_multioutput(name, estimator):
1561+
estimator = clone(estimator)
1562+
n_samples = n_features = 10
1563+
1564+
if not _is_pairwise_metric(estimator):
1565+
n_samples = n_samples + 1
1566+
1567+
X, y = make_regression(random_state=42, n_targets=5,
1568+
n_samples=n_samples, n_features=n_features)
1569+
X = pairwise_estimator_convert_X(X, estimator)
1570+
1571+
estimator.fit(X, y)
1572+
y_pred = estimator.predict(X)
1573+
1574+
assert y_pred.dtype == np.dtype('float64'), (
1575+
"Multioutput predictions by a regressor are expected to be"
1576+
" floating-point precision. Got {} instead".format(y_pred.dtype))
1577+
assert y_pred.shape == y.shape, (
1578+
"The shape of the prediction for multioutput data is incorrect."
1579+
" Expected {}, got {}.")
1580+
1581+
14981582
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
14991583
def check_clustering(name, clusterer_orig, readonly_memmap=False):
15001584
clusterer = clone(clusterer_orig)

sklearn/utils/tests/test_estimator_checks.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ class UntaggedBinaryClassifier(DecisionTreeClassifier):
282282
# Toy classifier that only supports binary classification, will fail tests.
283283
def fit(self, X, y, sample_weight=None):
284284
super().fit(X, y, sample_weight)
285-
if self.n_classes_ > 2:
285+
if np.all(self.n_classes_ > 2):
286286
raise ValueError('Only 2 classes are supported')
287287
return self
288288

@@ -296,7 +296,7 @@ def _more_tags(self):
296296
class RequiresPositiveYRegressor(LinearRegression):
297297

298298
def fit(self, X, y):
299-
X, y = check_X_y(X, y)
299+
X, y = check_X_y(X, y, multi_output=True)
300300
if (y <= 0).any():
301301
raise ValueError('negative y values not supported!')
302302
return super().fit(X, y)
@@ -423,7 +423,9 @@ def test_check_estimator():
423423
check_estimator(TaggedBinaryClassifier)
424424

425425
# Check regressor with requires_positive_y estimator tag
426-
check_estimator(RequiresPositiveYRegressor)
426+
msg = 'negative y values not supported!'
427+
assert_raises_regex(ValueError, msg, check_estimator,
428+
RequiresPositiveYRegressor)
427429

428430

429431
def test_check_outlier_corruption():
@@ -511,7 +513,7 @@ def __init__(self, you_should_set_this_=None):
511513

512514
def test_check_estimator_pairwise():
513515
# check that check_estimator() works on estimator with _pairwise
514-
# kernel or metric
516+
# kernel or metric
515517

516518
# test precomputed kernel
517519
est = SVC(kernel='precomputed')

0 commit comments

Comments
 (0)
0