From 55d82e802e889f058af15cf17cc5047899ea5959 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Sat, 29 Sep 2018 14:52:30 -0400 Subject: [PATCH 01/12] fix hasattr param --- sklearn/multioutput.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 1b0fdd19e14af..9dde9f24d90c6 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -331,7 +331,7 @@ def predict_proba(self, X): classes corresponds to that in the attribute `classes_`. """ check_is_fitted(self, 'estimators_') - if not hasattr(self.estimator, "predict_proba"): + if not hasattr(self.estimator_[0], "predict_proba"): raise ValueError("The base estimator should implement" "predict_proba method") From db2c21d4ce7eb3bab1098157df72fba6fbf76650 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Sat, 29 Sep 2018 15:30:22 -0400 Subject: [PATCH 02/12] correct spelling + add test --- sklearn/multioutput.py | 2 +- sklearn/tests/test_multioutput.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 9dde9f24d90c6..d0b28a01eb77c 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -331,7 +331,7 @@ def predict_proba(self, X): classes corresponds to that in the attribute `classes_`. """ check_is_fitted(self, 'estimators_') - if not hasattr(self.estimator_[0], "predict_proba"): + if not hasattr(self.estimators_[0], "predict_proba"): raise ValueError("The base estimator should implement" "predict_proba method") diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 1eb5a7e48f823..d7e11dc90f4f6 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -173,6 +173,12 @@ def test_multi_output_classification_partial_fit_parallelism(): # parallelism requires this to be the case for a sane implementation assert_false(est1 is est2) +def test_predict_proba(): + sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5) + multi_target_linear = MultiOutputClassifier(sgd_linear_clf) + multi_target_linear.fit(X, y) + # check predict_proba passes + multi_target_linear.predict_proba(X) def test_multi_output_classification_partial_fit(): # test if multi_target initializes correctly with base estimator and fit From 33f178d070608b7cfaddfd9ca6e2364630a99512 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Sat, 29 Sep 2018 15:45:29 -0400 Subject: [PATCH 03/12] minor format fix --- sklearn/tests/test_multioutput.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index d7e11dc90f4f6..8c6544a237f38 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -173,6 +173,7 @@ def test_multi_output_classification_partial_fit_parallelism(): # parallelism requires this to be the case for a sane implementation assert_false(est1 is est2) + def test_predict_proba(): sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5) multi_target_linear = MultiOutputClassifier(sgd_linear_clf) @@ -180,6 +181,7 @@ def test_predict_proba(): # check predict_proba passes multi_target_linear.predict_proba(X) + def test_multi_output_classification_partial_fit(): # test if multi_target initializes correctly with base estimator and fit # assert predictions work as expected for predict From 9099e0e8225043f547140fbc7d1a012ae921d449 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Sat, 29 Sep 2018 16:12:39 -0400 Subject: [PATCH 04/12] more test --- sklearn/tests/test_multioutput.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 8c6544a237f38..8fcdcfe08ebdb 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -174,13 +174,19 @@ def test_multi_output_classification_partial_fit_parallelism(): assert_false(est1 is est2) -def test_predict_proba(): +# check predict_proba passes +def test_multi_output_predict_proba(): sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5) multi_target_linear = MultiOutputClassifier(sgd_linear_clf) multi_target_linear.fit(X, y) - # check predict_proba passes multi_target_linear.predict_proba(X) + sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5) + multi_target_linear = MultiOutputClassifier(sgd_linear_clf) + multi_target_linear.fit(X, y) + with pytest.raises(ValueError): + multi_target_linear.predict_proba(X) + def test_multi_output_classification_partial_fit(): # test if multi_target initializes correctly with base estimator and fit From e0cb97c66edec64193856f38b2e82aa6b40d1bc5 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Wed, 27 Feb 2019 17:49:31 +0900 Subject: [PATCH 05/12] fix spacing/typo --- sklearn/multioutput.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index d0b28a01eb77c..3fe0c47c74d7f 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -145,7 +145,8 @@ def fit(self, X, y, sample_weight=None): """ if not hasattr(self.estimator, "fit"): - raise ValueError("The base estimator should implement a fit method") + raise ValueError("The base estimator should implement" + " a fit method") X, y = check_X_y(X, y, multi_output=True, @@ -186,7 +187,8 @@ def predict(self, X): """ check_is_fitted(self, 'estimators_') if not hasattr(self.estimator, "predict"): - raise ValueError("The base estimator should implement a predict method") + raise ValueError("The base estimator should implement" + " a predict method") X = check_array(X, accept_sparse=True) @@ -331,8 +333,8 @@ def predict_proba(self, X): classes corresponds to that in the attribute `classes_`. """ check_is_fitted(self, 'estimators_') - if not hasattr(self.estimators_[0], "predict_proba"): - raise ValueError("The base estimator should implement" + if not hasattr(self.estimator, "predict_proba"): + raise ValueError("The base estimator should implement " "predict_proba method") results = [estimator.predict_proba(X) for estimator in @@ -340,7 +342,7 @@ def predict_proba(self, X): return results def score(self, X, y): - """"Returns the mean accuracy on the given test data and labels. + """Returns the mean accuracy on the given test data and labels. Parameters ---------- From 39ae51ae1e07d2c9a141aca1b53556e505ae6ac4 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Thu, 28 Feb 2019 13:42:45 +0900 Subject: [PATCH 06/12] edge case test --- sklearn/multioutput.py | 3 ++- sklearn/tests/test_multioutput.py | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 3fe0c47c74d7f..9ee5e5265064c 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -333,7 +333,8 @@ def predict_proba(self, X): classes corresponds to that in the attribute `classes_`. """ check_is_fitted(self, 'estimators_') - if not hasattr(self.estimator, "predict_proba"): + if not all([hasattr(estimator, "predict_proba") + for estimator in self.estimators_]): raise ValueError("The base estimator should implement " "predict_proba method") diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 8fcdcfe08ebdb..f676e75cd054d 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -33,6 +33,7 @@ from sklearn.svm import LinearSVC from sklearn.base import ClassifierMixin from sklearn.utils import shuffle +from sklearn.model_selection import GridSearchCV def test_multi_target_regression(): @@ -176,16 +177,21 @@ def test_multi_output_classification_partial_fit_parallelism(): # check predict_proba passes def test_multi_output_predict_proba(): - sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5) - multi_target_linear = MultiOutputClassifier(sgd_linear_clf) - multi_target_linear.fit(X, y) - multi_target_linear.predict_proba(X) - sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5) - multi_target_linear = MultiOutputClassifier(sgd_linear_clf) + param = {'loss': ('hinge', 'log', 'modified_huber')} + + # inner function for custom scoring + def custom_scorer(estimator, X, y): + if hasattr(estimator, "predict_proba"): + return 1.0 + else: + return 0.0 + grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param, + scoring=custom_scorer) + multi_target_linear = MultiOutputClassifier(grid_clf) multi_target_linear.fit(X, y) - with pytest.raises(ValueError): - multi_target_linear.predict_proba(X) + + multi_target_linear.predict_proba(X) def test_multi_output_classification_partial_fit(): From 7c0c12aacba532f778ad2cefc6df220a44411dea Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Thu, 28 Feb 2019 15:18:57 +0900 Subject: [PATCH 07/12] add cv to grid search --- sklearn/tests/test_multioutput.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index c328bd0978bdc..97044674317ad 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -189,7 +189,7 @@ def custom_scorer(estimator, X, y): else: return 0.0 grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param, - scoring=custom_scorer) + scoring=custom_scorer, cv=3) multi_target_linear = MultiOutputClassifier(grid_clf) multi_target_linear.fit(X, y) From d3a3ec1cf4905e1e81dd1fa87964da8051f75b44 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Thu, 28 Feb 2019 17:32:29 +0900 Subject: [PATCH 08/12] fix warnings --- sklearn/tests/test_multioutput.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 97044674317ad..8aed1ae0a34fc 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -179,7 +179,7 @@ def test_multi_output_classification_partial_fit_parallelism(): # check predict_proba passes def test_multi_output_predict_proba(): - sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5) + sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3) param = {'loss': ('hinge', 'log', 'modified_huber')} # inner function for custom scoring @@ -189,7 +189,7 @@ def custom_scorer(estimator, X, y): else: return 0.0 grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param, - scoring=custom_scorer, cv=3) + scoring=custom_scorer, cv=3, error_score=np.nan) multi_target_linear = MultiOutputClassifier(grid_clf) multi_target_linear.fit(X, y) From 3fdf1dd0d7d02f28c1214a7579d84cfe2982487f Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Thu, 28 Feb 2019 19:32:03 +0900 Subject: [PATCH 09/12] more test for code coverage --- sklearn/tests/test_multioutput.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 8aed1ae0a34fc..41a84cd8ea556 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -195,6 +195,14 @@ def custom_scorer(estimator, X, y): multi_target_linear.predict_proba(X) + # SGDClassifier defaults to loss='hinge' which is not a probabilistic + # loss function; therefore it does not expose a predict_proba method + sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3) + multi_target_linear = MultiOutputClassifier(sgd_linear_clf) + multi_target_linear.fit(X, y) + with pytest.raises(ValueError): + multi_target_linear.predict_proba(X) + # 0.23. warning about tol not having its correct default value. @pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been') From 173b28d37238fc05b20072929ec2220d23687ce3 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Fri, 1 Mar 2019 17:02:11 +0900 Subject: [PATCH 10/12] add documentation --- sklearn/multioutput.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index b7eb0fd033000..bd1e9744ff941 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -322,6 +322,9 @@ def predict_proba(self, X): """Probability estimates. Returns prediction probabilities for each class of each output. + This method will raise a ``ValueError`` if any of the + estimators do not have ``predict_proba``. + Parameters ---------- X : array-like, shape (n_samples, n_features) From 8e24d1723fa6dc54f34d946be428c81ffcfd7cf8 Mon Sep 17 00:00:00 2001 From: Rebekah Kim Date: Thu, 18 Apr 2019 22:56:16 +0900 Subject: [PATCH 11/12] update doc --- doc/whats_new/v0.21.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 005925ae42720..591b9a7a47909 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -272,6 +272,14 @@ Support for Python 3.4 and below has been officially dropped. making ``shuffle=True`` ineffective. :issue:`13124` by :user:`Hanmin Qin `. +:mod:`sklearn.multioutput` +........................ + +- |Fix| Fixed a bug in :class:`multiout.MultiOutputClassifier` where the + `predict_proba` method incorrectly checked for `predict_proba` attribute in + the estimator object. + :issue:`12222` by :user:`Rebekah Kim `. + :mod:`sklearn.neighbors` ........................ From 8828c71d5230c97b4cf8873b2007741956b9887f Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 19 Apr 2019 12:03:54 +0900 Subject: [PATCH 12/12] fix error msg Co-Authored-By: rebekahkim --- sklearn/tests/test_multioutput.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 1c7abfaa7907b..292f655a5b1a9 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -200,7 +200,8 @@ def custom_scorer(estimator, X, y): sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3) multi_target_linear = MultiOutputClassifier(sgd_linear_clf) multi_target_linear.fit(X, y) - with pytest.raises(ValueError): + err_msg = "The base estimator should implement predict_proba method" + with pytest.raises(ValueError, match=err_msg): multi_target_linear.predict_proba(X)