From 6c226452aa0a90ff2f22fea5f7889e473e82f5d7 Mon Sep 17 00:00:00 2001 From: Aniruddha Dave Date: Fri, 13 Apr 2018 02:18:25 +0530 Subject: [PATCH 1/6] Fix Issue 10938 - Remove not fitted check from predict_proba method of SGDClassifier - Check only while calling predic_proba --- sklearn/linear_model/stochastic_gradient.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index b3f8a34fe6a8d..160e7a904c39b 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -802,12 +802,11 @@ def __init__(self, loss="hinge", penalty='l2', alpha=0.0001, l1_ratio=0.15, average=average, n_iter=n_iter) def _check_proba(self): - check_is_fitted(self, "t_") - if self.loss not in ("log", "modified_huber"): raise AttributeError("probability estimates are not available for" " loss=%r" % self.loss) + @property def predict_proba(self): """Probability estimates. @@ -848,6 +847,8 @@ def predict_proba(self): return self._predict_proba def _predict_proba(self, X): + check_is_fitted(self, "t_") + if self.loss == "log": return self._predict_proba_lr(X) From d326c69fb1d9cec175fd4abee0820eadfdb3a6c0 Mon Sep 17 00:00:00 2001 From: Aniruddha Dave Date: Sat, 14 Apr 2018 23:52:26 +0530 Subject: [PATCH 2/6] Add test case for issue #10938 -Test if the the predict_proba and predict_log_proba methods can be accessed before fitting -Test if the not fitted check is performed before calling the proba methods --- sklearn/linear_model/tests/test_sgd.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 80b3ca394f990..358ba5a15a90d 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -24,6 +24,7 @@ from sklearn.preprocessing import LabelEncoder, scale, MinMaxScaler from sklearn.preprocessing import StandardScaler from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import NotFittedError from sklearn.linear_model import sgd_fast @@ -467,6 +468,26 @@ def test_set_coef_multiclass(self): # Provided intercept_ does match dataset. clf = self.factory().fit(X2, Y2, intercept_init=np.zeros((3,))) + def test_sgd_proba_access(self): + # Test for issue #10938 + + # Checks if predict_proba and predict_log_proba + # is accessible for refrencing before fitting + # the SGD classifier + clf = SGDClassifier() + assert_false(hasattr(clf,"predict_proba")) + assert_false(hasattr(clf,"predict_log_proba")) + + for loss in ["log", "modified_huber"]: + clf = SGDClassifier(loss=loss) + assert_true(hasattr(clf,"predict_proba")) + assert_true(hasattr(clf,"predict_log_proba")) + + # Checks if not fitted check is performed while calling + # the methods + assert_raises(NotFittedError, clf.predict_proba,[[3,2]]) + assert_raises(NotFittedError, clf.predict_log_proba,[[3,2]]) + def test_sgd_proba(self): # Check SGD.predict_proba From c2dcd0dc8b5414abfaa22fc07a3def2ce4a91add Mon Sep 17 00:00:00 2001 From: Aniruddha Dave Date: Sun, 15 Apr 2018 14:14:00 +0530 Subject: [PATCH 3/6] Issue #10938: Code Cleanup -Remove extra line -Add space after comma -Use assert instead of asser_false --- sklearn/linear_model/stochastic_gradient.py | 1 - sklearn/linear_model/tests/test_sgd.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 160e7a904c39b..c6ad4c2754c51 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -806,7 +806,6 @@ def _check_proba(self): raise AttributeError("probability estimates are not available for" " loss=%r" % self.loss) - @property def predict_proba(self): """Probability estimates. diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 358ba5a15a90d..55f315109a8ac 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -475,18 +475,18 @@ def test_sgd_proba_access(self): # is accessible for refrencing before fitting # the SGD classifier clf = SGDClassifier() - assert_false(hasattr(clf,"predict_proba")) - assert_false(hasattr(clf,"predict_log_proba")) + assert not(hasattr(clf, "predict_proba")) + assert not(hasattr(clf, "predict_log_proba")) for loss in ["log", "modified_huber"]: clf = SGDClassifier(loss=loss) - assert_true(hasattr(clf,"predict_proba")) - assert_true(hasattr(clf,"predict_log_proba")) + assert_true(hasattr(clf, "predict_proba")) + assert_true(hasattr(clf, "predict_log_proba")) # Checks if not fitted check is performed while calling # the methods - assert_raises(NotFittedError, clf.predict_proba,[[3,2]]) - assert_raises(NotFittedError, clf.predict_log_proba,[[3,2]]) + assert_raises(NotFittedError, clf.predict_proba, [[3, 2]]) + assert_raises(NotFittedError, clf.predict_log_proba, [[3, 2]]) def test_sgd_proba(self): # Check SGD.predict_proba From a3f514d9a110f38a89421db4ff162c4a57dd0dcd Mon Sep 17 00:00:00 2001 From: Aniruddha Dave Date: Sun, 15 Apr 2018 14:53:03 +0530 Subject: [PATCH 4/6] Remove Trailing whitespaces --- sklearn/linear_model/tests/test_sgd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 55f315109a8ac..fa972a870829d 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -472,7 +472,7 @@ def test_sgd_proba_access(self): # Test for issue #10938 # Checks if predict_proba and predict_log_proba - # is accessible for refrencing before fitting + # is accessible for refrencing before fitting # the SGD classifier clf = SGDClassifier() assert not(hasattr(clf, "predict_proba")) @@ -483,7 +483,7 @@ def test_sgd_proba_access(self): assert_true(hasattr(clf, "predict_proba")) assert_true(hasattr(clf, "predict_log_proba")) - # Checks if not fitted check is performed while calling + # Checks if not fitted check is performed while calling # the methods assert_raises(NotFittedError, clf.predict_proba, [[3, 2]]) assert_raises(NotFittedError, clf.predict_log_proba, [[3, 2]]) From b2be9dbba79dda67ceb4ab72b2ca10607c72fab1 Mon Sep 17 00:00:00 2001 From: Aniruddha Dave Date: Mon, 16 Apr 2018 22:04:05 +0530 Subject: [PATCH 5/6] Add test to check error message Remove test for NotFittedError --- sklearn/linear_model/tests/test_sgd.py | 27 +++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index fa972a870829d..07d93243c6245 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -1,5 +1,6 @@ import pickle import unittest +import pytest import numpy as np import scipy.sparse as sp @@ -24,7 +25,6 @@ from sklearn.preprocessing import LabelEncoder, scale, MinMaxScaler from sklearn.preprocessing import StandardScaler from sklearn.exceptions import ConvergenceWarning -from sklearn.exceptions import NotFittedError from sklearn.linear_model import sgd_fast @@ -474,19 +474,20 @@ def test_sgd_proba_access(self): # Checks if predict_proba and predict_log_proba # is accessible for refrencing before fitting # the SGD classifier - clf = SGDClassifier() - assert not(hasattr(clf, "predict_proba")) - assert not(hasattr(clf, "predict_log_proba")) - - for loss in ["log", "modified_huber"]: + for loss in SGDClassifier.loss_functions: clf = SGDClassifier(loss=loss) - assert_true(hasattr(clf, "predict_proba")) - assert_true(hasattr(clf, "predict_log_proba")) - - # Checks if not fitted check is performed while calling - # the methods - assert_raises(NotFittedError, clf.predict_proba, [[3, 2]]) - assert_raises(NotFittedError, clf.predict_log_proba, [[3, 2]]) + if loss in ('log', 'modified_huber'): + assert (hasattr(clf, 'predict_proba')) + assert (hasattr(clf, 'predict_log_proba')) + else: + with pytest.raises(AttributeError, + message="probability estimates are not " + "available for loss={!r}".format(loss)): + clf.predict_proba + with pytest.raises(AttributeError, + message="probability estimates are not " + "available for loss={!r}".format(loss)): + clf.predict_log_proba def test_sgd_proba(self): # Check SGD.predict_proba From 3b86a08f4249957aeb0f12011aebc30a42a834d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Apr 2018 23:29:22 +0200 Subject: [PATCH 6/6] Tweaks --- sklearn/linear_model/tests/test_sgd.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 07d93243c6245..9f372f706ca71 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -468,25 +468,27 @@ def test_set_coef_multiclass(self): # Provided intercept_ does match dataset. clf = self.factory().fit(X2, Y2, intercept_init=np.zeros((3,))) - def test_sgd_proba_access(self): - # Test for issue #10938 - - # Checks if predict_proba and predict_log_proba - # is accessible for refrencing before fitting - # the SGD classifier + def test_sgd_predict_proba_method_access(self): + # Checks that SGDClassifier predict_proba and predict_log_proba methods + # can either be accessed or raise an appropriate error message + # otherwise. See + # https://github.com/scikit-learn/scikit-learn/issues/10938 for more + # details. for loss in SGDClassifier.loss_functions: clf = SGDClassifier(loss=loss) if loss in ('log', 'modified_huber'): - assert (hasattr(clf, 'predict_proba')) - assert (hasattr(clf, 'predict_log_proba')) + assert hasattr(clf, 'predict_proba') + assert hasattr(clf, 'predict_log_proba') else: + message = ("probability estimates are not " + "available for loss={!r}".format(loss)) + assert not hasattr(clf, 'predict_proba') + assert not hasattr(clf, 'predict_log_proba') with pytest.raises(AttributeError, - message="probability estimates are not " - "available for loss={!r}".format(loss)): + message=message): clf.predict_proba with pytest.raises(AttributeError, - message="probability estimates are not " - "available for loss={!r}".format(loss)): + message=message): clf.predict_log_proba def test_sgd_proba(self):