From 206f1de55daf9b4458fe6600749e4d005209e82d Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Sat, 11 Aug 2018 19:47:11 +0100 Subject: [PATCH 01/22] fix LDA predict_proba() to handle binary and multi-class case --- sklearn/discriminant_analysis.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index a635792c6f6ca..5f33aa9475ee0 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -506,16 +506,18 @@ def predict_proba(self, X): Estimated probabilities. """ prob = self.decision_function(X) - prob *= -1 - np.exp(prob, prob) - prob += 1 - np.reciprocal(prob, prob) - if len(self.classes_) == 2: # binary case + if len(self.classes_) == 2: + prob *= -1 + np.exp(prob, prob) + prob += 1 + np.reciprocal(prob, prob) return np.column_stack([1 - prob, prob]) else: - # OvR normalization, like LibLinear's predict_probability - prob /= prob.sum(axis=1).reshape((prob.shape[0], -1)) - return prob + # compute the likelihood of the underlying gaussian models + # up to a multiplicative constant. + likelihood = np.exp(prob - prob.max(axis=1)[:, np.newaxis]) + # compute posterior probabilities + return likelihood / likelihood.sum(axis=1)[:, np.newaxis] def predict_log_proba(self, X): """Estimate log probability. 
From 824e0ccc9c72c752957873e287afe7dcd5afaf84 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Tue, 21 Aug 2018 14:18:05 +0100 Subject: [PATCH 02/22] test_lda_predict_proba non-regression test --- sklearn/tests/test_discriminant_analysis.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 6e509949b0a88..43cae507c93f3 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -91,6 +91,23 @@ def test_lda_predict(): clf = LinearDiscriminantAnalysis(solver="dummy") assert_raises(ValueError, clf.fit, X, y) +def test_lda_predict_proba(): + # Test LDA posterior probabilities + # Binary case + clf = LinearDiscriminantAnalysis(solver='svd').fit(X6,y6) + probas = clf.predict_proba(X6) + assert_array_almost_equal( + probas[0], + np.array([0.91942108, 0.08057892]) + ) + # Multiclass case + clf = LinearDiscriminantAnalysis(solver='svd').fit(X6,y7) + probas = clf.predict_proba(X6) + assert_array_almost_equal( + probas[0], + np.array([0.25128617, 0.36876296, 0.37995087]) + ) + def test_lda_priors(): # Test priors (negative priors) From 6560db85cc11349a9035f233fae5a7861a5b57b3 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Tue, 21 Aug 2018 14:28:11 +0100 Subject: [PATCH 03/22] pep8 fix --- sklearn/tests/test_discriminant_analysis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 43cae507c93f3..09aa4e8c81b82 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -91,17 +91,18 @@ def test_lda_predict(): clf = LinearDiscriminantAnalysis(solver="dummy") assert_raises(ValueError, clf.fit, X, y) + def test_lda_predict_proba(): # Test LDA posterior probabilities # Binary case - clf = LinearDiscriminantAnalysis(solver='svd').fit(X6,y6) + clf = 
LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) probas = clf.predict_proba(X6) assert_array_almost_equal( probas[0], np.array([0.91942108, 0.08057892]) ) # Multiclass case - clf = LinearDiscriminantAnalysis(solver='svd').fit(X6,y7) + clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y7) probas = clf.predict_proba(X6) assert_array_almost_equal( probas[0], From c6e2b62455f44a5eea234043a923e5098c266f23 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Wed, 5 Dec 2018 10:16:21 +0000 Subject: [PATCH 04/22] lda predict_proba refactoring --- sklearn/discriminant_analysis.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 5f33aa9475ee0..d64a5be19a7a2 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -24,6 +24,7 @@ from .utils import check_array, check_X_y from .utils.validation import check_is_fitted from .utils.multiclass import check_classification_targets +from .utils.extmath import softmax from .preprocessing import StandardScaler @@ -506,18 +507,10 @@ def predict_proba(self, X): Estimated probabilities. """ prob = self.decision_function(X) - if len(self.classes_) == 2: - prob *= -1 - np.exp(prob, prob) - prob += 1 - np.reciprocal(prob, prob) - return np.column_stack([1 - prob, prob]) + if prob.ndim == 1: + return super(LinearDiscriminantAnalysis, self)._predict_proba_lr(X) else: - # compute the likelihood of the underlying gaussian models - # up to a multiplicative constant. - likelihood = np.exp(prob - prob.max(axis=1)[:, np.newaxis]) - # compute posterior probabilities - return likelihood / likelihood.sum(axis=1)[:, np.newaxis] + return softmax(prob) def predict_log_proba(self, X): """Estimate log probability. 
From d7a12264731c4abe312a9e388fd7f7d5b38d630a Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Wed, 5 Dec 2018 10:19:55 +0000 Subject: [PATCH 05/22] Typo fix --- sklearn/tests/test_discriminant_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 09aa4e8c81b82..b9a20efd4f50c 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -244,7 +244,7 @@ def test_lda_scaling(): def test_lda_store_covariance(): - # Test for slover 'lsqr' and 'eigen' + # Test for solver 'lsqr' and 'eigen' # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers for solver in ('lsqr', 'eigen'): clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6) @@ -260,7 +260,7 @@ def test_lda_store_covariance(): np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) ) - # Test for SVD slover, the default is to not set the covariances_ attribute + # Test for SVD solver, the default is to not set the covariances_ attribute clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) assert_false(hasattr(clf, 'covariance_')) From 85d45b052bb1a689bebdf36e1bdcdd4ea4e55ab3 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Wed, 16 Jan 2019 10:56:18 +0000 Subject: [PATCH 06/22] flake8 fix --- sklearn/discriminant_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index d968bf5c72df4..021fdcb99e35f 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -14,7 +14,6 @@ import numpy as np from .exceptions import ChangedBehaviorWarning from scipy import linalg -from scipy.special import expit from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .linear_model.base import LinearClassifierMixin From 15b59e76dfeffd651ad379c6ca50d54695421995 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Wed, 16 Jan 2019 
11:40:15 +0000 Subject: [PATCH 07/22] predict_proba check_is_fitted check --- sklearn/discriminant_analysis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 021fdcb99e35f..0b1f544df9eef 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -530,9 +530,10 @@ def predict_proba(self, X): C : array, shape (n_samples, n_classes) Estimated probabilities. """ + check_is_fitted(self, 'classes_') if len(self.classes_) == 2: - return super(LinearDiscriminantAnalysis, self)._predict_proba_lr(X) + return self._predict_proba_lr(X) else: prob = self.decision_function(X) return softmax(prob) From cac71cc2f53984cce04b3845c2ccee66b7e3f7af Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 17 Jan 2019 12:11:38 +0000 Subject: [PATCH 08/22] update what's new rst file --- doc/whats_new/v0.21.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 2f359ca87463f..4a4164fa561b4 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -17,6 +17,8 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. +- :class:`discriminant_analysis.LinearDiscriminantAnalysis` for multiclass + classification. |Fix| - :class:`linear_model.BayesianRidge` |Fix| - Decision trees and derived ensembles when both `max_depth` and `max_leaf_nodes` are set. |Fix| @@ -68,6 +70,14 @@ Support for Python 3.4 and below has been officially dropped. Previously the change was made, but silently. :issue:`11526` by :user:`William de Vazelhes`. +:mod:`sklearn.discriminant_analysis` +...................... + +- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis` + where the predicted probabilities would be incorrectly computed in the + multiclass case. 
:issue:`6848`, by :user:`Agamemnon Krasoulis + `. + :mod:`sklearn.ensemble` ....................... From 8144c1a918500235e29c3f55b8445a3228c101b2 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 7 Feb 2019 17:00:27 +0000 Subject: [PATCH 09/22] rename prob to decision --- sklearn/discriminant_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index f673e38f40fdc..b5a978cbae97b 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -534,8 +534,8 @@ def predict_proba(self, X): if len(self.classes_) == 2: return self._predict_proba_lr(X) else: - prob = self.decision_function(X) - return softmax(prob) + decision = self.decision_function(X) + return softmax(decision) def predict_log_proba(self, X): """Estimate log probability. From 2cf19a5992927e1fb8d5c8214aa8a53701f79a75 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 7 Feb 2019 17:10:49 +0000 Subject: [PATCH 10/22] include additional tests for predict_proba --- sklearn/tests/test_discriminant_analysis.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index b66ce2b0d1cca..c083b2dc9c145 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -105,7 +105,14 @@ def test_lda_predict_proba(): ) # Multiclass case clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y7) + pred = clf.predict(X6) probas = clf.predict_proba(X6) + # Check that argmax of predict_proba gives same results as predict + assert_array_equal(pred, clf.classes_[np.argmax(probas, axis=1)]) + # Check that probabilities sum up to 1 + assert_array_almost_equal(np.sum(probas, axis=1), + np.ones((probas.shape[0],))) + # Numerical check assert_array_almost_equal( probas[0], np.array([0.25128617, 0.36876296, 0.37995087]) From 67ad73e8d9458e5264fc259a31f89e3e9e6a16ad Mon 
Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 7 Feb 2019 17:44:18 +0000 Subject: [PATCH 11/22] use allclose vs. assert_array_almost_equal --- sklearn/tests/test_discriminant_analysis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index c083b2dc9c145..ba3da5f692d15 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -2,6 +2,8 @@ import pytest +from numpy.testing import assert_allclose + from sklearn.exceptions import ChangedBehaviorWarning from sklearn.utils import check_random_state from sklearn.utils.testing import (assert_array_equal, assert_no_warnings, @@ -99,7 +101,7 @@ def test_lda_predict_proba(): # Binary case clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) probas = clf.predict_proba(X6) - assert_array_almost_equal( + assert_allclose( probas[0], np.array([0.91942108, 0.08057892]) ) @@ -110,10 +112,10 @@ def test_lda_predict_proba(): # Check that argmax of predict_proba gives same results as predict assert_array_equal(pred, clf.classes_[np.argmax(probas, axis=1)]) # Check that probabilities sum up to 1 - assert_array_almost_equal(np.sum(probas, axis=1), + assert_allclose(np.sum(probas, axis=1), np.ones((probas.shape[0],))) # Numerical check - assert_array_almost_equal( + assert_allclose( probas[0], np.array([0.25128617, 0.36876296, 0.37995087]) ) From cb5abc1f7cfed4560e61b993aa77d8ccdc6843f4 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 7 Feb 2019 17:47:49 +0000 Subject: [PATCH 12/22] fix indent --- sklearn/tests/test_discriminant_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index ba3da5f692d15..f89f1d4e87c48 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -113,7 +113,7 @@ def 
test_lda_predict_proba(): assert_array_equal(pred, clf.classes_[np.argmax(probas, axis=1)]) # Check that probabilities sum up to 1 assert_allclose(np.sum(probas, axis=1), - np.ones((probas.shape[0],))) + np.ones((probas.shape[0],))) # Numerical check assert_allclose( probas[0], From ef6a57016ff125d85d7880bae0c7a41119af0580 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 11 Feb 2019 17:41:53 +0000 Subject: [PATCH 13/22] replace len with size --- sklearn/discriminant_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index b5a978cbae97b..7ed63a5e454bb 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -531,7 +531,7 @@ def predict_proba(self, X): """ check_is_fitted(self, 'classes_') - if len(self.classes_) == 2: + if self.classes_.size == 2: return self._predict_proba_lr(X) else: decision = self.decision_function(X) From 92f9aa2944b950aef41a52c438380a45e1ef7d7a Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 11 Feb 2019 18:02:59 +0000 Subject: [PATCH 14/22] explicit computation for binary case --- sklearn/discriminant_analysis.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 7ed63a5e454bb..9c51283b7027e 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -13,6 +13,7 @@ import numpy as np from .exceptions import ChangedBehaviorWarning from scipy import linalg +from scipy.special import expit from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .linear_model.base import LinearClassifierMixin @@ -531,10 +532,11 @@ def predict_proba(self, X): """ check_is_fitted(self, 'classes_') + decision = self.decision_function(X) if self.classes_.size == 2: - return self._predict_proba_lr(X) + proba = expit(decision) + return np.vstack([1-proba, proba]).T else: - decision = 
self.decision_function(X) return softmax(decision) def predict_log_proba(self, X): From 7320883579b86b24dff3a73dd77d2f2fd89a7251 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Fri, 1 Mar 2019 11:34:13 +0000 Subject: [PATCH 15/22] fix style whats_new rst --- doc/whats_new/v0.21.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index ce9b2e488213c..ea524e9c66a26 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -81,7 +81,7 @@ Support for Python 3.4 and below has been officially dropped. :user:`William de Vazelhes`. :mod:`sklearn.discriminant_analysis` -...................... +.................................... - |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis` where the predicted probabilities would be incorrectly computed in the From 9613fea4cbdf0e923ae05e0db16d6580a37558fc Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Fri, 1 Mar 2019 12:34:36 +0000 Subject: [PATCH 16/22] predict_proba new regression test --- sklearn/tests/test_discriminant_analysis.py | 84 +++++++++++++++------ 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index f89f1d4e87c48..a49dd09992102 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -96,30 +96,72 @@ def test_lda_predict(): assert_raises(ValueError, clf.fit, X, y) -def test_lda_predict_proba(): - # Test LDA posterior probabilities - # Binary case - clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) - probas = clf.predict_proba(X6) - assert_allclose( - probas[0], - np.array([0.91942108, 0.08057892]) +@pytest.mark.parametrize("n_classes", [2, 3]) +def test_lda_predict_proba(n_classes): + def generate_dataset(n_samples, centers, covariances, random_state=None): + """Generate a multivariate normal data given some centers and + covariances""" + rng = 
check_random_state(random_state) + X = np.vstack([rng.multivariate_normal(mean, cov, + size=n_samples // len(centers)) + for mean, cov in zip(centers, covariances)]) + y = np.hstack([[clazz] * (n_samples // len(centers)) + for clazz in range(len(centers))]) + return X, y + + blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes] + blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers)) + X, y = generate_dataset( + n_samples=90000, centers=blob_centers, covariances=blob_stds, + random_state=42 ) - # Multiclass case - clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y7) - pred = clf.predict(X6) - probas = clf.predict_proba(X6) - # Check that argmax of predict_proba gives same results as predict - assert_array_equal(pred, clf.classes_[np.argmax(probas, axis=1)]) - # Check that probabilities sum up to 1 - assert_allclose(np.sum(probas, axis=1), - np.ones((probas.shape[0],))) - # Numerical check - assert_allclose( - probas[0], - np.array([0.25128617, 0.36876296, 0.37995087]) + lda = LinearDiscriminantAnalysis(solver='lsqr').fit(X, y) + # check that the empirical means and covariances are close enough to the + # one used to generate the data + assert_allclose(lda.means_, blob_centers, atol=1e-1) + assert_allclose(lda.covariance_, blob_stds[0], atol=1) + + # implement the method to compute the probability given in The Elements + # of Statistical Learning (cf. p.127, Sect. 
4.4.5 "Logistic Regression + # or LDA?") + precision = np.linalg.inv(blob_stds[0]) + alpha_k = [] + alpha_k_0 = [] + for clazz in range(len(blob_centers) - 1): + alpha_k.append( + np.dot(precision, + (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis])) + alpha_k_0.append( + np.dot(- 0.5 * (blob_centers[clazz] + + blob_centers[-1])[np.newaxis, :], alpha_k[-1])) + + sample = np.array([[-22, 22]]) + + def discriminant_func(sample, coef, intercept, clazz): + return np.exp(intercept[clazz] + np.dot(sample, coef[clazz])) + + prob = np.array([float( + discriminant_func(sample, alpha_k, alpha_k_0, clazz) / + (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz) + for clazz in range(n_classes - 1)]))) for clazz in range( + n_classes - 1)]) + + prob_ref = 1 - np.sum(prob) + + # check the consistency of the computed probability + # all probabilities should sum to one + prob_ref_2 = float( + 1 / (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz) + for clazz in range(n_classes - 1)])) ) + assert prob_ref == pytest.approx(prob_ref_2) + # check that the probabilities of LDA are close to the theoretical + # probabilities + assert_allclose(lda.predict_proba(sample), + np.hstack([prob, prob_ref])[np.newaxis], + atol=1e-2) + def test_lda_priors(): # Test priors (negative priors) From 55e3d2a7a4447377bd1e18e4ac9d880919c60cbf Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Fri, 1 Mar 2019 12:37:24 +0000 Subject: [PATCH 17/22] give credit for regression test --- doc/whats_new/v0.21.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index ea524e9c66a26..8d4b1a209f6bf 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -86,7 +86,7 @@ Support for Python 3.4 and below has been officially dropped. - |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis` where the predicted probabilities would be incorrectly computed in the multiclass case. 
:issue:`6848`, by :user:`Agamemnon Krasoulis - `. + ` and `Guillaume Lemaitre `. :mod:`sklearn.ensemble` ....................... From 9d198b1f19b1fbc799e2a694d3033b3ef3e50a33 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 4 Mar 2019 10:20:09 +0000 Subject: [PATCH 18/22] fix bug for eigen solution --- sklearn/discriminant_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 6bf6e4c4fce7f..e710bc5045b30 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -339,7 +339,6 @@ class scatter). This solver supports both classification and self.explained_variance_ratio_ = np.sort(evals / np.sum(evals) )[::-1][:self._max_components] evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors - evecs /= np.linalg.norm(evecs, axis=0) self.scalings_ = evecs self.coef_ = np.dot(self.means_, evecs).dot(evecs.T) From bd4370ee63ce39e6a4d9cb0f587cd828233d32bc Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 4 Mar 2019 10:20:49 +0000 Subject: [PATCH 19/22] include all three solvers in predict_proba regression test --- sklearn/tests/test_discriminant_analysis.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 574357563d3e5..f23007c10b201 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -98,7 +98,8 @@ def test_lda_predict(): @pytest.mark.parametrize("n_classes", [2, 3]) -def test_lda_predict_proba(n_classes): +@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"]) +def test_lda_predict_proba(solver, n_classes): def generate_dataset(n_samples, centers, covariances, random_state=None): """Generate a multivariate normal data given some centers and covariances""" @@ -116,7 +117,8 @@ def generate_dataset(n_samples, centers, covariances, random_state=None): n_samples=90000, 
centers=blob_centers, covariances=blob_stds, random_state=42 ) - lda = LinearDiscriminantAnalysis(solver='lsqr').fit(X, y) + lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True, + shrinkage=None).fit(X, y) # check that the empirical means and covariances are close enough to the # one used to generate the data assert_allclose(lda.means_, blob_centers, atol=1e-1) From 480e1083433383a37535fdac665698e772099539 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 4 Mar 2019 10:25:51 +0000 Subject: [PATCH 20/22] update whats_new rst file --- doc/whats_new/v0.21.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index e1aba31818339..6d0b63f445162 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -19,6 +19,8 @@ random sampling procedures. - :class:`discriminant_analysis.LinearDiscriminantAnalysis` for multiclass classification. |Fix| +- :class:`discriminant_analysis.LinearDiscriminantAnalysis` with 'eigen' +solver. |Fix| - :class:`linear_model.BayesianRidge` |Fix| - Decision trees and derived ensembles when both `max_depth` and `max_leaf_nodes` are set. |Fix| @@ -114,6 +116,11 @@ Support for Python 3.4 and below has been officially dropped. multiclass case. :issue:`6848`, by :user:`Agamemnon Krasoulis ` and `Guillaume Lemaitre `. +- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis` + where the predicted probabilities would be incorrectly computed with ``eigen`` + solver. :issue:`11727`, by :user:`Agamemnon Krasoulis + `. + :mod:`sklearn.dummy` .................... 
From 06e85725c99a9396a20230bf2bb4d44267ccaf0a Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 4 Mar 2019 12:12:20 +0000 Subject: [PATCH 21/22] fix minor formatting issue --- doc/whats_new/v0.21.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 6d0b63f445162..313e55bdda9f4 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -20,7 +20,7 @@ random sampling procedures. - :class:`discriminant_analysis.LinearDiscriminantAnalysis` for multiclass classification. |Fix| - :class:`discriminant_analysis.LinearDiscriminantAnalysis` with 'eigen' -solver. |Fix| + solver. |Fix| - :class:`linear_model.BayesianRidge` |Fix| - Decision trees and derived ensembles when both `max_depth` and `max_leaf_nodes` are set. |Fix| From 625c3f62b7ddf52570f5e2b452d34b9994153a93 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Wed, 6 Mar 2019 16:55:43 +0000 Subject: [PATCH 22/22] use scipy.linalg instead of np.linalg --- sklearn/tests/test_discriminant_analysis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index f23007c10b201..3428f12b03306 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -3,6 +3,7 @@ import pytest from numpy.testing import assert_allclose +from scipy import linalg from sklearn.exceptions import ChangedBehaviorWarning from sklearn.utils import check_random_state @@ -127,7 +128,7 @@ def generate_dataset(n_samples, centers, covariances, random_state=None): # implement the method to compute the probability given in The Elements # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression # or LDA?") - precision = np.linalg.inv(blob_stds[0]) + precision = linalg.inv(blob_stds[0]) alpha_k = [] alpha_k_0 = [] for clazz in range(len(blob_centers) - 1):