diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 6f504a721ec75..313e55bdda9f4 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -17,6 +17,10 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
+- :class:`discriminant_analysis.LinearDiscriminantAnalysis` for multiclass
+  classification. |Fix|
+- :class:`discriminant_analysis.LinearDiscriminantAnalysis` with 'eigen'
+  solver. |Fix|
 - :class:`linear_model.BayesianRidge` |Fix|
 - Decision trees and derived ensembles when both `max_depth` and
   `max_leaf_nodes` are set. |Fix|
@@ -107,6 +111,16 @@ Support for Python 3.4 and below has been officially dropped.
   Previously the change was made, but silently. :issue:`11526` by
   :user:`William de Vazelhes <wdevazelhes>`.
 
+- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  where the predicted probabilities would be incorrectly computed in the
+  multiclass case. :issue:`6848`, by :user:`Agamemnon Krasoulis
+  <agamemnonc>` and `Guillaume Lemaitre <glemaitre>`.
+
+- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  where the predicted probabilities would be incorrectly computed with ``eigen``
+  solver. :issue:`11727`, by :user:`Agamemnon Krasoulis
+  <agamemnonc>`.
+
 :mod:`sklearn.dummy`
 ....................
 
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py
index 370b7fa809a8f..e710bc5045b30 100644
--- a/sklearn/discriminant_analysis.py
+++ b/sklearn/discriminant_analysis.py
@@ -22,6 +22,7 @@
 from .utils import check_array, check_X_y
 from .utils.validation import check_is_fitted
 from .utils.multiclass import check_classification_targets
+from .utils.extmath import softmax
 from .preprocessing import StandardScaler
 
 
@@ -338,7 +339,6 @@ class scatter). This solver supports both classification and
         self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)
                                                  )[::-1][:self._max_components]
         evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors
-        evecs /= np.linalg.norm(evecs, axis=0)
 
         self.scalings_ = evecs
         self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
@@ -531,14 +531,14 @@ def predict_proba(self, X):
         C : array, shape (n_samples, n_classes)
             Estimated probabilities.
         """
-        prob = self.decision_function(X)
-        expit(prob, out=prob)
-        if len(self.classes_) == 2:  # binary case
-            return np.column_stack([1 - prob, prob])
+        check_is_fitted(self, 'classes_')
+
+        decision = self.decision_function(X)
+        if self.classes_.size == 2:
+            proba = expit(decision)
+            return np.vstack([1-proba, proba]).T
         else:
-            # OvR normalization, like LibLinear's predict_probability
-            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
-            return prob
+            return softmax(decision)
 
     def predict_log_proba(self, X):
         """Estimate log probability.
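Note on the change above: for more than two classes, ``predict_proba`` now passes the
decision values through a softmax instead of normalizing one-vs-rest sigmoid outputs,
while the binary case keeps the sigmoid. A minimal NumPy-only sketch of the multiclass
path (``softmax_rows`` is a hypothetical stand-in for ``sklearn.utils.extmath.softmax``,
not the library code)::

    import numpy as np

    def softmax_rows(decision):
        # Row-wise softmax over per-class decision values; subtracting the
        # row maximum first keeps the exponentials numerically stable.
        shifted = decision - decision.max(axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Toy decision values for 3 samples and 3 classes (made-up numbers).
    decision = np.array([[2.0, 1.0, 0.1],
                         [0.5, 0.5, 0.5],
                         [-1.0, 3.0, 0.0]])
    proba = softmax_rows(decision)
    print(proba)              # rows are valid probability vectors
    print(proba.sum(axis=1))  # each row sums to 1

The old code divided sigmoid outputs by their row sum, which does not recover the
class posteriors in the multiclass case; the new test below checks the softmax-based
result against the closed-form posterior from The Elements of Statistical Learning.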
diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index aafb744172703..3428f12b03306 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -2,6 +2,9 @@
 
 import pytest
 
+from numpy.testing import assert_allclose
+from scipy import linalg
+
 from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.utils import check_random_state
 from sklearn.utils.testing import (assert_array_equal, assert_no_warnings,
@@ -95,6 +98,75 @@ def test_lda_predict():
     assert_raises(ValueError, clf.fit, X, y)
 
 
+@pytest.mark.parametrize("n_classes", [2, 3])
+@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
+def test_lda_predict_proba(solver, n_classes):
+    def generate_dataset(n_samples, centers, covariances, random_state=None):
+        """Generate multivariate normal data given some centers and
+        covariances"""
+        rng = check_random_state(random_state)
+        X = np.vstack([rng.multivariate_normal(mean, cov,
+                                               size=n_samples // len(centers))
+                       for mean, cov in zip(centers, covariances)])
+        y = np.hstack([[clazz] * (n_samples // len(centers))
+                       for clazz in range(len(centers))])
+        return X, y
+
+    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
+    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
+    X, y = generate_dataset(
+        n_samples=90000, centers=blob_centers, covariances=blob_stds,
+        random_state=42
+    )
+    lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True,
+                                     shrinkage=None).fit(X, y)
+    # check that the empirical means and covariances are close enough to the
+    # ones used to generate the data
+    assert_allclose(lda.means_, blob_centers, atol=1e-1)
+    assert_allclose(lda.covariance_, blob_stds[0], atol=1)
+
+    # implement the method to compute the probability given in The Elements
+    # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
+    # or LDA?")
+    precision = linalg.inv(blob_stds[0])
+    alpha_k = []
+    alpha_k_0 = []
+    for clazz in range(len(blob_centers) - 1):
+        alpha_k.append(
+            np.dot(precision,
+                   (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis]))
+        alpha_k_0.append(
+            np.dot(- 0.5 * (blob_centers[clazz] +
+                            blob_centers[-1])[np.newaxis, :], alpha_k[-1]))
+
+    sample = np.array([[-22, 22]])
+
+    def discriminant_func(sample, coef, intercept, clazz):
+        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz]))
+
+    prob = np.array([float(
+        discriminant_func(sample, alpha_k, alpha_k_0, clazz) /
+        (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
+                  for clazz in range(n_classes - 1)]))) for clazz in range(
+                      n_classes - 1)])
+
+    prob_ref = 1 - np.sum(prob)
+
+    # check the consistency of the computed probability
+    # all probabilities should sum to one
+    prob_ref_2 = float(
+        1 / (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
+                      for clazz in range(n_classes - 1)]))
+    )
+
+    assert prob_ref == pytest.approx(prob_ref_2)
+    # check that the probabilities estimated by LDA are close to the
+    # theoretical probabilities
+    assert_allclose(lda.predict_proba(sample),
+                    np.hstack([prob, prob_ref])[np.newaxis],
+                    atol=1e-2)
+
+
 def test_lda_priors():
     # Test priors (negative priors)
     priors = np.array([0.5, -0.5])
@@ -229,7 +301,7 @@ def test_lda_scaling():
 
 
 def test_lda_store_covariance():
-    # Test for slover 'lsqr' and 'eigen'
+    # Test for solver 'lsqr' and 'eigen'
     # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers
     for solver in ('lsqr', 'eigen'):
         clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
@@ -245,7 +317,7 @@ def test_lda_store_covariance():
         np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
    )
 
-    # Test for SVD slover, the default is to not set the covariances_ attribute
+    # Test for SVD solver, the default is to not set the covariances_ attribute
     clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6)
     assert not hasattr(clf, 'covariance_')