Revert "[MRG+2] Fix LDA predict_proba() (#11796)" · xhluca/scikit-learn@1f6bed6 · GitHub

Commit 1f6bed6

Author: Xing
Revert "[MRG+2] Fix LDA predict_proba() (scikit-learn#11796)"
This reverts commit deea1e8.
1 parent 5dc497e · commit 1f6bed6

3 files changed: 10 additions, 96 deletions


doc/whats_new/v0.21.rst

Lines changed: 0 additions & 14 deletions
@@ -17,10 +17,6 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
-- :class:`discriminant_analysis.LinearDiscriminantAnalysis` for multiclass
-  classification. |Fix|
-- :class:`discriminant_analysis.LinearDiscriminantAnalysis` with 'eigen'
-  solver. |Fix|
 - :class:`linear_model.BayesianRidge` |Fix|
 - Decision trees and derived ensembles when both `max_depth` and
   `max_leaf_nodes` are set. |Fix|
@@ -111,16 +107,6 @@ Support for Python 3.4 and below has been officially dropped.
   Previously the change was made, but silently. :issue:`11526` by
   :user:`William de Vazelhes<wdevazelhes>`.
 
-- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-  where the predicted probabilities would be incorrectly computed in the
-  multiclass case. :issue:`6848`, by :user:`Agamemnon Krasoulis
-  <agamemnonc>` and `Guillaume Lemaitre <glemaitre>`.
-
-- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-  where the predicted probabilities would be incorrectly computed with ``eigen``
-  solver. :issue:`11727`, by :user:`Agamemnon Krasoulis
-  <agamemnonc>`.
-
 :mod:`sklearn.dummy`
 ....................

sklearn/discriminant_analysis.py

Lines changed: 8 additions & 8 deletions
@@ -22,7 +22,6 @@
 from .utils import check_array, check_X_y
 from .utils.validation import check_is_fitted
 from .utils.multiclass import check_classification_targets
-from .utils.extmath import softmax
 from .preprocessing import StandardScaler
 
 
@@ -339,6 +338,7 @@ class scatter). This solver supports both classification and
         self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)
                                                  )[::-1][:self._max_components]
         evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors
+        evecs /= np.linalg.norm(evecs, axis=0)
 
         self.scalings_ = evecs
         self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
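The one-line addition just above restores the unit-norm rescaling of the eigenvectors that the reverted fix had removed. A minimal standalone NumPy sketch of what that line does (the toy `evecs` matrix is made up for illustration):

```python
import numpy as np

# Toy 2x2 eigenvector matrix, one eigenvector per column (made-up values).
evecs = np.array([[3.0, 0.0],
                  [4.0, 2.0]])

# The restored line: rescale every column to unit Euclidean norm.
evecs /= np.linalg.norm(evecs, axis=0)

print(np.linalg.norm(evecs, axis=0))  # -> [1. 1.]
```

Because `scalings_` is built from these columns, the rescaling changes the scale of the 'eigen' solver's decision values; removing it was part of the :issue:`11727` fix that this commit reverts.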
@@ -531,14 +531,14 @@ def predict_proba(self, X):
         C : array, shape (n_samples, n_classes)
             Estimated probabilities.
         """
-        check_is_fitted(self, 'classes_')
-
-        decision = self.decision_function(X)
-        if self.classes_.size == 2:
-            proba = expit(decision)
-            return np.vstack([1-proba, proba]).T
+        prob = self.decision_function(X)
+        expit(prob, out=prob)
+        if len(self.classes_) == 2:  # binary case
+            return np.column_stack([1 - prob, prob])
         else:
-            return softmax(decision)
+            # OvR normalization, like LibLinear's predict_probability
+            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
+            return prob
 
     def predict_log_proba(self, X):
         """Estimate log probability.

sklearn/tests/test_discriminant_analysis.py

Lines changed: 2 additions & 74 deletions
@@ -2,9 +2,6 @@
 
 import pytest
 
-from numpy.testing import assert_allclose
-from scipy import linalg
-
 from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.utils import check_random_state
 from sklearn.utils.testing import (assert_array_equal, assert_no_warnings,
@@ -98,75 +95,6 @@ def test_lda_predict():
     assert_raises(ValueError, clf.fit, X, y)
 
 
-@pytest.mark.parametrize("n_classes", [2, 3])
-@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
-def test_lda_predict_proba(solver, n_classes):
-    def generate_dataset(n_samples, centers, covariances, random_state=None):
-        """Generate a multivariate normal data given some centers and
-        covariances"""
-        rng = check_random_state(random_state)
-        X = np.vstack([rng.multivariate_normal(mean, cov,
-                                               size=n_samples // len(centers))
-                       for mean, cov in zip(centers, covariances)])
-        y = np.hstack([[clazz] * (n_samples // len(centers))
-                       for clazz in range(len(centers))])
-        return X, y
-
-    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
-    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
-    X, y = generate_dataset(
-        n_samples=90000, centers=blob_centers, covariances=blob_stds,
-        random_state=42
-    )
-    lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True,
-                                     shrinkage=None).fit(X, y)
-    # check that the empirical means and covariances are close enough to the
-    # one used to generate the data
-    assert_allclose(lda.means_, blob_centers, atol=1e-1)
-    assert_allclose(lda.covariance_, blob_stds[0], atol=1)
-
-    # implement the method to compute the probability given in The Elements
-    # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
-    # or LDA?")
-    precision = linalg.inv(blob_stds[0])
-    alpha_k = []
-    alpha_k_0 = []
-    for clazz in range(len(blob_centers) - 1):
-        alpha_k.append(
-            np.dot(precision,
-                   (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis]))
-        alpha_k_0.append(
-            np.dot(- 0.5 * (blob_centers[clazz] +
-                            blob_centers[-1])[np.newaxis, :], alpha_k[-1]))
-
-    sample = np.array([[-22, 22]])
-
-    def discriminant_func(sample, coef, intercept, clazz):
-        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz]))
-
-    prob = np.array([float(
-        discriminant_func(sample, alpha_k, alpha_k_0, clazz) /
-        (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
-                  for clazz in range(n_classes - 1)]))) for clazz in range(
-                      n_classes - 1)])
-
-    prob_ref = 1 - np.sum(prob)
-
-    # check the consistency of the computed probability
-    # all probabilities should sum to one
-    prob_ref_2 = float(
-        1 / (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
-                      for clazz in range(n_classes - 1)]))
-    )
-    assert prob_ref == pytest.approx(prob_ref_2)
-    # check that the probability of LDA are close to the theoretical
-    # probabilties
-    assert_allclose(lda.predict_proba(sample),
-                    np.hstack([prob, prob_ref])[np.newaxis],
-                    atol=1e-2)
-
-
 def test_lda_priors():
     # Test priors (negative priors)
     priors = np.array([0.5, -0.5])
@@ -301,7 +229,7 @@ def test_lda_scaling():
 
 
 def test_lda_store_covariance():
-    # Test for solver 'lsqr' and 'eigen'
+    # Test for slover 'lsqr' and 'eigen'
     # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers
     for solver in ('lsqr', 'eigen'):
         clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
@@ -317,7 +245,7 @@ def test_lda_store_covariance():
         np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
     )
 
-    # Test for SVD solver, the default is to not set the covariances_ attribute
+    # Test for SVD slover, the default is to not set the covariances_ attribute
     clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6)
     assert not hasattr(clf, 'covariance_')
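For reference, the deleted test checks `predict_proba` against the closed-form LDA posterior from The Elements of Statistical Learning (p. 127, Sect. 4.4.5). A compact sketch of that reference computation, reusing the test's toy centers, shared covariance, and sample point, and assuming equal class priors as in the test's balanced dataset:

```python
import numpy as np
from scipy import linalg

# Same toy parameters as the deleted test: 3 classes in 2 dimensions,
# a shared covariance matrix, and one query point.
means = np.array([[0.0, 0.0], [-10.0, 40.0], [-30.0, 30.0]])
cov = np.array([[10.0, 10.0], [10.0, 100.0]])
x = np.array([-22.0, 22.0])

precision = linalg.inv(cov)

def log_odds_vs_last(k):
    # log P(k|x) / P(K|x) for equal priors (ESL, Sect. 4.4.5):
    # -0.5 (mu_k + mu_K)^T S^-1 (mu_k - mu_K) + x^T S^-1 (mu_k - mu_K)
    delta = precision @ (means[k] - means[-1])
    return -0.5 * (means[k] + means[-1]) @ delta + x @ delta

odds = np.exp([log_odds_vs_last(k) for k in range(len(means) - 1)])
p_last = 1.0 / (1.0 + odds.sum())      # posterior of the last class
posterior = np.append(odds * p_last, p_last)

print(posterior, posterior.sum())  # the posterior sums to one
```

The deleted test asserts that `lda.predict_proba(sample)` matches this posterior vector to within `atol=1e-2`.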
