diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 6f504a721ec75..313e55bdda9f4 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -17,6 +17,10 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
+- :class:`discriminant_analysis.LinearDiscriminantAnalysis` for multiclass
+  classification. |Fix|
+- :class:`discriminant_analysis.LinearDiscriminantAnalysis` with 'eigen'
+  solver. |Fix|
 - :class:`linear_model.BayesianRidge` |Fix|
 - Decision trees and derived ensembles when both `max_depth` and
   `max_leaf_nodes` are set. |Fix|
@@ -107,6 +111,16 @@ Support for Python 3.4 and below has been officially dropped.
   Previously the change was made, but silently. :issue:`11526` by
   :user:`William de Vazelhes <wdevazelhes>`.
 
+- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  where the predicted probabilities would be incorrectly computed in the
+  multiclass case. :issue:`6848`, by :user:`Agamemnon Krasoulis
+  <agamemnonc>` and `Guillaume Lemaitre <glemaitre>`.
+
+- |Fix| Fixed a bug in :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  where the predicted probabilities would be incorrectly computed with ``eigen``
+  solver. :issue:`11727`, by :user:`Agamemnon Krasoulis
+  <agamemnonc>`.
+
 :mod:`sklearn.dummy`
 ....................
 
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py
index 370b7fa809a8f..e710bc5045b30 100644
--- a/sklearn/discriminant_analysis.py
+++ b/sklearn/discriminant_analysis.py
@@ -22,6 +22,7 @@
 from .utils import check_array, check_X_y
 from .utils.validation import check_is_fitted
 from .utils.multiclass import check_classification_targets
+from .utils.extmath import softmax
 from .preprocessing import StandardScaler
 
 
@@ -338,7 +339,6 @@ class scatter). This solver supports both classification and
         self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)
                                                  )[::-1][:self._max_components]
         evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors
-        evecs /= np.linalg.norm(evecs, axis=0)
 
         self.scalings_ = evecs
         self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
@@ -531,14 +531,14 @@ def predict_proba(self, X):
         C : array, shape (n_samples, n_classes)
             Estimated probabilities.
         """
-        prob = self.decision_function(X)
-        expit(prob, out=prob)
-        if len(self.classes_) == 2:  # binary case
-            return np.column_stack([1 - prob, prob])
+        check_is_fitted(self, 'classes_')
+
+        decision = self.decision_function(X)
+        if self.classes_.size == 2:
+            proba = expit(decision)
+            return np.vstack([1-proba, proba]).T
         else:
-            # OvR normalization, like LibLinear's predict_probability
-            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
-            return prob
+            return softmax(decision)
 
     def predict_log_proba(self, X):
         """Estimate log probability.
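Note on the change above: for more than two classes, ``predict_proba`` now passes the
decision values through a softmax instead of normalizing one-vs-rest sigmoid outputs,
while the binary case keeps the sigmoid. A minimal NumPy-only sketch of the multiclass
path (``softmax_rows`` is a hypothetical stand-in for ``sklearn.utils.extmath.softmax``,
not the library code)::

    import numpy as np

    def softmax_rows(decision):
        # Row-wise softmax over per-class decision values; subtracting the
        # row maximum first keeps the exponentials numerically stable.
        shifted = decision - decision.max(axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Toy decision values for 3 samples and 3 classes (made-up numbers).
    decision = np.array([[2.0, 1.0, 0.1],
                         [0.5, 0.5, 0.5],
                         [-1.0, 3.0, 0.0]])
    proba = softmax_rows(decision)
    print(proba)              # rows are valid probability vectors
    print(proba.sum(axis=1))  # each row sums to 1

The old code divided sigmoid outputs by their row sum, which does not recover the
class posteriors in the multiclass case; the new test below checks the softmax-based
result against the closed-form posterior from The Elements of Statistical Learning.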
diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index aafb744172703..3428f12b03306 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -2,6 +2,9 @@
 
 import pytest
 
+from numpy.testing import assert_allclose
+from scipy import linalg
+
 from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.utils import check_random_state
 from sklearn.utils.testing import (assert_array_equal, assert_no_warnings,
@@ -95,6 +98,75 @@ def test_lda_predict():
     assert_raises(ValueError, clf.fit, X, y)
 
 
+@pytest.mark.parametrize("n_classes", [2, 3])
+@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
+def test_lda_predict_proba(solver, n_classes):
+    def generate_dataset(n_samples, centers, covariances, random_state=None):
+        """Generate multivariate normal data given some centers and
+        covariances"""
+        rng = check_random_state(random_state)
+        X = np.vstack([rng.multivariate_normal(mean, cov,
+                                               size=n_samples // len(centers))
+                       for mean, cov in zip(centers, covariances)])
+        y = np.hstack([[clazz] * (n_samples // len(centers))
+                       for clazz in range(len(centers))])
+        return X, y
+
+    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
+    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
+    X, y = generate_dataset(
+        n_samples=90000, centers=blob_centers, covariances=blob_stds,
+        random_state=42
+    )
+    lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True,
+                                     shrinkage=None).fit(X, y)
+    # check that the empirical means and covariances are close enough to the
+    # ones used to generate the data
+    assert_allclose(lda.means_, blob_centers, atol=1e-1)
+    assert_allclose(lda.covariance_, blob_stds[0], atol=1)
+
+    # implement the method to compute the probability given in The Elements
+    # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
+    # or LDA?")
+    precision = linalg.inv(blob_stds[0])
+    alpha_k = []
+    alpha_k_0 = []
+    for clazz in range(len(blob_centers) - 1):
+        alpha_k.append(
+            np.dot(precision,
+                   (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis]))
+        alpha_k_0.append(
+            np.dot(- 0.5 * (blob_centers[clazz] +
+                            blob_centers[-1])[np.newaxis, :], alpha_k[-1]))
+
+    sample = np.array([[-22, 22]])
+
+    def discriminant_func(sample, coef, intercept, clazz):
+        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz]))
+
+    prob = np.array([float(
+        discriminant_func(sample, alpha_k, alpha_k_0, clazz) /
+        (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
+                  for clazz in range(n_classes - 1)]))) for clazz in range(
+                      n_classes - 1)])
+
+    prob_ref = 1 - np.sum(prob)
+
+    # check the consistency of the computed probability
+    # all probabilities should sum to one
+    prob_ref_2 = float(
+        1 / (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
+                      for clazz in range(n_classes - 1)]))
+    )
+
+    assert prob_ref == pytest.approx(prob_ref_2)
+    # check that the probabilities estimated by LDA are close to the
+    # theoretical probabilities
+    assert_allclose(lda.predict_proba(sample),
+                    np.hstack([prob, prob_ref])[np.newaxis],
+                    atol=1e-2)
+
+
 def test_lda_priors():
     # Test priors (negative priors)
     priors = np.array([0.5, -0.5])
@@ -229,7 +301,7 @@ def test_lda_scaling():
 
 
 def test_lda_store_covariance():
-    # Test for slover 'lsqr' and 'eigen'
+    # Test for solver 'lsqr' and 'eigen'
     # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers
     for solver in ('lsqr', 'eigen'):
         clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
@@ -245,7 +317,7 @@ def test_lda_store_covariance():
         np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
    )
 
-    # Test for SVD slover, the default is to not set the covariances_ attribute
+    # Test for SVD solver, the default is to not set the covariances_ attribute
     clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6)
     assert not hasattr(clf, 'covariance_')