[BUG] predict_proba should use the softmax function in the multinomial case · scikit-learn/scikit-learn@c3cfebe · GitHub
[go: up one dir, main page]

Skip to content

Commit c3cfebe

Browse files
committed
[BUG] predict_proba should use the softmax function in the multinomial case
1 parent 11d33bc commit c3cfebe

File tree

3 files changed

+44
-8
lines changed

3 files changed

+44
-8
lines changed

sklearn/linear_model/base.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -238,16 +238,32 @@ def _predict_proba_lr(self, X):
238238
1. / (1. + np.exp(-self.decision_function(X)));
239239
multiclass is handled by normalizing that over all classes.
240240
"""
241+
from sklearn.linear_model.logistic import (
242+
LogisticRegression, LogisticRegressionCV)
243+
244+
calculate_ovr = True
241245
prob = self.decision_function(X)
242-
prob *= -1
243-
np.exp(prob, prob)
244-
prob += 1
245-
np.reciprocal(prob, prob)
246-
if len(prob.shape) == 1:
247-
return np.vstack([1 - prob, prob]).T
246+
binary = len(prob.shape) == 1
247+
if (isinstance(self, LogisticRegression) or
248+
isinstance(self, LogisticRegressionCV)) and (
249+
self.multi_class == "multinomial" and not binary):
250+
calculate_ovr = False
251+
if calculate_ovr:
252+
prob *= -1
253+
np.exp(prob, prob)
254+
prob += 1
255+
np.reciprocal(prob, prob)
256+
if binary:
257+
return np.vstack([1 - prob, prob]).T
258+
else:
259+
# OvR normalization, like LibLinear's predict_probability
260+
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
261+
return prob
262+
248263
else:
249-
# OvR normalization, like LibLinear's predict_probability
250-
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
264+
np.exp(prob, prob)
265+
sum_prob = np.sum(prob, axis=1).reshape((-1, 1))
266+
prob /= sum_prob
251267
return prob
252268

253269

sklearn/linear_model/logistic.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,14 @@ def predict_proba(self, X):
10881088
The returned estimates for all classes are ordered by the
10891089
label of classes.
10901090
1091+
For a multi_class problem, if multi_class is set to be "multinomial"
1092+
the softmax function is used to find the predicted probability of
1093+
each class.
1094+
Else use a one-vs-rest approach, i.e., calculating the probability
1095+
of each class assuming it to be positive using the logistic function.
1096+
Normalize across all the classes at the end such that the sum of
1097+
probabilities is 1.
1098+
10911099
Parameters
10921100
----------
10931101
X : array-like, shape = [n_samples, n_features]

sklearn/linear_model/tests/test_logistic.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,15 @@ def test_logreg_cv_penalty():
675675
lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
676676
lr.fit(X, y)
677677
assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_))
678+
679+
680+
def test_logreg_predict_proba():
681+
X, y = make_classification(
682+
n_samples=10, n_features=20, random_state=0, n_classes=3, n_informative=10)
683+
clf = LogisticRegression(multi_class="multinomial", solver="lbfgs")
684+
clf.fit(X, y)
685+
assert_array_almost_equal(np.sum(clf.predict_proba(X), axis=1), np.ones(10))
686+
687+
clf = LogisticRegression(multi_class="multinomial", solver="lbfgs")
688+
clf.fit(X, y)
689+
assert_array_almost_equal(np.sum(clf.predict_proba(X), axis=1), np.ones(10))

0 commit comments

Comments
 (0)
0