scikit-learn
diff --git a/‎sklearn/linear_model/base.py
Lines changed: 8 additions & 24 deletions b/‎sklearn/linear_model/base.py
Lines changed: 8 additions & 24 deletions
diff --git a/‎sklearn/linear_model/logistic.py
Lines changed: 12 additions & 5 deletions b/‎sklearn/linear_model/logistic.py
Lines changed: 12 additions & 5 deletions
@@ -238,32 +238,16 @@ def _predict_proba_lr(self, X):
         1. / (1. + np.exp(-self.decision_function(X)));
         multiclass is handled by normalizing that over all classes.
         """
-        from sklearn.linear_model.logistic import (
-            LogisticRegression, LogisticRegressionCV)
-        
-        calculate_ovr = True
         prob = self.decision_function(X)
-        binary = len(prob.shape) == 1
-        if (isinstance(self, LogisticRegression) or
-            isinstance(self, LogisticRegressionCV)) and (
-                self.multi_class == "multinomial" and not binary):
-            calculate_ovr = False
-        if calculate_ovr:
-            prob *= -1
-            np.exp(prob, prob)
-            prob += 1
-            np.reciprocal(prob, prob)
-            if binary:
-                return np.vstack([1 - prob, prob]).T
-            else:
-                # OvR normalization, like LibLinear's predict_probability
-                prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
-                return prob
-
+        prob *= -1
+        np.exp(prob, prob)
+        prob += 1
+        np.reciprocal(prob, prob)
+        if prob.ndim == 1:
+            return np.vstack([1 - prob, prob]).T
         else:
-            np.exp(prob, prob)
-            sum_prob = np.sum(prob, axis=1).reshape((-1, 1))
-            prob /= sum_prob
+            # OvR normalization, like LibLinear's predict_probability
+            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
             return prob
 
 
 
@@ -1091,10 +1091,9 @@ def predict_proba(self, X):
         For a multi_class problem, if multi_class is set to be "multinomial"
         the softmax function is used to find the predicted probability of
         each class.
-        Else use a one-vs-rest approach, i.e calculating the probability
-        of each class assuming it to be positive using th logistic function.
-        Normalize across all the classes at the end such that the sum of
-        probabilities is 1.
+        Else use a one-vs-rest approach, i.e. calculate the probability
+        of each class assuming it to be positive using the logistic function,
+        and normalize these values across all the classes.
 
         Parameters
         ----------
@@ -1106,7 +1105,15 @@ def predict_proba(self, X):
             Returns the probability of the sample for each class in the model,
             where classes are ordered as they are in ``self.classes_``.
         """
-        return self._predict_proba_lr(X)
+        calculate_ovr = self.coef_.shape[0] == 1 or self.multi_class == "ovr"
+        if calculate_ovr:
+            return super(LogisticRegression, self)._predict_proba_lr(X)
+        else:
+            prob = self.decision_function(X)
+            np.exp(prob, prob)
+            sum_prob = np.sum(prob, axis=1).reshape((-1, 1))
+            prob /= sum_prob
+            return prob
 
     def predict_log_proba(self, X):
         """Log of probability estimates.