[BUG] predict_proba should use the softmax function in the multinomial case · scikit-learn/scikit-learn@c3cfebe · GitHub
[go: up one dir, main page]

Skip to content

Commit c3cfebe

Browse files
committed
[BUG] predict_proba should use the softmax function in the multinomial case
1 parent 11d33bc commit c3cfebe

File tree

3 files changed

+44
-8
lines changed

3 files changed

+44
-8
lines changed

sklearn/linear_model/base.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -238,16 +238,32 @@ def _predict_proba_lr(self, X):
238238
1. / (1. + np.exp(-self.decision_function(X)));
239239
multiclass is handled by normalizing that over all classes.
240240
"""
241+
from sklearn.linear_model.logistic import (
242+
LogisticRegression, LogisticRegressionCV)
243+
244+
calculate_ovr = True
241245
prob = self.decision_function(X)
242-
prob *= -1
243-
np.exp(prob, prob)
244-
prob += 1
245-
np.reciprocal(prob, prob)
246-
if len(prob.shape) == 1:
247-
return np.vstack([1 - prob, prob]).T
246+
binary = len(prob.shape) == 1
247+
if (isinstance(self, LogisticRegression) or
248+
isinstance(self, LogisticRegressionCV)) and (
249+
self.multi_class == "multinomial" and not binary):
250+
calculate_ovr = False
251+
if calculate_ovr:
252+
prob *= -1
253+
np.exp(prob, prob)
254+
prob += 1
255+
np.reciprocal(prob, prob)
256+
if binary:
257+
return np.vstack([1 - prob, prob]).T
258+
else:
259+
# OvR normalization, like LibLinear's predict_probability
260+
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
261+
return prob
262+
248263
else:
249-
# OvR normalization, like LibLinear's predict_probability
250-
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
264+
np.exp(prob, prob)
265+
sum_prob = np.sum(prob, axis=1).reshape((-1, 1))
266+
prob /= sum_prob
251267
return prob
252268

253269

sklearn/linear_model/logistic.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,14 @@ def predict_proba(self, X):
10881088
The returned estimates for all classes are ordered by the
10891089
label of classes.
10901090
1091+
For a multi_class problem, if multi_class is set to be "multinomial"
1092+
the softmax function is used to find the predicted probability of
1093+
each class.
1094+
Else use a one-vs-rest approach, i.e., calculating the probability
1095+
of each class assuming it to be positive using the logistic function.
1096+
Normalize across all the classes at the end such that the sum of
1097+
probabilities is 1.
1098+
10911099
Parameters
10921100
----------
10931101
X : array-like, shape = [n_samples, n_features]

sklearn/linear_model/tests/test_logistic.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,15 @@ def test_logreg_cv_penalty():
675675
lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
676676
lr.fit(X, y)
677677
assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_))
678+
679+
680+
def test_logreg_predict_proba():
681+
X, y = make_classification(
682+
n_samples=10, n_features=20, random_state=0, n_classes=3, n_informative=10)
683+
clf = LogisticRegression(multi_class="multinomial", solver="lbfgs")
684+
clf.fit(X, y)
685+
assert_array_almost_equal(np.sum(clf.predict_proba(X), axis=1), np.ones(10))
686+
687+
clf = LogisticRegression(multi_class="multinomial", solver="lbfgs")
688+
clf.fit(X, y)
689+
assert_array_almost_equal(np.sum(clf.predict_proba(X), axis=1), np.ones(10))

0 commit comments

Comments
 (0)
0