FIX Fixed reshaping attributes of LogisticRegressionCV (#15044) · NeuroDataDesign/scikit-learn@220e146 · GitHub
[go: up one dir, main page]

Skip to content

Commit 220e146

Browse files
NicolasHug and glemaitre
authored and committed
FIX Fixed reshaping attributes of LogisticRegressionCV (scikit-learn#15044)
1 parent 0eebade commit 220e146

File tree

3 files changed

+53
-3
lines changed

3 files changed

+53
-3
lines changed

doc/whats_new/v0.22.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,10 @@ Changelog
343343
now correctly scores when `cv=None`.
344344
:pr:`14864` by :user:`Venkatachalam N <venkyyuvy>`.
345345

346+
- |FIX| Fixed a bug in :class:`linear_model.LogisticRegressionCV` where the
347+
``scores_``, ``n_iter_`` and ``coefs_paths_`` attributes would have a wrong
348+
ordering with ``penalty='elasticnet'``. :pr:`15044` by `Nicolas Hug`_
349+
346350
- |FIX| :class:`linear_model.MultiTaskLassoCV` and
347351
:class:`linear_model.MultiTaskElasticNetCV` with X of dtype int
348352
and `fit_intercept=True`.

sklearn/linear_model/logistic.py

Lines changed: 18 additions & 3 deletions
10000
Original file line numberDiff line numberDiff line change
@@ -2201,14 +2201,29 @@ def fit(self, X, y, sample_weight=None):
22012201
# if elasticnet was used, add the l1_ratios dimension to some
22022202
# attributes
22032203
if self.l1_ratios is not None:
2204+
# with n_cs=2 and n_l1_ratios=3
2205+
# the layout of scores is
2206+
# [c1, c2, c1, c2, c1, c2]
2207+
# l1_1 , l1_2 , l1_3
2208+
# To get a 2d array with the following layout
2209+
# l1_1, l1_2, l1_3
2210+
# c1 [[ . , . , . ],
2211+
# c2 [ . , . , . ]]
2212+
# We need to first reshape and then transpose.
2213+
# The same goes for the other arrays
22042214
for cls, coefs_path in self.coefs_paths_.items():
22052215
self.coefs_paths_[cls] = coefs_path.reshape(
2206-
(len(folds), self.Cs_.size, self.l1_ratios_.size, -1))
2216+
(len(folds), self.l1_ratios_.size, self.Cs_.size, -1))
2217+
self.coefs_paths_[cls] = np.transpose(self.coefs_paths_[cls],
2218+
(0, 2, 1, 3))
22072219
for cls, score in self.scores_.items():
22082220
self.scores_[cls] = score.reshape(
2209-
(len(folds), self.Cs_.size, self.l1_ratios_.size))
2221+
(len(folds), self.l1_ratios_.size, self.Cs_.size))
2222+
self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))
2223+
22102224
self.n_iter_ = self.n_iter_.reshape(
2211-
(-1, len(folds), self.Cs_.size, self.l1_ratios_.size))
2225+
(-1, len(folds), self.l1_ratios_.size, self.Cs_.size))
2226+
self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))
22122227

22132228
return self
22142229

sklearn/linear_model/tests/test_logistic.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from sklearn.model_selection import StratifiedKFold
1414
from sklearn.model_selection import GridSearchCV
1515
from sklearn.model_selection import train_test_split
16+
from sklearn.model_selection import cross_val_score
1617
from sklearn.preprocessing import LabelEncoder, StandardScaler
1718
from sklearn.utils import compute_class_weight, _IS_32BIT
1819
from sklearn.utils.testing import assert_almost_equal
@@ -1760,3 +1761,33 @@ def test_penalty_none(solver):
17601761
"LogisticRegressionCV",
17611762
lr.fit, X, y
17621763
)
1764+
1765+
1766+
def test_scores_attribute_layout_elasticnet():
1767+
# Non-regression test for issue #14955.
1768+
# When penalty is elastic net, each entry of the scores_ dict has shape
1769+
# (n_folds, n_Cs, n_l1_ratios).
1770+
# Here we make sure that, in each per-class scores_ array, the Cs axis comes
1771+
# before the l1_ratios axis (after the leading folds axis).
1772+
1773+
X, y = make_classification(n_samples=1000, random_state=0)
1774+
cv = StratifiedKFold(n_splits=5, shuffle=False)
1775+
1776+
l1_ratios = [.1, .9]
1777+
Cs = [.1, 1, 10]
1778+
1779+
lrcv = LogisticRegressionCV(penalty='elasticnet', solver='saga',
1780+
l1_ratios=l1_ratios, Cs=Cs, cv=cv,
1781+
random_state=0)
1782+
lrcv.fit(X, y)
1783+
1784+
avg_scores_lrcv = lrcv.scores_[1].mean(axis=0) # average over folds
1785+
1786+
for i, C in enumerate(Cs):
1787+
for j, l1_ratio in enumerate(l1_ratios):
1788+
1789+
lr = LogisticRegression(penalty='elasticnet', solver='saga', C=C,
1790+
l1_ratio=l1_ratio, random_state=0)
1791+
1792+
avg_score_lr = cross_val_score(lr, X, y, cv=cv).mean()
1793+
assert avg_scores_lrcv[i, j] == pytest.approx(avg_score_lr)

0 commit comments

Comments
 (0)
0