8000 ENH Sample weights for ElasticNet (#15436) · gio8tisu/scikit-learn@e5b18f5 · GitHub
[go: up one dir, main page]

Skip to content

Commit e5b18f5

Browse files
Christian Lorentzen authored and gio8tisu committed
ENH Sample weights for ElasticNet (scikit-learn#15436)
1 parent 44bbff1 commit e5b18f5

File tree

8 files changed

+271
-37
lines changed

8 files changed

+271
-37
lines changed

doc/whats_new/v0.23.rst

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ Changelog
5454
``tol=0`` as with the default ``algorithm="full"``. :pr:`16075` by
5555
:user:`Erich Schubert <kno10>`.
5656

57-
- |Efficiency| :class:`cluster.Birch` implementation of the predict method
58-
avoids high memory footprint by calculating the distances matrix using
57+
- |Efficiency| :class:`cluster.Birch` implementation of the predict method
58+
avoids high memory footprint by calculating the distances matrix using
5959
a chunked scheme.
60-
:pr:`16149` by :user:`Jeremie du Boisberranger <jeremiedbb>` and
60+
:pr:`16149` by :user:`Jeremie du Boisberranger <jeremiedbb>` and
6161
:user:`Alex Shacked <alexshacked>`.
6262

6363
:mod:`sklearn.compose`
@@ -70,7 +70,7 @@ Changelog
7070
:mod:`sklearn.datasets`
7171
.......................
7272

73-
- |Enhancement| Added ``return_centers`` parameter in
73+
- |Enhancement| Added ``return_centers`` parameter in
7474
:func:`datasets.make_blobs`, which can be used to return
7575
centers for each cluster.
7676
:pr:`15709` by :user:`<shivamgargsya>` and
@@ -154,10 +154,14 @@ Changelog
154154

155155
- |Enhancement| :func:`gaussian_process.kernels.Matern` returns the RBF kernel when ``nu=np.inf``.
156156
:pr:`15503` by :user:`Sam Dixon <sam-dixon>`.
157-
157+
158158
:mod:`sklearn.linear_model`
159159
...........................
160160

161+
- |Feature| Support of `sample_weight` in :class:`linear_model.ElasticNet` and
162+
:class:`linear_model.Lasso` for dense feature matrix `X`.
163+
:pr:`15436` by :user:`Christian Lorentzen <lorentzenchr>`.
164+
161165
- |Fix| Fixed a bug where if a `sample_weight` parameter was passed to the fit
162166
method of :class:`linear_model.RANSACRegressor`, it would not be passed to
163167
the wrapped `base_estimator` during the fitting of the final model.
@@ -196,7 +200,7 @@ Changelog
196200
......................
197201

198202
- |Fix| Fixed a bug in :func:`metrics.mean_squared_error` to not ignore
199-
argument `squared` when argument `multioutput='raw_values'`.
203+
argument `squared` when argument `multioutput='raw_values'`.
200204
:pr:`16323` by :user:`Rushabh Vasani <rushabh-v>`
201205

202206
- |Fix| Fixed a bug in :func:`metrics.mutual_info_score` where negative
@@ -230,10 +234,10 @@ Changelog
230234
:mod:`sklearn.neighbors`
231235
..............................
232236

233-
- |Fix| Fix a bug which converted a list of arrays into a 2-D object
237+
- |Fix| Fix a bug which converted a list of arrays into a 2-D object
234238
array instead of a 1-D array containing NumPy arrays. This bug
235239
was affecting :meth:`neighbors.NearestNeighbors.radius_neighbors`.
236-
:pr:`16076` by :user:`Guillaume Lemaitre <glemaitre>` and
240+
:pr:`16076` by :user:`Guillaume Lemaitre <glemaitre>` and
237241
:user:`Alex Shacked <alexshacked>`.
238242

239243
:mod:`sklearn.neural_network`

sklearn/compose/tests/test_target.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from sklearn.pipeline import Pipeline
1818

19-
from sklearn.linear_model import LinearRegression, Lasso
19+
from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
2020

2121
from sklearn import datasets
2222

@@ -37,7 +37,7 @@ def test_transform_target_regressor_error():
3737
regr.fit(X, y)
3838
# fit with sample_weight with a regressor which does not support it
3939
sample_weight = np.ones((y.shape[0],))
40-
regr = TransformedTargetRegressor(regressor=Lasso(),
40+
regr = TransformedTargetRegressor(regressor=OrthogonalMatchingPursuit(),
4141
transformer=StandardScaler())
4242
with pytest.raises(TypeError, match=r"fit\(\) got an unexpected "
4343
"keyword argument 'sample_weight'"):

sklearn/linear_model/_base.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Generalized Linear models.
2+
Generalized Linear Models.
33
"""
44

55
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
@@ -100,7 +100,8 @@ def make_dataset(X, y, sample_weight, random_state=None):
100100

101101
def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
102102
sample_weight=None, return_mean=False, check_input=True):
103-
"""
103+
"""Center and scale data.
104+
104105
Centers data to have mean zero along axis 0. If fit_intercept=False or if
105106
the X is a sparse matrix, no centering is done, but normalization can still
106107
be applied. The function returns the statistics necessary to reconstruct
@@ -180,7 +181,16 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
180181
# sample_weight makes the refactoring tricky.
181182

182183
def _rescale_data(X, y, sample_weight):
183-
"""Rescale data so as to support sample_weight"""
184+
"""Rescale data sample-wise by square root of sample_weight.
185+
186+
For many linear models, this enables easy support for sample_weight.
187+
188+
Returns
189+
-------
190+
X_rescaled : {array-like, sparse matrix}
191+
192+
y_rescaled : {array-like, sparse matrix}
193+
"""
184194
n_samples = X.shape[0]
185195
sample_weight = np.asarray(sample_weight)
186196
if sample_weight.ndim == 0:
@@ -540,8 +550,15 @@ def rmatvec(b):
540550

541551

542552
def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
543-
check_input=True):
544-
"""Aux function used at beginning of fit in linear models"""
553+
check_input=True, sample_weight=None):
554+
"""Aux function used at beginning of fit in linear models
555+
556+
Parameters
557+
----------
558+
order : 'F', 'C' or None, default=None
559+
Whether X and y will be forced to be fortran or c-style. Only relevant
560+
if sample_weight is not None.
561+
"""
545562
n_samples, n_features = X.shape
546563

547564
if sparse.isspmatrix(X):
@@ -554,9 +571,11 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
554571
# copy was done in fit if necessary
555572
X, y, X_offset, y_offset, X_scale = _preprocess_data(
556573
X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy,
557-
check_input=check_input)
574+
check_input=check_input, sample_weight=sample_weight)
575+
if sample_weight is not None:
576+
X, y = _rescale_data(X, y, sample_weight=sample_weight)
558577
if hasattr(precompute, '__array__') and (
559-
fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
578+
fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
560579
normalize and not np.allclose(X_scale, np.ones(n_features))):
561580
warnings.warn("Gram matrix was provided but X was centered"
562581
" to fit intercept, "

sklearn/linear_model/_coordinate_descent.py

Lines changed: 87 additions & 7 deletions
F438
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import sys
99
import warnings
10+
import numbers
1011
from abc import ABCMeta, abstractmethod
1112

1213
import numpy as np
@@ -20,13 +21,57 @@
2021
from ..utils.validation import check_random_state
2122
from ..model_selection import check_cv
2223
from ..utils.extmath import safe_sparse_dot
23-
from ..utils.fixes import _joblib_parallel_args
24-
from ..utils.validation import check_is_fitted
24+
from ..utils.fixes import _astype_copy_false, _joblib_parallel_args
25+
from ..utils.validation import check_is_fitted, _check_sample_weight
2526
from ..utils.validation import column_or_1d
2627

2728
from . import _cd_fast as cd_fast
2829

2930

31+
def _set_order(X, y, order='C'):
32+
"""Change the order of X and y if necessary.
33+
34+
Parameters
35+
----------
36+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
37+
Training data.
38+
39+
y : ndarray of shape (n_samples,)
40+
Target values.
41+
42+
order : {None, 'C', 'F'}
43+
If 'C', dense arrays are returned as C-ordered, sparse matrices in csr
44+
format. If 'F', dense arrays are returned as F-ordered, sparse matrices
45+
in csc format.
46+
47+
Returns
48+
-------
49+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
50+
Training data with guaranteed order.
51+
52+
y : ndarray of shape (n_samples,)
53+
Target values with guaranteed order.
54+
"""
55+
if order not in [None, 'C', 'F']:
56+
raise ValueError("Unknown value for order. Got {} instead of "
57+
"None, 'C' or 'F'.".format(order))
58+
sparse_X = sparse.issparse(X)
59+
sparse_y = sparse.issparse(y)
60+
if order is not None:
61+
sparse_format = "csc" if order == "F" else "csr"
62+
if sparse_X:
63+
# As of scipy 1.1.0, new argument copy=False by default.
64+
# This is what we want.
65+
X = X.asformat(sparse_format, **_astype_copy_false(X))
66+
else:
67+
X = np.asarray(X, order=order)
68+
if sparse_y:
69+
y = y.asformat(sparse_format)
70+
else:
71+
y = np.asarray(y, order=order)
72+
return X, y
73+
74+
3075
###############################################################################
3176
# Paths functions
3277

@@ -661,7 +706,7 @@ def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=True,
661706
self.random_state = random_state
662707
self.selection = selection
663708

664-
def fit(self, X, y, check_input=True):
709+
def fit(self, X, y, sample_weight=None, check_input=True):
665710
"""Fit model with coordinate descent.
666711
667712
Parameters
@@ -673,6 +718,9 @@ def fit(self, X, y, check_input=True):
673718
(n_samples, n_targets)
674719
Target. Will be cast to X's dtype if necessary
675720
721+
sample_weight : float or array-like of shape (n_samples,), default=None
722+
Sample weight.
723+
676724
check_input : bool, default=True
677725
Allow to bypass several input checking.
678726
Don't use this parameter unless you know what you do.
@@ -709,18 +757,49 @@ def fit(self, X, y, check_input=True):
709757
y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
710758
ensure_2d=False)
711759

712-
# Ensure copying happens only once, don't do it again if done above
760+
n_samples, n_features = X.shape
761+
alpha = self.alpha
762+
763+
if isinstance(sample_weight, numbers.Number):
764+
sample_weight = None
765+
if sample_weight is not None:
766+
if check_input:
767+
if sparse.issparse(X):
768+
raise ValueError("Sample weights do not (yet) support "
769+
"sparse matrices.")
770+
sample_weight = _check_sample_weight(sample_weight, X,
771+
dtype=X.dtype)
772+
# simplify things by rescaling sw to sum up to n_samples
773+
# => np.average(x, weights=sw) = np.mean(sw * x)
774+
sample_weight *= (n_samples / np.sum(sample_weight))
775+
# Objective function is:
776+
# 1/2 * np.average(squared error, weights=sw) + alpha * penalty
777+
# but coordinate descent minimizes:
778+
# 1/2 * sum(squared error) + alpha * penalty
779+
# enet_path therefore sets alpha = n_samples * alpha
780+
# With sw, enet_path should set alpha = sum(sw) * alpha
781+
# Therefore, we rescale alpha = sum(sw) / n_samples * alpha
782+
# Note: As we rescaled sample_weights to sum up to n_samples,
783+
# we don't need this
784+
# alpha *= np.sum(sample_weight) / n_samples
785+
786+
# Ensure copying happens only once, don't do it again if done above.
787+
# X and y will be rescaled if sample_weight is not None, order='F'
788+
# ensures that the returned X and y are still F-contiguous.
713789
should_copy = self.copy_X and not X_copied
714790
X, y, X_offset, y_offset, X_scale, precompute, Xy = \
715791
_pre_fit(X, y, None, self.precompute, self.normalize,
716792
self.fit_intercept, copy=should_copy,
717-
check_input=check_input)
793+
check_input=check_input, sample_weight=sample_weight)
794+
# coordinate descent needs F-ordered arrays and _pre_fit might have
795+
# called _rescale_data
796+
if check_input or sample_weight is not None:
797+
X, y = _set_order(X, y, order='F')
718798
if y.ndim == 1:
719799
y = y[:, np.newaxis]
720800
if Xy is not None and Xy.ndim == 1:
721801
Xy = Xy[:, np.newaxis]
722802

723-
n_samples, n_features = X.shape
724803
n_targets = y.shape[1]
725804

726805
if self.selection not in ['cyclic', 'random']:
@@ -745,7 +824,7 @@ def fit(self, X, y, check_input=True):
745824
_, this_coef, this_dual_gap, this_iter = \
746825
self.path(X, y[:, k],
747826
l1_ratio=self.l1_ratio, eps=None,
748-
n_alphas=None, alphas=[self.alpha],
827+
n_alphas=None, alphas=[alpha],
749828
precompute=precompute, Xy=this_Xy,
750829
fit_intercept=False, normalize=False, copy_X=True,
751830
verbose=False, tol=self.tol, positive=self.positive,
@@ -1397,6 +1476,7 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
13971476
def _more_tags(self):
13981477
return {'multioutput': False}
13991478

1479+
14001480
class ElasticNetCV(RegressorMixin, LinearModelCV):
14011481
"""Elastic Net model with iterative fitting along a regularization path.
14021482

sklearn/linear_model/tests/test_base.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,11 @@ def test_fit_intercept():
131131
lr3_with_intercept = LinearRegression().fit(X3, y)
132132

133133
assert (lr2_with_intercept.coef_.shape ==
134-
lr2_without_intercept.coef_.shape)
134+
lr2_without_intercept.coef_.shape)
135135
assert (lr3_with_intercept.coef_.shape ==
136-
lr3_without_intercept.coef_.shape)
136+
lr3_without_intercept.coef_.shape)
137137
assert (lr2_without_intercept.coef_.ndim ==
138-
lr3_without_intercept.coef_.ndim)
138+
lr3_without_intercept.coef_.ndim)
139139

140140

141141
def test_linear_regression_sparse(random_state=0):
@@ -451,16 +451,23 @@ def test_dtype_preprocess_data():
451451
assert_array_almost_equal(X_norm_32, X_norm_64)
452452

453453

454-
def test_rescale_data():
454+
@pytest.mark.parametrize('n_targets', [None, 2])
455+
def test_rescale_data_dense(n_targets):
455456
n_samples = 200
456457
n_features = 2
457458

458459
sample_weight = 1.0 + rng.rand(n_samples)
459460
X = rng.rand(n_samples, n_features)
460-
y = rng.rand(n_samples)
461+
if n_targets is None:
462+
y = rng.rand(n_samples)
463+
else:
464+
y = rng.rand(n_samples, n_targets)
461465
rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
462466
rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
463-
rescaled_y2 = y * np.sqrt(sample_weight)
467+
if n_targets is None:
468+
rescaled_y2 = y * np.sqrt(sample_weight)
469+
else:
470+
rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
464471
assert_array_almost_equal(rescaled_X, rescaled_X2)
465472
assert_array_almost_equal(rescaled_y, rescaled_y2)
466473

0 commit comments

Comments
 (0)
0