Added check_input in Lasso for DictLearning performance · scikit-learn/scikit-learn@77b7012 · GitHub

Commit 77b7012

Arthur Mensch committed
Added check_input in Lasso for DictLearning performance
1 parent b3694b1 commit 77b7012

File tree

5 files changed: +120 -34 lines changed

doc/whats_new.rst
sklearn/decomposition/dict_learning.py
sklearn/linear_model/base.py
sklearn/linear_model/coordinate_descent.py
sklearn/linear_model/tests/test_coordinate_descent.py


doc/whats_new.rst

Lines changed: 4 additions & 0 deletions
@@ -105,6 +105,10 @@ Enhancements
     with ``n_jobs > 1`` used with a large grid of parameters on a small
     dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.
 
+   - Improved the speed (roughly 3 times faster per iteration) of
+     :class:`decomposition.DictionaryLearning` with the coordinate
+     descent method from :class:`linear_model.Lasso`. By `Arthur Mensch`_.
+
 
 Bug fixes
 .........

sklearn/decomposition/dict_learning.py

Lines changed: 23 additions & 9 deletions
@@ -107,10 +107,10 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
 
     elif algorithm == 'lasso_cd':
         alpha = float(regularization) / n_features  # account for scaling
-        clf = Lasso(alpha=alpha, fit_intercept=False, precompute=gram,
-                    max_iter=max_iter, warm_start=True)
+        clf = Lasso(alpha=alpha, fit_intercept=False, normalize=False,
+                    precompute=gram, max_iter=max_iter, warm_start=True)
         clf.coef_ = init
-        clf.fit(dictionary.T, X.T)
+        clf.fit(dictionary.T, X.T, check_input=False)
         new_code = clf.coef_
 
     elif algorithm == 'lars':
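A minimal sketch (not the library code) of the pattern this hunk speeds up: many warm-started Lasso solves against one fixed dictionary, with the arrays put into the expected dtype and memory order once so that every subsequent fit can pass check_input=False. Shapes and alpha here are illustrative:

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
n_components, n_features, n_samples = 8, 30, 100
D = rng.randn(n_components, n_features)            # dictionary atoms as rows

# Validate the layout once: D.T is the design matrix, one target per sample.
design = np.asfortranarray(D.T, dtype=np.float64)  # (n_features, n_components)
targets = np.asfortranarray(rng.randn(n_features, n_samples), dtype=np.float64)

clf = Lasso(alpha=0.1, fit_intercept=False, warm_start=True)
for _ in range(3):                                 # e.g. alternating updates
    clf.fit(design, targets, check_input=False)    # skips check_array per call
    code = clf.coef_                               # (n_samples, n_components)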
@@ -224,8 +224,10 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
     n_components = dictionary.shape[0]
 
     if gram is None and algorithm != 'threshold':
-        gram = np.dot(dictionary, dictionary.T)
-    if cov is None:
+        # Transposing product to ensure Fortran ordering
+        gram = np.dot(dictionary, dictionary.T).T
+
+    if cov is None and algorithm != 'lasso_cd':
         copy_cov = False
         cov = np.dot(dictionary, X.T)
 
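The added transpose deserves a one-line demonstration: for a C-ordered array, np.dot(D, D.T) comes back C-ordered, and transposing it is a zero-copy Fortran-ordered view of the same symmetric Gram matrix:

import numpy as np

D = np.random.randn(5, 7)                 # C-ordered by default
gram = np.dot(D, D.T).T
print(gram.flags['F_CONTIGUOUS'])         # True: layout the CD solver expects
print(np.allclose(gram, gram.T))          # True: Gram matrices are symmetric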
@@ -239,18 +241,27 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
         regularization = 1.
 
     if n_jobs == 1 or algorithm == 'threshold':
-        return _sparse_encode(X, dictionary, gram, cov=cov,
+        code = _sparse_encode(X,
+                              dictionary, gram, cov=cov,
                               algorithm=algorithm,
                               regularization=regularization, copy_cov=copy_cov,
-                              init=init, max_iter=max_iter)
+                              init=init,
+                              max_iter=max_iter)
+        # This ensures that the dimensionality of code is always 2,
+        # consistent with the case n_jobs > 1
+        if code.ndim == 1:
+            code = code[np.newaxis, :]
+        return code
 
     # Enter parallel code block
     code = np.empty((n_samples, n_components))
     slices = list(gen_even_slices(n_samples, _get_n_jobs(n_jobs)))
 
     code_views = Parallel(n_jobs=n_jobs)(
         delayed(_sparse_encode)(
-            X[this_slice], dictionary, gram, cov[:, this_slice], algorithm,
+            X[this_slice], dictionary, gram,
+            cov[:, this_slice] if cov is not None else None,
+            algorithm,
             regularization=regularization, copy_cov=copy_cov,
             init=init[this_slice] if init is not None else None,
             max_iter=max_iter)
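As an aside, the parallel branch above splits the work with gen_even_slices, which yields contiguous slices covering range(n_samples) as evenly as possible, one per worker:

from sklearn.utils import gen_even_slices

print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]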
@@ -650,6 +661,10 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
     else:
         X_train = X
 
+    dictionary = check_array(dictionary, order='F', dtype=np.float64,
+                             copy=False)
+    X_train = check_array(X_train, order='C', dtype=np.float64, copy=False)
+
     batches = gen_batches(n_samples, batch_size)
     batches = itertools.cycle(batches)
 
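The two check_array calls above pay the validation cost once, before the minibatch loop. A small sketch of the semantics; with copy=False, an array that already satisfies the requested dtype and order is passed through without copying:

import numpy as np
from sklearn.utils import check_array

X_train = check_array(np.random.randn(50, 20), order='C', dtype=np.float64)
same = check_array(X_train, order='C', dtype=np.float64, copy=False)
print(np.shares_memory(X_train, same))    # True: no copy was needed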
@@ -675,7 +690,6 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
         if verbose > 10 or ii % ceil(100. / verbose) == 0:
             print ("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)"
                    % (ii, dt, dt / 60))
-
         this_code = sparse_encode(this_X, dictionary.T, algorithm=method,
                                   alpha=alpha, n_jobs=n_jobs).T

sklearn/linear_model/base.py

Lines changed: 17 additions & 7 deletions
@@ -15,6 +15,7 @@
 from __future__ import division
 from abc import ABCMeta, abstractmethod
 import numbers
+import warnings
 
 import numpy as np
 import scipy.sparse as sp
@@ -77,7 +78,7 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
     return X, y, X_mean, y_mean, X_std


-def center_data(X, y, fit_intercept, normalize=False, copy=True,
+def center_data(X, y, fit_intercept, normalize=False, copy=True,
                 sample_weight=None):
     """
     Centers data to have mean zero along axis 0. This is here because
@@ -397,7 +398,8 @@ def fit(self, X, y):
         return self


-def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
+def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
+             Xy_precompute_order=None):
     """Aux function used at beginning of fit in linear models"""
     n_samples, n_features = X.shape
     if sparse.isspmatrix(X):
@@ -408,10 +410,13 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
         # copy was done in fit if necessary
         X, y, X_mean, y_mean, X_std = center_data(
             X, y, fit_intercept, normalize, copy=copy)
-
-    if hasattr(precompute, '__array__') \
-            and not np.allclose(X_mean, np.zeros(n_features)) \
-            and not np.allclose(X_std, np.ones(n_features)):
+    if hasattr(precompute, '__array__') and (
+            fit_intercept and not np.allclose(X_mean, np.zeros(n_features))
+            or normalize and not np.allclose(X_std, np.ones(n_features))):
+        warnings.warn("Gram matrix was provided but X was centered"
+                      " to fit intercept, "
+                      "or X was normalized : recomputing Gram matrix.",
+                      UserWarning)
         # recompute Gram
         precompute = 'auto'
         Xy = None
@@ -422,11 +427,16 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
 
     if precompute is True:
         precompute = np.dot(X.T, X)
+        if Xy_precompute_order == 'F':
+            precompute = np.dot(X.T, X).T
 
     if not hasattr(precompute, '__array__'):
         Xy = None  # cannot use Xy if precompute is not Gram
 
     if hasattr(precompute, '__array__') and Xy is None:
-        Xy = np.dot(X.T, y)
+        if Xy_precompute_order == 'F':
+            Xy = np.dot(y.T, X).T
+        else:
+            Xy = np.dot(X.T, y)
 
     return X, y, X_mean, y_mean, X_std, precompute, Xy
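The Xy_precompute_order='F' branch in isolation: np.dot(X.T, y) and np.dot(y.T, X).T hold identical values, but the second expression is a Fortran-ordered view, matching the memory layout the Gram code path reads:

import numpy as np

X = np.random.randn(6, 4)
y = np.random.randn(6, 2)
print(np.allclose(np.dot(X.T, y), np.dot(y.T, X).T))   # True: same values
print(np.dot(y.T, X).T.flags['F_CONTIGUOUS'])          # True: Fortran order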

sklearn/linear_model/coordinate_descent.py

Lines changed: 37 additions & 18 deletions
@@ -359,11 +359,18 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
     ElasticNet
     ElasticNetCV
     """
-    X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
-    y = check_array(y, 'csc', dtype=np.float64, order='F', copy=False,
-                    ensure_2d=False)
-    if Xy is not None:
-        Xy = check_array(Xy, 'csc', dtype=np.float64, order='F', copy=False,
-                         ensure_2d=False)
+    # We expect X and y to be already float64 Fortran ordered when bypassing
+    # checks
+    check_input = 'check_input' not in params or params['check_input']
+    pre_fit = 'pre_fit' not in params or params['pre_fit']
+    if check_input:
+        X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
+        y = check_array(y, 'csc', dtype=np.float64, order='F', copy=False,
+                        ensure_2d=False)
+        if Xy is not None:
+            Xy = check_array(Xy, 'csc', dtype=np.float64, order='F',
+                             copy=False,
+                             ensure_2d=False)
     n_samples, n_features = X.shape
 
     multi_output = False
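The default-True idiom used for both flags, shown in isolation with a hypothetical helper (_flag is not part of the codebase): a keyword absent from **params counts as True, and only an explicit False disables the checks:

def _flag(name, **params):
    # Missing flag -> True; an explicit value wins otherwise.
    return name not in params or params[name]

print(_flag('check_input'))                     # True
print(_flag('check_input', check_input=False))  # False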
@@ -380,10 +387,13 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
     else:
         X_sparse_scaling = np.zeros(n_features)
 
-    # X should be normalized and fit already.
-    X, y, X_mean, y_mean, X_std, precompute, Xy = \
-        _pre_fit(X, y, Xy, precompute, normalize=False, fit_intercept=False,
-                 copy=False)
+    # X should already be normalized and fitted if this function is
+    # called from ElasticNet.fit
+    if pre_fit:
+        X, y, X_mean, y_mean, X_std, precompute, Xy = \
+            _pre_fit(X, y, Xy, precompute, normalize=False,
+                     fit_intercept=False,
+                     copy=False, Xy_precompute_order='F')
     if alphas is None:
         # No need to normalize or fit_intercept: it has been done
         # above
@@ -428,7 +438,11 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
         model = cd_fast.enet_coordinate_descent_multi_task(
             coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)
     elif isinstance(precompute, np.ndarray):
-        precompute = check_array(precompute, 'csc', dtype=np.float64,
-                                 order='F')
+        # We expect precompute to be already Fortran ordered when bypassing
+        # checks
+        if check_input:
+            precompute = check_array(precompute, 'csc', dtype=np.float64,
+                                     order='F')
         model = cd_fast.enet_coordinate_descent_gram(
             coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
             tol, rng, random, positive)
@@ -601,7 +615,7 @@ def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=True,
         self.random_state = random_state
         self.selection = selection
 
-    def fit(self, X, y):
+    def fit(self, X, y, check_input=True):
         """Fit model with coordinate descent.
 
         Parameters
@@ -622,6 +636,7 @@ def fit(self, X, y):
         To avoid memory re-allocation it is advised to allocate the
         initial data in memory directly using that format.
         """
+
         if self.alpha == 0:
             warnings.warn("With alpha=0, this algorithm does not converge "
                           "well. You are advised to use the LinearRegression "
@@ -632,14 +647,16 @@ def fit(self, X, y):
                           "slower even when n_samples > n_features. Hence "
                           "it will be removed in 0.18.",
                           DeprecationWarning, stacklevel=2)
-
-        X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64,
-                         order='F', copy=self.copy_X and self.fit_intercept,
-                         multi_output=True, y_numeric=True)
-
+        # We expect X and y to be already float64 Fortran ordered arrays
+        # when bypassing checks
+        if check_input:
+            X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64,
+                             order='F',
+                             copy=self.copy_X and self.fit_intercept,
+                             multi_output=True, y_numeric=True)
         X, y, X_mean, y_mean, X_std, precompute, Xy = \
             _pre_fit(X, y, None, self.precompute, self.normalize,
-                     self.fit_intercept, copy=True)
+                     self.fit_intercept, copy=False, Xy_precompute_order='F')
 
         if y.ndim == 1:
             y = y[:, np.newaxis]
@@ -678,7 +695,9 @@ def fit(self, X, y):
                 X_mean=X_mean, X_std=X_std, return_n_iter=True,
                 coef_init=coef_[k], max_iter=self.max_iter,
                 random_state=self.random_state,
-                selection=self.selection)
+                selection=self.selection,
+                check_input=False,
+                pre_fit=False)
             coef_[k] = this_coef[:, 0]
             dual_gaps_[k] = this_dual_gap[0]
             self.n_iter_.append(this_iter[0])
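Taken together, the new surface looks roughly like the sketch below. It is only safe when the caller guarantees float64 Fortran-ordered input, as the dictionary-learning path now does; C-ordered data passed with check_input=False would be read with the wrong strides:

import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.utils import check_array

X = check_array(np.random.randn(30, 10), order='F', dtype=np.float64)
y = np.random.randn(30)

clf = ElasticNet(fit_intercept=False)
clf.fit(X, y, check_input=False)    # validation skipped, layout guaranteed
print(clf.coef_.shape)              # (10,)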

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 39 additions & 0 deletions
@@ -17,6 +17,7 @@
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_warns
+from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import TempMemmap
@@ -25,6 +26,7 @@
     LassoCV, ElasticNet, ElasticNetCV, MultiTaskLasso, MultiTaskElasticNet, \
     MultiTaskElasticNetCV, MultiTaskLassoCV, lasso_path, enet_path
 from sklearn.linear_model import LassoLarsCV, lars_path
+from sklearn.utils import check_array
 
 
 def check_warnings():
def check_warnings():
@@ -628,3 +630,40 @@ def test_sparse_dense_descent_paths():
     _, coefs, _ = path(X, y, fit_intercept=False)
     _, sparse_coefs, _ = path(csr, y, fit_intercept=False)
     assert_array_almost_equal(coefs, sparse_coefs)
+
+
+def test_check_input_false():
+    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
+    X = check_array(X, order='F', dtype='float64')
+    y = check_array(y, order='F', dtype='float64')
+    clf = ElasticNet(selection='cyclic', tol=1e-8)
+    # Check that no error is raised if data is provided in the right format
+    clf.fit(X, y, check_input=False)
+    X = check_array(X, order='F', dtype='float32')
+    clf.fit(X, y, check_input=True)
+    # Check that an error is raised if data is provided in the wrong format,
+    # because of check bypassing
+    assert_raises(ValueError, clf.fit, X, y, check_input=False)
+
+    # With no input checking, providing X in C order should result in false
+    # computation
+    X = check_array(X, order='C', dtype='float64')
+    clf.fit(X, y, check_input=False)
+    coef_false = clf.coef_
+    clf.fit(X, y, check_input=True)
+    coef_true = clf.coef_
+    assert_true(np.any(coef_true != coef_false))
+
+
+def test_overrided_gram_matrix():
+    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
+    Gram = X.T.dot(X)
+    clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram,
+                     fit_intercept=True)
+    assert_warns_message(UserWarning,
+                         "Gram matrix was provided but X was centered"
+                         " to fit intercept, "
+                         "or X was normalized : recomputing Gram matrix.",
+                         clf.fit, X, y)