diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
index 990baef4c8c78..2f68191da58e7 100644
--- a/sklearn/decomposition/dict_learning.py
+++ b/sklearn/decomposition/dict_learning.py
@@ -18,7 +18,7 @@
 from ..externals.joblib import Parallel, delayed, cpu_count
 from ..externals.six.moves import zip
 from ..utils import (check_array, check_random_state, gen_even_slices,
-                     gen_batches, _get_n_jobs)
+                     gen_batches, _get_n_jobs, check_copy_and_writeable)
 from ..utils.extmath import randomized_svd, row_norms
 from ..utils.validation import check_is_fitted
 from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars
@@ -91,6 +91,9 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
     sklearn.linear_model.Lasso
     SparseCoder
     """
+    if cov is not None:
+        cov = check_copy_and_writeable(cov, copy_cov)
+
     if X.ndim == 1:
         X = X[:, np.newaxis]
     n_samples, n_features = X.shape
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index 65e3fc99d1742..0bbb76bd52789 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -238,3 +238,19 @@ def test_sparse_coder_estimator():
                        transform_alpha=0.001).transform(X)
     assert_true(not np.all(code == 0))
     assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
+
+
+def test_sparse_coder_mmap():
+    # Test that SparseCoder does not error when read-only arrays
+    # are passed to child processes
+
+    rng = np.random.RandomState(777)
+    num_cols = 64
+    init_dict = rng.rand(500, num_cols)
+    # Ensure that `data` is >2M. Joblib memory maps arrays
+    # if they are larger than 1MB. The 4 accounts for float32
+    # data type. Floor division keeps `num_rows` an int on Python 3.
+    num_rows = (1024*1024*2)//(4*num_cols)
+    data = rng.rand(num_rows, num_cols).astype(np.float32)
+    sc = SparseCoder(init_dict, transform_algorithm='omp', n_jobs=2)
+    sc.fit_transform(data)
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
index 9fce600950c9e..e1936c351d0b2 100644
--- a/sklearn/linear_model/least_angle.py
+++ b/sklearn/linear_model/least_angle.py
@@ -21,7 +21,8 @@
 
 from .base import LinearModel
 from ..base import RegressorMixin
-from ..utils import arrayfuncs, as_float_array, check_X_y
+from ..utils import (arrayfuncs, as_float_array, check_X_y,
+                     check_copy_and_writeable)
 from ..model_selection import check_cv
 from ..exceptions import ConvergenceWarning
 from ..externals.joblib import Parallel, delayed
@@ -847,11 +848,6 @@ def __init__(self, alpha=1.0, fit_intercept=True, verbose=False,
 ###############################################################################
 # Cross-validated estimator classes
 
-def _check_copy_and_writeable(array, copy=False):
-    if copy or not array.flags.writeable:
-        return array.copy()
-    return array
-
 
 def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None,
                         copy=True, method='lars', verbose=False,
@@ -938,10 +934,10 @@ def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None,
     residues : array, shape (n_alphas, n_samples)
         Residues of the prediction on the test data
     """
-    X_train = _check_copy_and_writeable(X_train, copy)
-    y_train = _check_copy_and_writeable(y_train, copy)
-    X_test = _check_copy_and_writeable(X_test, copy)
-    y_test = _check_copy_and_writeable(y_test, copy)
+    X_train = check_copy_and_writeable(X_train, copy)
+    y_train = check_copy_and_writeable(y_train, copy)
+    X_test = check_copy_and_writeable(X_test, copy)
+    y_test = check_copy_and_writeable(y_test, copy)
 
     if fit_intercept:
         X_mean = X_train.mean(axis=0)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 4d62e4f394180..b6bb07c593c0b
100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -12,7 +12,7 @@ assert_all_finite, check_random_state, column_or_1d, check_array, check_consistent_length, check_X_y, indexable, - check_symmetric) + check_symmetric, check_copy_and_writeable) from .deprecation import deprecated from .class_weight import compute_class_weight, compute_sample_weight from ..externals.joblib import cpu_count @@ -32,7 +32,7 @@ class ConvergenceWarning(_ConvergenceWarning): "compute_class_weight", "compute_sample_weight", "column_or_1d", "safe_indexing", "check_consistent_length", "check_X_y", 'indexable', - "check_symmetric"] + "check_symmetric", "check_copy_and_writeable"] def safe_mask(X, mask): diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index d577864fb709a..c386f6bb36f5b 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -29,6 +29,7 @@ has_fit_parameter, check_is_fitted, check_consistent_length, + check_copy_and_writeable ) from sklearn.exceptions import NotFittedError @@ -459,3 +460,11 @@ def test_check_consistent_length(): assert_raises_regexp(TypeError, 'estimator', check_consistent_length, [1, 2], RandomForestRegressor()) # XXX: We should have a test with a string, but what is correct behaviour? 
+
+
+def test_check_copy_and_writeable():
+    X = np.zeros((10, 10))
+    assert check_copy_and_writeable(X).flags.writeable
+
+    X.flags.writeable = False
+    assert check_copy_and_writeable(X).flags.writeable
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 26bc3a8479f06..efdda70f0fbf6 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -700,3 +700,27 @@ def check_non_negative(X, whom):
     X = X.data if sp.issparse(X) else X
     if (X < 0).any():
         raise ValueError("Negative values in data passed to %s" % whom)
+
+
+def check_copy_and_writeable(X, copy=False):
+    """Return `X`, or a copy of it when `X` is read-only or `copy` is set.
+
+    `X` itself (not a copy) is returned when it is already writeable and
+    no copy was requested.
+
+    Parameters
+    ----------
+    X : array
+        Input array.
+    copy : bool, default False
+        If set, a copy of `X` is made irrespective of it being writeable
+        or not.
+
+    Returns
+    -------
+    X_writeable : array
+        Either `X` itself or the result of ``X.copy()``.
+    """
+    if copy or not X.flags.writeable:
+        return X.copy()
+    return X