From 5f95cbd651ebbe7f2ef9c1ed63d1008539e144a5 Mon Sep 17 00:00:00 2001
From: Vighnesh Birodkar <vighneshbirodkar@nyu.edu>
Date: Thu, 10 Dec 2015 10:49:32 -0500
Subject: [PATCH 1/3] make sparse encoder map copy on write arrays as gram omp
 modifies in place

---
 sklearn/decomposition/dict_learning.py            |  2 +-
 sklearn/decomposition/tests/test_dict_learning.py | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
index 990baef4c8c78..b1ae26d88d4be 100644
--- a/sklearn/decomposition/dict_learning.py
+++ b/sklearn/decomposition/dict_learning.py
@@ -288,7 +288,7 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
     code = np.empty((n_samples, n_components))
     slices = list(gen_even_slices(n_samples, _get_n_jobs(n_jobs)))
 
-    code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(
+    code_views = Parallel(n_jobs=n_jobs, verbose=verbose, mmap_mode='c')(
         delayed(_sparse_encode)(
             X[this_slice], dictionary, gram,
             cov[:, this_slice] if cov is not None else None,
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index 65e3fc99d1742..4b1c44b555a2e 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -238,3 +238,14 @@ def test_sparse_coder_estimator():
                        transform_alpha=0.001).transform(X)
     assert_true(not np.all(code == 0))
     assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
+
+
+def test_sparse_coder_mmap():
+    # Test that SparseCoder does not error when passing read-only
+    # arrays to child processes
+
+    rng = np.random.RandomState(777)
+    init_dict = rng.rand(500, 64)
+    data = np.random.rand(8096, 64)
+    sc = SparseCoder(init_dict, transform_algorithm='omp', n_jobs=2)
+    sc.fit_transform(data)

From 8c26c02adc634207c17245bfb9430f617db0c391 Mon Sep 17 00:00:00 2001
From: Vighnesh Birodkar <vighneshbirodkar@nyu.edu>
Date: Tue, 19 Apr 2016 18:03:57 -0400
Subject: [PATCH 2/3] Added comments to the unit test

---
 sklearn/decomposition/tests/test_dict_learning.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index 4b1c44b555a2e..0bbb76bd52789 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -245,7 +245,12 @@ def test_sparse_coder_mmap():
     # arrays to child processes
 
     rng = np.random.RandomState(777)
-    init_dict = rng.rand(500, 64)
-    data = np.random.rand(8096, 64)
+    num_cols = 64
+    init_dict = rng.rand(500, num_cols)
+    # Ensure that `data` is at least 2MB: joblib memory-maps arrays
+    # larger than 1MB. The 4 is the float32 itemsize; `//` keeps
+    # `num_rows` an integer, as array dimensions require.
+    num_rows = (1024 * 1024 * 2) // (4 * num_cols)
+    data = rng.rand(num_rows, num_cols).astype(np.float32)
     sc = SparseCoder(init_dict, transform_algorithm='omp', n_jobs=2)
     sc.fit_transform(data)

From b3840bb1a01829a9d3a81a425922bcc86d9a66af Mon Sep 17 00:00:00 2001
From: Vighnesh Birodkar <vighneshbirodkar@nyu.edu>
Date: Wed, 27 Apr 2016 19:12:31 -0400
Subject: [PATCH 3/3] Made check_copy_and_writeable public and used it in
 _sparse_encode

---
 sklearn/decomposition/dict_learning.py |  7 +++++--
 sklearn/linear_model/least_angle.py    | 16 ++++++----------
 sklearn/utils/__init__.py              |  4 ++--
 sklearn/utils/tests/test_validation.py |  9 +++++++++
 sklearn/utils/validation.py            | 16 ++++++++++++++++
 5 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
index b1ae26d88d4be..2f68191da58e7 100644
--- a/sklearn/decomposition/dict_learning.py
+++ b/sklearn/decomposition/dict_learning.py
@@ -18,7 +18,7 @@
 from ..externals.joblib import Parallel, delayed, cpu_count
 from ..externals.six.moves import zip
 from ..utils import (check_array, check_random_state, gen_even_slices,
-                     gen_batches, _get_n_jobs)
+                     gen_batches, _get_n_jobs, check_copy_and_writeable)
 from ..utils.extmath import randomized_svd, row_norms
 from ..utils.validation import check_is_fitted
 from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars
@@ -91,6 +91,9 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
     sklearn.linear_model.Lasso
     SparseCoder
     """
+    if cov is not None:
+        cov = check_copy_and_writeable(cov, copy_cov)
+
     if X.ndim == 1:
         X = X[:, np.newaxis]
     n_samples, n_features = X.shape
@@ -288,7 +291,7 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
     code = np.empty((n_samples, n_components))
     slices = list(gen_even_slices(n_samples, _get_n_jobs(n_jobs)))
 
-    code_views = Parallel(n_jobs=n_jobs, verbose=verbose, mmap_mode='c')(
+    code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(
         delayed(_sparse_encode)(
             X[this_slice], dictionary, gram,
             cov[:, this_slice] if cov is not None else None,
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
index 9fce600950c9e..e1936c351d0b2 100644
--- a/sklearn/linear_model/least_angle.py
+++ b/sklearn/linear_model/least_angle.py
@@ -21,7 +21,8 @@
 
 from .base import LinearModel
 from ..base import RegressorMixin
-from ..utils import arrayfuncs, as_float_array, check_X_y
+from ..utils import (arrayfuncs, as_float_array, check_X_y,
+                     check_copy_and_writeable)
 from ..model_selection import check_cv
 from ..exceptions import ConvergenceWarning
 from ..externals.joblib import Parallel, delayed
@@ -847,11 +848,6 @@ def __init__(self, alpha=1.0, fit_intercept=True, verbose=False,
 ###############################################################################
 # Cross-validated estimator classes
 
-def _check_copy_and_writeable(array, copy=False):
-    if copy or not array.flags.writeable:
-        return array.copy()
-    return array
-
 
 def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None,
                         copy=True, method='lars', verbose=False,
@@ -938,10 +934,10 @@ def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None,
     residues : array, shape (n_alphas, n_samples)
         Residues of the prediction on the test data
     """
-    X_train = _check_copy_and_writeable(X_train, copy)
-    y_train = _check_copy_and_writeable(y_train, copy)
-    X_test = _check_copy_and_writeable(X_test, copy)
-    y_test = _check_copy_and_writeable(y_test, copy)
+    X_train = check_copy_and_writeable(X_train, copy)
+    y_train = check_copy_and_writeable(y_train, copy)
+    X_test = check_copy_and_writeable(X_test, copy)
+    y_test = check_copy_and_writeable(y_test, copy)
 
     if fit_intercept:
         X_mean = X_train.mean(axis=0)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 4d62e4f394180..b6bb07c593c0b 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -12,7 +12,7 @@
                          assert_all_finite,
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
-                         check_symmetric)
+                         check_symmetric, check_copy_and_writeable)
 from .deprecation import deprecated
 from .class_weight import compute_class_weight, compute_sample_weight
 from ..externals.joblib import cpu_count
@@ -32,7 +32,7 @@ class ConvergenceWarning(_ConvergenceWarning):
            "compute_class_weight", "compute_sample_weight",
            "column_or_1d", "safe_indexing",
            "check_consistent_length", "check_X_y", 'indexable',
-           "check_symmetric"]
+           "check_symmetric", "check_copy_and_writeable"]
 
 
 def safe_mask(X, mask):
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index d577864fb709a..c386f6bb36f5b 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -29,6 +29,7 @@
     has_fit_parameter,
     check_is_fitted,
     check_consistent_length,
+    check_copy_and_writeable
 )
 
 from sklearn.exceptions import NotFittedError
@@ -459,3 +460,11 @@ def test_check_consistent_length():
     assert_raises_regexp(TypeError, 'estimator', check_consistent_length,
                          [1, 2], RandomForestRegressor())
     # XXX: We should have a test with a string, but what is correct behaviour?
+
+
+def test_check_copy_and_writeable():
+    X = np.zeros((10, 10))
+    assert check_copy_and_writeable(X) is X  # writeable: passed through
+    assert check_copy_and_writeable(X, copy=True) is not X  # forced copy
+    X.flags.writeable = False
+    assert check_copy_and_writeable(X).flags.writeable  # read-only: copied
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 26bc3a8479f06..efdda70f0fbf6 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -700,3 +700,19 @@ def check_non_negative(X, whom):
     X = X.data if sp.issparse(X) else X
     if (X < 0).any():
         raise ValueError("Negative values in data passed to %s" % whom)
+
+
+def check_copy_and_writeable(X, copy=False):
+    """Return `X` itself, or a copy of it when one is needed.
+
+    Parameters
+    ----------
+    X : array
+        Input array.
+    copy : bool, optional (default=False)
+        If True, always copy `X`. If False, `X` is copied only when it
+        is not writeable; otherwise `X` itself is returned.
+    """
+    if copy or not X.flags.writeable:
+        return X.copy()
+    return X