From f1a46a6f32e5336e7437055810d2703625b6e35f Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 5 Jul 2019 13:45:28 +0200 Subject: [PATCH 01/11] use require=sharedmem for linear models CV --- sklearn/linear_model/coordinate_descent.py | 5 ++-- .../tests/test_coordinate_descent.py | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 646839a0a3ae6..a4fea9f6f776d 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1186,8 +1186,9 @@ def fit(self, X, y): dtype=X.dtype.type) for this_l1_ratio, this_alphas in zip(l1_ratios, alphas) for train, test in folds) - mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, - **_joblib_parallel_args(prefer="threads"))(jobs) + mse_paths = Parallel( + n_jobs=self.n_jobs, verbose=self.verbose, + **_joblib_parallel_args(require="sharedmem"))(jobs) mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1)) mean_mse = np.mean(mse_paths, axis=1) self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1)) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 5e9088efe1ab9..ed539d1bb9a68 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -6,8 +6,11 @@ import pytest from scipy import interpolate, sparse from copy import deepcopy +import joblib +from distutils.version import LooseVersion from sklearn.datasets import load_boston +from sklearn.datasets import make_regression from sklearn.exceptions import ConvergenceWarning from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal @@ -868,3 +871,24 @@ def test_sparse_input_convergence_warning(): Lasso(max_iter=1000).fit(sparse.csr_matrix(X, dtype=np.float32), y) assert not record.list + + +@pytest.mark.parametrize("backend", + ["loky", "multiprocessing", "threading"]) +@pytest.mark.parametrize("estimator", + [ElasticNetCV, MultiTaskElasticNetCV, + LassoCV, MultiTaskLassoCV]) +def test_linear_models_cv_fit_for_all_backends(backend, estimator): + # LinearModelsCV.fit performs inplace operations on input data which would + # have been memmapped when using loky or multiprocessing backend, causing + # an error without require='sharedmem'. + + if joblib.__version__ < LooseVersion('0.12') and backend == 'loky': + pytest.skip('loky backend does not exist in joblib <0.12') + + # Create a problem sufficiently large to cause memmapping (1MB). + n_targets = 1 + (estimator in (MultiTaskElasticNetCV, MultiTaskLassoCV)) + X, y = make_regression(20000, 10, n_targets=n_targets) + + with joblib.parallel_backend(backend=backend): + estimator(n_jobs=2, cv=3).fit(X, y) From 79704e9d9847c426db077fa60ac560389ac1c541 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 5 Jul 2019 14:02:43 +0200 Subject: [PATCH 02/11] add what's new --- doc/whats_new/v0.21.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 2e1c639e267b7..ebf616d652baa 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -26,6 +26,12 @@ Changelog ``'penalty'`` parameters (regression introduced in 0.21). :pr:`14087` by `Nicolas Hug`_. +- |Fix| Fixed a bug in :class:`linear_model.ElasticNetCV`, + :class:`linear_model.MultitaskElasticNetCV`, :class:`linear_model.LassoCV` + and :class:`linear_model.MultitaskLassoCV` where fitting would fail when + using joblib multiprocessing or loky backend. :pr:`14264` by + :user:`Jérémie du Boisberranger `. + :mod:`sklearn.tree` ................... From e76f62cbd5cdbc779ee0ab3569d87363655c115b Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 26 Jul 2019 13:30:46 +0200 Subject: [PATCH 03/11] back to prefer='threads', fix flags --- sklearn/linear_model/coordinate_descent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index a4fea9f6f776d..58d69ecf79879 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -977,6 +977,10 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None, y_train = y[train] X_test = X[test] y_test = y[test] + if not sparse.issparse(X): + # fancy indexing of read-only memmap creates a read-only copy. + for array in (X_train, y_train, X_test, y_test): + array.setflags(write=True) fit_intercept = path_params['fit_intercept'] normalize = path_params['normalize'] @@ -1188,7 +1192,7 @@ def fit(self, X, y): for train, test in folds) mse_paths = Parallel( n_jobs=self.n_jobs, verbose=self.verbose, - **_joblib_parallel_args(require="sharedmem"))(jobs) + **_joblib_parallel_args(prefer="threads"))(jobs) mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1)) mean_mse = np.mean(mse_paths, axis=1) self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1)) From 185c008bf42a3ff49f057b46d29cda96920e48aa Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 26 Jul 2019 13:32:39 +0200 Subject: [PATCH 04/11] revert --- sklearn/linear_model/coordinate_descent.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 58d69ecf79879..baa96c205072d 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1190,9 +1190,8 @@ def fit(self, X, y): dtype=X.dtype.type) for this_l1_ratio, this_alphas in zip(l1_ratios, alphas) for train, test in folds) - mse_paths = Parallel( - n_jobs=self.n_jobs, verbose=self.verbose, - **_joblib_parallel_args(prefer="threads"))(jobs) + mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, + **_joblib_parallel_args(prefer="threads"))(jobs) mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1)) mean_mse = np.mean(mse_paths, axis=1) self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1)) From 4b8f63efa1a5fc77367f733f4979aa4e85891d85 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 26 Jul 2019 15:05:15 +0200 Subject: [PATCH 05/11] safer flag setting --- sklearn/linear_model/coordinate_descent.py | 11 ++++++++--- sklearn/linear_model/tests/test_coordinate_descent.py | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index baa96c205072d..7dae272388dfb 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -977,10 +977,15 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None, y_train = y[train] X_test = X[test] y_test = y[test] + if not sparse.issparse(X): - # fancy indexing of read-only memmap creates a read-only copy. - for array in (X_train, y_train, X_test, y_test): - array.setflags(write=True) + for array, array_input in ((X_train, X), (y_train, y), + (X_test, X), (y_test, y)): + if array.base is not array_input and not array.flags['WRITEABLE']: + # fancy indexing should create a writable copy but it doesn't + # for read-only memmaps (cf. numpy#14132). + array.setflags(write=True) + fit_intercept = path_params['fit_intercept'] normalize = path_params['normalize'] diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index ed539d1bb9a68..23b678d15a341 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -881,7 +881,8 @@ def test_sparse_input_convergence_warning(): def test_linear_models_cv_fit_for_all_backends(backend, estimator): # LinearModelsCV.fit performs inplace operations on input data which would # have been memmapped when using loky or multiprocessing backend, causing - # an error without require='sharedmem'. + # an error due to unexpected behavior of fancy indexing of read-only + # memmaps (cf. numpy#14132). if joblib.__version__ < LooseVersion('0.12') and backend == 'loky': pytest.skip('loky backend does not exist in joblib <0.12') From 17c699cda5574ae222553de493e62800c16e9ea6 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Wed, 31 Jul 2019 13:00:32 +0200 Subject: [PATCH 06/11] move what's new --- doc/whats_new/v0.22.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 2245c2562dccf..53f777592f988 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -170,6 +170,12 @@ Changelog and non-contiguous arrays and makes a conversion instead of failing. :pr:`14458` by :user:`Guillaume Lemaitre `. +- |Fix| Fixed a bug in :class:`linear_model.ElasticNetCV`, + :class:`linear_model.MultitaskElasticNetCV`, :class:`linear_model.LassoCV` + and :class:`linear_model.MultitaskLassoCV` where fitting would fail when + using joblib multiprocessing or loky backend. :pr:`14264` by + :user:`Jérémie du Boisberranger `. + :mod:`sklearn.metrics` ...................... From 8599551e5db9e431bf877c6eda680ccda1da4c24 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Wed, 31 Jul 2019 13:50:52 +0200 Subject: [PATCH 07/11] nitpick --- sklearn/linear_model/tests/test_coordinate_descent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 1b55690e7c80a..83efb91b12a07 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -877,10 +877,10 @@ def test_sparse_input_convergence_warning(): [ElasticNetCV, MultiTaskElasticNetCV, LassoCV, MultiTaskLassoCV]) def test_linear_models_cv_fit_for_all_backends(backend, estimator): - # LinearModelsCV.fit performs inplace operations on input data which would - # have been memmapped when using loky or multiprocessing backend, causing - # an error due to unexpected behavior of fancy indexing of read-only - # memmaps (cf. numpy#14132). + # LinearModelsCV.fit performs inplace operations on input data which is + # memmapped when using loky or multiprocessing backend, causing an error + # due to unexpected behavior of fancy indexing of read-only memmaps + # (cf. numpy#14132). if joblib.__version__ < LooseVersion('0.12') and backend == 'loky': pytest.skip('loky backend does not exist in joblib <0.12') From dd653f210eb3b099ca4976c9c938b906b80345bf Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 2 Oct 2019 16:16:26 +0200 Subject: [PATCH 08/11] DEBUG Verbose pytest to see which test is causing the deadlock --- build_tools/azure/test_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index 77a950d86549c..39a8ac1458e0a 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -21,7 +21,7 @@ except ImportError: python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" pip list -TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML" +TEST_CMD="python -m pytest -v --showlocals --durations=20 --junitxml=$JUNITXML" if [[ "$COVERAGE" == "true" ]]; then export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc" From 1dfd9b3d4e5ea76c59f61ba68b2201c92faa37f9 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 3 Oct 2019 09:59:28 +0200 Subject: [PATCH 09/11] Add per-test duration --- conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conftest.py b/conftest.py index 73326d6d2e32b..e100720096368 100644 --- a/conftest.py +++ b/conftest.py @@ -6,6 +6,7 @@ # the one from site-packages. import platform +from time import time from distutils.version import LooseVersion import pytest @@ -84,7 +85,11 @@ def pytest_runtest_setup(item): if isinstance(item, DoctestItem): set_config(print_changed_only=True) + global tic + tic = time() + def pytest_runtest_teardown(item, nextitem): if isinstance(item, DoctestItem): set_config(print_changed_only=False) + print(" {:.3f}s ".format(time() - tic), end="") From 4614f066bd1ac76b00390178df9809d0100b0575 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 3 Oct 2019 10:20:56 +0200 Subject: [PATCH 10/11] Fix NameError in conftest.py --- conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conftest.py b/conftest.py index e100720096368..69c2c31532f01 100644 --- a/conftest.py +++ b/conftest.py @@ -16,6 +16,8 @@ from sklearn.utils import _IS_32BIT PYTEST_MIN_VERSION = '3.3.0' +tic = time() + if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: raise ImportError('Your version of pytest is too old, you should have ' From 4f48865e1dd2e3bea2a3763cc58a375e09812364 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Thu, 23 Apr 2020 13:20:37 +0200 Subject: [PATCH 11/11] remove debug + remove multiprocessing --- build_tools/azure/test_script.sh | 2 +- conftest.py | 8 -------- doc/whats_new/v0.23.rst | 2 +- sklearn/linear_model/tests/test_coordinate_descent.py | 8 +++----- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index 39a8ac1458e0a..77a950d86549c 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -21,7 +21,7 @@ except ImportError: python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" pip list -TEST_CMD="python -m pytest -v --showlocals --durations=20 --junitxml=$JUNITXML" +TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML" if [[ "$COVERAGE" == "true" ]]; then export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc" diff --git a/conftest.py b/conftest.py index 93eb612914b71..17c3f4b144346 100644 --- a/conftest.py +++ b/conftest.py @@ -7,7 +7,6 @@ import platform import sys -from time import time from distutils.version import LooseVersion import os @@ -20,8 +19,6 @@ from sklearn._build_utils.deprecated_modules import _DEPRECATED_MODULES PYTEST_MIN_VERSION = '3.3.0' -tic = time() - if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: raise ImportError('Your version of pytest is too old, you should have ' @@ -101,16 +98,11 @@ def pytest_runtest_setup(item): if isinstance(item, DoctestItem): set_config(print_changed_only=True) - global tic - tic = time() - def pytest_runtest_teardown(item, nextitem): if isinstance(item, DoctestItem): set_config(print_changed_only=False) - print(" {:.3f}s ".format(time() - tic), end="") - # TODO: Remove when modules are deprecated in 0.24 # Configures pytest to ignore deprecated modules. diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 274098fc7033c..e484d0d4c6f83 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -262,7 +262,7 @@ Changelog - |Fix| Fixed a bug in :class:`linear_model.ElasticNetCV`, :class:`linear_model.MultitaskElasticNetCV`, :class:`linear_model.LassoCV` and :class:`linear_model.MultitaskLassoCV` where fitting would fail when - using joblib multiprocessing or loky backend. :pr:`14264` by + using joblib loky backend. :pr:`14264` by :user:`Jérémie du Boisberranger `. :mod:`sklearn.metrics` diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 7c546799ac64f..142c1e9ac2a47 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1025,16 +1025,14 @@ def test_enet_sample_weight_sparse(): reg.fit(X, y, sample_weight=sw, check_input=True) -@pytest.mark.parametrize("backend", - ["loky", "multiprocessing", "threading"]) +@pytest.mark.parametrize("backend", ["loky", "threading"]) @pytest.mark.parametrize("estimator", [ElasticNetCV, MultiTaskElasticNetCV, LassoCV, MultiTaskLassoCV]) def test_linear_models_cv_fit_for_all_backends(backend, estimator): # LinearModelsCV.fit performs inplace operations on input data which is - # memmapped when using loky or multiprocessing backend, causing an error - # due to unexpected behavior of fancy indexing of read-only memmaps - # (cf. numpy#14132). + # memmapped when using loky backend, causing an error due to unexpected + # behavior of fancy indexing of read-only memmaps (cf. numpy#14132). if joblib.__version__ < LooseVersion('0.12') and backend == 'loky': pytest.skip('loky backend does not exist in joblib <0.12')