From 6dd446f62b4cdbdff67012ad717d4b06ae4926b5 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Wed, 26 Sep 2018 17:03:32 +0200 Subject: [PATCH 1/4] FIX issue #12171 parallel_backend in neighbors --- sklearn/neighbors/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 9f30ba3ebd3fc..dedcc658c0d2f 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -9,6 +9,7 @@ from functools import partial from distutils.version import LooseVersion +import sys import warnings from abc import ABCMeta, abstractmethod @@ -429,7 +430,8 @@ class from an array representing our data set and ask who's raise ValueError( "%s does not work with sparse matrices. Densify the data, " "or set algorithm='brute'" % self._fit_method) - if LooseVersion(joblib_version) < LooseVersion('0.12'): + if (sys.version_info < (3,) or + LooseVersion(joblib_version) < LooseVersion('0.12')): # Deal with change of API in joblib delayed_query = delayed(self._tree.query, check_pickle=False) From 6cfc09d9ea25f48198220ef54a184c26f46374a6 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Wed, 26 Sep 2018 17:04:00 +0200 Subject: [PATCH 2/4] TST add a non-regression test for parallel backends in neighbors --- sklearn/neighbors/tests/test_neighbors.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 9b244cde09536..2772a17257546 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -27,6 +27,8 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.validation import check_random_state +from sklearn.externals.joblib import parallel_backend + rng = np.random.RandomState(0) # load and shuffle iris dataset iris = datasets.load_iris() @@ -1315,6 +1317,24 @@ def test_same_radius_neighbors_parallel(algorithm): assert_array_equal(ind[i], 
ind_parallel[i]) assert_array_almost_equal(graph, graph_parallel) +@pytest.mark.parametrize('backend', ['loky', 'multiprocessing', 'threading']) +@pytest.mark.parametrize('algorithm', ALGORITHMS) +def test_knn_forcing_backend(backend, algorithm): + # Non-regression test which ensure the knn is properly working + # even when forcing the global joblib backend + with parallel_backend(backend): + X, y = datasets.make_classification(n_samples=30, n_features=5, + n_redundant=0, random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y) + + clf = neighbors.KNeighborsClassifier(n_neighbors=3, + algorithm=algorithm, + n_jobs=3) + clf.fit(X_train, y_train) + y = clf.predict(X_test) + dist, ind = clf.kneighbors(X_test) + graph = clf.kneighbors_graph(X_test, mode='distance').toarray() + def test_dtype_convert(): classifier = neighbors.KNeighborsClassifier(n_neighbors=1) From 42d03ca03470d791b09a3cab956384f666417997 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Wed, 26 Sep 2018 17:47:47 +0200 Subject: [PATCH 3/4] CLN pep8 --- sklearn/neighbors/tests/test_neighbors.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 2772a17257546..160f3dc5c5eed 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1317,11 +1317,12 @@ def test_same_radius_neighbors_parallel(algorithm): assert_array_equal(ind[i], ind_parallel[i]) assert_array_almost_equal(graph, graph_parallel) + @pytest.mark.parametrize('backend', ['loky', 'multiprocessing', 'threading']) @pytest.mark.parametrize('algorithm', ALGORITHMS) def test_knn_forcing_backend(backend, algorithm): - # Non-regression test which ensure the knn is properly working - # even when forcing the global joblib backend + # Non-regression test which ensures the knn methods are properly working + # even when forcing the global joblib backend. 
with parallel_backend(backend): X, y = datasets.make_classification(n_samples=30, n_features=5, n_redundant=0, random_state=0) @@ -1331,9 +1332,9 @@ def test_knn_forcing_backend(backend, algorithm): algorithm=algorithm, n_jobs=3) clf.fit(X_train, y_train) - y = clf.predict(X_test) - dist, ind = clf.kneighbors(X_test) - graph = clf.kneighbors_graph(X_test, mode='distance').toarray() + clf.predict(X_test) + clf.kneighbors(X_test) + clf.kneighbors_graph(X_test, mode='distance').toarray() def test_dtype_convert(): From b9c3fcbac3f49b196546551554004a2075416ca8 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 27 Sep 2018 10:34:58 +0200 Subject: [PATCH 4/4] DOC add whats_new entry --- doc/whats_new/v0.20.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index e4ba61eefd65a..34aab000e92f7 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -23,6 +23,11 @@ enhancements to features released in 0.20.0. those estimators as part of parallel parameter search or cross-validation. :issue:`12122` by :user:`Olivier Grisel `. +- |Fix| force the parallelism backend to :code:`threading` for + :class:`neighbors.KDTree` and :class:`neighbors.BallTree` in Python 2.7 to + avoid pickling errors caused by the serialization of their methods. + :issue:`12171` by :user:`Thomas Moreau <tomMoral>`. + .. _changes_0_20: Version 0.20.0