Merge branch 'master' of github.com:scikit-learn/scikit-learn into n_… · thomasjpfan/scikit-learn@dcb160d · GitHub
[go: up one dir, main page]

Skip to content

Commit dcb160d

Browse files
committed
Merge branch 'master' of github.com:scikit-learn/scikit-learn into n_features_in_preprocessing
2 parents 4b97541 + a1c17af commit dcb160d

File tree

12 files changed

+70
-20
lines changed

12 files changed

+70
-20
lines changed

build_tools/shared.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ get_dep() {
1111
# use latest
1212
echo "$package"
1313
elif [[ "$version" == "min" ]]; then
14-
echo "$package==$(python sklearn/_build_utils/min_dependencies.py $package)"
14+
echo "$package==$(python sklearn/_min_dependencies.py $package)"
1515
fi
1616
}

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from sklearn.utils import _IS_32BIT
1515
from sklearn.externals import _pilutil
16-
from sklearn._build_utils.min_dependencies import PYTEST_MIN_VERSION
16+
from sklearn._min_dependencies import PYTEST_MIN_VERSION
1717
from sklearn.utils.fixes import np_version, parse_version
1818

1919

doc/common_pitfalls.rst

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ and the model will not be able to perform effectively.
3030
For the following example, let's create a synthetic dataset with a
3131
single feature::
3232

33-
34-
35-
from sklearn.datasets import make_regression
33+
34+
>>> from sklearn.datasets import make_regression
3635
>>> from sklearn.model_selection import train_test_split
3736
...
3837
>>> random_state = 42
@@ -225,4 +224,4 @@ Below are some tips on avoiding data leakage:
225224
* The scikit-learn :ref:`pipeline <pipeline>` is a great way to prevent data
226225
leakage as it ensures that the appropriate method is performed on the
227226
correct data subset. The pipeline is ideal for use in cross-validation
228-
and hyper-parameter tuning functions.
227+
and hyper-parameter tuning functions.

doc/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ def filter_search_index(app, exception):
396396

397397
def generate_min_dependency_table(app):
398398
"""Generate min dependency table for docs."""
399-
from sklearn._build_utils.min_dependencies import dependent_packages
399+
from sklearn._min_dependencies import dependent_packages
400400

401401
# get length of header
402402
package_header_len = max(len(package)
@@ -440,7 +440,7 @@ def generate_min_dependency_table(app):
440440

441441
def generate_min_dependency_substitutions(app):
442442
"""Generate min dependency substitutions for docs."""
443-
from sklearn._build_utils.min_dependencies import dependent_packages
443+
from sklearn._min_dependencies import dependent_packages
444444

445445
output = StringIO()
446446

doc/whats_new/v0.24.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,14 @@ Changelog
542542
will raise a `ValueError` when fitting on data with all constant features.
543543
:pr:`18370` by :user:`Trevor Waite <trewaite>`.
544544

545+
- |Fix| In methods `radius_neighbors` and
546+
`radius_neighbors_graph` of :class:`neighbors.NearestNeighbors`,
547+
:class:`neighbors.RadiusNeighborsClassifier`,
548+
:class:`neighbors.RadiusNeighborsRegressor`, and
549+
:class:`neighbors.RadiusNeighborsTransformer`, using `sort_results=True` now
550+
correctly sorts the results even when fitting with the "brute" algorithm.
551+
:pr:`18612` by `Tom Dupre la Tour`_.
552+
545553
:mod:`sklearn.neural_network`
546554
.............................
547555

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
# We can actually import a restricted version of sklearn that
5151
# does not need the compiled code
5252
import sklearn
53-
import sklearn._build_utils.min_dependencies as min_deps # noqa
53+
import sklearn._min_dependencies as min_deps # noqa
5454

5555

5656
VERSION = sklearn.__version__

sklearn/_build_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from .pre_build_helpers import basic_check_build
1515
from .openmp_helpers import check_openmp_support
16-
from .min_dependencies import CYTHON_MIN_VERSION
16+
from .._min_dependencies import CYTHON_MIN_VERSION
1717

1818

1919
DEFAULT_ROOT = 'sklearn'

sklearn/linear_model/_glm/glm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def _y_pred_deviance_derivative(coef, X, y, weights, family, link):
4848
return y_pred, devp
4949

5050

51-
class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin):
51+
class GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):
5252
"""Regression via a penalized Generalized Linear Model (GLM).
5353
5454
GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at

sklearn/neighbors/_base.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -925,10 +925,10 @@ def radius_neighbors(self, X=None, radius=None, return_distance=True,
925925
Whether or not to return the distances.
926926
927927
sort_results : bool, default=False
928-
If True, the distances and indices will be sorted before being
929-
returned. If `False`, the results will not be sorted. If
930-
`return_distance=False`, setting `sort_results=True` will
931-
result in an error.
928+
If True, the distances and indices will be sorted by increasing
929+
distances before being returned. If False, the results may not
930+
be sorted. If `return_distance=False`, setting `sort_results=True`
931+
will result in an error.
932932
933933
.. versionadded:: 0.22
934934
@@ -1021,6 +1021,16 @@ class from an array representing our data set and ask who's
10211021
neigh_ind_list = sum(chunked_results, [])
10221022
results = _to_object_array(neigh_ind_list)
10231023

1024+
if sort_results:
1025+
if not return_distance:
1026+
raise ValueError("return_distance must be True "
1027+
"if sort_results is True.")
1028+
for ii in range(len(neigh_dist)):
1029+
order = np.argsort(neigh_dist[ii], kind='mergesort')
1030+
neigh_ind[ii] = neigh_ind[ii][order]
1031+
neigh_dist[ii] = neigh_dist[ii][order]
1032+
results = neigh_dist, neigh_ind
1033+
10241034
elif self._fit_method in ['ball_tree', 'kd_tree']:
10251035
if issparse(X):
10261036
raise ValueError(
@@ -1097,9 +1107,9 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity',
10971107
edges are Euclidean distance between points.
10981108
10991109
sort_results : bool, default=False
1100-
If True, the distances and indices will be sorted before being
1101-
returned. If False, the results will not be sorted.
1102-
Only used with mode='distance'.
1110+
If True, in each row of the result, the non-zero entries will be
1111+
sorted by increasing distances. If False, the non-zero entries may
1112+
not be sorted. Only used with mode='distance'.
11031113
11041114
.. versionadded:: 0.22
11051115

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,39 @@ def test_radius_neighbors_returns_array_of_objects():
678678
assert_array_equal(neigh_ind, expected_ind)
679679

680680

681+
@pytest.mark.parametrize(["algorithm", "metric"], [("ball_tree", "euclidean"),
682+
("kd_tree", "euclidean"),
683+
("brute", "euclidean"),
684+
("brute", "precomputed")])
685+
def test_radius_neighbors_sort_results(algorithm, metric):
686+
# Test radius_neighbors[_graph] output when sort_result is True
687+
n_samples = 10
688+
rng = np.random.RandomState(42)
689+
X = rng.random_sample((n_samples, 4))
690+
691+
if metric == "precomputed":
692+
X = neighbors.radius_neighbors_graph(X, radius=np.inf, mode="distance")
693+
model = neighbors.NearestNeighbors(algorithm=algorithm, metric=metric)
694+
model.fit(X)
695+
696+
# self.radius_neighbors
697+
distances, indices = model.radius_neighbors(X=X, radius=np.inf,
698+
sort_results=True)
699+
for ii in range(n_samples):
700+
assert_array_equal(distances[ii], np.sort(distances[ii]))
701+
702+
# sort_results=True and return_distance=False
703+
if metric != "precomputed": # no need to raise with precomputed graph
704+
with pytest.raises(ValueError, match="return_distance must be True"):
705+
model.radius_neighbors(X=X, radius=np.inf, sort_results=True,
706+
return_distance=False)
707+
708+
# self.radius_neighbors_graph
709+
graph = model.radius_neighbors_graph(X=X, radius=np.inf, mode="distance",
710+
sort_results=True)
711+
assert _is_sorted_by_data(graph)
712+
713+
681714
def test_RadiusNeighborsClassifier_multioutput():
682715
# Test k-NN classifier on multioutput data
683716
rng = check_random_state(0)

sklearn/tests/test_min_dependencies_readme.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77

88
import pytest
99
import sklearn
10-
from sklearn._build_utils.min_dependencies import dependent_packages
10+
from sklearn._min_dependencies import dependent_packages
1111
from sklearn.utils.fixes import parse_version
1212

1313

1414
def test_min_dependencies_readme():
1515
# Test that the minimum dependencies in the README.rst file are
1616
# consistent with the minimum dependencies defined at the file:
17-
# sklearn/_build_utils/min_dependencies.py
17+
# sklearn/_min_dependencies.py
1818

1919
pattern = re.compile(r"(\.\. \|)" +
2020
r"(([A-Za-z]+\-?)+)" +

0 commit comments

Comments
 (0)
0