FIX Support readonly sparse datasets for `manhattan_distances` (#25432) · scikit-learn/scikit-learn@b69abf5 · GitHub
[go: up one dir, main page]

Skip to content

Commit b69abf5

Browse files
jjerphan, lesteve, lorentzenchr, ogrisel
authored
FIX Support readonly sparse datasets for manhattan_distances (#25432)
* TST Add non-regression test for #7981 This reproducer is adapted from the one of this message: #7981 (comment) Co-authored-by: Loïc Estève <loic.esteve@ymail.com> * FIX Support readonly sparse datasets for manhattan * DOC Add entry in whats_new/v1.2.rst for 1.2.1 * FIX Fix comment * Update sklearn/metrics/tests/test_pairwise.py Co-authored-by: Christian Lorentzen <lorentzen.ch@gmail.com> * DOC Move entry to whats_new/v1.3.rst * Update sklearn/metrics/tests/test_pairwise.py Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org> Co-authored-by: Loïc Estève <loic.esteve@ymail.com> Co-authored-by: Christian Lorentzen <lorentzen.ch@gmail.com> Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent c58ca7d commit b69abf5

File tree

3 files changed

+27
-3
lines changed

3 files changed

+27
-3
lines changed

doc/whats_new/v1.3.rst

+6
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,12 @@ Changelog
143143
- |Enhancement| Added the parameter `fill_value` to :class:`impute.IterativeImputer`.
144144
:pr:`25232` by :user:`Thijs van Weezel <ValueInvestorThijs>`.
145145

146+
:mod:`sklearn.metrics`
147+
......................
148+
149+
- |Fix| :func:`metrics.manhattan_distances` now supports readonly sparse datasets.
150+
:pr:`25432` by :user:`Julien Jerphanion <jjerphan>`.
151+
146152
:mod:`sklearn.naive_bayes`
147153
..........................
148154

sklearn/metrics/_pairwise_fast.pyx

+9-3
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,15 @@ def _chi2_kernel_fast(floating[:, :] X,
3535
result[i, j] = -res
3636

3737

38-
def _sparse_manhattan(floating[::1] X_data, int[:] X_indices, int[:] X_indptr,
39-
floating[::1] Y_data, int[:] Y_indices, int[:] Y_indptr,
40-
double[:, ::1] D):
38+
def _sparse_manhattan(
39+
const floating[::1] X_data,
40+
const int[:] X_indices,
41+
const int[:] X_indptr,
42+
const floating[::1] Y_data,
43+
const int[:] Y_indices,
44+
const int[:] Y_indptr,
45+
double[:, ::1] D,
46+
):
4147
"""Pairwise L1 distances for CSR matrices.
4248
4349
Usage:

sklearn/metrics/tests/test_pairwise.py

+12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from scipy.spatial.distance import minkowski as wminkowski
1717

1818
from sklearn.utils.fixes import sp_version, parse_version
19+
from sklearn.utils.parallel import delayed, Parallel
1920

2021
import pytest
2122

@@ -1541,3 +1542,14 @@ def test_numeric_pairwise_distances_datatypes(metric, global_dtype, y_is_x):
15411542
dist = pairwise_distances(X, Y, metric=metric, **params)
15421543

15431544
assert_allclose(dist, expected_dist)
1545+
1546+
1547+
def test_sparse_manhattan_readonly_dataset():
1548+
# Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/7981
1549+
matrices1 = [csr_matrix(np.ones((5, 5)))]
1550+
matrices2 = [csr_matrix(np.ones((5, 5)))]
1551+
# Joblib memory-maps datasets, which makes them read-only.
1552+
# The following call was reported as failing in #7981, but this must pass.
1553+
Parallel(n_jobs=2, max_nbytes=0)(
1554+
delayed(manhattan_distances)(m1, m2) for m1, m2 in zip(matrices1, matrices2)
1555+
)

0 commit comments

Comments
 (0)
0