MAINT Clean-up deprecated wminkowski distance metric for 1.3 (#26434) · REDVM/scikit-learn@e5f5168 · GitHub
[go: up one dir, main page]

Skip to content

Commit e5f5168

Browse files
jeremiedbb authored and REDVM committed
MAINT Clean-up deprecated wminkowski distance metric for 1.3 (scikit-learn#26434)
1 parent 1572d87 commit e5f5168

File tree

9 files changed

+39
-359
lines changed

9 files changed

+39
-359
lines changed

doc/modules/neighbors.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ of valid metrics use :meth:`KDTree.valid_metrics` and :meth:`BallTree.valid_metr
142142
>>> KDTree.valid_metrics()
143143
['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity']
144144
>>> BallTree.valid_metrics()
145-
['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', 'mahalanobis', 'wminkowski', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath', 'haversine', 'pyfunc']
145+
['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', 'mahalanobis', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath', 'haversine', 'pyfunc']
146146

147147
.. _classification:
148148

sklearn/cluster/tests/test_hierarchical.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,6 @@ def test_vector_scikit_single_vs_scipy_single(global_random_seed):
395395
assess_same_labelling(cut, cut_scipy)
396396

397397

398-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
399-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
400398
@pytest.mark.parametrize("metric_param_grid", METRICS_DEFAULT_PARAMS)
401399
def test_mst_linkage_core_memory_mapped(metric_param_grid):
402400
"""The MST-LINKAGE-CORE algorithm must work on mem-mapped dataset.

sklearn/metrics/_dist_metrics.pyx.tp

Lines changed: 0 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ METRIC_MAPPING{{name_suffix}} = {
9494
'infinity': ChebyshevDistance{{name_suffix}},
9595
'seuclidean': SEuclideanDistance{{name_suffix}},
9696
'mahalanobis': MahalanobisDistance{{name_suffix}},
97-
'wminkowski': WMinkowskiDistance{{name_suffix}},
9897
'hamming': HammingDistance{{name_suffix}},
9998
'canberra': CanberraDistance{{name_suffix}},
10099
'braycurtis': BrayCurtisDistance{{name_suffix}},
@@ -157,18 +156,10 @@ cdef class DistanceMetric{{name_suffix}}:
157156
"manhattan" ManhattanDistance - ``sum(|x - y|)``
158157
"chebyshev" ChebyshevDistance - ``max(|x - y|)``
159158
"minkowski" MinkowskiDistance p, w ``sum(w * |x - y|^p)^(1/p)``
160-
"wminkowski" WMinkowskiDistance p, w ``sum(|w * (x - y)|^p)^(1/p)``
161159
"seuclidean" SEuclideanDistance V ``sqrt(sum((x - y)^2 / V))``
162160
"mahalanobis" MahalanobisDistance V or VI ``sqrt((x - y)' V^-1 (x - y))``
163161
============== ==================== ======== ===============================
164162

165-
.. deprecated:: 1.1
166-
`WMinkowskiDistance` is deprecated in version 1.1 and will be removed in version 1.3.
167-
Use `MinkowskiDistance` instead. Note that in `MinkowskiDistance`, the weights are
168-
applied to the absolute differences already raised to the p power. This is different from
169-
`WMinkowskiDistance` where weights are applied to the absolute differences before raising
170-
to the p power. The deprecation aims to remain consistent with SciPy 1.8 convention.
171-
172163
**Metrics intended for two-dimensional vector spaces:** Note that the haversine
173164
distance metric requires data in the form of [latitude, longitude] and both
174165
inputs and outputs are in units of radians.
@@ -1445,160 +1436,6 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
14451436
1 / self.p
14461437
)
14471438

1448-
#------------------------------------------------------------
1449-
# TODO: Remove in 1.3 - WMinkowskiDistance class
1450-
# W-Minkowski Distance
1451-
cdef class WMinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
1452-
r"""Weighted Minkowski Distance
1453-
1454-
.. math::
1455-
D(x, y) = [\sum_i |w_i * (x_i - y_i)|^p] ^ (1/p)
1456-
1457-
Weighted Minkowski Distance requires p >= 1 and finite.
1458-
1459-
Parameters
1460-
----------
1461-
p : int
1462-
The order of the norm of the difference :math:`{||u-v||}_p`.
1463-
w : (N,) array-like
1464-
The weight vector.
1465-
1466-
"""
1467-
def __init__(self, p, w):
1468-
from warnings import warn
1469-
warn("WMinkowskiDistance is deprecated in version 1.1 and will be "
1470-
"removed in version 1.3. Use MinkowskiDistance instead. Note "
1471-
"that in MinkowskiDistance, the weights are applied to the "
1472-
"absolute differences raised to the p power. This is different "
1473-
"from WMinkowskiDistance where weights are applied to the "
1474-
"absolute differences before raising to the p power. "
1475-
"The deprecation aims to remain consistent with SciPy 1.8 "
1476-
"convention.", FutureWarning)
1477-
1478-
if p < 1:
1479-
raise ValueError("p must be greater than 1")
1480-
elif np.isinf(p):
1481-
raise ValueError("WMinkowskiDistance requires finite p. "
1482-
"For p=inf, use ChebyshevDistance.")
1483-
self.p = p
1484-
self.vec = np.asarray(w, dtype=np.float64)
1485-
self.size = self.vec.shape[0]
1486-
1487-
def _validate_data(self, X):
1488-
if X.shape[1] != self.size:
1489-
raise ValueError('WMinkowskiDistance dist: '
1490-
'size of w does not match')
1491-
1492-
cdef inline float64_t rdist(
1493-
self,
1494-
const {{INPUT_DTYPE_t}}* x1,
1495-
const {{INPUT_DTYPE_t}}* x2,
1496-
intp_t size,
1497-
) except -1 nogil:
1498-
1499-
cdef float64_t d = 0
1500-
cdef intp_t j
1501-
for j in range(size):
1502-
d += (pow(self.vec[j] * fabs(x1[j] - x2[j]), self.p))
1503-
return d
1504-
1505-
cdef inline float64_t dist(
1506-
self,
1507-
const {{INPUT_DTYPE_t}}* x1,
1508-
const {{INPUT_DTYPE_t}}* x2,
1509-
intp_t size,
1510-
) except -1 nogil:
1511-
return pow(self.rdist(x1, x2, size), 1. / self.p)
1512-
1513-
cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil:
1514-
return pow(rdist, 1. / self.p)
1515-
1516-
cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil:
1517-
return pow(dist, self.p)
1518-
1519-
def rdist_to_dist(self, rdist):
1520-
return rdist ** (1. / self.p)
1521-
1522-
def dist_to_rdist(self, dist):
1523-
return dist ** self.p
1524-
1525-
cdef inline float64_t rdist_csr(
1526-
self,
1527-
const {{INPUT_DTYPE_t}}* x1_data,
1528-
const int32_t[:] x1_indices,
1529-
const {{INPUT_DTYPE_t}}* x2_data,
1530-
const int32_t[:] x2_indices,
1531-
const int32_t x1_start,
1532-
const int32_t x1_end,
1533-
const int32_t x2_start,
1534-
const int32_t x2_end,
1535-
const intp_t size,
1536-
) except -1 nogil:
1537-
1538-
cdef:
1539-
intp_t ix1, ix2
1540-
intp_t i1 = x1_start
1541-
intp_t i2 = x2_start
1542-
1543-
float64_t d = 0.0
1544-
1545-
while i1 < x1_end and i2 < x2_end:
1546-
ix1 = x1_indices[i1]
1547-
ix2 = x2_indices[i2]
1548-
1549-
if ix1 == ix2:
1550-
d = d + pow(self.vec[ix1] * fabs(
1551-
x1_data[i1] - x2_data[i2]
1552-
), self.p)
1553-
i1 = i1 + 1
1554-
i2 = i2 + 1
1555-
elif ix1 < ix2:
1556-
d = d + pow(self.vec[ix1] * fabs(x1_data[i1]), self.p)
1557-
i1 = i1 + 1
1558-
else:
1559-
d = d + pow(self.vec[ix2] * fabs(x2_data[i2]), self.p)
1560-
i2 = i2 + 1
1561-
1562-
if i1 == x1_end:
1563-
while i2 < x2_end:
1564-
ix2 = x2_indices[i2]
1565-
d = d + pow(self.vec[ix2] * fabs(x2_data[i2]), self.p)
1566-
i2 = i2 + 1
1567-
else:
1568-
while i1 < x1_end:
1569-
ix1 = x1_indices[i1]
1570-
d = d + pow(self.vec[ix1] * fabs(x1_data[i1]), self.p)
1571-
i1 = i1 + 1
1572-
1573-
return d
1574-
1575-
cdef inline float64_t dist_csr(
1576-
self,
1577-
const {{INPUT_DTYPE_t}}* x1_data,
1578-
const int32_t[:] x1_indices,
1579-
const {{INPUT_DTYPE_t}}* x2_data,
1580-
const int32_t[:] x2_indices,
1581-
const int32_t x1_start,
1582-
const int32_t x1_end,
1583-
const int32_t x2_start,
1584-
const int32_t x2_end,
1585-
const intp_t size,
1586-
) except -1 nogil:
1587-
return pow(
1588-
self.rdist_csr(
1589-
x1_data,
1590-
x1_indices,
1591-
x2_data,
1592-
x2_indices,
1593-
x1_start,
1594-
x1_end,
1595-
x2_start,
1596-
x2_end,
1597-
size,
1598-
),
1599-
1 / self.p
1600-
)
1601-
16021439
#------------------------------------------------------------
16031440
# Mahalanobis Distance
16041441
# d = sqrt( (x - y)^T V^-1 (x - y) )

sklearn/metrics/tests/test_dist_metrics.py

Lines changed: 3 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
from sklearn.utils import check_random_state
1919
from sklearn.utils._testing import assert_allclose, create_memmap_backed_data
20-
from sklearn.utils.fixes import sp_version, parse_version
2120

2221

2322
def dist_func(x1, x2, p):
@@ -56,23 +55,10 @@ def dist_func(x1, x2, p):
5655
("hamming", {}),
5756
("canberra", {}),
5857
("braycurtis", {}),
58+
("minkowski", dict(p=(1, 1.5, 3), w=(rng.random_sample(d),))),
5959
]
60-
if sp_version >= parse_version("1.8.0.dev0"):
61-
# Starting from scipy 1.8.0.dev0, minkowski now accepts w, the weighting
62-
# parameter directly and using it is preferred over using wminkowski.
63-
METRICS_DEFAULT_PARAMS.append(
64-
("minkowski", dict(p=(1, 1.5, 3), w=(rng.random_sample(d),))),
65-
)
66-
else:
67-
# For previous versions of scipy, this was possible through a dedicated
68-
# metric (deprecated in 1.6 and removed in 1.8).
69-
METRICS_DEFAULT_PARAMS.append(
70-
("wminkowski", dict(p=(1, 1.5, 3), w=(rng.random_sample(d),))),
71-
)
7260

7361

74-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
75-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
7662
@pytest.mark.parametrize(
7763
"metric_param_grid", METRICS_DEFAULT_PARAMS, ids=lambda params: params[0]
7864
)
@@ -95,15 +81,7 @@ def test_cdist(metric_param_grid, X, Y):
9581
# with scipy
9682
rtol_dict = {"rtol": 1e-6}
9783

98-
if metric == "wminkowski":
99-
# wminkoski is deprecated in SciPy 1.6.0 and removed in 1.8.0
100-
WarningToExpect = None
101-
if sp_version >= parse_version("1.6.0"):
102-
WarningToExpect = DeprecationWarning
103-
with pytest.warns(WarningToExpect):
104-
D_scipy_cdist = cdist(X, Y, metric, **kwargs)
105-
else:
106-
D_scipy_cdist = cdist(X, Y, metric, **kwargs)
84+
D_scipy_cdist = cdist(X, Y, metric, **kwargs)
10785

10886
dm = DistanceMetricInterface.get_metric(metric, **kwargs)
10987

@@ -158,8 +136,6 @@ def test_cdist_bool_metric(metric, X_bool, Y_bool):
158136
assert_allclose(D_sklearn, D_scipy_cdist)
159137

160138

161-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
162-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
163139
@pytest.mark.parametrize(
164140
"metric_param_grid", METRICS_DEFAULT_PARAMS, ids=lambda params: params[0]
165141
)
@@ -182,18 +158,7 @@ def test_pdist(metric_param_grid, X):
182158
# with scipy
183159
rtol_dict = {"rtol": 1e-6}
184160

185-
if metric == "wminkowski":
186-
if sp_version >= parse_version("1.8.0"):
187-
pytest.skip("wminkowski will be removed in SciPy 1.8.0")
188-
189-
# wminkoski is deprecated in SciPy 1.6.0 and removed in 1.8.0
190-
ExceptionToAssert = None
191-
if sp_version >= parse_version("1.6.0"):
192-
ExceptionToAssert = DeprecationWarning
193-
with pytest.warns(ExceptionToAssert):
194-
D_scipy_pdist = cdist(X, X, metric, **kwargs)
195-
else:
196-
D_scipy_pdist = cdist(X, X, metric, **kwargs)
161+
D_scipy_pdist = cdist(X, X, metric, **kwargs)
197162

198163
dm = DistanceMetricInterface.get_metric(metric, **kwargs)
199164
D_sklearn = dm.pairwise(X)
@@ -209,8 +174,6 @@ def test_pdist(metric_param_grid, X):
209174
assert_allclose(D_sklearn_csr, D_scipy_pdist, **rtol_dict)
210175

211176

212-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
213-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
214177
@pytest.mark.parametrize(
215178
"metric_param_grid", METRICS_DEFAULT_PARAMS, ids=lambda params: params[0]
216179
)
@@ -261,8 +224,6 @@ def test_pdist_bool_metrics(metric, X_bool):
261224
assert_allclose(D_sklearn, D_scipy_pdist)
262225

263226

264-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
265-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
266227
@pytest.mark.parametrize("writable_kwargs", [True, False])
267228
@pytest.mark.parametrize(
268229
"metric_param_grid", METRICS_DEFAULT_PARAMS, ids=lambda params: params[0]
@@ -288,8 +249,6 @@ def test_pickle(writable_kwargs, metric_param_grid, X):
288249
assert_allclose(D1, D2)
289250

290251

291-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
292-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
293252
@pytest.mark.parametrize("metric", BOOL_METRICS)
294253
@pytest.mark.parametrize("X_bool", [X_bool, X_bool_mmap])
295254
def test_pickle_bool_metrics(metric, X_bool):
@@ -385,8 +344,6 @@ def custom_metric(x, y):
385344
assert_allclose(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
386345

387346

388-
# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
389-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
390347
def test_readonly_kwargs():
391348
# Non-regression test for:
392349
# https://github.com/scikit-learn/scikit-learn/issues/21685
@@ -400,7 +357,6 @@ def test_readonly_kwargs():
400357

401358
# Those distances metrics have to support readonly buffers.
402359
DistanceMetric.get_metric("seuclidean", V=weights)
403-
DistanceMetric.get_metric("wminkowski", p=1, w=weights)
404360
DistanceMetric.get_metric("mahalanobis", VI=VI)
405361

406362

@@ -433,24 +389,3 @@ def test_minkowski_metric_validate_weights_size():
433389
)
434390
with pytest.raises(ValueError, match=msg):
435391
dm.pairwise(X64, Y64)
436-
437-
438-
# TODO: Remove in 1.3 when wminkowski is removed
439-
def test_wminkowski_deprecated():
440-
w = rng.random_sample(d)
441-
msg = "WMinkowskiDistance is deprecated in version 1.1"
442-
with pytest.warns(FutureWarning, match=msg):
443-
DistanceMetric.get_metric("wminkowski", p=3, w=w)
444-
445-
446-
# TODO: Remove in 1.3 when wminkowski is removed
447-
@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
448-
@pytest.mark.parametrize("p", [1, 1.5, 3])
449-
def test_wminkowski_minkowski_equivalence(p):
450-
w = rng.random_sample(d)
451-
# Weights are rescaled for consistency w.r.t scipy 1.8 refactoring of 'minkowski'
452-
dm_wmks = DistanceMetric.get_metric("wminkowski", p=p, w=(w) ** (1 / p))
453-
dm_mks = DistanceMetric.get_metric("minkowski", p=p, w=w)
454-
D_wmks = dm_wmks.pairwise(X64, Y64)
455-
D_mks = dm_mks.pairwise(X64, Y64)
456-
assert_allclose(D_wmks, D_mks)

0 commit comments

Comments
 (0)
0