8000 BUG: spatial: fix Canberra distance. Closes #1430. · scipy/scipy@32f9e3d · GitHub
[go: up one dir, main page]

Skip to content

Commit 32f9e3d

Browse files
author
rgommers
committed
BUG: spatial: fix Canberra distance. Closes #1430.
Also fix potential problems with integer division in braycurtis and kulsinski.
1 parent fba8f78 commit 32f9e3d

File tree

3 files changed

+46
-20
lines changed

3 files changed

+46
-20
lines changed

scipy/spatial/distance.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ def kulsinski(u, v):
415415
"""
416416
u = np.asarray(u, order='c')
417417
v = np.asarray(v, order='c')
418-
n = len(u)
418+
n = float(len(u))
419419
(nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
420420

421421
return (ntf + nft - ntt + n) / (ntf + nft + n)
@@ -441,7 +441,7 @@ def seuclidean(u, v, V):
441441
"""
442442
u = np.asarray(u, order='c')
443443
v = np.asarray(v, order='c')
444-
V = np.asarray(V, order='c')
444+
V = np.asarray(V, order='c', dtype=np.float64)
445445
if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
446446
raise TypeError('V must be a 1-D array of the same dimension as u and v.')
447447
return np.sqrt(((u-v)**2 / V).sum())
@@ -533,6 +533,9 @@ def braycurtis(u, v):
533533
534534
\sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
535535
536+
The Bray-Curtis distance is in the range [0, 1] if all coordinates are
537+
positive, and is undefined if the inputs are of length zero.
538+
536539
Parameters
537540
----------
538541
u : ndarray
@@ -546,8 +549,8 @@ def braycurtis(u, v):
546549
The Bray-Curtis distance between vectors ``u`` and ``v``.
547550
"""
548551
u = np.asarray(u, order='c')
549-
v = np.asarray(v, order='c')
550-
return abs(u-v).sum() / abs(u+v).sum()
552+
v = np.asarray(v, order='c', dtype=np.float64)
553+
return abs(u - v).sum() / abs(u + v).sum()
551554

552555
def canberra(u, v):
553556
r"""
@@ -556,9 +559,8 @@ def canberra(u, v):
556559
557560
.. math::
558561
559-
\frac{\sum_i {|u_i-v_i|}}
560-
{\sum_i {|u_i|+|v_i|}}.
561-
562+
\sum_u \frac{|u_i-v_i|}
563+
{(|u_i|+|v_i|)}.
562564
563565
Parameters
564566
----------
@@ -571,10 +573,21 @@ def canberra(u, v):
571573
-------
572574
d : double
573575
The Canberra distance between vectors ``u`` and ``v``.
576+
577+
Notes
578+
-----
579+
Whe u[i] and v[i] are 0 for given i, then the fraction 0/0 = 0 is used in
580+
the calculation.
581+
574582
"""
575583
u = np.asarray(u, order='c')
576-
v = np.asarray(v, order='c')
577-
return abs(u-v).sum() / (abs(u).sum() + abs(v).sum())
584+
v = np.asarray(v, order='c', dtype=np.float64)
585+
olderr = np.seterr(invalid='ignore')
586+
try:
587+
d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
588+
finally:
589+
np.seterr(**olderr)
590+
return d
578591

579592
def _nbool_correspond_all(u, v):
580593
if u.dtype != v.dtype:

scipy/spatial/src/distance.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,15 @@ static NPY_INLINE double chebyshev_distance(const double *u, const double *v, in
7575

7676
static NPY_INLINE double canberra_distance(const double *u, const double *v, int n) {
7777
int i;
78-
double snum = 0.0, sdenom_u = 0.0, sdenom_v = 0.0;
78+
double snum = 0.0, sdenom = 0.0, tot = 0.0;
7979
for (i = 0; i < n; i++) {
80-
snum += fabs(u[i] - v[i]);
81-
sdenom_u += fabs(u[i]);
82-
sdenom_v += fabs(v[i]);
80+
snum = fabs(u[i] - v[i]);
81+
sdenom = fabs(u[i]) + fabs(v[i]);
82+
if (sdenom > 0.0) {
83+
tot += snum / sdenom;
84+
}
8385
}
84-
return snum / (sdenom_u + sdenom_v);
86+
return tot;
8587
}
8688

8789
static NPY_INLINE double bray_curtis_distance(const double *u, const double *v, int n) {

scipy/spatial/tests/test_distance.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@
3838

3939
import numpy as np
4040
from numpy.testing import verbose, TestCase, run_module_suite, \
41-
assert_raises, assert_array_equal
41+
assert_raises, assert_array_equal, assert_equal, assert_almost_equal
4242
from scipy.spatial.distance import squareform, pdist, cdist, matching, \
4343
jaccard, dice, sokalsneath, rogerstanimoto, \
4444
russellrao, yule, num_obs_y, num_obs_dm, \
45-
is_valid_dm, is_valid_y, wminkowski
45+
is_valid_dm, is_valid_y, wminkowski, \
46+
canberra, braycurtis
4647

4748
_filenames = ["iris.txt",
4849
"cdist-X1.txt",
@@ -118,7 +119,7 @@ def test_cdist_euclidean_random(self):
118119
if verbose > 2:
119120
print (Y1-Y2).max()
120121
self.assertTrue(within_tol(Y1, Y2, eps))
121-
122+
122123
def test_cdist_euclidean_random_unicode(self):
123124
"Tests cdist(X, u'euclidean') using unicode metric string"
124125
eps = 1e-07
@@ -130,7 +131,7 @@ def test_cdist_euclidean_random_unicode(self):
130131
if verbose > 2:
131132
print (Y1-Y2).max()
132133
self.assertTrue(within_tol(Y1, Y2, eps))
133-
134+
134135
def test_cdist_sqeuclidean_random(self):
135136
"Tests cdist(X, 'sqeuclidean') on random data."
136137
eps = 1e-07
@@ -477,7 +478,7 @@ def test_pdist_euclidean_random(self):
477478

478479
Y_test1 = pdist(X, 'euclidean')
479480
self.assertTrue(within_tol(Y_test1, Y_right, eps))
480-
481+
481482
def test_pdist_euclidean_random_u(self):
482483
"Tests pdist(X, 'euclidean') with unicode metric string"
483484
eps = 1e-07
@@ -487,7 +488,7 @@ def test_pdist_euclidean_random_u(self):
487488

488489
Y_test1 = pdist(X, u'euclidean')
489490
self.assertTrue(within_tol(Y_test1, Y_right, eps))
490-
491+
491492
def test_pdist_euclidean_random_float32(self):
492493
"Tests pdist(X, 'euclidean') on random data (float32)."
493494
eps = 1e-07
@@ -1734,6 +1735,16 @@ def test_sokalsneath_all_false():
17341735
"""Regression test for ticket #876"""
17351736
assert_raises(ValueError, sokalsneath, [False, False, False], [False, False, False])
17361737

1738+
def test_canberra():
1739+
"""Regression test for ticket #1430."""
1740+
assert_equal(canberra([1,2,3], [2,4,6]), 1)
1741+
assert_equal(canberra([1,1,0,0], [1,0,1,0]), 2)
1742+
1743+
def test_braycurtis():
1744+
"""Regression test for ticket #1430."""
1745+
assert_almost_equal(braycurtis([1,2,3], [2,4,6]), 1./3, decimal=15)
1746+
assert_almost_equal(braycurtis([1,1,0,0], [1,0,1,0]), 0.5, decimal=15)
1747+
17371748

17381749
if __name__=="__main__":
17391750
run_module_suite()

0 commit comments

Comments
 (0)
0