Merge pull request #10324 from eric-wieser/histogram-range-comparison · numpy/numpy@7c4c213 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7c4c213

Browse files
authored
Merge pull request #10324 from eric-wieser/histogram-range-comparison
BUG: Fix crashes when using float32 values in uniform histograms
2 parents b429be3 + 1122303 commit 7c4c213

File tree

2 files changed

+65
-7
lines changed

2 files changed

+65
-7
lines changed

numpy/lib/histograms.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -318,9 +318,17 @@ def _get_bin_edges(a, bins, range, weights):
318318
raise ValueError('`bins` must be 1d, when an array')
319319

320320
if n_equal_bins is not None:
321+
# gh-10322 means that type resolution rules are dependent on array
322+
# shapes. To avoid this causing problems, we pick a type now and stick
323+
# with it throughout.
324+
bin_type = np.result_type(first_edge, last_edge, a)
325+
if np.issubdtype(bin_type, np.integer):
326+
bin_type = np.result_type(bin_type, float)
327+
321328
# bin edges must be computed
322329
bin_edges = np.linspace(
323-
first_edge, last_edge, n_equal_bins + 1, endpoint=True)
330+
first_edge, last_edge, n_equal_bins + 1,
331+
endpoint=True, dtype=bin_type)
324332
return bin_edges, (first_edge, last_edge, n_equal_bins)
325333
else:
326334
return bin_edges, None
@@ -605,21 +613,24 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
605613
tmp_a = tmp_a[keep]
606614
if tmp_w is not None:
607615
tmp_w = tmp_w[keep]
608-
tmp_a_data = tmp_a.astype(float)
609-
tmp_a = tmp_a_data - first_edge
610-
tmp_a *= norm
616+
617+
# This cast ensures no type promotions occur below, which gh-10322
618+
# make unpredictable. Getting it wrong leads to precision errors
619+
# like gh-8123.
620+
tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)
611621

612622
# Compute the bin indices, and for values that lie exactly on
613623
# last_edge we need to subtract one
614-
indices = tmp_a.astype(np.intp)
624+
f_indices = (tmp_a - first_edge) * norm
625+
indices = f_indices.astype(np.intp)
615626
indices[indices == n_equal_bins] -= 1
616627

617628
# The index computation is not guaranteed to give exactly
618629
# consistent results within ~1 ULP of the bin edges.
619-
decrement = tmp_a_data < bin_edges[indices]
630+
decrement = tmp_a < bin_edges[indices]
620631
indices[decrement] -= 1
621632
# The last bin includes the right edge. The other bins do not.
622-
increment = ((tmp_a_data >= bin_edges[indices + 1])
633+
increment = ((tmp_a >= bin_edges[indices + 1])
623634
& (indices != n_equal_bins - 1))
624635
indices[increment] += 1
625636

numpy/lib/tests/test_histograms.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,53 @@ def test_datetime(self):
299299
assert_equal(d_edge.dtype, dates.dtype)
300300
assert_equal(t_edge.dtype, td)
301301

302+
def do_precision_lower_bound(self, float_small, float_large):
303+
eps = np.finfo(float_large).eps
304+
305+
arr = np.array([1.0], float_small)
306+
range = np.array([1.0 + eps, 2.0], float_large)
307+
308+
# test is looking for behavior when the bounds change between dtypes
309+
if range.astype(float_small)[0] != 1:
310+
return
311+
312+
# previously crashed
313+
count, x_loc = np.histogram(arr, bins=1, range=range)
314+
assert_equal(count, [1])
315+
316+
# gh-10322 means that the type comes from arr - this may change
317+
assert_equal(x_loc.dtype, float_small)
318+
319+
def do_precision_upper_bound(self, float_small, float_large):
320+
eps = np.finfo(float_large).eps
321+
322+
arr = np.array([1.0], float_small)
323+
range = np.array([0.0, 1.0 - eps], float_large)
324+
325+
# test is looking for behavior when the bounds change between dtypes
326+
if range.astype(float_small)[-1] != 1:
327+
return
328+
329+
# previously crashed
330+
count, x_loc = np.histogram(arr, bins=1, range=range)
331+
assert_equal(count, [1])
332+
333+
# gh-10322 means that the type comes from arr - this may change
334+
assert_equal(x_loc.dtype, float_small)
335+
336+
def do_precision(self, float_small, float_large):
337+
self.do_precision_lower_bound(float_small, float_large)
338+
self.do_precision_upper_bound(float_small, float_large)
339+
340+
def test_precision(self):
341+
# not looping results in a useful stack trace upon failure
342+
self.do_precision(np.half, np.single)
343+
self.do_precision(np.half, np.double)
344+
self.do_precision(np.half, np.longdouble)
345+
self.do_precision(np.single, np.double)
346+
self.do_precision(np.single, np.longdouble)
347+
self.do_precision(np.double, np.longdouble)
348+
302349

303350
class TestHistogramOptimBinNums(object):
304351
"""

0 commit comments

Comments
 (0)
0