8000 BUG: Fix crashes when using float32 values in uniform histograms by eric-wieser · Pull Request #10324 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG: Fix crashes when using float32 values in uniform histograms #10324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions numpy/lib/histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,9 +318,17 @@ def _get_bin_edges(a, bins, range, weights):
raise ValueError('`bins` must be 1d, when an array')

if n_equal_bins is not None:
# gh-10322 means that type resolution rules are dependent on array
# shapes. To avoid this causing problems, we pick a type now and stick
# with it throughout.
bin_type = np.result_type(first_edge, last_edge, a)
if np.issubdtype(bin_type, np.integer):
bin_type = np.result_type(bin_type, float)

# bin edges must be computed
bin_edges = np.linspace(
first_edge, last_edge, n_equal_bins + 1, endpoint=True)
first_edge, last_edge, n_equal_bins + 1,
endpoint=True, dtype=bin_type)
return bin_edges, (first_edge, last_edge, n_equal_bins)
else:
return bin_edges, None
Expand Down Expand Up @@ -605,21 +613,24 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
tmp_a = tmp_a[keep]
if tmp_w is not None:
tmp_w = tmp_w[keep]
tmp_a_data = tmp_a.astype(float)
tmp_a = tmp_a_data - first_edge
tmp_a *= norm

# This cast ensures no type promotions occur below, which gh-10322
# make unpredictable. Getting it wrong leads to precision errors
# like gh-8123.
tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)

# Compute the bin indices, and for values that lie exactly on
# last_edge we need to subtract one
indices = tmp_a.astype(np.intp)
f_indices = (tmp_a - first_edge) * norm
indices = f_indices.astype(np.intp)
indices[indices == n_equal_bins] -= 1

# The index computation is not guaranteed to give exactly
# consistent results within ~1 ULP of the bin edges.
decrement = tmp_a_data < bin_edges[indices]
decrement = tmp_a < bin_edges[indices]
indices[decrement] -= 1
# The last bin includes the right edge. The other bins do not.
increment = ((tmp_a_data >= bin_edges[indices + 1])
increment = ((tmp_a >= bin_edges[indices + 1])
& (indices != n_equal_bins - 1))
indices[increment] += 1

Expand Down
47 changes: 47 additions & 0 deletions numpy/lib/tests/test_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,53 @@ def test_datetime(self):
assert_equal(d_edge.dtype, dates.dtype)
assert_equal(t_edge.dtype, td)

def do_precision_lower_bound(self, float_small, float_large):
eps = np.finfo(float_large).eps

arr = np.array([1.0], float_small)
range = np.array([1.0 + eps, 2.0], float_large)

# test is looking for behavior when the bounds change between dtypes
if range.astype(float_small)[0] != 1:
return

# previously crashed
count, x_loc = np.histogram(arr, bins=1, range=range)
assert_equal(count, [1])

# gh-10322 means that the type comes from arr - this may change
assert_equal(x_loc.dtype, float_small)

def do_precision_upper_bound(self, float_small, float_large):
eps = np.finfo(float_large).eps

arr = np.array([1.0], float_small)
range = np.array([0.0, 1.0 - eps], float_large)

# test is looking for behavior when the bounds change between dtypes
if range.astype(float_small)[-1] != 1:
return

# previously crashed
count, x_loc = np.histogram(arr, bins=1, range=range)
assert_equal(count, [1])

# gh-10322 means that the type comes from arr - this may change
assert_equal(x_loc.dtype, float_small)

def do_precision(self, float_small, float_large):
self.do_precision_lower_bound(float_small, float_large)
self.do_precision_upper_bound(float_small, float_large)

def test_precision(self):
# not looping results in a useful stack trace upon failure
self.do_precision(np.half, np.single)
self.do_precision(np.half, np.double)
self.do_precision(np.half, np.longdouble)
self.do_precision(np.single, np.double)
self.do_precision(np.single, np.longdouble)
self.do_precision(np.double, np.longdouble)


class TestHistogramOptimBinNums(object):
"""
Expand Down
0