diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index ec2d0fe81cd9..c5679ace8b43 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -318,9 +318,17 @@ def _get_bin_edges(a, bins, range, weights): raise ValueError('`bins` must be 1d, when an array') if n_equal_bins is not None: + # gh-10322 means that type resolution rules are dependent on array + # shapes. To avoid this causing problems, we pick a type now and stick + # with it throughout. + bin_type = np.result_type(first_edge, last_edge, a) + if np.issubdtype(bin_type, np.integer): + bin_type = np.result_type(bin_type, float) + # bin edges must be computed bin_edges = np.linspace( - first_edge, last_edge, n_equal_bins + 1, endpoint=True) + first_edge, last_edge, n_equal_bins + 1, + endpoint=True, dtype=bin_type) return bin_edges, (first_edge, last_edge, n_equal_bins) else: return bin_edges, None @@ -605,21 +613,24 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, tmp_a = tmp_a[keep] if tmp_w is not None: tmp_w = tmp_w[keep] - tmp_a_data = tmp_a.astype(float) - tmp_a = tmp_a_data - first_edge - tmp_a *= norm + + # This cast ensures no type promotions occur below, which gh-10322 + # make unpredictable. Getting it wrong leads to precision errors + # like gh-8123. + tmp_a = tmp_a.astype(bin_edges.dtype, copy=False) # Compute the bin indices, and for values that lie exactly on # last_edge we need to subtract one - indices = tmp_a.astype(np.intp) + f_indices = (tmp_a - first_edge) * norm + indices = f_indices.astype(np.intp) indices[indices == n_equal_bins] -= 1 # The index computation is not guaranteed to give exactly # consistent results within ~1 ULP of the bin edges. - decrement = tmp_a_data < bin_edges[indices] + decrement = tmp_a < bin_edges[indices] indices[decrement] -= 1 # The last bin includes the right edge. The other bins do not. - increment = ((tmp_a_data >= bin_edges[indices + 1]) + increment = ((tmp_a >= bin_edges[indices + 1]) & (indices != n_equal_bins - 1)) indices[increment] += 1 diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index 58547dc17e58..a2c684a204f6 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -299,6 +299,53 @@ def test_datetime(self): assert_equal(d_edge.dtype, dates.dtype) assert_equal(t_edge.dtype, td) + def do_precision_lower_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([1.0 + eps, 2.0], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[0] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision_upper_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([0.0, 1.0 - eps], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[-1] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision(self, float_small, float_large): + self.do_precision_lower_bound(float_small, float_large) + self.do_precision_upper_bound(float_small, float_large) + + def test_precision(self): + # not looping results in a useful stack trace upon failure + self.do_precision(np.half, np.single) + self.do_precision(np.half, np.double) + self.do_precision(np.half, np.longdouble) + self.do_precision(np.single, np.double) + self.do_precision(np.single, np.longdouble) + self.do_precision(np.double, np.longdouble) + class TestHistogramOptimBinNums(object): """