diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index a6b23b8927a2..f57a89ee6acf 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -198,6 +198,13 @@ passed explicitly, and are not yet computed automatically. No longer does an IQR of 0 result in `n_bins=1`, rather the number of bins chosen is related to the data size in this situation. +``histogram`` and ``histogramdd`` return edges matching the float type of the data +---------------------------------------------------------------------------------- +When passed ``float16``, ``np.float32``, or ``np.longdouble`` data, the +returned edges are now of the same dtype. Previously, ``histogram`` would only +return the same type if explicit bins were given, and ``histogram`` would +produce ``float64`` bins no matter what the inputs. + ``histogramdd`` allows explicit ranges to be given in a subset of axes ---------------------------------------------------------------------- The ``range`` argument of `histogramdd` can now contain ``None`` values to diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 90e19769e585..2922b3a86a3d 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -877,12 +877,6 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): # bins is an integer bins = D*[bins] - # avoid rounding issues for comparisons when dealing with inexact types - if np.issubdtype(sample.dtype, np.inexact): - edge_dt = sample.dtype - else: - edge_dt = float - # normalize the range argument if range is None: range = (None,) * D @@ -896,13 +890,12 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): raise ValueError( '`bins[{}]` must be positive, when an integer'.format(i)) smin, smax = _get_outer_edges(sample[:,i], range[i]) - edges[i] = np.linspace(smin, smax, bins[i] + 1, dtype=edge_dt) + edges[i] = np.linspace(smin, smax, bins[i] + 1) elif np.ndim(bins[i]) == 1: - edges[i] = np.asarray(bins[i], edge_dt) - # not just monotonic, due to the use of mindiff below - if np.any(edges[i][:-1] >= edges[i][1:]): + edges[i] = np.asarray(bins[i]) + if np.any(edges[i][:-1] > edges[i][1:]): raise ValueError( - '`bins[{}]` must be strictly increasing, when an array' + '`bins[{}]` must be monotonically increasing, when an array' .format(i)) else: raise ValueError( @@ -913,7 +906,8 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): # Compute the bin number each sample falls into. Ncount = tuple( - np.digitize(sample[:, i], edges[i]) + # avoid np.digitize to work around gh-11022 + np.searchsorted(edges[i], sample[:, i], side='right') for i in _range(D) ) @@ -921,16 +915,10 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): # For the rightmost bin, we want values equal to the right edge to be # counted in the last bin, and not as an outlier. for i in _range(D): - # Rounding precision - mindiff = dedges[i].min() - if not np.isinf(mindiff): - decimal = int(-np.log10(mindiff)) + 6 - # Find which points are on the rightmost edge. - not_smaller_than_edge = (sample[:, i] >= edges[i][-1]) - on_edge = (np.around(sample[:, i], decimal) == - np.around(edges[i][-1], decimal)) - # Shift these points one bin to the left. - Ncount[i][on_edge & not_smaller_than_edge] -= 1 + # Find which points are on the rightmost edge. + on_edge = (sample[:, i] == edges[i][-1]) + # Shift these points one bin to the left. + Ncount[i][on_edge] -= 1 # Compute the sample indices in the flattened histogram matrix. # This raises an error if the array is too large. diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index 06daacbdced2..8583efab8af0 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -612,8 +612,6 @@ def test_bins_errors(self): x = np.arange(8).reshape(2, 4) assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) - assert_raises( - ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]]) assert_raises( ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) @@ -646,7 +644,7 @@ def test_rightmost_binedge(self): bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) + assert_(hist[1] == 0.0) x = [1.0001] bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) @@ -660,3 +658,40 @@ def test_finite_range(self): range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) assert_raises(ValueError, histogramdd, vals, range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + + def test_equal_edges(self): + """ Test that adjacent entries in an edge array can be equal """ + x = np.array([0, 1, 2]) + y = np.array([0, 1, 2]) + x_edges = np.array([0, 2, 2]) + y_edges = 1 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + hist_expected = np.array([ + [2.], + [1.], # x == 2 falls in the final bin + ]) + assert_equal(hist, hist_expected) + + def test_edge_dtype(self): + """ Test that if an edge array is input, its type is preserved """ + x = np.array([0, 10, 20]) + y = x / 10 + x_edges = np.array([0, 5, 15, 20]) + y_edges = x_edges / 10 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(edges[0].dtype, x_edges.dtype) + assert_equal(edges[1].dtype, y_edges.dtype) + + def test_large_integers(self): + big = 2**60 # Too large to represent with a full precision float + + x = np.array([0], np.int64) + x_edges = np.array([-1, +1], np.int64) + y = big + x + y_edges = big + x_edges + + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(hist[0, 0], 1) diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index 402c18850dc6..cca316e9a9b4 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -650,7 +650,7 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None): N = 1 if N != 1 and N != 2: - xedges = yedges = asarray(bins, float) + xedges = yedges = asarray(bins) bins = [xedges, yedges] hist, edges = histogramdd([x, y], bins, range, normed, weights) return hist, edges[0], edges[1]