diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index 60720deda5e5..a24156ea01fc 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -64,6 +64,16 @@ to maintain compatibility, aliased at ``np.lib.function_base.histogram(dd)``. Code that does ``from np.lib.function_base import *`` will need to be updated with the new location, and should consider not using ``import *`` in future. +``histogram`` will accept NaN values when explicit bins are given +----------------------------------------------------------------- +Previously it would fail when trying to compute a finite range for the data. +Since the range is ignored anyway when the bins are given explcitly, this error +was needless. + +Note that calling `histogram` on NaN values continues to raise the +`RuntimeWarning`s typical of working with nan values, which can be silenced +as usual with `errstate`. + ``np.r_`` works with 0d arrays, and ``np.ma.mr_` works with ``np.ma.masked`` ---------------------------------------------------------------------------- 0d arrays passed to the `r_` and `mr_` concatenation helpers are now treated as diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 9c4ba8efce05..ccae9de22443 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -259,8 +259,6 @@ def _get_bin_edges(a, bins, range, weights): The upper bound, lowerbound, and number of bins, used in the optimized implementation of `histogram` that works on uniform bins. """ - first_edge, last_edge = _get_outer_edges(a, range) - # parse the overloaded bins argument n_equal_bins = None bin_edges = None @@ -276,6 +274,8 @@ def _get_bin_edges(a, bins, range, weights): raise TypeError("Automated estimation of the number of " "bins is not supported for weighted data") + first_edge, last_edge = _get_outer_edges(a, range) + # truncate the range if needed if range is not None: keep = (a >= first_edge) @@ -304,6 +304,8 @@ def _get_bin_edges(a, bins, range, weights): if n_equal_bins < 1: raise ValueError('`bins` must be positive, when an integer') + first_edge, last_edge = _get_outer_edges(a, range) + elif np.ndim(bins) == 1: bin_edges = np.asarray(bins) if np.any(bin_edges[:-1] > bin_edges[1:]): diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index 8319041b8ace..0986ad16bc8c 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -249,6 +249,31 @@ def test_object_array_of_0d(self): np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001]) np.histogram([np.array([0.5]) for i in range(10)] + [.5]) + def test_some_nan_values(self): + # gh-7503 + one_nan = np.array([0, 1, np.nan]) + all_nan = np.array([np.nan, np.nan]) + + # the internal commparisons with NaN give warnings + sup = suppress_warnings() + sup.filter(RuntimeWarning) + with sup: + # can't infer range with nan + assert_raises(ValueError, histogram, one_nan, bins='auto') + assert_raises(ValueError, histogram, all_nan, bins='auto') + + # explicit range solves the problem + h, b = histogram(one_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 0) # nan is not counted + + # as does an explicit set of bins + h, b = histogram(one_nan, bins=[0, 1]) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins=[0, 1]) + assert_equal(h.sum(), 0) # nan is not counted + class TestHistogramOptimBinNums(object): """