8000 BUG: Allow nan values in the data when the bins are explicit by eric-wieser · Pull Request #10271 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG: Allow nan values in the data when the bins are explicit #10271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/release/1.15.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ to maintain compatibility, aliased at ``np.lib.function_base.histogram(dd)``.
Code that does ``from np.lib.function_base import *`` will need to be updated
with the new location, and should consider not using ``import *`` in future.

``histogram`` will accept NaN values when explicit bins are given
-----------------------------------------------------------------
Previously it would fail when trying to compute a finite range for the data.
Since the range is ignored anyway when the bins are given explicitly, this error
was needless.

Note that calling ``histogram`` on NaN values continues to raise the
``RuntimeWarning``\ s typical of working with NaN values, which can be silenced
as usual with ``errstate``.

``np.r_`` works with 0d arrays, and ``np.ma.mr_`` works with ``np.ma.masked``
-----------------------------------------------------------------------------
0d arrays passed to the `r_` and `mr_` concatenation helpers are now treated as
Expand Down
6 changes: 4 additions & 2 deletions numpy/lib/histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,6 @@ def _get_bin_edges(a, bins, range, weights):
The upper bound, lower bound, and number of bins, used in the optimized
implementation of `histogram` that works on uniform bins.
"""
first_edge, last_edge = _get_outer_edges(a, range)

# parse the overloaded bins argument
n_equal_bins = None
bin_edges = None
Expand All @@ -276,6 +274,8 @@ def _get_bin_edges(a, bins, range, weights):
raise TypeError("Automated estimation of the number of "
"bins is not supported for weighted data")

first_edge, last_edge = _get_outer_edges(a, range)

# truncate the range if needed
if range is not None:
keep = (a >= first_edge)
Expand Down Expand Up @@ -304,6 +304,8 @@ def _get_bin_edges(a, bins, range, weights):
if n_equal_bins < 1:
raise ValueError('`bins` must be positive, when an integer')

first_edge, last_edge = _get_outer_edges(a, range)

elif np.ndim(bins) == 1:
bin_edges = np.asarray(bins)
if np.any(bin_edges[:-1] > bin_edges[1:]):
Expand Down
25 changes: 25 additions & 0 deletions numpy/lib/tests/test_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,31 @@ def test_object_array_of_0d(self):
np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
np.histogram([np.array([0.5]) for i in range(10)] + [.5])

def test_some_nan_values(self):
# gh-7503
one_nan = np.array([0, 1, np.nan])
all_nan = np.array([np.nan, np.nan])

# the internal comparisons with NaN give warnings
sup = suppress_warnings()
sup.filter(RuntimeWarning)
with sup:
# can't infer range with nan
assert_raises(ValueError, histogram, one_nan, bins='auto')
assert_raises(ValueError, histogram, all_nan, bins='auto')

# explicit range solves the problem
h, b = histogram(one_nan, bins='auto', range=(0, 1))
assert_equal(h.sum(), 2) # nan is not counted
h, b = histogram(all_nan, bins='auto', range=(0, 1))
assert_equal(h.sum(), 0) # nan is not counted

# as does an explicit set of bins
h, b = histogram(one_nan, bins=[0, 1])
assert_equal(h.sum(), 2) # nan is not counted
h, b = histogram(all_nan, bins=[0, 1])
assert_equal(h.sum(), 0) # nan is not counted


class TestHistogramOptimBinNums(object):
"""
Expand Down
0