Merge pull request #12150 from anntzer/histogram-int-dtype · numpy/numpy@53cfea9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 53cfea9

Browse files
authored
Merge pull request #12150 from anntzer/histogram-int-dtype
ENH: When histogramming data with integer dtype, force bin width >= 1.
2 parents aa0cc04 + c63969c commit 53cfea9

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
``histogram`` auto-binning now returns bin sizes >=1 for integer input data
2+
---------------------------------------------------------------------------
3+
For integer input data, bin sizes smaller than 1 result in spurious empty
4+
bins. This is now avoided when the number of bins is computed using one of the
5+
algorithms provided by `histogram_bin_edges`.

numpy/lib/_histograms_impl.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ def _get_bin_edges(a, bins, range, weights):
410410
# Do not call selectors on empty arrays
411411
width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
412412
if width:
413+
if np.issubdtype(a.dtype, np.integer) and width < 1:
414+
width = 1
413415
n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width))
414416
else:
415417
# Width can be zero for some estimators, e.g. FD when
@@ -625,6 +627,9 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
625627
The simplest and fastest estimator. Only takes into account the
626628
data size.
627629
630+
Additionally, if the data is of integer dtype, then the bin width chosen by
631+
these estimators will never be less than 1.
632+
628633
Examples
629634
--------
630635
>>> arr = np.array([0, 0, 0, 1, 2, 3, 3, 4, 5])

numpy/lib/tests/test_histograms.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ def test_small(self):
469469
'doane': 3, 'sqrt': 2, 'stone': 1}}
470470

471471
for testlen, expectedResults in small_dat.items():
472-
testdat = np.arange(testlen)
472+
testdat = np.arange(testlen).astype(float)
473473
for estimator, expbins in expectedResults.items():
474474
a, b = np.histogram(testdat, estimator)
475475
assert_equal(len(a), expbins, err_msg="For the {0} estimator "
@@ -592,6 +592,30 @@ def test_signed_integer_data(self, bins):
592592
assert_array_equal(hist, hist32)
593593
assert_array_equal(edges, edges32)
594594

595+
@pytest.mark.parametrize("bins", ['auto', 'fd', 'doane', 'scott',
596+
'stone', 'rice', 'sturges'])
597+
def test_integer(self, bins):
598+
"""
599+
Test that bin width for integer data is at least 1.
600+
"""
601+
with suppress_warnings() as sup:
602+
if bins == 'stone':
603+
sup.filter(RuntimeWarning)
604+
assert_equal(
605+
np.histogram_bin_edges(np.tile(np.arange(9), 1000), bins),
606+
np.arange(9))
607+
608+
def test_integer_non_auto(self):
609+
"""
610+
Test that the bin-width>=1 requirement *only* applies to auto binning.
611+
"""
612+
assert_equal(
613+
np.histogram_bin_edges(np.tile(np.arange(9), 1000), 16),
614+
np.arange(17) / 2)
615+
assert_equal(
616+
np.histogram_bin_edges(np.tile(np.arange(9), 1000), [.1, .2]),
617+
[.1, .2])
618+
595619
def test_simple_weighted(self):
596620
"""
597621
Check that weighted data raises a TypeError

0 commit comments

Comments
 (0)
0