BUG: histogram small range robust · numpy/numpy@4d8a833 · GitHub

Commit 4d8a833

BUG: histogram small range robust
* Fixes #23110
* The histogram `norm` variable is used to determine the bin index of input values, and in some cases `norm` is calculated by dividing `n_equal_bins` by the range of the data; when the range of the data is extraordinarily small, `norm` can become floating point infinity.
* In this patch, we delay calculating `norm` to increase resistance to the generation of infinite values: a really small input value divided by a really small range is far less likely to overflow, so we effectively just change the order of operations a bit.
* However, I haven't considered whether this is broadly superior for resisting non-finite floating point values across other `histogram` input/extreme value permutations--one might speculate that this just patches one extreme case that happened to show up in the wild, while increasing the likelihood of some other extreme case that isn't in our test suite yet.
* The main justification for this patch is that it fixes an issue that occurred in the wild and adds a test for it--if another extreme value case eventually pops up, at least this case will have a regression guard to keep guiding us in the right direction.
1 parent c19ce9c commit 4d8a833
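For illustration only (not part of the commit), here is a minimal sketch of the floating-point behavior described above, using the edge values from gh-23110; `width`, `old_norm`, `value`, and `f_index` are names invented for this snippet, while `first_edge`, `last_edge`, and `n_equal_bins` mirror the patch:

first_edge, last_edge = -1e-308, -2e-313   # the range requested in gh-23110
n_equal_bins = 2
width = last_edge - first_edge             # ~1e-308, close to the float64 subnormal range

# Old order of operations: the scaling factor itself overflows.
old_norm = n_equal_bins / width            # ~2e308, beyond float64 max (~1.8e308)
print(old_norm)                            # inf

# New order of operations: divide the (equally tiny) offset by the width
# first, so every intermediate stays finite.
value = -0.9e-308
f_index = (value - first_edge) / width * n_equal_bins
print(f_index)                             # ~0.2, so the value lands in bin 0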

File tree

2 files changed: +12 -2 lines

numpy/lib/histograms.py

Lines changed: 4 additions & 2 deletions
@@ -811,7 +811,8 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
         n = np.zeros(n_equal_bins, ntype)
 
         # Pre-compute histogram scaling factor
-        norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge)
+        norm_numerator = n_equal_bins
+        norm_denom = _unsigned_subtract(last_edge, first_edge)
 
         # We iterate over blocks here for two reasons: the first is that for
         # large arrays, it is actually faster (for example for a 10^8 array it
@@ -839,7 +840,8 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
 
             # Compute the bin indices, and for values that lie exactly on
             # last_edge we need to subtract one
-            f_indices = _unsigned_subtract(tmp_a, first_edge) * norm
+            f_indices = ((_unsigned_subtract(tmp_a, first_edge) / norm_denom)
+                         * norm_numerator)
             indices = f_indices.astype(np.intp)
             indices[indices == n_equal_bins] -= 1
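As a rough standalone illustration of the patched index computation (assumptions: plain float64 edges, no weights, no block iteration, and ordinary subtraction in place of `_unsigned_subtract`), it boils down to something like the sketch below; `bin_indices_sketch` is a hypothetical helper, not a numpy function:

import numpy as np

def bin_indices_sketch(a, first_edge, last_edge, n_equal_bins):
    # Keep the numerator and denominator of the old `norm` separate, as the
    # patch does, and divide by the range before scaling by the bin count.
    norm_numerator = n_equal_bins
    norm_denom = last_edge - first_edge
    f_indices = ((np.asarray(a, dtype=np.float64) - first_edge) / norm_denom
                 * norm_numerator)
    indices = f_indices.astype(np.intp)
    # Values lying exactly on last_edge belong in the final bin.
    indices[indices == n_equal_bins] -= 1
    return indices

print(bin_indices_sketch([-0.9e-308], -1e-308, -2e-313, 2))  # [0]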

numpy/lib/tests/test_histograms.py

Lines changed: 8 additions & 0 deletions
@@ -409,6 +409,14 @@ def test_big_arrays(self):
         assert_equal(type(hist), type((1, 2)))
 
 
+    def test_gh_23110(self):
+        hist, e = np.histogram(np.array([-0.9e-308], dtype='>f8'),
+                               bins=2,
+                               range=(-1e-308, -2e-313))
+        expected_hist = np.array([1, 0])
+        assert_array_equal(hist, expected_hist)
+
+
 class TestHistogramOptimBinNums:
     """
     Provide test coverage when using provided estimators for optimal number of
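To exercise just the new regression test in a source checkout with this commit applied, one would typically run something along the lines of:

python -m pytest numpy/lib/tests/test_histograms.py -k gh_23110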

0 commit comments