diff --git a/doc/users/next_whats_new/2017-11-09_hist-ignores-nans.rst b/doc/users/next_whats_new/2017-11-09_hist-ignores-nans.rst new file mode 100644 index 000000000000..a3cf8baf7786 --- /dev/null +++ b/doc/users/next_whats_new/2017-11-09_hist-ignores-nans.rst @@ -0,0 +1,8 @@ +Histogram function now accepts nan values in input +-------------------------------------------------- + +The `~.Axes.hist` function now accepts nan values in both the *data* and +*weights* input. Previously this raised an error. Now any invalid values +are simply ignored when calculating the histogram values. + +In addition, masked arrays are now valid input for both *data* and *weights*. diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index e4ce15ec4dfc..4c2715de9bad 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -5893,7 +5893,9 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, ---------- x : (n,) array or sequence of (n,) arrays Input values, this takes either a single array or a sequence of - arrays which are not required to be of the same length + arrays which are not required to be of the same length. + Masked arrays or arrays with invalid values (e.g. ``nan``) are + allowed. bins : integer or sequence or 'auto', optional If an integer is given, ``bins + 1`` bin edges are calculated and @@ -5949,7 +5951,8 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, only contributes its associated weight towards the bin count (instead of 1). If *normed* or *density* is ``True``, the weights are normalized, so that the integral of the density - over the range remains 1. + over the range remains 1. Masked arrays or arrays with invalid + values (e.g. ``nan``) are allowed. Default is ``None`` @@ -6130,6 +6133,7 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, else: w = [None] * nx + # Comparing shape of weights vs. 
x if len(w) != nx: raise ValueError('weights should have the same shape as x') @@ -6138,6 +6142,18 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, raise ValueError( 'weights should have the same shape as x') + # Combine the masks from x[i] and w[i] (if applicable) into a single + # mask and apply it to both. + if not input_empty: + for i, (xi, wi) in enumerate(zip(x, w)): + xi = cbook.safe_masked_invalid(xi) + mask = xi.mask + if wi is not None: + wi = cbook.safe_masked_invalid(wi) + mask = mask | wi.mask + w[i] = np.ma.masked_array(wi, mask=mask) + x[i] = np.ma.masked_array(xi, mask=mask) + if color is None: color = [self._get_lines.get_next_color() for i in xrange(nx)] else: diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 7e26c0f08e4b..0ff95628bb68 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -1479,6 +1479,13 @@ def test_barh_tick_label(): align='center') +def test_hist_nans(): + # Check that histogram input data can include nans + fig, ax = plt.subplots() + ax.hist([1, 2, 1, 2, 3, np.nan], + weights=[1, 1, 1, np.nan, 1, 1]) + + @image_comparison(baseline_images=['hist_log'], remove_text=True) def test_hist_log():