From 8f75d8ab2a19b6d3cefab407a0088e5bac35a23e Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 12:23:15 -0400 Subject: [PATCH 01/10] Add SigmaClippedStats class --- astropy/stats/sigma_clipping.py | 184 +++++++++++++++++++++++++++++++- 1 file changed, 180 insertions(+), 4 deletions(-) diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index e453681fcf5c..1928c0c38155 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -25,7 +25,7 @@ from numpy.typing import ArrayLike, NDArray -__all__ = ["SigmaClip", "sigma_clip", "sigma_clipped_stats"] +__all__ = ["SigmaClip", "sigma_clip", "SigmaClippedStats", "sigma_clipped_stats"] class SigmaClip: @@ -112,7 +112,7 @@ class SigmaClip: See Also -------- - sigma_clip, sigma_clipped_stats + sigma_clip, sigma_clipped_stats, SigmaClippedStats Notes ----- @@ -817,7 +817,7 @@ def sigma_clip( See Also -------- - SigmaClip, sigma_clipped_stats + SigmaClip, sigma_clipped_stats, SigmaClippedStats Notes ----- @@ -879,6 +879,182 @@ def sigma_clip( ) +class SigmaClippedStats: + """ + Class to calculate sigma-clipped statistics on the provided data. + + Parameters + ---------- + data : array-like or `~numpy.ma.MaskedArray` + Data array or object that can be converted to an array. + + mask : `numpy.ndarray` (bool), optional + A boolean mask with the same shape as ``data``, where a `True` + value indicates the corresponding element of ``data`` is masked. + Masked pixels are excluded when computing the statistics. + + mask_value : float, optional + A data value (e.g., ``0.0``) that is ignored when computing the + statistics. ``mask_value`` will be masked in addition to any + input ``mask``. + + sigma : float, optional + The number of standard deviations to use for both the lower + and upper clipping limit. These limits are overridden by + ``sigma_lower`` and ``sigma_upper``, if input. The default is 3. + + sigma_lower : float or None, optional + The number of standard deviations to use as the lower bound for + the clipping limit. If `None` then the value of ``sigma`` is + used. The default is `None`. + + sigma_upper : float or None, optional + The number of standard deviations to use as the upper bound for + the clipping limit. If `None` then the value of ``sigma`` is + used. The default is `None`. + + maxiters : int or None, optional + The maximum number of sigma-clipping iterations to perform or + `None` to clip until convergence is achieved (i.e., iterate + until the last iteration clips nothing). If convergence is + achieved prior to ``maxiters`` iterations, the clipping + iterations will stop. The default is 5. + + cenfunc : {'median', 'mean'} or callable, optional + The statistic or callable function/object used to compute + the center value for the clipping. If using a callable + function/object and the ``axis`` keyword is used, then it must + be able to ignore NaNs (e.g., `numpy.nanmean`) and it must have + an ``axis`` keyword to return an array with axis dimension(s) + removed. The default is ``'median'``. + + stdfunc : {'std', 'mad_std'} or callable, optional + The statistic or callable function/object used to compute the + standard deviation about the center value. If using a callable + function/object and the ``axis`` keyword is used, then it must + be able to ignore NaNs (e.g., `numpy.nanstd`) and it must have + an ``axis`` keyword to return an array with axis dimension(s) + removed. The default is ``'std'``. + + axis : None or int or tuple of int, optional + The axis or axes along which to sigma clip the data. If `None`, + then the flattened data will be used. ``axis`` is passed to the + ``cenfunc`` and ``stdfunc``. The default is `None`. + + grow : float or `False`, optional + Radius within which to mask the neighbouring pixels of those + that fall outwith the clipping limits (only applied along + ``axis``, if specified). As an example, for a 2D image a value + of 1 will mask the nearest pixels in a cross pattern around each + deviant pixel, while 1.5 will also reject the nearest diagonal + neighbours and so on. + + Notes + ----- + The best performance will typically be obtained by setting + ``cenfunc`` and ``stdfunc`` to one of the built-in functions + specified as a string. If one of the options is set to a string + while the other has a custom callable, you may in some cases + see better performance if you have the `bottleneck`_ package + installed. To preserve accuracy, bottleneck is only used for float64 + computations. + + .. _bottleneck: https://github.com/pydata/bottleneck + + See Also + -------- + sigma_clipped_stats, SigmaClip, sigma_clip + """ + + def __init__( + self, + data: ArrayLike, + *, + mask: NDArray | None = None, + mask_value: float | None = None, + sigma: float = 3.0, + sigma_lower: float | None = None, + sigma_upper: float | None = None, + maxiters: int = 5, + cenfunc: Literal["median", "mean"] | Callable = "median", + stdfunc: Literal["std", "mad_std"] | Callable = "std", + axis: int | tuple[int, ...] | None = None, + grow: float | Literal[False] | None = False, + ) -> None: + sigclip = SigmaClip( + sigma=sigma, + sigma_lower=sigma_lower, + sigma_upper=sigma_upper, + maxiters=maxiters, + cenfunc=cenfunc, + stdfunc=stdfunc, + grow=grow, + ) + + if mask is not None: + data = np.ma.MaskedArray(data, mask) + if mask_value is not None: + data = np.ma.masked_values(data, mask_value) + + if isinstance(data, np.ma.MaskedArray) and data.mask.all(): + raise ValueError("input data is all masked") + + self.data = sigclip( + data, axis=axis, masked=False, return_bounds=False, copy=True + ) + self.axis = axis + + def mean(self) -> float | NDArray: + """ + Calculate the mean of the data. + + NaN values are ignored. + + Returns + ------- + mean : float or `~numpy.ndarray` + The mean of the data. + """ + return nanmean(self.data, axis=self.axis) + + def median(self) -> float | NDArray: + """ + Calculate the median of the data. + + NaN values are ignored. + + Returns + ------- + median : float or `~numpy.ndarray` + The median of the data. + """ + return nanmedian(self.data, axis=self.axis) + + def std(self, ddof: int = 0) -> float | NDArray: + """ + Calculate the standard deviation of the data. + + NaN values are ignored. + + Parameters + ---------- + ddof : int, optional + The delta degrees of freedom for the standard deviation + calculation. The divisor used in the calculation is ``N - + ddof``, where ``N`` represents the number of elements. For + a population standard deviation where you have data for the + entire population, use ``ddof=0``. For a sample standard + deviation where you have a sample of the population, use + ``ddof=1``. The default is 0. + + Returns + ------- + std : float or `~numpy.ndarray` + The standard deviation of the data. + """ + return nanstd(self.data, axis=self.axis, ddof=ddof) + + def sigma_clipped_stats( data: ArrayLike, mask: NDArray | None = None, @@ -990,7 +1166,7 @@ def sigma_clipped_stats( See Also -------- - SigmaClip, sigma_clip + SigmaClippedStats, SigmaClip, sigma_clip """ if mask is not None: data = np.ma.MaskedArray(data, mask) From c2c177ac6ec41c47bd132cf4a0bc38de814e014f Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 13:29:52 -0400 Subject: [PATCH 02/10] Add var method and nanvar function --- astropy/stats/nanfunctions.py | 10 +++++++++- astropy/stats/sigma_clipping.py | 25 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/astropy/stats/nanfunctions.py b/astropy/stats/nanfunctions.py index 098aba060cf1..dafffb2964f3 100644 --- a/astropy/stats/nanfunctions.py +++ b/astropy/stats/nanfunctions.py @@ -77,7 +77,11 @@ def _apply_bottleneck( result = function(array, axis=axis, **kwargs) if isinstance(array, Quantity): - return array.__array_wrap__(result) + if function is bottleneck.nanvar: + result = array._result_as_quantity(result, array.unit**2, None) + else: + result = array.__array_wrap__(result) + return result elif isinstance(result, float): # For compatibility with numpy, always return a numpy scalar. return np.float64(result) @@ -89,6 +93,7 @@ def _apply_bottleneck( nanmean=functools.partial(_apply_bottleneck, bottleneck.nanmean), nanmedian=functools.partial(_apply_bottleneck, bottleneck.nanmedian), nanstd=functools.partial(_apply_bottleneck, bottleneck.nanstd), + nanvar=functools.partial(_apply_bottleneck, bottleneck.nanvar), ) np_funcs = dict( @@ -96,6 +101,7 @@ def _apply_bottleneck( nanmean=np.nanmean, nanmedian=np.nanmedian, nanstd=np.nanstd, + nanvar=np.nanvar, ) def _dtype_dispatch(func_name): @@ -118,12 +124,14 @@ def wrapped(*args, **kwargs): nanmean = _dtype_dispatch("nanmean") nanmedian = _dtype_dispatch("nanmedian") nanstd = _dtype_dispatch("nanstd") + nanvar = _dtype_dispatch("nanvar") else: nansum = np.nansum nanmean = np.nanmean nanmedian = np.nanmedian nanstd = np.nanstd + nanvar = np.nanvar def nanmadstd( diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index 1928c0c38155..f13828a4556b 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -8,7 +8,7 @@ import numpy as np from astropy.stats._fast_sigma_clip import _sigma_clip_fast -from astropy.stats.nanfunctions import nanmadstd, nanmean, nanmedian, nanstd +from astropy.stats.nanfunctions import nanmadstd, nanmean, nanmedian, nanstd, nanvar from astropy.units import Quantity from astropy.utils import isiterable from astropy.utils.compat.numpycompat import NUMPY_LT_2_0 @@ -1054,6 +1054,29 @@ def std(self, ddof: int = 0) -> float | NDArray: """ return nanstd(self.data, axis=self.axis, ddof=ddof) + def var(self, ddof: int = 0) -> float | NDArray: + """ + Calculate the variance of the data. + + NaN values are ignored. + + Parameters + ---------- + ddof : int, optional + The delta degrees of freedom. The divisor used in the + calculation is ``N - ddof``, where ``N`` represents the + number of elements. For a population variance where you have + data for the entire population, use ``ddof=0``. For a sample + variance where you have a sample of the population, use + ``ddof=1``. The default is 0. + + Returns + ------- + var : float or `~numpy.ndarray` + The variance of the data. + """ + return nanvar(self.data, axis=self.axis, ddof=ddof) + def sigma_clipped_stats( data: ArrayLike, From b58ef52e2d90504a079cb9ca413006b266a6cdd6 Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 13:31:45 -0400 Subject: [PATCH 03/10] Add biweight_location and biweight_scale methods --- astropy/stats/sigma_clipping.py | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index f13828a4556b..d3c2f13a858c 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -8,6 +8,7 @@ import numpy as np from astropy.stats._fast_sigma_clip import _sigma_clip_fast +from astropy.stats.biweight import biweight_location, biweight_scale from astropy.stats.nanfunctions import nanmadstd, nanmean, nanmedian, nanstd, nanvar from astropy.units import Quantity from astropy.utils import isiterable @@ -1077,6 +1078,54 @@ def var(self, ddof: int = 0) -> float | NDArray: """ return nanvar(self.data, axis=self.axis, ddof=ddof) + def biweight_location( + self, c: float = 6.0, M: float | None = None + ) -> float | NDArray: + """ + Calculate the biweight location of the data. + + NaN values are ignored. + + Parameters + ---------- + c : float, optional + Tuning constant for the biweight estimator. Default value is + 6.0. + + M : float or None, optional + Initial guess for the biweight location. Default value is + `None`. + + Returns + ------- + biweight_location : float or `~numpy.ndarray` + The biweight location of the data. + """ + return biweight_location(self.data, c=c, M=M, axis=self.axis, ignore_nan=True) + + def biweight_scale(self, c: float = 6.0, M: float | None = None) -> float | NDArray: + """ + Calculate the biweight scale of the data. + + NaN values are ignored. + + Parameters + ---------- + c : float, optional + Tuning constant for the biweight estimator. Default value is + 6.0. + + M : float or None, optional + Initial guess for the biweight location. Default value is + `None`. + + Returns + ------- + biweight_scale : float or `~numpy.ndarray` + The biweight scale of the data. + """ + return biweight_scale(self.data, c=c, M=M, axis=self.axis, ignore_nan=True) + def sigma_clipped_stats( data: ArrayLike, From 61b0cb6d710e664cb8f7988a06d17f84498bb89b Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 13:33:08 -0400 Subject: [PATCH 04/10] Add mode and mad_std methods --- astropy/stats/sigma_clipping.py | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index d3c2f13a858c..11cdc43205af 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -9,6 +9,7 @@ from astropy.stats._fast_sigma_clip import _sigma_clip_fast from astropy.stats.biweight import biweight_location, biweight_scale +from astropy.stats.funcs import mad_std from astropy.stats.nanfunctions import nanmadstd, nanmean, nanmedian, nanstd, nanvar from astropy.units import Quantity from astropy.utils import isiterable @@ -1031,6 +1032,30 @@ def median(self) -> float | NDArray: """ return nanmedian(self.data, axis=self.axis) + def mode( + self, median_factor: float = 3.0, mean_factor: float = 2.0 + ) -> float | NDArray: + """ + Calculate the mode of the data using a estimator of the form + ``(median_factor * median) - (mean_factor * mean)``. + + NaN values are ignored. + + Parameters + ---------- + median_factor : float, optional + The multiplicative factor for the data median. Defaults to 3. + + mean_factor : float, optional + The multiplicative factor for the data mean. Defaults to 2. + + Returns + ------- + mode : float or `~numpy.ndarray` + The estimated mode of the data. + """ + return (median_factor * self.median()) - (mean_factor * self.mean()) + def std(self, ddof: int = 0) -> float | NDArray: """ Calculate the standard deviation of the data. @@ -1126,6 +1151,20 @@ def biweight_scale(self, c: float = 6.0, M: float | None = None) -> float | NDAr """ return biweight_scale(self.data, c=c, M=M, axis=self.axis, ignore_nan=True) + def mad_std(self) -> float | NDArray: + """ + Calculate the median absolute deviation (MAD) based standard + deviation of the data. + + NaN values are ignored. + + Returns + ------- + mad_std : float or `~numpy.ndarray` + The MAD-based standard deviation of the data. + """ + return mad_std(self.data, axis=self.axis, ignore_nan=True) + def sigma_clipped_stats( data: ArrayLike, From cc7ab2165b0f3c89728f889f1b807933a77317b1 Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Tue, 22 Oct 2024 10:42:36 -0400 Subject: [PATCH 05/10] Add nanmin/nanmax functions; add min, max, and sum to SigmaClippedStats --- astropy/stats/nanfunctions.py | 8 ++++++ astropy/stats/sigma_clipping.py | 50 ++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/astropy/stats/nanfunctions.py b/astropy/stats/nanfunctions.py index dafffb2964f3..e18bb10fac74 100644 --- a/astropy/stats/nanfunctions.py +++ b/astropy/stats/nanfunctions.py @@ -90,6 +90,8 @@ def _apply_bottleneck( bn_funcs = dict( nansum=functools.partial(_apply_bottleneck, bottleneck.nansum), + nanmin=functools.partial(_apply_bottleneck, bottleneck.nanmin), + nanmax=functools.partial(_apply_bottleneck, bottleneck.nanmax), nanmean=functools.partial(_apply_bottleneck, bottleneck.nanmean), nanmedian=functools.partial(_apply_bottleneck, bottleneck.nanmedian), nanstd=functools.partial(_apply_bottleneck, bottleneck.nanstd), @@ -98,6 +100,8 @@ def _apply_bottleneck( np_funcs = dict( nansum=np.nansum, + nanmin=np.nanmin, + nanmax=np.nanmax, nanmean=np.nanmean, nanmedian=np.nanmedian, nanstd=np.nanstd, @@ -121,6 +125,8 @@ def wrapped(*args, **kwargs): return wrapped nansum = _dtype_dispatch("nansum") + nanmin = _dtype_dispatch("nanmin") + nanmax = _dtype_dispatch("nanmax") nanmean = _dtype_dispatch("nanmean") nanmedian = _dtype_dispatch("nanmedian") nanstd = _dtype_dispatch("nanstd") @@ -128,6 +134,8 @@ def wrapped(*args, **kwargs): else: nansum = np.nansum + nanmin = np.nanmin + nanmax = np.nanmax nanmean = np.nanmean nanmedian = np.nanmedian nanstd = np.nanstd diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index 11cdc43205af..2a251a2d31a4 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -10,7 +10,16 @@ from astropy.stats._fast_sigma_clip import _sigma_clip_fast from astropy.stats.biweight import biweight_location, biweight_scale from astropy.stats.funcs import mad_std -from astropy.stats.nanfunctions import nanmadstd, nanmean, nanmedian, nanstd, nanvar +from astropy.stats.nanfunctions import ( + nanmadstd, + nanmax, + nanmean, + nanmedian, + nanmin, + nanstd, + nansum, + nanvar, +) from astropy.units import Quantity from astropy.utils import isiterable from astropy.utils.compat.numpycompat import NUMPY_LT_2_0 @@ -1006,6 +1015,45 @@ def __init__( ) self.axis = axis + def min(self) -> float | NDArray: + """ + Calculate the minimum of the data. + + NaN values are ignored. + + Returns + ------- + min : float or `~numpy.ndarray` + The minimum of the data. + """ + return nanmin(self.data, axis=self.axis) + + def max(self) -> float | NDArray: + """ + Calculate the maximum of the data. + + NaN values are ignored. + + Returns + ------- + max : float or `~numpy.ndarray` + The maximum of the data. + """ + return nanmax(self.data, axis=self.axis) + + def sum(self) -> float | NDArray: + """ + Calculate the sum of the data. + + NaN values are ignored. + + Returns + ------- + sum : float or `~numpy.ndarray` + The sum of the data. + """ + return nansum(self.data, axis=self.axis) + def mean(self) -> float | NDArray: """ Calculate the mean of the data. From 7302e1de1823556edc05aefc7b204a90781678ae Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 13:54:43 -0400 Subject: [PATCH 06/10] Use SigmaClippedStats in sigma_clipped_stats --- astropy/stats/sigma_clipping.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index 2a251a2d31a4..334c16a8f781 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -1335,21 +1335,16 @@ def sigma_clipped_stats( if isinstance(data, np.ma.MaskedArray) and data.mask.all(): return np.ma.masked, np.ma.masked, np.ma.masked - sigclip = SigmaClip( + stats = SigmaClippedStats( + data, sigma=sigma, sigma_lower=sigma_lower, sigma_upper=sigma_upper, maxiters=maxiters, cenfunc=cenfunc, stdfunc=stdfunc, + axis=axis, grow=grow, ) - data_clipped = sigclip( - data, axis=axis, masked=False, return_bounds=False, copy=True - ) - - mean = nanmean(data_clipped, axis=axis) - median = nanmedian(data_clipped, axis=axis) - std = nanstd(data_clipped, ddof=std_ddof, axis=axis) - return mean, median, std + return stats.mean(), stats.median(), stats.std(ddof=std_ddof) From 2d79e17eec03c86ff281d3c51efd6fd62df31d0d Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 14:07:42 -0400 Subject: [PATCH 07/10] Add SigmaClippedStats to the stats narrative docs --- docs/stats/robust.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/stats/robust.rst b/docs/stats/robust.rst index 172ac46e0559..4d47bf6a1f4f 100644 --- a/docs/stats/robust.rst +++ b/docs/stats/robust.rst @@ -116,6 +116,22 @@ calculation: >>> sigma_clipped_stats(y, sigma=3, maxiters=10) # doctest: +FLOAT_CMP (np.float64(-0.0228473012826993), np.float64(-0.02356858871405204), np.float64(0.2079616996908159)) + +:class:`~astropy.stats.SigmaClippedStats` is a +convenience class that extends the functionality of +:func:`~astropy.stats.sigma_clipped_stats`: + +.. doctest-requires:: scipy + + >>> from astropy.stats import SigmaClippedStats + >>> stats = SigmaClippedStats(y, sigma=3, maxiters=10) + >>> stats.mean(), stats.median(), stats.std() # doctest: +FLOAT_CMP + (np.float64(-0.0228473012826993), np.float64(-0.02356858871405204), np.float64(0.2079616996908159)) + >>> stats.mode(), stats.var(), stats.mad_std() # doctest: +FLOAT_CMP + (np.float64(-0.025011163576757534), np.float64(0.043248068538293126), np.float64(0.21277510956855722)) + >>> stats.biweight_location(), stats.biweight_scale() # doctest: +FLOAT_CMP + (np.float64(-0.0183718864859565), np.float64(0.21730062377965248)) + :func:`~astropy.stats.sigma_clip` and :class:`~astropy.stats.SigmaClip` can be combined with other robust statistics to provide improved outlier rejection as well. From 0cdbe8d0da22b6ee42373ee38ba37cf67b012612 Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 14:49:50 -0400 Subject: [PATCH 08/10] Add SigmaClippedStats tests --- astropy/stats/tests/test_sigma_clipping.py | 93 +++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/astropy/stats/tests/test_sigma_clipping.py b/astropy/stats/tests/test_sigma_clipping.py index dcc52beab650..886e0aa140e3 100644 --- a/astropy/stats/tests/test_sigma_clipping.py +++ b/astropy/stats/tests/test_sigma_clipping.py @@ -5,8 +5,14 @@ from numpy.testing import assert_allclose, assert_equal from astropy import units as u +from astropy.coordinates import Angle from astropy.stats import mad_std -from astropy.stats.sigma_clipping import SigmaClip, sigma_clip, sigma_clipped_stats +from astropy.stats.sigma_clipping import ( + SigmaClip, + SigmaClippedStats, + sigma_clip, + sigma_clipped_stats, +) from astropy.table import MaskedColumn from astropy.utils.compat import COPY_IF_NEEDED from astropy.utils.compat.optional_deps import HAS_BOTTLENECK, HAS_SCIPY @@ -174,6 +180,91 @@ def test_sigma_clipped_stats_masked_col(): sigma_clipped_stats(col) +@pytest.mark.parametrize("units", [False, True]) +@pytest.mark.parametrize("sigma", np.linspace(2, 5, 10)) +def test_sigmaclippedstats_stats(units, sigma): + """Test SigmaClippedStats class.""" + data = np.ones((101, 205)) + rng = np.random.default_rng(0) + idx = rng.integers(0, 100, 10) + data[idx, idx] = 1000 + + if units: + unit = u.m + data <<= unit + else: + unit = 1 + + stats = SigmaClippedStats(data, sigma=sigma) + assert stats.min() == 1.0 * unit + assert stats.max() == 1.0 * unit + assert stats.sum() == np.sum(data) - (1000 * 10) * unit + assert stats.mean() == 1.0 * unit + assert stats.median() == 1.0 * unit + assert stats.mode() == 1.0 * unit + assert stats.mode(median_factor=3.5, mean_factor=2.5) == 1.0 * unit + assert stats.std() == 0.0 * unit + assert stats.var() == 0.0 * unit**2 + assert stats.mad_std() == 0.0 * unit + assert stats.biweight_location() == 1.0 * unit + assert stats.biweight_scale() == 0.0 * unit + + # test nanvar with Angle + data = Angle([10, 20, 30, 40], "deg") + stats = SigmaClippedStats(data, sigma=5) + assert isinstance(stats.mean(), u.Quantity) + assert isinstance(stats.var(), u.Quantity) + assert not isinstance(stats.var(), Angle) + assert stats.mean() == 25 * u.deg + assert stats.var() == 125 * u.deg**2 + + +def test_sigmaclippedstats_mask(): + data = np.ones((101, 205)) + rng = np.random.default_rng(0) + idx = rng.integers(0, 100, 10) + data[idx, idx] = 1000 + stats = SigmaClippedStats(data, sigma=3.2) + assert stats.min() == 1.0 + assert stats.max() == 1.0 + assert stats.sum() == np.sum(data) - 1000 * 10 + assert stats.mean() == 1.0 + assert stats.median() == 1.0 + assert stats.mode() == 1.0 + assert stats.mode(median_factor=3.5, mean_factor=2.5) == 1.0 + assert stats.std() == 0.0 + assert stats.var() == 0.0 + assert stats.mad_std() == 0.0 + assert stats.biweight_location() == 1.0 + assert stats.biweight_scale() == 0.0 + + mask = np.zeros_like(data, dtype=bool) + mask[idx, idx] = True + stats3 = SigmaClippedStats(data, sigma=3.2, mask=mask) + assert stats3.mode() == stats.mode() + + mask = np.zeros_like(data, dtype=bool) + stats4 = SigmaClippedStats(data, sigma=3.2, mask_value=1000) + assert stats4.mode() == stats3.mode() + + data = np.ones((101, 205)) + mask = np.ones_like(data, dtype=bool) + match = "input data is all masked" + with pytest.raises(ValueError, match=match): + SigmaClippedStats(data, sigma=3, mask=mask) + + +def test_sigmaclippedstats_axis(): + data = np.ones((101, 205)) + stats = SigmaClippedStats(data, sigma=2.8, axis=1) + shape = (data.shape[0],) + assert stats.mean().shape == shape + assert stats.mode().shape == shape + assert stats.var().shape == shape + assert stats.mad_std().shape == shape + assert stats.biweight_location().shape == shape + + @pytest.mark.slow @pytest.mark.skipif( not HAS_BOTTLENECK, From fc7631a7560de761ffa178437c3775f2d4f0bef6 Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Mon, 21 Oct 2024 14:51:13 -0400 Subject: [PATCH 09/10] Add changelog entry --- docs/changes/stats/17221.feature.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/changes/stats/17221.feature.rst diff --git a/docs/changes/stats/17221.feature.rst b/docs/changes/stats/17221.feature.rst new file mode 100644 index 000000000000..665fd7fd2030 --- /dev/null +++ b/docs/changes/stats/17221.feature.rst @@ -0,0 +1,2 @@ +Added a SigmaClippedStats convenience class for computing sigma-clipped +statistics. From 58efcac95b52c3a7561fafa64797473e2d3d50c4 Mon Sep 17 00:00:00 2001 From: Larry Bradley Date: Tue, 22 Oct 2024 11:02:58 -0400 Subject: [PATCH 10/10] Add whats new entry --- docs/whatsnew/7.0.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/whatsnew/7.0.rst b/docs/whatsnew/7.0.rst index b3231ea844f2..4abb9e3d468e 100644 --- a/docs/whatsnew/7.0.rst +++ b/docs/whatsnew/7.0.rst @@ -27,6 +27,7 @@ In particular, this release includes: * :ref:`whatsnew_7_0_lorentz2d_model` * :ref:`whatsnew_7_0_votable_1_5` * :ref:`whatsnew_7_0_simplenorm` +* :ref:`whatsnew_7_0_sigmaclippedstats` In addition to these major changes, Astropy v7.0 includes a large number of smaller improvements and bug fixes, which are described in the :ref:`changelog`. @@ -393,6 +394,32 @@ method: axim = snorm.imshow(image, ax=ax, origin='lower') fig.colorbar(axim) +.. _whatsnew_7_0_sigmaclippedstats: + +New ``SigmaClippedStats`` class +=============================== + +A new convenience class, :class:`~astropy.stats.SigmaClippedStats`, has +been added to the :mod:`~astropy.stats` module. This class provides a +convenient way to compute statistics of an array with sigma-clipping. A +simple example might be: + +.. doctest-skip:: + + >>> import numpy as np + >>> from astropy.stats import SigmaClippedStats + >>> rng = np.random.default_rng(seed=42) + >>> data = rng.exponential(scale=100, size=500) + >>> stats = SigmaClippedStats(data, sigma=3, maxiters=10) + >>> stats.min(), stats.max(), stats.sum() # doctest: +FLOAT_CMP + (np.float64(0.8129422833034009), np.float64(255.34193997940474), np.float64(36783.895498717866)) + >>> stats.mean(), stats.median(), stats.std() # doctest: +FLOAT_CMP + (np.float64(79.61882142579624), np.float64(60.01103363578014), np.float64(65.4457063794851)) + >>> stats.mode(), stats.var(), stats.mad_std() # doctest: +FLOAT_CMP + (np.float64(20.79545805574793), np.float64(4283.1404835097765), np.float64(62.608350894722484)) + >>> stats.biweight_location(), stats.biweight_scale() # doctest: +FLOAT_CMP + (np.float64(67.98055399699436), np.float64(64.82889460022386)) + Full change log ===============