Adds cummulative operators to API by pwolfram · Pull Request #812 · pydata/xarray · GitHub
[go: up one dir, main page]

Skip to content

Adds cummulative operators to API #812

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 3, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Adds nancumsum, nancumprod to xarray functions
  • Loading branch information
pwolfram committed Oct 3, 2016
commit 129c807be4df7f07dce5825af3982ffd8052895b
4 changes: 4 additions & 0 deletions doc/api-hidden.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
Dataset.round
Dataset.real
Dataset.T
Dataset.cumsum
Dataset.cumprod

DataArray.ndim
DataArray.shape
Expand Down Expand Up @@ -87,6 +89,8 @@
DataArray.round
DataArray.real
DataArray.T
DataArray.cumsum
DataArray.cumprod

ufuncs.angle
ufuncs.arccos
Expand Down
4 changes: 4 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ Computation
:py:attr:`~Dataset.round`
:py:attr:`~Dataset.real`
:py:attr:`~Dataset.T`
:py:attr:`~Dataset.cumsum`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just a nit, these probably belong under the "Aggregation" heading above

:py:attr:`~Dataset.cumprod`

**Grouped operations**:
:py:attr:`~core.groupby.DatasetGroupBy.assign`
Expand Down Expand Up @@ -286,6 +288,8 @@ Computation
:py:attr:`~DataArray.round`
:py:attr:`~DataArray.real`
:py:attr:`~DataArray.T`
:py:attr:`~DataArray.cumsum`
:py:attr:`~DataArray.cumprod`

**Grouped operations**:
:py:attr:`~core.groupby.DataArrayGroupBy.assign_coords`
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ Enhancements
which to concatenate.
By `Stephan Hoyer <https://github.com/shoyer>`_.

- Adds DataArray and Dataset methods :py:meth:`~xarray.DataArray.cumsum` and
:py:meth:`~xarray.DataArray.cumprod`. By `Phillip J. Wolfram
<https://github.com/pwolfram>`_.

Bug fixes
~~~~~~~~~
- ``groupby_bins`` now restores empty bins by default (:issue:`1019`).
Expand Down
14 changes: 14 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
and 'axis' arguments can be supplied. If neither are supplied, then
`{name}` is calculated over axes."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsDatasetReduce(object):
@classmethod
Expand All @@ -51,6 +58,13 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
Dimension(s) over which to apply `func`. By default `func` is
applied over all dimensions."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one o 8000 f the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsRollingArrayReduce(object):
@classmethod
Expand Down
78 changes: 56 additions & 22 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,11 @@
REDUCE_METHODS = ['all', 'any']
NAN_REDUCE_METHODS = ['argmax', 'argmin', 'max', 'min', 'mean', 'prod', 'sum',
'std', 'var', 'median']
NAN_CUM_METHODS = ['cumsum', 'cumprod']
BOTTLENECK_ROLLING_METHODS = {'move_sum': 'sum', 'move_mean': 'mean',
'move_std': 'std', 'move_min': 'min',
'move_max': 'max'}
# TODO: wrap cumprod/cumsum, take, dot, sort
# TODO: wrap take, dot, sort


def _dask_or_eager_func(name, eager_module=np, list_of_args=False,
Expand Down Expand Up @@ -201,6 +202,30 @@ def func(self, *args, **kwargs):
func.__doc__ = f.__doc__
return func

_CUM_DOCSTRING_TEMPLATE = \
"""Apply `{name}` along some dimension of {cls}.

Parameters
----------
{extra_args}
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
keep_attrs : bool, optional
If True, the attributes (`attrs`) will be copied from the original
object to the new one. If False (default), the new object will be
returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `{name}`.

Returns
-------
cumvalue : {cls}
New {cls} object with `{name}` applied to its data along the
indicated dimension.
"""

_REDUCE_DOCSTRING_TEMPLATE = \
"""Reduce this {cls}'s data by applying `{name}` along some
Expand Down Expand Up @@ -274,7 +299,9 @@ def _ignore_warnings_if(condition):
yield


def _create_nan_agg_method(name, numeric_only=False, coerce_strings=False):
def _create_nan_agg_method(name, numeric_only=False, np_compat=False,
no_bottleneck=False, coerce_strings=False,
keep_dims=False):
def f(values, axis=None, skipna=None, **kwargs):
# ignore keyword args inserted by np.mean and other numpy aggregators
# automatically:
Expand All @@ -292,14 +319,17 @@ def f(values, axis=None, skipna=None, **kwargs):
'skipna=True not yet implemented for %s with dtype %s'
% (name, values.dtype))
nanname = 'nan' + name
if isinstance(axis, tuple) or not values.dtype.isnative:
if isinstance(axis, tuple) or not values.dtype.isnative or no_bottleneck:
# bottleneck can't handle multiple axis arguments or non-native
# endianness
eager_module = np
if np_compat:
eager_module = npcompat
else:
eager_module = np
else:
eager_module = bn
func = _dask_or_eager_func(nanname, eager_module)
using_numpy_nan_func = eager_module is np
using_numpy_nan_func = eager_module is np or eager_module is npcompat
else:
func = _dask_or_eager_func(name)
using_numpy_nan_func = False
Expand All @@ -312,10 +342,12 @@ def f(values, axis=None, skipna=None, **kwargs):
else:
assert using_numpy_nan_func
msg = ('%s is not available with skipna=False with the '
'installed version of numpy; upgrade to numpy 1.9 '
'installed version of numpy; upgrade to numpy 1.12 '
'or newer to use skipna=True or skipna=None' % name)
raise NotImplementedError(msg)
f.numeric_only = numeric_only
f.keep_dims = keep_dims
f.__name__ = name
return f


Expand All @@ -328,28 +360,18 @@ def f(values, axis=None, skipna=None, **kwargs):
# NaN-skipping aggregation functions built by _create_nan_agg_method.
std = _create_nan_agg_method('std', numeric_only=True)
var = _create_nan_agg_method('var', numeric_only=True)
median = _create_nan_agg_method('median', numeric_only=True)

# prod/cumprod/cumsum force the numpy/npcompat path (no_bottleneck=True,
# np_compat=True) since bottleneck is bypassed for them; the cumulative ops
# set keep_dims=True because their output has the same shape as the input,
# so no dimension is dropped by Variable.reduce.
prod = _create_nan_agg_method('prod', numeric_only=True, np_compat=True,
                              no_bottleneck=True)
cumprod = _create_nan_agg_method('cumprod', numeric_only=True, np_compat=True,
                                 no_bottleneck=True, keep_dims=True)
cumsum = _create_nan_agg_method('cumsum', numeric_only=True, np_compat=True,
                                no_bottleneck=True, keep_dims=True)

# Raises NotImplementedError (with the given message) when handed a dask
# array, used for the skipna=True code paths that dask does not support yet.
_fail_on_dask_array_input_skipna = partial(
    _fail_on_dask_array_input,
    msg='%r with skipna=True is not yet implemented on dask arrays')


_prod = _dask_or_eager_func('prod')


def prod(values, axis=None, skipna=None, **kwargs):
if skipna or (skipna is None and values.dtype.kind == 'f'):
if values.dtype.kind not in ['i', 'f']:
raise NotImplementedError(
'skipna=True not yet implemented for prod with dtype %s'
% values.dtype)
_fail_on_dask_array_input_skipna(values)
return npcompat.nanprod(values, axis=axis, **kwargs)
return _prod(values, axis=axis, **kwargs)
prod.numeric_only = True


def first(values, axis, skipna=None):
"""Return the first non-NA elements in this array along the given axis
"""
Expand Down Expand Up @@ -384,6 +406,17 @@ def inject_reduce_methods(cls):
extra_args=cls._reduce_extra_args_docstring)
setattr(cls, name, func)

def inject_cum_methods(cls):
    """Attach the skipna-aware cumulative methods (cumsum, cumprod) to *cls*.

    Each method in ``NAN_CUM_METHODS`` is looked up in this module's
    namespace, wrapped via the class's ``_reduce_method`` hook, given a
    docstring rendered from ``_CUM_DOCSTRING_TEMPLATE``, and bound onto the
    class under its own name.
    """
    for method_name in NAN_CUM_METHODS:
        impl = globals()[method_name]
        wrapped = cls._reduce_method(
            impl, True, getattr(impl, 'numeric_only', False))
        wrapped.__name__ = method_name
        wrapped.__doc__ = _CUM_DOCSTRING_TEMPLATE.format(
            name=method_name, cls=cls.__name__,
            extra_args=cls._cum_extra_args_docstring)
        setattr(cls, method_name, wrapped)


def op_str(name):
return '__%s__' % name
Expand Down Expand Up @@ -454,6 +487,7 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
setattr(cls, name, _values_method_wrapper(name))

inject_reduce_methods(cls)
inject_cum_methods(cls)


def inject_bottleneck_rolling_methods(cls):
Expand Down
17 changes: 13 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,15 +896,24 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
if dim is not None and axis is not None:
raise ValueError("cannot supply both 'axis' and 'dim' arguments")

if getattr(func, 'keep_dims', False):
if dim is None and axis is None:
raise ValueError("must supply either single 'dim' or 'axis' argument to %s"
% (func.__name__))

if dim is not None:
axis = self.get_axis_num(dim)
data = func(self.data if allow_lazy else self.values,
axis=axis, **kwargs)

removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [dim for n, dim in enumerate(self.dims)
if n not in removed_axes]
if getattr(data, 'shape', ()) == self.shape:
dims = self.dims
else:
removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [adim for n, adim in enumerate(self.dims)
if n not in removed_axes]


attrs = self._attrs if keep_attrs else None

Expand Down
2 changes: 0 additions & 2 deletions xarray/test/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,6 @@ def test_reduce(self):
self.assertLazyAndAllClose(u.argmax(dim='x'), v.argmax(dim='x'))
self.assertLazyAndAllClose((u > 1).any(), (v > 1).any())
self.assertLazyAndAllClose((u < 1).all('x'), (v < 1).all('x'))
with self.assertRaisesRegexp(NotImplementedError, 'dask'):
v.prod()
with self.assertRaisesRegexp(NotImplementedError, 'dask'):
v.median()

Expand Down
22 changes: 22 additions & 0 deletions xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,28 @@ def test_dropna(self):
expected = arr[:, 1:]
self.assertDataArrayIdentical(actual, expected)

def test_cumops(self):
    """cumsum/cumprod along each dimension match hand-computed results."""
    coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
              'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
              'c': -999}
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

    # (operation, dimension, expected values) — same cases, same order as
    # asserting each call individually.
    cases = [
        ('cumsum', 'x', [[-1, 0, 1], [-4, 0, 4]]),
        ('cumsum', 'y', [[-1, -1, 0], [-3, -3, 0]]),
        ('cumprod', 'x', [[-1, 0, 1], [3, 0, 3]]),
        ('cumprod', 'y', [[-1, 0, 0], [-3, 0, 0]]),
    ]
    for op, dim, values in cases:
        actual = getattr(orig, op)(dim)
        expected = DataArray(values, coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

def test_reduce(self):
coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
Expand Down
19 changes: 19 additions & 0 deletions xarray/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2421,6 +2421,25 @@ def test_reduce_bad_dim(self):
with self.assertRaisesRegexp(ValueError, 'Dataset does not contain'):
ds = data.mean(dim='bad_dim')

def test_reduce_cumsum_test_dims(self):
    """cumsum/cumprod require exactly one dim/axis and keep all dimensions.

    Fix: removed a leftover debug ``print`` from the dimension-check loop.
    """
    data = create_test_data()
    for cumfunc in ['cumsum', 'cumprod']:
        # No dim/axis, or more than one dim, must be rejected.
        with self.assertRaisesRegexp(
                ValueError, "must supply either single 'dim' or 'axis'"):
            ds = getattr(data, cumfunc)()
        with self.assertRaisesRegexp(
                ValueError, "must supply either single 'dim' or 'axis'"):
            ds = getattr(data, cumfunc)(dim=['dim1', 'dim2'])
        with self.assertRaisesRegexp(ValueError, 'Dataset does not contain'):
            ds = getattr(data, cumfunc)(dim='bad_dim')

        # Cumulative ops keep every dimension of the input dataset.
        for reduct, expected in [('dim1', ['dim1', 'dim2', 'dim3', 'time']),
                                 ('dim2', ['dim1', 'dim2', 'dim3', 'time']),
                                 ('dim3', ['dim1', 'dim2', 'dim3', 'time']),
                                 ('time', ['dim1', 'dim2', 'dim3'])]:
            actual = getattr(data, cumfunc)(dim=reduct).dims
            self.assertItemsEqual(actual, expected)

def test_reduce_non_numeric(self):
data1 = create_test_data(seed=44)
data2 = create_test_data(seed=44)
Expand Down
4 changes: 4 additions & 0 deletions xarray/test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,10 @@ def test_reduce_funcs(self):
self.assertVariableIdentical(np.mean(v), Variable([], 2))

self.assertVariableIdentical(v.prod(), Variable([], 6))
self.assertVariableIdentical(v.cumsum(axis=0),
Variable('x', np.array([1, 1, 3, 6])))
self.assertVariableIdentical(v.cumprod(axis=0),
Variable('x', np.array([1, 1, 2, 6])))
self.assertVariableIdentical(v.var(), Variable([], 2.0 / 3))

if LooseVersion(np.__version__) < '1.9':
Expand Down
0