REF: implement cumulative ops block-wise by jbrockmendel · Pull Request #29872 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

REF: implement cumulative ops block-wise #29872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 30, 2019
Next commit
REF: implement cumulative ops block-wise
  • Loading branch information
jbrockmendel committed Nov 26, 2019
commit e6ab0df8d4dc5b460bc2deaae509b52308d69082
36 changes: 23 additions & 13 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11326,20 +11326,30 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
else:
axis = self._get_axis_number(axis)

y = com.values_from_object(self).copy()

if skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64)):
result = accum_func(y, axis)
mask = isna(self)
np.putmask(result, mask, iNaT)
elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
mask = isna(self)
np.putmask(y, mask, mask_a)
result = accum_func(y, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(y, axis)
if axis == 1:
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

def na_accum_func(blk_values):
# We will be applying this function to block values
if skipna and issubclass(
blk_values.dtype.type, (np.datetime64, np.timedelta64)
):
result = accum_func(blk_values.T, axis)
mask = isna(blk_values.T)
np.putmask(result, mask, iNaT)
elif skipna and not issubclass(
blk_values.dtype.type, (np.integer, np.bool_)
):
vals = blk_values.copy().T
mask = isna(vals)
np.putmask(vals, mask, mask_a)
result = accum_func(vals, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(blk_values.T, axis)
return result.T

result = self._data.apply(na_accum_func)
d = self._construct_axes_dict()
d["copy"] = False
return self._constructor(result, **d).__finalize__(self)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,10 @@ def apply(
axis = obj._info_axis_number
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

applied = getattr(b, f)(**kwargs)
if callable(f):
applied = b.apply(f, **kwargs)
else:
applied = getattr(b, f)(**kwargs)
result_blocks = _extend_blocks(applied, result_blocks)

if len(result_blocks) == 0:
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1329,8 +1329,8 @@ def test_agg_cython_table(self, df, func, expected, axis):
_get_cython_table_params(
DataFrame([[np.nan, 1], [1, 2]]),
[
("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
],
),
),
Expand All @@ -1339,6 +1339,10 @@ def test_agg_cython_table_transform(self, df, func, expected, axis):
# GH 21224
# test transforming functions in
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
if axis == "columns" or axis == 1:
# operating blockwise doesn't let us preserve dtypes
expected = expected.astype("float64")

result = df.agg(func, axis=axis)
tm.assert_frame_equal(result, expected)

Expand Down