REF: implement cumulative ops block-wise by jbrockmendel · Pull Request #29872 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

REF: implement cumulative ops block-wise #29872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 30, 2019
Prev Previous commit
Next Next commit
Merge branch 'master' of https://github.com/pandas-dev/pandas into accum
  • Loading branch information
jbrockmendel committed Dec 26, 2019
commit 4abecfc967f0eea9c7b1c5155a7ec2f9a27ad409
48 changes: 41 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11106,12 +11106,43 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):

def na_accum_func(blk_values):
# We will be applying this function to block values
if skipna and issubclass(
blk_values.dtype.type, (np.datetime64, np.timedelta64)
):
result = accum_func(blk_values.T, axis)
mask = isna(blk_values.T)
np.putmask(result, mask, iNaT)
if blk_values.dtype.kind in ["m", "M"]:
# numpy 1.18 started sorting NaTs at the end instead of beginning,
# so we need to work around to maintain backwards-consistency.
orig_dtype = blk_values.dtype

# We need to define mask before masking NaTs
mask = isna(blk_values)

if accum_func == np.minimum.accumulate:
# Note: the accum_func comparison fails as an "is" comparison
y = blk_values.view("i8")
y[mask] = np.iinfo(np.int64).max
changed = True
else:
y = blk_values
changed = False

result = accum_func(y.view("i8"), axis)
if skipna:
np.putmask(result, mask, iNaT)
elif accum_func == np.minimum.accumulate:
# Restore NaTs that we masked previously
nz = (~np.asarray(mask)).nonzero()[0]
if len(nz):
# everything up to the first non-na entry stays NaT
result[: nz[0]] = iNaT

if changed:
# restore NaT elements
y[mask] = iNaT # TODO: could try/finally for this?

if isinstance(blk_values, np.ndarray):
result = result.view(orig_dtype)
else:
# DatetimeArray
result = type(blk_values)._from_sequence(result, dtype=orig_dtype)

elif skipna and not issubclass(
blk_values.dtype.type, (np.integer, np.bool_)
):
Expand All @@ -11122,9 +11153,12 @@ def na_accum_func(blk_values):
np.putmask(result, mask, mask_b)
else:
result = accum_func(blk_values.T, axis)
return result.T

# transpose back for ndarray, not for EA
return result.T if hasattr(result, "T") else result

result = self._data.apply(na_accum_func)

d = self._construct_axes_dict()
d["copy"] = False
return self._constructor(result, **d).__finalize__(self)
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.
0