8000 REF: use BlockManager.apply for Rolling.count by jbrockmendel · Pull Request #35883 · pandas-dev/pandas · GitHub

REF: use BlockManager.apply for Rolling.count #35883

Merged · 14 commits · Aug 31, 2020

Changes from 1 commit
REF: remove unnecesary try/except
jbrockmendel committed Aug 21, 2020
commit 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0
69 changes: 33 additions & 36 deletions pandas/core/groupby/generic.py
@@ -31,7 +31,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -60,6 +60,7 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
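
Note on the two imports added above: ArrayLike annotates values that may be either a NumPy ndarray or a pandas ExtensionArray, and ExtensionArray lets the rewritten helper below test the array directly with isinstance instead of going through Block.is_extension. A minimal illustration of that distinction using only public API (the variable names are illustrative, not from the PR):

    import numpy as np
    import pandas as pd
    from pandas.api.extensions import ExtensionArray  # public alias of the base class

    # A nullable-integer column is backed by an ExtensionArray (IntegerArray)...
    ea_values = pd.array([1, 2, None], dtype="Int64")
    assert isinstance(ea_values, ExtensionArray)

    # ...while a plain float64 column is backed by an ndarray.
    np_values = np.array([1.0, 2.0, 3.0])
    assert not isinstance(np_values, ExtensionArray)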
@@ -1034,32 +1035,31 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_result_block(result, block: "Block", how: str) -> "Block":
-            # see if we can cast the block to the desired dtype
+        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
+            # see if we can cast the values to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(block.dtype, how)
+            dtype = maybe_cast_result_dtype(values.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if block.is_extension and isinstance(result, np.ndarray):
-                # e.g. block.values was an IntegerArray
-                # (1, N) case can occur if block.values was Categorical
+            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
+                # e.g. values was an IntegerArray
+                # (1, N) case can occur if values was Categorical
                 # and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(block.values)._from_sequence(
-                        result.ravel(), dtype=block.values.dtype
+                    result = type(values)._from_sequence(
+                        result.ravel(), dtype=values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            agg_block: "Block" = block.make_block(result)
-            return agg_block
+            return result
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
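
Note on the cast-back in cast_agg_result: aggregation results typically come back as a plain ndarray (often float64 or object), and type(values)._from_sequence(result.ravel(), dtype=values.dtype) tries to rebuild the original extension dtype when the values round-trip, keeping the ndarray otherwise. A standalone sketch of the same idea outside the Block machinery (variable names and sample values are illustrative, not from the PR):

    import numpy as np
    import pandas as pd

    values = pd.array([1, 2, 3], dtype="Int64")  # original column values
    result = np.array([3.0, 3.0])                # e.g. per-group aggregation output

    try:
        # Cast back if feasible: integral floats round-trip to Int64.
        result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
    except (ValueError, TypeError):
        # Lossy values (e.g. 2.5) would land here and stay an ndarray.
        pass

    print(result.dtype)  # Int64 if the cast back succeeded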
@@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will done by later self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                try:
-                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-                except TypeError:
-                    # we may have an exception in trying to aggregate
-                    # continue and exclude the block
-                    raise
+                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+
+                result = cast(DataFrame, result)
+                # unwrap DataFrame to get array
+                if len(result._mgr.blocks) != 1:
+                    # We've split an object block! Everything we've assumed
+                    # about a single block input returning a single block output
+                    # is a lie. To keep the code-path for the typical non-split case
+                    # clean, we choose to clean up this mess later on.
+                    assert len(locs) == result.shape[1]
+                    for i, loc in enumerate(locs):
+                        agg_block = result.iloc[:, [i]]._mgr.blocks[0]
+                        agg_block.mgr_locs = [loc]
+                        new_blocks.append(agg_block)
                 else:
-                    result = cast(DataFrame, result)
-                    # unwrap DataFrame to get array
-                    if len(result._mgr.blocks) != 1:
-                        # We've split an object block! Everything we've assumed
-                        # about a single block input returning a single block output
-                        # is a lie. To keep the code-path for the typical non-split case
-                        # clean, we choose to clean up this mess later on.
-                        assert len(locs) == result.shape[1]
-                        for i, loc in enumerate(locs):
-                            agg_block = result.iloc[:, [i]]._mgr.blocks[0]
-                            agg_block.mgr_locs = [loc]
-                            new_blocks.append(agg_block)
-                    else:
-                        result = result._mgr.blocks[0].values
-                        if isinstance(result, np.ndarray) and result.ndim == 1:
-                            result = result.reshape(1, -1)
-                        agg_block = cast_result_block(result, block, how)
-                        new_blocks = [agg_block]
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    res_values = cast_agg_result(result, block.values, how)
+                    agg_block = block.make_block(res_values)
+                    new_blocks = [agg_block]
             else:
-                agg_block = cast_result_block(result, block, how)
+                res_values = cast_agg_result(result, block.values, how)
+                agg_block = block.make_block(res_values)
                 new_blocks = [agg_block]
             return new_blocks
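
Note on the overall shape of the change in this commit: the casting helper now maps array in to array out and leaves wrapping to the caller via block.make_block(res_values), rather than returning a Block itself, which lines up with the PR title's move toward routing work through BlockManager.apply. A user-level call that exercises this groupby code path, as a rough sketch (whether it takes the python-agg fallback shown above depends on the aggregation and dtypes):

    import pandas as pd

    df = pd.DataFrame(
        {
            "key": ["a", "a", "b"],
            "masked": pd.array([1, 2, None], dtype="Int64"),  # ExtensionArray-backed
            "dense": [1.0, 2.0, 3.0],                         # ndarray-backed
        }
    )

    # Aggregation handled block-by-block in _cython_agg_blocks; the Int64
    # column's extension dtype should survive via the cast back described above.
    out = df.groupby("key").sum()
    print(out.dtypes)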
