Less blocks in groupby by WillAyd · Pull Request #29753 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Less blocks in groupby #29753

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
13 changes: 13 additions & 0 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,19 @@ def _iterate_slices(self) -> Iterable[Series]:

yield values

def _cython_agg_general(
    self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
):
    """
    Aggregate the selected object through its internal data manager.

    The grouper's ``aggregate`` is bound to the requested operation and
    handed to the manager's ``apply``; the result is wrapped in a DataFrame
    whose index is then set according to ``self.as_index``.

    Parameters
    ----------
    how : str
        Forwarded to ``self.grouper.aggregate`` as ``how``.
    alt : optional
        Accepted for signature compatibility; not used in this body.
    numeric_only : bool, default True
        Accepted for signature compatibility; not used in this body.
    min_count : int, default -1
        Forwarded to ``self.grouper.aggregate`` as ``min_count``.

    Returns
    -------
    DataFrame
        Indexed by ``self.grouper.result_index`` when ``self.as_index`` is
        true, otherwise by consecutive integers starting at 0.
    """
    func = partial(self.grouper.aggregate, how=how, axis=1, min_count=min_count)
    results = self._selected_obj._data.apply(func)
    df = DataFrame(results)
    if self.as_index:
        df.index = self.grouper.result_index
    else:
        # Only ``results`` is bound in this function; referring to an unbound
        # ``result`` here raised NameError whenever as_index was False.
        # NOTE(review): assumes the frame built from ``results`` already has
        # one row per group, so its own length is the right size for the
        # positional index -- confirm against what ``apply`` returns.
        df.index = np.arange(len(df.index))

    return df

def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
if self.grouper.nkeys != 1:
raise AssertionError("Number of keys must be 1")
Expand Down
21 changes: 16 additions & 5 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,20 +419,31 @@ def apply(
and hasattr(kwargs[k], "values")
}

for b in self.blocks:
for blk in self.blocks:
if filter is not None:
if not b.mgr_locs.isin(filter_locs).any():
result_blocks.append(b)
if not blk.mgr_locs.isin(filter_locs).any():
result_blocks.append(blk)
continue

if aligned_args:
b_items = self.items[b.mgr_locs.indexer]
b_items = self.items[blk.mgr_locs.indexer]

for k, obj in aligned_args.items():
axis = obj._info_axis_number
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

applied = getattr(b, f)(**kwargs)
if isinstance(f, str):
applied = getattr(blk, f)(**kwargs)
else: # partial; specific to groupby
# TODO: func should only return one value; need to remove
# ohlc from groupby semantics to accomplish generically
result, _ = f(blk.values) # better way?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better way?

blk.apply

if result.ndim != 2: # hmm this is hacky
result = result.reshape(-1, 1)

applied = type(blk)(result, placement=blk.mgr_locs, ndim=2)
axes = [self.axes[0], np.arange(result.shape[1])]

result_blocks = _extend_blocks(applied, result_blocks)

if len(result_blocks) == 0:
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/test_whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna, sort):
# GH 17537
# explicitly test the whitelist methods

if op == "median":
pytest.skip("Currently segfaulting...")

if axis == 0:
frame = raw_frame
else:
Expand Down
0