diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 91c5e54d89..6e3a91cc1c 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -339,9 +339,30 @@ def _agg_list(self, func: typing.Sequence) -> df.DataFrame: for col_id in self._aggregated_columns() for f in func ] - column_labels = [ - (col_id, f) for col_id in self._aggregated_columns() for f in func - ] + + if self._block.column_labels.nlevels > 1: + # Restructure MultiIndex for proper format: (idx1, idx2, func) + # rather than ((idx1, idx2), func). + aggregated_columns = pd.MultiIndex.from_tuples( + [ + self._block.col_id_to_label[col_id] + for col_id in self._aggregated_columns() + ], + names=[*self._block.column_labels.names], + ).to_frame(index=False) + + column_labels = [ + tuple(col_id) + (f,) + for col_id in aggregated_columns.to_numpy() + for f in func + ] + else: + column_labels = [ + (self._block.col_id_to_label[col_id], f) + for col_id in self._aggregated_columns() + for f in func + ] + agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 02d9bf9725..b332d48574 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -144,6 +144,23 @@ def test_dataframe_groupby_agg_list(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) +def test_dataframe_groupby_agg_list_w_column_multi_index( + scalars_df_index, scalars_pandas_df_index +): + columns = ["int64_too", "string_col", "bool_col"] + multi_columns = pd.MultiIndex.from_tuples(zip(["a", "b", "a"], columns)) + bf_df = scalars_df_index[columns].copy() + bf_df.columns = multi_columns + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + bf_result = bf_df.groupby(level=0).agg(["count", "min"]) + pd_result = pd_df.groupby(level=0).agg(["count", "min"]) + + bf_result_computed = bf_result.to_pandas() + pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) + + @pytest.mark.parametrize( ("as_index"), [ diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index f9bedc2a7b..6011dbfe5b 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -1092,6 +1092,17 @@ def agg(self, func, **kwargs): [2 rows x 2 columns] + Multiple aggregations + + >>> df.groupby('A').agg(['min', 'max']) + B C + min max min max + A + 1 1 2 0.227877 0.362838 + 2 3 4 -0.56286 1.267767 + + [2 rows x 4 columns] + Args: func (function, str, list, dict or None): Function to use for aggregating the data. @@ -1140,6 +1151,17 @@ def aggregate(self, func, **kwargs): [2 rows x 2 columns] + Multiple aggregations + + >>> df.groupby('A').agg(['min', 'max']) + B C + min max min max + A + 1 1 2 0.227877 0.362838 + 2 3 4 -0.56286 1.267767 + + [2 rows x 4 columns] + Args: func (function, str, list, dict or None): Function to use for aggregating the data.