diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index 91c5e54d89..6e3a91cc1c 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -339,9 +339,30 @@ def _agg_list(self, func: typing.Sequence) -> df.DataFrame:
             for col_id in self._aggregated_columns()
             for f in func
         ]
-        column_labels = [
-            (col_id, f) for col_id in self._aggregated_columns() for f in func
-        ]
+
+        if self._block.column_labels.nlevels > 1:
+            # Flatten each MultiIndex label so the result label is (idx1, idx2, func)
+            # rather than the nested ((idx1, idx2), func).
+            aggregated_columns = pd.MultiIndex.from_tuples(
+                [
+                    self._block.col_id_to_label[col_id]
+                    for col_id in self._aggregated_columns()
+                ],
+                names=[*self._block.column_labels.names],
+            ).to_frame(index=False)
+
+            column_labels = [
+                tuple(col_id) + (f,)
+                for col_id in aggregated_columns.to_numpy()
+                for f in func
+            ]
+        else:
+            column_labels = [
+                (self._block.col_id_to_label[col_id], f)
+                for col_id in self._aggregated_columns()
+                for f in func
+            ]
+
         agg_block, _ = self._block.aggregate(
             by_column_ids=self._by_col_ids,
             aggregations=aggregations,
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
index 02d9bf9725..b332d48574 100644
--- a/tests/system/small/test_groupby.py
+++ b/tests/system/small/test_groupby.py
@@ -144,6 +144,23 @@ def test_dataframe_groupby_agg_list(scalars_df_index, scalars_pandas_df_index):
     pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False)
 
 
+def test_dataframe_groupby_agg_list_w_column_multi_index(
+    scalars_df_index, scalars_pandas_df_index
+):
+    columns = ["int64_too", "string_col", "bool_col"]
+    multi_columns = pd.MultiIndex.from_tuples(zip(["a", "b", "a"], columns))
+    bf_df = scalars_df_index[columns].copy()
+    bf_df.columns = multi_columns
+    pd_df = scalars_pandas_df_index[columns].copy()
+    pd_df.columns = multi_columns
+    # List-style agg on two-level column labels; the result must match pandas.
+    bf_result = bf_df.groupby(level=0).agg(["count", "min"])
+    pd_result = pd_df.groupby(level=0).agg(["count", "min"])
+    # Convert the bf result to pandas, then compare frames (dtypes not checked).
+    bf_result_computed = bf_result.to_pandas()
+    pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     ("as_index"),
     [
diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
index f9bedc2a7b..6011dbfe5b 100644
--- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
+++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
@@ -1092,6 +1092,17 @@ def agg(self, func, **kwargs):
             <BLANKLINE>
             [2 rows x 2 columns]
 
+        Multiple aggregations
+
+            >>> df.groupby('A').agg(['min', 'max'])
+                B             C
+                   min max       min       max
+            A
+            1        1   2  0.227877  0.362838
+            2        3   4  -0.56286  1.267767
+            <BLANKLINE>
+            [2 rows x 4 columns]
+
         Args:
             func (function, str, list, dict or None):
                 Function to use for aggregating the data.
@@ -1140,6 +1151,17 @@ def aggregate(self, func, **kwargs):
             <BLANKLINE>
             [2 rows x 2 columns]
 
+        Multiple aggregations
+
+            >>> df.groupby('A').agg(['min', 'max'])
+                B             C
+                   min max       min       max
+            A
+            1        1   2  0.227877  0.362838
+            2        3   4  -0.56286  1.267767
+            <BLANKLINE>
+            [2 rows x 4 columns]
+
         Args:
             func (function, str, list, dict or None):
                 Function to use for aggregating the data.