From 6c4d3a36c61a89fb4aa9bd209f7775944b83572e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Tue, 1 Jul 2025 14:05:08 -0500
Subject: [PATCH] feat: `df.to_pandas_batches()` returns one empty DataFrame if
 `df` is empty

---
 bigframes/core/blocks.py                | 16 ++++++++++++++++
 tests/system/small/test_dataframe_io.py | 22 ++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 6d476cc795..dbbf9ee864 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -620,15 +620,31 @@ def to_pandas_batches(
             ordered=True,
             use_explicit_destination=allow_large_results,
         )
+
+        total_batches = 0
         for df in execute_result.to_pandas_batches(
             page_size=page_size, max_results=max_results
         ):
+            total_batches += 1
             self._copy_index_to_pandas(df)
             if squeeze:
                 yield df.squeeze(axis=1)
             else:
                 yield df
 
+        # To reduce the number of edge cases for callers, always yield at least
+        # one DataFrame (see b/428918844). NOTE(review): unlike the loop above,
+        # `squeeze` is not applied to this empty fallback; confirm intended.
+        if total_batches == 0:
+            df = pd.DataFrame(
+                {
+                    col: pd.Series([], dtype=self.expr.get_column_type(col))
+                    for col in itertools.chain(self.value_columns, self.index_columns)
+                }
+            )
+            self._copy_index_to_pandas(df)
+            yield df
+
     def _copy_index_to_pandas(self, df: pd.DataFrame):
         """Set the index on pandas DataFrame to match this block.
 
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index afe3b53d6d..f738a32ec0 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -347,6 +347,28 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
         pd.testing.assert_series_equal(actual, expected)
 
 
+def test_to_pandas_batches_w_empty_dataframe(session):
+    """Verify to_pandas_batches() yields exactly one empty DataFrame.
+
+    See b/428918844 for additional context.
+    """
+    empty = bpd.DataFrame(
+        {
+            "idx1": [],
+            "idx2": [],
+            "col1": pandas.Series([], dtype="string[pyarrow]"),
+            "col2": pandas.Series([], dtype="Int64"),
+        },
+        session=session,
+    ).set_index(["idx1", "idx2"], drop=True)
+    results = list(empty.to_pandas_batches())
+    assert len(results) == 1
+    assert results[0].empty  # the single fallback batch must carry no rows
+    assert list(results[0].index.names) == ["idx1", "idx2"]
+    assert list(results[0].columns) == ["col1", "col2"]
+    pandas.testing.assert_series_equal(results[0].dtypes, empty.dtypes)
+
+
 @pytest.mark.parametrize("allow_large_results", (True, False))
 def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results):
     """Verify to_pandas_batches() APIs returns the expected page size.