diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
index 4c27c25058..496ff560dd 100644
--- a/bigframes/session/executor.py
+++ b/bigframes/session/executor.py
@@ -16,6 +16,7 @@
 
 import abc
 import dataclasses
+import itertools
 from typing import Callable, Iterator, Literal, Mapping, Optional, Sequence, Union
 
 from google.cloud import bigquery
@@ -37,10 +38,17 @@ def to_arrow_table(self) -> pyarrow.Table:
         # Need to provide schema if no result rows, as arrow can't infer
-        # If ther are rows, it is safest to infer schema from batches.
-        # Any discrepencies between predicted schema and actual schema will produce errors.
-        return pyarrow.Table.from_batches(
-            self.arrow_batches(),
-            self.schema.to_pyarrow() if not self.total_rows else None,
-        )
+        # If there are rows, it is safest to infer schema from batches.
+        # Any discrepancies between predicted schema and actual schema will produce errors.
+        # Peek at the first batch to find out whether any batches were produced.
+        batches = iter(self.arrow_batches())
+        first_batch = next(batches, None)
+        # TODO: Enforce our internal schema on the table for consistency
+        if first_batch is None:
+            # Nothing to infer from, so build the empty table from our own schema.
+            return self.schema.to_pyarrow().empty_table()
+        # Re-attach the peeked batch ahead of the remaining ones.
+        return pyarrow.Table.from_batches(
+            itertools.chain([first_batch], batches),
+        )
 
 
 class Executor(abc.ABC):
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index e77319b551..83901562d3 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -5283,6 +5283,17 @@ def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_cre
     assert not loaded_scalars_df_index.empty
 
 
+def test_read_gbq_to_pandas_no_exec(unordered_session: bigframes.Session):
+    """read_gbq followed by to_pandas must leave the execution count unchanged."""
+    metrics = unordered_session._metrics
+    execs_pre = metrics.execution_count
+    df = unordered_session.read_gbq("bigquery-public-data.ml_datasets.penguins")
+    df.to_pandas()
+    execs_post = metrics.execution_count
+    assert df.shape == (344, 7)
+    assert execs_pre == execs_post
+
+
 def test_to_gbq_table_labels(scalars_df_index):
     destination_table = "bigframes-dev.bigframes_tests_sys.table_labels"
     result_table = scalars_df_index.to_gbq(