From b162224507381611d4bbe7b204185cc344db070a Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Thu, 3 Apr 2025 05:39:56 +0000
Subject: [PATCH 1/3] fix: page_size without max_results does not work with
 to_pandas_batches

---
 bigframes/dataframe.py        | 66 +++++++++++++++++++++++++++++++++--
 bigframes/session/executor.py |  8 ++++-
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 2056c192ad..b28762c440 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -1634,6 +1634,39 @@ def to_pandas(
     ) -> pandas.DataFrame | pandas.Series:
         """Write DataFrame to pandas DataFrame.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({'col': [4, 2, 2]})
+
+        Download the data from BigQuery and convert it into an in-memory pandas DataFrame.
+
+            >>> df.to_pandas()
+               col
+            0    4
+            1    2
+            2    2
+
+        Estimate job statistics without processing or downloading data by using `dry_run=True`.
+
+            >>> df.to_pandas(dry_run=True) # doctest: +SKIP
+            columnCount                                                            1
+            columnDtypes                                              {'col': Int64}
+            indexLevel                                                             1
+            indexDtypes                                                      [Int64]
+            projectId                                                  bigframes-dev
+            location                                                              US
+            jobType                                                            QUERY
+            destinationTable       {'projectId': 'bigframes-dev', 'datasetId': '_...
+            useLegacySql                                                       False
+            referencedTables                                                    None
+            totalBytesProcessed                                                    0
+            cacheHit                                                           False
+            statementType                                                     SELECT
+            creationTime                            2025-04-02 20:17:12.038000+00:00
+            dtype: object
+
         Args:
             max_download_size (int, default None):
                 Download size threshold in MB. If max_download_size is exceeded when downloading data
@@ -1702,11 +1735,40 @@ def to_pandas_batches(
             page_size and max_results determine the size and number of batches,
             see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({'col': [4, 3, 2, 2, 3]})
+
+        Iterate through the results in batches, limiting the total rows yielded
+        across all batches via `max_results`:
+
+            >>> for df_batch in df.to_pandas_batches(max_results=3):
+            ...     print(df_batch)
+               col
+            0    4
+            1    3
+            2    2
+
+        Alternatively, control the approximate size of each batch using `page_size`
+        and fetch batches manually using `next()`:
+
+            >>> it = df.to_pandas_batches(page_size=2)
+            >>> next(it)
+               col
+            0    4
+            1    3
+            >>> next(it)
+               col
+            2    2
+            3    2
+
         Args:
             page_size (int, default None):
-                The size of each batch.
+                The maximum number of rows of each batch. Non-positive values are ignored.
             max_results (int, default None):
-                If given, only download this many rows at maximum.
+                The maximum total number of rows of all batches.
             allow_large_results (bool, default None):
                 If not None, overrides the global setting to allow or disallow large query results
                 over the default size limit of 10 GB.
diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
index aabbbdcf5d..150122b7dd 100644
--- a/bigframes/session/executor.py
+++ b/bigframes/session/executor.py
@@ -264,7 +264,13 @@ def execute(

         # Though we provide the read client, iterator may or may not use it based on what is efficient for the result
         def iterator_supplier():
-            return iterator.to_arrow_iterable(bqstorage_client=self.bqstoragereadclient)
+            # Workaround issue fixed by: https://github.com/googleapis/python-bigquery/pull/2154
+            if iterator._page_size is not None or iterator.max_results is not None:
+                return iterator.to_arrow_iterable(bqstorage_client=None)
+            else:
+                return iterator.to_arrow_iterable(
+                    bqstorage_client=self.bqstoragereadclient
+                )

         if query_job:
             size_bytes = self.bqclient.get_table(query_job.destination).num_bytes

From 806e762ad6b02c7d31830b0a641f78c9772d6d62 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Thu, 3 Apr 2025 18:57:15 +0000
Subject: [PATCH 2/3] add series.to_pandas code example

---
 bigframes/dataframe.py |  3 ---
 bigframes/series.py    | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index b28762c440..135522ebd5 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -1699,9 +1699,6 @@ def to_pandas(
             downsampled rows and all columns of this DataFrame. If dry_run is
             set, a pandas Series containing dry run statistics will be returned.
         """
-
-        # TODO(orrbradford): Optimize this in future. Potentially some cases where we can return the stored query job
-
         if dry_run:
             dry_run_stats, dry_run_job = self._block._compute_dry_run(
                 max_download_size=max_download_size,
diff --git a/bigframes/series.py b/bigframes/series.py
index d2a3dcf78f..817aef0c2a 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -385,6 +385,39 @@ def to_pandas(
     ) -> pandas.Series:
         """Writes Series to pandas Series.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series([4, 3, 2])
+
+        Download the data from BigQuery and convert it into an in-memory pandas Series.
+
+            >>> s.to_pandas()
+            0    4
+            1    3
+            2    2
+            dtype: Int64
+
+        Estimate job statistics without processing or downloading data by using `dry_run=True`.
+
+            >>> s.to_pandas(dry_run=True) # doctest: +SKIP
+            columnCount                                                            1
+            columnDtypes                                               {None: Int64}
+            indexLevel                                                             1
+            indexDtypes                                                      [Int64]
+            projectId                                                  bigframes-dev
+            location                                                              US
+            jobType                                                            QUERY
+            destinationTable       {'projectId': 'bigframes-dev', 'datasetId': '_...
+            useLegacySql                                                       False
+            referencedTables                                                    None
+            totalBytesProcessed                                                    0
+            cacheHit                                                           False
+            statementType                                                     SELECT
+            creationTime                            2025-04-03 18:54:59.219000+00:00
+            dtype: object
+
         Args:
             max_download_size (int, default None):
                 Download size threshold in MB. If max_download_size is exceeded when downloading data
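Note: the dry-run path documented above returns the job statistics as a pandas Series instead of downloading any rows, so the estimate can be inspected programmatically before a full download. A minimal usage sketch, assuming the statistics keep the field names shown in the doctest output (totalBytesProcessed, cacheHit):

    import bigframes.pandas as bpd

    bpd.options.display.progress_bar = None
    s = bpd.Series([4, 3, 2])

    # dry_run=True returns a pandas Series of job statistics; no rows are downloaded.
    stats = s.to_pandas(dry_run=True)
    print(stats["totalBytesProcessed"], stats["cacheHit"])

    # Fetch the real data only once the estimate looks acceptable.
    data = s.to_pandas()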
From 7e0d968f21203e6dd1cf7051041339b0abb88763 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Thu, 3 Apr 2025 19:01:00 +0000
Subject: [PATCH 3/3] fix test_to_pandas_batches_override_global_option:
 workaround solution can skip one warning message

---
 tests/system/large/test_dataframe_io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/large/test_dataframe_io.py b/tests/system/large/test_dataframe_io.py
index 76a7001fe3..ee9daa4e31 100644
--- a/tests/system/large/test_dataframe_io.py
+++ b/tests/system/large/test_dataframe_io.py
@@ -44,7 +44,7 @@ def test_to_pandas_batches_override_global_option(
                 page_size=500, max_results=1500, allow_large_results=True
             )
         )
-        assert len(w) == 2
+        assert len(w) == 1
         assert issubclass(w[0].category, FutureWarning)
         assert "The query result size has exceeded 10 GB." in str(w[0].message)
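Taken together, these patches make page_size effective on its own: to_pandas_batches no longer needs max_results for batching to take effect. A minimal end-to-end sketch mirroring the new doctest (the toy frame is illustrative; per the updated docstring, each batch holds at most page_size rows):

    import bigframes.pandas as bpd

    bpd.options.display.progress_bar = None
    df = bpd.DataFrame({"col": [4, 3, 2, 2, 3]})

    # page_size alone is now honored: each yielded pandas DataFrame holds at
    # most page_size rows, and iteration continues until the result is consumed.
    for batch in df.to_pandas_batches(page_size=2):
        print(len(batch), list(batch["col"]))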