From 80a45b54f3003398a52bd8490e6de9635000be91 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 1 Nov 2023 22:40:53 +0000
Subject: [PATCH 1/3] feat: add __iter__, iterrows, itertuples, keys methods

---
 bigframes/dataframe.py                        | 18 +++++
 bigframes/series.py                           |  6 ++
 tests/system/small/test_dataframe.py          | 50 ++++++++++++
 .../bigframes_vendored/pandas/core/frame.py   | 79 +++++++++++++++++++
 .../bigframes_vendored/pandas/core/generic.py | 31 +++++++-
 5 files changed, 183 insertions(+), 1 deletion(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 3369fb4868..07d3fb430a 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -303,6 +303,9 @@ def __len__(self):
         rows, _ = self.shape
         return rows
 
+    def __iter__(self):
+        return iter(self.columns)
+
     def astype(
         self,
         dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
@@ -1472,12 +1475,27 @@ def isin(self, values) -> DataFrame:
                 f"isin(), you passed a [{type(values).__name__}]"
             )
 
+    def keys(self) -> pandas.Index:
+        return self.columns
+
     def items(self):
         column_ids = self._block.value_columns
         column_labels = self._block.column_labels
         for col_id, col_label in zip(column_ids, column_labels):
             yield col_label, bigframes.series.Series(self._block.select_column(col_id))
 
+    def iterrows(self) -> Iterable[tuple[typing.Hashable, bigframes.series.Series]]:
+        for df in self.to_pandas_batches():
+            for item in df.iterrows():
+                yield item
+
+    def itertuples(
+        self, index: bool = True, name: typing.Optional[str] = "Pandas"
+    ) -> Iterable[tuple[typing.Any, ...]]:
+        for df in self.to_pandas_batches():
+            for item in df.itertuples(index=index, name=name):
+                yield item
+
     def dropna(
         self,
         *,
diff --git a/bigframes/series.py b/bigframes/series.py
index 37d00d16f3..5e11211ae2 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import itertools
 import numbers
 import textwrap
 import typing
@@ -148,6 +149,11 @@ def _set_internal_query_job(self, query_job: bigquery.QueryJob):
     def __len__(self):
         return self.shape[0]
 
+    def __iter__(self) -> typing.Iterator:
+        return itertools.chain.from_iterable(
+            map(lambda x: x.index, self._block.to_pandas_batches())
+        )
+
     def copy(self) -> Series:
         return Series(self._block)
 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index c96faa3526..e63f94953a 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -787,6 +787,56 @@ def test_apply_series_scalar_callable(
     pandas.testing.assert_series_equal(bf_result, pd_result)
 
 
+def test_df_keys(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    pandas.testing.assert_index_equal(
+        scalars_df_index.keys(), scalars_pandas_df_index.keys()
+    )
+
+
+def test_df_iter(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    pandas.testing.assert_index_equal(
+        list(scalars_df_index), list(scalars_pandas_df_index)
+    )
+
+
+def test_iterrows(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    for (bf_index, bf_series), (pd_index, pd_series) in zip(
+        scalars_df_index.iterrows(), scalars_pandas_df_index.iterrows()
+    ):
+        assert bf_index == pd_index
+        pandas.testing.assert_series_equal(bf_series, pd_series)
+
+
+@pytest.mark.parametrize(
+    (
+        "index",
+        "name",
+    ),
+    [
+        (
+            True,
+            "my_df",
+        ),
+        (False, None),
+    ],
+)
+def test_itertuples(scalars_df_index, index, name):
+    # Baseline is to_pandas(): conversions alter Numeric's representation slightly.
+    bf_tuples = scalars_df_index.itertuples(index, name)
+    pd_tuples = scalars_df_index.to_pandas().itertuples(index, name)
+    for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples):
+        assert bf_tuple == pd_tuple
+
+
 def test_df_isin_list(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     values = ["Hello, World!", 55555, 2.51, pd.NA, True]
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 013d170114..92a3153467 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -947,6 +947,85 @@ def isin(self, values):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def keys(self):
+        """
+        Get the 'info axis'.
+
+        This is the index for Series and the columns for DataFrame.
+
+        Returns:
+            Index: Info axis.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ...     })
+            >>> df.keys()
+            Index(['A', 'B'], dtype='object')
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def iterrows(self):
+        """
+        Iterate over DataFrame rows as (index, Series) pairs.
+
+        Yields:
+            a tuple (index, data) where data contains row values as a Series
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ...     })
+            >>> index, row = next(df.iterrows())
+            >>> index
+            0
+            >>> row
+            A    1
+            B    4
+            Name: 0, dtype: object
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def itertuples(self, index: bool = True, name: str | None = "Pandas"):
+        """
+        Iterate over DataFrame rows as namedtuples.
+
+        Args:
+            index (bool, default True):
+                If True, return the index as the first element of the tuple.
+            name (str or None, default "Pandas"):
+                The name of the returned namedtuples or None to return regular
+                tuples.
+
+        Returns:
+            iterator:
+                An object to iterate over namedtuples for each row in the
+                DataFrame with the first field possibly being the index and
+                following fields being the column values.
+
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ...     })
+            >>> next(df.itertuples(name="Pair"))
+            Pair(Index=0, A=1, B=4)
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def items(self):
         """
         Iterate over (column name, Series) pairs.
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
index 27d2e84537..127efe6a3d 100644
--- a/third_party/bigframes_vendored/pandas/core/generic.py
+++ b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -1,7 +1,7 @@
 # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
 from __future__ import annotations
 
-from typing import Literal, Optional
+from typing import Iterator, Literal, Optional
 
 from bigframes import constants
 from third_party.bigframes_vendored.pandas.core import indexing
@@ -35,6 +35,35 @@ def size(self) -> int:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def __iter__(self) -> Iterator:
+        """
+        Iterate over info axis.
+
+        Returns:
+            iterator: Info axis as iterator.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ... })
+            >>> for x in df:
+            ...     print(x)
+            A
+            B
+
+            >>> series = bpd.Series(["a", "b", "c"], index=[10, 20, 30])
+            >>> for x in series:
+            ...     print(x)
+            10
+            20
+            30
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     # -------------------------------------------------------------------------
     # Unary Methods
 

From 4f4bac44ea9ca5b046c239cce388d2b48f95afb2 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 2 Nov 2023 19:56:39 +0000
Subject: [PATCH 2/3] fix mypy and df_iter test

---
 bigframes/dataframe.py               | 2 +-
 tests/system/small/test_dataframe.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 56123fffbf..1cbe4f5cb9 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -1488,7 +1488,7 @@ def items(self):
         for col_id, col_label in zip(column_ids, column_labels):
             yield col_label, bigframes.series.Series(self._block.select_column(col_id))
 
-    def iterrows(self) -> Iterable[tuple[typing.Hashable, bigframes.series.Series]]:
+    def iterrows(self) -> Iterable[tuple[typing.Any, bigframes.series.Series]]:
         for df in self.to_pandas_batches():
             for item in df.iterrows():
                 yield item
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 5fb331a875..bd5930e508 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -816,9 +816,8 @@ def test_df_iter(
     scalars_df_index,
     scalars_pandas_df_index,
 ):
-    pandas.testing.assert_index_equal(
-        list(scalars_df_index), list(scalars_pandas_df_index)
-    )
+    for bf_i, df_i in zip(scalars_df_index, scalars_pandas_df_index):
+        assert bf_i == df_i
 
 
 def test_iterrows(

From fd52ad92319f1cf3689ce9123f073e189eabee3e Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 2 Nov 2023 21:13:14 +0000
Subject: [PATCH 3/3] fix mypy

---
 bigframes/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 1cbe4f5cb9..04a5456e26 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -1488,7 +1488,7 @@ def items(self):
         for col_id, col_label in zip(column_ids, column_labels):
             yield col_label, bigframes.series.Series(self._block.select_column(col_id))
 
-    def iterrows(self) -> Iterable[tuple[typing.Any, bigframes.series.Series]]:
+    def iterrows(self) -> Iterable[tuple[typing.Any, pandas.Series]]:
         for df in self.to_pandas_batches():
             for item in df.iterrows():
                 yield item