From 21b044f63d58c73d26080e6e1559d70f808fc70d Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Thu, 28 Sep 2023 17:28:20 -0500
Subject: [PATCH 1/9] feat: support STRUCT data type with `Series.struct.field`
 to extract subfields

---
 bigframes/dtypes.py                           | 66 +++++++++++++++++--
 bigframes/operations/structs.py               | 47 +++++++++++++
 .../pandas/core/arrays/__init__.py            |  0
 .../pandas/core/arrays/arrow/__init__.py      |  0
 .../pandas/core/arrays/arrow/accessors.py     | 63 ++++++++++++++++++
 5 files changed, 172 insertions(+), 4 deletions(-)
 create mode 100644 bigframes/operations/structs.py
 create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/__init__.py
 create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py
 create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py

diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 271b8aa2f2..644fda0913 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -84,10 +84,10 @@
 
 BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, Dtype]] = (
     (ibis_dtypes.boolean, pd.BooleanDtype()),
+    (ibis_dtypes.date, pd.ArrowDtype(pa.date32())),
     (ibis_dtypes.float64, pd.Float64Dtype()),
     (ibis_dtypes.int64, pd.Int64Dtype()),
     (ibis_dtypes.string, pd.StringDtype(storage="pyarrow")),
-    (ibis_dtypes.date, pd.ArrowDtype(pa.date32())),
     (ibis_dtypes.time, pd.ArrowDtype(pa.time64("us"))),
     (ibis_dtypes.Timestamp(timezone=None), pd.ArrowDtype(pa.timestamp("us"))),
     (
@@ -100,6 +100,19 @@
     pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS
 }
 
+IBIS_TO_ARROW: Dict[ibis_dtypes.DataType, pa.DataType] = {
+    ibis_dtypes.boolean: pa.bool_(),
+    ibis_dtypes.date: pa.date32(),
+    ibis_dtypes.float64: pa.float64(),
+    ibis_dtypes.int64: pa.int64(),
+    ibis_dtypes.string: pa.string(),
+    ibis_dtypes.time: pa.time64("us"),
+    ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"),
+    ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"),
+}
+
+ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()}
+
 IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, Union[Dtype, np.dtype[Any]]] = {
     ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS
 }
@@ -148,11 +161,12 @@ def ibis_dtype_to_bigframes_dtype(
     # Special cases: Ibis supports variations on these types, but currently
     # our IO returns them as objects. Eventually, we should support them as
     # ArrowDType (and update the IO accordingly)
-    if isinstance(ibis_dtype, ibis_dtypes.Array) or isinstance(
-        ibis_dtype, ibis_dtypes.Struct
-    ):
+    if isinstance(ibis_dtype, ibis_dtypes.Array):
         return np.dtype("O")
 
+    if isinstance(ibis_dtype, ibis_dtypes.Struct):
+        return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype))
+
     if ibis_dtype in IBIS_TO_BIGFRAMES:
         return IBIS_TO_BIGFRAMES[ibis_dtype]
     elif isinstance(ibis_dtype, ibis_dtypes.Null):
@@ -164,6 +178,29 @@ def ibis_dtype_to_bigframes_dtype(
         )
 
 
+def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType:
+    if isinstance(ibis_dtype, ibis_dtypes.Array):
+        return pa.list_(ibis_dtype_to_arrow_dtype(ibis_dtype.value_type))
+
+    if isinstance(ibis_dtype, ibis_dtypes.Struct):
+        return pa.struct(
+            [
+                (name, ibis_dtype_to_arrow_dtype(dtype))
+                for name, dtype in ibis_dtype.fields.items()
+            ]
+        )
+
+    if ibis_dtype in IBIS_TO_ARROW:
+        return IBIS_TO_ARROW[ibis_dtype]
+    elif isinstance(ibis_dtype, ibis_dtypes.Null):
+        # Fallback to STRING for NULL values for most flexibility in SQL.
+        return IBIS_TO_ARROW[ibis_dtypes.string]
+    else:
+        raise ValueError(
+            f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}"
+        )
+
+
 def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value:
     """Converts an Ibis expression to canonical type.
 
@@ -187,6 +224,24 @@ def ibis_table_to_canonical_types(table: ibis_types.Table) -> ibis_types.Table:
     return table.select(*casted_columns)
 
 
+def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType:
+    if pa.types.is_struct(arrow_dtype):
+        struct_dtype = typing.cast(pa.StructType, arrow_dtype)
+        return ibis_dtypes.Struct.from_tuples(
+            [
+                (field.name, arrow_dtype_to_ibis_dtype(field.type))
+                for field in struct_dtype
+            ]
+        )
+
+    if arrow_dtype in ARROW_TO_IBIS:
+        return ARROW_TO_IBIS[arrow_dtype]
+    else:
+        raise ValueError(
+            f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}"
+        )
+
+
 def bigframes_dtype_to_ibis_dtype(
     bigframes_dtype: Union[DtypeString, Dtype, np.dtype[Any]]
 ) -> ibis_dtypes.DataType:
@@ -202,6 +257,9 @@ def bigframes_dtype_to_ibis_dtype(
     Raises:
         ValueError: If passed a dtype not supported by BigQuery DataFrames.
     """
+    if isinstance(bigframes_dtype, pd.ArrowDtype):
+        return arrow_dtype_to_ibis_dtype(bigframes_dtype.pyarrow_dtype)
+
     type_string = str(bigframes_dtype)
     if type_string in BIGFRAMES_STRING_TO_BIGFRAMES:
         bigframes_dtype = BIGFRAMES_STRING_TO_BIGFRAMES[
diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py
new file mode 100644
index 0000000000..f12c97981b
--- /dev/null
+++ b/bigframes/operations/structs.py
@@ -0,0 +1,47 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import typing
+
+import ibis.expr.types as ibis_types
+
+import bigframes.dataframe
+import bigframes.operations
+import bigframes.operations.base
+import bigframes.series
+import third_party.bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors
+
+
+class StructField(bigframes.operations.UnaryOp):
+    def __init__(self, name_or_index: str | int):
+        self._name_or_index = name_or_index
+
+    def _as_ibis(self, x: ibis_types.Value):
+        struct_value = typing.cast(ibis_types.StructValue, x)
+        if isinstance(self._name_or_index, str):
+            name = self._name_or_index
+        else:
+            name = struct_value.names[self._name_or_index]
+        return struct_value[name]
+
+
+class StructAccessor(
+    bigframes.operations.base.SeriesMethods, vendoracessors.StructAccessor
+):
+    __doc__ = vendoracessors.StructAccessor.__doc__
+
+    def field(self, name_or_index: str | int) -> bigframes.series.Series:
+        return self._apply_unary_op(StructField(name_or_index))
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/__init__.py b/third_party/bigframes_vendored/pandas/core/arrays/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
new file mode 100644
index 0000000000..cabb3566ee
--- /dev/null
+++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
@@ -0,0 +1,63 @@
+# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/arrow/accessors.py
+"""Accessors for arrow-backed data."""
+
+from __future__ import annotations
+
+from bigframes import constants
+
+
+class StructAccessor:
+    """
+    Accessor object for structured data properties of the Series values.
+    """
+
+    def field(self, name_or_index: str | int):
+        """
+        Extract a child field of a struct as a Series.
+
+        Parameters
+        ----------
+        name_or_index : str | int
+            Name or index of the child field to extract.
+
+        Returns
+        -------
+        pandas.Series
+            The data corresponding to the selected child field.
+
+        See Also
+        --------
+        Series.struct.explode : Return all child fields as a DataFrame.
+
+        Examples
+        --------
+        >>> import bigframes.pandas as bpd
+        >>> import pyarrow as pa
+        >>> s = bpd.Series(
+        ...     [
+        ...         {"version": 1, "project": "pandas"},
+        ...         {"version": 2, "project": "pandas"},
+        ...         {"version": 1, "project": "numpy"},
+        ...     ],
+        ...     dtype=bpd.ArrowDtype(pa.struct(
+        ...         [("version", pa.int64()), ("project", pa.string())]
+        ...     ))
+        ... )
+
+        Extract by field name.
+
+        >>> s.struct.field("project")
+        0    pandas
+        1    pandas
+        2     numpy
+        Name: project, dtype: string[pyarrow]
+
+        Extract by field index.
+
+        >>> s.struct.field(0)
+        0    1
+        1    2
+        2    1
+        Name: version, dtype: int64[pyarrow]
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

From 05105a8de1bd9e1510fa62def8e16849a725c8d8 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Fri, 29 Sep 2023 10:55:02 -0500
Subject: [PATCH 2/9] implement explode

---
 bigframes/dataframe.py                        | 10 +++++-
 bigframes/operations/base.py                  | 10 +++++-
 bigframes/operations/structs.py               | 18 ++++++++--
 bigframes/series.py                           |  5 +++
 .../pandas/core/arrays/arrow/accessors.py     | 36 +++++++++++++++++++
 5 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0d357e7c3d..5a3834f84f 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -161,7 +161,15 @@ def __init__(
                 columns=columns,  # type:ignore
                 dtype=dtype,  # type:ignore
             )
-            if pd_dataframe.size < MAX_INLINE_DF_SIZE:
+            if (
+                pd_dataframe.size < MAX_INLINE_DF_SIZE
+                # TODO(swast): Workaround data types limitation in inline data.
+                and not any(
+                    dt.pyarrow_dtype
+                    for dt in pd_dataframe.dtypes
+                    if isinstance(dt, pandas.ArrowDtype)
+                )
+            ):
                 self._block = blocks.block_from_local(
                     pd_dataframe, session or bigframes.pandas.get_global_session()
                 )
diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py
index add6af57f4..51eaad18b9 100644
--- a/bigframes/operations/base.py
+++ b/bigframes/operations/base.py
@@ -86,7 +86,15 @@ def __init__(
             if pd_series.name is None:
                 # to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename
                 pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1)
-            if pd_dataframe.size < MAX_INLINE_SERIES_SIZE:
+            if (
+                pd_dataframe.size < MAX_INLINE_SERIES_SIZE
+                # TODO(swast): Workaround data types limitation in inline data.
+                and not any(
+                    dt.pyarrow_dtype
+                    for dt in pd_dataframe.dtypes
+                    if isinstance(dt, pd.ArrowDtype)
+                )
+            ):
                 self._block = blocks.block_from_local(
                     pd_dataframe, session or bigframes.pandas.get_global_session()
                 )
diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py
index f12c97981b..80d51115d0 100644
--- a/bigframes/operations/structs.py
+++ b/bigframes/operations/structs.py
@@ -35,7 +35,7 @@ def _as_ibis(self, x: ibis_types.Value):
             name = self._name_or_index
         else:
             name = struct_value.names[self._name_or_index]
-        return struct_value[name]
+        return struct_value[name].name(name)
 
 
 class StructAccessor(
@@ -44,4 +44,18 @@ class StructAccessor(
     __doc__ = vendoracessors.StructAccessor.__doc__
 
     def field(self, name_or_index: str | int) -> bigframes.series.Series:
-        return self._apply_unary_op(StructField(name_or_index))
+        series = self._apply_unary_op(StructField(name_or_index))
+        if isinstance(name_or_index, str):
+            name = name_or_index
+        else:
+            struct_field = self._dtype.pyarrow_dtype[name_or_index]
+            name = struct_field.name
+        return series.rename(name)
+
+    def explode(self) -> bigframes.dataframe.DataFrame:
+        import bigframes.pandas
+
+        pa_type = self._dtype.pyarrow_dtype
+        return bigframes.pandas.concat(
+            [self.field(i) for i in range(pa_type.num_fields)], axis="columns"
+        )
diff --git a/bigframes/series.py b/bigframes/series.py
index c1c0cb0537..5efe7b3365 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -51,6 +51,7 @@
 import bigframes.operations.base
 import bigframes.operations.datetimes as dt
 import bigframes.operations.strings as strings
+import bigframes.operations.structs as structs
 import third_party.bigframes_vendored.pandas.core.series as vendored_pandas_series
 
 LevelType = typing.Union[str, int]
@@ -118,6 +119,10 @@ def query_job(self) -> Optional[bigquery.QueryJob]:
             self._set_internal_query_job(self._compute_dry_run())
         return self._query_job
 
+    @property
+    def struct(self) -> structs.StructAccessor:
+        return structs.StructAccessor(self._block)
+
     def _set_internal_query_job(self, query_job: bigquery.QueryJob):
         self._query_job = query_job
 
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
index cabb3566ee..7268775f25 100644
--- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
+++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
@@ -61,3 +61,39 @@ def field(self, name_or_index: str | int):
         Name: version, dtype: int64[pyarrow]
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def explode(self):
+        """
+        Extract all child fields of a struct as a DataFrame.
+
+        Returns
+        -------
+        pandas.DataFrame
+            The data corresponding to all child fields.
+
+        See Also
+        --------
+        Series.struct.field : Return a single child field as a Series.
+
+        Examples
+        --------
+        >>> import bigframes.pandas as bpd
+        >>> import pyarrow as pa
+        >>> s = bpd.Series(
+        ...     [
+        ...         {"version": 1, "project": "pandas"},
+        ...         {"version": 2, "project": "pandas"},
+        ...         {"version": 1, "project": "numpy"},
+        ...     ],
+        ...     dtype=bpd.ArrowDtype(pa.struct(
+        ...         [("version", pa.int64()), ("project", pa.string())]
+        ...     ))
+        ... )
+
+        >>> s.struct.explode()
+           version project
+        0        1  pandas
+        1        2  pandas
+        2        1   numpy
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

From 31290425af4b161fc0c395d133540d4592b1c2e4 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 10:46:23 -0500
Subject: [PATCH 3/9] fix docstrings

---
 noxfile.py                                    |   2 +-
 .../pandas/core/arrays/arrow/accessors.py     | 129 +++++++++---------
 .../bigframes_vendored/sklearn/__init__.py    |   0
 .../sklearn/ensemble/__init__.py              |   0
 .../bigframes_vendored/xgboost/__init__.py    |   0
 5 files changed, 63 insertions(+), 68 deletions(-)
 create mode 100644 third_party/bigframes_vendored/sklearn/__init__.py
 create mode 100644 third_party/bigframes_vendored/sklearn/ensemble/__init__.py
 create mode 100644 third_party/bigframes_vendored/xgboost/__init__.py

diff --git a/noxfile.py b/noxfile.py
index 033bbfefe4..da9dff92fe 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -362,7 +362,7 @@ def doctest(session: nox.sessions.Session):
     run_system(
         session=session,
         prefix_name="doctest",
-        extra_pytest_options=("--doctest-modules",),
+        extra_pytest_options=("--doctest-modules", "third_party"),
         test_folder="bigframes",
         check_cov=True,
     )
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
index 7268775f25..8e3ea06a3d 100644
--- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
+++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
@@ -15,50 +15,45 @@ def field(self, name_or_index: str | int):
         """
         Extract a child field of a struct as a Series.
 
-        Parameters
-        ----------
-        name_or_index : str | int
-            Name or index of the child field to extract.
-
-        Returns
-        -------
-        pandas.Series
-            The data corresponding to the selected child field.
-
-        See Also
-        --------
-        Series.struct.explode : Return all child fields as a DataFrame.
-
-        Examples
-        --------
-        >>> import bigframes.pandas as bpd
-        >>> import pyarrow as pa
-        >>> s = bpd.Series(
-        ...     [
-        ...         {"version": 1, "project": "pandas"},
-        ...         {"version": 2, "project": "pandas"},
-        ...         {"version": 1, "project": "numpy"},
-        ...     ],
-        ...     dtype=bpd.ArrowDtype(pa.struct(
-        ...         [("version", pa.int64()), ("project", pa.string())]
-        ...     ))
-        ... )
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> import pyarrow as pa
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series(
+            ...     [
+            ...         {"version": 1, "project": "pandas"},
+            ...         {"version": 2, "project": "pandas"},
+            ...         {"version": 1, "project": "numpy"},
+            ...     ],
+            ...     dtype=bpd.ArrowDtype(pa.struct(
+            ...         [("version", pa.int64()), ("project", pa.string())]
+            ...     ))
+            ... )
 
         Extract by field name.
 
-        >>> s.struct.field("project")
-        0    pandas
-        1    pandas
-        2     numpy
-        Name: project, dtype: string[pyarrow]
+            >>> s.struct.field("project")
+            0    pandas
+            1    pandas
+            2     numpy
+            Name: project, dtype: string
 
         Extract by field index.
 
-        >>> s.struct.field(0)
-        0    1
-        1    2
-        2    1
-        Name: version, dtype: int64[pyarrow]
+            >>> s.struct.field(0)
+            0    1
+            1    2
+            2    1
+            Name: version, dtype: Int64
+
+        Args:
+            name_or_index:
+                Name (str) or index (int) of the child field to extract.
+
+        Returns:
+            Series:
+                The data corresponding to the selected child field.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -66,34 +61,34 @@ def explode(self):
         """
         Extract all child fields of a struct as a DataFrame.
 
-        Returns
-        -------
-        pandas.DataFrame
-            The data corresponding to all child fields.
-
-        See Also
-        --------
-        Series.struct.field : Return a single child field as a Series.
-
-        Examples
-        --------
-        >>> import bigframes.pandas as bpd
-        >>> import pyarrow as pa
-        >>> s = bpd.Series(
-        ...     [
-        ...         {"version": 1, "project": "pandas"},
-        ...         {"version": 2, "project": "pandas"},
-        ...         {"version": 1, "project": "numpy"},
-        ...     ],
-        ...     dtype=bpd.ArrowDtype(pa.struct(
-        ...         [("version", pa.int64()), ("project", pa.string())]
-        ...     ))
-        ... )
-
-        >>> s.struct.explode()
-           version project
-        0        1  pandas
-        1        2  pandas
-        2        1   numpy
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> import pyarrow as pa
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series(
+            ...     [
+            ...         {"version": 1, "project": "pandas"},
+            ...         {"version": 2, "project": "pandas"},
+            ...         {"version": 1, "project": "numpy"},
+            ...     ],
+            ...     dtype=bpd.ArrowDtype(pa.struct(
+            ...         [("version", pa.int64()), ("project", pa.string())]
+            ...     ))
+            ... )
+
+        Extract all child fields.
+
+            >>> s.struct.explode()
+               version project
+            0        1  pandas
+            1        2  pandas
+            2        1   numpy
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Returns:
+            DataFrame:
+                The data corresponding to all child fields.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/sklearn/__init__.py b/third_party/bigframes_vendored/sklearn/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/bigframes_vendored/sklearn/ensemble/__init__.py b/third_party/bigframes_vendored/sklearn/ensemble/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/bigframes_vendored/xgboost/__init__.py b/third_party/bigframes_vendored/xgboost/__init__.py
new file mode 100644
index 0000000000..e69de29bb2

From f4671fce4134bbcd88715a6beff639f47978f478 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 10:57:52 -0500
Subject: [PATCH 4/9] add unit tests

---
 tests/unit/test_dtypes.py | 64 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py
index bb8ae570dc..3baff2e1f5 100644
--- a/tests/unit/test_dtypes.py
+++ b/tests/unit/test_dtypes.py
@@ -85,6 +85,70 @@ def test_ibis_float32_raises_unexpected_datatype():
         bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_dtypes.float32)
 
 
+IBIS_ARROW_DTYPES = (
+    (ibis_dtypes.boolean, pa.bool_()),
+    (ibis_dtypes.date, pa.date32()),
+    (ibis_dtypes.Timestamp(), pa.timestamp("us")),
+    (ibis_dtypes.float64, pa.float64()),
+    (
+        ibis_dtypes.Timestamp(timezone="UTC"),
+        pa.timestamp("us", tz="UTC"),
+    ),
+    (
+        ibis_dtypes.Struct.from_tuples(
+            [
+                ("name", ibis_dtypes.string()),
+                ("version", ibis_dtypes.int64()),
+            ]
+        ),
+        pa.struct(
+            [
+                ("name", pa.string()),
+                ("version", pa.int64()),
+            ]
+        ),
+    ),
+    (
+        ibis_dtypes.Struct.from_tuples(
+            [
+                (
+                    "nested",
+                    ibis_dtypes.Struct.from_tuples(
+                        [
+                            ("field", ibis_dtypes.string()),
+                        ]
+                    ),
+                ),
+            ]
+        ),
+        pa.struct(
+            [
+                (
+                    "nested",
+                    pa.struct(
+                        [
+                            ("field", pa.string()),
+                        ]
+                    ),
+                ),
+            ]
+        ),
+    ),
+)
+
+
+@pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES)
+def test_arrow_dtype_to_ibis_dtype(ibis_dtype, arrow_dtype):
+    result = bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_dtype)
+    assert result == ibis_dtype
+
+
+@pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES)
+def test_ibis_dtype_to_arrow_dtype(ibis_dtype, arrow_dtype):
+    result = bigframes.dtypes.ibis_dtype_to_arrow_dtype(ibis_dtype)
+    assert result == arrow_dtype
+
+
 @pytest.mark.parametrize(
     ["bigframes_dtype", "ibis_dtype"],
     [

From a18370888ef720235b2b75d9bbc788ff727cf123 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 11:37:09 -0500
Subject: [PATCH 5/9] update struct dtype tests

---
 tests/system/small/test_dataframe.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index e71b1430e6..43dfbed426 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -884,7 +884,19 @@ def test_get_dtypes_array_struct(session):
     dtypes = df.dtypes
     pd.testing.assert_series_equal(
         dtypes,
-        pd.Series({"array_column": np.dtype("O"), "struct_column": np.dtype("O")}),
+        pd.Series(
+            {
+                "array_column": np.dtype("O"),
+                "struct_column": pd.ArrowDtype(
+                    pa.struct(
+                        [
+                            ("string_field", pa.string()),
+                            ("float_field", pa.float64()),
+                        ]
+                    )
+                ),
+            }
+        ),
     )
 
 

From d600a1c7e53a686f14f3959f150be78b5edb4241 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 12:49:36 -0500
Subject: [PATCH 6/9] cleanup before doctest

---
 noxfile.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/noxfile.py b/noxfile.py
index a113e1fcde..1ce3965d6e 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -275,6 +275,20 @@ def install_systemtest_dependencies(session, install_test_extra, *constraints):
         session.install("-e", ".", *constraints)
 
 
+def clean_pycache():
+    paths = CURRENT_DIRECTORY.glob("**/__pycache__/**/*")
+    for path in paths:
+        path.unlink()
+
+    paths = CURRENT_DIRECTORY.glob("**/__pycache__")
+    for path in paths:
+        path.rmdir()
+
+    paths = CURRENT_DIRECTORY.glob("**/*.pyc")
+    for path in paths:
+        path.unlink()
+
+
 def run_system(
     session: nox.sessions.Session,
     prefix_name,
@@ -286,6 +300,7 @@ def run_system(
     extra_pytest_options=(),
 ):
     """Run the system test suite."""
+    clean_pycache()
     constraints_path = str(
         CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
     )

From 9e90b1919401dd9a3869f07a317f2797b7868423 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 12:58:49 -0500
Subject: [PATCH 7/9] alternative workaround for mismatch import error

---
 noxfile.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/noxfile.py b/noxfile.py
index 1ce3965d6e..15c87746f4 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -275,20 +275,6 @@ def install_systemtest_dependencies(session, install_test_extra, *constraints):
         session.install("-e", ".", *constraints)
 
 
-def clean_pycache():
-    paths = CURRENT_DIRECTORY.glob("**/__pycache__/**/*")
-    for path in paths:
-        path.unlink()
-
-    paths = CURRENT_DIRECTORY.glob("**/__pycache__")
-    for path in paths:
-        path.rmdir()
-
-    paths = CURRENT_DIRECTORY.glob("**/*.pyc")
-    for path in paths:
-        path.unlink()
-
-
 def run_system(
     session: nox.sessions.Session,
     prefix_name,
@@ -300,7 +286,6 @@ def run_system(
     extra_pytest_options=(),
 ):
     """Run the system test suite."""
-    clean_pycache()
     constraints_path = str(
         CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
     )
@@ -374,6 +359,9 @@ def system_noextras(session: nox.sessions.Session):
 @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS[-1])
 def doctest(session: nox.sessions.Session):
     """Run the system test suite."""
+    # Workaround https://github.com/pytest-dev/pytest/issues/9567
+    os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1"
+
     run_system(
         session=session,
         prefix_name="doctest",

From e55be81e42a8c8a7f92fe15241ce6929947a558e Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 13:00:30 -0500
Subject: [PATCH 8/9] alternative workaround for mismatch import error

---
 .kokoro/build.sh | 3 +++
 noxfile.py       | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.kokoro/build.sh b/.kokoro/build.sh
index a0fa4bc787..58eaa7fedf 100755
--- a/.kokoro/build.sh
+++ b/.kokoro/build.sh
@@ -26,6 +26,9 @@ cd "${PROJECT_ROOT}"
 # Disable buffering, so that the logs stream through.
 export PYTHONUNBUFFERED=1
 
+# Workaround https://github.com/pytest-dev/pytest/issues/9567
+export PY_IGNORE_IMPORTMISMATCH=1
+
 # Debug: show build environment
 env | grep KOKORO
 
diff --git a/noxfile.py b/noxfile.py
index 15c87746f4..a113e1fcde 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -359,9 +359,6 @@ def system_noextras(session: nox.sessions.Session):
 @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS[-1])
 def doctest(session: nox.sessions.Session):
     """Run the system test suite."""
-    # Workaround https://github.com/pytest-dev/pytest/issues/9567
-    os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1"
-
     run_system(
         session=session,
         prefix_name="doctest",

From 6c133143f450949295424da5c03b3f96eac7529a Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Mon, 2 Oct 2023 17:07:30 -0500
Subject: [PATCH 9/9] remove dead ibis null to arrow check

---
 bigframes/dtypes.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 85b473ebbd..46a7a1cb50 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -192,9 +192,6 @@ def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType:
 
     if ibis_dtype in IBIS_TO_ARROW:
         return IBIS_TO_ARROW[ibis_dtype]
-    elif isinstance(ibis_dtype, ibis_dtypes.Null):
-        # Fallback to STRING for NULL values for most flexibility in SQL.
-        return IBIS_TO_ARROW[ibis_dtypes.string]
     else:
         raise ValueError(
             f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}"