From 7fcd4b656380cdfb0a8e409d7bc1baa46748f10d Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 22 Jul 2025 21:33:36 +0000
Subject: [PATCH 1/8] I am working on adding support for pyarrow.Scalar to
 infer_literal_type.

---
 bigframes/dtypes.py            |  2 ++
 setup.py                       |  1 +
 tests/unit/core/test_dtypes.py | 12 ++++++++++++
 3 files changed, 15 insertions(+)

diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 0be31505df..a58619dc21 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -659,6 +659,8 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
 
 def infer_literal_type(literal) -> typing.Optional[Dtype]:
     # Maybe also normalize literal to canonical python representation to remove this burden from compilers?
+    if isinstance(literal, pa.Scalar):
+        return arrow_dtype_to_bigframes_dtype(literal.type)
     if pd.api.types.is_list_like(literal):
         element_types = [infer_literal_type(i) for i in literal]
         common_type = lcd_type(*element_types)
diff --git a/setup.py b/setup.py
index 63d019caa0..9b777ed8d9 100644
--- a/setup.py
+++ b/setup.py
@@ -61,6 +61,7 @@
     "matplotlib >=3.7.1",
     "db-dtypes >=1.4.2",
     # For vendored ibis-framework.
+    "ibis-framework==6.2.0",
     "atpublic>=2.3,<6",
     "python-dateutil>=2.8.2,<3",
     "pytz>=2022.7",
diff --git a/tests/unit/core/test_dtypes.py b/tests/unit/core/test_dtypes.py
index 77392bea2f..3cce7f6f02 100644
--- a/tests/unit/core/test_dtypes.py
+++ b/tests/unit/core/test_dtypes.py
@@ -272,3 +272,15 @@ def test_literal_to_ibis_scalar_throws_on_incompatible_literal():
         ValueError,
     ):
         bigframes.core.compile.ibis_types.literal_to_ibis_scalar({"mykey": "myval"})
+
+
+@pytest.mark.parametrize(
+    ["scalar", "expected_dtype"],
+    [
+        (pa.scalar(1_000_000_000, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
+        (pa.scalar(True, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
+        (pa.scalar("hello", type=pa.string()), bigframes.dtypes.STRING_DTYPE),
+    ],
+)
+def test_infer_literal_type_arrow_scalar(scalar, expected_dtype):
+    assert bigframes.dtypes.infer_literal_type(scalar) == expected_dtype

From a083fa1faed0ecfa404adbbb192536332d71c196 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 22 Jul 2025 16:36:01 -0500
Subject: [PATCH 2/8] Update setup.py

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9b777ed8d9..63d019caa0 100644
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,6 @@
     "matplotlib >=3.7.1",
     "db-dtypes >=1.4.2",
     # For vendored ibis-framework.
-    "ibis-framework==6.2.0",
     "atpublic>=2.3,<6",
     "python-dateutil>=2.8.2,<3",
     "pytz>=2022.7",

From 64c8872034cff05262a42134c80cc964122695bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 22 Jul 2025 16:37:35 -0500
Subject: [PATCH 3/8] Update tests/unit/core/test_dtypes.py

---
 tests/unit/core/test_dtypes.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/unit/core/test_dtypes.py b/tests/unit/core/test_dtypes.py
index 3cce7f6f02..cd23614bbf 100644
--- a/tests/unit/core/test_dtypes.py
+++ b/tests/unit/core/test_dtypes.py
@@ -280,6 +280,10 @@ def test_literal_to_ibis_scalar_throws_on_incompatible_literal():
         (pa.scalar(1_000_000_000, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
         (pa.scalar(True, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
         (pa.scalar("hello", type=pa.string()), bigframes.dtypes.STRING_DTYPE),
+        # Support NULL scalars.
+        (pa.scalar(None, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
+        (pa.scalar(None, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
+        (pa.scalar(None, type=pa.string()), bigframes.dtypes.STRING_DTYPE),
     ],
 )
 def test_infer_literal_type_arrow_scalar(scalar, expected_dtype):

From 29bdab54f43dec517bb05798db064f92fea36f0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Wed, 23 Jul 2025 11:43:20 -0500
Subject: [PATCH 4/8] patch ibis

---
 tests/system/small/test_dataframe.py          | 50 ++++++++++++++++---
 .../ibis/common/temporal.py                   |  5 ++
 .../ibis/expr/datatypes/value.py              | 12 +++++
 .../ibis/formats/pyarrow.py                   |  2 -
 4 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index caf39bd9e9..bc773d05b2 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -906,15 +906,53 @@ def test_df_to_pandas_batches(scalars_dfs):
     assert_pandas_df_equal(pd.concat(filtered_batches), pd_result)
 
 
-def test_assign_new_column(scalars_dfs):
+@pytest.mark.parametrize(
+    ("literal", "expected_dtype"),
+    (
+        pytest.param(
+            2,
+            dtypes.INT_DTYPE,
+            id="INT64",
+        ),
+        # ====================================================================
+        # NULL values
+        #
+        # These are regression tests for b/428999884. It needs to be possible to
+        # set a column to NULL with a desired type (not just the pandas default
+        # of float64).
+        # ====================================================================
+        pytest.param(None, dtypes.FLOAT_DTYPE, id="NULL-None"),
+        pytest.param(
+            pa.scalar(None, type=pa.int64()),
+            dtypes.INT_DTYPE,
+            id="NULL-pyarrow-INT64",
+        ),
+        pytest.param(
+            pa.scalar(None, type=pa.timestamp("us", tz="UTC")),
+            dtypes.TIMESTAMP_DTYPE,
+            id="NULL-pyarrow-TIMESTAMP",
+        ),
+        pytest.param(
+            pa.scalar(None, type=pa.timestamp("us")),
+            dtypes.DATETIME_DTYPE,
+            id="NULL-pyarrow-DATETIME",
+        ),
+    ),
+)
+def test_assign_new_column_w_literal(scalars_dfs, literal, expected_dtype):
     scalars_df, scalars_pandas_df = scalars_dfs
-    kwargs = {"new_col": 2}
-    df = scalars_df.assign(**kwargs)
+    df = scalars_df.assign(new_col=literal)
     bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.assign(**kwargs)
 
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+    new_col_pd = literal
+    if isinstance(literal, pa.Scalar):
+        # PyArrow integer scalars aren't yet supported in pandas Int64Dtype.
+        new_col_pd = literal.as_py()
+
+    # Pandas might not pick the same dtype as BigFrames, but it should at least
+    # be castable to it.
+    pd_result = scalars_pandas_df.assign(new_col=new_col_pd)
+    pd_result["new_col"] = pd_result["new_col"].astype(expected_dtype)
 
     assert_pandas_df_equal(bf_result, pd_result)
 
diff --git a/third_party/bigframes_vendored/ibis/common/temporal.py b/third_party/bigframes_vendored/ibis/common/temporal.py
index 1b0e4fa985..8d84caf5a1 100644
--- a/third_party/bigframes_vendored/ibis/common/temporal.py
+++ b/third_party/bigframes_vendored/ibis/common/temporal.py
@@ -260,3 +260,8 @@ def _from_numpy_datetime64(value):
         raise TypeError("Unable to convert np.datetime64 without pandas")
     else:
         return pd.Timestamp(value).to_pydatetime()
+
+
+@normalize_datetime.register("pyarrow.Scalar")
+def _from_pyarrow_scalar(value):
+    return value.as_py()
diff --git a/third_party/bigframes_vendored/ibis/expr/datatypes/value.py b/third_party/bigframes_vendored/ibis/expr/datatypes/value.py
index e390cea02c..85be0ac749 100644
--- a/third_party/bigframes_vendored/ibis/expr/datatypes/value.py
+++ b/third_party/bigframes_vendored/ibis/expr/datatypes/value.py
@@ -27,6 +27,7 @@
 import bigframes_vendored.ibis.expr.datatypes as dt
 from bigframes_vendored.ibis.expr.datatypes.cast import highest_precedence
 from public import public
+import pyarrow as pa
 import toolz
 
 
@@ -71,6 +72,14 @@ def infer_list(values: Sequence[Any]) -> dt.Array:
     return dt.Array(highest_precedence(map(infer, values)))
 
 
+@infer.register("pyarrow.Scalar")
+def infer_pyarrow_scalar(value: "pa.Scalar"):
+    """Infer the type of a PyArrow Scalar value."""
+    import bigframes_vendored.ibis.formats.pyarrow
+
+    return bigframes_vendored.ibis.formats.pyarrow.PyArrowType.to_ibis(value.type)
+
+
 @infer.register(datetime.time)
 def infer_time(value: datetime.time) -> dt.Time:
     return dt.time
@@ -253,6 +262,9 @@ def infer_shapely_multipolygon(value) -> dt.MultiPolygon:
 def normalize(typ, value):
     """Ensure that the Python type underlying a literal resolves to a single type."""
 
+    if pa is not None and isinstance(value, pa.Scalar):
+        value = value.as_py()
+
     dtype = dt.dtype(typ)
     if value is None:
         if not dtype.nullable:
diff --git a/third_party/bigframes_vendored/ibis/formats/pyarrow.py b/third_party/bigframes_vendored/ibis/formats/pyarrow.py
index a6861b52e1..491e551ec1 100644
--- a/third_party/bigframes_vendored/ibis/formats/pyarrow.py
+++ b/third_party/bigframes_vendored/ibis/formats/pyarrow.py
@@ -24,7 +24,6 @@
 @functools.cache
 def _from_pyarrow_types():
     import pyarrow as pa
-    import pyarrow_hotfix  # noqa: F401
 
     return {
         pa.int8(): dt.Int8,
@@ -87,7 +86,6 @@ class PyArrowType(TypeMapper):
     def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType:
         """Convert a pyarrow type to an ibis type."""
         import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
 
         if pa.types.is_null(typ):
             return dt.null

From 526d472166cfe876edf75702442387fa4dcb6b7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <tswast@gmail.com>
Date: Wed, 23 Jul 2025 12:13:20 -0500
Subject: [PATCH 5/8] increase timeout

---
 tests/system/small/test_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 4bb1c6589a..f48304afc6 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -606,7 +606,7 @@ def test_read_gbq_wildcard(
             "query": {
                 "useQueryCache": True,
                 "maximumBytesBilled": "1000000000",
-                "timeoutMs": 10000,
+                "timeoutMs":120_000,
             }
         },
         pytest.param(

From 81d17579923c1f4056c715af8bd7a359058863b4 Mon Sep 17 00:00:00 2001
From: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Date: Wed, 23 Jul 2025 17:15:40 +0000
Subject: [PATCH 6/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?=
 =?UTF-8?q?st-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 tests/system/small/test_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index f48304afc6..a04da64af0 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -606,7 +606,7 @@ def test_read_gbq_wildcard(
             "query": {
                 "useQueryCache": True,
                 "maximumBytesBilled": "1000000000",
-                "timeoutMs":120_000,
+                "timeoutMs": 120_000,
             }
         },
         pytest.param(

From 2598b7333563303827ee1635b9aab64be14ad081 Mon Sep 17 00:00:00 2001
From: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Date: Wed, 23 Jul 2025 17:16:36 +0000
Subject: [PATCH 7/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?=
 =?UTF-8?q?st-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 tests/system/small/test_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index f48304afc6..a04da64af0 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -606,7 +606,7 @@ def test_read_gbq_wildcard(
             "query": {
                 "useQueryCache": True,
                 "maximumBytesBilled": "1000000000",
-                "timeoutMs":120_000,
+                "timeoutMs": 120_000,
             }
         },
         pytest.param(

From ea0792a4d426d72d519ce6d7699f4cd7d924982e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Wed, 23 Jul 2025 12:17:04 -0500
Subject: [PATCH 8/8] lint

---
 tests/system/small/test_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index f48304afc6..a04da64af0 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -606,7 +606,7 @@ def test_read_gbq_wildcard(
             "query": {
                 "useQueryCache": True,
                 "maximumBytesBilled": "1000000000",
-                "timeoutMs":120_000,
+                "timeoutMs": 120_000,
             }
         },
         pytest.param(