From 2256c8908f8007630b45a23841382e0c5c5023f2 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 25 Mar 2024 21:23:48 +0000 Subject: [PATCH 1/3] fix: assign NaN scalar to column error. --- bigframes/core/__init__.py | 4 ++++ tests/system/small/test_dataframe.py | 18 +++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 6fd6fc23c2..ce9c22132b 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -229,6 +229,10 @@ def assign_constant( value: typing.Any, dtype: typing.Optional[bigframes.dtypes.Dtype], ) -> ArrayValue: + if pandas.isna(value): + # Need to assign a data type when value is NaN. + dtype = dtype or bigframes.dtypes.DEFAULT_DTYPE + if destination_id in self.column_ids: # Mutate case exprs = [ ( diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 5e034e8e83..c1f2154cbe 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -20,6 +20,7 @@ from typing import Tuple import geopandas as gpd # type: ignore +import numpy as np import pandas as pd import pandas.testing import pyarrow as pa # type: ignore @@ -605,17 +606,24 @@ def test_assign_new_column_w_loc(scalars_dfs): pd.testing.assert_frame_equal(bf_result, pd_result) -def test_assign_new_column_w_setitem(scalars_dfs): +@pytest.mark.parametrize( + ("scalar",), + [ + (2.1,), + (np.nan,), + ], +) +def test_assign_new_column_w_setitem(scalars_dfs, scalar): scalars_df, scalars_pandas_df = scalars_dfs bf_df = scalars_df.copy() pd_df = scalars_pandas_df.copy() - bf_df["new_col"] = 2 - pd_df["new_col"] = 2 + bf_df["new_col"] = scalar + pd_df["new_col"] = scalar bf_result = bf_df.to_pandas() pd_result = pd_df - # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. - pd_result["new_col"] = pd_result["new_col"].astype("Int64") + # Convert default pandas dtypes `float64` to match BigQuery DataFrames dtypes. + pd_result["new_col"] = pd_result["new_col"].astype("Float64") pd.testing.assert_frame_equal(bf_result, pd_result) From 01db0f9a9abc6626b7d575eb46340e5c011afc1b Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 28 Mar 2024 19:43:47 +0000 Subject: [PATCH 2/3] Update test. --- tests/system/small/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index c1f2154cbe..076fb9700a 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -610,7 +610,7 @@ def test_assign_new_column_w_loc(scalars_dfs): ("scalar",), [ (2.1,), - (np.nan,), + (None,), ], ) def test_assign_new_column_w_setitem(scalars_dfs, scalar): From d9a8a9390c01071a89a1a7f7f36ac6d29830a0d0 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 28 Mar 2024 20:36:29 +0000 Subject: [PATCH 3/3] remove import. --- tests/system/small/test_dataframe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 076fb9700a..95d95ff51a 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -20,7 +20,6 @@ from typing import Tuple import geopandas as gpd # type: ignore -import numpy as np import pandas as pd import pandas.testing import pyarrow as pa # type: ignore