From c6d97544944bc79e9f340ccfe9329b6ccf8e23a2 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Sat, 4 Nov 2023 19:29:42 +0000 Subject: [PATCH 1/5] feat: support series.at[row_label] = scalar and loc equivalent --- bigframes/core/indexers.py | 41 ++++++++++++++++++++++--------- tests/system/small/test_series.py | 10 ++++++++ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index f6ce084714..eda9f6f33f 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -59,23 +59,26 @@ def __setitem__(self, key, value) -> None: # Assume the key is for the index label. block = self._series._block - value_column = self._series._value_column + original_column = self._series index_column = block.index_columns[0] - - # if index == key return value else value_colum block, insert_cond = block.apply_unary_op( - index_column, ops.partial_right(ops.eq_op, key) + index_column, + ops.partial_right(ops.eq_op, key), + result_label=self._series.name, ) - block, result_id = block.apply_binary_op( - insert_cond, - self._series._value_column, - ops.partial_arg1(ops.where_op, value), + insert_cond_bool_series = bigframes.series.Series( + block.select_column(insert_cond) ) - block = block.copy_values(result_id, value_column).drop_columns( - [insert_cond, result_id] + new_column = insert_cond_bool_series.map( + {True: value, False: None}, verify_integrity=False ) - - self._series._set_block(block) + try: + new_column = new_column.fillna(self._series) + except ibis.common.exceptions.IbisTypeError: + raise TypeError( + f"Cannot assign scalar of type {type(value)} to column of type {original_column.dtype}." 
+ ) + self._series._set_block(new_column._block) class IlocSeriesIndexer: @@ -117,6 +120,20 @@ def __getitem__( ) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: return self._series.loc[key] + def __setitem__( + self, + key: LocSingleKey, + value, + ): + if pd.api.types.is_list_like(value) or isinstance( + value, bigframes.series.Series + ): + raise NotImplementedError( + "series.at.__setitem__ only supports scalar right-hand values. " + f"{constants.FEEDBACK_LINK}" + ) + self._series.loc[key] = value + class LocDataFrameIndexer: def __init__(self, dataframe: bigframes.dataframe.DataFrame): diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 183ba01c0e..f0fcc8dede 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1012,6 +1012,16 @@ def test_loc_setitem_cell(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_series_equal(bf_original.to_pandas(), pd_original) +def test_at_setitem_row_label_scalar(scalars_df_index, scalars_pandas_df_index): + bf_series = scalars_df_index["int64_col"] + pd_series = scalars_pandas_df_index["int64_col"] + bf_series.at[1] = 1000 + pd_series.at[1] = 1000 + bf_result = bf_series.to_pandas() + pd_result = pd_series.astype("Float64") # type difference is due to NA treatment + pd.testing.assert_series_equal(bf_result, pd_result) + + def test_ne_obj_series(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" From 29ec45d81c93853d567496475148790ef204f476 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Sat, 4 Nov 2023 19:38:34 +0000 Subject: [PATCH 2/5] don't change original loc implementation --- bigframes/core/indexers.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index eda9f6f33f..522603365e 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -59,26 +59,23 @@ def 
__setitem__(self, key, value) -> None: # Assume the key is for the index label. block = self._series._block - original_column = self._series + value_column = self._series._value_column index_column = block.index_columns[0] + + # if index == key return value else value_colum block, insert_cond = block.apply_unary_op( - index_column, - ops.partial_right(ops.eq_op, key), - result_label=self._series.name, + index_column, ops.partial_right(ops.eq_op, key) ) - insert_cond_bool_series = bigframes.series.Series( - block.select_column(insert_cond) + block, result_id = block.apply_binary_op( + insert_cond, + self._series._value_column, + ops.partial_arg1(ops.where_op, value), ) - new_column = insert_cond_bool_series.map( - {True: value, False: None}, verify_integrity=False + block = block.copy_values(result_id, value_column).drop_columns( + [insert_cond, result_id] ) - try: - new_column = new_column.fillna(self._series) - except ibis.common.exceptions.IbisTypeError: - raise TypeError( - f"Cannot assign scalar of type {type(value)} to column of type {original_column.dtype}." 
- ) - self._series._set_block(new_column._block) + + self._series._set_block(block) class IlocSeriesIndexer: From 29e44a5ba89c4962d24f92085fa3d015e1ba3b22 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Sat, 4 Nov 2023 19:39:59 +0000 Subject: [PATCH 3/5] fix type in test --- tests/system/small/test_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index f0fcc8dede..27eea0a310 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1018,7 +1018,7 @@ def test_at_setitem_row_label_scalar(scalars_df_index, scalars_pandas_df_index): bf_series.at[1] = 1000 pd_series.at[1] = 1000 bf_result = bf_series.to_pandas() - pd_result = pd_series.astype("Float64") # type difference is due to NA treatment + pd_result = pd_series.astype("Int64") pd.testing.assert_series_equal(bf_result, pd_result) From 96424c6b142b5a509f175d764444e969a65cd868 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Sat, 4 Nov 2023 19:52:00 +0000 Subject: [PATCH 4/5] improve type hint --- bigframes/core/indexers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 522603365e..69048b6845 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -120,11 +120,9 @@ def __getitem__( def __setitem__( self, key: LocSingleKey, - value, + value: bigframes.core.scalar.Scalar, ): - if pd.api.types.is_list_like(value) or isinstance( - value, bigframes.series.Series - ): + if not pd.api.types.is_scalar(value): raise NotImplementedError( "series.at.__setitem__ only supports scalar right-hand values. 
" f"{constants.FEEDBACK_LINK}" From f488d4244e34c0b3c54b926ca40c51ed339b821f Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Tue, 7 Nov 2023 18:38:38 +0000 Subject: [PATCH 5/5] add copy to test --- tests/system/small/test_series.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 27eea0a310..c8bd0f7afd 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1012,9 +1012,10 @@ def test_loc_setitem_cell(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_series_equal(bf_original.to_pandas(), pd_original) -def test_at_setitem_row_label_scalar(scalars_df_index, scalars_pandas_df_index): - bf_series = scalars_df_index["int64_col"] - pd_series = scalars_pandas_df_index["int64_col"] +def test_at_setitem_row_label_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_series = scalars_df["int64_col"] + pd_series = scalars_pandas_df["int64_col"].copy() bf_series.at[1] = 1000 pd_series.at[1] = 1000 bf_result = bf_series.to_pandas()