From 8cb57aa9cf8881d8dc4893baf4479999ba117db2 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 15 May 2025 00:31:42 +0000 Subject: [PATCH 1/2] fix: Fix clip int series with float bounds --- bigframes/operations/base.py | 4 ++-- bigframes/series.py | 10 +++++++--- tests/system/small/test_series.py | 11 +++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 8d70596b7d..dbd50c02db 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -245,9 +245,9 @@ def _align( ) return (typing.cast(ex.DerefOp, values[0]), values[1], block) - def _align3(self, other1: series.Series | scalars.Scalar, other2: series.Series | scalars.Scalar, how="left") -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]: # type: ignore + def _align3(self, other1: series.Series | scalars.Scalar, other2: series.Series | scalars.Scalar, how="left", cast_scalars: bool = True) -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]: # type: ignore """Aligns the series value with 2 other scalars or series objects. Returns new values and joined tabled expression.""" - values, index = self._align_n([other1, other2], how) + values, index = self._align_n([other1, other2], how, cast_scalars=cast_scalars) return ( typing.cast(ex.DerefOp, values[0]), values[1], diff --git a/bigframes/series.py b/bigframes/series.py index 2c387734d3..44760d8e20 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1347,20 +1347,24 @@ def items(self): yield item def where(self, cond, other=None): - value_id, cond_id, other_id, block = self._align3(cond, other) + value_id, cond_id, other_id, block = self._align3( + cond, other, cast_scalars=False + ) block, result_id = block.project_expr( ops.where_op.as_expr(value_id, cond_id, other_id) ) return Series(block.select_column(result_id).with_column_labels([self.name])) - def clip(self, lower, upper): + def clip(self, lower=None, upper=None): if lower is None and upper is None: return self if lower is None: return self._apply_binary_op(upper, ops.minimum_op, alignment="left") if upper is None: return self._apply_binary_op(lower, ops.maximum_op, alignment="left") - value_id, lower_id, upper_id, block = self._align3(lower, upper) + value_id, lower_id, upper_id, block = self._align3( + lower, upper, cast_scalars=False + ) block, result_id = block.project_expr( ops.clip_op.as_expr(value_id, lower_id, upper_id), ) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 7972fbe1e9..286a480d18 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -3001,6 +3001,17 @@ def test_clip(scalars_df_index, scalars_pandas_df_index, ordered): assert_series_equal(bf_result, pd_result, ignore_order=not ordered) +def test_clip_int_with_float_bounds(scalars_df_index, scalars_pandas_df_index): + col_bf = scalars_df_index["int64_too"] + bf_result = col_bf.clip(-100, 3.14151593).to_pandas() + + col_pd = scalars_pandas_df_index["int64_too"] + # pandas doesn't work with Int64 and clip with floats + pd_result = col_pd.astype("int64").clip(-100, 3.14151593).astype("Float64") + + assert_series_equal(bf_result, pd_result) + + def test_clip_filtered_two_sided(scalars_df_index, scalars_pandas_df_index): col_bf = scalars_df_index["int64_col"].iloc[::2] lower_bf = scalars_df_index["int64_too"].iloc[2:] - 1 From 944dbb340aff5648e975ec201834032b28b44967 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 15 May 2025 19:32:18 +0000 Subject: [PATCH 2/2] fix str to date coercion --- bigframes/core/compile/scalar_op_compiler.py | 22 +++----------------- bigframes/operations/base.py | 2 +- bigframes/series.py | 7 +++---- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index a1cf72be97..12aee94066 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1928,34 +1928,18 @@ def clip_op( if isinstance(lower, ibis_types.NullScalar) and ( not isinstance(upper, ibis_types.NullScalar) ): - return ( - ibis_api.case() # type: ignore - .when(upper.isnull() | (original > upper), upper) - .else_(original) - .end() - ) + return ibis_api.least(original, upper) elif (not isinstance(lower, ibis_types.NullScalar)) and isinstance( upper, ibis_types.NullScalar ): - return ( - ibis_api.case() # type: ignore - .when(lower.isnull() | (original < lower), lower) - .else_(original) - .end() - ) + return ibis_api.greatest(original, lower) elif isinstance(lower, ibis_types.NullScalar) and ( isinstance(upper, ibis_types.NullScalar) ): return original else: # Note: Pandas has unchanged behavior when upper bound and lower bound are flipped. This implementation requires that lower_bound < upper_bound - return ( - ibis_api.case() # type: ignore - .when(lower.isnull() | (original < lower), lower) - .when(upper.isnull() | (original > upper), upper) - .else_(original) - .end() - ) + return ibis_api.greatest(ibis_api.least(original, upper), lower) # N-ary Operations diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index dbd50c02db..c316d28321 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -260,7 +260,7 @@ def _align_n( others: typing.Sequence[typing.Union[series.Series, scalars.Scalar]], how="outer", ignore_self=False, - cast_scalars: bool = True, + cast_scalars: bool = False, ) -> tuple[ typing.Sequence[Union[ex.ScalarConstantExpression, ex.DerefOp]], blocks.Block, diff --git a/bigframes/series.py b/bigframes/series.py index 44760d8e20..626cf2fc76 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1347,9 +1347,7 @@ def items(self): yield item def where(self, cond, other=None): - value_id, cond_id, other_id, block = self._align3( - cond, other, cast_scalars=False - ) + value_id, cond_id, other_id, block = self._align3(cond, other) block, result_id = block.project_expr( ops.where_op.as_expr(value_id, cond_id, other_id) ) @@ -1362,8 +1360,9 @@ def clip(self, lower=None, upper=None): return self._apply_binary_op(upper, ops.minimum_op, alignment="left") if upper is None: return self._apply_binary_op(lower, ops.maximum_op, alignment="left") + # special rule to coerce scalar string args to date value_id, lower_id, upper_id, block = self._align3( - lower, upper, cast_scalars=False + lower, upper, cast_scalars=(bigframes.dtypes.is_date_like(self.dtype)) ) block, result_id = block.project_expr( ops.clip_op.as_expr(value_id, lower_id, upper_id),