From eca0110bdc3e193fc444b7ed7443308f1253d58f Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 11 Feb 2025 22:40:39 +0000 Subject: [PATCH 1/8] [WIP] support timestamp_sub --- bigframes/core/compile/scalar_op_compiler.py | 5 +++++ bigframes/core/rewrite/timedeltas.py | 3 +++ bigframes/operations/__init__.py | 7 ++++++- bigframes/operations/numeric_ops.py | 3 +++ bigframes/operations/timedelta_ops.py | 20 ++++++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 3e5f10eca4..d5ce6e9e09 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -747,6 +747,11 @@ def timestamp_add_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerVal return x + y.to_interval("us") +@scalar_op_compiler.register_binary_op(ops.timestamp_sub_op) +def timestamp_sub_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerValue): + return x - y.to_interval("us") + + @scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True) def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp): supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"] diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index d740b28d7d..64e088cbd4 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -87,6 +87,9 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype): result_op = ops.timestamp_diff_op + if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: + result_op = ops.timestamp_sub_op + return _TypedExpr( result_op.as_expr(left.expr, right.expr), result_op.output_type(left.dtype, right.dtype), diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 88406317fe..21a1171ddc 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -178,7 +178,11 @@ ) from bigframes.operations.struct_ops import StructFieldOp, StructOp from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op -from bigframes.operations.timedelta_ops import timestamp_add_op, ToTimedeltaOp +from bigframes.operations.timedelta_ops import ( + timestamp_add_op, + timestamp_sub_op, + ToTimedeltaOp, +) __all__ = [ # Base ops @@ -251,6 +255,7 @@ "normalize_op", # Timedelta ops "timestamp_add_op", + "timestamp_sub_op", "ToTimedeltaOp", # Datetime ops "date_op", diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py index 5183e5c4c5..38e3e7a516 100644 --- a/bigframes/operations/numeric_ops.py +++ b/bigframes/operations/numeric_ops.py @@ -150,6 +150,9 @@ def output_type(self, *input_types): if dtypes.is_datetime_like(left_type) and dtypes.is_datetime_like(right_type): return dtypes.TIMEDELTA_DTYPE + + if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: + return left_type raise TypeError(f"Cannot subtract dtypes {left_type} and {right_type}") diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py index 69e054fa5c..e8d6d7ffaf 100644 --- a/bigframes/operations/timedelta_ops.py +++ b/bigframes/operations/timedelta_ops.py @@ -54,3 +54,23 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT timestamp_add_op = TimestampAdd() + + +@dataclasses.dataclass(frozen=True) +class TimestampSub(base_ops.BinaryOp): + name: typing.ClassVar[str] = "timestamp_sub" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + # timestamp - timedelta => timestamp + if ( + dtypes.is_datetime_like(input_types[0]) + and input_types[1] is dtypes.TIMEDELTA_DTYPE + ): + return input_types[0] + + raise TypeError( + f"unsupported types for timestamp_add. left: {input_types[0]} right: {input_types[1]}" + ) + + +timestamp_sub_op = TimestampSub() From f8dc1e5467aaa307f3874f2e2c73ca5f4014de0a Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 12 Feb 2025 18:13:30 +0000 Subject: [PATCH 2/8] add timestamp_sub tests --- .../small/operations/test_timedeltas.py | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 6c44a62686..86eabc908f 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -164,3 +164,94 @@ def test_timestamp_add_dataframes(temporal_dfs): pandas.testing.assert_frame_equal( actual_result, expected_result, check_index_type=False ) + + +@pytest.mark.parametrize( + ("column", "pd_dtype"), + [ + ("datetime_col", " Date: Wed, 12 Feb 2025 18:17:21 +0000 Subject: [PATCH 3/8] fix format --- bigframes/operations/numeric_ops.py | 2 +- .../small/operations/test_timedeltas.py | 21 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py index 38e3e7a516..61544984fb 100644 --- a/bigframes/operations/numeric_ops.py +++ b/bigframes/operations/numeric_ops.py @@ -150,7 +150,7 @@ def output_type(self, *input_types): if dtypes.is_datetime_like(left_type) and dtypes.is_datetime_like(right_type): return dtypes.TIMEDELTA_DTYPE - + if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: return left_type diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 86eabc908f..e1992d2cc8 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -195,11 +195,9 @@ def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype ) def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtype): bf_df, pd_df = temporal_dfs - literal = pd.Timedelta(1, 'h') + literal = pd.Timedelta(1, "h") - actual_result = ( - (bf_df[column] - literal).to_pandas().astype(pd_dtype) - ) + actual_result = (bf_df[column] - literal).to_pandas().astype(pd_dtype) expected_result = pd_df[column] - literal pandas.testing.assert_series_equal( @@ -209,13 +207,11 @@ def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtyp def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs): bf_df, pd_df = temporal_dfs - literal = pd.Timestamp('2025-01-01 01:00:00') + literal = pd.Timestamp("2025-01-01 01:00:00") - actual_result = ( - (literal - bf_df['timedelta_col']).to_pandas().astype(" Date: Thu, 13 Feb 2025 17:53:25 +0000 Subject: [PATCH 4/8] fix format --- bigframes/core/rewrite/timedeltas.py | 1 - .../small/operations/test_timedeltas.py | 19 +++++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index d07b87007e..db3a426635 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -110,7 +110,6 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype): return _TypedExpr.create_op_expr(ops.timestamp_diff_op, left, right) - if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index e21ca94731..acf045aab5 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -196,9 +196,6 @@ def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) - - - @pytest.mark.parametrize( @@ -220,7 +217,6 @@ def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtyp ) - def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs): bf_df, pd_df = temporal_dfs literal = pd.Timestamp("2025-01-01 01:00:00") @@ -253,7 +249,6 @@ def test_timestamp_sub_with_numpy_op(temporal_dfs, column, pd_dtype): ) - def test_timestamp_sub_dataframes(temporal_dfs): columns = ["datetime_col", "timestamp_col"] timedelta = pd.Timedelta(1, unit="s") @@ -269,8 +264,8 @@ def test_timestamp_sub_dataframes(temporal_dfs): pandas.testing.assert_frame_equal( actual_result, expected_result, check_index_type=False ) - - + + @pytest.mark.parametrize( "compare_func", [ @@ -296,8 +291,7 @@ def test_timedelta_series_comparison(temporal_dfs, compare_func): actual_result, expected_result, check_index_type=False ) - - + @pytest.mark.parametrize( "compare_func", [ @@ -319,9 +313,7 @@ def test_timedelta_series_and_literal_comparison(temporal_dfs, compare_func): pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) - - - + def test_timedelta_filtering(session): pd_series = pd.Series( @@ -341,11 +333,10 @@ def test_timedelta_filtering(session): ) expected_result = pd_series[(pd_series - timestamp) > pd.Timedelta(1, "h")] - pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) - + def test_timedelta_ordering(session): pd_df = pd.DataFrame( From 15c9e9a36213c4a4a59ae54fba39857eb613c4bd Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 13 Feb 2025 17:55:23 +0000 Subject: [PATCH 5/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/core/rewrite/timedeltas.py | 1 - .../system/small/operations/test_timedeltas.py | 18 +++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index d07b87007e..db3a426635 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -110,7 +110,6 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype): return _TypedExpr.create_op_expr(ops.timestamp_diff_op, left, right) - if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index e21ca94731..1e7ce223c5 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -196,9 +196,6 @@ def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) - - - @pytest.mark.parametrize( @@ -220,7 +217,6 @@ def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtyp ) - def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs): bf_df, pd_df = temporal_dfs literal = pd.Timestamp("2025-01-01 01:00:00") @@ -253,7 +249,6 @@ def test_timestamp_sub_with_numpy_op(temporal_dfs, column, pd_dtype): ) - def test_timestamp_sub_dataframes(temporal_dfs): columns = ["datetime_col", "timestamp_col"] timedelta = pd.Timedelta(1, unit="s") @@ -269,8 +264,8 @@ def test_timestamp_sub_dataframes(temporal_dfs): pandas.testing.assert_frame_equal( actual_result, expected_result, check_index_type=False ) - - + + @pytest.mark.parametrize( "compare_func", [ @@ -296,8 +291,7 @@ def test_timedelta_series_comparison(temporal_dfs, compare_func): actual_result, expected_result, check_index_type=False ) - - + @pytest.mark.parametrize( "compare_func", [ @@ -319,9 +313,7 @@ def test_timedelta_series_and_literal_comparison(temporal_dfs, compare_func): pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) - - - + def test_timedelta_filtering(session): pd_series = pd.Series( @@ -345,7 +337,7 @@ def test_timedelta_filtering(session): pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) - + def test_timedelta_ordering(session): pd_df = pd.DataFrame( From 40eb5e623f4b7796cdc85d1250c76fe5eac65153 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 13 Feb 2025 21:20:15 +0000 Subject: [PATCH 6/8] fix error message --- bigframes/operations/timedelta_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py index e8d6d7ffaf..3d3c3bfeeb 100644 --- a/bigframes/operations/timedelta_ops.py +++ b/bigframes/operations/timedelta_ops.py @@ -69,7 +69,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT return input_types[0] raise TypeError( - f"unsupported types for timestamp_add. left: {input_types[0]} right: {input_types[1]}" + f"unsupported types for timestamp_sub. left: {input_types[0]} right: {input_types[1]}" ) From 1fc78583224b74a72ea0932611b6519e3d558ae4 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 13 Feb 2025 22:25:13 +0000 Subject: [PATCH 7/8] fix column references in tests --- tests/system/small/operations/test_timedeltas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index acf045aab5..cf2215e737 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -189,10 +189,10 @@ def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype bf_df, pd_df = temporal_dfs actual_result = ( - (bf_df[column] - bf_df["timedelta_col"]).to_pandas().astype(pd_dtype) + (bf_df[column] - bf_df["timedelta_col_1"]).to_pandas().astype(pd_dtype) ) - expected_result = pd_df[column] - pd_df["timedelta_col"] + expected_result = pd_df[column] - pd_df["timedelta_col_1"] pandas.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -221,9 +221,9 @@ def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs): bf_df, pd_df = temporal_dfs literal = pd.Timestamp("2025-01-01 01:00:00") - actual_result = (literal - bf_df["timedelta_col"]).to_pandas().astype(" Date: Thu, 13 Feb 2025 22:27:32 +0000 Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/system/small/operations/test_timedeltas.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index cf2215e737..9dc889f619 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -240,7 +240,9 @@ def test_timestamp_sub_with_numpy_op(temporal_dfs, column, pd_dtype): bf_df, pd_df = temporal_dfs actual_result = ( - np.subtract(bf_df[column], bf_df["timedelta_col_1"]).to_pandas().astype(pd_dtype) + np.subtract(bf_df[column], bf_df["timedelta_col_1"]) + .to_pandas() + .astype(pd_dtype) ) expected_result = np.subtract(pd_df[column], pd_df["timedelta_col_1"])