feat: support subtraction in DATETIME/TIMESTAMP columns with timedelta columns by sycai · Pull Request #1390 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content
Merged
5 changes: 5 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,11 @@ def timestamp_add_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerVal
return x + y.to_interval("us")


@scalar_op_compiler.register_binary_op(ops.timestamp_sub_op)
def timestamp_sub_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerValue):
    """Compile ``ops.timestamp_sub_op`` into an ibis expression.

    Args:
        x: The timestamp expression to subtract from.
        y: The integer expression to subtract; it is converted to an
            interval with unit ``"us"`` before the subtraction.

    Returns:
        The expression ``x`` minus the interval built from ``y``.
    """
    # NOTE(review): presumably y carries the timedelta as an integer count of
    # microseconds -- confirm against the timedelta rewrite rules.
    microsecond_interval = y.to_interval("us")
    return x - microsecond_interval


@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True)
def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp):
supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"]
Expand Down
3 changes: 3 additions & 0 deletions bigframes/core/rewrite/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype):
return _TypedExpr.create_op_expr(ops.timestamp_diff_op, left, right)

if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE:
return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right)

return _TypedExpr.create_op_expr(ops.sub_op, left, right)


Expand Down
7 changes: 6 additions & 1 deletion bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,11 @@
)
from bigframes.operations.struct_ops import StructFieldOp, StructOp
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
from bigframes.operations.timedelta_ops import timestamp_add_op, ToTimedeltaOp
from bigframes.operations.timedelta_ops import (
timestamp_add_op,
timestamp_sub_op,
ToTimedeltaOp,
)

__all__ = [
# Base ops
Expand Down Expand Up @@ -251,6 +255,7 @@
"normalize_op",
# Timedelta ops
"timestamp_add_op",
"timestamp_sub_op",
"ToTimedeltaOp",
# Datetime ops
"date_op",
Expand Down
3 changes: 3 additions & 0 deletions bigframes/operations/numeric_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ def output_type(self, *input_types):
if dtypes.is_datetime_like(left_type) and dtypes.is_datetime_like(right_type):
return dtypes.TIMEDELTA_DTYPE

if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE:
return left_type

raise TypeError(f"Cannot subtract dtypes {left_type} and {right_type}")


Expand Down
20 changes: 20 additions & 0 deletions bigframes/operations/timedelta_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,23 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT


# Module-level instance of TimestampAdd, bound to the name `timestamp_add_op`.
timestamp_add_op = TimestampAdd()


@dataclasses.dataclass(frozen=True)
class TimestampSub(base_ops.BinaryOp):
    """Binary op that subtracts a timedelta from a datetime-like value."""

    # Identifier of this op.
    name: typing.ClassVar[str] = "timestamp_sub"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the result type of ``timestamp - timedelta``.

        Args:
            *input_types: The left and right operand types, in that order.

        Returns:
            The left operand type, unchanged.

        Raises:
            TypeError: If the left type is not datetime-like or the right
                type is not ``dtypes.TIMEDELTA_DTYPE``.
        """
        left_type, right_type = input_types[0], input_types[1]

        # Guard clause: reject every combination except timestamp - timedelta.
        if (
            not dtypes.is_datetime_like(left_type)
            or right_type is not dtypes.TIMEDELTA_DTYPE
        ):
            raise TypeError(
                f"unsupported types for timestamp_sub. left: {input_types[0]} right: {input_types[1]}"
            )

        # timestamp - timedelta => timestamp
        return left_type


# Module-level instance of TimestampSub, bound to the name `timestamp_sub_op`.
timestamp_sub_op = TimestampSub()
90 changes: 90 additions & 0 deletions tests/system/small/operations/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,96 @@ def test_timestamp_add_dataframes(temporal_dfs):
)


@pytest.mark.parametrize(
    ("column", "pd_dtype"),
    [
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    ],
)
def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype):
    """Series minus timedelta Series matches the pandas result."""
    bf_df, pd_df = temporal_dfs

    bf_difference = bf_df[column] - bf_df["timedelta_col_1"]
    # Cast the downloaded result to the dtype named by the parametrization
    # before comparing it with the pandas result.
    got = bf_difference.to_pandas().astype(pd_dtype)
    want = pd_df[column] - pd_df["timedelta_col_1"]

    pandas.testing.assert_series_equal(got, want, check_index_type=False)


@pytest.mark.parametrize(
    ("column", "pd_dtype"),
    [
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    ],
)
def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtype):
    """Series minus a ``pd.Timedelta`` literal matches the pandas result."""
    bf_df, pd_df = temporal_dfs
    literal = pd.Timedelta(1, "h")

    bf_difference = bf_df[column] - literal
    # Cast the downloaded result to the dtype named by the parametrization
    # before comparing it with the pandas result.
    got = bf_difference.to_pandas().astype(pd_dtype)
    want = pd_df[column] - literal

    pandas.testing.assert_series_equal(got, want, check_index_type=False)


def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs):
    """A ``pd.Timestamp`` literal minus a timedelta Series matches pandas."""
    bf_df, pd_df = temporal_dfs
    literal = pd.Timestamp("2025-01-01 01:00:00")

    bf_difference = literal - bf_df["timedelta_col_1"]
    # Cast the downloaded result to "<M8[ns]" before comparing with pandas.
    got = bf_difference.to_pandas().astype("<M8[ns]")
    want = literal - pd_df["timedelta_col_1"]

    pandas.testing.assert_series_equal(got, want, check_index_type=False)


@pytest.mark.parametrize(
    ("column", "pd_dtype"),
    [
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    ],
)
def test_timestamp_sub_with_numpy_op(temporal_dfs, column, pd_dtype):
    """``np.subtract`` on the Series pair matches the pandas result."""
    bf_df, pd_df = temporal_dfs

    bf_difference = np.subtract(bf_df[column], bf_df["timedelta_col_1"])
    # Cast the downloaded result to the dtype named by the parametrization
    # before comparing it with the pandas result.
    got = bf_difference.to_pandas().astype(pd_dtype)
    want = np.subtract(pd_df[column], pd_df["timedelta_col_1"])

    pandas.testing.assert_series_equal(got, want, check_index_type=False)


def test_timestamp_sub_dataframes(temporal_dfs):
    """DataFrame minus a ``pd.Timedelta`` literal matches the pandas result."""
    bf_df, pd_df = temporal_dfs
    columns = ["datetime_col", "timestamp_col"]
    timedelta = pd.Timedelta(1, unit="s")

    got = (bf_df[columns] - timedelta).to_pandas()
    # Cast each downloaded column to a fixed dtype before comparing with pandas.
    target_dtypes = (
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    )
    for col_name, pd_dtype in target_dtypes:
        got[col_name] = got[col_name].astype(pd_dtype)

    want = pd_df[columns] - timedelta

    pandas.testing.assert_frame_equal(got, want, check_index_type=False)


@pytest.mark.parametrize(
"compare_func",
[
Expand Down
0