diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index a17b69815c..edf1e14b3a 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -26,6 +26,7 @@ import bigframes_vendored.ibis.expr.types as ibis_types import pandas as pd +from bigframes.core.compile import constants as compiler_constants import bigframes.core.compile.ibis_types as compile_ibis_types import bigframes.core.compile.scalar_op_compiler as scalar_compilers import bigframes.core.expression as ex @@ -575,6 +576,30 @@ def _( return original_column.delta(shifted_column, part="microsecond") +@compile_unary_agg.register +def _( + op: agg_ops.DateSeriesDiffOp, + column: ibis_types.Column, + window=None, +) -> ibis_types.Value: + if not column.type().is_date(): + raise TypeError(f"Cannot perform date series diff on type{column.type()}") + + original_column = cast(ibis_types.DateColumn, column) + shifted_column = cast( + ibis_types.DateColumn, + compile_unary_agg(agg_ops.ShiftOp(op.periods), column, window), + ) + + conversion_factor = typing.cast( + ibis_types.IntegerValue, compiler_constants.UNIT_TO_US_CONVERSION_FACTORS["D"] + ) + + return ( + original_column.delta(shifted_column, part="day") * conversion_factor + ).floor() + + @compile_unary_agg.register def _( op: agg_ops.AllOp, diff --git a/bigframes/core/compile/constants.py b/bigframes/core/compile/constants.py new file mode 100644 index 0000000000..9c307125ab --- /dev/null +++ b/bigframes/core/compile/constants.py @@ -0,0 +1,27 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Datetime constants +UNIT_TO_US_CONVERSION_FACTORS = { + "W": 7 * 24 * 60 * 60 * 1000 * 1000, + "d": 24 * 60 * 60 * 1000 * 1000, + "D": 24 * 60 * 60 * 1000 * 1000, + "h": 60 * 60 * 1000 * 1000, + "m": 60 * 1000 * 1000, + "s": 1000 * 1000, + "ms": 1000, + "us": 1, + "ns": 1e-3, +} diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 7111406646..9af6a5c0b9 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -26,6 +26,7 @@ import numpy as np import pandas as pd +from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS import bigframes.core.compile.default_ordering import bigframes.core.compile.ibis_types import bigframes.core.expression as ex @@ -50,19 +51,6 @@ ) _OBJ_REF_IBIS_DTYPE = ibis_dtypes.Struct.from_tuples(_OBJ_REF_STRUCT_SCHEMA) # type: ignore -# Datetime constants -UNIT_TO_US_CONVERSION_FACTORS = { - "W": 7 * 24 * 60 * 60 * 1000 * 1000, - "d": 24 * 60 * 60 * 1000 * 1000, - "D": 24 * 60 * 60 * 1000 * 1000, - "h": 60 * 60 * 1000 * 1000, - "m": 60 * 1000 * 1000, - "s": 1000 * 1000, - "ms": 1000, - "us": 1, - "ns": 1e-3, -} - class ScalarOpCompiler: # Mapping of operation name to implemenations diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index e21e0b6bf2..345a57ab89 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -215,10 +215,15 @@ def _rewrite_aggregation( else: input_type = aggregation.arg.dtype - if isinstance(aggregation.op, aggs.DiffOp) and dtypes.is_datetime_like(input_type): - return ex.UnaryAggregation( - aggs.TimeSeriesDiffOp(aggregation.op.periods), aggregation.arg - ) + if isinstance(aggregation.op, aggs.DiffOp): + if dtypes.is_datetime_like(input_type): + return ex.UnaryAggregation( + aggs.TimeSeriesDiffOp(aggregation.op.periods), aggregation.arg + ) + elif input_type == dtypes.DATE_DTYPE: + return ex.UnaryAggregation( + aggs.DateSeriesDiffOp(aggregation.op.periods), aggregation.arg + ) if isinstance(aggregation.op, aggs.StdOp) and input_type is dtypes.TIMEDELTA_DTYPE: return ex.UnaryAggregation( diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index bf6016bb2e..a714f5804c 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -500,7 +500,7 @@ def skips_nulls(self): return False def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if dtypes.is_datetime_like(input_types[0]): + if dtypes.is_date_like(input_types[0]): return dtypes.TIMEDELTA_DTYPE return super().output_type(*input_types) @@ -519,6 +519,20 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError(f"expect datetime-like types, but got {input_types[0]}") +@dataclasses.dataclass(frozen=True) +class DateSeriesDiffOp(UnaryWindowOp): + periods: int + + @property + def skips_nulls(self): + return False + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + if input_types[0] == dtypes.DATE_DTYPE: + return dtypes.TIMEDELTA_DTYPE + raise TypeError(f"expect date type, but got {input_types[0]}") + + @dataclasses.dataclass(frozen=True) class AllOp(UnaryAggregateOp): name: ClassVar[str] = "all" diff --git a/tests/system/small/operations/test_dates.py b/tests/system/small/operations/test_dates.py new file mode 100644 index 0000000000..f957879d8b --- /dev/null +++ b/tests/system/small/operations/test_dates.py @@ -0,0 +1,28 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas.testing + +from bigframes import dtypes + + +def test_date_series_diff_agg(scalars_dfs): + bf_df, pd_df = scalars_dfs + + actual_result = bf_df["date_col"].diff().to_pandas() + + expected_result = pd_df["date_col"].diff().astype(dtypes.TIMEDELTA_DTYPE) + pandas.testing.assert_series_equal( + actual_result, expected_result, check_index_type=False + )