chore: define timedelta type and to_timedelta function by sycai · Pull Request #1317 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content
1 change: 1 addition & 0 deletions bigframes/core/compile/ibis_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
# Forward lookup (bigframes dtype -> ibis dtype), built by flipping each
# (ibis, pandas) pair found in BIDIRECTIONAL_MAPPINGS.
BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = {
    pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS
}
# The timedelta dtype is added to the forward map only: it compiles to an ibis
# int64, and IBIS_TO_BIGFRAMES below is built without a matching reverse entry.
BIGFRAMES_TO_IBIS.update({bigframes.dtypes.TIMEDETLA_DTYPE: ibis_dtypes.int64})
# Reverse lookup (ibis dtype -> bigframes dtype) over the same pairs.
IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, bigframes.dtypes.Dtype] = {
    ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS
}
Expand Down
7 changes: 7 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,13 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp):
return x.cast(ibis_dtypes.Timestamp(timezone="UTC"))


@scalar_op_compiler.register_unary_op(ops.ToTimedeltaOp, pass_op=True)
def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp):
    """Compile ``ToTimedeltaOp``: scale ``x`` by the factor for ``op.unit``, then floor.

    Args:
        x: The ibis value to convert; it is treated as a numeric value.
        op: The op instance, whose ``unit`` selects the conversion factor.

    Returns:
        The ibis expression ``floor(x * factor)``.
    """
    numeric_x = typing.cast(ibis_types.NumericValue, x)
    # NOTE(review): going by its name, the table maps each unit to its length
    # in microseconds -- confirm against its definition.
    factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit]
    scaled = numeric_x * factor  # type: ignore
    return scaled.floor()


@scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp):
ibis_node = getattr(op.func, "ibis_node", None)
Expand Down
4 changes: 4 additions & 0 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
TIME_DTYPE = pd.ArrowDtype(pa.time64("us"))
DATETIME_DTYPE = pd.ArrowDtype(pa.timestamp("us"))
TIMESTAMP_DTYPE = pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
TIMEDELTA_DTYPE = pd.ArrowDtype(pa.duration("us"))
# Misspelled name kept as an alias so existing references keep working.
TIMEDETLA_DTYPE = TIMEDELTA_DTYPE
NUMERIC_DTYPE = pd.ArrowDtype(pa.decimal128(38, 9))
BIGNUMERIC_DTYPE = pd.ArrowDtype(pa.decimal256(76, 38))
# No arrow equivalent
Expand Down Expand Up @@ -632,6 +633,9 @@ def convert_to_schema_field(
return google.cloud.bigquery.SchemaField(
name, "RECORD", fields=inner_fields
)
if bigframes_dtype.pyarrow_dtype == pa.duration("us"):
# Timedeltas are represented as integers in microseconds.
return google.cloud.bigquery.SchemaField(name, "INTEGER")
raise ValueError(
f"No arrow conversion for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
)
Expand Down
3 changes: 3 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
)
from bigframes.operations.struct_ops import StructFieldOp, StructOp
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
from bigframes.operations.timedelta_ops import ToTimedeltaOp

__all__ = [
# Base ops
Expand Down Expand Up @@ -240,6 +241,8 @@
"minute_op",
"second_op",
"normalize_op",
# Timedelta ops
"ToTimedeltaOp",
# Datetime ops
"date_op",
"time_op",
Expand Down
31 changes: 31 additions & 0 deletions bigframes/operations/timedelta_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import dataclasses
import typing

from bigframes import dtypes
from bigframes.operations import base_ops


@dataclasses.dataclass(frozen=True)
class ToTimedeltaOp(base_ops.UnaryOp):
    """Unary op that converts an integer input into the timedelta dtype.

    Attributes:
        unit: The unit in which the integer input is expressed.
    """

    name: typing.ClassVar[str] = "to_timedelta"
    unit: typing.Literal["us", "ms", "s", "m", "h", "d", "W"]

    def output_type(self, *input_types):
        """Return the timedelta dtype after validating the input dtype.

        Args:
            *input_types: The operand dtypes; only the first one is inspected.

        Returns:
            ``dtypes.TIMEDETLA_DTYPE``.

        Raises:
            TypeError: If the first input dtype is not the integer dtype.
        """
        # Compare by equality rather than identity: an equal dtype held in a
        # different object must still be accepted as integer input.
        if input_types[0] != dtypes.INT_DTYPE:
            raise TypeError("expected integer input")
        return dtypes.TIMEDETLA_DTYPE
2 changes: 2 additions & 0 deletions bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import bigframes.dataframe
import bigframes.enums
import bigframes.functions._utils as bff_utils
from bigframes.pandas.core.api import to_timedelta
from bigframes.pandas.io.api import (
from_glob_path,
read_csv,
Expand Down Expand Up @@ -313,6 +314,7 @@ def reset_session():
"read_pickle",
"remote_function",
"to_datetime",
"to_timedelta",
"from_glob_path",
# pandas dtype attributes
"NA",
Expand Down
13 changes: 13 additions & 0 deletions bigframes/pandas/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17 changes: 17 additions & 0 deletions bigframes/pandas/core/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from bigframes.pandas.core.tools.timedeltas import to_timedelta

__all__ = ["to_timedelta"]
13 changes: 13 additions & 0 deletions bigframes/pandas/core/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
64 changes: 64 additions & 0 deletions bigframes/pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing

from bigframes_vendored.pandas.core.tools import (
timedeltas as vendored_pandas_timedeltas,
)
import pandas as pd

from bigframes import operations as ops
from bigframes import series


def to_timedelta(
    arg: typing.Union[series.Series, str, int, float],
    unit: typing.Optional[vendored_pandas_timedeltas.UnitChoices] = None,
) -> typing.Union[series.Series, pd.Timedelta]:
    # The user-facing docstring is copied from the vendored pandas stub below.
    if isinstance(arg, series.Series):
        # With no explicit unit, Series values are interpreted as microseconds.
        resolved_unit = _canonicalize_unit(unit) if unit is not None else "us"
        timedelta_op = ops.ToTimedeltaOp(resolved_unit)
        return arg._apply_unary_op(timedelta_op)

    # Anything that is not a bigframes Series is handed to pandas as-is.
    return pd.to_timedelta(arg, unit)


to_timedelta.__doc__ = vendored_pandas_timedeltas.to_timedelta.__doc__


def _canonicalize_unit(
    unit: vendored_pandas_timedeltas.UnitChoices,
) -> typing.Literal["us", "ms", "s", "m", "h", "d", "W"]:
    """Map an accepted unit alias onto its canonical short spelling.

    Args:
        unit: Any of the accepted spellings, e.g. ``"days"`` or ``"millis"``.

    Returns:
        One of ``"us"``, ``"ms"``, ``"s"``, ``"m"``, ``"h"``, ``"d"``, ``"W"``.

    Raises:
        TypeError: If ``unit`` is not a recognized alias.
    """
    canonical_by_alias: typing.Dict[
        str, typing.Literal["us", "ms", "s", "m", "h", "d", "W"]
    ] = {
        # weeks
        "w": "W",
        "W": "W",
        # days
        "D": "d",
        "d": "d",
        "days": "d",
        "day": "d",
        # hours
        "hours": "h",
        "hour": "h",
        "hr": "h",
        "h": "h",
        # minutes
        "m": "m",
        "minute": "m",
        "min": "m",
        "minutes": "m",
        # seconds
        "s": "s",
        "seconds": "s",
        "sec": "s",
        "second": "s",
        # milliseconds
        "ms": "ms",
        "milliseconds": "ms",
        "millisecond": "ms",
        "milli": "ms",
        "millis": "ms",
        # microseconds
        "us": "us",
        "microseconds": "us",
        "microsecond": "us",
        "µs": "us",
        "micro": "us",
        "micros": "us",
    }

    canonical = canonical_by_alias.get(unit)
    if canonical is None:
        raise TypeError(f"Unrecognized unit: {unit}")
    return canonical
67 changes: 67 additions & 0 deletions tests/system/small/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from datetime import datetime
import typing

import pandas as pd
import pytest
Expand Down Expand Up @@ -726,3 +727,69 @@ def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc):
pd.testing.assert_series_equal(
bf_result, pd_result, check_index_type=False, check_names=False
)


@pytest.mark.parametrize(
    "unit",
    [
        "W",
        "w",
        "D",
        "d",
        "days",
        "day",
        "hours",
        "hour",
        "hr",
        "h",
        "m",
        "minute",
        "min",
        "minutes",
        "s",
        "seconds",
        "sec",
        "second",
        "ms",
        "milliseconds",
        "millisecond",
        "milli",
        "millis",
        "us",
        "microseconds",
        "microsecond",
        "µs",
        "micro",
        "micros",
    ],
)
def test_to_timedelta_with_bf_series(session, unit):
    """Each accepted unit alias must give the same timedeltas as pandas."""
    values = [1, 2, 3]
    expected_result = pd.to_timedelta(pd.Series(values), unit)

    bf_timedeltas = typing.cast(
        bpd.Series, bpd.to_timedelta(bpd.Series(values, session=session), unit)
    )
    # Cast to timedelta64[ns] so the result is comparable with the pandas one.
    actual_result = bf_timedeltas.to_pandas().astype("timedelta64[ns]")

    pd.testing.assert_series_equal(
        actual_result, expected_result, check_index_type=False
    )


@pytest.mark.parametrize("unit", ["Y", "M", "whatever"])
def test_to_timedelta_with_bf_series_invalid_unit(session, unit):
    """Units outside the accepted alias set must raise ``TypeError``."""
    int_series = bpd.Series([1, 2, 3], session=session)

    with pytest.raises(TypeError):
        bpd.to_timedelta(int_series, unit)


# The argument is named ``arg`` rather than ``input`` so it does not shadow the builtin.
@pytest.mark.parametrize("arg", [1, 1.2, "1s"])
def test_to_timedelta_non_bf_series(arg):
    """Non-Series inputs must give the same result as ``pd.to_timedelta``."""
    assert bpd.to_timedelta(arg) == pd.to_timedelta(arg)
99 changes: 99 additions & 0 deletions third_party/bigframes_vendored/pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Contains code from https://github.com/pandas-dev/pandas/blob/v2.2.3/pandas/core/tools/timedeltas.py

import typing

from bigframes_vendored import constants
import pandas as pd

from bigframes import series

UnitChoices = typing.Literal[
"W",
"w",
"D",
"d",
"days",
"day",
"hours",
"hour",
"hr",
"h",
"m",
"minute",
"min",
"minutes",
"s",
"seconds",
"sec",
"second",
"ms",
"milliseconds",
"millisecond",
"milli",
"millis",
"us",
"microseconds",
"microsecond",
"µs",
"micro",
"micros",
]


def to_timedelta(
    arg: typing.Union[series.Series, str, int, float],
    unit: typing.Optional[UnitChoices] = None,
) -> typing.Union[series.Series, pd.Timedelta]:
    """
    Converts a scalar or Series to a timedelta object.

    .. note::
        BigQuery only supports precision up to microseconds (us). Therefore, when working
        with timedeltas that have a finer granularity than microseconds, be aware that
        the additional precision will not be represented in BigQuery.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

    Converting a Scalar to timedelta

        >>> scalar = 2
        >>> bpd.to_timedelta(scalar, unit='s')
        Timedelta('0 days 00:00:02')

    Converting a Series of integers to a Series of timedeltas

        >>> int_series = bpd.Series([1,2,3])
        >>> bpd.to_timedelta(int_series, unit='s')
        0    0 days 00:00:01
        1    0 days 00:00:02
        2    0 days 00:00:03
        dtype: duration[us][pyarrow]

    Args:
        arg (int, float, str, Series):
            The object to convert to a timedelta.
        unit (str, optional):
            Denotes the unit of the arg for numeric `arg`. When omitted, Series
            values are interpreted as microseconds (``"us"``); non-Series
            inputs are passed to ``pandas.to_timedelta`` with the unit unset.

            Possible values:

            * 'W' / 'w'
            * 'D' / 'd' / 'days' / 'day'
            * 'hours' / 'hour' / 'hr' / 'h'
            * 'm' / 'minute' / 'min' / 'minutes'
            * 's' / 'seconds' / 'sec' / 'second'
            * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis'
            * 'us' / 'microseconds' / 'microsecond' / 'µs' / 'micro' / 'micros'

    Returns:
        Union[pandas.Timedelta, bigframes.pandas.Series]:
            Return type depends on input
            - Series: Series of duration[us][pyarrow] dtype
            - scalar: timedelta

    """

    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
Loading
0