BUG: Fix a bug in 'timedelta_range' that produced an extra point on a edge case (fix #30353) by hasB4K · Pull Request #33498 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

BUG: Fix a bug in 'timedelta_range' that produced an extra point on a edge case (fix #30353) #33498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 9, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
CLN: fix review
  • Loading branch information
hasB4K committed May 9, 2020
commit c0bbbc509f40853b53e7233681e6b66ca93a185a
14 changes: 7 additions & 7 deletions pandas/core/arrays/_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pandas.tseries.offsets import DateOffset


def generate_time_range(
def generate_regular_range(
start: Union[Timestamp, Timedelta],
end: Union[Timestamp, Timedelta],
periods: int,
Expand All @@ -25,18 +25,18 @@ def generate_time_range(
Parameters
----------
start : Timedelta, Timestamp or None
first point of produced date range
First point of produced date range.
end : Timedelta, Timestamp or None
last point of produced date range
Last point of produced date range.
periods : int
number of periods in produced date range
Number of periods in produced date range.
freq : DateOffset
describes space between dates in produced date range
Describes space between dates in produced date range.
It should be an instance of Tick.

Returns
-------
ndarray[np.int64]
Representing nanosecond unix timestamps.
ndarray[np.int64] Representing nanoseconds.
"""
start = start.value if start is not None else None
end = end.value if end is not None else None
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

from pandas.core.algorithms import checked_add_with_arr
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_time_range
from pandas.core.arrays._ranges import generate_regular_range
import pandas.core.common as com

from pandas.tseries.frequencies import get_period_alias, to_offset
Expand Down Expand Up @@ -398,7 +398,7 @@ def _generate_range(
end = end.tz_localize(None)

if isinstance(freq, Tick):
values = generate_time_range(start, end, periods, freq)
values = generate_regular_range(start, end, periods, freq)
else:
xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
values = np.array([x.value for x in xdr], dtype=np.int64)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from pandas.core import nanops
from pandas.core.algorithms import checked_add_with_arr
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_time_range
from pandas.core.arrays._ranges import generate_regular_range
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.ops.common import unpack_zerodim_and_defer
Expand Down Expand Up @@ -259,7 +259,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
left_closed, right_closed = dtl.validate_endpoints(closed)

if freq is not None:
index = generate_time_range(start, end, periods, freq)
index = generate_regular_range(start, end, periods, freq)
else:
index = np.linspace(start.value, end.value, periods).astype("i8")
if len(index) >= 2:
Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/indexes/timedeltas/test_timedelta_range.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas import timedelta_range, to_timedelta
from pandas import Timedelta, Timestamp, date_range, timedelta_range, to_timedelta
import pandas._testing as tm

from pandas.tseries.offsets import Day, Second
Expand Down Expand Up @@ -61,3 +61,33 @@ def test_errors(self):
# too many params
with pytest.raises(ValueError, match=msg):
timedelta_range(start="0 days", end="5 days", periods=10, freq="H")

@pytest.mark.parametrize(
"start, end, freq",
[
("1D", "10D", "2D"),
("2D", "30D", "3D"),
("2s", "50s", "5s"),
# tests that worked before GH 33498:
("4D", "16D", "3D"),
("8D", "16D", "40s"),
],
)
def test_timedelta_range_freq_divide_end(self, start, end, freq):
# GH 33498 only the cases where `(end % freq) == 0` used to fail

def mock_timedelta_range(start=None, end=None, **kwargs):
epoch = Timestamp(0)
if start is not None:
start = epoch + Timedelta(start)
if end is not None:
end = epoch + Timedelta(end)
result = date_range(start=start, end=end, **kwargs) - epoch
result.freq = freq
return result

res = timedelta_range(start=start, end=end, freq=freq)
exp = mock_timedelta_range(start=start, end=end, freq=freq)

tm.assert_index_equal(res, exp)
assert res.freq == exp.freq
31 changes: 1 addition & 30 deletions pandas/tests/resample/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,34 +149,5 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
result = s.resample(resample_freq).min()
expected_index = pd.timedelta_range(freq=resample_freq, start=start, end=end)
tm.assert_index_equal(result.index, expected_index)
assert result.index.freq == expected_index.freq
assert not np.isnan(result[-1])


@pytest.mark.parametrize(
"start, end, freq",
[
("1D", "10D", "2D"),
("2D", "30D", "3D"),
("2s", "50s", "5s"),
# tests that worked before GH 33498:
("4D", "16D", "3D"),
("8D", "16D", "40s"),
],
)
def test_timedelta_range_freq_divide_end(start, end, freq):
# GH 33498 only the cases where `(end % freq) == 0` used to fail

def mock_timedelta_range(start=None, end=None, **kwargs):
epoch = pd.Timestamp(0)
if start is not None:
start = epoch + pd.Timedelta(start)
if end is not None:
end = epoch + pd.Timedelta(end)
result = pd.date_range(start=start, end=end, **kwargs) - epoch
result.freq = freq
return result

res = pd.timedelta_range(start=start, end=end, freq=freq)
exp = mock_timedelta_range(start=start, end=end, freq=freq)

tm.assert_index_equal(res, exp)
0