8000 ENH: make "closed" part of IntervalDtype by jbrockmendel · Pull Request #38394 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH: make "closed" part of IntervalDtype #38394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Jan 10, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
747926e
ENH: make closed part of IntervalDtype
jbrockmendel Dec 7, 2020
25d5925
TST: raise on mismatched closed
jbrockmendel Nov 21, 2020
15613e6
typo fixup
jbrockmendel Nov 21, 2020
7bd2db6
pickle
jbrockmendel Dec 8, 2020
10c0225
warn on deprcated
jbrockmendel Dec 9, 2020
2a82a78
test for warnings
jbrockmendel Dec 9, 2020
1b93617
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Dec 9, 2020
f2bb5a1
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Dec 10, 2020
ded6c31
update doctests
jbrockmendel Dec 10, 2020
9816690
remove now-unnecessary _closed attr
jbrockmendel Dec 10, 2020
aae7d84
catch warnings
jbrockmendel Dec 11, 2020
b262bdc
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Dec 11, 2020
c2efb79
whatnsew
jbrockmendel Dec 11, 2020
5ef58db
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Dec 13, 2020
bf86746
test for closed mismatch
jbrockmendel Dec 14, 2020
70c7de6
test for mismatch
jbrockmendel Dec 14, 2020
517c8f1
comment
jbrockmendel Dec 14, 2020
922ce1a
mypy fixup
jbrockmendel Dec 14, 2020
82ee698
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Dec 16, 2020
e783761
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 8, 2021
6e47156
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 8, 2021
367feee
API: allow closed=None in IntervalDtype
jbrockmendel Jan 8, 2021
b10b0bf
revert deprecation in whatsnew
jbrockmendel Jan 8, 2021
370a843
revert warning-catching
jbrockmendel Jan 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10000
Prev Previous commit
Next Next commit
warn on deprcated
  • Loading branch information
jbrockmendel committed Dec 9, 2020
commit 10c0225c508647ed7942c2f899426cdcd1217f1f
30 changes: 28 additions & 2 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Union,
cast,
)
import warnings

import numpy as np
import pytz
Expand Down Expand Up @@ -1048,12 +1049,20 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None):
if m is not None:
gd = m.groupdict()
subtype = gd["subtype"]
if "closed" in gd:
if gd.get("closed", None) is not None:
closed = gd["closed"]
elif closed is not None:
# user passed eg. IntervalDtype("interval[int64]", "left")
pass
else:
warnings.warn(
"Constructing an IntervalDtype from a string without "
"specifying 'closed' is deprecated and will raise in "
"a future version. "
f"Use e.g. 'interval[{subtype}, left]'. "
"Defaulting to closed='right'.",
FutureWarning,
)
# default to "right"
closed = "right"

Expand All @@ -1070,7 +1079,17 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None):
)
raise TypeError(msg)

closed = closed or "right"
if closed is None and subtype is not None:
< 10000 div id="r541974369" class="timeline-comment-group js-minimizable-comment-group js-targetable-element my-0 comment previewable-edit js-task-list-container js-comment review-comment js-minimize-container unminimized-comment">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we have this one do we need to warn on L1058? (e.g. which one is getting hit)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you respond to this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they are both hit separately

warnings.warn(
"Constructing an IntervalDtype without "
"specifying 'closed' is deprecated and will raise in "
"a future version. "
"Use e.g. IntervalDtype(np.int64, 'left'). "
"Defaulting to closed='right'.",
FutureWarning,
)
closed = "right"

key = str(subtype) + str(closed)
try:
return cls._cache[key]
Expand Down Expand Up @@ -1162,6 +1181,13 @@ def __setstate__(self, state):
self._subtype = state["subtype"]
# backward-compat older pickles won't have "closed" key
self._closed = state.pop("closed", None)
if self._closed is None:
warnings.warn(
"Unpickled legacy IntervalDtype does not specify 'closed' "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test for this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we have tests for all of the newly-added warning/raising cases in the IntervalDtype constructor. (just added a missing consistency check https://github.com/pandas-dev/pandas/pull/38394/files#diff-f99ef42afad339d00e36197a60ccc76d74c6a94c30e05aef69d18e0ec4b10d4eR1055)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

woops, dont have a test for this one. will update.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huh, this is definitely reached, but tm.assert_produces_warning isnt seeing anything

"attribute. Set dtype._closed to one of 'left', 'right', 'both', "
"'neither' before using this IntervalDtype object.",
UserWarning,
)

@classmethod
def is_dtype(cls, dtype: object) -> bool:
Expand Down
63 changes: 38 additions & 25 deletions pandas/tests/dtypes/test_dtypes.py
< 57AE /tr>
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,11 @@ def dtype(self):
"""
Class level fixture of dtype for TestIntervalDtype
"""
return IntervalDtype("int64")
return IntervalDtype("int64", "right")

def test_hash_vs_equality(self, dtype):
# make sure that we satisfy is semantics
dtype2 = IntervalDtype("int64")
dtype2 = IntervalDtype("int64", "right")
dtype3 = IntervalDtype(dtype2)
assert dtype == dtype2
assert dtype2 == dtype
Expand Down Expand Up @@ -548,7 +548,7 @@ def test_hash_vs_equality(self, dtype):
"subtype", ["interval[int64]", "Interval[int64]", "int64", np.dtype("int64")]
)
def test_construction(self, subtype):
i = IntervalDtype(subtype)
i = IntervalDtype(subtype, closed="right")
assert i.subtype == np.dtype("int64")
assert is_interval_dtype(i)

Expand Down Expand Up @@ -578,13 +578,17 @@ def test_construction_not_supported( 10000 self, subtype):
"for IntervalDtype"
)
with pytest.raises(TypeError, match=msg):
IntervalDtype(subtype)
with tm.assert_produces_warning(FutureWarning):
# need to pass 'closed'
IntervalDtype(subtype)

@pytest.mark.parametrize("subtype", ["xx", "IntervalA", "Interval[foo]"])
def test_construction_errors(self, subtype):
msg = "could not construct IntervalDtype"
with pytest.raises(TypeError, match=msg):
IntervalDtype(subtype)
with tm.assert_produces_warning(FutureWarning):
# need to pass 'closed'
IntervalDtype(subtype)

def test_closed_must_match(self):
# GH#37933
Expand All @@ -599,9 +603,9 @@ def test_closed_invalid(self):
IntervalDtype(np.float64, "foo")

def test_construction_from_string(self, dtype):
result = IntervalDtype("interval[int64]")
result = IntervalDtype("interval[int64, right]")
assert is_dtype_equal(dtype, result)
result = IntervalDtype.construct_from_string("interval[int64]")
result = IntervalDtype.construct_from_string("interval[int64, right]")
assert is_dtype_equal(dtype, result)

@pytest.mark.parametrize("string", [0, 3.14, ("a", "b"), None])
Expand All @@ -625,18 +629,18 @@ def test_construction_from_string_error_subtype(self, string):
IntervalDtype.construct_from_string(string)

def test_subclass(self):
a = IntervalDtype("interval[int64]")
b = IntervalDtype("interval[int64]")
a = IntervalDtype("interval[int64, right]")
b = IntervalDtype("interval[int64, right]")

assert issubclass(type(a), type(a))
assert issubclass(type(a), type(b))

def test_is_dtype(self, dtype):
assert IntervalDtype.is_dtype(dtype)
assert IntervalDtype.is_dtype("interval")
assert IntervalDtype.is_dtype(IntervalDtype("float64"))
assert IntervalDtype.is_dtype(IntervalDtype("int64"))
assert IntervalDtype.is_dtype(IntervalDtype(np.int64))
assert IntervalDtype.is_dtype(IntervalDtype("float64", "left"))
assert IntervalDtype.is_dtype(IntervalDtype("int64", "right"))
assert IntervalDtype.is_dtype(IntervalDtype(np.int64, "both"))

assert not IntervalDtype.is_dtype("D")
assert not IntervalDtype.is_dtype("3D")
Expand All @@ -649,16 +653,23 @@ def test_is_dtype(self, dtype):
assert not IntervalDtype.is_dtype(np.float64)

def test_equality(self, dtype):
assert is_dtype_equal(dtype, "interval[int64]")
assert is_dtype_equal(dtype, IntervalDtype("int64"))
assert is_dtype_equal(IntervalDtype("int64"), IntervalDtype("int64"))
assert is_dtype_equal(dtype, "interval[int64, right]")
assert is_dtype_equal(dtype, IntervalDtype("int64", "right"))
assert is_dtype_equal(
IntervalDtype("int64", "right"), IntervalDtype("int64", "right")
)

assert not is_dtype_equal(dtype, "int64")
assert not is_dtype_equal(IntervalDtype("int64"), IntervalDtype("float64"))
assert not is_dtype_equal(
IntervalDtype("int64", "neither"), IntervalDtype("float64", "right")
)
assert not is_dtype_equal(
IntervalDtype("int64", "both"), IntervalDtype("int64", "left")
)

# invalid subtype comparisons do not raise when directly compared
dtype1 = IntervalDtype("float64")
dtype2 = IntervalDtype("datetime64[ns, US/Eastern]")
dtype1 = IntervalDtype("float64", "left")
dtype2 = IntervalDtype("datetime64[ns, US/Eastern]", "left")
assert dtype1 != dtype2
assert dtype2 != dtype1

Expand All @@ -679,7 +690,8 @@ def test_equality(self, dtype):
)
def test_equality_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
closed = "right" if subtype is not None else None
dtype = IntervalDtype(subtype, closed=closed)
assert is_dtype_equal(dtype, "interval")
assert is_dtype_equal(dtype, IntervalDtype())

Expand All @@ -697,8 +709,9 @@ def test_equality_generic(self, subtype):
)
def test_name_repr(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
expected = f"interval[{subtype}, right]"
closed = "right" if subtype is not None else None
dtype = IntervalDtype(subtype, closed=closed)
expected = f"interval[{subtype}, {closed}]"
assert str(dtype) == expected
assert dtype.name == "interval"

Expand All @@ -723,7 +736,7 @@ def test_basic(self, dtype):
assert is_interval_dtype(s)

def test_basic_dtype(self):
assert is_interval_dtype("interval[int64]")
assert is_interval_dtype("interval[int64, both]")
assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))
assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4)))
assert is_interval_dtype(
Expand All @@ -738,7 +751,7 @@ def test_basic_dtype(self):

def test_caching(self):
IntervalDtype.reset_cache()
dtype = IntervalDtype("int64")
dtype = IntervalDtype("int64", "right")
assert len(IntervalDtype._cache) == 1

IntervalDtype("interval")
Expand Down Expand Up @@ -954,8 +967,8 @@ def test_registry(dtype):
[
("int64", None),
("interval", IntervalDtype()),
("interval[int64]", IntervalDtype()),
("interval[datetime64[ns]]", IntervalDtype("datetime64[ns]")),
("interval[int64, neither]", IntervalDtype()),
("interval[datetime64[ns], left]", IntervalDtype("datetime64[ns]", "left")),
("period[D]", PeriodDtype("D")),
("category", CategoricalDtype()),
("datetime64[ns, US/Eastern]", DatetimeTZDtype("ns", "US/Eastern")),
Expand Down
0