Inconsistent date parsing of to_datetime by MarcoGorelli · Pull Request #42908 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Inconsistent date parsing of to_datetime #42908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8381574
added warnings when parse inconsistent with dayfirst arg
arw2019 Jul 27, 2020
12a36d8
improved error message
arw2019 Jul 27, 2020
0ee3428
TST: added tests
arw2019 Jul 27, 2020
9f1f7c9
removed trailing whitespaces
arw2019 Jul 27, 2020
67e9d95
removed pytest.warns
arw2019 Jul 27, 2020
390969f
wip
MarcoGorelli Aug 5, 2021
9ee56ac
revert
MarcoGorelli Aug 5, 2021
0744ced
set stacklevel, assert warning messages
MarcoGorelli Aug 5, 2021
56867d4
okwarning in user guide
MarcoGorelli Aug 6, 2021
e6557c7
:art:
MarcoGorelli Aug 6, 2021
ee6fbde
catch warnings
MarcoGorelli Aug 6, 2021
15797a8
fixup
MarcoGorelli Aug 6, 2021
07834ed
add to to_datetime docstring, add whatsnew note
MarcoGorelli Aug 8, 2021
b4bb5b3
Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_dat…
MarcoGorelli Aug 21, 2021
1d08ae9
wip
MarcoGorelli Aug 21, 2021
c4c87bc
wip
MarcoGorelli Aug 21, 2021
c4e282d
wip
MarcoGorelli Aug 21, 2021
44a0533
wip
MarcoGorelli Aug 21, 2021
5362670
fixup test
MarcoGorelli Aug 22, 2021
6b43118
more fixups
MarcoGorelli Aug 22, 2021
700881d
fixup
MarcoGorelli Aug 22, 2021
bd893a2
revert to b4bb5b330ad25c7dbca36fe55d4c264ec4d027d1
MarcoGorelli Aug 22, 2021
11049a6
document in timeseries.rst
MarcoGorelli Aug 22, 2021
f6c44da
add tests for read_csv
MarcoGorelli Aug 22, 2021
8969a8e
check expected_inconsistent in tests
MarcoGorelli Aug 22, 2021
b6cbb5d
fixup docs
MarcoGorelli Aug 22, 2021
c26b7c1
Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_dat…
MarcoGorelli Aug 25, 2021
c768e1d
remove note about dateutil bug
MarcoGorelli Aug 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixup test
  • Loading branch information
MarcoGorelli committed Aug 22, 2021
commit 5362670f6a3243d16584cc7c687779ecec500729
39 changes: 28 additions & 11 deletions pandas/_libs/tslibs/parsing.pyx
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ cnp.import_array()

from dateutil.parser import (
DEFAULTPARSER,
parse as du_parse_raw,
parse as du_parse,
)
from dateutil.relativedelta import relativedelta
from dateutil.tz import (
Expand Down Expand Up @@ -238,12 +238,12 @@ cdef inline bint does_string_look_like_time(str parse_string):
return 0 <= hour <= 23 and 0 <= minute <= 59


def du_parse(*args, **kwargs):
def du_parse_with_warning(*args, **kwargs):
warnings.warn(
"Parsing datetime strings without a format specified, "
"please specify a format to avoid unexpected results"
)
return du_parse_raw(*args, **kwargs)
return du_parse(*args, **kwargs)


def parse_datetime_string(
Expand All @@ -269,8 +269,12 @@ def parse_datetime_string(

if does_string_look_like_time(date_string):
# use current datetime as default, not pass _DEFAULT_DATETIME
dt = du_parse(date_string, dayfirst=dayfirst,
yearfirst=yearfirst, **kwargs)
dt = du_parse_with_warning(
date_string,
dayfirst=dayfirst,
yearfirst=yearfirst,
**kwargs,
)
return dt

dt, _ = _parse_delimited_date(date_string, dayfirst)
Expand All @@ -286,8 +290,13 @@ def parse_datetime_string(
pass

try:
dt = du_parse(date_string, default=_DEFAULT_DATETIME,
dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
dt = du_parse_with_warning(
date_string,
default=_DEFAULT_DATETIME,
dayfirst=dayfirst,
yearfirst=yearfirst,
**kwargs,
)
except TypeError:
# following may be raised from dateutil
# TypeError: 'NoneType' object is not iterable
Expand Down Expand Up @@ -641,7 +650,11 @@ def try_parse_dates(
date = datetime.now()
default = datetime(date.year, date.month, 1)

parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default)
parse_date = lambda x: du_parse_with_warning(
x,
dayfirst=dayfirst,
default=default,
)

# EAFP here
try:
Expand Down Expand Up @@ -688,13 +701,17 @@ def try_parse_date_and_time(
date = datetime.now()
default = datetime(date.year, date.month, 1)

parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default)
parse_date = lambda x: du_parse_with_warning(
x,
dayfirst=dayfirst,
default=default,
)

else:
parse_date = date_parser

if time_parser is None:
parse_time = lambda x: du_parse(x)
parse_time = lambda x: du_parse_with_warning(x)

else:
parse_time = time_parser
Expand Down Expand Up @@ -868,7 +885,7 @@ def format_is_iso(f: str) -> bint:
def guess_datetime_format(
dt_str,
bint dayfirst=False,
dt_str_parse=du_parse,
dt_str_parse=du_parse_with_warning,
dt_str_split=_DATEUTIL_LEXER_SPLIT,
):
"""
Expand Down
76 changes: 65 additions & 11 deletions pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_reset_index_tz(self, tz_aware_fixture):
# GH 3950
# reset_index with single level
tz = tz_aware_fixture
idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx")
idx = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx")
df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx)

expected = DataFrame(
Expand Down Expand Up @@ -320,28 +320,82 @@ def test_reset_index_multiindex_nan(self):
[
None,
"foo",
],
)
def test_reset_index_with_datetimeindex_cols_with_user_warning(self, name):
# GH#5818
df = DataFrame(
[[1, 2], [3, 4]],
columns=date_range("01/01/2013", "01/02/2013"),
index=["A", "B"],
)
df.index.name = name

with tm.assert_produces_warning(UserWarning):
result = df.reset_index()

item = name if name is not None else "index"
columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)])
if isinstance(item, str) and item == "2012-12-31":
columns = columns.astype("datetime64[ns]")
else:
assert columns.dtype == object

expected = DataFrame(
[["A", 1, 2], ["B", 3, 4]],
columns=columns,
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"name",
[
2,
3.0,
pd.Timedelta(6),
Timestamp("2012-12-30", tz="UTC"),
"2012-12-31",
],
)
def test_reset_index_with_datetimeindex_cols(self, name):
# GH#5818
warn = None
if isinstance(name, Timestamp) and name.tz is not None:
# _deprecate_mismatched_indexing
warn = FutureWarning
df = DataFrame(
[[1, 2], [3, 4]],
columns=date_range("01/01/2013", "01/02/2013"),
index=["A", "B"],
)
df.index.name = name

result = df.reset_index()

item = name if name is not None else "index"
columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)])
if isinstance(item, str) and item == "2012-12-31":
columns = columns.astype("datetime64[ns]")
else:
assert columns.dtype == object

expected = DataFrame(
[["A", 1, 2], ["B", 3, 4]],
columns=columns,
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"name",
[
Timestamp("2012-12-30", tz="UTC"),
],
)
def test_reset_index_with_datetimeindex_cols_with_future_warning(self, name):
# GH#5818
df = DataFrame(
[[1, 2], [3, 4]],
columns=date_range("1/1/2013", "1/2/2013"),
columns=date_range("01/01/2013", "01/02/2013"),
index=["A", "B"],
)
df.index.name = name

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning((FutureWarning, UserWarning)):
result = df.reset_index()

item = name if name is not None else "index"
Expand Down Expand Up @@ -425,7 +479,7 @@ def test_reset_index_multiindex_columns(self):
def test_reset_index_datetime(self, tz_naive_fixture):
# GH#3950
tz = tz_naive_fixture
idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1")
idx1 = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx1")
idx2 = Index(range(5), name="idx2", dtype="int64")
idx = MultiIndex.from_arrays([idx1, idx2])
df = DataFrame(
Expand Down Expand Up @@ -453,7 +507,7 @@ def test_reset_index_datetime(self, tz_naive_fixture):
tm.assert_frame_equal(df.reset_index(), expected)

idx3 = date_range(
"1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
"01/01/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
)
idx = MultiIndex.from_arrays([idx1, idx2, idx3])
df = DataFrame(
Expand Down Expand Up @@ -615,7 +669,7 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
[
(["a", "b"], object),
(
pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
pd.period_range("12-01-2000", periods=2, freq="Q-DEC"),
pd.PeriodDtype(freq="Q-DEC"),
),
],
Expand Down
0