Inconsistent date parsing of to_datetime by MarcoGorelli · Pull Request #42908 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Inconsistent date parsing of to_datetime #42908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8381574
added warnings when parse inconsistent with dayfirst arg
arw2019 Jul 27, 2020
12a36d8
improved error message
arw2019 Jul 27, 2020
0ee3428
TST: added tests
arw2019 Jul 27, 2020
9f1f7c9
removed trailing whitespaces
arw2019 Jul 27, 2020
67e9d95
removed pytest.warns
arw2019 Jul 27, 2020
390969f
wip
MarcoGorelli Aug 5, 2021
9ee56ac
revert
MarcoGorelli Aug 5, 2021
0744ced
set stacklevel, assert warning messages
MarcoGorelli Aug 5, 2021
56867d4
okwarning in user guide
MarcoGorelli Aug 6, 2021
e6557c7
:art:
MarcoGorelli Aug 6, 2021
ee6fbde
catch warnings
MarcoGorelli Aug 6, 2021
15797a8
fixup
MarcoGorelli Aug 6, 2021
07834ed
add to to_datetime docstring, add whatsnew note
MarcoGorelli Aug 8, 2021
b4bb5b3
Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_dat…
MarcoGorelli Aug 21, 2021
1d08ae9
wip
MarcoGorelli Aug 21, 2021
c4c87bc
wip
MarcoGorelli Aug 21, 2021
c4e282d
wip
MarcoGorelli Aug 21, 2021
44a0533
wip
MarcoGorelli Aug 21, 2021
5362670
fixup test
MarcoGorelli Aug 22, 2021
6b43118
more fixups
MarcoGorelli Aug 22, 2021
700881d
fixup
MarcoGorelli Aug 22, 2021
bd893a2
revert to b4bb5b330ad25c7dbca36fe55d4c264ec4d027d1
MarcoGorelli Aug 22, 2021
11049a6
document in timeseries.rst
MarcoGorelli Aug 22, 2021
f6c44da
add tests for read_csv
MarcoGorelli Aug 22, 2021
8969a8e
check expected_inconsistent in tests
MarcoGorelli Aug 22, 2021
b6cbb5d
fixup docs
MarcoGorelli Aug 22, 2021
c26b7c1
Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_dat…
MarcoGorelli Aug 25, 2021
c768e1d
remove note about dateutil bug
MarcoGorelli Aug 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
check expected_inconsistent in tests
  • Loading branch information
MarcoGorelli committed Aug 22, 2021
commit 8969a8e7715b8e42733f555e4a94e00a9ec3cb68
19 changes: 8 additions & 11 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1717,35 +1717,32 @@ def test_dayfirst_warnings():

# CASE 1: valid input
input = "date\n31/12/2014\n10/03/2011"
expected = DatetimeIndex(
expected_consistent = DatetimeIndex(
["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
)
expected_inconsistent = DatetimeIndex(
["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
)

# A. dayfirst arg correct, no warning
res1 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
).index
tm.assert_index_equal(expected, res1)
tm.assert_index_equal(expected_consistent, res1)

# B. dayfirst arg incorrect, warning + incorrect output
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res2 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
).index
with pytest.raises(AssertionError, match=None), tm.assert_produces_warning(
UserWarning, match=warning_msg_day_first
):
tm.assert_index_equal(expected, res2)
tm.assert_index_equal(expected_inconsistent, res2)

# C. dayfirst default arg, same as B
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res3 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
).index
with pytest.raises(AssertionError, match=None), tm.assert_produces_warning(
UserWarning, match=warning_msg_day_first
):
tm.assert_index_equal(expected, res3)
tm.assert_index_equal(expected_inconsistent, res3)

# D. infer_datetime_format=True overrides dayfirst default
# no warning + correct result
Expand All @@ -1755,7 +1752,7 @@ def test_dayfirst_warnings():
infer_datetime_format=True,
index_col="date",
).index
tm.assert_index_equal(expected, res4)
tm.assert_index_equal(expected_consistent, res4)

# CASE 2: invalid input
# cannot consistently process with single format
Expand Down
19 changes: 8 additions & 11 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1852,34 +1852,31 @@ def test_dayfirst_warnings(self):

# CASE 1: valid input
arr = ["31/12/2014", "10/03/2011"]
expected = DatetimeIndex(
expected_consistent = DatetimeIndex(
["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
)
expected_inconsistent = DatetimeIndex(
["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None
)

# A. dayfirst arg correct, no warning
res1 = to_datetime(arr, dayfirst=True)
tm.assert_index_equal(expected, res1)
tm.assert_index_equal(expected_consistent, res1)

# B. dayfirst arg incorrect, warning + incorrect output
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res2 = to_datetime(arr, dayfirst=False)
with pytest.raises(AssertionError, match=None), tm.assert_produces_warning(
UserWarning, match=warning_msg_day_first
):
tm.assert_index_equal(expected, res2)
tm.assert_index_equal(expected_inconsistent, res2)

# C. dayfirst default arg, same as B
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res3 = to_datetime(arr, dayfirst=False)
with pytest.raises(AssertionError, match=None), tm.assert_produces_warning(
UserWarning, match=warning_msg_day_first
):
tm.assert_index_equal(expected, res3)
tm.assert_index_equal(expected_inconsistent, res3)

# D. infer_datetime_format=True overrides dayfirst default
# no warning + correct result
res4 = to_datetime(arr, infer_datetime_format=True)
tm.assert_index_equal(expected, res4)
tm.assert_index_equal(expected_consistent, res4)

# CASE 2: invalid input
# cannot consistently process with single format
Expand Down
0