Inconsistent date parsing of to_datetime by MarcoGorelli · Pull Request #42908 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Inconsistent date parsing of to_datetime #42908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8381574
added warnings when parse inconsistent with dayfirst arg
arw2019 Jul 27, 2020
12a36d8
improved error message
arw2019 Jul 27, 2020
0ee3428
TST: added tests
arw2019 Jul 27, 2020
9f1f7c9
removed trailing whitespaces
arw2019 Jul 27, 2020
67e9d95
removed pytest.warns
arw2019 Jul 27, 2020
390969f
wip
MarcoGorelli Aug 5, 2021
9ee56ac
revert
MarcoGorelli Aug 5, 2021
0744ced
set stacklevel, assert warning messages
MarcoGorelli Aug 5, 2021
56867d4
okwarning in user guide
MarcoGorelli Aug 6, 2021
e6557c7
:art:
MarcoGorelli Aug 6, 2021
ee6fbde
catch warnings
MarcoGorelli Aug 6, 2021
15797a8
fixup
MarcoGorelli Aug 6, 2021
07834ed
add to to_datetime docstring, add whatsnew note
MarcoGorelli Aug 8, 2021
b4bb5b3
Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_dat…
MarcoGorelli Aug 21, 2021
1d08ae9
wip
MarcoGorelli Aug 21, 2021
c4c87bc
wip
MarcoGorelli Aug 21, 2021
c4e282d
wip
MarcoGorelli Aug 21, 2021
44a0533
wip
MarcoGorelli Aug 21, 2021
5362670
fixup test
MarcoGorelli Aug 22, 2021
6b43118
more fixups
MarcoGorelli Aug 22, 2021
700881d
fixup
MarcoGorelli Aug 22, 2021
bd893a2
revert to b4bb5b330ad25c7dbca36fe55d4c264ec4d027d1
MarcoGorelli Aug 22, 2021
11049a6
document in timeseries.rst
MarcoGorelli Aug 22, 2021
f6c44da
add tests for read_csv
MarcoGorelli Aug 22, 2021
8969a8e
check expected_inconsistent in tests
MarcoGorelli Aug 22, 2021
b6cbb5d
fixup docs
MarcoGorelli Aug 22, 2021
c26b7c1
Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_dat…
MarcoGorelli Aug 25, 2021
c768e1d
remove note about dateutil bug
MarcoGorelli Aug 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
revert to b4bb5b3
  • Loading branch information
MarcoGorelli committed Aug 22, 2021
commit bd893a2bb9d9d04d57ae0463b635c05b57f540fb
2 changes: 0 additions & 2 deletions doc/source/user_guide/timedeltas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,6 @@ Similarly to other of the datetime-like indices, ``DatetimeIndex`` and ``PeriodI
Selections work similarly, with coercion on string-likes and slices:

.. ipython:: python
:okwarning:

s["1 day":"2 day"]
s["1 day 01:00:00"]
Expand All @@ -433,7 +432,6 @@ Selections work similarly, with coercion on string-likes and slices:
Furthermore you can use partial string selection and the range will be inferred:

.. ipython:: python
:okwarning:

s["1 day":"1 day 5 hours"]

Expand Down
42 changes: 8 additions & 34 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -238,15 +238,6 @@ cdef inline bint does_string_look_like_time(str parse_string):
return 0 <= hour <= 23 and 0 <= minute <= 59


def du_parse_with_warning(*args, **kwargs):
warnings.warn(
"Parsing datetime strings without a format specified, "
"please specify a format to avoid unexpected results",
stacklevel=4,
)
return du_parse(*args, **kwargs)


def parse_datetime_string(
str date_string,
bint dayfirst=False,
Expand All @@ -270,12 +261,8 @@ def parse_datetime_string(

if does_string_look_like_time(date_string):
# use current datetime as default, not pass _DEFAULT_DATETIME
dt = du_parse_with_warning(
date_string,
dayfirst=dayfirst,
yearfirst=yearfirst,
**kwargs,
)
dt = du_parse(date_string, dayfirst=dayfirst,
yearfirst=yearfirst, **kwargs)
return dt

dt, _ = _parse_delimited_date(date_string, dayfirst)
Expand All @@ -291,13 +278,8 @@ def parse_datetime_string(
pass

try:
dt = du_parse_with_warning(
date_string,
default=_DEFAULT_DATETIME,
dayfirst=dayfirst,
yearfirst=yearfirst,
**kwargs,
)
dt = du_parse(date_string, default=_DEFAULT_DATETIME,
dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
except TypeError:
# following may be raised from dateutil
# TypeError: 'NoneType' object is not iterable
Expand Down Expand Up @@ -651,11 +633,7 @@ def try_parse_dates(
date = datetime.now()
default = datetime(date.year, date.month, 1)

parse_date = lambda x: du_parse_with_warning(
x,
dayfirst=dayfirst,
default=default,
)
parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default)

# EAFP here
try:
Expand Down Expand Up @@ -702,17 +680,13 @@ def try_parse_date_and_time(
date = datetime.now()
default = datetime(date.year, date.month, 1)

parse_date = lambda x: du_parse_with_warning(
x,
dayfirst=dayfirst,
default=default,
)
parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default)

else:
parse_date = date_parser

if time_parser is None:
parse_time = lambda x: du_parse_with_warning(x)
parse_time = lambda x: du_parse(x)

else:
parse_time = time_parser
Expand Down Expand Up @@ -886,7 +860,7 @@ def format_is_iso(f: str) -> bint:
def guess_datetime_format(
dt_str,
bint dayfirst=False,
dt_str_parse=du_parse_with_warning,
dt_str_parse=du_parse,
dt_str_split=_DATEUTIL_LEXER_SPLIT,
):
"""
Expand Down
76 changes: 11 additions & 65 deletions pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_reset_index_tz(self, tz_aware_fixture):
# GH 3950
# reset_index with single level
tz = tz_aware_fixture
idx = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx")
idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx")
df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx)

expected = DataFrame(
Expand Down Expand Up @@ -320,82 +320,28 @@ def test_reset_index_multiindex_nan(self):
[
None,
"foo",
],
)
def test_reset_index_with_datetimeindex_cols_with_user_warning(self, name):
# GH#5818
df = DataFrame(
[[1, 2], [3, 4]],
columns=date_range("01/01/2013", "01/02/2013"),
index=["A", "B"],
)
df.index.name = name

with tm.assert_produces_warning(UserWarning):
result = df.reset_index()

item = name if name is not None else "index"
columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)])
if isinstance(item, str) and item == "2012-12-31":
columns = columns.astype("datetime64[ns]")
else:
assert columns.dtype == object

expected = DataFrame(
[["A", 1, 2], ["B", 3, 4]],
columns=columns,
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"name",
[
2,
3.0,
pd.Timedelta(6),
Timestamp("2012-12-30", tz="UTC"),
"2012-12-31",
],
)
def test_reset_index_with_datetimeindex_cols(self, name):
# GH#5818
df = DataFrame(
[[1, 2], [3, 4]],
columns=date_range("01/01/2013", "01/02/2013"),
index=["A", "B"],
)
df.index.name = name

result = df.reset_index()

item = name if name is not None else "index"
columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)])
if isinstance(item, str) and item == "2012-12-31":
columns = columns.astype("datetime64[ns]")
else:
assert columns.dtype == object
warn = None
if isinstance(name, Timestamp) and name.tz is not None:
# _deprecate_mismatched_indexing
warn = FutureWarning

expected = DataFrame(
[["A", 1, 2], ["B", 3, 4]],
columns=columns,
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"name",
[
Timestamp("2012-12-30", tz="UTC"),
],
)
def test_reset_index_with_datetimeindex_cols_with_future_warning(self, name):
# GH#5818
df = DataFrame(
[[1, 2], [3, 4]],
columns=date_range("01/01/2013", "01/02/2013"),
columns=date_range("1/1/2013", "1/2/2013"),
index=["A", "B"],
)
df.index.name = name

with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(warn):
result = df.reset_index()

item = name if name is not None else "index"
Expand Down Expand Up @@ -479,7 +425,7 @@ def test_reset_index_multiindex_columns(self):
def test_reset_index_datetime(self, tz_naive_fixture):
# GH#3950
tz = tz_naive_fixture
idx1 = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx1")
idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1")
idx2 = Index(range(5), name="idx2", dtype="int64")
idx = MultiIndex.from_arrays([idx1, idx2])
df = DataFrame(
Expand Down Expand Up @@ -507,7 +453,7 @@ def test_reset_index_datetime(self, tz_naive_fixture):
tm.assert_frame_equal(df.reset_index(), expected)

idx3 = date_range(
"01/01/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
"1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
)
idx = MultiIndex.from_arrays([idx1, idx2, idx3])
df = DataFrame(
Expand Down Expand Up @@ -669,7 +615,7 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
[
(["a", "b"], object),
(
pd.period_range("12-01-2000", periods=2, freq="Q-DEC"),
pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
pd.PeriodDtype(freq="Q-DEC"),
),
],
Expand Down
12 changes: 3 additions & 9 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,7 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
parser = all_parsers

with tm.assert_produces_warning(
(FutureWarning, UserWarning), check_stacklevel=False
):
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=None,
Expand Down Expand Up @@ -1241,9 +1239,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni
2001-01-06, 00:00:00, 1.0, 11.
"""
parser = all_parsers
with tm.assert_produces_warning(
(FutureWarning, UserWarning), check_stacklevel=False
):
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=[0, 1],
Expand Down Expand Up @@ -1337,9 +1333,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni
)
def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning):
parser = all_parsers
with tm.assert_produces_warning(
(FutureWarning, UserWarning), check_stacklevel=False
):
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs)

# Python can sometimes be flaky about how
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_date_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_parse_date_time():
dates = np.array(["2007/1/3", "2008/2/4"], dtype=object)
times = np.array(["05:07:09", "06:08:00"], dtype=object)
expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
result = conv.parse_date_time(dates, times)
tm.assert_numpy_array_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/plotting/frame/test_frame_subplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def test_subplots_warnings(self):
df.plot(subplots=True, layout=(3, 2))

df = DataFrame(
np.random.randn(100, 4), index=date_range("01/01/2000", periods=100)
np.random.randn(100, 4), index=date_range("1/1/2000", periods=100)
)
df.plot(subplots=True, layout=(3, 2))

Expand Down Expand Up @@ -430,7 +430,7 @@ def test_df_subplots_patterns_minorticks(self):

df = DataFrame(
np.random.randn(10, 2),
index=date_range("01/01/2000", periods=10),
index=date_range("1/1/2000", periods=10),
columns=list("AB"),
)

Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def test_hash(self):
@pytest.mark.parametrize("tzstr", ["Europe/Brussels", "Asia/Tokyo", "US/Pacific"])
def test_to_timestamp_tz_arg(self, tzstr):
# GH#34522 tz kwarg deprecated
with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="M").to_timestamp(tz=tzstr)
exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr)
exp_zone = pytz.timezone(tzstr).normalize(p)
Expand All @@ -561,7 +561,7 @@ def test_to_timestamp_tz_arg(self, tzstr):
assert p.tz == exp_zone.tzinfo
assert p.tz == exp.tz

with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="3H").to_timestamp(tz=tzstr)
exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr)
exp_zone = pytz.timezone(tzstr).normalize(p)
Expand All @@ -570,7 +570,7 @@ def test_to_timestamp_tz_arg(self, tzstr):
assert p.tz == exp_zone.tzinfo
assert p.tz == exp.tz

with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="A").to_timestamp(freq="A", tz=tzstr)
exp = Timestamp(day=31, month=12, year=2005, tz="UTC").tz_convert(tzstr)
exp_zone = pytz.timezone(tzstr).normalize(p)
Expand All @@ -579,7 +579,7 @@ def test_to_timestamp_tz_arg(self, tzstr):
assert p.tz == exp_zone.tzinfo
assert p.tz == exp.tz

with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="A").to_timestamp(freq="3H", tz=tzstr)
exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr)
exp_zone = pytz.timezone(tzstr).normalize(p)
Expand All @@ -594,22 +594,22 @@ def test_to_timestamp_tz_arg(self, tzstr):
)
def test_to_timestamp_tz_arg_dateutil(self, tzstr):
tz = maybe_get_tz(tzstr)
with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="M").to_timestamp(tz=tz)
exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr)
assert p == exp
assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1])
assert p.tz == exp.tz

with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="M").to_timestamp(freq="3H", tz=tz)
exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr)
assert p == exp
assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1])
assert p.tz == exp.tz

def test_to_timestamp_tz_arg_dateutil_from_string(self):
with tm.assert_produces_warning((FutureWarning, UserWarning)):
with tm.assert_produces_warning(FutureWarning):
p = Period("1/1/2005", freq="M").to_timestamp(tz="dateutil/Europe/Brussels")
assert p.tz == dateutil_gettz("Europe/Brussels")

Expand Down
Loading
0