BUG/API: to_datetime preserves UTC offsets when parsing datetime strings by mroeschke · Pull Request #21822 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

BUG/API: to_datetime preserves UTC offsets when parsing datetime strings #21822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Jul 30, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
ac5a3d1
BUG: to_datetime no longer converts offsets to UTC
Jul 7, 2018
b81a8e9
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 8, 2018
6bf46a8
Document and now return offset
Jul 8, 2018
678b337
Add some tests, start converting some existing uses of array_to_datetime
Jul 8, 2018
1917148
Add more tests
Jul 8, 2018
581a33e
Adjust test
Jul 8, 2018
a1bc8f9
Flake8
Jul 8, 2018
80042e6
Add tests confirming new behavior
Jul 8, 2018
7c4135e
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 10, 2018
0651416
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 11, 2018
bacb6e3
Lint
Jul 11, 2018
a2f4aad
adjust a test
Jul 11, 2018
d48f341
Ensure box object index, pass tests
Jul 11, 2018
7efb25c
Adjust tests
Jul 11, 2018
1d527ff
Adjust test
Jul 11, 2018
f89d6b6
Cleanup and add comments
Jul 12, 2018
d91c63f
address comments
Jul 12, 2018
1054e8b
adjust test to be closer to the original behavior
Jul 12, 2018
d9cdc91
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 12, 2018
7d04613
Add TypeError clause
Jul 12, 2018
031284c
Add TypeError not ValueError
Jul 12, 2018
749e62e
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 12, 2018
23cbf75
fix typo
Jul 12, 2018
1e6f87a
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 18, 2018
7539bcf
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 19, 2018
c1f51cd
New implementation
Jul 19, 2018
db75a24
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 20, 2018
4733ac5
Change implementation and add some tests
Jul 20, 2018
e3db735
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 20, 2018
2fa681f
Add missing commas
Jul 20, 2018
5f36c98
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 24, 2018
d7ff275
Change implementation since tzoffsets cannot be hashed
Jul 25, 2018
4ff7cb3
Add whatsnew
Jul 25, 2018
8463d91
Address review
Jul 25, 2018
dddc6b3
Address tzlocal
Jul 25, 2018
cca3983
Change is to == for older dateutil compat
Jul 26, 2018
e441be0
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 26, 2018
a8a65f7
Modify example in whatsnew to display
Jul 26, 2018
75f9fd9
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 26, 2018
6052475
Add more specific errors
Jul 27, 2018
f916c69
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 28, 2018
807a251
Merge remote-tracking branch 'upstream/master' into parse_tz_offsets
Jul 29, 2018
1cbd9b9
Add some benchmarks and reformat tests
Jul 30, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Adjust tests
  • Loading branch information
Matt Roeschke committed Jul 11, 2018
commit 7efb25c1d8da6089ba90c5a443c0fc76467a2eb0
5 changes: 3 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
- Index-like if box=True
- ndarray of Timestamps if box=False
"""
from pandas import Index, DatetimeIndex
from pandas import DatetimeIndex
if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')

Expand Down Expand Up @@ -275,14 +275,15 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
yearfirst=yearfirst,
require_iso8601=require_iso8601
)
if tz_parsed is not None:
if tz_parsed is not None and box:
return DatetimeIndex._simple_new(result, name=name,
tz=tz_parsed)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

case with multiple tzs that has to get wrapped in object-dtype?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That case will result in tz_parsed = None so this branch will not be hit.


if box:
if is_datetime64_dtype(result):
return DatetimeIndex(result, tz=tz, name=name)
elif is_object_dtype(result):
from pandas import Index
return Index(result, name=name)
return result

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self):
result = index.tz_localize(tz=tz, errors='coerce')
test_times = ['2015-03-08 01:00-05:00', 'NaT',
'2015-03-08 03:00-04:00']
dti = DatetimeIndex(test_times)
expected = dti.tz_localize('UTC').tz_convert('US/Eastern')
dti = to_datetime(test_times, utc=True)
expected = dti.tz_convert('US/Eastern')
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'),
Expand Down
29 changes: 20 additions & 9 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,9 @@ def test_to_datetime_tz_psycopg2(self, cache):
# dtype coercion
i = pd.DatetimeIndex([
'2000-01-01 08:00:00+00:00'
], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None))
])
i = i.tz_convert(psycopg2.tz.FixedOffsetTimezone(offset=-300,
name=None))
assert is_datetime64_ns_dtype(i)

# tz coercion
Expand Down Expand Up @@ -602,7 +604,9 @@ def test_ts_strings_with_different_offsets(self):
datetime(2015, 11, 18, 16, 30,
tzinfo=tzoffset(None, 23400))],
dtype=object)
tm.assert_numpy_array_equal(result, expected)
# GH 21864
expected = Index(expected)
tm.assert_index_equal(result, expected)

result = to_datetime(ts_strings, utc=True)
expected = DatetimeIndex([Timestamp(2015, 11, 18, 10)] * 2, tz='UTC')
Expand Down Expand Up @@ -1009,14 +1013,19 @@ def test_to_datetime_types(self, cache):
# assert result == expected

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_unprocessable_input(self, cache):
@pytest.mark.parametrize('box, klass, assert_method', [
[True, Index, 'assert_index_equal'],
[False, np.array, 'assert_numpy_array_equal']
])
def test_to_datetime_unprocessable_input(self, cache, box, klass,
assert_method):
# GH 4928
tm.assert_numpy_array_equal(
to_datetime([1, '1'], errors='ignore', cache=cache),
np.array([1, '1'], dtype='O')
)
# GH 21864
result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box)
expected = klass(np.array([1, '1'], dtype='O'))
getattr(tm, assert_method)(result, expected)
pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise',
cache=cache)
cache=cache, box=box)

def test_to_datetime_other_datetime64_units(self):
# 5/25/2012
Expand Down Expand Up @@ -1077,7 +1086,9 @@ def test_string_na_nat_conversion(self, cache):
cache=cache))

result = to_datetime(malformed, errors='ignore', cache=cache)
tm.assert_numpy_array_equal(result, malformed)
# GH 21864
expected = Index(malformed)
tm.assert_index_equal(result, expected)

pytest.raises(ValueError, to_datetime, malformed, errors='raise',
cache=cache)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2325,7 +2325,7 @@ def test_concat_datetime_timezone(self):
'2011-01-01 01:00:00+01:00',
'2011-01-01 02:00:00+01:00'],
freq='H'
).tz_localize('UTC').tz_convert('Europe/Paris')
).tz_convert('UTC').tz_convert('Europe/Paris')

expected = pd.DataFrame([[1, 1], [2, 2], [3, 3]],
index=exp_idx, columns=['a', 'b'])
Expand All @@ -2343,7 +2343,7 @@ def test_concat_datetime_timezone(self):
'2010-12-31 23:00:00+00:00',
'2011-01-01 00:00:00+00:00',
'2011-01-01 01:00:00+00:00']
).tz_localize('UTC')
)

expected = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3],
[1, np.nan], [2, np.nan], [3, np.nan]],
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,8 @@ def test_datetime64_dtype_array_returned(self):

dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000',
'2015-01-01T00:00:00.000000000+0000',
'2015-01-01T00:00:00.000000000+0000'])
'2015-01-01T00:00:00.000000000+0000'],
box=False)
result = algos.unique(dt_index)
tm.assert_numpy_array_equal(result, expected)
assert result.dtype == expected.dtype
Expand Down
13 changes: 6 additions & 7 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,16 +668,15 @@ def test_value_counts_datetime64(self, klass):

s = klass(df['dt'].copy())
s.name = None

idx = pd.to_datetime(['2010-01-01 00:00:00Z',
'2008-09-09 00:00:00Z',
'2009-01-01 00:00:00Z'])
idx = pd.to_datetime(['2010-01-01 00:00:00',
'2008-09-09 00:00:00',
'2009-01-01 00:00:00'])
expected_s = Series([3, 2, 1], index=idx)
tm.assert_series_equal(s.value_counts(), expected_s)

expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
'2009-01-01 00:00:00Z',
'2008-09-09 00:00:00Z'],
expected = np_array_datetime64_compat(['2010-01-01 00:00:00',
'2009-01-01 00:00:00',
'2008-09-09 00:00:00'],
dtype='datetime64[ns]')
if isinstance(s, Index):
tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2677,8 +2677,8 @@ def test_resample_with_dst_time_change(self):
'2016-03-14 13:00:00-05:00',
'2016-03-15 01:00:00-05:00',
'2016-03-15 13:00:00-05:00']
index = pd.DatetimeIndex(expected_index_values,
tz='UTC').tz_convert('America/Chicago')
index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
'America/Chicago')
expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 2.0], index=index)
Expand Down