DEPS: bump xlrd min version to 1.0.0 by jreback · Pull Request #23774 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

DEPS: bump xlrd min version to 1.0.0 #23774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 19, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remove old refs to < 1.0.0
  • Loading branch information
jreback committed Nov 19, 2018
commit 063877582e58ac3e2591cccdbea09ef1adfeaf60
2 changes: 1 addition & 1 deletion doc/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ Optional Dependencies
* `matplotlib <http://matplotlib.org/>`__: for plotting, Version 2.0.0 or higher.
* For Excel I/O:

* `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd) and writing (xlwt)
* `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd), version 1.0.0 or higher required, and writing (xlwt)
* `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__: openpyxl version 2.4.0
for writing .xlsx files (xlrd >= 0.9.0)
* `XlsxWriter <https://pypi.org/project/XlsxWriter>`__: Alternative Excel writer
Expand Down
61 changes: 20 additions & 41 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# ---------------------------------------------------------------------
# ExcelFile class
import abc
from datetime import MINYEAR, date, datetime, time, timedelta
from datetime import date, datetime, time, timedelta
from distutils.version import LooseVersion
from io import UnsupportedOperation
import os
Expand Down Expand Up @@ -375,15 +375,14 @@ class ExcelFile(object):

def __init__(self, io, **kwds):

err_msg = "Install xlrd >= 0.9.0 for Excel support"
err_msg = "Install xlrd >= 1.0.0 for Excel support"

try:
import xlrd
except ImportError:
raise ImportError(err_msg)
else:
ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
if ver < (0, 9): # pragma: no cover
if xlrd.__VERSION__ < LooseVersion("1.0.0"):
raise ImportError(err_msg +
". Current version " + xlrd.__VERSION__)

Expand Down Expand Up @@ -515,7 +514,6 @@ def _parse_excel(self,
raise NotImplementedError("chunksize keyword of read_excel "
"is not implemented")

import xlrd
from xlrd import (xldate, XL_CELL_DATE,
XL_CELL_ERROR, XL_CELL_BOOLEAN,
XL_CELL_NUMBER)
Expand All @@ -528,36 +526,23 @@ def _parse_cell(cell_contents, cell_typ):

if cell_typ == XL_CELL_DATE:

if xlrd_0_9_3:
# Use the newer xlrd datetime handling.
try:
cell_contents = \
xldate.xldate_as_datetime(cell_contents,
epoch1904)
except OverflowError:
return cell_contents
# Excel doesn't distinguish between dates and time,
# so we treat dates on the epoch as times only.
# Also, Excel supports 1900 and 1904 epochs.
year = (cell_contents.timetuple())[0:3]
if ((not epoch1904 and year == (1899, 12, 31)) or
(epoch1904 and year == (1904, 1, 1))):
cell_contents = time(cell_contents.hour,
cell_contents.minute,
cell_contents.second,
cell_contents.microsecond)
else:
# Use the xlrd <= 0.9.2 date handling.
try:
dt = xldate.xldate_as_tuple(cell_contents, epoch1904)

except xldate.XLDateTooLarge:
return cell_contents

if dt[0] < MINYEAR:
cell_contents = time(*dt[3:])
else:
cell_contents = datetime(*dt)
# Use the newer xlrd datetime handling.
try:
cell_contents = xldate.xldate_as_datetime(
cell_contents, epoch1904)
except OverflowError:
return cell_contents

# Excel doesn't distinguish between dates and time,
# so we treat dates on the epoch as times only.
# Also, Excel supports 1900 and 1904 epochs.
year = (cell_contents.timetuple())[0:3]
if ((not epoch1904 and year == (1899, 12, 31)) or
(epoch1904 and year == (1904, 1, 1))):
cell_contents = time(cell_contents.hour,
cell_contents.minute,
cell_contents.second,
cell_contents.microsecond)

elif cell_typ == XL_CELL_ERROR:
cell_contents = np.nan
Expand All @@ -571,12 +556,6 @@ def _parse_cell(cell_contents, cell_typ):
cell_contents = val
return cell_contents

# xlrd >= 0.9.3 can return datetime objects directly.
if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
xlrd_0_9_3 = True
else:
xlrd_0_9_3 = False

ret_dict = False

# Keep sheetname to maintain backwards compatibility.
Expand Down
40 changes: 12 additions & 28 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
_mixed_frame['foo'] = 'bar'


@td.skip_if_no('xlrd', '0.9')
@td.skip_if_no('xlrd', '1.0.0')
class SharedItems(object):

@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -796,35 +796,19 @@ def tdf(col_sheet_name):
tm.assert_frame_equal(dfs[s], dfs_returned[s])

def test_reader_seconds(self, ext):
import xlrd

# Test reading times with and without milliseconds. GH5945.
if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
# Xlrd >= 0.9.3 can handle Excel milliseconds.
expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
time(2, 45, 56, 100000),
time(4, 29, 49, 200000),
time(6, 13, 42, 300000),
time(7, 57, 35, 400000),
time(9, 41, 28, 500000),
time(11, 25, 21, 600000),
time(13, 9, 14, 700000),
time(14, 53, 7, 800000),
time(16, 37, 0, 900000),
time(18, 20, 54)]})
else:
# Xlrd < 0.9.3 rounds Excel milliseconds.
expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
time(2, 45, 56),
time(4, 29, 49),
time(6, 13, 42),
time(7, 57, 35),
time(9, 41, 29),
time(11, 25, 22),
time(13, 9, 15),
time(14, 53, 8),
time(16, 37, 1),
time(18, 20, 54)]})
expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
time(2, 45, 56, 100000),
time(4, 29, 49, 200000),
time(6, 13, 42, 300000),
time(7, 57, 35, 400000),
time(9, 41, 28, 500000),
time(11, 25, 21, 600000),
time(13, 9, 14, 700000),
time(14, 53, 7, 800000),
time(16, 37, 0, 900000),
time(18, 20, 54)]})

actual = self.get_exceldf('times_1900', ext, 'Sheet1')
tm.assert_frame_equal(actual, expected)
Expand Down
0