8000 BUG: read_excel with openpyxl and missing dimension by rhshadrach · Pull Request #39486 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

BUG: read_excel with openpyxl and missing dimension #39486

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Return "" for null date columns, trim empty trailing rows
  • Loading branch information
rhshadrach committed Jan 31, 2021
commit d6c3af1ea172f723d72d9eedc6c264ab1f704c08
19 changes: 13 additions & 6 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,14 +503,14 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:

from openpyxl.cell.cell import TYPE_BOOL, TYPE_ERROR, TYPE_NUMERIC

if cell.is_date:
if cell.value is None:
return "" # compat with xlrd
elif cell.is_date:
return cell.value
elif cell.data_type == TYPE_ERROR:
return np.nan
elif cell.data_type == TYPE_BOOL:
return bool(cell.value)
elif cell.value is None:
return "" # compat with xlrd
elif cell.data_type == TYPE_NUMERIC:
# GH5394
if convert_float:
Expand All @@ -529,11 +529,18 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
sheet.reset_dimensions()

data: List[List[Scalar]] = []
for row in sheet.rows:
data.append([self._convert_cell(cell, convert_float) for cell in row])
last_row_with_data = -1
for row_number, row in enumerate(sheet.rows):
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
if any(cell != "" for cell in converted_row):
last_row_with_data = row_number
data.append(converted_row)

# With dimension reset, openpyxl no longer pads rows
if len(data) > 0:
# Trim trailing rows that have no data
data = data[: last_row_with_data + 1]

# With dimension reset, openpyxl no longer pads rows
max_width = max(len(data_row) for data_row in data)
if min(len(data_row) for data_row in data) < max_width:
data = [
Expand Down
Binary file modified pandas/tests/io/data/excel/dimension_large.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_to_excel_with_openpyxl_engine(ext):
@pytest.mark.parametrize(
"filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
def test_read_with_missing_dimension(datapath, ext, header, expected_data, filename):
def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename):
# GH 38956, 39001, 39181 - no/incorrect dimension information
path = datapath("io", "data", "excel", f"{filename}{ext}")
result = pd.read_excel(path, header=header)
Expand Down
0