Ods loses spaces 32207 by detrout · Pull Request #33233 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Ods loses spaces 32207 #33233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 6, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
More correctly parse OpenDocument string cells
  • Loading branch information
detrout committed Apr 3, 2020
commit 8f5f2e8e23a3abd0111393a40a55558d1cff8a16
23 changes: 22 additions & 1 deletion pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
cell_value = cell.attributes.get((OFFICENS, "value"))
return float(cell_value)
elif cell_type == "string":
return str(cell)
return self._get_cell_string_value(cell)
elif cell_type == "currency":
cell_value = cell.attributes.get((OFFICENS, "value"))
return float(cell_value)
Expand All @@ -182,3 +182,24 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
return pd.to_datetime(str(cell)).time()
else:
raise ValueError(f"Unrecognized type {cell_type}")

def _get_cell_string_value(self, cell) -> str:
    """
    Return the text of an OpenDocument string cell, expanding ``text:s``.

    A ``text:s`` element stands for a run of space characters; its
    ``text:c`` attribute holds the run length and is treated as 1 when
    absent.

    Parameters
    ----------
    cell : odf element
        The table cell whose ``text:p`` children hold the string content.

    Returns
    -------
    str
        The concatenated text of every ``text:p`` child of ``cell``, with
        each ``text:s`` replaced by the spaces it encodes.  An empty string
        is returned when the cell has no ``text:p`` children.
    """
    from odf.element import Element, Text
    from odf.namespaces import TEXTNS
    from odf.text import P, S

    text_p = P().qname
    text_s = S().qname

    def _collect(node, parts):
        # Walk ``node`` depth-first so that text and ``text:s`` runs nested
        # inside other inline elements (e.g. ``text:span``) are not lost.
        for fragment in node.childNodes:
            if isinstance(fragment, Text):
                parts.append(fragment.data)
            elif isinstance(fragment, Element):
                if fragment.qname == text_s:
                    spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
                    parts.append(" " * spaces)
                else:
                    _collect(fragment, parts)

    value = []
    # Iterate instead of indexing childNodes[0]: an empty cell must yield ""
    # rather than IndexError, and the paragraph is not guaranteed to be the
    # first child.  Only text:p children are read, so non-paragraph children
    # of the cell do not leak into the value.
    # NOTE(review): multiple paragraphs are concatenated without a separator,
    # mirroring the plain ``str(cell)`` this method replaced -- confirm this
    # is the desired rendering for multi-line cells.
    for child in cell.childNodes:
        if isinstance(child, Element) and child.qname == text_p:
            _collect(child, value)
    return "".join(value)
0