ENH: Adding engine_kwargs to Excel engines for issue #40274 by rmhowe425 · Pull Request #52214 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH: Adding engine_kwargs to Excel engines for issue #40274 #52214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into pandas-dev:main from rmhowe425:dev/read_excel
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
817199f
Fixing merge conflicts
rmhowe425 Mar 30, 2023
1333165
Fixing merge conflict
rmhowe425 Mar 30, 2023
1cc54cd
Merge branch 'main' into dev/read_excel
rmhowe425 Mar 30, 2023
8391425
Merge branch 'main' into dev/read_excel
rmhowe425 Mar 30, 2023
bec1da2
Merge branch 'main' into dev/read_excel
rmhowe425 Mar 30, 2023
c0988d6
Merge branch 'pandas-dev:main' into dev/read_excel
rmhowe425 Mar 31, 2023
2267d30
Fixing documentation issues
rmhowe425 Mar 31, 2023
db13a39
Merge branch 'main' into dev/read_excel
rmhowe425 Mar 31, 2023
229954e
Merge branch 'pandas-dev:main' into dev/read_excel
rmhowe425 Apr 1, 2023
14b4be0
standardized usage of engine_kwargs, fixed unit tests & doc strings
rmhowe425 Apr 1, 2023
057d5a2
Fixing documentation issues
rmhowe425 Apr 1, 2023
c05f182
Fixing implementation logic and unit tests
rmhowe425 Apr 1, 2023
9065261
Fixing implementation logic
rmhowe425 Apr 1, 2023
45589bb
Fixing formatting issues
rmhowe425 Apr 1, 2023
93c6e60
Fixing error for test Docstring validation, typing, and other manual …
rmhowe425 Apr 1, 2023
d60aa97
Fixing documentation error
rmhowe425 Apr 1, 2023
5431178
Standardizing engine_kwarg types
rmhowe425 Apr 2, 2023
f631de7
Fixing minor issues with unit tests and documentation
rmhowe425 Apr 2, 2023
1000a30
Fixing documentation issue
rmhowe425 Apr 2, 2023
86dbb35
Fixing a formatting / documentation error
rmhowe425 Apr 3, 2023
d2eae07
Merge branch 'main' into dev/read_excel
rmhowe425 Apr 4, 2023
da022c8
Fixing documentation errors
rmhowe425 Apr 5, 2023
8106cc6
Fixing documentation errors
rmhowe425 Apr 5, 2023
19a6d88
Fixing documentation errors
rmhowe425 Apr 5, 2023
c69ef91
Fixing documentation errors
rmhowe425 Apr 5, 2023
242765d
Fixing documentation errors
rmhowe425 Apr 5, 2023
c9aa28a
Adding an extra blank line to troubleshoot documentation error
rmhowe425 Apr 6, 2023
46be9ec
Adding an extra blank line to troubleshoot documentation error
rmhowe425 Apr 6, 2023
cef90f4
Merge branch 'main' into dev/read_excel
rmhowe425 Apr 10, 2023
f692c8e
Fixing documentation issues
rmhowe425 Apr 10, 2023
96c6fe0
Fixing formatting errors
rmhowe425 Apr 10, 2023
0391c9f
Fixing formatting errors
rmhowe425 Apr 10, 2023
f2c8e2a
Fixing formatting errors
rmhowe425 Apr 10, 2023
af55880
Merge branch 'main' into dev/read_excel
rmhowe425 Apr 11, 2023
679ab4b
Fixing logic and formatting issues in unit tests
rmhowe425 Apr 11, 2023
f9be828
Merge branch 'dev/read_excel' of github.com:rmhowe425/pandas into dev…
rmhowe425 Apr 11, 2023
3412af0
Fixing issues with merge conflict
rmhowe425 Apr 11, 2023
f379120
Fixing formatting issue
rmhowe425 Apr 11, 2023
8d7933c
Update pandas/io/excel/_base.py
mroeschke Apr 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fixing implementation logic and unit tests
  • Loading branch information
rmhowe425 committed Apr 1, 2023
commit c05f1824eda0db0738ca8b86f28f5acd5a6405b3
20 changes: 13 additions & 7 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@

.. versionadded:: 2.0

engine_kwargs : dict
engine_kwargs : dict, default None
Arbitrary keyword arguments passed to excel engine.

Returns
Expand Down Expand Up @@ -491,9 +491,9 @@ def read_excel(
should_close = True
io = ExcelFile(
io,
engine_kwargs,
storage_options=storage_options,
engine=engine,
engine_kwargs=engine_kwargs,
)
elif engine and engine != io.engine:
raise ValueError(
Expand Down Expand Up @@ -538,9 +538,12 @@ class BaseExcelReader(metaclass=abc.ABCMeta):
def __init__(
self,
filepath_or_buffer,
engine_kwargs,
storage_options: StorageOptions = None,
engine_kwargs: None = None,
) -> None:
if engine_kwargs is None:
engine_kwargs = {}

# First argument can also be bytes, so create a buffer
if isinstance(filepath_or_buffer, bytes):
filepath_or_buffer = BytesIO(filepath_or_buffer)
Expand Down Expand Up @@ -1435,8 +1438,6 @@ class ExcelFile:
A file-like object, xlrd workbook or openpyxl workbook.
If a string or path object, expected to be a path to a
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
engine_kwargs: dict
Arbitrary keyword arguments passed to excel engine.
engine : str, default None
If io is not a buffer or path, this must be set to identify io.
Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``
Expand Down Expand Up @@ -1471,6 +1472,8 @@ class ExcelFile:

Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.
This is not supported, switch to using ``openpyxl`` instead.
engine_kwargs: dict, default None
Arbitrary keyword arguments passed to excel engine.
"""

from pandas.io.excel._odfreader import ODFReader
Expand All @@ -1488,10 +1491,13 @@ class ExcelFile:
def __init__(
self,
path_or_buffer,
engine_kwargs,
engine: str | None = None,
storage_options: StorageOptions = None,
engine_kwargs: None = None,
) -> None:
if engine_kwargs is None:
engine_kwargs = {}

if engine is not None and engine not in self._engines:
raise ValueError(f"Unknown engine: {engine}")

Expand Down Expand Up @@ -1537,8 +1543,8 @@ def __init__(

self._reader = self._engines[engine](
self._io,
engine_kwargs,
storage_options=storage_options,
engine_kwargs=engine_kwargs,
)

def __fspath__(self):
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class ODFReader(BaseExcelReader):
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
engine_kwargs,
storage_options: StorageOptions = None,
engine_kwargs: None = None,
) -> None:
"""
Read tables out of OpenDocument formatted files.
Expand All @@ -40,15 +40,15 @@ def __init__(
----------
filepath_or_buffer : str, path to be parsed or
an open readable stream.
engine_kwargs : dict
Arbitrary keyword arguments passed to excel engine.
{storage_options}
engine_kwargs : dict, default None
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("odf")
super().__init__(
filepath_or_buffer,
engine_kwargs,
storage_options=storage_options,
engine_kwargs=engine_kwargs,
)

@property
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,8 @@ class OpenpyxlReader(BaseExcelReader):
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
engine_kwargs,
storage_options: StorageOptions = None,
engine_kwargs: None = None,
) -> None:
"""
Reader using openpyxl engine.
Expand All @@ -545,15 +545,15 @@ def __init__(
----------
filepath_or_buffer : str, path object or Workbook
Object to be parsed.
engine_kwargs : dict
Arbitrary keyword arguments passed to excel engine.
{storage_options}
engine_kwargs : dict, default None
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("openpyxl")
super().__init__(
filepath_or_buffer,
engine_kwargs,
storage_options=storage_options,
engine_kwargs=engine_kwargs,
)

@property
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/excel/_pyxlsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class PyxlsbReader(BaseExcelReader):
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
engine_kwargs,
storage_options: StorageOptions = None,
engine_kwargs: None = None,
) -> None:
"""
Reader using pyxlsb engine.
Expand All @@ -34,17 +34,17 @@ def __init__(
----------
filepath_or_buffer : str, path object, or Workbook
Object to be parsed.
engine_kwargs : dict
Arbitrary keyword arguments passed to excel engine.
{storage_options}
engine_kwargs : dict, default None
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("pyxlsb")
# This will call load_workbook on the filepath or buffer
# And set the result to the book-attribute
super().__init__(
filepath_or_buffer,
engine_kwargs,
storage_options=storage_options,
engine_kwargs=engine_kwargs,
)

@property
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/excel/_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class XlrdReader(BaseExcelReader):
def __init__(
self,
filepath_or_buffer,
engine_kwargs,
storage_options: StorageOptions = None,
engine_kwargs: None = None,
) -> None:
"""
Reader using xlrd engine.
Expand All @@ -34,16 +34,16 @@ def __init__(
----------
filepath_or_buffer : str, path object or Workbook
Object to be parsed.
engine_kwargs : dict
Arbitrary keyword arguments passed to excel engine.
{storage_options}
engine_kwargs : dict, default None
Arbitrary keyword arguments passed to excel engine.
"""
err_msg = "Install xlrd >= 2.0.1 for xls Excel support"
import_optional_dependency("xlrd", extra=err_msg)
super().__init__(
filepath_or_buffer,
engine_kwargs,
storage_options=storage_options,
engine_kwargs=engine_kwargs,
)

@property
Expand Down