PERF: implement get_slice in cython by jbrockmendel · Pull Request #41045 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

PERF: implement get_slice in cython #41045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Apr 19, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
mypy fixup
  • Loading branch information
jbrockmendel committed Apr 14, 2021
commit 671a0aacdcd54e99363da7807e620a713711463d
70 changes: 46 additions & 24 deletions pandas/io/parsers/c_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
else:
if len(self._reader.header) > 1:
# we have a multi index in the columns
# error: Cannot determine type of 'names'
# error: Cannot determine type of 'index_names'
# error: Cannot determine type of 'col_names'
(
self.names,
self.names, # type: ignore[has-type]
self.index_names,
self.col_names,
passed_names,
Expand All @@ -79,15 +80,21 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
passed_names,
)
else:
self.names = list(self._reader.header[0])
# error: Cannot determine type of 'names'
self.names = list(self._reader.header[0]) # type: ignore[has-type]

if self.names is None:
# error: Cannot determine type of 'names'
if self.names is None: # type: ignore[has-type]
if self.prefix:
self.names = [
# error: Cannot determine type of 'names'
self.names = [ # type: ignore[has-type]
f"{self.prefix}{i}" for i in range(self._reader.table_width)
]
else:
self.names = list(range(self._reader.table_width))
# error: Cannot determine type of 'names'
self.names = list( # type: ignore[has-type]
range(self._reader.table_width)
)

# gh-9755
#
Expand All @@ -97,7 +104,8 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
#
# once names has been filtered, we will
# then set orig_names again to names
self.orig_names = self.names[:]
# error: Cannot determine type of 'names'
self.orig_names = self.names[:] # type: ignore[has-type]

if self.usecols:
usecols = self._evaluate_usecols(self.usecols, self.orig_names)
Expand All @@ -110,20 +118,30 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
):
self._validate_usecols_names(usecols, self.orig_names)

if len(self.names) > len(usecols):
self.names = [
# error: Cannot determine type of 'names'
if len(self.names) > len(usecols): # type: ignore[has-type]
# error: Cannot determine type of 'names'
self.names = [ # type: ignore[has-type]
n
for i, n in enumerate(self.names)
# error: Cannot determine type of 'names'
for i, n in enumerate(self.names) # type: ignore[has-type]
if (i in usecols or n in usecols)
]

if len(self.names) < len(usecols):
self._validate_usecols_names(usecols, self.names)
# error: Cannot determine type of 'names'
if len(self.names) < len(usecols): # type: ignore[has-type]
# error: Cannot determine type of 'names'
self._validate_usecols_names(
usecols,
self.names, # type: ignore[has-type]
)

self._validate_parse_dates_presence(self.names)
# error: Cannot determine type of 'names'
self._validate_parse_dates_presence(self.names) # type: ignore[has-type]
self._set_noconvert_columns()

self.orig_names = self.names
# error: Cannot determine type of 'names'
self.orig_names = self.names # type: ignore[has-type]

if not self._has_complex_date_col:
# error: Cannot determine type of 'index_col'
Expand All @@ -132,9 +150,15 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
):

self._name_processed = True
# error: Cannot determine type of 'index_col'
(index_names, self.names, self.index_col) = self._clean_index_names(
self.names,
(
index_names,
# error: Cannot determine type of 'names'
self.names, # type: ignore[has-type]
self.index_col,
) = self._clean_index_names(
# error: Cannot determine type of 'names'
self.names, # type: ignore[has-type]
# error: Cannot determine type of 'index_col'
self.index_col, # type: ignore[has-type]
self.unnamed_cols,
)
Expand Down Expand Up @@ -165,17 +189,14 @@ def _set_noconvert_columns(self):
undergo such conversions.
"""
assert self.orig_names is not None
# error: Item "None" of "Optional[Any]" has no attribute "__iter__"
# (not iterable)
# error: Cannot determine type of 'names'
col_indices = [
self.orig_names.index(x) for x in self.names # type: ignore[union-attr]
self.orig_names.index(x) for x in self.names # type: ignore[has-type]
]
# error: Argument 2 to "_set_noconvert_dtype_columns" of "ParserBase"
# has incompatible type "Optional[Any]"; expected
# "List[Union[int, str, Tuple[Any, ...]]]"
# error: Cannot determine type of 'names'
noconvert_columns = self._set_noconvert_dtype_columns(
col_indices,
self.names, # type: ignore[arg-type]
self.names, # type: ignore[has-type]
)
for col in noconvert_columns:
self._reader.set_noconvert(col)
Expand Down Expand Up @@ -213,7 +234,8 @@ def read(self, nrows=None):
# Done with first read, next time raise StopIteration
self._first_chunk = False

names = self.names
# error: Cannot determine type of 'names'
names = self.names # type: ignore[has-type]

if self._reader.leading_cols:
if self._has_complex_date_col:
Expand Down
0