Fix typing for extension arrays and extension dtypes without isin and astype by Dr-Irv · Pull Request #40421 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Fix typing for extension arrays and extension dtypes without isin and astype #40421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 43 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f2c52a4
small typing fixes
Dr-Irv Jan 23, 2021
d7ff8d3
fix ExtensionArray and EXtensionDtype
Dr-Irv Jan 23, 2021
49fa06e
merge with master
Dr-Irv Jan 31, 2021
03b2c4a
fixes for delete, isin, unique
Dr-Irv Jan 31, 2021
3e19958
fix import of Literal
Dr-Irv Jan 31, 2021
6861901
remove quotes on ExtensionDType.construct_from_string
Dr-Irv Jan 31, 2021
9be6486
move numpy workaround to _typing.py
Dr-Irv Feb 1, 2021
260b367
remove numpy dummy
Dr-Irv Feb 2, 2021
6276725
remove extra line in _typing
Dr-Irv Feb 2, 2021
4dafaca
Merge remote-tracking branch 'upstream/master' into extensiontyping
Dr-Irv Feb 3, 2021
8b2cee2
import Literal
Dr-Irv Feb 3, 2021
3a7d839
Merge remote-tracking branch 'upstream/master' into extensiontyping
Dr-Irv Feb 14, 2021
a21bb60
merge with master
Dr-Irv Mar 8, 2021
8cd6b76
isort precommit fix
Dr-Irv Mar 8, 2021
e0e0131
fix interval.repeat() typing
Dr-Irv Mar 8, 2021
6a6a21f
overload for __getitem__ and use pattern with ExtensionArrayT as self…
Dr-Irv Mar 9, 2021
bf753e6
lose less ExtensionArrayT. Make registry private. consolidate overload
Dr-Irv Mar 10, 2021
c9795a5
remove ExtensionArray typing of self
Dr-Irv Mar 10, 2021
d452842
Merge remote-tracking branch 'upstream/master' into extensiontyping
Dr-Irv Mar 10, 2021
3c2c78b
merge with upstream/master
Dr-Irv Mar 12, 2021
548c198
make extension arrays work with new typing, fixing astype and to_numpy
Dr-Irv Mar 12, 2021
db8ed9b
fix Literal import
Dr-Irv Mar 12, 2021
f8191f8
fix logic in ensure_int_or_float
Dr-Irv Mar 12, 2021
575645f
fix conflict with master
Dr-Irv Mar 12, 2021
6f8fcb5
fix typing in groupby to_numpy call
Dr-Irv Mar 12, 2021
3ea2420
fix groupby again. Allow kwargs for extension to_numpy
Dr-Irv Mar 13, 2021
c83a628
Merge remote-tracking branch 'upstream/master' into extensiontyping
simonjayhawkins Mar 13, 2021
5bb24d4
fixes for merge with master
Dr-Irv Mar 13, 2021
ad1ab3b
remove astype and isin changes
Dr-Irv Mar 13, 2021
63c3d6d
add comment to cast in managers. change return type of astype
Dr-Irv Mar 13, 2021
1882074
add 0 as argument for repeat`
Dr-Irv Mar 13, 2021
1274a76
remove kwargs from to_numpy
Dr-Irv Mar 13, 2021
363e203
remove more kwargs from to_numpy calls
Dr-Irv Mar 13, 2021
01c942c
Merge remote-tracking branch 'upstream/master' into limitextensiontyping
Dr-Irv Mar 13, 2021
1196132
don't cast in astype. TODO for overload of astype
Dr-Irv Mar 13, 2021
66d5da4
remove private registry, getitem overloads, typevar on DateTimeScalar
Dr-Irv Mar 13, 2021
5411998
Remove List[Any] from getitem
Dr-Irv Mar 13, 2021
9b7481d
remove spacing change in _mixins.py and __getitem__
Dr-Irv Mar 13, 2021
4bd3422
remove cast in io/formats. Change isinstance check in pandas.core.ba…
Dr-Irv Mar 14, 2021
3b1ff79
merge with master to resolve conflicts
Dr-Irv Mar 14, 2021
5719daa
merge with master, remove more ignores
Dr-Irv Apr 3, 2021
dbfb3a2
remove mypy comments from format
Dr-Irv Apr 3, 2021
d41bf91
resolve conflicts with master
Dr-Irv Apr 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remove astype and isin changes
  • Loading branch information
Dr-Irv committed Mar 13, 2021
commit ad1ab3b8c381d6f3c36063f6c5cf1d6116730f77
10 changes: 3 additions & 7 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,13 +382,9 @@ def _get_ilevel_values(index, level):
# skip exact index checking when `check_categorical` is False
if check_exact and check_categorical:
if not left.equals(right):
# error: Value of type variable "_Number" of "sum" cannot be
# "Union[ExtensionArray, ndarray, Any]"
thesum = np.sum(
(left._values != right._values).astype(int)
) # type: ignore[type-var]
# error: Unsupported operand types for * ("ExtensionArray" and "float")
diff = thesum * 100.0 / len(left) # type: ignore[operator]
diff = (
np.sum((left._values != right._values).astype(int)) * 100.0 / len(left)
)
msg = f"{obj} values are different ({np.round(diff, 5)} %)"
raise_assert_detail(obj, msg, left, right)
else:
Expand Down
8 changes: 1 addition & 7 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,7 @@

ArrayLike = Union["ExtensionArray", np.ndarray]
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
AnySequenceLike = Union[
"ExtensionArray",
"Index",
"Series",
Sequence[Any],
np.ndarray,
]

# scalars

PythonScalar = Union[str, int, float, bool]
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
)
from pandas._typing import (
AnyArrayLike,
AnySequenceLike,
ArrayLike,
DtypeObj,
FrameOrSeriesUnion,
Expand Down Expand Up @@ -447,7 +446,7 @@ def unique(values):
unique1d = unique


def isin(comps: AnySequenceLike, values: AnySequenceLike) -> np.ndarray:
def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
"""
Compute the isin boolean array.

Expand Down
14 changes: 4 additions & 10 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,15 +525,7 @@ def nbytes(self) -> int:
# ------------------------------------------------------------------------
# Additional Methods
# ------------------------------------------------------------------------
@overload
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
def astype(self, dtype: Dtype, copy: bool = True):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When adding types, we should ensure that the types match the docstrings. From the PR title and previous discussion, IIUC, astype was being done separately.

I would revert this for now until this method is sorted properly.

see also #41018 (comment) and response.

The docstring states we return np.ndarray and the one-liner suggests that too. We sometimes also return an ExtensionArray, this is dependent on the type of dtype.

My concern is that if we add the type now, this may get forgotten.

"""
Cast to a NumPy array with 'dtype'.

Expand Down Expand Up @@ -933,7 +925,9 @@ def isin(self, values: Sequence[Any]) -> np.ndarray:
-------
np.ndarray[bool]
"""
return isin(self.astype(object), values)
# error: Argument 2 to "isin" has incompatible type "Sequence[Any]"; expected
# "Union[Union[ExtensionArray, ndarray], Index, Series]"
return isin(self.astype(object), values) # type: ignore[arg-type]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you avoid adding this ignore, i.e. make the types of EA.isin and algos.isin consistent?


def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
"""
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
Tuple,
Type,
Union,
overload,
)
import warnings

Expand Down Expand Up @@ -381,14 +380,6 @@ def reconstruct(x):
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value)

@overload
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

astype changes are going to be a separate PR. Revert for now.

"""
Cast to a NumPy array or ExtensionArray with 'dtype'.
Expand Down
15 changes: 5 additions & 10 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
TypeVar,
Union,
cast,
overload,
)
from warnings import warn

Expand Down Expand Up @@ -483,14 +482,6 @@ def _constructor(self) -> Type[Categorical]:
def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False):
return Categorical(scalars, dtype=dtype, copy=copy)

@overload
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Coerce this type to another dtype
Expand Down Expand Up @@ -2453,7 +2444,11 @@ def _str_get_dummies(self, sep="|"):
# sep may not be in categories. Just bail on this.
from pandas.core.arrays import PandasArray

return PandasArray(self.astype(str))._str_get_dummies(sep)
# error: Argument 1 to "PandasArray" has incompatible type
# "ExtensionArray"; expected "Union[ndarray, PandasArray]"
return PandasArray(self.astype(str))._str_get_dummies( # type: ignore[arg-type]
sep
)


# The Series.cat accessor
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Optional,
Tuple,
Type,
overload,
)
import warnings

Expand Down Expand Up @@ -279,14 +278,6 @@ def _from_sequence_of_strings(
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

@overload
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

"""
Cast to a NumPy array or ExtensionArray with 'dtype'.
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
Optional,
Tuple,
Type,
overload,
)
import warnings

Expand Down Expand Up @@ -343,14 +342,6 @@ def _from_sequence_of_strings(
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

@overload
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
Type,
TypeVar,
Union,
overload,
)

import numpy as np
Expand Down Expand Up @@ -305,14 +304,6 @@ def to_numpy(
data = self._data.astype(dtype, copy=copy)
return data

@overload
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
dtype = pandas_dtype(dtype)

Expand Down
33 changes: 16 additions & 17 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
"""

from typing import (
TYPE_CHECKING,
Any,
Callable,
Union,
cast,
)
import warnings

Expand Down Expand Up @@ -60,9 +58,6 @@
is_sequence,
)

if TYPE_CHECKING:
from pandas.core.arrays.base import ExtensionArray

POSSIBLY_CAST_DTYPES = {
np.dtype(t).name
for t in [
Expand Down Expand Up @@ -160,18 +155,22 @@ def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> ArrayLike:
will remain unchanged.
"""
# TODO: GH27506 potential bug with ExtensionArrays
if is_extension_array_dtype(arr.dtype):
return cast("ExtensionArray", arr).to_numpy(dtype="float64", na_value=np.nan)
else:
assert isinstance(arr, np.ndarray) # For typing
try:
return arr.astype("int64", copy=copy, casting="safe")
except TypeError:
pass
try:
return arr.astype("uint64", copy=copy, casting="safe")
except TypeError:
return arr.astype("float64", copy=copy)
try:
# error: Unexpected keyword argument "casting" for "astype"
return arr.astype("int64", copy=copy, casting="safe") # type: ignore[call-arg]
except TypeError:
pass
try:
# error: Unexpected keyword argument "casting" for "astype"
return arr.astype("uint64", copy=copy, casting="safe") # type: ignore[call-arg]
except TypeError:
if is_extension_array_dtype(arr.dtype):
# pandas/core/dtypes/common.py:168: error: Item "ndarray" of
# "Union[ExtensionArray, ndarray]" has no attribute "to_numpy" [union-attr]
return arr.to_numpy( # type: ignore[union-attr]
dtype="float64", na_value=np.nan
)
return arr.astype("float64", copy=copy)


def ensure_python_int(value: Union[int, np.integer]) -> int:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
this is often overridden to handle to_dense like operations
"""
if dtype == _dtype_obj:
# error: Incompatible return value type (got "Union[ndarray,
# ExtensionArray]", expected "ndarray")
return self.values.astype(_dtype_obj) # type: ignore[return-value]
return self.values.astype(_dtype_obj)
# error: Incompatible return value type (got "Union[ndarray, ExtensionArray]",
# expected "ndarray")
return self.values # type: ignore[return-value]
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,9 @@ def calc_with_mask(carg, mask):

# string with NaN-like
try:
mask = ~algorithms.isin(arg, list(nat_strings))
# error: Argument 2 to "isin" has incompatible type "List[Any]"; expected
# "Union[Union[ExtensionArray, ndarray], Index, Series]"
mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type]
return calc_with_mask(arg, mask)
except (ValueError, OverflowError, TypeError):
pass
Expand Down
10 changes: 8 additions & 2 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,11 @@ def _convert_to_ndarrays(
try:
values = lib.map_infer(values, conv_f)
except ValueError:
mask = algorithms.isin(values, list(na_values)).view(np.uint8)
# error: Argument 2 to "isin" has incompatible type "List[Any]";
# expected "Union[Union[ExtensionArray, ndarray], Index, Series]"
mask = algorithms.isin(
values, list(na_values) # type: ignore[arg-type]
).view(np.uint8)
values = lib.map_infer_mask(values, conv_f, mask)

cvals, na_count = self._infer_types(
Expand Down Expand Up @@ -657,7 +661,9 @@ def _infer_types(self, values, na_values, try_num_bool=True):
"""
na_count = 0
if issubclass(values.dtype.type, (np.number, np.bool_)):
mask = algorithms.isin(values, list(na_values))
# error: Argument 2 to "isin" has incompatible type "List[Any]"; expected
# "Union[Union[ExtensionArray, ndarray], Index, Series]"
mask = algorithms.isin(values, list(na_values)) # type: ignore[arg-type]
# error: Incompatible types in assignment (expression has type
# "number[Any]", variable has type "int")
na_count = mask.sum() # type: ignore[assignment]
Expand Down