-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Fix typing for extension arrays and extension dtypes without isin and astype #40421
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
f2c52a4
d7ff8d3
49fa06e
03b2c4a
3e19958
6861901
9be6486
260b367
6276725
4dafaca
8b2cee2
3a7d839
a21bb60
8cd6b76
e0e0131
6a6a21f
bf753e6
c9795a5
d452842
3c2c78b
548c198
db8ed9b
f8191f8
575645f
6f8fcb5
3ea2420
c83a628
5bb24d4
ad1ab3b
63c3d6d
1882074
1274a76
363e203
01c942c
1196132
66d5da4
5411998
9b7481d
4bd3422
3b1ff79
5719daa
dbfb3a2
d41bf91
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -191,7 +191,7 @@ def extract_bool_array(mask: ArrayLike) -> np.ndarray: | |
# We could have BooleanArray, Sparse[bool], ... | ||
# Except for BooleanArray, this is equivalent to just | ||
# np.asarray(mask, dtype=bool) | ||
mask = mask.to_numpy(dtype=bool, na_value=False) | ||
mask = mask.to_numpy(dtype=np.dtype(bool), na_value=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This shouldn't need to be changed. I've opened #41185 as a precursor to fix these false positives. |
||
|
||
mask = np.asarray(mask, dtype=bool) | ||
return mask | ||
EF5E
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
Callable, | ||
Dict, | ||
Iterator, | ||
List, | ||
Literal, | ||
Optional, | ||
Sequence, | ||
|
@@ -32,6 +33,7 @@ | |
from pandas._typing import ( | ||
ArrayLike, | ||
Dtype, | ||
NpDtype, | ||
Shape, | ||
) | ||
from pandas.compat import set_function_name | ||
|
@@ -301,11 +303,11 @@ def __getitem__(self, item: int) -> Any: | |
... | ||
|
||
@overload | ||
def __getitem__(self, item: Union[slice, np.ndarray]) -> ExtensionArray: | ||
def __getitem__(self, item: Union[slice, np.ndarray, List[Any]]) -> ExtensionArray: | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
... | ||
|
||
def __getitem__( | ||
self, item: Union[int, slice, np.ndarray] | ||
self, item: Union[int, slice, np.ndarray, List[Any]] | ||
) -> Union[ExtensionArray, Any]: | ||
""" | ||
Select a subset of self. | ||
|
@@ -441,9 +443,10 @@ def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] | |
|
||
def to_numpy( | ||
self, | ||
dtype: Optional[Dtype] = None, | ||
dtype: Optional[NpDtype] = None, | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
copy: bool = False, | ||
na_value: Optional[Any] = lib.no_default, | ||
**kwargs: Any, | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) -> np.ndarray: | ||
""" | ||
Convert to a NumPy ndarray. | ||
|
@@ -470,12 +473,7 @@ def to_numpy( | |
------- | ||
numpy.ndarray | ||
""" | ||
# error: Argument "dtype" to "asarray" has incompatible type | ||
# "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], Type[int], | ||
# Type[complex], Type[bool], Type[object], None]"; expected "Union[dtype[Any], | ||
# None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, | ||
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" | ||
result = np.asarray(self, dtype=dtype) # type: ignore[arg-type] | ||
result = np.asarray(self, dtype=dtype) | ||
if copy or na_value is not lib.no_default: | ||
result = result.copy() | ||
if na_value is not lib.no_default: | ||
|
@@ -527,8 +525,15 @@ def nbytes(self) -> int: | |
# ------------------------------------------------------------------------ | ||
# Additional Methods | ||
# ------------------------------------------------------------------------ | ||
@overload | ||
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: | ||
... | ||
|
||
@overload | ||
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: | ||
... | ||
|
||
def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray: | ||
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: | ||
""" | ||
Cast to a NumPy array with 'dtype'. | ||
|
||
|
@@ -562,7 +567,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray: | |
): # allow conversion to StringArrays | ||
return dtype.construct_array_type()._from_sequence(self, copy=False) | ||
|
||
return np.array(self, dtype=dtype, copy=copy) | ||
return np.array(self, dtype=cast(NpDtype, dtype), copy=copy) | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def isna(self) -> Union[np.ndarray, ExtensionArraySupportsAnyAll]: | ||
""" | ||
|
@@ -829,9 +834,9 @@ def unique(self) -> ExtensionArray: | |
|
||
def searchsorted( | ||
self, | ||
value: ArrayLike, | ||
value: Sequence[Any], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sequence is not yet compatible with EA (and others). See #28770. |
||
side: Literal["left", "right"] = "left", | ||
sorter: Optional[ArrayLike] = None, | ||
sorter: Optional[Sequence[Any]] = None, | ||
) -> np.ndarray: | ||
""" | ||
Find indices where elements should be inserted to maintain order. | ||
|
@@ -877,7 +882,7 @@ def searchsorted( | |
# 1. Values outside the range of the `data_for_sorting` fixture | ||
# 2. Values between the values in the `data_for_sorting` fixture | ||
# 3. Missing values. | ||
arr = self.astype(object) | ||
arr = cast(np.ndarray, self.astype(object)) | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return arr.searchsorted(value, side=side, sorter=sorter) | ||
|
||
def equals(self, other: object) -> bool: | ||
|
@@ -914,7 +919,7 @@ def equals(self, other: object) -> bool: | |
equal_na = self.isna() & other.isna() # type: ignore[operator] | ||
return bool((equal_values | equal_na).all()) | ||
|
||
def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray: | ||
def isin(self, values: Sequence[Any]) -> np.ndarray: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment as above. Also need to accept ArrayLike. |
||
""" | ||
Pointwise comparison for set containment in the given values. | ||
|
||
|
@@ -928,7 +933,7 @@ def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray: | |
------- | ||
np.ndarray[bool] | ||
""" | ||
return isin(np.asarray(self), values) | ||
return isin(self.astype(object), values) | ||
|
||
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: | ||
""" | ||
|
@@ -952,7 +957,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: | |
The values returned by this method are also used in | ||
:func:`pandas.util.hash_pandas_object`. | ||
""" | ||
return self.astype(object), np.nan | ||
return cast(np.ndarray, self.astype(object)), np.nan | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we should type astype as a precursor to avoid this and other casts. It's also a blocker for #41018. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So are you suggesting that I do a PR for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not necessarily. This is a big PR. Break it down into small chunks and we can maybe do some in parallel. Not averse to a PR per method. Much more likely to get input/review from others and a discussion going with smaller PRs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. PRs without casts or ignores should sail through... well, maybe. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll put this PR in draft status. Too many issues with respect to calling numpy things that you are attempting to deal with elsewhere. I'll try some smaller ones. |
||
|
||
def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
Tuple, | ||
Type, | ||
Union, | ||
overload, | ||
) | ||
import warnings | ||
|
||
|
@@ -20,6 +21,7 @@ | |
from pandas._typing import ( | ||
ArrayLike, | ||
Dtype, | ||
DtypeArg, | ||
) | ||
from pandas.compat.numpy import function as nv | ||
|
||
|
@@ -296,7 +298,7 @@ def dtype(self) -> BooleanDtype: | |
|
||
@classmethod | ||
def _from_sequence( | ||
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False | ||
cls, scalars, *, dtype: Optional[DtypeArg] = None, copy: bool = False | ||
) -> BooleanArray: | ||
if dtype: | ||
assert dtype == "boolean" | ||
|
@@ -379,7 +381,15 @@ def reconstruct(x): | |
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: | ||
return coerce_to_array(value) | ||
|
||
def astype(self, dtype, copy: bool = True) -> ArrayLike: | ||
@overload | ||
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: | ||
... | ||
|
||
@overload | ||
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: | ||
... | ||
|
||
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. astype changes are going to be a separate PR. Revert for now. |
||
""" | ||
Cast to a NumPy array or ExtensionArray with 'dtype'. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just make this an
isinstance(comps, ExtensionArray):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't do that. Creates a circular import.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could do
if not isinstance(comps, np.ndarray)
. After the extract_array
call, comps should be ArrayLike.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True. But I think I will do that in another PR related to fixing args for
isin