-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
EA: BoolArray #25415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
EA: BoolArray #25415
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Cache for the resolved mask array class; filled in on first lookup.
_MaskArrayType = None


def get_mask_array_type():
    """Return the mask array class, resolving and caching it on first use.

    Resolution is deferred until call time because the candidate
    implementations (e.g. pyarrow) depend on pandas being importable.

    Returns
    -------
    type
        ``ArrowMaskArray`` when its module can be imported, otherwise
        ``NumpyMaskArray``.
    """
    global _MaskArrayType

    if _MaskArrayType is None:
        # prefer the pyarrow-backed mask; fall back to the numpy-backed one
        try:
            from pandas.core.arrays.mask._pyarrow import (
                ArrowMaskArray as resolved)
        except ImportError:
            from pandas.core.arrays.mask._numpy import (
                NumpyMaskArray as resolved)

        _MaskArrayType = resolved

    return _MaskArrayType


__all__ = ['get_mask_array_type']
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
"""A boolean mask interface. | ||
|
||
This module provides an interface to a numpy / pyarrow boolean mask. | ||
This is limited as not all of the implementations can hold NA, so | ||
for consistency this module is internal-only. | ||
""" | ||
|
||
import copy | ||
|
||
import numpy as np | ||
|
||
from pandas.api.extensions import ExtensionDtype | ||
from pandas.api.types import is_scalar | ||
from pandas.core.arrays.base import ExtensionArray | ||
from pandas.core.missing import isna | ||
|
||
|
||
class MaskDtype(ExtensionDtype):
    """Extension dtype describing a boolean mask.

    Instances compare equal to the string ``'bool'``, to any other
    ``MaskDtype`` instance and to ``np.dtype('bool')``.
    """

    type = np.bool_
    kind = 'b'
    name = 'bool'

    @classmethod
    def construct_from_string(cls, string):
        """Return a new instance when ``string`` equals the dtype name.

        Raises
        ------
        TypeError
            If ``string`` is anything other than ``cls.name``.
        """
        if string == cls.name:
            return cls()
        raise TypeError("Cannot construct a '{}' from "
                        "'{}'".format(cls, string))

    # NOTE(review): the ExtensionDtype base is believed to expose
    # ``_is_boolean`` as a property -- confirm whether this override should
    # be decorated with ``@property`` as well.
    def _is_boolean(self):
        return True

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        """Compare against a string, another MaskDtype or a numpy dtype.

        Any other operand is compared by hash; operands that cannot be
        hashed are reported as unequal instead of raising.
        """
        # compare == to np.dtype('bool')
        if isinstance(other, str):
            return other == self.name
        if isinstance(other, type(self)):
            return True
        if isinstance(other, np.dtype):
            return other == 'bool'
        try:
            return hash(self) == hash(other)
        except TypeError:
            # unhashable operands (lists, dicts, ...) used to make ``==``
            # raise; they can never be this dtype
            return False
|
||
|
||
class MaskArray(ExtensionArray): | ||
"""Common baseclass for both pyarrow and numpy masked arrays""" | ||
_typ = "maskarray" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this make |
||
|
||
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """Build an instance from a sequence of scalars.

    ``dtype`` and ``copy`` are accepted but not used here; construction is
    delegated entirely to ``cls.from_scalars``.
    """
    built = cls.from_scalars(scalars)
    return built
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@property | ||
def size(self): | ||
return len(self) | ||
|
||
def __eq__(self, other):
    """Compare with ``other`` after converting both sides to numpy arrays.

    The result is whatever numpy's ``==`` yields for the two converted
    operands.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    return np.asarray(self) == np.asarray(other)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. May be good to keep a list of things requiring a cast to NumPy at the top of this file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And perhaps make JIRAs for them as well. This, |
||
|
||
def __len__(self): | ||
return len(self._data) | ||
|
||
def isna(self):
    """Return a new instance flagging the missing entries of ``_data``.

    The module-level ``isna`` helper is applied to the numpy form of
    ``_data`` and the result is wrapped via ``from_scalars``.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    nas = isna(np.asarray(self._data))
    return type(self).from_scalars(nas)
|
||
def __invert__(self):
    """Return a new instance holding the element-wise ``~`` of ``_data``."""
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    inverted = ~np.asarray(self._data)
    return type(self).from_scalars(inverted)
|
||
def __or__(self, other):
    """Return a new instance holding ``self | other``.

    Both operands are converted to numpy arrays first and the result is
    wrapped via ``from_scalars``.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    return type(self).from_scalars(
        np.asarray(self) | np.asarray(other))
|
||
def __ior__(self, other):
    """Return ``self | other`` for the ``|=`` operator.

    A new instance is built via ``from_scalars``; ``self`` is not
    modified in place.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    return type(self).from_scalars(
        np.asarray(self) | np.asarray(other))
|
||
def __and__(self, other):
    """Return a new instance holding ``self & other``.

    Both operands are converted to numpy arrays first and the result is
    wrapped via ``from_scalars``.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    return type(self).from_scalars(
        np.asarray(self) & np.asarray(other))
|
||
def __iand__(self, other):
    """Return ``self & other`` for the ``&=`` operator.

    A new instance is built via ``from_scalars``; ``self`` is not
    modified in place.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    return type(self).from_scalars(
        np.asarray(self) & np.asarray(other))
|
||
def __getitem__(self, item):
    """Select by ``item`` after converting to a numpy array.

    A scalar ``item`` returns the selected element itself; any other
    indexer returns a new instance built via ``from_scalars``.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    picked = np.asarray(self)[item]
    if is_scalar(item):
        return picked
    return type(self).from_scalars(picked)
|
||
def view(self, dtype=None): | ||
arr = np.array(self._data, copy=False) | ||
if dtype is not None: | ||
arr = arr.view(dtype=dtype) | ||
return arr | ||
|
||
def sum(self, axis=None, min_count=None):
    """Return the sum over the numpy form of this array.

    ``axis`` and ``min_count`` are accepted but not used.
    """
    # np.asarray converts only when needed; np.array(..., copy=False)
    # raises on NumPy >= 2 whenever a copy cannot be avoided.
    return np.asarray(self).sum()
|
||
def copy(self, deep=False):
    """Return a new instance wrapping a copy of ``_data``.

    Parameters
    ----------
    deep : bool, default False
        Use ``copy.deepcopy`` when true, ``copy.copy`` otherwise.
    """
    duplicate = copy.deepcopy if deep else copy.copy
    return type(self)(duplicate(self._data))
|
||
def any(self, axis=0, out=None): | ||
return np.array(self._data, copy=False).any() | ||
|
||
def all(self, axis=0, out=None): | ||
return np.array(self._data, copy=False).all() | ||
|
||
def min(self, axis=0, out=None): | ||
return np.array(self._data, copy=False).min() | ||
|
||
def max(self, axis=0, out=None): | ||
return np.array(self._data, copy=False).max() | ||
|
||
def _reduce(self, method, skipna=True, **kwargs): | ||
if skipna: | ||
arr = self[~self.isna()] | ||
else: | ||
arr = self | ||
# we only allow explicity defined methods | ||
# ndarrays actually support: mean, var, prod, min, max | ||
try: | ||
op = getattr(arr, method) | ||
return op() | ||
except AttributeError: | ||
pass | ||
raise TypeError |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More of a question than anything else but is there a reason for defining a lot of the methods in this base class rather than in subclasses? Not terribly familiar with pyarrow yet but would the goal not be to decouple that from numpy here in the long run?