-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
ExtensionArray.take default implementation #20814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
fb3c234
dacd98e
0be9ec6
08f2479
eba137f
67ba9dd
37915e9
c721915
125ca0b
b7ae0bc
338566f
31cd304
05d8844
69e7fe7
449983b
<div class="text-emphasized css-truncate css-truncate-target">
Linting
c449afd
82cad8b
d5470a0
bbcbf19
1a4d987
fc729d6
74b2c09
5db6624
741f284
fbc4425
f3b91ca
eecd632
9a6c7d4
7c4f625
eb43fa4
6858409
ec0cecd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ class _DtypeOpsMixin(object): | |
|
||
# na_value is the default NA value to use for this type. This is used in | ||
# e.g. ExtensionArray.take. | ||
na_value = np.nan | ||
na_value = np.nan # TODO: change to _na_value | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove the TODO? |
||
|
||
def __eq__(self, other): | ||
"""Check whether 'other' is equal to self. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import warnings | ||
|
||
import copy | ||
from warnings import catch_warnings | ||
import inspect | ||
|
@@ -82,7 +83,7 @@ | |
from pandas.util._decorators import cache_readonly | ||
from pandas.util._validators import validate_bool_kwarg | ||
from pandas import compat | ||
from pandas.compat import range, map, zip, u | ||
from pandas.compat import range, map, zip, u, _default_fill_value | ||
|
||
|
||
class Block(PandasObject): | ||
|
@@ -1888,6 +1889,10 @@ def _holder(self): | |
# For extension blocks, the holder is values-dependent. | ||
return type(self.values) | ||
|
||
@property | ||
def fill_value(self): | ||
return self.values.dtype.na_value # TODO: change to _na_value | ||
|
||
@property | ||
def _can_hold_na(self): | ||
# The default ExtensionArray._can_hold_na is True | ||
|
@@ -4386,6 +4391,8 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None, | |
|
||
pandas-indexer with -1's only. | ||
""" | ||
# TODO: see if we can make fill_value be {col -> fill_value} | ||
# maybe earlier... | ||
if indexer is None: | ||
if new_axis is self.axes[axis] and not copy: | ||
return self | ||
|
@@ -4408,8 +4415,10 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None, | |
new_blocks = self._slice_take_blocks_ax0(indexer, | ||
fill_tuple=(fill_value,)) | ||
else: | ||
if fill_value is None: | ||
fill_value = _default_fill_value | ||
new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=( | ||
fill_value if fill_value is not None else blk.fill_value,)) | ||
fill_value if fill_value is not _default_fill_value else blk.fill_value,)) | ||
for blk in self.blocks] | ||
|
||
new_axes = list(self.axes) | ||
|
@@ -4436,6 +4445,9 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): | |
if self._is_single_block: | ||
blk = self.blocks[0] | ||
|
||
if allow_fill and fill_tuple[0] is _default_fill_value: | ||
fill_tuple = (blk.fill_value,) | ||
|
||
if sl_type in ('slice', 'mask'): | ||
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] | ||
elif not allow_fill or self.ndim == 1: | ||
|
@@ -5404,6 +5416,25 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): | |
elif is_uniform_join_units(join_units): | ||
b = join_units[0].block.concat_same_type( | ||
[ju.block for ju in join_units], placement=placement) | ||
elif is_uniform_reindexer(join_units): | ||
old_block = join_units[0].block | ||
|
||
new_values = concatenate_join_units(join_units, concat_axis, | ||
copy=copy) | ||
if new_values.ndim == 2: | ||
# XXX: categorical returns a categorical here | ||
# EA returns a 2d ndarray | ||
# need to harmonize these to always be EAs? | ||
assert new_values.shape[0] == 1 | ||
new_values = new_values[0] | ||
|
||
assert isinstance(old_block._holder, ABCExtensionArray) | ||
|
||
b = old_block.make_block_same_class( | ||
old_block._holder._from_sequence(new_values), | ||
placement=placement | ||
) | ||
|
||
else: | ||
b = make_block( | ||
concatenate_join_units(join_units, concat_axis, copy=copy), | ||
|
@@ -5434,6 +5465,13 @@ def is_uniform_join_units(join_units): | |
len(join_units) > 1) | ||
|
||
|
||
def is_uniform_reindexer(join_units): | ||
# For when we know we can reindex without changing type | ||
return ( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a docstring to indicate what you mean by "uniform reindex"? I understand it as "these are uniform blocks that will remain uniform after reindexing", correct? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Exactly. That's the intent anyway... It probably does depend on the |
||
all(ju.block and ju.block.is_extension for ju in join_units) | ||
) | ||
|
||
|
||
def get_empty_dtype_and_na(join_units): | ||
""" | ||
Return dtype and N/A values to use when concatenating specified units. | ||
|
@@ -5461,12 +5499,15 @@ def get_empty_dtype_and_na(join_units): | |
|
||
upcast_classes = defaultdict(list) | ||
null_upcast_classes = defaultdict(list) | ||
|
||
for dtype, unit in zip(dtypes, join_units): | ||
if dtype is None: | ||
continue | ||
|
||
if is_categorical_dtype(dtype): | ||
upcast_cls = 'category' | ||
elif is_extension_array_dtype(dtype): | ||
upcast_cls = 'extension' | ||
elif is_datetimetz(dtype): | ||
upcast_cls = 'datetimetz' | ||
elif issubclass(dtype.type, np.bool_): | ||
|
@@ -5496,6 +5537,8 @@ def get_empty_dtype_and_na(join_units): | |
# create the result | ||
if 'object' in upcast_classes: | ||
return np.dtype(np.object_), np.nan | ||
elif 'extension' in upcast_classes: | ||
return np.dtype(np.object_), None | ||
elif 'bool' in upcast_classes: | ||
if has_none_blocks: | ||
return np.dtype(np.object_), np.nan | ||
|
@@ -5755,7 +5798,9 @@ def dtype(self): | |
if self.block is None: | ||
raise AssertionError("Block is None, no dtype") | ||
|
||
if not self.needs_filling: | ||
if not self.needs_filling or self.block.is_extension: | ||
# ExtensionDtypes by definition can hold their | ||
# NA value. | ||
return self.block.dtype | ||
else: | ||
return _get_dtype(maybe_promote(self.block.dtype, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the "used in EA.take" is misleading. In many cases, it will not be used in take, as this `na_value` is your "boxed" scalar NA type, not the underlying physical NA value.