ENH: add ExtensionArray.to_numpy to have control over conversion to numpy array by jorisvandenbossche · Pull Request #30322 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH: add ExtensionArray.to_numpy to have control over conversion to numpy array #30322

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixups
  • Loading branch information
TomAugspurger committed Jan 6, 2020
commit afc7350f321b957c10a536c527c9c5b6f510bdb8
5 changes: 5 additions & 0 deletions doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,8 @@ behaves correctly.
:toctree: api/

api.indexers.check_bool_array_indexer


The sentinel ``pandas.api.extensions._no_default`` is used as the default
value in some methods. Use an ``is`` comparison to check if the user
provides a non-default value.
3 changes: 2 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2232,7 +2232,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
return objects


_no_default = object()
# Note: _no_default is exported to the public API in pandas.api.extensions
_no_default = object() #: Sentinel indicating the default value.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we rename this to no_default ? (no leading underscore)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about it, don't have a strong opinion.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i agree we should just rename this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that there are a number of places where we use ``object()`` as the marker (so we should change those in a follow-up).



@cython.boundscheck(False)
Expand Down
2 changes: 2 additions & 0 deletions pandas/api/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Public API for extending pandas objects."""
from pandas._libs.lib import _no_default # noqa: F401

from pandas.core.dtypes.dtypes import ( # noqa: F401
ExtensionDtype,
register_extension_dtype,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def __getitem__(self, item):
return type(self)(self._data[item], self._mask[item])

def to_numpy(
self, dtype=None, copy=False, na_value: "Scalar" = lib._no_default, **kwargs
self, dtype=None, copy=False, na_value: "Scalar" = lib._no_default,
):
"""
Convert to a NumPy Array.
Expand Down
18 changes: 0 additions & 18 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,24 +422,6 @@ def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
# ------------------------------------------------------------------------
# Additional Methods
def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
"""
Convert the PandasArray to a :class:`numpy.ndarray`.

By default, this requires no coercion or copying of data.

Parameters
----------
dtype : numpy.dtype
The NumPy dtype to pass to :func:`numpy.asarray`.
copy : bool, default False
Whether to copy the underlying data.
na_value : Scalar, optional
The missing value to use for missing values.

Returns
-------
ndarray
"""
result = np.asarray(self._ndarray, dtype=dtype)

if (copy or na_value is not lib._no_default) and result is self._ndarray:
Expand Down
15 changes: 1 addition & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,7 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None):

return cls(data, index=index, columns=columns, dtype=dtype)

def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
def to_numpy(self, dtype=None, copy=False):
"""
Convert the DataFrame to a NumPy array.

Expand All @@ -1264,12 +1264,6 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
a copy is made, even if not strictly necessary.

na_value : Any, optional
The value to use for missing values. The default value depends
on `dtype` and the type of the array.

.. versionadded:: 1.0.0

Returns
-------
numpy.ndarray
Expand Down Expand Up @@ -1301,13 +1295,6 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
[2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)
"""
result = np.array(self.values, dtype=dtype, copy=copy)
if na_value is not lib._no_default:
if not copy:
# copy even if not requested. This may be unnecessary
# if NumPy already copied.
result = result.copy()

result[self.isna()] = na_value
return result

def to_dict(self, orient="dict", into=dict):
Expand Down
0