8000 API: Enforce one copy for ``__array__`` when ``copy=True`` by mtsokol · Pull Request #26215 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

API: Enforce one copy for __array__ when copy=True #26215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Adjust was_copied variable names
  • Loading branch information
mtsokol committed Apr 11, 2024
commit ea983c6dd2612b6e9a313942a13a03c08233faf2
20 changes: 10 additions & 10 deletions numpy/_core/src/multiarray/array_coercion.c
Original file line number Diff line numbe 8000 r Diff line change
Expand Up @@ -99,7 +99,7 @@ enum _dtype_discovery_flags {
DISCOVER_TUPLES_AS_ELEMENTS = 1 << 4,
MAX_DIMS_WAS_REACHED = 1 << 5,
DESCRIPTOR_WAS_SET = 1 << 6,
COPY_WAS_CREATED = 1 << 7,
COPY_WAS_CREATED_BY__ARRAY__ = 1 << 7,
};


Expand Down Expand Up @@ -1028,18 +1028,18 @@ PyArray_DiscoverDTypeAndShape_Recursive(
/* __array__ may be passed the requested descriptor if provided */
requested_descr = *out_descr;
}
int was_copied = 0;
int was_copied_by__array__ = 0;
arr = (PyArrayObject *)_array_from_array_like(obj,
requested_descr, 0, NULL, copy, &was_copied);
requested_descr, 0, NULL, copy, &was_copied_by__array__);
if (arr == NULL) {
return -1;
}
else if (arr == (PyArrayObject *)Py_NotImplemented) {
Py_DECREF(arr);
arr = NULL;
}
if (was_copied == 1) {
*flags |= COPY_WAS_CREATED;
if (was_copied_by__array__ == 1) {
*flags |= COPY_WAS_CREATED_BY__ARRAY__;
}
}
if (arr != NULL) {
Expand Down Expand Up @@ -1231,8 +1231,8 @@ PyArray_DiscoverDTypeAndShape_Recursive(
* to choose a default.
* @param copy Specifies the copy behavior. -1 is corresponds to copy=None,
* 0 to copy=False, and 1 to copy=True in the Python API.
* @param was_copied Set to 1 if it can be assumed that a copy was made
* by implementor.
* @param was_copied_by__array__ Set to 1 if it can be assumed that a copy was
* made by implementor.
* @return dimensions of the discovered object or -1 on error.
* WARNING: If (and only if) the output is a single array, the ndim
* returned _can_ exceed the maximum allowed number of dimensions.
Expand All @@ -1245,7 +1245,7 @@ PyArray_DiscoverDTypeAndShape(
npy_intp out_shape[NPY_MAXDIMS],
coercion_cache_obj **coercion_cache,
PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
PyArray_Descr **out_descr, int copy, int *was_copied)
PyArray_Descr **out_descr, int copy, int *was_copied_by__array__)
{
coercion_cache_obj **coercion_cache_head = coercion_cache;
*coercion_cache = NULL;
Expand Down Expand Up @@ -1298,8 +1298,8 @@ PyArray_DiscoverDTypeAndShape(
goto fail;
}

if (was_copied != NULL && flags & COPY_WAS_CREATED) {
*was_copied = 1;
if (was_copied_by__array__ != NULL && flags & COPY_WAS_CREATED_BY__ARRAY__) {
*was_copied_by__array__ = 1;
}

if (NPY_UNLIKELY(flags & FOUND_RAGGED_ARRAY)) {
Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/src/multiarray/array_coercion.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ PyArray_DiscoverDTypeAndShape(
npy_intp out_shape[NPY_MAXDIMS],
coercion_cache_obj **coercion_cache,
PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
PyArray_Descr **out_descr, int copy, int *was_copied);
PyArray_Descr **out_descr, int copy, int *was_copied_by__array__);

NPY_NO_EXPORT PyObject *
_discover_array_parameters(PyObject *NPY_UNUSED(self),
Expand Down
29 changes: 16 additions & 13 deletions numpy/_core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -1429,8 +1429,8 @@ _array_from_buffer_3118(PyObject *memoryview)
* @param writeable whether the result must be writeable.
* @param context Unused parameter, must be NULL (should be removed later).
* @param copy Specifies the copy behavior.
* @param was_copied Set to 1 if it can be assumed that a copy was made
* by implementor.
* @param was_copied_by__array__ Set to 1 if it can be assumed that a copy
* was made by implementor.
*
* @returns The array object, Py_NotImplemented if op is not array-like,
* or NULL with an error set. (A new reference to Py_NotImplemented
Expand All @@ -1439,7 +1439,7 @@ _array_from_buffer_3118(PyObject *memoryview)
NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op,
PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context,
int copy, int *was_copied) {
int copy, int *was_copied_by__array__) {
PyObject* tmp;

/*
Expand Down Expand Up @@ -1487,7 +1487,8 @@ _array_from_array_like(PyObject *op,
}

if (tmp == Py_NotImplemented) {
tmp = PyArray_FromArrayAttr_int(op, requested_dtype, copy, was_copied);
tmp = PyArray_FromArrayAttr_int(
op, requested_dtype, copy, was_copied_by__array__);
if (tmp == NULL) {
return NULL;
}
Expand Down Expand Up @@ -1574,7 +1575,7 @@ PyArray_FromAny_int(PyObject *op, PyArray_Descr *in_descr,

// Default is copy = None
int copy = -1;
int was_copied = 0;
int was_copied_by__array__ = 0;

if (flags & NPY_ARRAY_ENSURENOCOPY) {
copy = 0;
Expand All @@ -1583,7 +1584,8 @@ PyArray_FromAny_int(PyObject *op, PyArray_Descr *in_descr,
}

ndim = PyArray_DiscoverDTypeAndShape(
op, NPY_MAXDIMS, dims, &cache, in_DType, in_descr, &dtype, copy, &was_copied);
op, NPY_MAXDIMS, dims, &cache, in_DType, in_descr, &dtype,
copy, &was_copied_by__array__);

if (ndim < 0) {
return NULL;
Expand Down Expand Up @@ -1620,7 +1622,7 @@ PyArray_FromAny_int(PyObject *op, PyArray_Descr *in_descr,
assert(cache->converted_obj == op);
arr = (PyArrayObject *)(cache->arr_or_sequence);
/* we may need to cast or assert flags (e.g. copy) */
if (was_copied == 1 && flags & NPY_ARRAY_ENSURECOPY) {
if (was_copied_by__array__ == 1 && flags & NPY_ARRAY_ENSURECOPY) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (was_copied_by__array__ == 1 && flags & NPY_ARRAY_ENSURECOPY) {
if (was_copied_by__array__ == 1) {

You don't need to test this, it is OK to unset the flag if it isn't set. (to me that is clearer, but maybe it's me)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

flags = flags & ~NPY_ARRAY_ENSURECOPY;
flags = flags | NPY_ARRAY_ENSURENOCOPY;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks wrong, so please add a test:

np.asarray(array_like, copy=True, order="F")

should just make two copies. You probably want to make sure that users honors the dtype that was passed. That might be nice, but if it is not very clean to test for, I am not sure we should worry much.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, a copy still needs to be made to convert to order="F". Then I shouldn't add ENSURENOCOPY flag but only remove ENSURECOPY one.

About honoring dtype: I can check if arr in cache has the same dtype as requested dtype variable. I wonder if it's sufficient for comparing dtypes in C-API:

PyArray_TYPE(arr) == dtype->type_num

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, unfortunately checking that is not really trivial (the only way to do it, is the way that the function uses later to see if it has to make a copy).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized, it is even more complicated, for subarray dtypes (maybe those should just never be passed on). But that is probably not really new, although more pronounced by nudging downstream to actually implement dtype.

Maybe the solution is to just unset the dtype when a copy was made, although unfortunately that doesn't help us in the path when no copy was made and dtype is used by the __array__ implementor.

(Eplenation: if you write dtype=(4,)i you get back an array with an additional dimension and dtype=i. But if __array__ does that, then you end up doing another cast!)

Copy link
Member Author
@mtsokol mtsokol Apr 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then let's maybe skip checking dtype after __array__ call?
What we currently have is that:

np.array(array_like, dtype=new_dtype, copy=False)

errors when a dtype wasn't honored in __array__ implementation (which is nice I think) and

np.arrray(array_like, dtype=new_dtype)

get's another copy if dtype wasn't honored, same as order="F" case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then let's maybe skip checking dtype after array call?
For here, that is perfectly fine to me, it is just a mild performance nudge for downstream after all.

However, maybe we should make a new issue. I am slightly worried that downstream until now always just ignored dtype=, and now we will get new bugs with subarray dtypes.

OTOH, I will also say that, I don't expect anyone to explicitly run into it, so in a sense, it is an unrelated bug fix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated flags logic here and added a test with:

np.asarray(array_like, copy=True, order="F")

}
Expand Down Expand Up @@ -2495,14 +2497,14 @@ check_or_clear_and_warn_error_if_due_to_copy_kwarg(PyObject *kwnames)
* NOTE: For copy == -1 it passes `op.__array__(copy=None)`,
* for copy == 0, `op.__array__(copy=False)`, and
* for copy == 1, `op.__array__(copy=True).
* @param was_copied Set to 1 if it can be assumed that a copy was made
* by implementor.
* @param was_copied_by__array__ Set to 1 if it can be assumed that a copy
* was made by implementor.
* @returns NotImplemented if `__array__` is not defined or a NumPy array
* (or subclass). On error, return NULL.
*/
NPY_NO_EXPORT PyObject *
PyArray_FromArrayAttr_int(
PyObject *op, PyArray_Descr *descr, int copy, int *was_copied)
PyArray_FromArrayAttr_int(PyObject *op, PyArray_Descr *descr, int copy,
int *was_copied_by__array__)
{
PyObject *new;
PyObject *array_meth;
Expand Down Expand Up @@ -2589,9 +2591,10 @@ PyArray_FromArrayAttr_int(
Py_DECREF(new);
return NULL;
}
if (was_copied != NULL && copy == 1 && must_copy_but_copy_kwarg_unimplemented == 0) {
if (was_copied_by__array__ != NULL && copy == 1 &&
must_copy_but_copy_kwarg_unimplemented == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could add a comment to remove the was_copied_by__array__ argument again here, but it is probably clear enough (when it happens).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

/* We can assume that a copy was made */
*was_copied = 1;
*was_copied_by__array__ = 1;
}

return new;
Expand Down
6 changes: 3 additions & 3 deletions numpy/_core/src/multiarray/ctors.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ PyArray_New(
NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op,
PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context,
int copy, int *was_copied);
int copy, int *was_copied_by__array__);

NPY_NO_EXPORT PyObject *
PyArray_FromAny_int(PyObject *op, PyArray_Descr *in_descr,
Expand Down Expand Up @@ -84,8 +84,8 @@ NPY_NO_EXPORT PyObject *
PyArray_FromInterface(PyObject *input);

NPY_NO_EXPORT PyObject *
PyArray_FromArrayAttr_int(
PyObject *op, PyArray_Descr *descr, int copy, int *was_copied);
PyArray_FromArrayAttr_int(PyObject *op, PyArray_Descr *descr, int copy,
int *was_copied_by__array__);

NPY_NO_EXPORT PyObject *
PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode,
Expand Down
1 change: 0 additions & 1 deletion numpy/_core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -8502,7 +8502,6 @@ def __array__(self, dtype=None):
match=r"Unable to avoid copy(.|\n)*numpy_2_0_migration_guide.html"):
np.array(a, copy=False)

@pytest.mark.skipif(IS_PYPY, reason="PyPy copies differently")
def test___array__copy_once(self):
size = 100
base_arr = np.zeros((size, size))
Expand Down
0