8000 MAINT: Remove NotImplemented handling from ufuncs (almost) · numpy/numpy@acd37c8 · GitHub
[go: up one dir, main page]

Skip to content

Commit acd37c8

Browse files
committed
MAINT: Remove NotImplemented handling from ufuncs (almost)
This was redundant/broken anyway. See gh-5844 for discussion. See the massive comment added to ufunc_object.c:get_ufunc_arguments for a discussion of the deprecation strategy here -- it turns out that array_richcompare is such a disaster zone that we can't quite wholly eliminate NotImplemented quite yet. But this removes most NotImplementeds, and lays the groundwork for eliminating the rest in a release or two.
1 parent c3c819a commit acd37c8

File tree

6 files changed

+232
-99
lines changed

6 files changed

+232
-99
lines changed

doc/release/1.10.0-notes.rst

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,23 @@ Highlights
1616
evaluation order.
1717
* Addition of `nanprod` to the set of nanfunctions.
1818

19+
Dropped Support:
1920

20-
Dropped Support
21-
===============
2221
* The polytemplate.py file has been removed.
2322
* The _dotblas module is no longer available.
2423
* The testcalcs.py file has been removed.
2524

25+
Future Changes:
2626

27-
Future Changes
28-
==============
27+
* In array comparisons like ``arr1 == arr2``, many corner cases
28+
involving strings or structured dtypes that used to return scalars
29+
now issue ``FutureWarning`` or ``DeprecationWarning``, and in the
30+
future will be change to either perform elementwise comparisons or
31+
raise an error.
2932
* The SafeEval class will be removed.
3033
* The alterdot and restoredot functions will be removed.
3134

35+
See below for more details on these changes.
3236

3337
Compatibility notes
3438
===================
@@ -250,6 +254,41 @@ array is writeable.
250254
Deprecations
251255
============
252256

257+
Array comparisons involving strings or structured dtypes
258+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
259+
260+
Normally, comparison operations on arrays perform elementwise
261+
comparisons and return arrays of booleans. But in some corner cases,
262+
especially involving strings are structured dtypes, NumPy has
263+
historically returned a scalar instead. For example::
264+
265+
### Current behaviour
266+
267+
np.arange(2) == "foo"
268+
# -> False
269+
270+
np.arange(2) < "foo"
271+
# -> True on Python 2, error on Python 3
272+
273+
np.ones(2, dtype="i4,i4") == np.ones(2, dtype="i4,i4,i4")
274+
# -> False
275+
276+
Continuing work started in 1.9, in 1.10 these comparisons will now
277+
raise ``FutureWarning`` or ``DeprecationWarning``, and in the future
278+
they will be modified to behave more consistently with other
279+
comparison operations, e.g.::
280+
281+
### Future behaviour
282+
283+
np.arange(2) == "foo"
284+
# -> array([False, False])
285+
286+
np.arange(2) < "foo"
287+
# -> error, strings and numbers are not orderable
288+
289+
np.ones(2, dtype="i4,i4") == np.ones(2, dtype="i4,i4,i4")
290+
# -> [False, False]
291+
253292
SafeEval
254293
~~~~~~~~
255294
The SafeEval class in numpy/lib/utils.py is deprecated and will be removed

numpy/core/src/multiarray/arrayobject.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,6 +1312,15 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
13121312
else {
13131313
return _strings_richcompare(self, array_other, cmp_op, 0);
13141314
}
1315+
/* If we reach this point, it means that we are not comparing
1316+
* string-to-string. It's possible that this will still work out,
1317+
* e.g. if the other array is an object array, then both will be cast
1318+
* to object or something? I don't know how that works actually, but
1319+
* it does, b/c this works:
1320+
* l = ["a", "b"]
1321+
* assert np.array(l, dtype="S1") == np.array(l, dtype="O")
1322+
* So we fall through and see what happens.
1323+
*/
13151324
}
13161325

13171326
switch (cmp_op) {
@@ -1360,10 +1369,9 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
13601369
*/
13611370
if (array_other == NULL) {
13621371
PyErr_Clear();
1363-
if (DEPRECATE_FUTUREWARNING(
1372+
if (DEPRECATE(
13641373
"elementwise comparison failed and returning scalar "
1365-
"instead; this will raise an error or perform "
1366-
"elementwise comparison in the future.") < 0) {
1374+
"instead; this will raise an error in the future.") < 0) {
13671375
return NULL;
13681376
}
13691377
Py_INCREF(Py_NotImplemented);
@@ -1445,10 +1453,9 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
14451453
*/
14461454
if (array_other == NULL) {
14471455
PyErr_Clear();
1448-
if (DEPRECATE_FUTUREWARNING(
1456+
if (DEPRECATE(
14491457
"elementwise comparison failed and returning scalar "
1450-
"instead; this will raise an error or perform "
1451-
"elementwise comparison in the future.") < 0) {
1458+
"instead; this will raise an error in the future.") < 0) {
14521459
return NULL;
14531460
}
14541461
Py_INCREF(Py_NotImplemented);

numpy/core/src/umath/ufunc_object.c

Lines changed: 119 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -523,38 +523,6 @@ PyUFunc_GetPyValues(char *name, int *bufsize, int *errmask, PyObject **errobj)
523523
return _extract_pyvals(ref, name, bufsize, errmask, errobj);
524524
}
525525

526-
#define GETATTR(str, rstr) do {if (strcmp(name, #str) == 0) \
527-
return PyObject_HasAttrString(op, "__" #rstr "__");} while (0);
528-
529-
static int
530-
_has_reflected_op(PyObject *op, const char *name)
531-
{
532-
GETATTR(add, radd);
533-
GETATTR(subtract, rsub);
534-
GETATTR(multiply, rmul);
535-
GETATTR(divide, rdiv);
536-
GETATTR(true_divide, rtruediv);
537-
GETATTR(floor_divide, rfloordiv);
538-
GETATTR(remainder, rmod);
539-
GETATTR(power, rpow);
540-
GETATTR(left_shift, rlshift);
541-
GETATTR(right_shift, rrshift);
542-
GETATTR(bitwise_and, rand);
543-
GETATTR(bitwise_xor, rxor);
544-
GETATTR(bitwise_or, ror);
545-
/* Comparisons */
546-
GETATTR(equal, eq);
547-
GETATTR(not_equal, ne);
548-
GETATTR(greater, lt);
549-
GETATTR(less, gt);
550-
GETATTR(greater_equal, le);
551-
GETATTR(less_equal, ge);
552-
return 0;
553-
}
554-
555-
#undef GETATTR
556-
557-
558526
/* Return the position of next non-white-space char in the string */
559527
static int
560528
_next_non_white_space(const char* str, int offset)
@@ -896,16 +864,122 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
896864
}
897865
}
898866

899-
/*
900-
* Indicate not implemented if there are flexible objects (structured
901-
* type or string) but no object types and no registered struct
902-
* dtype ufuncs.
903-
*
904-
* Not sure - adding this increased to 246 errors, 150 failures.
905-
*/
906867
if (any_flexible && !any_flexible_userloops && !any_object) {
907-
return -2;
908-
868+
/* Traditionally, we return -2 here (meaning "NotImplemented") anytime
869+
* we hit the above condition.
870+
*
871+
* This condition basically means "we are doomed", b/c the "flexible"
872+
* dtypes -- strings and void -- cannot have their own ufunc loops
873+
* registered (except via the special "flexible userloops" mechanism),
874+
* and they can't be cast to anything except object (and we only cast
875+
* to object if any_object is true). So really we should do nothing
876+
* here and continue and let the proper error be raised. But, we can't
877+
* quite yet, b/c of backcompat.
878+
*
879+
* Most of the time, this NotImplemented either got returned directly
880+
* to the user (who can't do anything useful with it), or got passed
881+
* back out of a special function like __mul__. And fortunately, for
882+
* almost all special functions, the end result of this was a
883+
* TypeError. Which is also what we get if we just continue without
884+
* this special case, so this special case is unnecessary.
885+
*
886+
* The only thing that actually depended on the NotImplemented is
887+
* array_richcompare, which did two things with it. First, it needed
888+
* to see this NotImplemented in order to implement the special-case
889+
* comparisons for
890+
*
891+
* string < <= == != >= > string
892+
* void == != void
893+
*
894+
* Now it checks for those cases first, before trying to call the
895+
* ufunc, so that's no problem. What it doesn't handle, though, is
896+
* cases like
897+
*
898+
* float < string
899+
*
900+
* or
901+
*
902+
* float == void
903+
*
904+
* For those, it just let the NotImplemented bubble out, and accepted
905+
* Python's default handling. And unfortunately, for comparisons,
906+
* Python's default is *not* to raise an error. Instead, it returns
907+
* something that depends on the operator:
908+
*
909+
* == return False
910+
* != return True
911+
* < <= >= > Python 2: use "fallback" (= weird and broken) ordering
912+
* Python 3: raise TypeError (hallelujah)
913+
*
914+
* In most cases this is straightforwardly broken, because comparison
915+
* of two arrays should always return an array, and here we end up
916+
* returning a scalar. However, there is an exception: if we are
917+
* comparing two scalars for equality, then it actually is correct to
918+
* return a scalar bool instead of raising an error. If we just
919+
* removed this special check entirely, then "np.float64(1) == 'foo'"
920+
* would raise an error instead of returning False, which is genuinely
921+
* wrong.
922+
*
923+
* The proper end goal here is:
924+
* 1) == and != should be implemented in a proper vectorized way for
925+
* all types. The short-term hack for this is just to add a
926+
* special case to PyUFunc_DefaultLegacyInnerLoopSelector where
927+
* if it can't find a comparison loop for the given types, and
928+
* the ufunc is np.equal or np.not_equal, then it returns a loop
929+
* that just fills the output array with False (resp. True). Then
930+
* array_richcompare could trust that whenever its special cases
931+
* don't apply, simply calling the ufunc will do the right thing,
932+
* even without this special check.
933+
* 2) < <= >= > should raise an error if no comparison function can
934+
* be found. array_richcompare already handles all string <>
935+
* string cases, and void dtypes don't have ordering, so again
936+
* this would mean that array_richcompare could simply call the
937+
* ufunc and it would do the right thing (i.e., raise an error),
938+
* again without needing this special check.
939+
*
940+
* So this means that for the transition period, our goal is:
941+
* == and != on scalars should simply return NotImplemented like
942+
* they always did, since everything ends up working out correctly
943+
* in this case only
944+
* == and != on arrays should issue a FutureWarning and then return
945+
* NotImplemented
946+
* < <= >= > on all flexible dtypes on py2 should raise a
947+
* DeprecationWarning, and then return NotImplemented. On py3 we
948+
* skip the warning, though, b/c it would just be immediately be
949+
* followed by an exception anyway.
950+
*
951+
* And for all other operations, we let things continue as normal.
952+
*/
953+
/* strcmp() is a hack but I think we can get away with it for this
954+
* temporary measure.
955+
*/
956+
if (!strcmp(ufunc_name, "equal")
957+
|| !strcmp(ufunc_name, "not_equal")) {
958+
/* Warn on non-scalar, return NotImplemented regardless */
959+
assert(nin == 2);
960+
if (PyArray_NDIM(out_op[0]) != 0
961+
|| PyArray_NDIM(out_op[1]) != 0) {
962+
if (DEPRECATE_FUTUREWARNING(
963+
"elementwise comparison failed; returning scalar "
964+
"but in the future will perform elementwise "
965+
"comparison") < 0) {
966+
return -1;
967+
}
968+
}
969+
return -2;
970+
}
971+
else if (!strcmp(ufunc_name, "less")
972+
|| !strcmp(ufunc_name, "less_equal")
973+
|| !strcmp(ufunc_name, "greater")
974+
|| !strcmp(ufunc_name, "greater_equal")) {
975+
#if !defined(NPY_PY3K)
976+
if (DEPRECATE("unorderable dtypes; returning scalar but in "
977+
"the future this will be an error") < 0) {
978+
return -1;
979+
}
980+
#endif
981+
return -2;
982+
}
909983
}
910984

911985
/* Get positional output arguments */
@@ -2142,26 +2216,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
21422216
goto fail;
21432217
}
21442218

2145-
/*
2146-
* FAIL with NotImplemented if the other object has
2147-
* the __r<op>__ method and has a higher priority than
2148-
* the current op (signalling it can handle ndarray's).
2149-
*/
2150-
if (nin == 2 && nout == 1 && dtypes[1]->type_num == NPY_OBJECT) {
2151-
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
2152-
if (!PyArray_CheckExact(_obj)) {
2153-
double self_prio, other_prio;
2154-
self_prio = PyArray_GetPriority(PyTuple_GET_ITEM(args, 0),
2155-
NPY_SCALAR_PRIORITY);
2156-
other_prio = PyArray_GetPriority(_obj, NPY_SCALAR_PRIORITY);
2157-
if (self_prio < other_prio &&
2158-
_has_reflected_op(_obj, ufunc_name)) {
2159-
retval = -2;
2160-
goto fail;
2161-
}
2162-
}
2163-
}
2164-
21652219
#if NPY_UF_DBG_TRACING
21662220
printf("input types:\n");
21672221
for (i = 0; i < nin; ++i) {
@@ -2521,27 +2575,6 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
25212575
}
25222576
}
25232577

2524-
/*
2525-
* FAIL with NotImplemented if the other object has
2526-
* the __r<op>__ method and has __array_priority__ as
2527-
* an attribute (signalling it can handle ndarray's)
2528-
* and is not already an ndarray or a subtype of the same type.
2529-
*/
2530-
if (nin == 2 && nout == 1 && dtypes[1]->type_num == NPY_OBJECT) {
2531-
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
2532-
if (!PyArray_Check(_obj)) {
2533-
double self_prio, other_prio;
2534-
self_prio = PyArray_GetPriority(PyTuple_GET_ITEM(args, 0),
2535-
NPY_SCALAR_PRIORITY);
2536-
other_prio = PyArray_GetPriority(_obj, NPY_SCALAR_PRIORITY);
2537-
if (self_prio < other_prio &&
2538-
_has_re D1C2 flected_op(_obj, ufunc_name)) {
2539-
retval = -2;
2540-
goto fail;
2541-
}
2542-
}
2543-
}
2544-
25452578
#if NPY_UF_DBG_TRACING
25462579
printf("input types:\n");
25472580
for (i = 0; i < nin; ++i) {
@@ -4222,13 +4255,15 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
42224255
return NULL;
42234256
}
42244257
else if (ufunc->nin == 2 && ufunc->nout == 1) {
4225-
/* To allow the other argument to be given a chance */
4258+
/* For array_richcompare's benefit -- see the long comment in
4259+
* get_ufunc_arguments.
4260+
*/
42264261
Py_INCREF(Py_NotImplemented);
42274262
return Py_NotImplemented;
42284263
}
42294264
else {
42304265
PyErr_SetString(PyExc_TypeError,
4231-
"Not implemented for this type");
4266+
"XX can't happen, please report a bug XX");
42324267
return NULL;
42334268
}
42344269
}

0 commit comments

Comments
 (0)
0