diff --git a/doc/release/upcoming_changes/14800.improvement.rst b/doc/release/upcoming_changes/14800.improvement.rst new file mode 100644 index 000000000000..158c315360cc --- /dev/null +++ b/doc/release/upcoming_changes/14800.improvement.rst @@ -0,0 +1,14 @@ +Comparison on ``object`` dtypes will prefer ``object`` output +------------------------------------------------------------- +Comparison ufuncs (``np.equal`` and friends) would return boolean arrays when +the input array dtype was ``object``. This led to inconsistent behaviour for +ragged arrays ``a = np.array([1, np.array([1, 2, 3])], dtype=object)``. This +will now return an object array:: + + >>> a = np.array([1, np.array([1, 2, 3])], dtype=object) + >>> np.equal(a, a) + array([True, array([ True, True, True])], dtype=object) + +The old behaviour, which will raise a ``ValueError`` in this case, is still +available by specifying a dtype as ``np.equal(a, a, dtype=bool)``. + diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index e0b6a654c80e..760b9c919312 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -226,7 +226,7 @@ def english_upper(s): 'P': 'OBJECT', } -all = '?bBhHiIlLqQefdgFDGOMm' +noobj = '?bBhHiIlLqQefdgFDGmM' O = 'O' P = 'P' ints = 'bBhHiIlLqQ' @@ -246,10 +246,8 @@ def english_upper(s): noint = inexact+O nointP = inexact+P allP = bints+times+flts+cmplxP -nobool = all[1:] -noobj = all[:-3]+all[-2:] -nobool_or_obj = all[1:-3]+all[-2:] -nobool_or_datetime = all[1:-2]+all[-1:] +nobool_or_obj = noobj[1:] +nobool_or_datetime = noobj[1:-1] + O # includes m - timedelta64 intflt = ints+flts intfltcmplx = ints+flts+cmplx nocmplx = bints+times+flts @@ -431,43 +429,49 @@ def english_upper(s): Ufunc(2, 1, None, docstrings.get('numpy.core.umath.greater'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'greater_equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.greater_equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'less': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.less'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'less_equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.less_equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'not_equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.not_equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'logical_and': Ufunc(2, 1, True_, @@ -475,6 +479,7 @@ def english_upper(s): 'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalAnd'), + TD(O, f='npy_ObjectLogicalAnd', out='?'), ), 'logical_not': Ufunc(1, 1, None, @@ -482,6 +487,7 @@ def english_upper(s): None, TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalNot'), + TD(O, f='npy_ObjectLogicalNot', out='?'), ), 'logical_or': Ufunc(2, 1, False_, @@ -489,6 +495,7 @@ def english_upper(s): 'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalOr'), + TD(O, f='npy_ObjectLogicalOr', out='?'), ), 'logical_xor': Ufunc(2, 1, False_, diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 8bffaa9affc5..9bdcd8241181 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -172,10 +172,11 @@ def test_normal_types(self): # (warning is issued a couple of times here) self.assert_deprecated(op, args=(a, a[:-1]), num=None) - # Element comparison error (numpy array can't be compared). + # ragged array comparison returns True/False a = np.array([1, np.array([1,2,3])], dtype=object) b = np.array([1, np.array([1,2,3])], dtype=object) - self.assert_deprecated(op, args=(a, b), num=None) + res = op(a, b) + assert res.dtype == 'object' def test_string(self): # For two string arrays, strings always raised the broadcasting error: diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 707c690ddbe6..d9f9615814ae 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1090,14 +1090,18 @@ def __eq__(self, other): return '==' arr0d = np.array(HasComparisons()) - assert_equal(arr0d == arr0d, True) - assert_equal(np.equal(arr0d, arr0d), True) # normal behavior is a cast + assert_equal(arr0d == arr0d, '==') + assert_equal(np.equal(arr0d, arr0d), '==') + assert_equal(np.equal(arr0d, arr0d, dtype=bool), True) assert_equal(np.equal(arr0d, arr0d, dtype=object), '==') arr1d = np.array([HasComparisons()]) - assert_equal(arr1d == arr1d, np.array([True])) - assert_equal(np.equal(arr1d, arr1d), np.array([True])) # normal behavior is a cast - assert_equal(np.equal(arr1d, arr1d, dtype=object), np.array(['=='])) + ret_obj = np.array(['=='], dtype=object) + ret_bool = np.array([True]) + assert_equal(arr1d == arr1d, ret_obj) + assert_equal(np.equal(arr1d, arr1d), ret_obj) + assert_equal(np.equal(arr1d, arr1d, dtype=object), ret_obj) + assert_equal(np.equal(arr1d, arr1d, dtype=bool), ret_bool) def test_object_array_reduction(self): # Reductions on object arrays diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 9b4ce9e47b1e..96a9f1f8bded 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -170,10 +170,11 @@ def __array_wrap__(self, arr, context): class TestComparisons(object): def test_ignore_object_identity_in_equal(self): - # Check error raised when comparing identical objects whose comparison + # Check comparing identical objects whose comparison # is not a simple boolean, e.g., arrays that are compared elementwise. a = np.array([np.array([1, 2, 3]), None], dtype=object) - assert_raises(ValueError, np.equal, a, a) + b = np.equal(a, a.copy()) + assert b.shape == a.shape # Check error raised when comparing identical non-comparable objects. class FunkyType(object): @@ -188,10 +189,11 @@ def __eq__(self, other): assert_equal(np.equal(a, a), [False]) def test_ignore_object_identity_in_not_equal(self): - # Check error raised when comparing identical objects whose comparison + # Check comparing identical objects whose comparison # is not a simple boolean, e.g., arrays that are compared elementwise. a = np.array([np.array([1, 2, 3]), None], dtype=object) - assert_raises(ValueError, np.not_equal, a, a) + b = np.not_equal(a, a.copy()) + assert b.shape == a.shape # Check error raised when comparing identical non-comparable objects. class FunkyType(object): diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 2309f7e4217a..cf45e181b9cd 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -562,11 +562,15 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): if invert: mask = np.ones(len(ar1), dtype=bool) for a in ar2: - mask &= (ar1 != a) + # convert object arrays to bool + # cannot use np.not_equal until 'S' and 'U' have loops + mask &= (ar1 != a).astype(bool) else: mask = np.zeros(len(ar1), dtype=bool) for a in ar2: - mask |= (ar1 == a) + # convert object arrays to bool + # cannot use np.equal until 'S' and 'U' have loops + mask |= (ar1 == a).astype(bool) return mask # Otherwise use sorting diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 18ccab3b86d2..457cca146ff3 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -99,7 +99,7 @@ def _replace_nan(a, val): if a.dtype == np.object_: # object arrays do not support `isnan` (gh-9009), so make a guess - mask = a != a + mask = np.not_equal(a, a, dtype=bool) elif issubclass(a.dtype.type, np.inexact): mask = np.isnan(a) else: diff --git a/numpy/linalg/tests/test_regression.py b/numpy/linalg/tests/test_regression.py index bd3a45872cb1..289566109b2e 100644 --- a/numpy/linalg/tests/test_regression.py +++ b/numpy/linalg/tests/test_regression.py @@ -109,10 +109,9 @@ def test_norm_object_array(self): assert_raises(ValueError, linalg.norm, testvector, ord='nuc') assert_raises(ValueError, linalg.norm, testvector, ord=np.inf) assert_raises(ValueError, linalg.norm, testvector, ord=-np.inf) - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - assert_raises((AttributeError, DeprecationWarning), - linalg.norm, testvector, ord=0) + # Succeeds, equivalent to "sum(x != 0)" + r = linalg.norm(testvector, ord=0) + assert_(r.dtype == 'bool') assert_raises(ValueError, linalg.norm, testvector, ord=-1) assert_raises(ValueError, linalg.norm, testvector, ord=-2) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index bb0d8d41238e..f98a29d823a6 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -4790,7 +4790,12 @@ def all(self, axis=None, out=None, keepdims=np._NoValue): mask = _check_mask_axis(self._mask, axis, **kwargs) if out is None: - d = self.filled(True).all(axis=axis, **kwargs).view(type(self)) + r = self.filled(True).all(axis=axis, **kwargs) + # object dtypes with axis=None return a scalar + if isinstance(r, bool): + d = type(self)(r) + else: + d = r.view(type(self)) if d.ndim: d.__setmask__(mask) elif mask: