-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
BUG MaskedArray __eq__ wrong for masked scalar, multi-d recarray #8590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
from __future__ import division, absolute_import, print_function | ||
|
||
import sys | ||
import operator | ||
import warnings | ||
from functools import reduce | ||
|
||
|
@@ -1602,21 +1603,11 @@ def make_mask(m, copy=False, shrink=True, dtype=MaskType): | |
""" | ||
if m is nomask: | ||
return nomask | ||
elif isinstance(m, ndarray): | ||
# We won't return after this point to make sure we can shrink the mask | ||
# Fill the mask in case there are missing data | ||
m = filled(m, True) | ||
# Make sure the input dtype is valid | ||
dtype = make_mask_descr(dtype) | ||
if m.dtype == dtype: | ||
if copy: | ||
result = m.copy() | ||
else: | ||
result = m | ||
else: | ||
result = np.array(m, dtype=dtype, copy=copy) | ||
else: | ||
|
||
|
||
# Make sure the input dtype is valid. | ||
dtype = make_mask_descr(dtype) | ||
# Fill the mask in case there are missing data; turn it into an ndarray. | ||
result = np.array(filled(m, True), copy=copy, dtype=dtype, subok=True) | ||
# Bas les masques ! | ||
if shrink and (not result.dtype.names) and (not result.any()): | ||
return nomask | ||
|
@@ -1733,7 +1724,8 @@ def _recursive_mask_or(m1, m2, newmask): | |
if (dtype1 != dtype2): | ||
raise ValueError("Incompatible dtypes '%s'<>'%s'" % (dtype1, dtype2)) | ||
if dtype1.names: | ||
newmask = np.empty_like(m1) | ||
# Allocate an output mask array with the properly broadcast shape. | ||
newmask = np.empty(np.broadcast(m1, m2).shape, dtype1) | ||
_recursive_mask_or(m1, m2, newmask) | ||
return newmask | ||
return make_mask(umath.logical_or(m1, m2), copy=copy, shrink=shrink) | ||
|
@@ -3873,81 +3865,84 @@ def _delegate_binop(self, other): | |
return True | ||
return False | ||
|
||
def __eq__(self, other): | ||
""" | ||
Check whether other equals self elementwise. | ||
def _comparison(self, other, compare): | ||
"""Compare self with other using operator.eq or operator.ne. | ||
|
||
When either of the elements is masked, the result is masked as well, | ||
but the underlying boolean data are still set, with self and other | ||
considered equal if both are masked, and unequal otherwise. | ||
|
||
For structured arrays, all fields are combined, with masked values | ||
ignored. The result is masked if all fields were masked, with self | ||
and other considered equal only if both were fully masked. | ||
""" | ||
if self is masked: | ||
return masked | ||
omask = getmask(other) | ||
if omask is nomask: | ||
check = self.filled(0).__eq__(other) | ||
try: | ||
check = check.view(type(self)) | ||
check._mask = self._mask | ||
except AttributeError: | ||
# Dang, we have a bool instead of an array: return the bool | ||
return check | ||
smask = self.mask | ||
mask = mask_or(smask, omask, copy=True) | ||
|
||
odata = getdata(other) | ||
if mask.dtype.names: | ||
# For possibly masked structured arrays we need to be careful, | ||
# since the standard structured array comparison will use all | ||
# fields, masked or not. To avoid masked fields influencing the | ||
# outcome, we set all masked fields in self to other, so they'll | ||
# count as equal. To prepare, we ensure we have the right shape. | ||
broadcast_shape = np.broadcast(self, odata).shape | ||
sbroadcast = np.broadcast_to(self, broadcast_shape, subok=True) | ||
sbroadcast._mask = mask | ||
sdata = sbroadcast.filled(odata) | ||
# Now take care of the mask; the merged mask should have an item | ||
# masked if all fields were masked (in one and/or other). | ||
mask = (mask == np.ones((), mask.dtype)) | ||
|
||
else: | ||
odata = filled(other, 0) | ||
check = self.filled(0).__eq__(odata).view(type(self)) | ||
if self._mask is nomask: | ||
check._mask = omask | ||
else: | ||
mask = mask_or(self._mask, omask) | ||
if mask.dtype.names: | ||
if mask.size > 1: | ||
axis = 1 | ||
else: | ||
axis = None | ||
try: | ||
mask = mask.view((bool_, len(self.dtype))).all(axis) | ||
except (ValueError, np.AxisError): | ||
# TODO: what error are we trying to catch here? | ||
# invalid axis, or invalid view? | ||
mask = np.all([[f[n].all() for n in mask.dtype.names] | 8000 tr>||
for f in mask], axis=axis) | ||
check._mask = mask | ||
# For regular arrays, just use the data as they come. | ||
sdata = self.data | ||
|
||
check = compare(sdata, odata) | ||
|
||
if isinstance(check, (np.bool_, bool)): | ||
return masked if mask else check | ||
|
||
if mask is not nomask: | ||
# Adjust elements that were masked, which should be treated | ||
# as equal if masked in both, unequal if masked in one. | ||
# Note that this works automatically for structured arrays too. | ||
check = np.where(mask, compare(smask, omask), check) | ||
if mask.shape != check.shape: | ||
# Guarantee consistency of the shape, making a copy since the | ||
# the mask may need to get written to later. | ||
mask = np.broadcast_to(mask, check.shape).copy() | ||
|
||
check = check.view(type(self)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Went back to |
||
check._mask = mask | ||
return check | ||
|
||
def __ne__(self, other): | ||
def __eq__(self, other): | ||
"""Check whether other equals self elementwise. | ||
|
||
When either of the elements is masked, the result is masked as well, | ||
but the underlying boolean data are still set, with self and other | ||
considered equal if both are masked, and unequal otherwise. | ||
|
||
For structured arrays, all fields are combined, with masked values | ||
ignored. The result is masked if all fields were masked, with self | ||
and other considered equal only if both were fully masked. | ||
""" | ||
Check whether other doesn't equal self elementwise | ||
return self._comparison(other, operator.eq) | ||
|
||
def __ne__(self, other): | ||
"""Check whether other does not equal self elementwise. | ||
|
||
When either of the elements is masked, the result is masked as well, | ||
but the underlying boolean data are still set, with self and other | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any tests of this underlying bool data? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, note how the tests all do something like |
||
considered equal if both are masked, and unequal otherwise. | ||
|
||
For structured arrays, all fields are combined, with masked values | ||
ignored. The result is masked if all fields were masked, with self | ||
and other considered equal only if both were fully masked. | ||
""" | ||
if self is masked: | ||
return masked | ||
omask = getmask(other) | ||
if omask is nomask: | ||
check = self.filled(0).__ne__(other) | ||
try: | ||
check = check.view(type(self)) | ||
check._mask = self._mask | ||
except AttributeError: | ||
# In case check is a boolean (or a numpy.bool) | ||
return check | ||
else: | ||
odata = filled(other, 0) | ||
check = self.filled(0).__ne__(odata).view(type(self)) | ||
if self._mask is nomask: | ||
check._mask = omask | ||
else: | ||
mask = mask_or(self._mask, omask) | ||
if mask.dtype.names: | ||
if mask.size > 1: | ||
axis = 1 | ||
else: | ||
axis = None | ||
try: | ||
mask = mask.view((bool_, len(self.dtype))).all(axis) | ||
except (ValueError, np.AxisError): | ||
# TODO: what error are we trying to catch here? | ||
# invalid axis, or invalid view? | ||
mask = np.all([[f[n].all() for n in mask.dtype.names] | ||
for f in mask], axis=axis) | ||
check._mask = mask | ||
return check | ||
return self._comparison(other, operator.ne) | ||
|
||
def __add__(self, other): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1335,32 +1335,96 @@ def test_eq_on_structured(self): | |
ndtype = [('A', int), ('B', int)] | ||
a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) | ||
test = (a == a) | ||
assert_equal(test, [True, True]) | ||
assert_equal(test.data, [True, True]) | ||
assert_equal(test.mask, [False, False]) | ||
test = (a == a[0]) | ||
assert_equal(test.data, [True, False]) | ||
assert_equal(test.mask, [False, False]) | ||
b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) | ||
test = (a == b) | ||
assert_equal(test, [False, True]) | ||
assert_equal(test.data, [False, True]) | ||
assert_equal(test.mask, [True, False]) | ||
test = (a[0] == b) | ||
assert_equal(test.data, [False, False]) | ||
assert_equal(test.mask, [True, False]) | ||
b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) | ||
test = (a == b) | ||
assert_equal(test, [True, False]) | ||
assert_equal(test.data, [True, True]) | ||
assert_equal(test.mask, [False, False]) | ||
# complicated dtype, 2-dimensional array. | ||
ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] | ||
a = array([[(1, (1, 1)), (2, (2, 2))], | ||
[(3, (3, 3)), (4, (4, 4))]], | ||
mask=[[(0, (1, 0)), (0, (0, 1))], | ||
[(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) | ||
test = (a[0, 0] == a) | ||
assert_equal(test.data, [[True, False], [False, False]]) | ||
assert_equal(test.mask, [[False, False], [False, True]]) | ||
|
||
def test_ne_on_structured(self): | ||
# Test the equality of structured arrays | ||
ndtype = [('A', int), ('B', int)] | ||
a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) | ||
test = (a != a) | ||
assert_equal(test, [False, False]) | ||
assert_equal(test.data, [False, False]) | ||
assert_equal(test.mask, [False, False]) | ||
test = (a != a[0]) | ||
assert_equal(test.data, [False, True]) | ||
assert_equal(test.mask, [False, False]) | ||
b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) | ||
test = (a != b) | ||
assert_equal(test, [True, False]) | ||
assert_equal(test.data, [True, False]) | ||
assert_equal(test.mask, [True, False]) | ||
test = (a[0] != b) | ||
assert_equal(test.data, [True, True]) | ||
assert_equal(test.mask, [True, False]) | ||
b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) | ||
test = (a != b) | ||
assert_equal(test, [False, True]) | ||
assert_equal(test.data, [False, False]) | ||
assert_equal(test.mask, [False, False]) | ||
# complicated dtype, 2-dimensional array. | ||
ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] | ||
a = array([[(1, (1, 1)), (2, (2, 2))], | ||
[(3, (3, 3)), (4, (4, 4))]], | ||
mask=[[(0, (1, 0)), (0, (0, 1))], | ||
[(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) | ||
test = (a[0, 0] != a) | ||
assert_equal(test.data, [[False, True], [True, True]]) | ||
assert_equal(test.mask, [[False, False], [False, True]]) | ||
|
||
def test_eq_ne_structured_extra(self): | ||
# ensure simple examples are symmetric and make sense. | ||
# from https://github.com/numpy/numpy/pull/8590#discussion_r101126465 | ||
dt = np.dtype('i4,i4') | ||
for m1 in (mvoid((1, 2), mask=(0, 0), dtype=dt), | ||
mvoid((1, 2), mask=(0, 1), dtype=dt), | ||
mvoid((1, 2), mask=(1, 0), dtype=dt), | ||
mvoid((1, 2), mask=(1, 1), dtype=dt)): | ||
ma1 = m1.view(MaskedArray) | ||
r1 = ma1.view('2i4') | ||
for m2 in (np.array((1, 1), dtype=dt), | ||
mvoid((1, 1), dtype=dt), | ||
mvoid((1, 0), mask=(0, 1), dtype=dt), | ||
mvoid((3, 2), mask=(0, 1), dtype=dt)): | ||
ma2 = m2.view(MaskedArray) | ||
r2 = ma2.view('2i4') | ||
eq_expected = (r1 == r2).all() | ||
assert_equal(m1 == m2, eq_expected) | ||
assert_equal(m2 == m1, eq_expected) | ||
assert_equal(ma1 == m2, eq_expected) | ||
assert_equal(m1 == ma2, eq_expected) | ||
assert_equal(ma1 == ma2, eq_expected) | ||
# Also check it is the same if we do it element by element. | ||
el_by_el = [m1[name] == m2[name] for name in dt.names] | ||
assert_equal(array(el_by_el, dtype=bool).all(), eq_expected) | ||
ne_expected = (r1 != r2).any() | ||
assert_equal(m1 != m2, ne_expected) | ||
assert_equal(m2 != m1, ne_expected) | ||
assert_equal(ma1 != m2, ne_expected) | ||
assert_equal(m1 != ma2, ne_expected) | ||
assert_equal(ma1 != ma2, ne_expected) | ||
el_by_el = [m1[name] != m2[name] for name in dt.names] | ||
assert_equal(array(el_by_el, dtype=bool).any(), ne_expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have any tests of equality against normal (non-masked) arrays? |
||
|
||
def test_eq_with_None(self): | ||
# Really, comparisons with None should not be done, but check them | ||
|
@@ -1393,6 +1457,22 @@ def test_eq_with_scalar(self): | |
assert_equal(a == 0, False) | ||
assert_equal(a != 1, False) | ||
assert_equal(a != 0, True) | ||
b = array(1, mask=True) | ||
assert_equal(b == 0, masked) | ||
assert_equal(b == 1, masked) | ||
assert_equal(b != 0, masked) | ||
assert_equal(b != 1, masked) | ||
|
||
def test_eq_different_dimensions(self): | ||
m1 = array([1, 1], mask=[0, 1]) | ||
# test comparison with both masked and regular arrays. | ||
for m2 in (array([[0, 1], [1, 2]]), | ||
np.array([[0, 1], [1, 2]])): | ||
test = (m1 == m2) | ||
assert_equal(test.data, [[False, False], | ||
[True, False]]) | ||
assert_equal(test.mask, [[False, True], | ||
[False, True]]) | ||
|
||
def test_numpyarithmetics(self): | ||
# Check that the mask is not back-propagated when using numpy functions | ||
|
@@ -3978,7 +4058,15 @@ def test_make_mask(self): | |
test = make_mask(mask, dtype=mask.dtype) | ||
assert_equal(test.dtype, bdtype) | ||
assert_equal(test, np.array([(0, 0), (0, 1)], dtype=bdtype)) | ||
|
||
# Ensure this also works for void | ||
mask = np.array((False, True), dtype='?,?')[()] | ||
assert_(isinstance(mask, np.void)) | ||
test = make_mask(mask, dtype=mask.dtype) | ||
assert_equal(test, mask) | ||
assert_(test is not mask) | ||
mask = np.array((0, 1), dtype='i4,i4')[()] | ||
test2 = make_mask(mask, dtype=mask.dtype) | ||
assert_equal(test2, test) | ||
# test that nomask is returned when m is nomask. | ||
bools = [True, False] | ||
dtypes = [MaskType, np.float] | ||
|
@@ -3987,7 +4075,6 @@ def test_make_mask(self): | |
res = make_mask(nomask, copy=cpy, shrink=shr, dtype=dt) | ||
assert_(res is nomask, msgformat % (cpy, shr, dt)) | ||
|
||
|
||
def test_mask_or(self): | ||
# Initialize | ||
mtype = [('a', np.bool), ('b', np.bool)] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need broadcasting too? Perhaps we should just broadcast
mask
at the top, since we do so later anywayThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No,
filled
does the broadcasting correctly. I went back and forth about putting the broadcasting at the top, but decided against it, since usually it will not be needed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess my point is more whether it's even valid to have a mask that doesn't match the data shape, which I don't believe is guaranteed here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In general, no, it isn't, but
filled
does broadcast correctly (and the mask is only used for that purpose; ideally, the filled function would just take amask
argument, usingself.mask
by default; but that's for yet another PR...)