From 5fbce6d395a6bfd1ec25022dcee4eef8c5bbad17 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 16 Aug 2019 16:50:30 -0700 Subject: [PATCH 01/12] BUG: Make np.record work on structured fields with no fields This replaces some more uses of `bool(dt.fields)` and `bool(dt.names)` with `dt.names is not None`. `dt.fields is not None` would have worked too, but checking `.names` is more prevalent elsewhere --- numpy/core/records.py | 14 +++++++------- numpy/core/tests/test_records.py | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/numpy/core/records.py b/numpy/core/records.py index 86a43306a698..e3e9f81939fe 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -254,7 +254,7 @@ def __getattribute__(self, attr): except AttributeError: #happens if field is Object type return obj - if dt.fields: + if dt.names is not None: return obj.view((self.__class__, obj.dtype.fields)) return obj else: @@ -279,7 +279,7 @@ def __getitem__(self, indx): obj = nt.void.__getitem__(self, indx) # copy behavior of record.__getattribute__, - if isinstance(obj, nt.void) and obj.dtype.fields: + if isinstance(obj, nt.void) and obj.dtype.names is not None: return obj.view((self.__class__, obj.dtype.fields)) else: # return a single element @@ -431,7 +431,7 @@ def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None, return self def __array_finalize__(self, obj): - if self.dtype.type is not record and self.dtype.fields: + if self.dtype.type is not record and self.dtype.names is not None: # if self.dtype is not np.record, invoke __setattr__ which will # convert it to a record if it is a void dtype. self.dtype = self.dtype @@ -459,7 +459,7 @@ def __getattribute__(self, attr): # with void type convert it to the same dtype.type (eg to preserve # numpy.record type if present), since nested structured fields do not # inherit type. Don't do this for non-void structures though. - if obj.dtype.fields: + if obj.dtype.names is not None: if issubclass(obj.dtype.type, nt.void): return obj.view(dtype=(self.dtype.type, obj.dtype)) return obj @@ -474,7 +474,7 @@ def __setattr__(self, attr, val): # Automatically convert (void) structured types to records # (but not non-void structures, subarrays, or non-structured voids) - if attr == 'dtype' and issubclass(val.type, nt.void) and val.fields: + if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None: val = sb.dtype((record, val)) newattr = attr not in self.__dict__ @@ -508,7 +508,7 @@ def __getitem__(self, indx): # copy behavior of getattr, except that here # we might also be returning a single element if isinstance(obj, ndarray): - if obj.dtype.fields: + if obj.dtype.names is not None: obj = obj.view(type(self)) if issubclass(obj.dtype.type, nt.void): return obj.view(dtype=(self.dtype.type, obj.dtype)) @@ -564,7 +564,7 @@ def field(self, attr, val=None): if val is None: obj = self.getfield(*res) - if obj.dtype.fields: + if obj.dtype.names is not None: return obj return obj.view(ndarray) else: diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index c059ef510eab..5cff8d0dddc3 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -437,6 +437,33 @@ def test_missing_field(self): arr = np.zeros((3,), dtype=[('x', int), ('y', int)]) assert_raises(ValueError, lambda: arr[['nofield']]) + @pytest.mark.parametrize('nfields', [0, 1, 2]) + def test_assign_dtype_attribute(self, nfields): + dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)][:nfields]) + data = np.zeros(3, dt).view(np.recarray) + + # the original and resulting dtypes differ on whether they are records + assert data.dtype.type == np.record + assert dt.type != np.record + + # ensure that the dtype remains a record even when assigned + data.dtype = dt + assert data.dtype.type == np.record + + @pytest.mark.parametrize('nfields', [0, 1, 2]) + def test_nested_fields_are_records(self, nfields): + """ Test that nested structured types are treated as records too """ + dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)][:nfields]) + dt_outer = np.dtype([('inner', dt)]) + + data = np.zeros(3, dt_outer).view(np.recarray) + assert isinstance(data, np.recarray) + assert isinstance(data['inner'], np.recarray) + + data0 = data[0] + assert isinstance(data0, np.record) + assert isinstance(data0['inner'], np.record) + def test_find_duplicate(): l1 = [1, 2, 3, 4, 5, 6] From 03cd242040120a149615cb2c843636069fa2ff10 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 16 Aug 2019 18:17:52 -0700 Subject: [PATCH 02/12] BUG: Fix crash in recarray if nested structured dtypes contain padding Previously attempting to access a field of such an array (such as when printing it!) would result in `ValueError: Changing the dtype of a 0d array is only supported if the itemsize is unchanged`. --- numpy/core/records.py | 4 ++-- numpy/core/tests/test_records.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/numpy/core/records.py b/numpy/core/records.py index e3e9f81939fe..4ac90f3aac08 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -255,7 +255,7 @@ def __getattribute__(self, attr): #happens if field is Object type return obj if dt.names is not None: - return obj.view((self.__class__, obj.dtype.fields)) + return obj.view((self.__class__, obj.dtype)) return obj else: raise AttributeError("'record' object has no " @@ -280,7 +280,7 @@ def __getitem__(self, indx): # copy behavior of record.__getattribute__, if isinstance(obj, nt.void) and obj.dtype.names is not None: - return obj.view((self.__class__, obj.dtype.fields)) + return obj.view((self.__class__, obj.dtype)) else: # return a single element return obj diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index 5cff8d0dddc3..73b1081c5554 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -464,6 +464,21 @@ def test_nested_fields_are_records(self, nfields): assert isinstance(data0, np.record) assert isinstance(data0['inner'], np.record) + def test_nested_dtype_padding(self): + """ test that trailing padding is preserved """ + # construct a dtype with padding at the end + dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)]) + dt_padded_end = dt[['a', 'b']] + assert dt_padded_end.itemsize == dt.itemsize + + dt_outer = np.dtype([('inner', dt_padded_end)]) + + data = np.zeros(3, dt_outer).view(np.recarray) + assert_equal(data['inner'].dtype, dt_padded_end) + + data0 = data[0] + assert_equal(data0['inner'].dtype, dt_padded_end) + def test_find_duplicate(): l1 = [1, 2, 3, 4, 5, 6] From 731127385578fa2cf4e0ad48ef2e456d897d36de Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 16 Aug 2019 18:38:26 -0700 Subject: [PATCH 03/12] BUG: Fix crash on genfromtxt with nested empty structured array Previously this would fail with `ValueError: could not assign tuple of length 2 to structure with 3 fields.`, now it raises `NotImplementedError`. --- numpy/lib/_iotools.py | 2 +- numpy/lib/tests/test_io.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 8a042f190924..04b147b0e685 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -121,7 +121,7 @@ def has_nested_fields(ndtype): """ for name in ndtype.names or (): - if ndtype[name].names: + if ndtype[name].names is not None: return True return False diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 7ef25538b3b4..038d08acfcb5 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -1517,6 +1517,13 @@ def test_dtype_with_object(self): test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype, converters=converters) + # nested but empty fields also aren't supported + ndtype = [('idx', int), ('code', object), ('nest', [])] + with assert_raises_regex(NotImplementedError, + 'Nested fields.* not supported.*'): + test = np.genfromtxt(TextIO(data), delimiter=";", + dtype=ndtype, converters=converters) + def test_userconverters_with_explicit_dtype(self): # Test user_converters w/ explicit (standard) dtype data = TextIO('skip,skip,2001-01-01,1.0,skip') From b7a9378b113da2cb27f40c4f485ffeb225f5d01d Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 16 Aug 2019 18:44:10 -0700 Subject: [PATCH 04/12] BUG: recfunctions: Don't return None in place of empty sequences Replacing empty tuples with `None` is a bad idea, and just results in an API that is hard to consume - especially since the behavior was never documented. This affects `get_names`, `get_names_flat`, and `get_fieldstructure`. --- numpy/lib/recfunctions.py | 12 ++++++------ numpy/lib/tests/test_recfunctions.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index fcc0d9a7a1b2..fb0ae5cc3726 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -132,11 +132,11 @@ def get_names(adtype): names = adtype.names for name in names: current = adtype[name] - if current.names: + if current.names is not None: listnames.append((name, tuple(get_names(current)))) else: listnames.append(name) - return tuple(listnames) or None + return tuple(listnames) def get_names_flat(adtype): @@ -165,9 +165,9 @@ def get_names_flat(adtype): for name in names: listnames.append(name) current = adtype[name] - if current.names: + if current.names is not None: listnames.extend(get_names_flat(current)) - return tuple(listnames) or None + return tuple(listnames) def flatten_descr(ndtype): @@ -263,7 +263,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,): names = adtype.names for name in names: current = adtype[name] - if current.names: + if current.names is not None: if lastname: parents[name] = [lastname, ] else: @@ -276,7 +276,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,): elif lastname: lastparent = [lastname, ] parents[name] = lastparent or [] - return parents or None + return parents def _izip_fields_flat(iterable): diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 11f8a5afa79a..3b972f4b1f8a 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -113,6 +113,14 @@ def test_get_names(self): test = get_names(ndtype) assert_equal(test, ('a', ('b', ('ba', 'bb')))) + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names(ndtype) + assert_equal(test, ('a', ('b', ()))) + + ndtype = np.dtype([]) + test = get_names(ndtype) + assert_equal(test, ()) + def test_get_names_flat(self): # Test get_names_flat ndtype = np.dtype([('A', '|S3'), ('B', float)]) @@ -123,6 +131,14 @@ def test_get_names_flat(self): test = get_names_flat(ndtype) assert_equal(test, ('a', 'b', 'ba', 'bb')) + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names_flat(ndtype) + assert_equal(test, ('a', 'b')) + + ndtype = np.dtype([]) + test = get_names_flat(ndtype) + assert_equal(test, ()) + def test_get_fieldstructure(self): # Test get_fieldstructure @@ -145,6 +161,11 @@ def test_get_fieldstructure(self): 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']} assert_equal(test, control) + # 0 fields + ndtype = np.dtype([]) + test = get_fieldstructure(ndtype) + assert_equal(test, {}) + def test_find_duplicates(self): # Test find_duplicates a = ma.array([(2, (2., 'B')), (1, (2., 'B')), (2, (2., 'B')), From 00b6745d037c04cda942901c1a9f281eaf0213a5 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 18 Aug 2019 20:40:58 -0500 Subject: [PATCH 05/12] MAINT: Test names against None for consistency In these instances the behavior isn't changed, since the for loop below acts like an if. However, in general this is an antipattern that crashes on 0-field structured types, and is warned against in the docs. If we remove instances of the antipattern, it will hopefully not reappear via copy-paste code. --- numpy/core/_internal.py | 2 +- numpy/core/tests/test_regression.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index 59da60253c9e..eda090287817 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -391,7 +391,7 @@ def _getfield_is_safe(oldtype, newtype, offset): if newtype.hasobject or oldtype.hasobject: if offset == 0 and newtype == oldtype: return - if oldtype.names: + if oldtype.names is not None: for name in oldtype.names: if (oldtype.fields[name][1] == offset and oldtype.fields[name][0] == newtype): diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 2421a11616ad..3d7de408701f 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -469,7 +469,7 @@ def test_pickle_py2_bytes_encoding(self): result = pickle.loads(data, encoding='bytes') assert_equal(result, original) - if isinstance(result, np.ndarray) and result.dtype.names: + if isinstance(result, np.ndarray) and result.dtype.names is not None: for name in result.dtype.names: assert_(isinstance(name, str)) From 0bdf0b9c50422fa698131481ff74a4bec39df33e Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 18 Aug 2019 21:02:20 -0500 Subject: [PATCH 06/12] MAINT: Improve ndpointer.__name__ for structured types with 0 fields Without this change, `np.dtype('V0')` and `np.dtype([])` produced types with the same name, which was misleading as they are different types. This is mostly cosmetic. --- numpy/ctypeslib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/ctypeslib.py b/numpy/ctypeslib.py index 11368587fa4e..53fa52d9282e 100644 --- a/numpy/ctypeslib.py +++ b/numpy/ctypeslib.py @@ -321,7 +321,7 @@ def ndpointer(dtype=None, ndim=None, shape=None, flags=None): # produce a name for the new type if dtype is None: name = 'any' - elif dtype.names: + elif dtype.names is not None: name = str(id(dtype)) else: name = dtype.str From 18eb65228d0e3eca03796e9c30744974e53e2997 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 18 Aug 2019 21:03:44 -0500 Subject: [PATCH 07/12] MAINT: Remove incorrect comment about flattening Also adjust the code to more clearly indicate what actually happens. The behavior is identical before and after this patch. --- numpy/lib/recfunctions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index fb0ae5cc3726..a5d464e32967 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -209,8 +209,8 @@ def zip_dtype(seqarrays, flatten=False): else: for a in seqarrays: current = a.dtype - if current.names and len(current.names) <= 1: - # special case - dtypes of 0 or 1 field are flattened + if current.names is not None and len(current.names) == 1: + # special case - dtypes of 1 field are flattened newdtype.extend(get_fieldspec(current)) else: newdtype.append(('', current)) From 1f0d66ce8be689ab86001696891858a1dee631cc Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 18 Aug 2019 21:10:21 -0500 Subject: [PATCH 08/12] MAINT: Use the `.names is None` idiom to detect structured array in arrayprint No behavior change here --- numpy/core/arrayprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 6a71de226d35..a305552ee9ff 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -672,7 +672,7 @@ def array2string(a, max_line_width=None, precision=None, if style is np._NoValue: style = repr - if a.shape == () and not a.dtype.names: + if a.shape == () and a.dtype.names is None: return style(a.item()) elif style is not np._NoValue: # Deprecation 11-9-2017 v1.14 From b11468e1a38c19a24ed0ebaede023c159c745e33 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 19 Aug 2019 16:47:35 -0500 Subject: [PATCH 09/12] BUG: Don't allow extra fields to be added in genfromtxt Previously passing `dtype=[], names=['a']` would append an extra field, even though `dtype=['a'], names=['b', 'c']` does not. --- numpy/lib/_iotools.py | 11 +++++------ numpy/lib/npyio.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 04b147b0e685..9713ff8b1671 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -925,28 +925,27 @@ def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs): names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt) ndtype = np.dtype(dict(formats=ndtype, names=names)) else: - nbtypes = len(ndtype) # Explicit names if names is not None: validate = NameValidator(**validationargs) if isinstance(names, basestring): names = names.split(",") # Simple dtype: repeat to match the nb of names - if nbtypes == 0: + if ndtype.names is None: formats = tuple([ndtype.type] * len(names)) names = validate(names, defaultfmt=defaultfmt) ndtype = np.dtype(list(zip(names, formats))) # Structured dtype: just validate the names as needed else: - ndtype.names = validate(names, nbfields=nbtypes, + ndtype.names = validate(names, nbfields=len(ndtype.names), defaultfmt=defaultfmt) # No implicit names - elif (nbtypes > 0): + elif ndtype.names is not None: validate = NameValidator(**validationargs) # Default initial names : should we change the format ? - if ((ndtype.names == tuple("f%i" % i for i in range(nbtypes))) and + if ((ndtype.names == tuple("f%i" % i for i in range(len(ndtype.names)))) and (defaultfmt != "f%i")): - ndtype.names = validate([''] * nbtypes, defaultfmt=defaultfmt) + ndtype.names = validate([''] * len(ndtype.names), defaultfmt=defaultfmt) # Explicit initial names : just validate else: ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt) diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index db6a8e5eb2c9..1991f289a468 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -2147,7 +2147,7 @@ def encode_unicode_cols(row_tup): outputmask = np.array(masks, dtype=mdtype) else: # Overwrite the initial dtype names if needed - if names and dtype.names: + if names and dtype.names is not None: dtype.names = names # Case 1. We have a structured type if len(dtype_flat) > 1: From 8187c9efcfcaa5d9ae0fcda42f9512b860001ffa Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 19 Aug 2019 16:54:35 -0500 Subject: [PATCH 10/12] BUG: Don't construct masked arrays with the wrong mask type in genfromtxt This only affects arrays with `dtype([])`, but also follows the recommended way to check for structured arrays in our docs --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 1991f289a468..a4a377e4ad29 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -2197,7 +2197,7 @@ def encode_unicode_cols(row_tup): # output = np.array(data, dtype) if usemask: - if dtype.names: + if dtype.names is not None: mdtype = [(_, bool) for _ in dtype.names] else: mdtype = bool From 5d63992a53ca71c3e00c52031b1f68921137bc4f Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 19 Aug 2019 16:56:56 -0500 Subject: [PATCH 11/12] BUG: Fix detection of structured arrays in mrecords This check would fail on the structured type `np.dtype([])`. No test, since I don't really understand mrecords --- numpy/ma/mrecords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py index daf2f8770c1b..b16e1670af38 100644 --- a/numpy/ma/mrecords.py +++ b/numpy/ma/mrecords.py @@ -211,7 +211,7 @@ def __getattribute__(self, attr): _localdict = ndarray.__getattribute__(self, '__dict__') _data = ndarray.view(self, _localdict['_baseclass']) obj = _data.getfield(*res) - if obj.dtype.fields: + if obj.dtype.names is not None: raise NotImplementedError("MaskedRecords is currently limited to" "simple records.") # Get some special attributes From 483f565d85dadc899f94710531fba8355d554d59 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 19 Aug 2019 17:07:05 -0500 Subject: [PATCH 12/12] MAINT: Fix remaining misuses of bool(dt.names) It's not clear that these have any visible effect, but we should be consistent with how we detect structured types. --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index a5d464e32967..afb3186b2260 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -70,7 +70,7 @@ def recursive_fill_fields(input, output): current = input[field] except ValueError: continue - if current.dtype.names: + if current.dtype.names is not None: recursive_fill_fields(current, output[field]) else: output[field][:len(current)] = current @@ -437,7 +437,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype # Make sure we have named fields - if not seqdtype.names: + if seqdtype.names is None: seqdtype = np.dtype([('', seqdtype)]) if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok @@ -657,7 +657,7 @@ def _recursive_rename_fields(ndtype, namemapper): for name in ndtype.names: newname = namemapper.get(name, name) current = ndtype[name] - if current.names: + if current.names is not None: newdtype.append( (newname, _recursive_rename_fields(current, namemapper)) )