BUG: Fix recarray getattr and getindex return types · numpy/numpy@3cd9e73 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3cd9e73

Browse files
committed
BUG: Fix recarray getattr and getindex return types
This commit makes changes to `__getitem__` and `__getattr__` of recarrays: 1. recarrays no longer convert string ndarrays to chararrays, and instead simply return ndarrays of string type. 2. attribute access and index access of fields now behave identically. 3. dtype.type is now inherited when fields of structured type are accessed. Demonstration: >>> rec = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)], ... dtype=[('foo', 'S4'), ('bar', [('A', int), ('B', int)]), ('baz', int)]) Old behavior: >>> type(rec.foo), type(rec['foo']) (numpy.core.defchararray.chararray, numpy.recarray) >>> type(rec.bar), type(rec['bar']), rec.bar.dtype.type (numpy.recarray, numpy.recarray, numpy.void) >>> type(rec.baz), type(rec['baz']) (numpy.ndarray, numpy.ndarray) New behavior: >>> type(rec.foo), type(rec['foo']) (numpy.ndarray, numpy.ndarray) >>> type(rec.bar), type(rec['bar']), rec.bar.dtype.type (numpy.recarray, numpy.recarray, numpy.record) >>> type(rec.baz), type(rec['baz']) (numpy.ndarray, numpy.ndarray)
1 parent 937d1f2 commit 3cd9e73

File tree

4 files changed

+70
-24
lines changed

4 files changed

+70
-24
lines changed

doc/release/1.10.0-notes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ C API
6161
The changes to *swapaxes* also apply to the *PyArray_SwapAxes* C function,
6262
which now returns a view in all cases.
6363

64+
recarray field return types
65+
~~~~~~~~~~~~~~~~~~~~~~~~~~~
66+
Previously the returned types for recarray fields accessed by attribute and by
67+
index were inconsistent, and fields of string type were returned as chararrays.
68+
Now, fields accessed by either attribute or indexing will return an ndarray for
69+
fields of non-structured type, and a recarray for fields of structured type.
70+
Notably, this affects recarrays containing strings with whitespace, as trailing
71+
whitespace is trimmed from chararrays but kept in ndarrays of string type.
72+
Also, the dtype.type of nested structured fields is now inherited.
6473

6574
New Features
6675
============

numpy/core/records.py

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
import os
4141

4242
from . import numeric as sb
43-
from .defchararray import chararray
4443
from . import numerictypes as nt
4544
from numpy.compat import isfileobj, bytes, long
4645

@@ -238,17 +237,15 @@ def __getattribute__(self, attr):
238237
res = fielddict.get(attr, None)
239238
if res:
240239
obj = self.getfield(*res[:2])
241-
# if it has fields return a recarray,
242-
# if it's a string ('SU') return a chararray
240+
# if it has fields return a record,
243241
# otherwise return the object
244242
try:
245243
dt = obj.dtype
246244
except AttributeError:
245+
#happens if field is Object type
247246
return obj
248247
if dt.fields:
249-
return obj.view(obj.__class__)
250-
if dt.char in 'SU':
251-
return obj.view(chararray)
248+
return obj.view((record, obj.dtype.descr))
252249
return obj
253250
else:
254251
raise AttributeError("'record' object has no "
@@ -418,29 +415,37 @@ def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
418415
return self
419416

420417
def __getattribute__(self, attr):
418+
# See if ndarray has this attr, and return it if so. (note that this
419+
# means a field with the same name as an ndarray attr cannot be
420+
# accessed by attribute).
421421
try:
422422
return object.__getattribute__(self, attr)
423423
except AttributeError: # attr must be a fieldname
424424
pass
425+
426+
# look for a field with this name
425427
fielddict = ndarray.__getattribute__(self, 'dtype').fields
426428
try:
427429
res = fielddict[attr][:2]
428430
except (TypeError, KeyError):
429-
raise AttributeError("record array has no attribute %s" % attr)
431+
raise AttributeError("recarray has no attribute %s" % attr)
430432
obj = self.getfield(*res)
431-
# if it has fields return a recarray, otherwise return
432-
# normal array
433-
if obj.dtype.fields:
434-
return obj
435-
if obj.dtype.char in 'SU':
436-
return obj.view(chararray)
437-
return obj.view(ndarray)
438433

439-
# Save the dictionary
440-
# If the attr is a field name and not in the saved dictionary
441-
# Undo any "setting" of the attribute and do a setfield
442-
# Thus, you can't create attributes on-the-fly that are field names.
434+
# At this point obj will always be a recarray, since (see
435+
# PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
436+
# non-structured, convert it to an ndarray. If obj is structured leave
437+
# it as a recarray, but make sure to convert to the same dtype.type (eg
438+
# to preserve numpy.record type if present), since nested structured
439+
# fields do not inherit type.
440+
if obj.dtype.fields:
441+
return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
442+
else:
443+
return obj.view(ndarray)
443444

445+
# Save the dictionary.
446+
# If the attr is a field name and not in the saved dictionary
447+
# Undo any "setting" of the attribute and do a setfield
448+
# Thus, you can't create attributes on-the-fly that are field names.
444449
def __setattr__(self, attr, val):
445450
newattr = attr not in self.__dict__
446451
try:
@@ -468,9 +473,17 @@ def __setattr__(self, attr, val):
468473

469474
def __getitem__(self, indx):
470475
obj = ndarray.__getitem__(self, indx)
471-
if (isinstance(obj, ndarray) and obj.dtype.isbuiltin):
472-
return obj.view(ndarray)
473-
return obj
476+
477+
# copy behavior of getattr, except that here
478+
# we might also be returning a single element
479+
if isinstance(obj, ndarray):
480+
if obj.dtype.fields:
481+
return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
482+
else:
483+
return obj.view(type=ndarray)
484+
else:
485+
# return a single element
486+
return obj
474487

475488
def __repr__(self) :
476489
ret = ndarray.__repr__(self)
@@ -489,8 +502,6 @@ def field(self, attr, val=None):
489502
obj = self.getfield(*res)
490503
if obj.dtype.fields:
491504
return obj
492-
if obj.dtype.char in 'SU':
493-
return obj.view(chararray)
494505
return obj.view(ndarray)
495506
else:
496507
return self.setfield(val, *res)
@@ -601,7 +612,7 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
601612
>>> r.col1
602613
array([456, 2])
603614
>>> r.col2
604-
chararray(['dbe', 'de'],
615+
array(['dbe', 'de'],
605616
dtype='|S3')
606617
>>> import pickle
607618
>>> print pickle.loads(pickle.dumps(r))

numpy/core/tests/test_records.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,28 @@ def test_fromrecords_with_explicit_dtype(self):
124124
assert_equal(a.b, ['a', 'bbb'])
125125
assert_equal(a[-1].b, 'bbb')
126126

127+
def test_recarray_stringtypes(self):
128+
# Issue #3993
129+
a = np.array([('abc ', 1), ('abc', 2)],
130+
dtype=[('foo', 'S4'), ('bar', int)])
131+
a = a.view(np.recarray)
132+
assert_equal(a.foo[0] == a.foo[1], False)
133+
134+
def test_recarray_returntypes(self):
135+
a = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)],
136+
dtype=[('foo', 'S4'),
137+
('bar', [('A', int), ('B', int)]),
138+
('baz', int)])
139+
assert_equal(type(a.foo), np.ndarray)
140+
assert_equal(type(a['foo']), np.ndarray)
141+
assert_equal(type(a.bar), np.recarray)
142+
assert_equal(type(a['bar']), np.recarray)
143+
assert_equal(a.bar.dtype.type, np.record)
144+
assert_equal(type(a.baz), np.ndarray)
145+
assert_equal(type(a['baz']), np.ndarray)
146+
assert_equal(type(a[0].bar), np.record)
147+
assert_equal(a[0].bar.A, 1)
148+
127149

128150
class TestRecord(TestCase):
129151
def setUp(self):

numpy/doc/structured_arrays.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@
268268
>>> type(recordarr.bar)
269269
<class 'numpy.core.records.recarray'>
270270
271+
Note that if a field has the same name as an ndarray attribute, the ndarray
272+
attribute takes precedence. Such fields will be inaccessible by attribute but
273+
may still be accessed by index.
274+
271275
Partial Attribute Access
272276
------------------------
273277

0 commit comments

Comments
 (0)
0