8000 BUG: 1.14 multifield-indexing adds padding bytes · ahaldane/numpy@976dc28 · GitHub
[go: up one dir, main page]

Skip to content

Commit 976dc28

Browse files
committed
BUG: 1.14 multifield-indexing adds padding bytes
Fixes numpy#10387 Fixes numpy#10344 Fixes numpy#10409
1 parent 7210768 commit 976dc28

File tree

12 files changed

+330
-41
lines changed

12 files changed

+330
-41
lines changed

doc/release/1.14.1-notes.rst

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,33 @@
22
NumPy 1.14.1 Release Notes
33
==========================
44

5-
This is a bugfix release for some problems found since 1.14.0. This release
6-
includes fixes to the spacing in the str and repr of complex values.
5+
This is a bugfix release for some problems found since 1.14.0. The most
6+
important fixes are the reversion of the multifield index "view" behavior (see
7+
compatibility notes) and fixes to the spacing in the str and repr of complex
8+
values.
79

810
The Python versions supported are 2.7 and 3.4 - 3.6. The Python 3.6 wheels
911
available from PIP are built with Python 3.6.2 and should be compatible with
1012
all previous versions of Python 3.6. It was cythonized with Cython 0.26.1,
1113
which should be free of the bugs found in 0.27 while also being compatible with
1214
Python 3.7-dev.
67E6
1315

16+
17+
Compatibility notes
18+
===================
19+
20+
Multifield Indexing of Structured Arrays will still return a copy
21+
-----------------------------------------------------------------
22+
We have reverted the change in 1.14.0 that multi-field indexing of structured
23+
arrays returns a view instead of a copy, and pushed it back to 1.15. This will
24+
give us time to implement additional related bugfixes to ease the transition.
25+
Affected users should read the the 1.14.1 Numpy User Guide for advice on how to
26+
manage this transition, under "basics/structured arrays/accessing multiple
27+
fields". A new method `numpy.lib.recfunctions.repack_fields` has been
28+
introduced to help with this. All other changes to structured arrays described
29+
in the 1.14.0 release notes still hold in 1.14.1, and have not been reverted.
30+
31+
1432
Contributors
1533
============
1634

numpy/core/numeric.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def loads(*args, **kwargs):
6969
'False_', 'True_', 'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS',
7070
'BUFSIZE', 'ALLOW_THREADS', 'ComplexWarning', 'full', 'full_like',
7171
'matmul', 'shares_memory', 'may_share_memory', 'MAY_SHARE_BOUNDS',
72-
'MAY_SHARE_EXACT', 'TooHardError', 'AxisError' ]
72+
'MAY_SHARE_EXACT', 'TooHardError', 'AxisError', 'multifield_index_view']
7373

7474
if sys.version_info[0] < 3:
7575
__all__.extend(['getbuffer', 'newbuffer'])
@@ -2276,7 +2276,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
22762276
relative difference (`rtol` * abs(`b`)) and the absolute difference
22772277
`atol` are added together to compare against the absolute difference
22782278
between `a` and `b`.
2279-
2279+
22802280
.. warning:: The default `atol` is not appropriate for comparing numbers
22812281
that are much smaller than one (see Notes).
22822282
@@ -2313,12 +2313,12 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
23132313
absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`))
23142314
23152315
Unlike F438 the built-in `math.isclose`, the above equation is not symmetric
2316-
in `a` and `b` -- it assumes `b` is the reference value -- so that
2316+
in `a` and `b` -- it assumes `b` is the reference value -- so that
23172317
`isclose(a, b)` might be different from `isclose(b, a)`. Furthermore,
23182318
the default value of atol is not zero, and is used to determine what
23192319
small values should be considered close to zero. The default value is
23202320
appropriate for expected values of order unity: if the expected values
2321-
are significantly smaller than one, it can result in false positives.
2321+
are significantly smaller than one, it can result in false positives.
23222322
`atol` should be carefully selected for the use case at hand. A zero value
23232323
for `atol` will result in `False` if either `a` or `b` is zero.
23242324
@@ -2905,6 +2905,7 @@ def _setdef():
29052905
False_ = bool_(False)
29062906
True_ = bool_(True)
29072907

2908+
multifield_index_view = False
29082909

29092910
def extend_all(module):
29102911
adict = {}

numpy/core/src/multiarray/convert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
617617
const char *msg =
618618
"Numpy has detected that you may be viewing or writing to an array "
619619
"returned by selecting multiple fields in a structured array. \n\n"
620-
"This code may break in numpy 1.13 because this will return a view "
620+
"This code may break in numpy 1.15 because this will return a view "
621621
"instead of a copy -- see release notes for details.";
622622
/* 2016-09-19, 1.12 */
623623
if (DEPRECATE_FUTUREWARNING(msg) < 0) {

numpy/core/src/multiarray/mapping.c

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,15 +1383,75 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op)
13831383
return PyArray_EnsureAnyArray(array_subscript(self, op));
13841384
}
13851385

1386+
/*
1387+
* Helper function for _get_field_view which turns a multifield
1388+
* view into a "packed" copy, as done in numpy 1.14 and before.
1389+
* In numpy 1.15 this function will most likely be removed.
1390+
*/
1391+
NPY_NO_EXPORT int
1392+
_multifield_view_to_copy(PyArrayObject **view) {
1393+
static PyObject *copyfunc = NULL;
1394+
PyObject *viewcopy;
1395+
PyObject *mf_setting = NULL;
1396+
1397+
/*
1398+
* Check "numpy.multifield_view" setting, a secret setting to
1399+
* help development in the 1.14 to 1.15 transition.
1400+
* If true, return a view instead of a copy.
1401+
*/
1402+
mf_setting = npy_import("numpy", "multifield_index_view");
1403+
if (mf_setting == NULL) {
1404+
goto view_fail;
1405+
}
1406+
if (PyObject_IsTrue(mf_setting)) {
1407+
Py_DECREF(mf_setting);
1408+
/* return the view */
1409+
return 0;
1410+
}
1411+
Py_DECREF(mf_setting);
1412+
1413+
/* otherwise, return a copy of the view */
1414+
npy_cache_import("numpy.lib.recfunctions", "repack_fields", &copyfunc);
1415+
if (copyfunc == NULL) {
1416+
goto view_fail;
1417+
}
1418+
1419+
PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
1420+
viewcopy = PyObject_CallFunction(copyfunc, "O", *view);
1421+
if (viewcopy == NULL) {
1422+
goto view_fail;
1423+
}
1424+
Py_DECREF(*view);
1425+
*view = (PyArrayObject*)viewcopy;
1426+
1427+
/* warn when writing to the copy */
1428+
PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
1429+
return 0;
1430+
1431+
view_fail:
1432+
Py_DECREF(*view);
1433+
*view = NULL;
1434+
return 0;
1435+
}
1436+
1437+
13861438
/*
13871439
* Attempts to subscript an array using a field name or list of field names.
13881440
*
13891441
* If an error occurred, return 0 and set view to NULL. If the subscript is not
13901442
* a string or list of strings, return -1 and set view to NULL. Otherwise
13911443
* return 0 and set view to point to a new view into arr for the given fields.
1444+
*
1445+
* In numpy 1.14 and before, in the case of a list of field names the returned
1446+
* view will actually be a copy by default, with fields packed together.
1447+
* For development purposes, one can set the `np.multifield_index_view`
1448+
* variable to `True` to return a view instead. Similarly, the `force_view`
1449+
* argument causes a view to be returned. Both of these mechanisms
1450+
* can be removed in 1.15 when we plan to return a view always.
13921451
*/
13931452
NPY_NO_EXPORT int
1394-
_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
1453+
_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view,
1454+
int force_view)
13951455
{
13961456
*view = NULL;
13971457

@@ -1490,12 +1550,12 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
14901550
Py_DECREF(names);
14911551
return 0;
14921552
}
1493-
// disallow use of titles as index
1553+
/* disallow use of titles as index */
14941554
if (PyTuple_Size(tup) == 3) {
14951555
PyObject *title = PyTuple_GET_ITEM(tup, 2);
14961556
int titlecmp = PyObject_RichCompareBool(title, name, Py_EQ);
14971557
if (titlecmp == 1) {
1498-
// if title == name, we were given a title, not a field name
1558+
/* if title == name, we got a title, not a field name */
14991559
PyErr_SetString(PyExc_KeyError,
15001560
"cannot use field titles in multi-field index");
15011561
}
@@ -1508,7 +1568,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
15081568
}
15091569
Py_DECREF(title);
15101570
}
1511-
// disallow duplicate field indices
1571+
/* disallow duplicate field indices */
15121572
if (PyDict_Contains(fields, name)) {
15131573
PyObject *errmsg = PyUString_FromString(
15141574
"duplicate field of name ");
@@ -1562,7 +1622,11 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
15621622
return 0;
15631623
}
15641624

1565-
return 0;
1625+
if (force_view) {
1626+
return 0;
1627+
}
1628+
1629+
return _multifield_view_to_copy(view);
15661630
}
15671631
return -1;
15681632
}
@@ -1590,7 +1654,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
15901654
/* return fields if op is a string index */
15911655
if (PyDataType_HASFIELDS(PyArray_DESCR(self))) {
15921656
PyArrayObject *view;
1593-
int ret = _get_field_view(self, op, &view);
1657+
int ret = _get_field_view(self, op, &view, 0);
15941658
if (ret == 0){
15951659
if (view == NULL) {
15961660
return NULL;
@@ -1879,7 +1943,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
18791943
/* field access */
18801944
if (PyDataType_HASFIELDS(PyArray_DESCR(self))){
18811945
PyArrayObject *view;
1882-
int ret = _get_field_view(self, ind, &view);
1946+
int ret = _get_field_view(self, ind, &view, 1);
18831947
if (ret == 0){
18841948
if (view == NULL) {
18851949
return -1;

numpy/core/src/private/npy_import.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,18 @@ npy_cache_import(const char *module, const char *attr, P 10000 yObject **cache)
2929
}
3030
}
3131

32+
NPY_INLINE static PyObject *
33+
npy_import(const char *module, const char *attr)
34+
{
35+
PyObject *mod = PyImport_ImportModule(module);
36+
PyObject *ret = NULL;
37+
38+
if (mod != NULL) {
39+
ret = PyObject_GetAttrString(mod, attr);
40+
}
41+
Py_XDECREF(mod);
42+
43+
return ret;
44+
}
45+
3246
#endif

numpy/core/tests/test_multiarray.py

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,14 @@ def test_multiindex_titles(self):
11611161
assert_raises(ValueError, lambda : a[['b','b']]) # field exists, but repeated
11621162
a[['b','c']] # no exception
11631163

1164+
def test_multifield_index_setting(self):
1165+
a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')])
1166+
a[['a', 'c']] = (2,3)
1167+
1168+
np.multifield_index_view = True
1169+
v = a[['a', 'c']] # FutureWarning?
1170+
assert_equal(v.itemsize, a.itemsize)
1171+
np.multifield_index_view = False
11641172

11651173
class TestBool(object):
11661174
def test_test_interning(self):
@@ -4706,10 +4714,21 @@ def test_field_names(self):
47064714
# multiple subfields
47074715
fn2 = func('f2')
47084716
b[fn2] = 3
4709-
4710-
assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
4711-
assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
4712-
assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
4717+
with suppress_warnings() as sup:
4718+
sup.filter(FutureWarning,
4719+
"Numpy has detected that you .*")
4720+
4721+
assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
4722+
assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
4723+
assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
4724+
# view of subfield view/copy
4725+
assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(),
4726+
(2, 3))
4727+
assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(),
4728+
(3, 2))
4729+
view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])]
4730+
assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(),
4731+
(2, (1,)))
47134732

47144733
# non-ascii unicode field indexing is well behaved
47154734
if not is_py3:
@@ -4719,6 +4738,50 @@ def test_field_names(self):
47194738
assert_raises(ValueError, a.__setitem__, u'\u03e0', 1)
47204739
assert_raises(ValueError, a.__getitem__, u'\u03e0')
47214740

4741+
def test_field_names_deprecation(self):
4742+
4743+
def collect_warnings(f, *args, **kwargs):
4744+
with warnings.catch_warnings(record=True) as log:
4745+
warnings.simplefilter("always")
4746+
f(*args, **kwargs)
4747+
return [w.category for w in log]
4748+
4749+
a = np.zeros((1,), dtype=[('f1', 'i4'),
4750+
('f2', 'i4'),
4751+
('f3', [('sf1', 'i4')])])
4752+
a['f1'][0] = 1
4753+
a['f2'][0] = 2
4754+
a['f3'][0] = (3,)
4755+
b = np.zeros((1,), dtype=[('f1', 'i4'),
4756+
('f2', 'i4'),
4757+
('f3', [('sf1', 'i4')])])
4758+
b['f1'][0] = 1
4759+
b['f2'][0] = 2
4760+
b['f3'][0] = (3,)
4761+
4762+
# All the different functions raise a warning, but not an error
4763+
assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)),
4764+
[FutureWarning])
4765+
# For <=1.12 a is not modified, but it will be in 1.13
4766+
assert_equal(a, b)
4767+
4768+
# Views also warn
4769+
subset = a[['f1', 'f2']]
4770+
subset_view = subset.view()
4771+
assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10),
4772+
[FutureWarning])
4773+
# But the write goes through:
4774+
assert_equal(subset['f1'][0], 10)
4775+
# Only one warning per multiple field indexing, though (even if there
4776+
# are multiple views involved):
4777+
assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), [])
4778+
4779+
# make sure views of a multi-field index warn too
4780+
c = np.zeros(3, dtype='i8,i8,i8')
4781+
assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'),
4782+
[FutureWarning])
4783+
4784+
47224785
def test_record_hash(self):
47234786
a = np.array([(1, 2), (1, 2)], dtype='i1,i2')
47244787
a.flags.writeable = False

numpy/core/tests/test_numeric.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,7 @@ def test_can_cast_values(self):
904904
fi = np.finfo(dt)
905905
assert_(np.can_cast(fi.min, dt))
906906
assert_(np.can_cast(fi.max, dt))
907-
907+
908908

909909
# Custom exception class to test exception propagation in fromiter
910910
class NIterError(Exception):

numpy/core/tests/test_numerictypes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from numpy.testing import (
8-
run_module_suite, assert_, assert_equal, assert_raises
8+
run_module_suite, assert_, assert_equal, assert_raises, assert_warns
99
)
1010

1111
# This is the structure of the table used for plain objects:

numpy/core/tests/test_records.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,8 @@ def test_nonwriteable_setfield(self):
357357

358358
def test_out_of_order_fields(self):
359359
# names in the same order, padding added to descr
360+
# note: this only works for the future "view" behavior
361+
np.multifield_index_view = True
360362
x = self.data[['col1', 'col2']]
361363
assert_equal(x.dtype.names, ('col1', 'col2'))
362364
assert_equal(x.dtype.descr,
@@ -367,6 +369,7 @@ def test_out_of_order_fields(self):
367369
y = self.data[['col2', 'col1']]
368370
assert_equal(y.dtype.names, ('col2', 'col1'))
369371
assert_raises(ValueError, lambda: y.dtype.descr)
372+
np.multifield_index_view = False
370373

371374
def test_pickle_1(self):
372375
# Issue #1529
@@ -396,7 +399,8 @@ def test_objview_record(self):
396399

397400
# https://github.com/numpy/numpy/issues/3256
398401
ra = np.recarray((2,), dtype=[('x', object), ('y', float), ('z', int)])
399-
ra[['x','y']] # TypeError?
402+
with assert_warns(FutureWarning):
403+
ra[['x','y']] # TypeError?
400404

401405
def test_record_scalar_setitem(self):
402406
# https://github.com/numpy/numpy/issues/3561

0 commit comments

Comments
 (0)
0