From c4a990563b2c85747eb9cdccc5a783846d0071a0 Mon Sep 17 00:00:00 2001 From: Yu Feng Date: Tue, 2 Feb 2016 16:22:42 -0800 Subject: [PATCH] BUG: Invalid read of size 4 in PyArray_FromFile When the input dtype has a subarray, the dtype is DECREFed by PyArray_NewFromDescr, before dtype->elsize is accessed. If no one else holds a reference to the dtype object, then the dtype object will be destroyed, and dtype->elsize shall not be accessed. This raises an error in Valgrind, and occasionally crashes innocently looking code. e.g. ```numpy.fromfile('filename', dtype=('f8', 3'))``` A workaround would be ```dtype=numpy.dtype(('f8', 3)); numpy.fromfile('filename', dtype=dtype)``` This affects versions as early as 1.9.2 (where I found this bug) and seems to be still relevant today. I hope someone can prove me wrong. Valgrind log: ``` ==17479== Invalid read of size 4 ==17479== at 0x1CD0BB88: UnknownInlinedFun (stdio2.h:295) ==17479== by 0x1CD0BB88: array_fromfile_binary (ctors.c:3177) ==17479== by 0x1CD0BB88: PyArray_FromFile (ctors.c:3304) ==17479== by 0x1CD886B5: array_fromfile (multiarraymodule.c:2040) ==17479== by 0x37AB6E2571: do_call (ceval.c:4327) ==17479== by 0x37AB6E2571: call_function (ceval.c:4135) ==17479== by 0x37AB6E2571: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB6E2665: fast_function (ceval.c:4198) ==17479== by 0x37AB6E2665: call_function (ceval.c:4133) ==17479== by 0x37AB6E2665: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB6E2665: fast_function (ceval.c:4198) ==17479== by 0x37AB6E2665: call_function (ceval.c:4133) ==17479== by 0x37AB6E2665: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB664DCB: gen_send_ex.isra.0 (genobject.c:85) ==17479== by 0x37AB6DE419: PyEval_EvalFrameEx (ceval.c:2586) ==17479== by 0x37AB664DCB: gen_send_ex.isra.0 (genobject.c:85) ==17479== by 0x37AB6DE419: PyEval_EvalFrameEx (ceval.c:2586) ==17479== by 0x37AB6E2665: fast_function (ceval.c:4198) ==17479== by 0x37AB6E2665: call_function (ceval.c:4133) ==17479== by 0x37AB6E2665: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB6E36B3: PyEval_EvalCodeEx (ceval.c:3344) ==17479== by 0x37AB6E25C5: fast_function (ceval.c:4208) ==17479== by 0x37AB6E25C5: call_function (ceval.c:4133) ==17479== by 0x37AB6E25C5: PyEval_EvalFrameEx (ceval.c:2755) ==17479== Address 0x113bef20 is 32 bytes inside a block of size 88 free'd ==17479== at 0x4A07D6A: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) ==17479== by 0x1CD07C15: _update_descr_and_dimensions (ctors.c:273) ==17479== by 0x1CD07C15: PyArray_NewFromDescr_int (ctors.c:900) ==17479== by 0x1CD0BB76: PyArray_NewFromDescr (ctors.c:1121) ==17479== by 0x1CD0BB76: array_fromfile_binary (ctors.c:3168) ==17479== by 0x1CD0BB76: PyArray_FromFile (ctors.c:3304) ==17479== by 0x1CD886B5: array_fromfile (multiarraymodule.c:2040) ==17479== by 0x37AB6E2571: do_call (ceval.c:4327) ==17479== by 0x37AB6E2571: call_function (ceval.c:4135) ==17479== by 0x37AB6E2571: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB6E2665: fast_function (ceval.c:4198) ==17479== by 0x37AB6E2665: call_function (ceval.c:4133) ==17479== by 0x37AB6E2665: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB6E2665: fast_function (ceval.c:4198) ==17479== by 0x37AB6E2665: call_function (ceval.c:4133) ==17479== by 0x37AB6E2665: PyEval_EvalFrameEx (ceval.c:2755) ==17479== by 0x37AB664DCB: gen_send_ex.isra.0 (genobject.c:85) ==17479== by 0x37AB6DE419: PyEval_EvalFrameEx (ceval.c:2586) ==17479== by 0x37AB664DCB: gen_send_ex.isra.0 (genobject.c:85) ==17479== by 0x37AB6DE419: PyEval_EvalFrameEx (ceval.c:2586) ==17479== by 0x37AB6E2665: fast_function (ceval.c:4198) ==17479== by 0x37AB6E2665: call_function (ceval.c:4133) ==17479== by 0x37AB6E2665: PyEval_EvalFrameEx (ceval.c:2755) ``` --- numpy/core/src/multiarray/ctors.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 785b3073a38b..0a85f4dc868e 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -3264,17 +3264,20 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nrea } num = numbytes / dtype->elsize; } + Py_INCREF(dtype); r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &num, NULL, NULL, 0, NULL); if (r == NULL) { - return NULL; + goto fail; } NPY_BEGIN_ALLOW_THREADS; *nread = fread(PyArray_DATA(r), dtype->elsize, num, fp); NPY_END_ALLOW_THREADS; +fail: + Py_DECREF(dtype); return r; } @@ -3297,6 +3300,8 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, npy_intp bytes, totalbytes; size = (num >= 0) ? num : FROM_BUFFER_SIZE; + + Py_INCREF(dtype); r = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type, dtype, @@ -3304,6 +3309,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, NULL, NULL, 0, NULL); if (r == NULL) { + Py_DECREF(dtype); return NULL; } clean_sep = swab_separator(sep); @@ -3352,6 +3358,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, free(clean_sep); fail: + Py_DECREF(dtype); if (err == 1) { PyErr_NoMemory(); }