From 202741836126c367f830c8ff872b9ad6b459c331 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Fri, 9 Nov 2018 19:50:57 -0500 Subject: [PATCH 1/2] TST: Add a few tests to fromfile. Some are actually failing. --- numpy/_core/tests/test_longdouble.py | 41 +++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/numpy/_core/tests/test_longdouble.py b/numpy/_core/tests/test_longdouble.py index a7ad5c9e5791..e201ee7fbae4 100644 --- a/numpy/_core/tests/test_longdouble.py +++ b/numpy/_core/tests/test_longdouble.py @@ -8,6 +8,8 @@ temppath, IS_MUSL ) from numpy._core.tests._locales import CommaDecimalPointLocale +import subprocess as sp +import sys LD_INFO = np.finfo(np.longdouble) @@ -234,7 +236,44 @@ def test_fromfile_complex(self): res = np.fromfile(path, dtype=ctype, sep=",") assert_equal(res, np.array([1.j])) - + @pytest.mark.parametrize('mode', ['numpy', 'python']) + @pytest.mark.parametrize( + 'buffer_type', + ['buffered', 'unbuffered']) + def test_fromfile_buffered_unseekable(self, buffer_type, mode): + # stdout is quite a unique file descriptor as it can be buffered, and + # unseekable + if buffer_type == 'buffered': + bufsize = -1 + else: + bufsize = 0 + s1 = b"numpy is not np" + s2 = b" and rain is falling" + s = s1 + s2 + p = sp.Popen([sys.executable, '-c', + 'import sys; sys.stdout.buffer.write(' + repr(s) + ')' + ], + stdout=sp.PIPE, bufsize=bufsize) + if mode == 'python': + buf = p.stdout.read(len(s1)) + arr = np.frombuffer(buf, dtype=np.uint8) + elif mode == 'numpy': + arr = np.fromfile(p.stdout, dtype=np.uint8, count=len(s1)) + else: + raise ValueError('Unknown mode {}'.format(mode)) + assert arr.tobytes() == s1 + + # Read the rest of the buffer + # We can't use `count=-1` because stdout doesn't support + # ftell and/or npy_fseek(fp, 0, SEEK_END) + if mode == 'python': + buf = p.stdout.read(len(s2)) + arr = np.frombuffer(buf, dtype=np.uint8) + elif mode == 'numpy': + arr = np.fromfile(p.stdout, dtype=np.uint8, 
count=len(s2)) + else: + raise ValueError('Unknown mode {}'.format(mode)) + assert arr.tobytes() == s2 @pytest.mark.skipif(string_to_longdouble_inaccurate, reason="Need strtold_l") From 6bbdd65ae431d0fd5830de3f2532399a69a5875d Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Fri, 9 Nov 2018 19:51:07 -0500 Subject: [PATCH 2/2] BUG: allow reading from buffered stdout. --- numpy/_core/include/numpy/npy_3kcompat.h | 89 +++++++++++------------- 1 file changed, 39 insertions(+), 50 deletions(-) diff --git a/numpy/_core/include/numpy/npy_3kcompat.h b/numpy/_core/include/numpy/npy_3kcompat.h index 62fde943aacc..6391dc4f484d 100644 --- a/numpy/_core/include/numpy/npy_3kcompat.h +++ b/numpy/_core/include/numpy/npy_3kcompat.h @@ -211,9 +211,9 @@ PyUnicode_Concat2(PyObject **left, PyObject *right) static inline FILE* npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) { - int fd, fd2, unbuf; + int fd, fd2, seekable; Py_ssize_t fd2_tmp; - PyObject *ret, *os, *io, *io_raw; + PyObject *ret, *os; npy_off_t pos; FILE *handle; @@ -223,6 +223,17 @@ npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) return PyFile_AsFile(file); } #endif + // Check for seekability before performing any file operations + // in case of error. 
+ ret = PyObject_CallMethod(file, "seekable", NULL); + if (ret == NULL){ + return NULL; + } + seekable = PyObject_IsTrue(ret); + Py_DECREF(ret); + if (seekable == -1){ + return NULL; + } /* Flush first to ensure things end up in the file in the correct order */ ret = PyObject_CallMethod(file, "flush", ""); @@ -276,33 +287,18 @@ npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) return NULL; } + if (seekable == 0) { + /* Set the original pos as invalid when the object is not seekable */ + *orig_pos = -1; + return handle; + } + /* Record the original raw file handle position */ *orig_pos = npy_ftell(handle); if (*orig_pos == -1) { - /* The io module is needed to determine if buffering is used */ - io = PyImport_ImportModule("io"); - if (io == NULL) { - fclose(handle); - return NULL; - } - /* File object instances of RawIOBase are unbuffered */ - io_raw = PyObject_GetAttrString(io, "RawIOBase"); - Py_DECREF(io); - if (io_raw == NULL) { - fclose(handle); - return NULL; - } - unbuf = PyObject_IsInstance(file, io_raw); - Py_DECREF(io_raw); - if (unbuf == 1) { - /* Succeed if the IO is unbuffered */ - return handle; - } - else { - PyErr_SetString(PyExc_IOError, "obtaining file position failed"); - fclose(handle); - return NULL; - } + PyErr_SetString(PyExc_IOError, "obtaining file position failed"); + fclose(handle); + return NULL; } /* Seek raw handle to the Python-side position */ @@ -331,8 +327,8 @@ npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) static inline int npy_PyFile_DupClose2(PyObject *file, FILE* handle, npy_off_t orig_pos) { - int fd, unbuf; - PyObject *ret, *io, *io_raw; + int fd, seekable; + PyObject *ret; npy_off_t position; /* For Python 2 PyFileObject, do nothing */ @@ -356,29 +352,22 @@ npy_PyFile_DupClose2(PyObject *file, FILE* handle, npy_off_t orig_pos) return -1; } - if (npy_lseek(fd, orig_pos, SEEK_SET) == -1) { + ret = PyObject_CallMethod(file, "seekable", NULL); + if (ret == NULL){ + return -1; + } + seekable = 
PyObject_IsTrue(ret); + Py_DECREF(ret); + if (seekable == -1){ + return -1; + } + else if (seekable == 0) { + return 0; + } - /* The io module is needed to determine if buffering is used */ - io = PyImport_ImportModule("io"); - if (io == NULL) { - return -1; - } - /* File object instances of RawIOBase are unbuffered */ - io_raw = PyObject_GetAttrString(io, "RawIOBase"); - Py_DECREF(io); - if (io_raw == NULL) { - return -1; - } - unbuf = PyObject_IsInstance(file, io_raw); - Py_DECREF(io_raw); - if (unbuf == 1) { - /* Succeed if the IO is unbuffered */ - return 0; - } - else { - PyErr_SetString(PyExc_IOError, "seeking file failed"); - return -1; - } + if (npy_lseek(fd, orig_pos, SEEK_SET) == -1) { + PyErr_SetString(PyExc_IOError, "seeking file failed"); + return -1; } if (position == -1) {