From 6be12fc41b754c6f726c2a0aaa6eee4139aff013 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 21 Mar 2022 20:05:37 +0100 Subject: [PATCH 1/4] bpo-46315: Use fopencookie() to avoid dup() in _PyTokenizer_FindEncodingFilename --- .../2022-03-21-20-05-10.bpo-46315.2QqFIC.rst | 2 + Parser/tokenizer.c | 80 +++++++++++++++++-- 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst new file mode 100644 index 00000000000000..bba45756fd4ffa --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst @@ -0,0 +1,2 @@ +``_PyTokenizer_FindEncodingFilename`` now uses ``fdopencookie`` to avoid +``dup`` on Emscripten and WASI. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 90dc8a2e369714..bd869b7a2a0b36 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2072,6 +2072,79 @@ _PyTokenizer_Get(struct tok_state *tok, return result; } +#if defined(__wasi__) || defined(__EMSCRIPTEN__) +/* fdopen() with borrowed fd + + WASI does not provide dup() and Emscripten's dup() emulation with open() + is slow. Implement fdopen() with fd borrowing on top of fdopencookie(). 
+ */ +typedef union { + void *cookie; + int fd; +} borrowed; + +static ssize_t +borrow_read(void *cookie, char *buf, size_t size) +{ + borrowed b; + b.cookie = cookie; + return read(b.fd, (void *)buf, size); +} + +static ssize_t +borrow_write(void *cookie, const char *buf, size_t size) +{ + errno = ENOTSUP; + return -1; +} + +static int +borrow_seek(void *cookie, off_t *off, int whence) +{ + borrowed b; + b.cookie = cookie; + off_t pos; + pos = lseek(b.fd, *off, whence); + if (pos == (off_t)-1) { + return -1; + } else { + *off = pos; + return 0; + } +} + +static int +borrow_close(void *cookie) +{ + // does not close(fd) + return 0; +} + +static FILE * +fdopen_borrow(int fd, const char *mode) { + // only reading is supported + if (strcmp(mode, "r") != 0) { + return NULL; + } + cookie_io_functions_t cookie_io = { + borrow_read, borrow_write, borrow_seek, borrow_close + }; + // cookie is just the fd + borrowed b; + b.fd = fd; + return fopencookie(b.cookie, "r", cookie_io); +} +#else +static FILE * +fdopen_borrow(int fd, const char *mode) { + fd = _Py_dup(fd); + if (fd < 0) { + return NULL; + } + return fdopen(fd, mode); +} +#endif + /* Get the encoding of a Python file. Check for the coding cookie and check if the file starts with a BOM. 
@@ -2091,12 +2164,7 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) const char *p_end = NULL; char *encoding = NULL; - fd = _Py_dup(fd); - if (fd < 0) { - return NULL; - } - - fp = fdopen(fd, "r"); + fp = fdopen_borrow(fd, "r"); if (fp == NULL) { return NULL; } From e9efa508567df7559a9e98b841c915d6643e449d Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 22 Mar 2022 12:56:49 +0100 Subject: [PATCH 2/4] Make fopencookie path much shorter --- Parser/tokenizer.c | 49 +++++++--------------------------------------- 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index bd869b7a2a0b36..423962c2d0605d 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2086,57 +2086,22 @@ typedef union { static ssize_t borrow_read(void *cookie, char *buf, size_t size) { - borrowed b; - b.cookie = cookie; + borrowed b = {.cookie = cookie}; return read(b.fd, (void *)buf, size); } -static ssize_t -borrow_write(void *cookie, const char *buf, size_t size) -{ - errno = ENOTSUP; - return -1; -} - -static int -borrow_seek(void *cookie, off_t *off, int whence) -{ - borrowed b; - b.cookie = cookie; - off_t pos; - pos = lseek(b.fd, *off, whence); - if (pos == (off_t)-1) { - return -1; - } else { - *off = pos; - return 0; - } -} - -static int -borrow_close(void *cookie) -{ - // does not close(fd) - return 0; -} - static FILE * fdopen_borrow(int fd, const char *mode) { - // only reading is supported - if (strcmp(mode, "r") != 0) { - return NULL; - } - cookie_io_functions_t cookie_io = { - borrow_read, borrow_write, borrow_seek, borrow_close - }; - // cookie is just the fd - borrowed b; - b.fd = fd; - return fopencookie(b.cookie, "r", cookie_io); + // supports only reading. seek fails. close and write are no-ops. 
+ assert(strcmp(mode, "r") == 0); + cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL}; + borrowed b = {.fd = fd}; + return fopencookie(b.cookie, "r", io_cb); } #else static FILE * fdopen_borrow(int fd, const char *mode) { + assert(strcmp(mode, "r") == 0); fd = _Py_dup(fd); if (fd < 0) { return NULL; From 0689719cc87b19a7be216c04a40bf29f560fc2d9 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 22 Mar 2022 12:57:27 +0100 Subject: [PATCH 3/4] Remove whatsnew --- .../Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst deleted file mode 100644 index bba45756fd4ffa..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2022-03-21-20-05-10.bpo-46315.2QqFIC.rst +++ /dev/null @@ -1,2 +0,0 @@ -``_PyTokenizer_FindEncodingFilename`` now uses ``fdopencookie`` to avoid -``dup`` on Emscripten and WASI. From fa6ee5eec0129e8e344782a910bfed9b74ba778d Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 22 Mar 2022 13:02:35 +0100 Subject: [PATCH 4/4] Shorten code even further --- Parser/tokenizer.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 423962c2d0605d..0941bcaaecc627 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2073,11 +2073,8 @@ _PyTokenizer_Get(struct tok_state *tok, } #if defined(__wasi__) || defined(__EMSCRIPTEN__) -/* fdopen() with borrowed fd - - WASI does not provide dup() and Emscripten's dup() emulation with open() - is slow. Implement fdopen() with fd borrowing on top of fdopencookie(). - */ +// fdopen() with borrowed fd. WASI does not provide dup() and Emscripten's +// dup() emulation with open() is slow. 
typedef union { void *cookie; int fd; @@ -2091,22 +2088,20 @@ borrow_read(void *cookie, char *buf, size_t size) } static FILE * -fdopen_borrow(int fd, const char *mode) { +fdopen_borrow(int fd) { // supports only reading. seek fails. close and write are no-ops. - assert(strcmp(mode, "r") == 0); cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL}; borrowed b = {.fd = fd}; return fopencookie(b.cookie, "r", io_cb); } #else static FILE * -fdopen_borrow(int fd, const char *mode) { - assert(strcmp(mode, "r") == 0); +fdopen_borrow(int fd) { fd = _Py_dup(fd); if (fd < 0) { return NULL; } - return fdopen(fd, mode); + return fdopen(fd, "r"); } #endif @@ -2129,7 +2124,7 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) const char *p_end = NULL; char *encoding = NULL; - fp = fdopen_borrow(fd, "r"); + fp = fdopen_borrow(fd); if (fp == NULL) { return NULL; }