From 7ae662cb154895ae5824a59067b3588e5d3b1f23 Mon Sep 17 00:00:00 2001 From: Alex Curtiss Date: Mon, 26 Sep 2022 01:12:58 -0600 Subject: [PATCH 1/4] String parser no longer crashes on null characters --- Lib/test/test_null.py | Bin 0 -> 36 bytes Parser/string_parser.c | 13 +++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 Lib/test/test_null.py diff --git a/Lib/test/test_null.py b/Lib/test/test_null.py new file mode 100644 index 0000000000000000000000000000000000000000..7a9e8da6dcd6b34e89332e42acac681615b33844 GIT binary patch literal 36 lcmY#Z(ACmaNX}2m%uBaY$Vn{8%+oaliE$|@DRD7?NdTxk2gCpX literal 0 HcmV?d00001 diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 9bc3b082136be5..0e7daa40d01021 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -168,12 +168,13 @@ int _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, const char **fstr, Py_ssize_t *fstrlen, Token *t) { - const char *s = PyBytes_AsString(t->bytes); - if (s == NULL) { + char *s; + Py_ssize_t len; + + if (PyBytes_AsStringAndSize(t->bytes, &s, &len)) { return -1; } - size_t len; int quote = Py_CHARMASK(*s); int fmode = 0; *bytesmode = 0; @@ -184,17 +185,21 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, while (!*bytesmode || !*rawmode) { if (quote == 'b' || quote == 'B') { quote =(unsigned char)*++s; + len--; *bytesmode = 1; } else if (quote == 'u' || quote == 'U') { quote = (unsigned char)*++s; + len--; } else if (quote == 'r' || quote == 'R') { quote = (unsigned char)*++s; + len--; *rawmode = 1; } else if (quote == 'f' || quote == 'F') { quote = (unsigned char)*++s; + len--; fmode = 1; } else { @@ -220,7 +225,7 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, } /* Skip the leading quote char. */ s++; - len = strlen(s); + len--; if (len > INT_MAX) { PyErr_SetString(PyExc_OverflowError, "string to parse is too long"); return -1; From fd4161d6c972385e16acbde83f033bd27e8b99de Mon Sep 17 00:00:00 2001 From: Alex Curtiss Date: Mon, 26 Sep 2022 10:39:44 -0600 Subject: [PATCH 2/4] Removed test, which is blocked by issue #96670 --- Lib/test/test_null.py | Bin 36 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 Lib/test/test_null.py diff --git a/Lib/test/test_null.py b/Lib/test/test_null.py deleted file mode 100644 index 7a9e8da6dcd6b34e89332e42acac681615b33844..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 36 lcmY#Z(ACmaNX}2m%uBaY$Vn{8%+oaliE$|@DRD7?NdTxk2gCpX From c48ec29aac22d8496a1ff8f790f65fe2299896f7 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 26 Sep 2022 16:46:55 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2022-09-26-16-46-54.gh-issue-96670.-n9hqb.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-09-26-16-46-54.gh-issue-96670.-n9hqb.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-09-26-16-46-54.gh-issue-96670.-n9hqb.rst b/Misc/NEWS.d/next/Core and Builtins/2022-09-26-16-46-54.gh-issue-96670.-n9hqb.rst new file mode 100644 index 00000000000000..20371fb3458ecc --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-09-26-16-46-54.gh-issue-96670.-n9hqb.rst @@ -0,0 +1 @@ +Fix interpreter crash when a string literal contains a null character. From b704f7e6ae76ca8ad2e041c734c69e90523bffe9 Mon Sep 17 00:00:00 2001 From: Alex Curtiss Date: Mon, 26 Sep 2022 16:47:26 -0600 Subject: [PATCH 4/4] Moved the INT_MAX length check to the top of the string parser. --- Parser/string_parser.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 0e7daa40d01021..5d5164e58c1955 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -175,6 +175,11 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, return -1; } + if (len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "string to parse is too long"); + return -1; + } + int quote = Py_CHARMASK(*s); int fmode = 0; *bytesmode = 0; @@ -226,10 +231,6 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, /* Skip the leading quote char. */ s++; len--; - if (len > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, "string to parse is too long"); - return -1; - } if (s[--len] != quote) { /* Last quote char must match the first. */ PyErr_BadInternalCall();