gh-96670: Raise SyntaxError when parsing NULL bytes · pablogsal/cpython@ff3931b · GitHub
[go: up one dir, main page]

Skip to content

Commit ff3931b

Browse files
committed
pythongh-96670: Raise SyntaxError when parsing NULL bytes
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
1 parent 68c46ae commit ff3931b

File tree

6 files changed

+22696
-4
lines changed

6 files changed

+22696
-4
lines changed

Doc/data/python3.12.abi.new

Lines changed: 22644 additions & 0 deletions
Large diffs are not rendered by default.

Doc/whatsnew/3.12.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ Other Language Changes
8686
* :class:`memoryview` now supports the half-float type (the "e" format code).
8787
(Contributed by Dong-hee Na and Antoine Pitrou in :gh:`90751`.)
8888

89+
* The parser now raises :exc:`SyntaxError` when parsing source code containing
90+
null bytes. (Contributed by Pablo Galindo in :gh:`96670`.)
91+
92+
* :func:`ast.parse` now raises :exc:`SyntaxError` instead of :exc:`ValueError`
93+
when parsing source code containing null bytes. (Contributed by Pablo Galindo
94+
in :gh:`96670`.)
8995

9096
New Modules
9197
===========

Lib/test/test_cmd_line_script.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,18 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
657657
],
658658
)
659659

660+
def test_syntaxerror_null_bytes(self):
661+
script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"
662+
with os_helper.temp_dir() as script_dir:
663+
script_name = _make_test_script(script_dir, 'script', script)
664+
exitcode, stdout, stderr = assert_python_failure(script_name)
665+
self.assertEqual(
666+
stderr.splitlines()[-2:],
667+
[ b" x = '",
668+
b'SyntaxError: source code string cannot contain null bytes'
669+
],
670+
)
671+
660672
def test_consistent_sys_path_for_direct_execution(self):
661673
# This test case ensures that the following all give the same
662674
# sys.path configuration:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The parser now raises :exc:`SyntaxError` when parsing source code containing
2+
null bytes. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,16 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
378378
return 1;
379379
}
380380

381+
static int
382+
contains_null_bytes(const char* str, size_t size) {
383+
for (size_t i =0; i < size; i++) {
384+
if (str[i] == '\0') {
385+
return 1;
386+
}
387+
}
388+
return 0;
389+
}
390+
381391
static int
382392
tok_readline_recode(struct tok_state *tok) {
383393
PyObject *line;
@@ -399,6 +409,11 @@ tok_readline_recode(struct tok_state *tok) {
399409
error_ret(tok);
400410
goto error;
401411
}
412+
if (contains_null_bytes(buf, buflen)) {
413+
tok->line_start = tok->cur;
414+
syntaxerror(tok, "source code string cannot contain null bytes");
415+
goto error;
416+
}
402417
if (!tok_reserve_buf(tok, buflen + 1)) {
403418
goto error;
404419
}
@@ -829,12 +844,25 @@ tok_readline_raw(struct tok_state *tok)
829844
if (!tok_reserve_buf(tok, BUFSIZ)) {
830845
return 0;
831846
}
832-
char *line = Py_UniversalNewlineFgets(tok->inp,
833-
(int)(tok->end - tok->inp),
834-
tok->fp, NULL);
847+
memset(tok->inp, 0, BUFSIZ);
848+
int n_chars = (int)(tok->end - tok->inp);
849+
char *line = Py_UniversalNewlineFgets(tok->inp, n_chars, tok->fp, NULL);
835850
if (line == NULL) {
836851
return 1;
837852
}
853+
854+
// Find the first non null character starting from the right of tok->inp
855+
char *last_char = tok->inp + n_chars - 1;
856+
while (last_char >= tok->inp && *last_char == '\0') {
857+
last_char--;
858+
}
859+
if (contains_null_bytes(tok->inp, last_char-(tok->inp))) {
860+
// Mark the line we just parsed so the tokenizer can report the syntax error correctly
861+
tok->line_start = tok->cur;
862+
syntaxerror(tok, "source code string cannot contain null bytes");
863+
return 0;
864+
}
865+
838866
if (tok->fp_interactive &&
839867
tok_concatenate_interactive_new_line(tok, line) == -1) {
840868
return 0;

Python/pythonrun.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1858,7 +1858,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
18581858
}
18591859

18601860
if (strlen(str) != (size_t)size) {
1861-
PyErr_SetString(PyExc_ValueError,
1861+
PyErr_SetString(PyExc_SyntaxError,
18621862
"source code string cannot contain null bytes");
18631863
Py_CLEAR(*cmd_copy);
18641864
return NULL;

0 commit comments

Comments
 (0)
0