[3.11] gh-96670: Raise SyntaxError when parsing NULL bytes (GH-97594) by lysnikolaou · Pull Request #104195 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.11] gh-96670: Raise SyntaxError when parsing NULL bytes (GH-97594) #104195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/cpython/fileobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#endif

PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*);

/* The std printer acts as a preliminary sys.stderr until the new io
infrastructure is in place. */
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,10 @@ def check_limit(prefix, repeated):
check_limit("a", "[0]")
check_limit("a", "*a")

def test_null_bytes(self):
# Parsing a source string that contains a NUL character must raise
# SyntaxError.
# NOTE(review): `msg` here is only the failure message reported when no
# exception is raised; assertRaises does not match it against the
# exception text, so the wording of the error is not verified by this test.
with self.assertRaises(SyntaxError,
msg="source code string cannot contain null bytes"):
ast.parse("a\0b")

class ASTHelpers_Test(unittest.TestCase):
maxDiff = None
Expand Down
3 changes: 1 addition & 2 deletions Lib/test/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,11 +334,10 @@ def test_compile(self):
self.assertRaises(TypeError, compile)
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
mode='eval', source='0', filename='tmp')
compile('print("\xe5")\n', '', 'exec')
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec')
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')

# test the optimize argument
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_cmd_line_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,31 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
],
)

def test_syntaxerror_null_bytes(self):
# Run a script whose first line has a NUL character inside a string
# literal, followed by text that would execute a shell command if the
# source were cut at the NUL and the rest parsed as code.
script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"
with os_helper.temp_dir() as script_dir:
script_name = _make_test_script(script_dir, 'script', script)
# The interpreter is expected to exit with a failure.
exitcode, stdout, stderr = assert_python_failure(script_name)
# The last two stderr lines must be the source text up to the NUL and
# the SyntaxError message about null bytes.
self.assertEqual(
stderr.splitlines()[-2:],
[ b" x = '",
b'SyntaxError: source code cannot contain null bytes'
],
)

def test_syntaxerror_null_bytes_in_multiline_string(self):
# A NUL character inside a triple-quoted literal must also be rejected;
# the same check is run for a plain string and for an f-string.
scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings
with os_helper.temp_dir() as script_dir:
for script in scripts:
script_name = _make_test_script(script_dir, 'script', script)
# Only stderr is inspected; exit code and stdout are discarded.
_, _, stderr = assert_python_failure(script_name)
# The last two stderr lines must be the line that held the NUL (shown
# up to the NUL) and the SyntaxError message about null bytes.
self.assertEqual(
stderr.splitlines()[-2:],
[ b" multilinestring",
b'SyntaxError: source code cannot contain null bytes'
]
)

def test_consistent_sys_path_for_direct_execution(self):
# This test case ensures that the following all give the same
# sys.path configuration:
Expand Down
8 changes: 4 additions & 4 deletions Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ def test_particularly_evil_undecodable(self):
with open(fn, "wb") as fp:
fp.write(src)
res = script_helper.run_python_until_end(fn)[0]
self.assertIn(b"Non-UTF-8", res.err)
self.assertIn(b"source code cannot contain null bytes", res.err)

def test_yet_more_evil_still_undecodable(self):
# Issue #25388
Expand All @@ -552,7 +552,7 @@ def test_yet_more_evil_still_undecodable(self):
with open(fn, "wb") as fp:
fp.write(src)
res = script_helper.run_python_until_end(fn)[0]
self.assertIn(b"Non-UTF-8", res.err)
self.assertIn(b"source code cannot contain null bytes", res.err)

@support.cpython_only
def test_compiler_recursion_limit(self):
Expand Down Expand Up @@ -588,9 +588,9 @@ def check_limit(prefix, repeated, mode="single"):
def test_null_terminated(self):
# The source code is null-terminated internally, but bytes-like
# objects are accepted, which could be not terminated.
with self.assertRaisesRegex(ValueError, "cannot contain null"):
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
compile("123\x00", "<dummy>", "eval")
with self.assertRaisesRegex(ValueError, "cannot contain null"):
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
compile(memoryview(b"123\x00"), "<dummy>", "eval")
code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
self.assertEqual(eval(code), 23)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The parser now raises :exc:`SyntaxError` when parsing source code containing
null bytes. Backported from ``aab01e3``. Patch by Pablo Galindo.
29 changes: 19 additions & 10 deletions Objects/fileobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr)
return 1;
}

/*
** Py_UniversalNewlineFgets is an fgets variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** The fobj parameter exists solely for legacy reasons and must be NULL.
** Note that we need no error handling: fgets() treats error and eof
** identically.
*/
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
_Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size)
{
char *p = buf;
int c;
Expand All @@ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
}
FUNLOCKFILE(stream);
*p = '\0';
if (p == buf)
if (p == buf) {
return NULL;
}
*size = p - buf;
return buf;
}

/*
** Py_UniversalNewlineFgets: an fgets() variant that recognises each of the
** \r, \n and \r\n line-ending conventions.
** Open the stream in binary mode.
** fobj is retained purely for legacy reasons; callers must pass NULL.
** No error handling is required here because fgets() reports an error and
** end-of-file in the same way.
**
** This is a thin wrapper around _Py_UniversalNewlineFgetsWithSize() that
** discards the length reported through the extra out-parameter.
*/

char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
    size_t unused_len;  /* required by the callee, never read here */
    char *line = _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj,
                                                   &unused_len);
    return line;
}

/* **************************** std printer ****************************
* The stdprinter is used during the boot strapping phase as a preliminary
* file like object for sys.stderr.
Expand Down
25 changes: 20 additions & 5 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
return 1;
}

/* Report whether any of the first `size` bytes starting at `str` is a NUL
   byte.  Returns 1 when one is found and 0 otherwise. */
static inline int
contains_null_bytes(const char* str, size_t size) {
    const void *first_nul = memchr(str, '\0', size);
    if (first_nul == NULL) {
        return 0;
    }
    return 1;
}

static int
tok_readline_recode(struct tok_state *tok) {
PyObject *line;
Expand Down Expand Up @@ -831,17 +836,17 @@ tok_readline_raw(struct tok_state *tok)
if (!tok_reserve_buf(tok, BUFSIZ)) {
return 0;
}
char *line = Py_UniversalNewlineFgets(tok->inp,
(int)(tok->end - tok->inp),
tok->fp, NULL);
int n_chars = (int)(tok->end - tok->inp);
size_t line_size = 0;
char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
if (line == NULL) {
return 1;
}
if (tok->fp_interactive &&
tok_concatenate_interactive_new_line(tok, line) == -1) {
return 0;
}
tok->inp = strchr(tok->inp, '\0');
tok->inp += line_size;
if (tok->inp == tok->buf) {
return 0;
}
Expand Down Expand Up @@ -1078,6 +1083,12 @@ tok_nextc(struct tok_state *tok)
return EOF;
}
tok->line_start = tok->cur;

if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
syntaxerror(tok, "source code cannot contain null bytes");
tok->cur = tok->inp;
return EOF;
}
}
Py_UNREACHABLE();
}
Expand Down Expand Up @@ -1987,8 +1998,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
if (tok->done == E_DECODE)
if (tok->done == E_ERROR) {
return ERRORTOKEN;
}
if (tok->done == E_DECODE) {
break;
}
if (c == EOF || (quote_size == 1 && c == '\n')) {
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
Expand Down
2 changes: 1 addition & 1 deletion Python/pythonrun.c
Original file line number Diff line number Diff line change
Expand Up @@ -1859,7 +1859,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
}

if (strlen(str) != (size_t)size) {
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_SyntaxError,
"source code string cannot contain null bytes");
Py_CLEAR(*cmd_copy);
return NULL;
Expand Down
0