bpo-43410: Fix crash in the parser when producing syntax errors when reading from stdin by pablogsal · Pull Request #24763 · python/cpython · GitHub

bpo-43410: Fix crash in the parser when producing syntax errors when reading from stdin #24763


Merged · 1 commit merged on Mar 14, 2021
9 changes: 8 additions & 1 deletion Lib/test/test_cmd_line.py
@@ -816,9 +816,16 @@ def test_sys_flags_not_set(self):
                 PYTHONVERBOSE="1",
             )
 
+class SyntaxErrorTests(unittest.TestCase):
+    def test_tokenizer_error_with_stdin(self):
+        proc = subprocess.run([sys.executable, "-"], input = b"(1+2+3",
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        self.assertNotEqual(proc.returncode, 0)
+        self.assertNotEqual(proc.stderr, None)
+        self.assertIn(b"\nSyntaxError", proc.stderr)
+
 
 def test_main():
-    support.run_unittest(CmdLineTest, IgnoreEnvironmentTest)
+    support.run_unittest(CmdLineTest, IgnoreEnvironmentTest, SyntaxErrorTests)
     support.reap_children()
 
 if __name__ == "__main__":
2 changes: 2 additions & 0 deletions Misc/NEWS.d entry (new file)
@@ -0,0 +1,2 @@
+Fix a bug that was causing the parser to crash when emitting syntax errors
+when reading input from stdin. Patch by Pablo Galindo.
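
For context, the failure described in this entry is the one exercised by the new test above: an unterminated expression fed to the interpreter on standard input. A minimal reproducer sketch (the subprocess invocation mirrors the test; the exact stderr wording is an assumption and varies by version):

import subprocess
import sys

# Run "python -" so the parser reads the program from stdin, and feed it an
# expression with an unclosed parenthesis.
proc = subprocess.run([sys.executable, "-"], input=b"(1+2+3",
                      capture_output=True)

print(proc.returncode)                # non-zero: the source failed to parse
print(b"SyntaxError" in proc.stderr)  # True on a build with this fix applied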
14 changes: 11 additions & 3 deletions Parser/pegen.c
@@ -397,7 +397,8 @@ get_error_line(Parser *p, Py_ssize_t lineno)
        are stored in p->tok->stdin_content */
     assert(p->tok->fp == NULL || p->tok->fp == stdin);
 
-    char *cur_line = p->tok->fp == NULL ? p->tok->str : p->tok->stdin_content;
+    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
+
     for (int i = 0; i < lineno - 1; i++) {
         cur_line = strchr(cur_line, '\n') + 1;
     }
@@ -440,7 +441,10 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         goto error;
     }
 
-    if (p->start_rule == Py_file_input) {
+    if (p->tok->fp_interactive) {
+        error_line = get_error_line(p, lineno);
+    }
+    else if (p->start_rule == Py_file_input) {
         error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
     }

@@ -1232,7 +1236,7 @@ _PyPegen_run_parser(Parser *p)
         if (p->fill == 0) {
             RAISE_SYNTAX_ERROR("error at start before reading any input");
         }
-        else if (p->tok->done == E_EOF) {
+        else if (p->tok->done == E_EOF) {
             if (p->tok->level) {
                 raise_unclosed_parentheses_error(p);
             } else {
@@ -1287,6 +1291,10 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
         }
         return NULL;
     }
+    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
+        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
+        tok->fp_interactive = 1;
+    }
     // This transfers the ownership to the tokenizer
     tok->filename = filename_ob;
     Py_INCREF(filename_ob);
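
The new block in _PyPegen_run_parser_from_file_pointer marks the tokenizer as interactive when there is no file pointer, when prompt strings are supplied, or when the file is named "<stdin>". A rough Python restatement of that predicate (the function and argument names are illustrative only, not CPython API):

def is_interactive(has_fp, ps1, ps2, filename):
    # Mirrors: !tok->fp || ps1 != NULL || ps2 != NULL ||
    #          PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0
    return (not has_fp) or ps1 is not None or ps2 is not None or filename == "<stdin>"

assert is_interactive(True, ">>> ", "... ", "<stdin>")         # the REPL
assert is_interactive(True, None, None, "<stdin>")             # piping into "python -"
assert not is_interactive(True, None, None, "script.py")       # a regular source file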
78 changes: 52 additions & 26 deletions Parser/tokenizer.c
@@ -56,6 +56,9 @@ tok_new(void)
     if (tok == NULL)
         return NULL;
     tok->buf = tok->cur = tok->inp = NULL;
+    tok->fp_interactive = 0;
+    tok->interactive_src_start = NULL;
+    tok->interactive_src_end = NULL;
     tok->start = NULL;
     tok->end = NULL;
     tok->done = E_OK;
@@ -80,8 +83,6 @@ tok_new(void)
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
     tok->type_comments = 0;
-    tok->stdin_content = NULL;
-
     tok->async_hacks = 0;
     tok->async_def = 0;
     tok->async_def_indent = 0;
@@ -323,6 +324,35 @@ check_bom(int get_char(struct tok_state *),
     return 1;
 }
 
+static int tok_concatenate_interactive_new_line(struct tok_state* tok, char* line) {
+    assert(tok->fp_interactive);
+
+    if (!line) {
+        return 0;
+    }
+
+    Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
+    Py_ssize_t line_size = strlen(line);
+    char* new_str = tok->interactive_src_start;
+
+    new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
+    if (!new_str) {
+        if (tok->interactive_src_start) {
+            PyMem_Free(tok->interactive_src_start);
+        }
+        tok->interactive_src_start = NULL;
+        tok->interactive_src_end = NULL;
+        tok->done = E_NOMEM;
+        return -1;
+    }
+    strcpy(new_str + current_size, line);
+
+    tok->interactive_src_start = new_str;
+    tok->interactive_src_end = new_str + current_size + line_size;
+    return 0;
+}
+
+
 /* Read a line of text from TOK into S, using the stream in TOK.
    Return NULL on failure, else S.

@@ -552,6 +582,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
                      badchar, tok->filename, tok->lineno + 1);
         return error_ret(tok);
     }
+
+    if (tok->fp_interactive &&
+        tok_concatenate_interactive_new_line(tok, line) == -1) {
+        return NULL;
+    }
+
     return line;
 }

@@ -807,17 +843,21 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
 void
 PyTokenizer_Free(struct tok_state *tok)
 {
-    if (tok->encoding != NULL)
+    if (tok->encoding != NULL) {
         PyMem_Free(tok->encoding);
+    }
     Py_XDECREF(tok->decoding_readline);
     Py_XDECREF(tok->decoding_buffer);
     Py_XDECREF(tok->filename);
-    if (tok->fp != NULL && tok->buf != NULL)
+    if (tok->fp != NULL && tok->buf != NULL) {
         PyMem_Free(tok->buf);
-    if (tok->input)
+    }
+    if (tok->input) {
         PyMem_Free(tok->input);
-    if (tok->stdin_content)
-        PyMem_Free(tok->stdin_content);
+    }
+    if (tok->interactive_src_start != NULL) {
+        PyMem_Free(tok->interactive_src_start);
+    }
     PyMem_Free(tok);
 }

@@ -858,24 +898,6 @@ tok_nextc(struct tok_state *tok)
             if (translated == NULL)
                 return EOF;
             newtok = translated;
-            if (tok->stdin_content == NULL) {
-                tok->stdin_content = PyMem_Malloc(strlen(translated) + 1);
-                if (tok->stdin_content == NULL) {
-                    tok->done = E_NOMEM;
-                    return EOF;
-                }
-                sprintf(tok->stdin_content, "%s", translated);
-            }
-            else {
-                char *new_str = PyMem_Malloc(strlen(tok->stdin_content) + strlen(translated) + 1);
-                if (new_str == NULL) {
-                    tok->done = E_NOMEM;
-                    return EOF;
-                }
-                sprintf(new_str, "%s%s", tok->stdin_content, translated);
-                PyMem_Free(tok->stdin_content);
-                tok->stdin_content = new_str;
-            }
         }
         if (tok->encoding && newtok && *newtok) {
             /* Recode to UTF-8 */
@@ -898,6 +920,10 @@ tok_nextc(struct tok_state *tok)
             strcpy(newtok, buf);
             Py_DECREF(u);
         }
+        if (tok->fp_interactive &&
+            tok_concatenate_interactive_new_line(tok, newtok) == -1) {
+            return EOF;
+        }
         if (tok->nextprompt != NULL)
             tok->prompt = tok->nextprompt;
         if (newtok == NULL)
@@ -958,7 +984,7 @@ tok_nextc(struct tok_state *tok)
             }
             if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
                                tok) == NULL) {
-                if (!tok->decoding_erred)
+                if (!tok->decoding_erred && !(tok->done == E_NOMEM))
                     tok->done = E_EOF;
                 done = 1;
             }
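
Taken together, tok_concatenate_interactive_new_line() and get_error_line() implement a simple pattern: every decoded interactive line is appended to one growing buffer, and the error path later skips newline separators to recover the offending line. A small Python sketch of the idea (a conceptual analogue under assumed names, not CPython internals):

class InteractiveSource:
    """Accumulate interactive input so error reporting can quote any line."""

    def __init__(self):
        self._buf = ""  # plays the role of interactive_src_start..interactive_src_end

    def append_line(self, line):
        # Like tok_concatenate_interactive_new_line(): grow the buffer and append.
        self._buf += line

    def error_line(self, lineno):
        # Like get_error_line(): skip lineno - 1 newlines, then take up to the next one.
        cur = self._buf
        for _ in range(lineno - 1):
            cur = cur[cur.index("\n") + 1:]
        end = cur.find("\n")
        return cur if end == -1 else cur[:end]


src = InteractiveSource()
src.append_line("(1+2+\n")
src.append_line("3\n")
print(src.error_line(2))  # -> "3"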
4 changes: 3 additions & 1 deletion Parser/tokenizer.h
@@ -26,6 +26,9 @@ struct tok_state {
     char *buf;          /* Input buffer, or NULL; malloc'ed if fp != NULL */
     char *cur;          /* Next character in buffer */
     char *inp;          /* End of data in buffer */
+    int fp_interactive;           /* If the file descriptor is interactive */
+    char *interactive_src_start;  /* The start of the source parsed so far in interactive mode */
+    char *interactive_src_end;    /* The end of the source parsed so far in interactive mode */
     const char *end;    /* End of input buffer if buf != NULL */
     const char *start;  /* Start of current token if not NULL */
     int done;           /* E_OK normally, E_EOF at EOF, otherwise error code */
@@ -37,7 +40,6 @@ struct tok_state {
     int atbol;          /* Nonzero if at begin of new line */
     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
     const char *prompt, *nextprompt;  /* For interactive prompting */
-    char *stdin_content;
     int lineno;         /* Current line number */
     int first_lineno;   /* First line of a single line or multi line string
                            expression (cf. issue 16806) */