gh-105042: Disable unmatched parens syntax error in python tokenize · lysnikolaou/cpython@1c38078

Commit 1c38078 (parent: bfd20d2)

pythongh-105042: Disable unmatched parens syntax error in python tokenize

9 files changed: +68 −43 lines

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions

Generated file; diff not rendered.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions

@@ -463,6 +463,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(id)
         STRUCT_FOR_ID(ident)
         STRUCT_FOR_ID(ignore)
+        STRUCT_FOR_ID(ignore_unmatched_parens)
         STRUCT_FOR_ID(imag)
         STRUCT_FOR_ID(importlib)
         STRUCT_FOR_ID(in_fd)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions

Generated file; diff not rendered.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 3 additions & 0 deletions

Generated file; diff not rendered.

Lib/tokenize.py

Lines changed: 7 additions & 3 deletions

@@ -447,7 +447,9 @@ def tokenize(readline):
 
 def _tokenize(rl_gen, encoding):
     source = b"".join(rl_gen).decode(encoding)
-    for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
+    for token in _generate_tokens_from_c_tokenizer(source,
+                                                   extra_tokens=True,
+                                                   ignore_unmatched_parens=True):
         yield token
 
 def generate_tokens(readline):
@@ -531,10 +533,12 @@ def error(message, filename=None, location=None):
         perror("unexpected error: %s" % err)
         raise
 
-def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
+def _generate_tokens_from_c_tokenizer(source, extra_tokens=False, ignore_unmatched_parens=False):
     """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
     import _tokenize as c_tokenizer
-    for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
+    for info in c_tokenizer.TokenizerIter(source,
+                                          extra_tokens=extra_tokens,
+                                          ignore_unmatched_parens=ignore_unmatched_parens):
         yield TokenInfo._make(info)
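Both call sites now pass ignore_unmatched_parens=True, so the pure-Python tokenize module asks the C tokenizer to tolerate stray closing brackets instead of aborting at the first one. A minimal sketch of the resulting behavior (the example source string is illustrative, and assumes a CPython build that includes this commit):

    import io
    import tokenize

    # A stray ')' at paren depth zero: previously the C tokenizer raised
    # SyntaxError("unmatched ')'") mid-stream; with ignore_unmatched_parens=True
    # the ')' comes back as an ordinary OP token and tokenization continues.
    source = "a = 1)\nb = 2\n"
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        print(tok)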

Parser/tokenizer.c

Lines changed: 31 additions & 29 deletions

@@ -113,6 +113,7 @@ tok_new(void)
     tok->report_warnings = 1;
     tok->tok_extra_tokens = 0;
     tok->comment_newline = 0;
+    tok->ignore_unmatched_parens = 0;
     tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
     tok->tok_mode_stack_index = 0;
     tok->tok_report_warnings = 1;
@@ -2496,41 +2497,42 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         case ')':
         case ']':
         case '}':
-            if (!tok->level) {
+            if (!tok->ignore_unmatched_parens && !tok->level) {
                 if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
                     return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed"));
                 }
                 return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c));
             }
-            tok->level--;
-            int opening = tok->parenstack[tok->level];
-            if (!((opening == '(' && c == ')') ||
-                  (opening == '[' && c == ']') ||
-                  (opening == '{' && c == '}')))
-            {
-                /* If the opening bracket belongs to an f-string's expression
-                   part (e.g. f"{)}") and the closing bracket is an arbitrary
-                   nested expression, then instead of matching a different
-                   syntactical construct with it; we'll throw an unmatched
-                   parentheses error. */
-                if (INSIDE_FSTRING(tok) && opening == '{') {
-                    assert(current_tok->curly_bracket_depth >= 0);
-                    int previous_bracket = current_tok->curly_bracket_depth - 1;
-                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
-                        return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c));
+            if (tok->level > 0) {
+                tok->level--;
+                int opening = tok->parenstack[tok->level];
+                if (!tok->ignore_unmatched_parens && !((opening == '(' && c == ')') ||
+                                                       (opening == '[' && c == ']') ||
+                                                       (opening == '{' && c == '}'))) {
+                    /* If the opening bracket belongs to an f-string's expression
+                       part (e.g. f"{)}") and the closing bracket is an arbitrary
+                       nested expression, then instead of matching a different
+                       syntactical construct with it; we'll throw an unmatched
+                       parentheses error. */
+                    if (INSIDE_FSTRING(tok) && opening == '{') {
+                        assert(current_tok->curly_bracket_depth >= 0);
+                        int previous_bracket = current_tok->curly_bracket_depth - 1;
+                        if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
+                            return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c));
+                        }
+                    }
+                    if (tok->parenlinenostack[tok->level] != tok->lineno) {
+                        return MAKE_TOKEN(syntaxerror(tok,
+                                "closing parenthesis '%c' does not match "
+                                "opening parenthesis '%c' on line %d",
+                                c, opening, tok->parenlinenostack[tok->level]));
+                    }
+                    else {
+                        return MAKE_TOKEN(syntaxerror(tok,
+                                "closing parenthesis '%c' does not match "
+                                "opening parenthesis '%c'",
+                                c, opening));
                     }
                 }
-            }
-            if (tok->parenlinenostack[tok->level] != tok->lineno) {
-                return MAKE_TOKEN(syntaxerror(tok,
-                        "closing parenthesis '%c' does not match "
-                        "opening parenthesis '%c' on line %d",
-                        c, opening, tok->parenlinenostack[tok->level]));
-            }
-            else {
-                return MAKE_TOKEN(syntaxerror(tok,
-                        "closing parenthesis '%c' does not match "
-                        "opening parenthesis '%c'",
-                        c, opening));
             }
 
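The restructured closing-bracket case changes behavior only when the new flag is set: the paren stack is popped only while tok->level > 0, a closer at depth zero is emitted rather than rejected, and the "does not match" diagnostics are skipped; compile() and the REPL leave ignore_unmatched_parens at 0 and keep every existing error. A hedged sketch driving the flag through the private _tokenize module (an internal, unstable API, as shown in the Lib/tokenize.py hunk above):

    import _tokenize

    # With the flag set, both the mismatched '(' ... ']' pair and the
    # depth-zero ')' below tokenize cleanly instead of raising SyntaxError.
    for info in _tokenize.TokenizerIter("x = (1]\ny = 2)\n",
                                        extra_tokens=True,
                                        ignore_unmatched_parens=True):
        print(info)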

Parser/tokenizer.h

Lines changed: 1 addition & 0 deletions

@@ -130,6 +130,7 @@ struct tok_state {
     int tok_report_warnings;
     int tok_extra_tokens;
     int comment_newline;
+    int ignore_unmatched_parens;
 #ifdef Py_DEBUG
     int debug;
 #endif

Python/Python-tokenize.c

Lines changed: 10 additions & 3 deletions

@@ -40,12 +40,13 @@ _tokenizer.tokenizeriter.__new__ as tokenizeriter_new
     source: str
     *
     extra_tokens: bool
+    ignore_unmatched_parens: bool
 [clinic start generated code]*/
 
 static PyObject *
 tokenizeriter_new_impl(PyTypeObject *type, const char *source,
-                       int extra_tokens)
-/*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/
+                       int extra_tokens, int ignore_unmatched_parens)
+/*[clinic end generated code: output=5437e7bbc30de3f4 input=7f6b22d7c235ffd7]*/
 {
     tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
     if (self == NULL) {
@@ -64,6 +65,12 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (extra_tokens) {
         self->tok->tok_extra_tokens = 1;
     }
+    if (ignore_unmatched_parens) {
+        self->tok->ignore_unmatched_parens = 1;
+    }
+    if (ignore_unmatched_parens) {
+        self->tok->ignore_unmatched_parens = 1;
+    }
     self->done = 0;
     return (PyObject *)self;
 }
@@ -82,7 +89,7 @@ _tokenizer_error(struct tok_state *tok)
         msg = "invalid token";
         break;
     case E_EOF:
-        if (tok->level) {
+        if (tok->level > 0) {
             PyErr_Format(PyExc_SyntaxError,
                          "parenthesis '%c' was never closed",
                          tok->parenstack[tok->level-1]);
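Note the asymmetry this file preserves: the E_EOF branch still reports an opener that is never closed (now guarded with an explicit tok->level > 0 so parenstack is never indexed at a negative level). A small sketch of that behavior, again assuming a build with this commit:

    import io
    import tokenize

    # Extra closers are tolerated now, but an opener left dangling at EOF
    # still raises: SyntaxError("parenthesis '(' was never closed").
    try:
        list(tokenize.generate_tokens(io.StringIO("f(a\n").readline))
    except SyntaxError as exc:
        print(exc)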

Python/clinic/Python-tokenize.c.h

Lines changed: 13 additions & 8 deletions

Generated file; diff not rendered.
