8000 [3.10] bpo-43833: Emit warnings for numeric literals followed by keyword (GH-25466) by miss-islington · Pull Request #26614 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.10] bpo-43833: Emit warnings for numeric literals followed by keyword (GH-25466) #26614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,17 @@ Optimizations
Deprecated
==========

* Currently Python accepts numeric literals immediately followed by keywords,
for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing
and ambiguous expressions like ``[0x1for x in y]`` (which can be
interpreted as ``[0x1 for x in y]`` or ``[0x1f or x in y]``). Starting in
this release, a deprecation warning is raised if the numeric literal is
immediately followed by one of the keywords :keyword:`and`, :keyword:`else`,
:keyword:`for`, :keyword:`if`, :keyword:`in`, :keyword:`is` and :keyword:`or`.
In future releases it will be changed to a syntax warning, and finally to a
syntax error.
(Contributed by Serhiy Storchaka in :issue:`43833`).

* Starting in this release, there will be a concerted effort to begin
cleaning up old import semantics that were kept for Python 2.7
compatibility. Specifically,
Expand Down Expand Up @@ -1677,6 +1688,18 @@ This section lists previously described changes and other bugfixes
that may require changes to your code.


Changes in the Python syntax
----------------------------

* A deprecation warning is now emitted when compiling previously valid syntax
if the numeric literal is immediately followed by a keyword (like in ``0in x``).
In future releases it will be changed to a syntax warning, and finally to a
syntax error. To get rid of the warning and make the code compatible with
future releases, just add a space between the numeric literal and the
following keyword.
(Contributed by Serhiy Storchaka in :issue:`43833`).


Changes in the Python API
-------------------------

Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_literals_with_leading_zeroes(self):
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
"080000000000000", "000000000000009", "000000000000008",
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
"0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777",
"0b101j", "0o153j", "0b100e1", "0o777e1", "0777",
"000777", "000000000000007"]:
self.assertRaises(SyntaxError, eval, arg)

Expand Down
92 changes: 90 additions & 2 deletions Lib/test/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,10 @@ def test_floats(self):

def test_float_exponent_tokenization(self):
# See issue 21642.
self.assertEqual(1 if 1else 0, 1)
self.assertEqual(1 if 0else 0, 0)
with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
self.assertEqual(eval("1 if 1else 0"), 1)
self.assertEqual(eval("1 if 0else 0"), 0)
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")

def test_underscore_literals(self):
Expand Down Expand Up @@ -211,6 +213,92 @@ def test_bad_numerical_literals(self):
check("1e2_", "invalid decimal literal")
check("1e+", "invalid decimal literal")

def test_end_of_numerical_literals(self):
    # Exercise what the compiler does when a numeric literal is glued to the
    # text that follows it: either a DeprecationWarning is emitted, or the
    # source is rejected with a SyntaxError and no warning at all.
    def expect_warning(source):
        with self.assertWarns(DeprecationWarning):
            compile(source, "<testcase>", "eval")

    def expect_error(source):
        # The SyntaxError must not be preceded by any recorded warning.
        with warnings.catch_warnings(record=True) as caught:
            with self.assertRaises(SyntaxError):
                compile(source, "<testcase>", "eval")
        self.assertEqual(caught, [])

    # One representative per literal kind: hexadecimal, octal, binary,
    # decimal, zero, float with a trailing dot, float with an exponent,
    # and imaginary.
    literals = ("0xf", "0o7", "0b1", "9", "0", "1.", "1e3", "1j")

    def run_group(prefix, suffix, rejected=()):
        # Build prefix + literal + suffix for every literal, in order;
        # literals listed in `rejected` must fail with a SyntaxError,
        # all the others must warn.
        for literal in literals:
            source = prefix + literal + suffix
            if literal in rejected:
                expect_error(source)
            else:
                expect_warning(source)

    # "0xfand": the leading "a" of "and" is itself a hexadecimal digit,
    # so this case is expected to be an error rather than a warning.
    run_group("", "and x", rejected=("0xf",))
    # "0or": expected to be an error (presumably because "0o" reads as
    # the start of an octal literal).
    run_group("", "or x", rejected=("0",))
    run_group("", "in x")

    # Silence SyntaxWarning while compiling "<literal>is x".
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', SyntaxWarning)
        run_group("", "is x")

    run_group("", "if x else y")
    # "0xfelse": the leading "e" of "else" is itself a hexadecimal digit.
    run_group("x if ", "else y", rejected=("0xf",))

    # The "for" cases do not follow the common literal list, so they are
    # spelled out explicitly.
    for source in (
        "[0x1ffor x in ()]",
        "[0x1for x in ()]",
        "[0xfor x in ()]",
        "[0o7for x in ()]",
        "[0b1for x in ()]",
        "[9for x in ()]",
        "[1.for x in ()]",
        "[1e3for x in ()]",
        "[1jfor x in ()]",
    ):
        expect_warning(source)

    # A literal followed by an ordinary identifier is always an error.
    run_group("", "spam", rejected=literals)

def test_string_literals(self):
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Emit a deprecation warning if a numeric literal is immediately followed by
one of the keywords: and, else, for, if, in, is, or. Raise a syntax error with
a more informative message if it is immediately followed by another keyword or
an identifier.
128 changes: 128 additions & 0 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,113 @@ indenterror(struct tok_state *tok)
return ERRORTOKEN;
}

/* Issue a DeprecationWarning from the tokenizer.
 *
 * The message is built from the printf-style `format` and the variadic
 * arguments (PyUnicode_FromFormatV) and reported against tok->filename
 * and tok->lineno.
 *
 * Returns 0 when the warning was issued without raising.  Returns -1 and
 * sets tok->done to E_ERROR when the message could not be built or when
 * issuing the warning raised an exception.  If that exception is the
 * DeprecationWarning itself, it is replaced by a syntax error carrying
 * the same message.
 */
static int
parser_warn(struct tok_state *tok, const char *format, ...)
{
    va_list ap;
    PyObject *message;
    int status = -1;

#ifdef HAVE_STDARG_PROTOTYPES
    va_start(ap, format);
#else
    va_start(ap);
#endif
    message = PyUnicode_FromFormatV(format, ap);
    va_end(ap);

    if (message != NULL) {
        if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, message,
                                     tok->filename, tok->lineno,
                                     NULL, NULL) >= 0) {
            status = 0;
        }
        else if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
            /* Replace the DeprecationWarning exception with a SyntaxError
               to get a more accurate error report */
            PyErr_Clear();
            syntaxerror(tok, "%U", message);
        }
        /* Any other exception is left set for the caller. */
        Py_DECREF(message);
    }

    if (status < 0) {
        tok->done = E_ERROR;
    }
    return status;
}

/* Peek at the upcoming input without consuming it.
 *
 * Returns 1 when the next characters are exactly the string `test` and
 * the character right after them cannot be part of an identifier;
 * returns 0 otherwise.  Every character read here is pushed back before
 * returning, in reverse order of reading.
 */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *pos = test;
    int matched = 0;
    int ch = tok_nextc(tok);

    /* Consume input for as long as it keeps matching `test`. */
    while (*pos != 0 && ch == *pos) {
        pos++;
        ch = tok_nextc(tok);
    }

    if (*pos == 0) {
        /* All of `test` matched; `ch` is the character following it. */
        matched = !is_potential_identifier_char(ch);
    }

    /* Undo the reads: first the character that ended the scan, then the
       matched prefix from its last character back to its first. */
    tok_backup(tok, ch);
    while (pos != test) {
        pos--;
        tok_backup(tok, *pos);
    }
    return matched;
}

/* Check the character `c` that immediately follows a numeric literal.
 *
 * `kind` names the literal ("hexadecimal", "octal", ...) and is only
 * used to build the "invalid %s literal" message.
 *
 * Returns 1 when tokenizing may continue and 0 when an error has been
 * reported (either the warning machinery failed or a syntax error was
 * raised).
 *
 * A deprecation warning is emitted only when the literal is immediately
 * followed by one of the keywords that can occur after a numeric literal
 * in valid code: "and", "else", "for", "if", "in", "is" and "or".  This
 * lets existing valid code be deprecated gradually without adding a
 * warning in front of the error in most cases of an invalid numeric
 * literal (which would be confusing and would break existing tests).
 * If the literal is immediately followed by any other keyword or
 * identifier, a syntax error with a slightly better message than plain
 * "invalid syntax" is raised.
 */
static int
verify_end_of_number(struct tok_state *tok, int c, const char *kind)
{
    int keyword_follows = 0;

    switch (c) {
    case 'a':
        keyword_follows = lookahead(tok, "nd");
        break;
    case 'e':
        keyword_follows = lookahead(tok, "lse");
        break;
    case 'f':
        keyword_follows = lookahead(tok, "or");
        break;
    case 'i': {
        /* "if", "in" and "is": only the second letter is inspected.
           NOTE(review): unlike the lookahead() branches, this does not
           check what follows the keyword -- confirm this is intended. */
        int second = tok_nextc(tok);
        keyword_follows = (second == 'f' || second == 'n' || second == 's');
        tok_backup(tok, second);
        break;
    }
    case 'o':
        keyword_follows = lookahead(tok, "r");
        break;
    default:
        break;
    }

    if (keyword_follows) {
        /* `c` is pushed back while the warning is issued and read again
           afterwards, leaving the input position unchanged. */
        tok_backup(tok, c);
        if (parser_warn(tok, "invalid %s literal", kind)) {
            return 0;
        }
        tok_nextc(tok);
        return 1;
    }

    /* In future releases, only error will remain. */
    if (is_potential_identifier_char(c)) {
        tok_backup(tok, c);
        syntaxerror(tok, "invalid %s literal", kind);
        return 0;
    }
    return 1;
}

/* Verify that the identifier follows PEP 3131.
All identifier strings are guaranteed to be "ready" unicode objects.
*/
Expand Down Expand Up @@ -1569,6 +1676,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
} while (isxdigit(c));
} while (c == '_');
if (!verify_end_of_number(tok, c, "hexadecimal")) {
return ERRORTOKEN;
}
}
else if (c == 'o' || c == 'O') {
/* Octal */
Expand All @@ -1595,6 +1705,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
if (!verify_end_of_number(tok, c, "octal")) {
return ERRORTOKEN;
}
}
else if (c == 'b' || c == 'B') {
/* Binary */
Expand All @@ -1621,6 +1734,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
if (!verify_end_of_number(tok, c, "binary")) {
return ERRORTOKEN;
}
}
else {
int nonzero = 0;
Expand Down Expand Up @@ -1664,6 +1780,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
"literals are not permitted; "
"use an 0o prefix for octal integers");
}
if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
}
}
}
else {
Expand Down Expand Up @@ -1699,6 +1818,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
} else if (!isdigit(c)) {
tok_backup(tok, c);
if (!verify_end_of_number(tok, e, "decimal")) {
return ERRORTOKEN;
}
tok_backup(tok, e);
*p_start = tok->start;
*p_end = tok->cur;
Expand All @@ -1713,6 +1835,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
if (!verify_end_of_number(tok, c, "imaginary")) {
return ERRORTOKEN;
}
}
else if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
}
}
}
Expand Down
0