bpo-43833: Emit warnings for numeric literals followed by keyword by serhiy-storchaka · Pull Request #25466 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-43833: Emit warnings for numeric literals followed by keyword #25466

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 8, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
bpo-43833: Emit warnings for numeric literals followed by keyword
Emit a deprecation warning if the numeric literal is immediately followed by
one of keywords: and, else, for, if, in, is, or. Raise a syntax error with
more informative message if it is immediately followed by other keyword or
identifier.
  • Loading branch information
serhiy-storchaka committed Apr 18, 2021
commit 56ae48901050576c7121cb236bf1f18d9e53798c
23 changes: 23 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,17 @@ Optimizations
Deprecated
==========

* Currently Python accepts numeric literals immediately followed by keywords,
for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing
and ambiguous expressions like ``[0x1for x in y]`` (which can be
interpreted as ``[0x1 for x in y]`` or ``[0x1f or x in y]``). Starting in
this release, a deprecation warning is raised if the numeric literal is
immediately followed by one of the keywords :keyword:`and`, :keyword:`else`,
:keyword:`for`, :keyword:`if`, :keyword:`in`, :keyword:`is` and :keyword:`or`.
In future releases it will be changed to a syntax warning, and finally to
a syntax error.
(Contributed by Serhiy Storchaka in :issue:`43833`).

* Starting in this release, there will be a concerted effort to begin
cleaning up old import semantics that were kept for Python 2.7
compatibility. Specifically,
Expand Down Expand Up @@ -1382,6 +1393,18 @@ This section lists previously described changes and other bugfixes
that may require changes to your code.


Changes in the Python syntax
----------------------------

* A deprecation warning is now emitted when compiling previously valid syntax
if the numeric literal is immediately followed by a keyword (like in ``0in x``).
In future releases it will be changed to a syntax warning, and finally to
a syntax error. To get rid of the warning and make the code compatible with
future releases, just add a space between the numeric literal and the
following keyword.
(Contributed by Serhiy Storchaka in :issue:`43833`).


Changes in the Python API
-------------------------

Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_literals_with_leading_zeroes(self):
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
"080000000000000", "000000000000009", "000000000000008",
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
"0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777",
"0b101j", "0o153j", "0b100e1", "0o777e1", "0777",
"000777", "000000000000007"]:
self.assertRaises(SyntaxError, eval, arg)

Expand Down
92 changes: 90 additions & 2 deletions Lib/test/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,10 @@ def test_floats(self):

def test_float_exponent_tokenization(self):
# See issue 21642.
self.assertEqual(1 if 1else 0, 1)
self.assertEqual(1 if 0else 0, 0)
with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
self.assertEqual(eval("1 if 1else 0"), 1)
self.assertEqual(eval("1 if 0else 0"), 0)
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")

def test_underscore_literals(self):
Expand Down Expand Up @@ -210,6 +212,92 @@ def test_bad_numerical_literals(self):
check("1e2_", "invalid decimal literal")
check("1e+", "invalid decimal literal")

def test_end_of_numerical_literals(self):
def check(test):
with self.assertWarns(DeprecationWarning):
compile(test, "<testcase>", "eval")

def check_error(test):
with warnings.catch_warnings(record=True) as w:
with self.assertRaises(SyntaxError):
compile(test, "<testcase>", "eval")
self.assertEqual(w, [])

check_error("0xfand x")
check("0o7and x")
check("0b1and x")
check("9and x")
check("0and x")
check("1.and x")
check("1e3and x")
check("1jand x")

check("0xfor x")
check("0o7or x")
check("0b1or x")
check("9or x")
check_error("0or x")
check("1.or x")
check("1e3or x")
check("1jor x")

check("0xfin x")
check("0o7in x")
check("0b1in x")
check("9in x")
check("0in x")
check("1.in x")
check("1e3in x")
check("1jin x")

with warnings.catch_warnings():
warnings.simplefilter('ignore', SyntaxWarning)
check("0xfis x")
check("0o7is x")
check("0b1is x")
check("9is x")
check("0is x")
check("1.is x")
check("1e3is x")
check("1jis x")

check("0xfif x else y")
check("0o7if x else y")
check("0b1if x else y")
check("9if x else y")
check("0if x else y")
check("1.if x else y")
check("1e3if x else y")
check("1jif x else y")

check_error("x if 0xfelse y")
check("x if 0o7else y")
check("x if 0b1else y")
check("x if 9else y")
check("x if 0else y")
check("x if 1.else y")
check("x if 1e3else y")
check("x if 1jelse y")

check("[0x1ffor x in ()]")
check("[0x1for x in ()]")
check("[0xfor x in ()]")
check("[0o7for x in ()]")
check("[0b1for x in ()]")
check("[9for x in ()]")
check("[1.for x in ()]")
check("[1e3for x in ()]")
check("[1jfor x in ()]")

check_error("0xfspam")
check_error("0o7spam")
check_error("0b1spam")
check_error("9spam")
check_error("0spam")
check_error("1.spam")
check_error("1e3spam")< 8000 /td>
check_error("1jspam")

def test_string_literals(self):
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Emit a deprecation warning if the numeric literal is immediately followed by
one of keywords: and, else, for, if, in, is, or. Raise a syntax error with
more informative message if it is immediately followed by other keyword or
identifier.
122 changes: 122 additions & 0 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,107 @@ indenterror(struct tok_state *tok)
return ERRORTOKEN;
}

/* Issue a DeprecationWarning whose text is built from a printf-style format.
 *
 * The message is formatted with PyUnicode_FromFormatV() and reported against
 * tok->filename and tok->lineno.
 *
 * Returns 0 when the warning call succeeds.  Returns -1 with tok->done set
 * to E_ERROR when the message cannot be created or the warning call fails.
 * If the failure left a DeprecationWarning exception set, that exception is
 * cleared and syntaxerror() is called with the same message instead.
 */
static int
parser_warn(struct tok_state *tok, const char *format, ...)
{
    PyObject *msg;
    va_list ap;
    int status;

#ifdef HAVE_STDARG_PROTOTYPES
    va_start(ap, format);
#else
    va_start(ap);
#endif
    msg = PyUnicode_FromFormatV(format, ap);
    va_end(ap);

    if (msg == NULL) {
        tok->done = E_ERROR;
        return -1;
    }

    status = PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
                                      tok->filename, tok->lineno, NULL, NULL);
    if (status >= 0) {
        Py_DECREF(msg);
        return 0;
    }

    if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
        /* Replace the DeprecationWarning exception with a SyntaxError
           to get a more accurate error report */
        PyErr_Clear();
        syntaxerror(tok, "%U", msg);
    }
    Py_DECREF(msg);
    tok->done = E_ERROR;
    return -1;
}

/* Peek at the upcoming input without consuming it.
 *
 * Returns 1 if the next characters are exactly the string `test` and the
 * character right after them is not a potential identifier character;
 * returns 0 otherwise.  Every character read is pushed back with
 * tok_backup() before returning.
 */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *p = test;
    int matched;
    int ch = tok_nextc(tok);

    /* Consume input for as long as it agrees with `test`. */
    while (*p != '\0' && ch == *p) {
        p++;
        ch = tok_nextc(tok);
    }

    /* A full match only counts if the word ends here. */
    matched = (*p == '\0') && !is_potential_identifier_char(ch);

    /* Push back the last character read, then the matched prefix in
       reverse order. */
    tok_backup(tok, ch);
    while (p != test) {
        p--;
        tok_backup(tok, *p);
    }
    return matched;
}

/* Check the character `c` that immediately follows a numeric literal.
 *
 * tok   tokenizer state; `c` has already been read from it.
 * c     the first character after the literal.
 * kind  literal kind used in the message "invalid <kind> literal".
 *
 * Returns 1 if tokenizing may continue; in that case `c` is still consumed.
 * Returns 0 if an error was reported (either a syntax error, or
 * parser_warn() failed); in that case `c` has been pushed back.
 */
static int
verify_end_of_number(struct tok_state *tok, int c, const char *kind)
{
    /* Emit a deprecation warning if the numeric literal is immediately
     * followed by one of keywords: and, else, for, if, in, is, or.
     * Raise a syntax error if it is immediately followed by other keyword
     * or identifier (it is better than just "invalid syntax").
     */
    int r = 0;
    if (c == 'a') {
        r = lookahead(tok, "nd");
    }
    else if (c == 'e') {
        r = lookahead(tok, "lse");
    }
    else if (c == 'f') {
        r = lookahead(tok, "or");
    }
    else if (c == 'i') {
        /* "if", "in" and "is" must end at a word boundary like the other
         * keywords.  Otherwise an identifier such as "input" or "ifx"
         * would get a deprecation warning instead of the syntax error
         * below.  lookahead() restores the input on every call, so the
         * attempts can be chained. */
        r = lookahead(tok, "f") || lookahead(tok, "n") || lookahead(tok, "s");
    }
    else if (c == 'o') {
        r = lookahead(tok, "r");
    }
    if (r) {
        /* Push `c` back while warning, then consume it again on success. */
        tok_backup(tok, c);
        if (parser_warn(tok, "invalid %s literal", kind)) {
            return 0;
        }
        tok_nextc(tok);
    }
    else if (is_potential_identifier_char(c)) {
        tok_backup(tok, c);
        syntaxerror(tok, "invalid %s literal", kind);
        return 0;
    }
    return 1;
}

/* Verify that the identifier follows PEP 3131.
All identifier strings are guaranteed to be "ready" unicode objects.
*/
Expand Down Expand Up @@ -1560,6 +1661,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
} while (isxdigit(c));
} while (c == '_');
if (!verify_end_of_number(tok, c, "hexadecimal")) {
return ERRORTOKEN;
}
}
else if (c == 'o' || c == 'O') {
/* Octal */
Expand All @@ -1586,6 +1690,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
if (!verify_end_of_number(tok, c, "octal")) {
return ERRORTOKEN;
}
}
else if (c == 'b' || c == 'B') {
/* Binary */
Expand All @@ -1612,6 +1719,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
if (!verify_end_of_number(tok, c, "binary")) {
return ERRORTOKEN;
}
}
else {
int nonzero = 0;
Expand Down Expand Up @@ -1655,6 +1765,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
"literals are not permitted; "
"use an 0o prefix for octal integers");
}
if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
}
}
}
else {
Expand Down Expand Up @@ -1690,6 +1803,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
} else if (!isdigit(c)) {
tok_backup(tok, c);
if (!verify_end_of_number(tok, e, "decimal")) {
return ERRORTOKEN;
}
tok_backup(tok, e);
*p_start = tok->start;
*p_end = tok->cur;
Expand All @@ -1704,6 +1820,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
if (!verify_end_of_number(tok, c, "imaginary")) {
return ERRORTOKEN;
}
}
else if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
}
}
}
Expand Down
0