gh-102856: Python tokenizer implementation for PEP 701 #104323


Merged · 20 commits · May 21, 2023
Changes from 1 commit
nested expressions
mgmacias95 authored and pablogsal committed May 18, 2023
commit f58104d20269ba6878da404be0973e375473d271
25 changes: 15 additions & 10 deletions Lib/test/test_tokenize.py
@@ -381,21 +381,26 @@ def test_string(self):
    STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
-    FSTRING_START \'f"\' (1, 0) (1, 2)
+    FSTRING_START 'f"' (1, 0) (1, 2)
    FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
-    FSTRING_END \'"\' (1, 5) (1, 6)
+    FSTRING_END '"' (1, 5) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
-    FSTRING_START \'fR"\' (1, 0) (1, 3)
+    FSTRING_START 'fR"' (1, 0) (1, 3)
    FSTRING_MIDDLE 'a' (1, 3) (1, 4)
    FSTRING_EXPR '{b}' (1, 4) (1, 7)
    FSTRING_MIDDLE 'c' (1, 7) (1, 8)
-    FSTRING_END \'"\' (1, 8) (1, 9)
+    FSTRING_END '"' (1, 8) (1, 9)
    """)
        self.check_tokenize('fR"a{{b}c"', """\
-    FSTRING_START \'fR"\' (1, 0) (1, 3)
+    FSTRING_START 'fR"' (1, 0) (1, 3)
    FSTRING_MIDDLE 'a{{b}c' (1, 3) (1, 9)
-    FSTRING_END \'"\' (1, 9) (1, 10)
+    FSTRING_END '"' (1, 9) (1, 10)
    """)
+        self.check_tokenize('f"""{f\'\'\'{f\'{f"{1+1}"}\'}\'\'\'}"""', """\
+    FSTRING_START 'f\"""' (1, 0) (1, 4)
+    FSTRING_EXPR '{f'''{f'{f"{1+1}"}'}'''}' (1, 4) (1, 28)
+    FSTRING_END '\"""' (1, 28) (1, 31)
+    """)
        self.check_tokenize('f"""abc"""', """\
    FSTRING_START 'f\"""' (1, 0) (1, 4)
@@ -404,15 +409,15 @@ def test_string(self):
    """)
        self.check_tokenize(r'f"abc\
def"', """\
-    FSTRING_START \'f"\' (1, 0) (1, 2)
+    FSTRING_START 'f"' (1, 0) (1, 2)
    FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
-    FSTRING_END \'"\' (2, 3) (2, 4)
+    FSTRING_END '"' (2, 3) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
-    FSTRING_START \'Rf"\' (1, 0) (1, 3)
+    FSTRING_START 'Rf"' (1, 0) (1, 3)
    FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
-    FSTRING_END \'"\' (2, 3) (2, 4)
+    FSTRING_END '"' (2, 3) (2, 4)
    """)

    def test_function(self):
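To reproduce these expected streams by hand, you can feed a small f-string straight to the pure-Python tokenizer. A minimal sketch, assuming an interpreter built from this branch (the FSTRING_* token types, and FSTRING_EXPR in particular, exist only in this intermediate implementation):

    # Print the token stream for an f-string with one replacement field.
    # Assumes a CPython build of this PR branch; on released versions the
    # same input tokenizes differently (e.g. as a single STRING token).
    import tokenize
    from io import BytesIO

    def show_tokens(source):
        # tokenize.tokenize() expects a readline callable producing bytes
        for tok in tokenize.tokenize(BytesIO(source.encode()).readline):
            print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)

    show_tokens('fR"a{b}c"')  # FSTRING_START 'fR"', FSTRING_MIDDLE 'a',
                              # FSTRING_EXPR '{b}', FSTRING_MIDDLE 'c', FSTRING_END '"'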
28 changes: 19 additions & 9 deletions Lib/tokenize.py
@@ -647,7 +647,7 @@ def _tokenize_fstring_mode(line, tok_start):
                if ((len(middle) >= position + 1 and middle[position + 1] == '{')
                        or (position > 0 and middle[position - 1] in ('\\', '{'))):
                    mid_token += c
-                else:
+                elif mid_token:
                    curly_brackets.append(c)
                    mid_expr += c
                    yield TokenInfo(
@@ -658,11 +658,17 @@ def _tokenize_fstring_mode(line, tok_start):
                        line=line)
                    mid_token = ''
                    end = line_number, start + i
+                else:
+                    curly_brackets.append(c)
+                    mid_expr += c
            case '}':
                # if no opening { is seen before, this character is taken
                # as part of the fstring middle token
-                if mid_expr:
+                # if there are remaining elements in the curly_brackets queue
+                # then the expression is not done yet
+                if curly_brackets:
+                    curly_brackets.pop()
+                if mid_expr and not curly_brackets:
                    mid_expr += c
                    yield TokenInfo(
                        type=FSTRING_EXPR,
@@ -675,7 +681,10 @@ def _tokenize_fstring_mode(line, tok_start):
                    mid_expr = ''
                    end = line_number, start + i + 1
                else:
-                    mid_token += c
+                    if mid_expr:
+                        mid_expr += c
+                    else:
+                        mid_token += c
            case '\n':
                if mid_expr:
                    mid_expr += c
@@ -694,12 +703,13 @@ def _tokenize_fstring_mode(line, tok_start):
    # once the end of the expression is reached, release what's left of
    # mid_token
    start += i
-    yield TokenInfo(
-        type=FSTRING_MIDDLE,
-        string=mid_token,
-        start=end,
-        end=(line_number, start),
-        line=line)
+    if mid_token:
+        yield TokenInfo(
+            type=FSTRING_MIDDLE,
+            string=mid_token,
+            start=end,
+            end=(line_number, start),
+            line=line)
    end = line_number, start

    if curly_brackets:
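The heart of this commit is the curly_brackets stack: every '{' that opens or nests inside a replacement field is pushed, every '}' pops, and an FSTRING_EXPR token is emitted only once the stack is empty again, so braces belonging to nested f-strings or dict displays never terminate the field early, while a lone '}' with no opener stays in the middle token. Below is a standalone sketch of just that matching logic, using hypothetical names and ignoring the '{{'/'}}' escapes and newline handling that the real code performs:

    # Simplified illustration of the brace matching in _tokenize_fstring_mode:
    # split an f-string body into literal chunks and {...} expression chunks.
    # split_fstring_body is a hypothetical helper, not part of tokenize.py.
    def split_fstring_body(body):
        chunks = []   # (kind, text) pairs, mirroring FSTRING_MIDDLE/FSTRING_EXPR
        depth = 0     # size of the curly-bracket "stack"
        literal = ''  # pending middle text
        expr = ''     # pending expression text
        for c in body:
            if c == '{':
                if depth == 0 and literal:  # flush the literal before the field
                    chunks.append(('MIDDLE', literal))
                    literal = ''
                depth += 1                  # push
                expr += c
            elif c == '}' and depth:
                depth -= 1                  # pop
                expr += c
                if depth == 0:              # stack empty: the field is complete
                    chunks.append(('EXPR', expr))
                    expr = ''
            elif depth:
                expr += c                   # still inside the replacement field
            else:
                literal += c                # a lone '}' stays literal, as above
        if literal:
            chunks.append(('MIDDLE', literal))
        return chunks

    print(split_fstring_body('a{b}c'))
    # [('MIDDLE', 'a'), ('EXPR', '{b}'), ('MIDDLE', 'c')]
    print(split_fstring_body('{ {"k": 1}["k"] }x'))
    # [('EXPR', '{ {"k": 1}["k"] }'), ('MIDDLE', 'x')]  -- nested braces stay inside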
12 changes: 12 additions & 0 deletions lel.py
@@ -0,0 +1,12 @@
+import tokenize
+from pprint import pprint
+from io import BytesIO
+
+def t(s):
+    pprint(list(tokenize.tokenize(BytesIO(s.encode()).readline)))
+
+
+a = r'f"abc\
+def"'
+
+t(a)
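For reference, this scratch script drives the same backslash-continuation case tested in test_tokenize.py above: under this branch, t(a) should print a stream equivalent to FSTRING_START 'f"' (1, 0) (1, 2), a single FSTRING_MIDDLE spanning the escaped line break (1, 2) (2, 3), and FSTRING_END '"' (2, 3) (2, 4), plus tokenize's usual ENCODING, NEWLINE and ENDMARKER bookkeeping tokens.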