Use Lib/token.py and Lib/tokenize.py as the source of tokens · python/cpython@3d593ef · GitHub

Commit 3d593ef

Use Lib/token.py and Lib/tokenize.py as the source of tokens
1 parent 7e3beaf commit 3d593ef

4 files changed: +49 -92 lines changed

Parser/pgen/__main__.py

Lines changed: 10 additions & 31 deletions
@@ -1,41 +1,12 @@
-import os
-import sys
 import argparse
-import collections
-
-from lib2to3.pgen2 import grammar, tokenize
-
-from . import token
-from . import grammar as pgen_grammar
-
-def monkey_patch_pgen2(token_lines):
-    tokens = dict(token.generate_tokens(token_lines))
-    for name, value in tokens.items():
-        setattr(tokenize, name, value)

 from .pgen import ParserGenerator

-
-def main(grammar_file, tokens_file, gramminit_h_file, gramminit_c_file, verbose):
-    with open(tokens_file) as tok_file:
-        token_lines = tok_file.readlines()
-
-    monkey_patch_pgen2(token_lines)
-
-    p = ParserGenerator(grammar_file, token_lines, verbose=verbose)
-    grammar = p.make_grammar()
-    grammar.produce_graminit_h(gramminit_h_file.write)
-    grammar.produce_graminit_c(gramminit_c_file.write)
-
-
-if __name__ == "__main__":
+def main():
     parser = argparse.ArgumentParser(description="Parser generator main program.")
     parser.add_argument(
         "grammar", type=str, help="The file with the grammar definition in EBNF format"
     )
-    parser.add_argument(
-        "tokens", type=str, help="The file with the token definition"
-    )
     parser.add_argument(
         "gramminit_h",
         type=argparse.FileType('w'),
@@ -48,4 +19,12 @@ def main(grammar_file, tokens_file, gramminit_h_file, gramminit_c_file, verbose)
     )
     parser.add_argument("--verbose", "-v", action="count")
     args = parser.parse_args()
-    main(args.grammar, args.tokens, args.gramminit_h, args.gramminit_c, args.verbose)
+
+    p = ParserGenerator(args.grammar, verbose=args.verbose)
+    grammar = p.make_grammar()
+    grammar.produce_graminit_h(args.gramminit_h.write)
+    grammar.produce_graminit_c(args.gramminit_c.write)
+
+
+if __name__ == "__main__":
+    main()
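For orientation, here is a minimal sketch of what driving the generator looks like after this change, written against the new ParserGenerator signature from the diff; the import path and file names are illustrative assumptions, not part of the commit:

    # Hypothetical driver mirroring the new main(); paths are placeholders.
    from pgen.pgen import ParserGenerator  # assumes the Parser/ directory is importable

    p = ParserGenerator("Grammar/Grammar", verbose=True)
    grammar = p.make_grammar()
    with open("graminit.h", "w") as h_file, open("graminit.c", "w") as c_file:
        grammar.produce_graminit_h(h_file.write)
        grammar.produce_graminit_c(c_file.write)

Note that the separate tokens file argument is gone: token definitions now come from Lib/token.py instead of a hand-parsed token listing.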

Parser/pgen/grammar.py

Lines changed: 0 additions & 3 deletions
@@ -1,8 +1,5 @@
 from lib2to3.pgen2 import grammar

-from . import token
-
-
 class Grammar(grammar.Grammar):

     def produce_graminit_h(self, writer):

Parser/pgen/pgen.py

Lines changed: 39 additions & 18 deletions
@@ -1,17 +1,38 @@
+import os
+import sys
 import collections
-from lib2to3.pgen2 import tokenize
+import importlib.machinery

-from . import token, grammar
+# Use Lib/token.py and Lib/tokenize.py to obtain the tokens. To maintain this
+# compatible with older versions of Python, we need to make sure that we only
+# import these two files (and not any of the dependencies of these files).
+
+CURRENT_FOLDER_LOCATION = os.path.dirname(os.path.realpath(__file__))
+LIB_LOCATION = os.path.realpath(os.path.join(CURRENT_FOLDER_LOCATION, '..', '..', 'Lib'))
+TOKEN_LOCATION = os.path.join(LIB_LOCATION, 'token.py')
+TOKENIZE_LOCATION = os.path.join(LIB_LOCATION, 'tokenize.py')
+
+token = importlib.machinery.SourceFileLoader('token',
+                                             TOKEN_LOCATION).load_module()
+# Add token to the module cache so tokenize.py uses that excact one instead of
+# the one in the stdlib of the interpreter executing this file.
+sys.modules['token'] = token
+tokenize = importlib.machinery.SourceFileLoader('tokenize',
+                                                TOKENIZE_LOCATION).load_module()
+
+from . import grammar

 class ParserGenerator(object):

-    def __init__(self, filename, tokens, stream=None, verbose=False):
+    def __init__(self, filename, stream=None, verbose=False):
         close_stream = None
         if stream is None:
             stream = open(filename)
             close_stream = stream.close
-        self.tokens = dict(token.generate_tokens(tokens))
-        self.opmap = dict(token.generate_opmap(tokens))
+        self.tokens = token
+        self.opmap = token.EXACT_TOKEN_TYPES
+        # Manually add <> so it does not collide with !=
+        self.opmap['<>'] = self.tokens.NOTEQUAL
         self.verbose = verbose
         self.filename = filename
         self.stream = stream
@@ -87,9 +108,9 @@ def make_label(self, c, label):
                     return ilabel
             else:
                 # A named token (NAME, NUMBER, STRING)
-                itoken = self.tokens.get(label, None)
+                itoken = getattr(self.tokens, label, None)
                 assert isinstance(itoken, int), label
-                assert itoken in self.tokens.values(), label
+                assert itoken in self.tokens.tok_name, label
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -105,12 +126,12 @@ def make_label(self, c, label):
                 if value in c.keywords:
                     return c.keywords[value]
                 else:
-                    c.labels.append((self.tokens['NAME'], value))
+                    c.labels.append((self.tokens.NAME, value))
                     c.keywords[value] = ilabel
                     return ilabel
             else:
                 # An operator (any non-numeric token)
-                itoken = self.tokens[self.opmap[value]] # Fails if unknown token
+                itoken = self.opmap[value] # Fails if unknown token
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -163,16 +184,16 @@ def parse(self):
         dfas = collections.OrderedDict()
         startsymbol = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
-        while self.type != self.tokens['ENDMARKER']:
-            while self.type == self.tokens['NEWLINE']:
+        while self.type != self.tokens.ENDMARKER:
+            while self.type == self.tokens.NEWLINE:
                 self.gettoken()
             # RULE: NAME ':' RHS NEWLINE
-            name = self.expect(self.tokens['NAME'])
+            name = self.expect(self.tokens.NAME)
             if self.verbose:
                 print("Processing rule {dfa_name}".format(dfa_name=name))
-            self.expect(self.tokens['OP'], ":")
+            self.expect(self.tokens.OP, ":")
             a, z = self.parse_rhs()
-            self.expect(self.tokens['NEWLINE'])
+            self.expect(self.tokens.NEWLINE)
             if self.verbose:
                 self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)
@@ -288,7 +309,7 @@ def parse_alt(self):
         # ALT: ITEM+
         a, b = self.parse_item()
         while (self.value in ("(", "[") or
-               self.type in (self.tokens['NAME'], self.tokens['STRING'])):
+               self.type in (self.tokens.NAME, self.tokens.STRING)):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
@@ -299,7 +320,7 @@ def parse_item(self):
         if self.value == "[":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(self.tokens['OP'], "]")
+            self.expect(self.tokens.OP, "]")
             a.addarc(z)
             return a, z
         else:
@@ -319,9 +340,9 @@ def parse_atom(self):
         if self.value == "(":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(self.tokens['OP'], ")")
+            self.expect(self.tokens.OP, ")")
             return a, z
-        elif self.type in (self.tokens['NAME'], self.tokens['STRING']):
+        elif self.type in (self.tokens.NAME, self.tokens.STRING):
             a = NFAState()
             z = NFAState()
             a.addarc(z, self.value)
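The import block at the top of the new pgen.py is the heart of the change: it loads the in-tree Lib/token.py with importlib and registers it in sys.modules before loading Lib/tokenize.py, so that tokenize's own "import token" resolves to that exact file rather than to the stdlib copy of the interpreter running pgen. A standalone sketch of the same trick, with placeholder paths that are assumptions for illustration, not taken from the commit:

    # Minimal sketch of the module-cache trick; the paths below are placeholders.
    import importlib.machinery
    import sys

    TOKEN_LOCATION = "/path/to/cpython/Lib/token.py"
    TOKENIZE_LOCATION = "/path/to/cpython/Lib/tokenize.py"

    # Load the chosen token.py from disk under the module name "token".
    token = importlib.machinery.SourceFileLoader("token", TOKEN_LOCATION).load_module()
    # Register it so any later "import token" (e.g. inside tokenize.py) reuses it.
    sys.modules["token"] = token
    tokenize = importlib.machinery.SourceFileLoader("tokenize", TOKENIZE_LOCATION).load_module()

    # Token numbers and the operator table now come from the loaded file,
    # e.g. token.NOTEQUAL and token.EXACT_TOKEN_TYPES as used by ParserGenerator.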

Parser/pgen/token.py

Lines changed: 0 additions & 40 deletions
This file was deleted.
