8000 Add pgen module to the Parser folder to produce pgen output using pgen2 · python/cpython@2bc3198 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2bc3198

Browse files
committed
Add pgen module to the Parser folder to produce pgen output using pgen2
1 parent 3bacf61 commit 2bc3198

File tree

5 files changed

+591
-0
lines changed

5 files changed

+591
-0
lines changed

Parser/pgen/__init__.py

Whitespace-only changes.

Parser/pgen/__main__.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import os
2+
import sys
3+
import argparse
4+
import collections
5+
6+
from lib2to3.pgen2 import grammar, tokenize
7+
8+
from . import token
9+
from . import grammar as pgen_grammar
10+
11+
def monkey_patch_pgen2(token_lines):
    """Inject the token definitions parsed from *token_lines* into
    lib2to3's ``tokenize`` module, so pgen2 resolves token names against
    the current token set instead of its bundled one."""
    for attr_name, attr_value in dict(token.generate_tokens(token_lines)).items():
        setattr(tokenize, attr_name, attr_value)
15+
16+
from .pgen import ParserGenerator
17+
18+
19+
def main(grammar_file, tokens_file, gramminit_h_file, gramminit_c_file, verbose):
    """Generate graminit.h / graminit.c output from a grammar definition.

    Args:
        grammar_file: path to the grammar definition in EBNF format.
        tokens_file: path to the token definition file.
        gramminit_h_file: writable file object receiving the #define output.
        gramminit_c_file: writable file object receiving the C table data.
        verbose: verbosity level forwarded to ParserGenerator (may be None).
    """
    with open(tokens_file) as tok_file:
        token_lines = tok_file.readlines()

    # pgen2 must see this token numbering before the grammar is built.
    monkey_patch_pgen2(token_lines)

    p = ParserGenerator(grammar_file, token_lines, verbose=verbose)
    # Renamed from `grammar` so the local no longer shadows the
    # module-level `from lib2to3.pgen2 import grammar` import.
    generated_grammar = p.make_grammar()
    generated_grammar.produce_graminit_h(gramminit_h_file.write)
    generated_grammar.produce_graminit_c(gramminit_c_file.write)
29+
30+
31+
if __name__ == "__main__":
    # Command-line entry point: grammar + tokens in, graminit.h/.c out.
    arg_parser = argparse.ArgumentParser(
        description="Parser generator main program."
    )
    arg_parser.add_argument(
        "grammar",
        type=str,
        help="The file with the grammar definition in EBNF format",
    )
    arg_parser.add_argument(
        "tokens",
        type=str,
        help="The file with the token definition",
    )
    arg_parser.add_argument(
        "gramminit_h",
        type=argparse.FileType('w'),
        help="The path to write the grammar's non-terminals as #defines",
    )
    arg_parser.add_argument(
        "gramminit_c",
        type=argparse.FileType('w'),
        help="The path to write the grammar as initialized data",
    )
    arg_parser.add_argument("--verbose", "-v", action="count")

    cli = arg_parser.parse_args()
    main(cli.grammar, cli.tokens, cli.gramminit_h, cli.gramminit_c, cli.verbose)

Parser/pgen/grammar.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from lib2to3.pgen2 import grammar
2+
3+
from . import token
4+
5+
6+
class Grammar(grammar.Grammar):
    """Grammar subclass that serializes itself as CPython's
    graminit.h / graminit.c source files.

    Every ``produce_*`` / ``print_*`` method takes a *writer* callable
    (e.g. a file object's ``write`` method) and invokes it once per
    emitted chunk of text.
    """

    def produce_graminit_h(self, writer):
        """Emit one ``#define <symbol> <number>`` line per non-terminal."""
        writer("/* Generated by Parser/pgen2 */\n\n")
        for number, symbol in self.number2symbol.items():
            writer("#define {} {}\n".format(symbol, number))

    def produce_graminit_c(self, writer):
        """Emit the complete grammar tables as initialized C data."""
        writer("/* Generated by Parser/pgen2 */\n\n")

        writer('#include "pgenheaders.h"\n')
        writer('#include "grammar.h"\n')
        writer("grammar _PyParser_Grammar;\n")

        # The state/arc/label arrays must be emitted before the struct
        # that references them.
        self.print_dfas(writer)
        self.print_labels(writer)

        writer("grammar _PyParser_Grammar = {\n")
        writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
        writer(" dfas,\n")
        writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
        writer(" {start_number}\n".format(start_number=self.start))
        writer("};\n")

    def print_labels(self, writer):
        """Emit the ``labels`` array mapping label numbers to names.

        Entries whose name is None are emitted with a 0 (NULL) name.
        """
        writer(
            "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
        )
        for label, name in self.labels:
            if name is None:
                writer(" {{{label}, 0}},\n".format(label=label))
            else:
                writer(
                    ' {{{label}, "{label_name}"}},\n'.format(
                        label=label, label_name=name
                    )
                )
        writer("};\n")

    def print_dfas(self, writer):
        """Emit the ``dfas`` array; each entry ends with its first-set
        encoded as a bitmap of label numbers."""
        self.print_states(writer)
        writer("static dfa dfas[{}] = {{\n".format(len(self.dfas)))
        for dfaindex, dfa_elem in enumerate(self.dfas.items()):
            symbol, (dfa, first_sets) = dfa_elem
            writer(
                ' {{{dfa_symbol}, "{symbol_name}", '.format(
                    dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
                )
                + "0, {n_states}, states_{dfa_index},\n".format(
                    n_states=len(dfa), dfa_index=dfaindex
                )
            )
            writer(' "')

            # First set as a bitmap (one bit per label number), written as
            # octal escapes so the C compiler reads it back as raw bytes.
            # Note: the original also built an unused list of label names
            # here; that dead code has been removed.  The loop variable is
            # renamed from `token` so it no longer shadows the module-level
            # `from . import token` import.
            bitset = bytearray((len(self.labels) >> 3) + 1)
            for first_label in first_sets:
                bitset[first_label >> 3] |= 1 << (first_label & 7)
            for byte in bitset:
                writer("\\%03o" % (byte & 0xFF))
            writer('"},\n')
        writer("};\n")

    def print_states(self, write):
        """Emit one ``states_<n>`` array per DFA, each preceded by the
        arc arrays its entries reference."""
        for dfaindex, dfa in enumerate(self.states):
            self.print_arcs(write, dfaindex, dfa)
            write(
                "static state states_{dfa_index}[{n_states}] = {{\n".format(
                    dfa_index=dfaindex, n_states=len(dfa)
                )
            )
            for stateindex, state in enumerate(dfa):
                narcs = len(state)
                write(
                    " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
                        n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
                    )
                )
            write("};\n")

    def print_arcs(self, write, dfaindex, states):
        """Emit one ``arcs_<dfa>_<state>`` array per state of one DFA;
        each arc is a (label, target-state) pair."""
        for stateindex, state in enumerate(states):
            narcs = len(state)
            write(
                "static arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
                    dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
                )
            )
            for a, b in state:
                write(
                    " {{{from_label}, {to_state}}},\n".format(
                        from_label=a, to_state=b
                    )
                )
            write("};\n")

0 commit comments

Comments
 (0)
0