bpo-35808: Retire pgen and use pgen2 to generate the parser by pablogsal · Pull Request #11814 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-35808: Retire pgen and use pgen2 to generate the parser #11814

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 1, 2019
Next Next commit
Add pgen module to the Parser folder to produce pgen output using pgen2
  • Loading branch information
pablogsal committed Feb 20, 2019
commit 2bc3198c20ba2a56a788e1f7555dbcab303e203f
Empty file added Parser/pgen/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions Parser/pgen/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import sys
import argparse
import collections

from lib2to3.pgen2 import grammar, tokenize

from . import token
from . import grammar as pgen_grammar

def monkey_patch_pgen2(token_lines):
    """Install this grammar's token definitions onto lib2to3's tokenize module.

    The attribute values parsed from *token_lines* override pgen2's built-in
    token numbering so the generator emits numbers matching CPython's.
    """
    for attr_name, attr_value in dict(token.generate_tokens(token_lines)).items():
        setattr(tokenize, attr_name, attr_value)

from .pgen import ParserGenerator


def main(grammar_file, tokens_file, gramminit_h_file, gramminit_c_file, verbose):
    """Generate graminit.h / graminit.c output from a grammar definition.

    Args:
        grammar_file: path to the EBNF grammar definition file.
        tokens_file: path to the token definition file.
        gramminit_h_file: writable file object for the generated #defines.
        gramminit_c_file: writable file object for the generated C data.
        verbose: verbosity level forwarded to ParserGenerator (may be None).
    """
    with open(tokens_file) as tok_file:
        token_lines = tok_file.readlines()

    # pgen2's tokenizer must learn this grammar's token numbering before
    # the parser generator runs.
    monkey_patch_pgen2(token_lines)

    p = ParserGenerator(grammar_file, token_lines, verbose=verbose)
    # Named "generated_grammar" rather than "grammar" to avoid shadowing
    # the lib2to3.pgen2.grammar module imported at the top of the file.
    generated_grammar = p.make_grammar()
    generated_grammar.produce_graminit_h(gramminit_h_file.write)
    generated_grammar.produce_graminit_c(gramminit_c_file.write)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Parser generator main program.")
    parser.add_argument(
        "grammar", type=str, help="The file with the grammar definition in EBNF format"
    )
    parser.add_argument(
        "tokens", type=str, help="The file with the token definition"
    )
    # Spelled "graminit" (one "m") to match the traditional CPython output
    # file names graminit.h / graminit.c.
    parser.add_argument(
        "graminit_h",
        type=argparse.FileType('w'),
        help="The path to write the grammar's non-terminals as #defines",
    )
    parser.add_argument(
        "graminit_c",
        type=argparse.FileType('w'),
        help="The path to write the grammar as initialized data",
    )
    parser.add_argument("--verbose", "-v", action="count")
    args = parser.parse_args()
    main(args.grammar, args.tokens, args.graminit_h, args.graminit_c, args.verbose)
100 changes: 100 additions & 0 deletions Parser/pgen/grammar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from lib2to3.pgen2 import grammar
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also copy this, so we’re completely independent from lib2to3?


from . import token


class Grammar(grammar.Grammar):
    """A pgen2 grammar that can emit CPython's graminit.h / graminit.c.

    Each ``produce_*`` / ``print_*`` method takes a ``writer`` callable
    (typically the ``write`` method of an open file object) and emits C
    source text through it.
    """

    def produce_graminit_h(self, writer):
        """Emit one #define per non-terminal, mapping its name to its number."""
        writer("/* Generated by Parser/pgen2 */\n\n")
        for number, symbol in self.number2symbol.items():
            writer("#define {} {}\n".format(symbol, number))

    def produce_graminit_c(self, writer):
        """Emit the initialized C data for the full grammar tables."""
        writer("/* Generated by Parser/pgen2 */\n\n")

        writer('#include "pgenheaders.h"\n')
        writer('#include "grammar.h"\n')
        writer("grammar _PyParser_Grammar;\n")

        self.print_dfas(writer)
        self.print_labels(writer)

        writer("grammar _PyParser_Grammar = {\n")
        writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
        writer(" dfas,\n")
        writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
        writer(" {start_number}\n".format(start_number=self.start))
        writer("};\n")

    def print_labels(self, writer):
        """Emit the static ``labels`` array of (label number, name) pairs."""
        writer(
            "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
        )
        for label, name in self.labels:
            if name is None:
                writer(" {{{label}, 0}},\n".format(label=label))
            else:
                writer(
                    ' {{{label}, "{label_name}"}},\n'.format(
                        label=label, label_name=name
                    )
                )
        writer("};\n")

    def print_dfas(self, writer):
        """Emit the ``dfas`` array: one entry per non-terminal DFA, each
        referencing its states and carrying its first set as a bitset."""
        self.print_states(writer)
        writer("static dfa dfas[{}] = {{\n".format(len(self.dfas)))
        for dfaindex, dfa_elem in enumerate(self.dfas.items()):
            symbol, (dfa, first_sets) = dfa_elem
            writer(
                ' {{{dfa_symbol}, "{symbol_name}", '.format(
                    dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
                )
                + "0, {n_states}, states_{dfa_index},\n".format(
                    n_states=len(dfa), dfa_index=dfaindex
                )
            )
            writer(' "')

            # Encode the first set as an octal-escaped bitset string,
            # one bit per label (8 labels per byte).
            # NOTE(review): loop variable renamed from "token", which
            # shadowed the token module imported at the top of this file.
            bitset = bytearray((len(self.labels) >> 3) + 1)
            for label_index in first_sets:
                bitset[label_index >> 3] |= 1 << (label_index & 7)
            for byte in bitset:
                writer("\\%03o" % (byte & 0xFF))
            writer('"},\n')
        writer("};\n")

    def print_states(self, write):
        """Emit one ``states_N`` array per DFA, after its arc arrays."""
        for dfaindex, dfa in enumerate(self.states):
            self.print_arcs(write, dfaindex, dfa)
            write(
                "static state states_{dfa_index}[{n_states}] = {{\n".format(
                    dfa_index=dfaindex, n_states=len(dfa)
                )
            )
            for stateindex, state in enumerate(dfa):
                narcs = len(state)
                write(
                    " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
                        n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
                    )
                )
            write("};\n")

    def print_arcs(self, write, dfaindex, states):
        """Emit the ``arcs_D_S`` arrays of (label, target state) pairs."""
        for stateindex, state in enumerate(states):
            narcs = len(state)
            write(
                "static arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
                    dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
                )
            )
            for a, b in state:
                write(
                    " {{{from_label}, {to_state}}},\n".format(
                        from_label=a, to_state=b
                    )
                )
            write("};\n")
Loading
0