GH-131498: Cases generator: Parse down to C statement level. by markshannon · Pull Request #131948 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-131498: Cases generator: Parse down to C statement level. #131948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter
<h3 class="SelectMenu-title">Filter by extension</h3>
Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add handling for #if macros. Work in progress
  • Loading branch information
markshannon committed Mar 31, 2025
commit f7cccf8bba58c02e941beae9530cfac84d56d27f
2 changes: 1 addition & 1 deletion Tools/cases_generator/cwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def emit_token(self, tkn: Token) -> None:
self.maybe_dedent(tkn.text)
self.set_position(tkn)
self.emit_text(tkn.text)
if tkn.kind == "CMACRO":
if tkn.kind.startswith("CMACRO"):
self.newline = True
self.maybe_indent(tkn.text)

Expand Down
37 changes: 35 additions & 2 deletions Tools/cases_generator/generators_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from typing import Callable, TextIO, Iterator, Iterable
from lexer import Token
from stack import Storage, StackError
from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, ForStmt, WhileStmt
from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, ForStmt, WhileStmt, MacroIfStmt

# Set this to true for voluminous output showing state of stack and locals
PRINT_STACKS = False
Expand Down Expand Up @@ -476,6 +476,8 @@ def _emit_stmt(
return self._emit_for(stmt, uop, storage, inst)
elif isinstance(stmt, WhileStmt):
return self._emit_while(stmt, uop, storage, inst)
elif isinstance(stmt, MacroIfStmt):
return self._emit_macro_if(stmt, uop, storage, inst)
else:
raise NotImplementedError("Unexpected statement")

Expand Down Expand Up @@ -533,6 +535,37 @@ def _emit_simple(
except StackError as ex:
raise analysis_error(ex.args[0], tkn) #from None


def _emit_macro_if(
self,
stmt: IfStmt,
uop: CodeSection,
storage: Storage,
inst: Instruction | None,
) -> tuple[bool, Token, Storage]:
self.out.emit(stmt.condition)
branch = stmt.else_ is not None
reachable = True
for s in stmt.body:
r, tkn, storage = self._emit_stmt(s, uop, storage, inst)
if tkn is not None:
self.out.emit(tkn)
if not r:
reachable = False
if branch:
else_storage = storage.copy()
self.out.emit(stmt.else_)
for s in stmt.else_body:
r, tkn, else_storage = self._emit_stmt(s, uop, else_storage, inst)
if tkn is not None:
self.out.emit(tkn)
if not r:
reachable = False
storage.merge(else_storage, self.out)
self.out.emit(stmt.endif)
return reachable, None, storage


def _emit_if(
self,
stmt: IfStmt,
Expand Down Expand Up @@ -640,7 +673,7 @@ def emit_tokens(
if emit_braces:
self.out.emit(tkn)
except StackError as ex:
raise analysis_error(ex.args[0], last) from None
raise analysis_error(ex.args[0], tkn) from None
return storage

def emit(self, txt: str | Token) -> None:
Expand Down
18 changes: 15 additions & 3 deletions Tools/cases_generator/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ def choice(*opts: str) -> str:

# Macros
macro = r"#.*\n"
CMACRO = "CMACRO"
CMACRO_IF = "CMACRO_IF"
CMACRO_ELSE = "CMACRO_ELSE"
CMACRO_ENDIF = "CMACRO_ENDIF"
CMACRO_OTHER = "CMACRO_OTHER"

id_re = r"[a-zA-Z_][0-9a-zA-Z_]*"
IDENTIFIER = "IDENTIFIER"
Expand Down Expand Up @@ -292,6 +295,7 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
linestart = -1
for m in matcher.finditer(src):
start, end = m.span()
macro_body = ""
text = m.group(0)
if text in keywords:
kind = keywords[text]
Expand All @@ -316,7 +320,15 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
elif text[0] == "'":
kind = CHARACTER
elif text[0] == "#":
kind = CMACRO
macro_body = text[1:].strip()
if macro_body.startswith("if"):
kind = CMACRO_IF
elif macro_body.startswith("else"):
kind = CMACRO_ELSE
elif macro_body.startswith("endif"):
kind = CMACRO_ENDIF
else:
kind = CMACRO_OTHER
elif text[0] == "/" and text[1] in "/*":
kind = COMMENT
else:
Expand All @@ -338,7 +350,7 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
line += newlines
else:
begin = line, start - linestart
if kind == CMACRO:
if macro_body:
linestart = end
line += 1
if kind != "\n":
Expand Down
1 change: 1 addition & 0 deletions Tools/cases_generator/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ForStmt,
WhileStmt,
BlockStmt,
MacroIfStmt,
)

import pprint
Expand Down
35 changes: 32 additions & 3 deletions Tools/cases_generator/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ def tokens(self) -> Iterator[lx.Token]:
class MacroIfStmt(Stmt):
condition: lx.Token
body: list[Stmt]
else_: lx.Token | None
else_body: list[Stmt] | None
endif: lx.Token

def print(self, out:CWriter) -> None:
out.emit(self.condition)
Expand Down Expand Up @@ -657,7 +659,15 @@ def stmt(self) -> Stmt:
return self.for_stmt(tkn)
elif tkn := self.expect(lx.WHILE):
return self.while_stmt(tkn)
elif tkn := self.expect(lx.CMACRO):
elif tkn := self.expect(lx.CMACRO_IF):
return self.macro_if(tkn)
elif tkn := self.expect(lx.CMACRO_ELSE):
msg = "Unexpected #else"
raise self.make_syntax_error(msg)
elif tkn := self.expect(lx.CMACRO_ENDIF):
msg = "Unexpected #endif"
raise self.make_syntax_error(msg)
elif tkn := self.expect(lx.CMACRO_OTHER):
return SimpleStmt([tkn])
elif tkn := self.expect(lx.SWITCH):
msg = "switch statements are not supported due to their complex flow control. Sorry."
Expand All @@ -678,6 +688,24 @@ def if_stmt(self, if_: lx.Token) -> IfStmt:
else_body = self.block()
return IfStmt(if_, condition, body, else_, else_body)


def macro_if(self, cond: lx.Token) -> MacroIfStmt:
    """Parse the rest of a ``#if`` group once its opening token is consumed.

    Statements are collected into the ``#if`` body until a ``#else`` or
    ``#endif`` token is seen.  After a ``#else``, statements go into a
    separate else-body instead.

    Args:
        cond: The already-consumed ``CMACRO_IF`` token that opens the group.

    Returns:
        A ``MacroIfStmt`` built from the opening token, the body, the
        ``#else`` token and else-body (both None when there is no
        ``#else``), and the closing ``#endif`` token.

    Raises:
        The error built by ``self.make_syntax_error`` when a second
        ``#else`` appears in the same group.
    """
    else_: lx.Token | None = None
    body: list[Stmt] = []
    else_body: list[Stmt] | None = None
    # ``part`` is whichever list is currently receiving statements.
    part = body
    while True:
        if tkn := self.expect(lx.CMACRO_ENDIF):
            return MacroIfStmt(cond, body, else_, else_body, tkn)
        if tkn := self.expect(lx.CMACRO_ELSE):
            if else_ is not None:
                raise self.make_syntax_error("Multiple #else")
            else_ = tkn
            else_body = []
            part = else_body
        else:
            # Anything else is an ordinary statement; nested ``#if``
            # groups are handled by ``self.stmt()`` calling back here.
            part.append(self.stmt())

def for_stmt(self, for_: lx.Token) -> ForStmt:
lparen = self.require(lx.LPAREN)
header = [lparen] + self.consume_to(lx.RPAREN)
Expand All @@ -693,6 +721,7 @@ def while_stmt(self, while_: lx.Token) -> WhileStmt:

if __name__ == "__main__":
import sys
import pprint

if sys.argv[1:]:
filename = sys.argv[1]
Expand All @@ -710,5 +739,5 @@ def while_stmt(self, while_: lx.Token) -> WhileStmt:
filename = "<default>"
src = "if (x) { x.foo; // comment\n}"
parser = Parser(src, filename)
x = parser.definition()
print(x)
while node := parser.definition():
pprint.pprint(node)
0