Split cparser into sparser and eparser · python/cpython@17466fd · GitHub
[go: up one dir, main page]

Skip to content

Commit 17466fd

Browse files
committed
Split cparser into sparser and eparser
1 parent 1869a8b commit 17466fd

File tree

3 files changed

+264
-203
lines changed

3 files changed

+264
-203
lines changed

Tools/cases_generator/cparser.py renamed to Tools/cases_generator/eparser.py

Lines changed: 33 additions & 199 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
1+
# C expression parser
2+
13
from __future__ import annotations
24

35
from dataclasses import dataclass, field
4-
from typing import NamedTuple, Callable
6+
from typing import NamedTuple, Callable, TypeVar
57

68
import lexer as lx
7-
from plexer import Lexer
9+
from plexer import PLexer
10+
811
Token = lx.Token
912

1013

11-
def contextual(func: Callable[[Parser], Node|None]):
14+
T = TypeVar("T", bound="EParser")
15+
def contextual(func: Callable[[T], Node|None]) -> Callable[[T], Node|None]:
1216
# Decorator to wrap grammar methods.
1317
# Resets position if `func` returns None.
14-
def contextual_wrapper(self: Parser) -> Node|None:
18+
def contextual_wrapper(self: T) -> Node|None:
1519
begin = self.getpos()
1620
res = func(self)
1721
if res is None:
@@ -24,7 +28,7 @@ def contextual_wrapper(self: Parser) -> Node|None:
2428

2529

2630
class Context(NamedTuple):
27-
owner: Lexer
31+
owner: PLexer
2832
begin: int
2933
end: int
3034

@@ -36,70 +40,14 @@ class Node:
3640
@property
3741
def text(self) -> str:
3842
context = self.context
43+
if not context:
44+
return ""
3945
tokens = context.owner.tokens
4046
begin = context.begin
4147
end = context.end
4248
return lx.to_text(tokens[begin:end])
4349

4450

45-
@dataclass
46-
class Block(Node):
47-
stmts: list[Node]
48-
49-
50-
@dataclass
51-
class ForStmt(Node):
52-
init: Node | None
53-
cond: Node | None
54-
next: Node | None
55-
body: Node
56-
57-
58-
@dataclass
59-
class IfStmt(Node):
60-
cond: Node
61-
body: Node
62-
orelse: Node | None
63-
64-
65-
@dataclass
66-
class WhileStmt(Node):
67-
cond: Node
68-
body: Node
69-
70-
71-
@dataclass
72-
class BreakStmt():
73-
pass
74-
75-
76-
@dataclass
77-
class ContinueStmt():
78-
pass
79-
80-
81-
@dataclass
82-
class ReturnStmt(Node):
83-
expr: Node | None
84-
85-
86-
@dataclass
87-
class GotoStmt(Node):
88-
label: Token
89-
90-
91-
@dataclass
92-
class NullStmt(Node):
93-
pass
94-
95-
96-
@dataclass
97-
class VarDecl(Node):
98-
type: Token
99-
name: Token
100-
init: Node | None
101-
102-
10351
@dataclass
10452
class InfixOp(Node):
10553
left: Node
@@ -160,11 +108,11 @@ class Number(Node):
160108

161109
@property
162110
def value(self):
163-
v = self.tok.value
111+
text = self.tok.text
164112
try:
165-
return int(v)
113+
return int(text)
166114
except ValueError:
167-
return float(v)
115+
return float(text)
168116

169117

170118
@dataclass
@@ -230,136 +178,12 @@ def name(self):
230178
}
231179

232180

233-
class Parser(Lexer):
234-
235-
@contextual
236-
def stmt(self) -> Node | None:
237-
if self.eof():
238-
return None
239-
kind = self.peek().kind
240-
if kind == lx.LBRACE:
241-
return self.block()
242-
if kind == lx.FOR:
243-
return self.for_stmt()
244-
if kind == lx.IF:
245-
return self.if_stmt()
246-
if kind == lx.WHILE:
247-
return self.while_stmt()
248-
# TODO: switch
249-
if kind == lx.BREAK:
250-
self.next()
251-
self.require(lx.SEMI)
252-
return BreakStmt()
253-
if kind == lx.CONTINUE:
254-
self.next()
255-
self.require(lx.SEMI)
256-
return ContinueStmt()
257-
if kind == lx.RETURN:
258-
self.next()
259-
expr = self.expr() # May be None
260-
self.require(lx.SEMI)
261-
return ReturnStmt(expr)
262-
if kind == lx.GOTO:
263-
self.next()
264-
label = self.require(lx.IDENTIFIER)
265-
self.require(lx.SEMI)
266-
return GotoStmt(label)
267-
# TODO: switch, case, default, label
268-
if kind == lx.SEMI:
269-
return self.empty_stmt()
270-
if decl := self.declaration():
271-
return decl
272-
return self.expr_stmt()
273-
274-
@contextual
275-
def block(self):
276-
if self.expect(lx.LBRACE):
277-
stmts = []
278-
while s := self.stmt():
279-
stmts.append(s)
280-
if not self.expect(lx.RBRACE):
281-
raise self.make_syntax_error("Expected '}'")
282-
return Block(stmts)
283-
284-
@contextual
285-
def for_stmt(self):
286-
if self.expect(lx.FOR):
287-
self.require(lx.LPAREN)
288-
init = self.expr()
289-
self.require(lx.SEMI)
290-
cond = self.expr()
291-
self.require(lx.SEMI)
292-
next = self.expr()
293-
self.require(lx.RPAREN)
294-
body = self.stmt()
295-
if not body:
296-
raise self.make_syntax_error("Expected statement")
297-
return ForStmt(init, cond, next, body)
298-
299-
@contextual
300-
def if_stmt(self):
301-
if self.expect(lx.IF):
302-
self.require(lx.LPAREN)
303-
cond = self.expr()
304-
if not cond:
305-
raise self.make_syntax_error("Expected expression")
306-
self.require(lx.RPAREN)
307-
body = self.stmt()
308-
if not body:
309-
raise self.make_syntax_error("Expected statement")
310-
orelse = None
311-
if self.expect(lx.ELSE):
312-
orelse = self.stmt()
313-
if not orelse:
314-
raise self.make_syntax_error("Expected statement")
315-
return IfStmt(cond, body, orelse)
316-
317-
@contextual
318-
def while_stmt(self):
319-
if self.expect(lx.WHILE):
320-
self.require(lx.LPAREN)
321-
cond = self.expr()
322-
if not cond:
323-
raise self.make_syntax_error("Expected expression")
324-
self.require(lx.RPAREN)
325-
body = self.stmt()
326-
if not body:
327-
raise self.make_syntax_error("Expected statement")
328-
return WhileStmt(cond, body)
329-
330-
@contextual
331-
def empty_stmt(self):
332-
if self.expect(lx.SEMI):
333-
return NullStmt()
334-
335-
@contextual
336-
def expr_stmt(self):
337-
if expr := self.expr():
338-
self.require(lx.SEMI)
339-
return expr
340-
341-
def declaration(self):
342-
tok = self.peek()
343-
if not tok:
344-
return None
345-
# TODO: Do it for real
346-
if tok.kind in (lx.INT, lx.CHAR, lx.FLOAT, lx.DOUBLE):
347-
type = self.next()
348-
name = self.require(lx.IDENTIFIER)
349-
if self.expect(lx.EQUALS):
350-
init = self.expr()
351-
if not init:
352-
raise self.make_syntax_error("Expected initialization expression")
353-
else:
354-
init = None
355-
self.require(lx.SEMI)
356-
return VarDecl(type, name, init)
357-
181+
class EParser(PLexer):
358182

359183
@contextual
360184
def expr(self) -> Node | None:
361185
# TODO: All the other forms of expressions
362-
things = []
186+
things: list[Node|Token] = [] # TODO: list[tuple[Token|None, Node]]
363187
if not (term := self.full_term()):
364188
return None
365189
things.append(term)
@@ -381,15 +205,19 @@ def expr(self) -> Node | None:
381205
def full_term(self) -> Node | None:
382206
tok = self.peek()
383207
if tok and tok.kind in PREFIX_OPS:
384-
return PrefixOp(self.next(), self.full_term())
208+
self.next()
209+
term = self.full_term()
210+
if not term:
211+
raise self.make_syntax_error(f"Expected term following {tok}")
212+
return PrefixOp(tok, term)
385213
# TODO: SIZEOF
386214
if cast := self.cast():
387215
return cast
388216
term = self.term()
389217
if not term:
390218
return None
391219
if self.expect(lx.LPAREN):
392-
args = []
220+
args: list[Node] = []
393221
while arg := self.expr():
394222
args.append(arg)
395223
if not self.expect(lx.COMMA):
@@ -398,6 +226,8 @@ def full_term(self) -> Node | None:
398226
return Call(term, args)
399227
if self.expect(lx.LBRACKET):
400228
index = self.expr()
229+
if not index:
230+
raise self.make_syntax_error("Expected index expression")
401231
self.require(lx.RBRACKET)
402232
return Index(term, index)
403233
if self.expect(lx.PERIOD):
@@ -421,9 +251,10 @@ def cast(self):
421251

422252
@contextual
423253
def type(self):
424-
tok = self.peek()
425-
if tok.kind in (lx.INT, lx.CHAR, lx.FLOAT, lx.DOUBLE):
254+
token = self.peek()
255+
if token and token.kind in (lx.INT, lx.CHAR, lx.FLOAT, lx.DOUBLE):
426256
type = self.next()
257+
assert type
427258
stars = 0
428259
while self.expect(lx.TIMES):
429260
stars += 1
@@ -432,6 +263,8 @@ def type(self):
432263
@contextual
433264
def term(self) -> Node | None:
434265
token = self.next()
266+
if not token:
267+
return None
435268
if token.kind == lx.NUMBER:
436269
return Number(token)
437270
if token.kind == lx.IDENTIFIER:
@@ -440,7 +273,6 @@ def term(self) -> Node | None:
440273
expr = self.expr()
441274
self.require(lx.RPAREN)
442275
return expr
443-
self.backup()
444276
return None
445277

446278
def infix_op(self) -> Token | None:
@@ -467,14 +299,16 @@ def infix_op(self) -> Token | None:
467299
else:
468300
filename = None
469301
src = "if (x) { x.foo; // comment\n}"
470-
p = Parser(src, filename)
471-
x = p.stmt()
302+
p = EParser(src, filename)
303+
x = p.expr()
472304
assert x, p.getpos()
473305
if x.text.rstrip() != src.rstrip():
474306
print("=== src ===")
475307
print(src)
476308
print("=== text ===")
477309
print(x.text)
310+
print("=== data ===")
311+
print(x)
478312
print("=== === ===")
479313
print("FAIL")
480314
else:

Tools/cases_generator/plexer.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Token = lx.Token
33

44

5-
class Lexer:
5+
class PLexer:
66
def __init__(self, src: str, filename: str|None = None):
77
self.src = src
88
self.filename = filename
@@ -29,6 +29,7 @@ def backup(self) -> None:
2929

3030
def next(self, raw: bool = False) -> Token | None:
3131
# Return next token and advance position; None if at EOF
32+
# TODO: Return synthetic EOF token instead of None?
3233
while self.pos < len(self.tokens):
3334
tok = self.tokens[self.pos]
3435
self.pos += 1
@@ -52,9 +53,10 @@ def maybe(self, kind: str, raw: bool = False) -> Token | None:
5253
def expect(self, kind: str) -> Token | None:
5354
# Return next token and advance position if kind matches
5455
tkn = self.next()
55-
if tkn is not None and tkn.kind == kind:
56-
return tkn
57-
self.backup()
56+
if tkn is not None:
57+
if tkn.kind == kind:
58+
return tkn
59+
self.backup()
5860
return None
5961

6062
def require(self, kind: str) -> Token:

0 commit comments

Comments
 (0)
0