1
+ # C expression parser
2
+
1
3
from __future__ import annotations
2
4
3
5
from dataclasses import dataclass , field
4
- from typing import NamedTuple , Callable
6
+ from typing import NamedTuple , Callable , TypeVar
5
7
6
8
import lexer as lx
7
- from plexer import Lexer
9
+ from plexer import PLexer
10
+
8
11
Token = lx .Token
9
12
10
13
11
- def contextual (func : Callable [[Parser ], Node | None ]):
14
+ T = TypeVar ("T" , bound = "EParser" )
15
+ def contextual (func : Callable [[T ], Node | None ]) -> Callable [[T ], Node | None ]:
12
16
# Decorator to wrap grammar methods.
13
17
# Resets position if `func` returns None.
14
- def contextual_wrapper (self : Parser ) -> Node | None :
18
+ def contextual_wrapper (self : T ) -> Node | None :
15
19
begin = self .getpos ()
16
20
res = func (self )
17
21
if res is None :
@@ -24,7 +28,7 @@ def contextual_wrapper(self: Parser) -> Node|None:
24
28
25
29
26
30
class Context (NamedTuple ):
27
- owner : Lexer
31
+ owner : PLexer
28
32
begin : int
29
33
end : int
30
34
@@ -36,70 +40,14 @@ class Node:
36
40
@property
37
41
def text (self ) -> str :
38
42
context = self .context
43
+ if not context :
44
+ return ""
39
45
tokens = context .owner .tokens
40
46
begin = context .begin
41
47
end = context .end
42
48
return lx .to_text (tokens [begin :end ])
43
49
44
50
45
- @dataclass
46
- class Block (Node ):
47
- stmts : list [Node ]
48
-
49
-
50
- @dataclass
51
- class ForStmt (Node ):
52
- init : Node | None
53
- cond : Node | None
54
- next : Node | None
55
- body : Node
56
-
57
-
58
- @dataclass
59
- class IfStmt (Node ):
60
- cond : Node
61
- body : Node
62
- orelse : Node | None
63
-
64
-
65
- @dataclass
66
- class WhileStmt (Node ):
67
- cond : Node
68
- body : Node
69
-
70
-
71
- @dataclass
72
- class BreakStmt ():
73
- pass
74
-
75
-
76
- @dataclass
77
- class ContinueStmt ():
78
- pass
79
-
80
-
81
- @dataclass
82
- class ReturnStmt (Node ):
83
- expr : Node | None
84
-
85
-
86
- @dataclass
87
- class GotoStmt (Node ):
88
- label : Token
89
-
90
-
91
- @dataclass
92
- class NullStmt (Node ):
93
- pass
94
-
95
-
96
- @dataclass
97
- class VarDecl (Node ):
98
- type : Token
99
- name : Token
100
- init : Node | None
101
-
102
-
103
51
@dataclass
104
52
class InfixOp (Node ):
105
53
left : Node
@@ -160,11 +108,11 @@ class Number(Node):
160
108
161
109
@property
162
110
def value (self ):
163
- v = self .tok .value
111
+ text = self .tok .text
164
112
try :
165
- return int (v )
113
+ return int (text )
166
114
except ValueError :
167
- return float (v )
115
+ return float (text )
168
116
169
117
170
118
@dataclass
@@ -230,136 +178,12 @@ def name(self):
230
178
}
231
179
232
180
233
- class Parser (Lexer ):
234
-
235
- @contextual
236
- def stmt (self ) -> Node | None :
237
- if self .eof ():
238
- return None
239
- kind = self .peek ().kind
240
- if kind == lx .LBRACE :
241
- return self .block ()
242
- if kind == lx .FOR :
243
- return self .for_stmt ()
244
- if kind == lx .IF :
245
- return self .if_stmt ()
246
- if kind == lx .WHILE :
247
- return self .while_stmt ()
248
- # TODO: switch
249
- if kind == lx .BREAK :
250
- self .next ()
251
- self .require (lx .SEMI )
252
- return BreakStmt ()
253
- if kind == lx .CONTINUE :
254
- self .next ()
255
- self .require (lx .SEMI )
256
- return ContinueStmt ()
257
- if kind == lx .RETURN :
258
- self .next ()
259
- expr = self .expr () # May be None
260
- self .require (lx .SEMI )
261
- return ReturnStmt (expr )
262
- if kind == lx .GOTO :
263
- self .next ()
264
- label = self .require (lx .IDENTIFIER )
265
- self .require (lx .SEMI )
266
- return GotoStmt (label )
267
- # TODO: switch, case, default, label
268
- if kind == lx .SEMI :
269
- return self .empty_stmt ()
270
- if decl := self .declaration ():
271
- return decl
272
- return self .expr_stmt ()
273
-
274
- @contextual
275
- def block (self ):
276
- if self .expect (lx .LBRACE ):
277
- stmts = []
278
- while s := self .stmt ():
279
- stmts .append (s )
280
- if not self .expect (lx .RBRACE ):
281
- raise self .make_syntax_error ("Expected '}'" )
282
- return Block (stmts )
283
-
284
- @contextual
285
- def for_stmt (self ):
286
- if self .expect (lx .FOR ):
287
- self .require (lx .LPAREN )
288
- init = self .expr ()
289
- self .require (lx .SEMI )
290
- cond = self .expr ()
291
- self .require (lx .SEMI )
292
- next = self .expr ()
293
- self .require (lx .RPAREN )
294
- body = self .stmt ()
295
- if not body :
296
- raise self .make_syntax_error ("Expected statement" )
297
- return ForStmt (init , cond , next , body )
298
-
299
- @contextual
300
- def if_stmt (self ):
301
- if self .expect (lx .IF ):
302
- self .require (lx .LPAREN )
303
- cond = self .expr ()
304
- if not cond :
305
- raise self .make_syntax_error ("Expected expression" )
306
- self .require (lx .RPAREN )
307
- body = self .stmt ()
308
- if not body :
309
- raise self .make_syntax_error ("Expected statement" )
310
- orelse = None
311
- if self .expect (lx .ELSE ):
312
- orelse = self .stmt ()
313
- if not orelse :
314
- raise self .make_syntax_error ("Expected statement" )
315
- return IfStmt (cond , body , orelse )
316
-
317
- @contextual
318
- def while_stmt (self ):
319
- if self .expect (lx .WHILE ):
320
- self .require (lx .LPAREN )
321
- cond = self .expr ()
322
- if not cond :
323
- raise self .make_syntax_error ("Expected expression" )
324
- self .require (lx .RPAREN )
325
- body = self .stmt ()
326
- if not body :
327
- raise self .make_syntax_error ("Expected statement" )
328
- return WhileStmt (cond , body )
329
-
330
- @contextual
331
- def empty_stmt (self ):
332
- if self .expect (lx .SEMI ):
333
- return NullStmt ()
334
-
335
- @contextual
336
- def expr_stmt (self ):
337
- if expr := self .expr ():
338
- self .require (lx .SEMI )
339
- return expr
340
-
341
- def declaration (self ):
342
- tok = self .peek ()
343
- if not tok :
344
- return None
345
- # TODO: Do it for real
346
- if tok .kind in (lx .INT , lx .CHAR , lx .FLOAT , lx .DOUBLE ):
347
- type = self .next ()
348
- name = self .require (lx .IDENTIFIER )
349
- if self .expect (lx .EQUALS ):
350
- init = self .expr ()
351
- if not init :
352
- raise self .make_syntax_error ("Expected initialization expression" )
353
- else :
354
- init = None
355
- self .require (lx .SEMI )
356
- return VarDecl (type , name , init )
357
-
181
+ class EParser (PLexer ):
358
182
359
183
@contextual
360
184
def expr (self ) -> Node | None :
361
185
# TODO: All the other forms of expressions
362
- things = []
186
+ things : list [ Node | Token ] = [] # TODO: list[tuple[Token|None, Node] ]
363
187
if not (term := self .full_term ()):
364
188
return None
365
189
things .append (term )
@@ -381,15 +205,19 @@ def expr(self) -> Node | None:
381
205
def full_term (self ) -> Node | None :
382
206
tok = self .peek ()
383
207
if tok and tok .kind in PREFIX_OPS :
384
- return PrefixOp (self .next (), self .full_term ())
208
+ self .next ()
209
+ term = self .full_term ()
210
+ if not term :
211
+ raise self .make_syntax_error (f"Expected term following { tok } " )
212
+ return PrefixOp (tok , term )
385
213
# TODO: SIZEOF
386
214
if cast := self .cast ():
387
215
return cast
388
216
term = self .term ()
389
217
if not term :
390
218
return None
391
219
if self .expect (lx .LPAREN ):
392
- args = []
220
+ args : list [ Node ] = []
393
221
while arg := self .expr ():
394
222
args .append (arg )
395
223
if not self .expect (lx .COMMA ):
@@ -398,6 +226,8 @@ def full_term(self) -> Node | None:
398
226
return Call (term , args )
399
227
if self .expect (lx .LBRACKET ):
400
228
index = self .expr ()
229
+ if not index :
230
+ raise self .make_syntax_error ("Expected index expression" )
401
231
self .require (lx .RBRACKET )
402
232
return Index (term , index )
403
233
if self .expect (lx .PERIOD ):
@@ -421,9 +251,10 @@ def cast(self):
421
251
422
252
@contextual
423
253
def type (self ):
424
- tok = self .peek ()
425
- if tok .kind in (lx .INT , lx .CHAR , lx .FLOAT , lx .DOUBLE ):
254
+ token = self .peek ()
255
+ if token and token .kind in (lx .INT , lx .CHAR , lx .FLOAT , lx .DOUBLE ):
426
256
type = self .next ()
257
+ assert type
427
258
stars = 0
428
259
while self .expect (lx .TIMES ):
429
260
stars += 1
@@ -432,6 +263,8 @@ def type(self):
432
263
@contextual
433
264
def term (self ) -> Node | None :
434
265
token = self .next ()
266
+ if not token :
267
+ return None
435
268
if token .kind == lx .NUMBER :
436
269
return Number (token )
437
270
if token .kind == lx .IDENTIFIER :
@@ -440,7 +273,6 @@ def term(self) -> Node | None:
440
273
expr = self .expr ()
441
274
self .require (lx .RPAREN )
442
275
return expr
443
- self .backup ()
444
276
return None
445
277
446
278
def infix_op (self ) -> Token | None :
@@ -467,14 +299,16 @@ def infix_op(self) -> Token | None:
467
299
else :
468
300
filename = None
469
301
src = "if (x) { x.foo; // comment\n }"
470
- p = Parser (src , filename )
471
- x = p .stmt ()
302
+ p = EParser (src , filename )
303
+ x = p .expr ()
472
304
assert x , p .getpos ()
473
305
if x .text .rstrip () != src .rstrip ():
474
306
print ("=== src ===" )
475
307
print (src )
476
308
print ("=== text ===" )
477
309
print (x .text )
310
+ print ("=== data ===" )
311
+ print (x )
478
312
print ("=== === ===" )
479
313
print ("FAIL" )
480
314
else :
0 commit comments