8
8
static int
9
9
init_normalization (Parser * p )
10
10
{
11
+ if (p -> normalize ) {
12
+ return 1 ;
13
+ }
11
14
PyObject * m = PyImport_ImportModuleNoBlock ("unicodedata" );
12
15
if (!m )
13
16
{
@@ -36,7 +39,7 @@ _PyPegen_new_identifier(Parser *p, char *n)
36
39
if (!PyUnicode_IS_ASCII (id ))
37
40
{
38
41
PyObject * id2 ;
39
- if (!p -> normalize && ! init_normalization (p ))
42
+ if (!init_normalization (p ))
40
43
{
41
44
Py_DECREF (id );
42
45
goto error ;
@@ -88,6 +91,9 @@ static inline Py_ssize_t
88
91
byte_offset_to_character_offset (PyObject * line , int col_offset )
89
92
{
90
93
const char * str = PyUnicode_AsUTF8 (line );
94
+ if (!str ) {
95
+ return 0 ;
96
+ }
91
97
PyObject * text = PyUnicode_DecodeUTF8 (str , col_offset , NULL );
92
98
if (!text ) {
93
99
return 0 ;
@@ -171,9 +177,10 @@ _PyPegen_get_expr_name(expr_ty e)
171
177
}
172
178
}
173
179
174
- static void
180
+ static int
175
181
raise_decode_error (Parser * p )
176
182
{
183
+ assert (PyErr_Occurred ());
177
184
const char * errtype = NULL ;
178
185
if (PyErr_ExceptionMatches (PyExc_UnicodeError )) {
179
186
errtype = "unicode error" ;
@@ -197,6 +204,8 @@ raise_decode_error(Parser *p)
197
204
Py_XDECREF (value );
198
205
Py_XDECREF (tback );
199
206
}
207
+
208
+ return -1 ;
200
209
}
201
210
202
211
static void
@@ -207,27 +216,33 @@ raise_tokenizer_init_error(PyObject *filename)
207
216
|| PyErr_ExceptionMatches (PyExc_UnicodeDecodeError ))) {
208
217
return ;
209
218
}
210
- PyObject * type , * value , * tback , * errstr ;
219
+ PyObject * errstr = NULL ;
220
+ PyObject * tuple = NULL ;
221
+ PyObject * type , * value , * tback ;
211
222
PyErr_Fetch (& type , & value , & tback );
212
223
errstr = PyObject_Str (value );
224
+ if (!errstr ) {
225
+ goto error ;
226
+ }
213
227
214
- Py_INCREF (Py_None );
215
- PyObject * tmp = Py_BuildValu
10000
e ("(OiiN)" , filename , 0 , -1 , Py_None );
228
+ PyObject * tmp = Py_BuildValue ("(OiiO)" , filename , 0 , -1 , Py_None );
216
229
if (!tmp ) {
217
230
goto error ;
218
231
}
219
232
220
- value = PyTuple_Pack (2 , errstr , tmp );
233
+ tuple = PyTuple_Pack (2 , errstr , tmp );
221
234
Py_DECREF (tmp );
222
235
if (!value ) {
223
236
goto error ;
224
237
}
225
- PyErr_SetObject (PyExc_SyntaxError , value );
238
+ PyErr_SetObject (PyExc_SyntaxError , tuple );
226
239
227
240
error :
228
241
Py_XDECREF (type );
229
242
Py_XDECREF (value );
230
243
Py_XDECREF (tback );
244
+ Py_XDECREF (errstr );
245
+ Py_XDECREF (tuple );
231
246
}
232
247
233
248
static inline PyObject *
@@ -337,9 +352,6 @@ tokenizer_error(Parser *p)
337
352
errtype = PyExc_IndentationError ;
338
353
msg = "too many levels of indentation" ;
339
354
break ;
340
- case E_DECODE :
341
- raise_decode_error (p );
342
- return -1 ;
343
355
case E_LINECONT :
344
356
msg = "unexpected character after line continuation character" ;
345
357
break ;
@@ -513,7 +525,12 @@ _PyPegen_fill_token(Parser *p)
513
525
const char * start , * end ;
514
526
int type = PyTokenizer_Get (p -> tok , & start , & end );
515
527
if (type == ERRORTOKEN ) {
516
- return tokenizer_error (p );
528
+ if (p -> tok -> done == E_DECODE ) {
529
+ return raise_decode_error (p );
530
+ }
531
+ else {
532
+ return tokenizer_error (p );
533
+ }
517
534
}
518
535
if (type == ENDMARKER && p -> start_rule == Py_single_input && p -> parsing_started ) {
519
536
type = NEWLINE ; /* Add an extra newline */
@@ -530,13 +547,21 @@ _PyPegen_fill_token(Parser *p)
530
547
531
548
if (p -> fill == p -> size ) {
532
549
int newsize = p -> size * 2 ;
533
- p -> tokens = PyMem_Realloc (p -> tokens , newsize * sizeof (Token * ));
534
- if (p -> tokens == NULL ) {
535
- PyErr_Format ( PyExc_MemoryError , "Realloc tokens failed" );
550
+ Token * * new_tokens = PyMem_Realloc (p -> tokens , newsize * sizeof (Token * ));
551
+ if (new_tokens == NULL ) {
552
+ PyErr_NoMemory ( );
536
553
return -1 ;
537
554
}
555
+ else {
556
+ p -> tokens = new_tokens ;
557
+ }
538
558
for (int i = p -> size ; i < newsize ; i ++ ) {
539
559
p -> tokens [i ] = PyMem_Malloc (sizeof (Token ));
560
+ if (p -> tokens [i ] == NULL ) {
561
+ p -> size = i ; // Needed, in order to cleanup correctly after parser fails
562
+ PyErr_NoMemory ();
563
+ return -1 ;
564
+ }
540
565
memset (p -> tokens [i ], '\0' , sizeof (Token ));
541
566
}
542
567
p -> size = newsize ;
@@ -566,8 +591,6 @@ _PyPegen_fill_token(Parser *p)
566
591
t -> end_lineno = p -> starting_lineno + end_lineno ;
567
592
t -> end_col_offset = p -> tok -> lineno == 1 ? p -> starting_col_offset + end_col_offset : end_col_offset ;
568
593
569
- // if (p->fill % 100 == 0) fprintf(stderr, "Filled at %d: %s \"%s\"\n", p->fill,
570
- // token_name(type), PyBytes_AsString(t->bytes));
571
594
p -> fill += 1 ;
572
595
return 0 ;
573
596
}
@@ -614,6 +637,7 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
614
637
{
615
638
if (p -> mark == p -> fill ) {
616
639
if (_PyPegen_fill_token (p ) < 0 ) {
640
+ p -> error_indicator = 1 ;
617
641
return -1 ;
618
642
}
619
643
}
@@ -632,11 +656,9 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
632
656
}
633
657
p -> mark = m -> mark ;
634
658
* (void * * )(pres ) = m -> node ;
635
- // fprintf(stderr, "%d < %d: memoized!\n", p->mark, p->fill);
636
659
return 1 ;
637
660
}
638
661
}
639
- // fprintf(stderr, "%d < %d: not memoized\n", p->mark, p->fill);
640
662
return 0 ;
641
663
}
642
664
@@ -683,18 +705,15 @@ _PyPegen_expect_token(Parser *p, int type)
683
705
{
684
706
if (p -> mark == p -> fill ) {
685
707
if (_PyPegen_fill_token (p ) < 0 ) {
708
+ p -> error_indicator = 1 ;
686
709
return NULL ;
687
710
}
688
711
}
689
712
Token * t = p -> tokens [p -> mark ];
690
713
if (t -> type != type ) {
691
- // fprintf(stderr, "No %s at %d\n", token_name(type), p->mark);
692
714
return NULL ;
693
715
}
694
716
p -> mark += 1 ;
695
- // fprintf(stderr, "Got %s at %d: %s\n", token_name(type), p->mark,
696
- // PyBytes_AsString(t->bytes));
697
-
698
717
return t ;
699
718
}
700
719
@@ -888,8 +907,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
888
907
{
889
908
Parser * p = PyMem_Malloc (sizeof (Parser ));
890
909
if (p == NULL ) {
891
- PyErr_Format (PyExc_MemoryError , "Out of memory for Parser" );
892
- return NULL ;
910
+ return (Parser * ) PyErr_NoMemory ();
893
911
}
894
912
assert (tok != NULL );
895
913
p -> tok = tok ;
@@ -898,10 +916,14 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
898
916
p -> tokens = PyMem_Malloc (sizeof (Token * ));
899
917
if (!p -> tokens ) {
900
918
PyMem_Free (p );
901
- PyErr_Format (PyExc_MemoryError , "Out of memory for tokens" );
902
- return NULL ;
919
+ return (Parser * ) PyErr_NoMemory ();
903
920
}
904
921
p -> tokens [0 ] = PyMem_Malloc (sizeof (Token ));
922
+ if (!p -> tokens ) {
923
+ PyMem_Free (p -> tokens );
924
+ PyMem_Free (p );
925
+ return (Parser * ) PyErr_NoMemory ();
926
+ }
905
927
memset (p -> tokens [0 ], '\0' , sizeof (Token ));
906
928
p -> mark = 0 ;
907
929
p -> fill = 0 ;
@@ -1187,7 +1209,7 @@ _PyPegen_seq_count_dots(asdl_seq *seq)
1187
1209
number_of_dots += 1 ;
1188
1210
break ;
1189
1211
default :
1190
- assert ( current_expr -> type == ELLIPSIS || current_expr -> type == DOT );
1212
+ Py_UNREACHABLE ( );
1191
1213
}
1192
1214
}
1193
1215
0 commit comments