bpo-40334: Improve various PEG-Parser related stuff (GH-19669) · python/cpython@ebebb64 · GitHub
[go: up one dir, main page]

Skip to content

Commit ebebb64

Browse files
authored
bpo-40334: Improve various PEG-Parser related stuff (GH-19669)
The changes in this commit are all related to @vstinner's original review comments of the initial PEP 617 implementation PR.
1 parent 9e6a131 commit ebebb64

File tree

7 files changed

+58
-34
lines changed

7 files changed

+58
-34
lines changed

Include/pegen_interface.h renamed to Include/internal/pegen_interface.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
#ifndef Py_LIMITED_API
21
#ifndef Py_PEGENINTERFACE
32
#define Py_PEGENINTERFACE
43
#ifdef __cplusplus
54
extern "C" {
65
#endif
76

7+
#ifndef Py_BUILD_CORE
8+
# error "this header requires Py_BUILD_CORE define"
9+
#endif
10+
811
#include "Python.h"
912
#include "Python-ast.h"
1013

@@ -29,4 +32,3 @@ PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFileObject(FILE *, PyObject *fi
2932
}
3033
#endif
3134
#endif /* !Py_PEGENINTERFACE*/
32-
#endif /* !Py_LIMITED_API */

Makefile.pre.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ PEGEN_OBJS= \
304304

305305

306306
PEGEN_HEADERS= \
307-
$(srcdir)/Include/pegen_interface.h \
307+
$(srcdir)/Include/internal/pegen_interface.h \
308308
$(srcdir)/Parser/pegen/pegen.h \
309309
$(srcdir)/Parser/pegen/parse_string.h
310310

Modules/_peg_parser.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <Python.h>
2-
#include <pegen_interface.h>
2+
#include "pegen_interface.h"
33

44
PyObject *
55
_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)

PCbuild/pythoncore.vcxproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
<ClInclude Include="..\Include\graminit.h" />
162162
<ClInclude Include="..\Include\grammar.h" />
163163
<ClInclude Include="..\Include\import.h" />
164+
<ClInclude Include="..\Include\internal\pegen_interface.h" />
164165
<ClInclude Include="..\Include\internal\pycore_abstract.h" />
165166
<ClInclude Include="..\Include\internal\pycore_accu.h" />
166167
<ClInclude Include="..\Include\internal\pycore_atomic.h" />
@@ -213,7 +214,6 @@
213214
<ClInclude Include="..\Include\parsetok.h" />
214215
<ClInclude Include="..\Include\patchlevel.h" />
215216
<ClInclude Include="..\Include\picklebufobject.h" />
216-
<ClInclude Include="..\Include\pegen_interface.h" />
217217
<ClInclude Include="..\Include\pyhash.h" />
218218
<ClInclude Include="..\Include\pyhash.h" />
219219
<ClInclude Include="..\Include\py_curses.h" />

Parser/pegen/peg_api.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include <pegen_interface.h>
1+
#include "pegen_interface.h"
22

33
#include "../tokenizer.h"
44
#include "pegen.h"

Parser/pegen/pegen.c

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
static int
99
init_normalization(Parser *p)
1010
{
11+
if (p->normalize) {
12+
return 1;
13+
}
1114
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
1215
if (!m)
1316
{
@@ -36,7 +39,7 @@ _PyPegen_new_identifier(Parser *p, char *n)
3639
if (!PyUnicode_IS_ASCII(id))
3740
{
3841
PyObject *id2;
39-
if (!p->normalize && !init_normalization(p))
42+
if (!init_normalization(p))
4043
{
4144
Py_DECREF(id);
4245
goto error;
@@ -88,6 +91,9 @@ static inline Py_ssize_t
8891
byte_offset_to_character_offset(PyObject *line, int col_offset)
8992
{
9093
const char *str = PyUnicode_AsUTF8(line);
94+
if (!str) {
95+
return 0;
96+
}
9197
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
9298
if (!text) {
9399
return 0;
@@ -171,9 +177,10 @@ _PyPegen_get_expr_name(expr_ty e)
171177
}
172178
}
173179

174-
static void
180+
static int
175181
raise_decode_error(Parser *p)
176182
{
183+
assert(PyErr_Occurred());
177184
const char *errtype = NULL;
178185
if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
179186
errtype = "unicode error";
@@ -197,6 +204,8 @@ raise_decode_error(Parser *p)
197204
Py_XDECREF(value);
198205
Py_XDECREF(tback);
199206
}
207+
208+
return -1;
200209
}
201210

202211
static void
@@ -207,27 +216,33 @@ raise_tokenizer_init_error(PyObject *filename)
207216
|| PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
208217
return;
209218
}
210-
PyObject *type, *value, *tback, *errstr;
219+
PyObject *errstr = NULL;
220+
PyObject *tuple = NULL;
221+
PyObject *type, *value, *tback;
211222
PyErr_Fetch(&type, &value, &tback);
212223
errstr = PyObject_Str(value);
224+
if (!errstr) {
225+
goto error;
226+
}
213227

214-
Py_INCREF(Py_None);
215-
PyObject *tmp = Py_BuildValue("(OiiN)", filename, 0, -1, Py_None);
228+
PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
216229
if (!tmp) {
217230
goto error;
218231
}
219232

220-
value = PyTuple_Pack(2, errstr, tmp);
233+
tuple = PyTuple_Pack(2, errstr, tmp);
221234
Py_DECREF(tmp);
222235
if (!value) {
223236
goto error;
224237
}
225-
PyErr_SetObject(PyExc_SyntaxError, value);
238+
PyErr_SetObject(PyExc_SyntaxError, tuple);
226239

227240
error:
228241
Py_XDECREF(type);
229242
Py_XDECREF(value);
230243
Py_XDECREF(tback);
244+
Py_XDECREF(errstr);
245+
Py_XDECREF(tuple);
231246
}
232247

233248
static inline PyObject *
@@ -337,9 +352,6 @@ tokenizer_error(Parser *p)
337352
errtype = PyExc_IndentationError;
338353
msg = "too many levels of indentation";
339354
break;
340-
case E_DECODE:
341-
raise_decode_error(p);
342-
return -1;
343355
case E_LINECONT:
344356
msg = "unexpected character after line continuation character";
345357
break;
@@ -513,7 +525,12 @@ _PyPegen_fill_token(Parser *p)
513525
const char *start, *end;
514526
int type = PyTokenizer_Get(p->tok, &start, &end);
515527
if (type == ERRORTOKEN) {
516-
return tokenizer_error(p);
528+
if (p->tok->done == E_DECODE) {
529+
return raise_decode_error(p);
530+
}
531+
else {
532+
return tokenizer_error(p);
533+
}
517534
}
518535
if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
519536
type = NEWLINE; /* Add an extra newline */
@@ -530,13 +547,21 @@ _PyPegen_fill_token(Parser *p)
530547

531548
if (p->fill == p->size) {
532549
int newsize = p->size * 2;
533-
p->tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
534-
if (p->tokens == NULL) {
535-
PyErr_Format(PyExc_MemoryError, "Realloc tokens failed");
550+
Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
551+
if (new_tokens == NULL) {
552+
PyErr_NoMemory();
536553
return -1;
537554
}
555+
else {
556+
p->tokens = new_tokens;
557+
}
538558
for (int i = p->size; i < newsize; i++) {
539559
p->tokens[i] = PyMem_Malloc(sizeof(Token));
560+
if (p->tokens[i] == NULL) {
561+
p->size = i; // Needed, in order to cleanup correctly after parser fails
562+
PyErr_NoMemory();
563+
return -1;
564+
}
540565
memset(p->tokens[i], '\0', sizeof(Token));
541566
}
542567
p->size = newsize;
@@ -566,8 +591,6 @@ _PyPegen_fill_token(Parser *p)
566591
t->end_lineno = p->starting_lineno + end_lineno;
567592
t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
568593

569-
// if (p->fill % 100 == 0) fprintf(stderr, "Filled at %d: %s \"%s\"\n", p->fill,
570-
// token_name(type), PyBytes_AsString(t->bytes));
571594
p->fill += 1;
572595
return 0;
573596
}
@@ -614,6 +637,7 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
614637
{
615638
if (p->mark == p->fill) {
616639
if (_PyPegen_fill_token(p) < 0) {
640+
p->error_indicator = 1;
617641
return -1;
618642
}
619643
}
@@ -632,11 +656,9 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
632656
}
633657
p->mark = m->mark;
634658
*(void **)(pres) = m->node;
635-
// fprintf(stderr, "%d < %d: memoized!\n", p->mark, p->fill);
636659
return 1;
637660
}
638661
}
639-
// fprintf(stderr, "%d < %d: not memoized\n", p->mark, p->fill);
640662
return 0;
641663
}
642664

@@ -683,18 +705,15 @@ _PyPegen_expect_token(Parser *p, int type)
683705
{
684706
if (p->mark == p->fill) {
685707
if (_PyPegen_fill_token(p) < 0) {
708+
p->error_indicator = 1;
686709
return NULL;
687710
}
688711
}
689712
Token *t = p->tokens[p->mark];
690713
if (t->type != type) {
691-
// fprintf(stderr, "No %s at %d\n", token_name(type), p->mark);
692714
return NULL;
693715
}
694716
p->mark += 1;
695-
// fprintf(stderr, "Got %s at %d: %s\n", token_name(type), p->mark,
696-
// PyBytes_AsString(t->bytes));
697-
698717
return t;
699718
}
700719

@@ -888,8 +907,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
888907
{
889908
Parser *p = PyMem_Malloc(sizeof(Parser));
890909
if (p == NULL) {
891-
PyErr_Format(PyExc_MemoryError, "Out of memory for Parser");
892-
return NULL;
910+
return (Parser *) PyErr_NoMemory();
893911
}
894912
assert(tok != NULL);
895913
p->tok = tok;
@@ -898,10 +916,14 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
898916
p->tokens = PyMem_Malloc(sizeof(Token *));
899917
if (!p->tokens) {
900918
PyMem_Free(p);
901-
PyErr_Format(PyExc_MemoryError, "Out of memory for tokens");
902-
return NULL;
919+
return (Parser *) PyErr_NoMemory();
903920
}
904921
p->tokens[0] = PyMem_Malloc(sizeof(Token));
922+
if (!p->tokens) {
923+
PyMem_Free(p->tokens);
924+
PyMem_Free(p);
925+
return (Parser *) PyErr_NoMemory();
926+
}
905927
memset(p->tokens[0], '\0', sizeof(Token));
906928
p->mark = 0;
907929
p->fill = 0;
@@ -1187,7 +1209,7 @@ _PyPegen_seq_count_dots(asdl_seq *seq)
11871209
number_of_dots += 1;
11881210
break;
11891211
default:
1190-
assert(current_expr->type == ELLIPSIS || current_expr->type == DOT);
1212+
Py_UNREACHABLE();
11911213
}
11921214
}
11931215

Python/pythonrun.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#include "ast.h" // PyAST_FromNodeObject()
3030
#include "marshal.h" // PyMarshal_ReadLongFromFile()
3131

32-
#include <pegen_interface.h> // PyPegen_ASTFrom*
32+
#include "pegen_interface.h" // PyPegen_ASTFrom*
3333

3434
#ifdef MS_WINDOWS
3535
# include "malloc.h" // alloca()

0 commit comments

Comments
 (0)
0