closes bpo-39721: Fix constness of members of tok_state struct. (GH-18600) · python/cpython@384f3c5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 384f3c5

Browse files
authored
closes bpo-39721: Fix constness of members of tok_state struct. (GH-18600)
The function PyTokenizer_FromUTF8 from Parser/tokenizer.c had a comment: /* XXX: constify members. */ This patch addresses that.

In the tok_state struct:
* end and start were non-const but could be made const
* str and input were const but should have been non-const

Changes to support this include:
* decode_str() now returns a char * since it is allocated.
* PyTokenizer_FromString() and PyTokenizer_FromUTF8() each create a new char * for an allocated string instead of reusing the input const char *.
* PyTokenizer_Get() and tok_get() now take const char ** arguments.
* Various local vars are const or non-const accordingly.

I was able to remove five casts that cast away constness.
1 parent 766b754 commit 384f3c5

File tree

3 files changed

+37
-27
lines changed

3 files changed

+37
-27
lines changed

Parser/parsetok.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
240240
#endif
241241

242242
for (;;) {
243-
char *a, *b;
243+
const char *a, *b;
244244
int type;
245245
size_t len;
246246
char *str;
@@ -371,7 +371,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
371371
buffer after parsing. Trailing whitespace and comments
372372
are OK. */
373373
if (err_ret->error == E_DONE && start == single_input) {
374-
char *cur = tok->cur;
374+
const char *cur = tok->cur;
375375
char c = *tok->cur;
376376

377377
for (;;) {

Parser/tokenizer.c

Lines changed: 30 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,9 @@ tok_new(void)
5959
sizeof(struct tok_state));
6060
if (tok == NULL)
6161
return NULL;
62-
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
62+
tok->buf = tok->cur = tok->inp = NULL;
63+
tok->start = NULL;
64+
tok->end = NULL;
6365
tok->done = E_OK;
6466
tok->fp = NULL;
6567
tok->input = NULL;
@@ -111,7 +113,9 @@ error_ret(struct tok_state *tok) /* XXX */
111113
tok->decoding_erred = 1;
112114
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
113115
PyMem_FREE(tok->buf);
114-
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
116+
tok->buf = tok->cur = tok->inp = NULL;
117+
tok->start = NULL;
118+
tok->end = NULL;
115119
tok->done = E_DECODE;
116120
return NULL; /* as if it were EOF */
117121
}
@@ -664,11 +668,11 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
664668
Look for encoding declarations inside STR, and record them
665669
inside TOK. */
666670

667-
static const char *
671+
static char *
668672
decode_str(const char *input, int single, struct tok_state *tok)
669673
{
670674
PyObject* utf8 = NULL;
671-
const char *str;
675+
char *str;
672676
const char *s;
673677
const char *newl[2] = {NULL, NULL};
674678
int lineno = 0;
@@ -726,43 +730,46 @@ struct tok_state *
726730
PyTokenizer_FromString(const char *str, int exec_input)
727731
{
728732
struct tok_state *tok = tok_new();
733+
char *decoded;
734+
729735
if (tok == NULL)
730736
return NULL;
731-
str = decode_str(str, exec_input, tok);
732-
if (str == NULL) {
737+
decoded = decode_str(str, exec_input, tok);
738+
if (decoded == NULL) {
733739
PyTokenizer_Free(tok);
734740
return NULL;
735741
}
736742

737-
/* XXX: constify members. */
738-
tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
743+
tok->buf = tok->cur = tok->inp = decoded;
744+
tok->end = decoded;
739745
return tok;
740746
}
741747

742748
struct tok_state *
743749
PyTokenizer_FromUTF8(const char *str, int exec_input)
744750
{
745751
struct tok_state *tok = tok_new();
752+
char *translated;
746753
if (tok == NULL)
747754
return NULL;
748-
tok->input = str = translate_newlines(str, exec_input, tok);
749-
if (str == NULL) {
755+
tok->input = translated = translate_newlines(str, exec_input, tok);
756+
if (translated == NULL) {
750757
PyTokenizer_Free(tok);
751758
return NULL;
752759
}
753760
tok->decoding_state = STATE_RAW;
754761
tok->read_coding_spec = 1;
755762
tok->enc = NULL;
756-
tok->str = str;
763+
tok->str = translated;
757764
tok->encoding = (char *)PyMem_MALLOC(6);
758765
if (!tok->encoding) {
759766
PyTokenizer_Free(tok);
760767
return NULL;
761768
}
762769
strcpy(tok->encoding, "utf-8");
763770

764-
/* XXX: constify members. */
765-
tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
771+
tok->buf = tok->cur = tok->inp = translated;
772+
tok->end = translated;
766773
return tok;
767774
}
768775

@@ -812,7 +819,7 @@ PyTokenizer_Free(struct tok_state *tok)
812819
if (tok->fp != NULL && tok->buf != NULL)
813820
PyMem_FREE(tok->buf);
814821
if (tok->input)
815-
PyMem_FREE((char *)tok->input);
822+
PyMem_FREE(tok->input);
816823
PyMem_FREE(tok);
817824
}
818825

@@ -1138,7 +1145,7 @@ tok_decimal_tail(struct tok_state *tok)
11381145
/* Get next token, after space stripping etc. */
11391146

11401147
static int
1141-
tok_get(struct tok_state *tok, char **p_start, char **p_end)
1148+
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
11421149
{
11431150
int c;
11441151
int blankline, nonascii;
@@ -1321,7 +1328,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
13211328
&& ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
13221329

13231330
if (is_type_ignore) {
1324-
*p_start = (char *) ignore_end;
1331+
*p_start = ignore_end;
13251332
*p_end = tok->cur;
13261333

13271334
/* If this type ignore is the only thing on the line, consume the newline also. */
@@ -1331,7 +1338,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
13311338
}
13321339
return TYPE_IGNORE;
13331340
} else {
1334-
*p_start = (char *) type_start; /* after type_comment_prefix */
1341+
*p_start = type_start; /* after type_comment_prefix */
13351342
*p_end = tok->cur;
13361343
return TYPE_COMMENT;
13371344
}
@@ -1410,7 +1417,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
14101417
Look ahead one token to see if that is 'def'. */
14111418

14121419
struct tok_state ahead_tok;
1413-
char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
1420+
const char *ahead_tok_start = NULL;
1421+
const char *ahead_tok_end = NULL;
14141422
int ahead_tok_kind;
14151423

14161424
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
@@ -1798,7 +1806,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
17981806
}
17991807

18001808
int
1801-
PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1809+
PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
18021810
{
18031811
int result = tok_get(tok, p_start, p_end);
18041812
if (tok->decoding_erred) {
@@ -1823,7 +1831,9 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
18231831
{
18241832
struct tok_state *tok;
18251833
FILE *fp;
1826-
char *p_start =NULL , *p_end =NULL , *encoding = NULL;
1834+
const char *p_start = NULL;
1835+
const char *p_end = NULL;
1836+
char *encoding = NULL;
18271837

18281838
fd = _Py_dup(fd);
18291839
if (fd < 0) {

Parser/tokenizer.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,8 @@ struct tok_state {
2626
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
2727
char *cur; /* Next character in buffer */
2828
char *inp; /* End of data in buffer */
29-
char *end; /* End of input buffer if buf != NULL */
30-
char *start; /* Start of current token if not NULL */
29+
const char *end; /* End of input buffer if buf != NULL */
30+
const char *start; /* Start of current token if not NULL */
3131
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
3232
/* NB If done != E_OK, cur must be == inp!!! */
3333
FILE *fp; /* Rest of input; NULL if tokenizing a string */
@@ -60,8 +60,8 @@ struct tok_state {
6060
PyObject *decoding_readline; /* open(...).readline */
6161
PyObject *decoding_buffer;
6262
const char* enc; /* Encoding for the current str. */
63-
const char* str;
64-
const char* input; /* Tokenizer's newline translated copy of the string. */
63+
char* str;
64+
char* input; /* Tokenizer's newline translated copy of the string. */
6565

6666
int type_comments; /* Whether to look for type comments */
6767

@@ -78,7 +78,7 @@ extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
7878
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
7979
const char *, const char *);
8080
extern void PyTokenizer_Free(struct tok_state *);
81-
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
81+
extern int PyTokenizer_Get(struct tok_state *, const char **, const char **);
8282

8383
#define tok_dump _Py_tok_dump
8484

0 commit comments

Comments
 (0)
0