bpo-39721: Fix constness of members of tok_state struct. by petdance · Pull Request #18600 · python/cpython · GitHub

bpo-39721: Fix constness of members of tok_state struct. #18600


Merged
1 commit merged on Feb 28, 2020
4 changes: 2 additions & 2 deletions Parser/parsetok.c
@@ -240,7 +240,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
#endif

for (;;) {
- char *a, *b;
+ const char *a, *b;
int type;
size_t len;
char *str;
@@ -371,7 +371,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
buffer after parsing. Trailing whitespace and comments
are OK. */
if (err_ret->error == E_DONE && start == single_input) {
- char *cur = tok->cur;
+ const char *cur = tok->cur;
char c = *tok->cur;

for (;;) {
50 changes: 30 additions & 20 deletions Parser/tokenizer.c
@@ -59,7 +59,9 @@ tok_new(void)
sizeof(struct tok_state));
if (tok == NULL)
return NULL;
- tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+ tok->buf = tok->cur = tok->inp = NULL;
+ tok->start = NULL;
+ tok->end = NULL;
tok->done = E_OK;
tok->fp = NULL;
tok->input = NULL;
@@ -111,7 +113,9 @@ error_ret(struct tok_state *tok) /* XXX */
tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
PyMem_FREE(tok->buf);
- tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+ tok->buf = tok->cur = tok->inp = NULL;
+ tok->start = NULL;
+ tok->end = NULL;
tok->done = E_DECODE;
return NULL; /* as if it were EOF */
}
@@ -664,11 +668,11 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
Look for encoding declarations inside STR, and record them
inside TOK. */

- static const char *
+ static char *
decode_str(const char *input, int single, struct tok_state *tok)
{
PyObject* utf8 = NULL;
- const char *str;
+ char *str;
const char *s;
const char *newl[2] = {NULL, NULL};
int lineno = 0;
@@ -726,43 +730,46 @@ struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
+ char *decoded;

if (tok == NULL)
return NULL;
- str = decode_str(str, exec_input, tok);
- if (str == NULL) {
+ decoded = decode_str(str, exec_input, tok);
+ if (decoded == NULL) {
PyTokenizer_Free(tok);
return NULL;
}

- /* XXX: constify members. */
- tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ tok->buf = tok->cur = tok->inp = decoded;
+ tok->end = decoded;
return tok;
}

struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
+ char *translated;
if (tok == NULL)
return NULL;
- tok->input = str = translate_newlines(str, exec_input, tok);
- if (str == NULL) {
+ tok->input = translated = translate_newlines(str, exec_input, tok);
+ if (translated == NULL) {
PyTokenizer_Free(tok);
return NULL;
}
tok->decoding_state = STATE_RAW;
tok->read_coding_spec = 1;
tok->enc = NULL;
- tok->str = str;
+ tok->str = translated;
tok->encoding = (char *)PyMem_MALLOC(6);
if (!tok->encoding) {
PyTokenizer_Free(tok);
return NULL;
}
strcpy(tok->encoding, "utf-8");

- /* XXX: constify members. */
- tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ tok->buf = tok->cur = tok->inp = translated;
+ tok->end = translated;
return tok;
}

@@ -812,7 +819,7 @@ PyTokenizer_Free(struct tok_state *tok)
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
if (tok->input)
- PyMem_FREE((char *)tok->input);
+ PyMem_FREE(tok->input);
PyMem_FREE(tok);
}

@@ -1138,7 +1145,7 @@ tok_decimal_tail(struct tok_state *tok)
/* Get next token, after space stripping etc. */

static int
- tok_get(struct tok_state *tok, char **p_start, char **p_end)
+ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
{
int c;
int blankline, nonascii;
@@ -1321,7 +1328,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
&& ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));

if (is_type_ignore) {
- *p_start = (char *) ignore_end;
+ *p_start = ignore_end;
*p_end = tok->cur;

/* If this type ignore is the only thing on the line, consume the newline also. */
@@ -1331,7 +1338,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
return TYPE_IGNORE;
} else {
- *p_start = (char *) type_start; /* after type_comment_prefix */
+ *p_start = type_start; /* after type_comment_prefix */
*p_end = tok->cur;
return TYPE_COMMENT;
}
@@ -1410,7 +1417,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
Look ahead one token to see if that is 'def'. */

struct tok_state ahead_tok;
- char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
+ const char *ahead_tok_start = NULL;
+ const char *ahead_tok_end = NULL;
int ahead_tok_kind;

memcpy(&ahead_tok, tok, sizeof(ahead_tok));
@@ -1798,7 +1806,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}

int
- PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
+ PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
{
int result = tok_get(tok, p_start, p_end);
if (tok->decoding_erred) {
@@ -1823,7 +1831,9 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{
struct tok_state *tok;
FILE *fp;
- char *p_start =NULL , *p_end =NULL , *encoding = NULL;
+ const char *p_start = NULL;
+ const char *p_end = NULL;
+ char *encoding = NULL;

fd = _Py_dup(fd);
if (fd < 0) {
10 changes: 5 additions & 5 deletions Parser/tokenizer.h
@@ -26,8 +26,8 @@ struct tok_state {
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
char *cur; /* Next character in buffer */
char *inp; /* End of data in buffer */
- char *end; /* End of input buffer if buf != NULL */
- char *start; /* Start of current token if not NULL */
+ const char *end; /* End of input buffer if buf != NULL */
+ const char *start; /* Start of current token if not NULL */
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
/* NB If done != E_OK, cur must be == inp!!! */
FILE *fp; /* Rest of input; NULL if tokenizing a string */
@@ -60,8 +60,8 @@ struct tok_state {
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
const char* enc; /* Encoding for the current str. */
- const char* str;
- const char* input; /* Tokenizer's newline translated copy of the string. */
Contributor:

What's the reason these can't be const?

Contributor Author:

tok->input can't be const because it's the allocated input, and then gets freed in PyTokenizer_Free.

tok->str can't be const because it can be returned from decode_str, which returns a non-const string. If I inlined decode_str into PyTokenizer_FromString, the only place that uses it, then I think I could make tok->str const.
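
To make the ownership point concrete, here is a minimal sketch (not CPython code; the struct and function names are hypothetical) of why a member that owns a heap allocation is kept as plain char * while read-only views of it can be const char *:

#include <stdlib.h>
#include <string.h>

struct owner {
    char *input;         /* owning pointer: allocated and freed by this struct */
    const char *start;   /* borrowed view into input: never freed directly */
};

static int owner_init(struct owner *o, const char *src)
{
    size_t n = strlen(src) + 1;
    o->input = malloc(n);        /* malloc'ed, so the member stays char * */
    if (o->input == NULL)
        return -1;
    memcpy(o->input, src, n);
    o->start = o->input;         /* aliasing it through const char * is fine */
    return 0;
}

static void owner_free(struct owner *o)
{
    /* If input were declared const char *, this free() would need a cast
       such as free((char *)o->input), mirroring the PyMem_FREE cast the PR removes. */
    free(o->input);
    o->input = NULL;
    o->start = NULL;
}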

Contributor:

decode_str only returns a non-const string in this change, though.

Contributor Author:

That's correct. I changed decode_str to return char * instead of const char * because it can return the result from translate_newlines. Also, the result from decode_str is assigned to tok->buf which is non-const.
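
A small self-contained sketch of that signature relationship (assumed names, not the real decode_str/translate_newlines declarations): a helper that hands back a freshly allocated buffer is declared to return char *, so the caller can store it in non-const owning members such as tok->buf and still expose read-only views through const members such as tok->end.

#include <stdlib.h>
#include <string.h>

static char *copy_input(const char *s)       /* stands in for decode_str */
{
    size_t n = strlen(s) + 1;
    char *out = malloc(n);                   /* caller owns and later frees this */
    if (out == NULL)
        return NULL;
    memcpy(out, s, n);
    return out;
}

int main(void)
{
    char *buf = copy_input("x = 1\n");       /* owning, non-const, like tok->buf */
    if (buf == NULL)
        return 1;
    const char *end = buf + strlen(buf);     /* read-only view, like tok->end */
    (void)end;
    free(buf);                               /* no cast needed: buf is char * */
    return 0;
}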

+ char* str;
+ char* input; /* Tokenizer's newline translated copy of the string. */

int type_comments; /* Whether to look for type comments */

@@ -78,7 +78,7 @@ extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
const char *, const char *);
extern void PyTokenizer_Free(struct tok_state *);
- extern int PyTokenizer_Get(struct tok_state *, char **, char **);
+ extern int PyTokenizer_Get(struct tok_state *, const char **, const char **);

#define tok_dump _Py_tok_dump
