Tighten checks for whitespace in functions that parse identifiers etc. · markusborg/postgres@2c5e3fa · GitHub
[go: up one dir, main page]

Skip to content

Commit 2c5e3fa

Browse files
committed
Tighten checks for whitespace in functions that parse identifiers etc.
This patch replaces isspace() calls with scanner_isspace() in functions that are likely to be presented with non-ASCII input. isspace() has the small advantage that it will correctly recognize no-break space in single-byte encodings (such as LATIN1); but it cannot work successfully for any multibyte character, and depending on platform it might return false positive results for some fragments of multibyte characters. That's disastrous for functions that are trying to discard whitespace between valid strings, as noted in bug #14662 from Justin Muise. Even treating no-break space as whitespace is pretty questionable for the usages touched here, because the core scanner would think it is an identifier character.

Affected functions are parse_ident(), parseNameAndArgTypes (underlying regprocedurein() and siblings), SplitIdentifierString (used for parsing GUCs and options that are qualified names or lists of names), and SplitDirectoriesString (used for parsing GUCs that are lists of directories).

All the functions adjusted here are parsing SQL identifiers and similar constructs, so it's reasonable to insist that their definition of whitespace match the core scanner. So we can hope that this won't cause many backwards-compatibility problems. I've left alone isspace() calls in places that aren't really expecting any non-ASCII input characters, such as float8in().

Back-patch to all supported branches.

Discussion: https://postgr.es/m/10129.1495302480@sss.pgh.pa.us
1 parent 72d62b6 commit 2c5e3fa

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

src/backend/utils/adt/regproc.c

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
#include "lib/stringinfo.h"
3636
#include "miscadmin.h"
3737
#include "parser/parse_type.h"
38+
#include "parser/scansup.h"
3839
#include "utils/builtins.h"
3940
#include "utils/fmgroids.h"
4041
#include "utils/lsyscache.h"
@@ -1911,7 +1912,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
19111912
ptr2 = ptr + strlen(ptr);
19121913
while (--ptr2 > ptr)
19131914
{
1914-
if (!isspace((unsigned char) *ptr2))
1915+
if (!scanner_isspace(*ptr2))
19151916
break;
19161917
}
19171918
if (*ptr2 != ')')
@@ -1928,7 +1929,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
19281929
for (;;)
19291930
{
19301931
/* allow leading whitespace */
1931-
while (isspace((unsigned char) *ptr))
1932+
while (scanner_isspace(*ptr))
19321933
ptr++;
19331934
if (*ptr == '\0')
19341935
{
@@ -1984,7 +1985,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
19841985
/* Lop off trailing whitespace */
19851986
while (--ptr2 >= typename)
19861987
{
1987-
if (!isspace((unsigned char) *ptr2))
1988+
if (!scanner_isspace(*ptr2))
19881989
break;
19891990
*ptr2 = '\0';
19901991
}

src/backend/utils/adt/varlena.c

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2921,7 +2921,7 @@ SplitIdentifierString(char *rawstring, char separator,
29212921

29222922
*namelist = NIL;
29232923

2924-
while (isspace((unsigned char) *nextp))
2924+
while (scanner_isspace(*nextp))
29252925
nextp++; /* skip leading whitespace */
29262926

29272927
if (*nextp == '\0')
@@ -2959,7 +2959,7 @@ SplitIdentifierString(char *rawstring, char separator,
29592959

29602960
curname = nextp;
29612961
while (*nextp && *nextp != separator &&
2962-
!isspace((unsigned char) *nextp))
2962+
!scanner_isspace(*nextp))
29632963
nextp++;
29642964
endp = nextp;
29652965
if (curname == nextp)
@@ -2981,13 +2981,13 @@ SplitIdentifierString(char *rawstring, char separator,
29812981
pfree(downname);
29822982
}
29832983

2984-
while (isspace((unsigned char) *nextp))
2984+
while (scanner_isspace(*nextp))
29852985
nextp++; /* skip trailing whitespace */
29862986

29872987
if (*nextp == separator)
29882988
{
29892989
nextp++;
2990-
while (isspace((unsigned char) *nextp))
2990+
while (scanner_isspace(*nextp))
29912991
nextp++; /* skip leading whitespace for next */
29922992
/* we expect another name, so done remains false */
29932993
}
@@ -3046,7 +3046,7 @@ SplitDirectoriesString(char *rawstring, char separator,
30463046

30473047
*namelist = NIL;
30483048

3049-
while (isspace((unsigned char) *nextp))
3049+
while (scanner_isspace(*nextp))
30503050
nextp++; /* skip leading whitespace */
30513051

30523052
if (*nextp == '\0')
@@ -3083,21 +3083,21 @@ SplitDirectoriesString(char *rawstring, char separator,
30833083
while (*nextp && *nextp != separator)
30843084
{
30853085
/* trailing whitespace should not be included in name */
3086-
if (!isspace((unsigned char) *nextp))
3086+
if (!scanner_isspace(*nextp))
30873087
endp = nextp + 1;
30883088
nextp++;
30893089
}
30903090
if (curname == endp)
30913091
return false; /* empty unquoted name not allowed */
30923092
}
30933093

3094-
while (isspace((unsigned char) *nextp))
3094+
while (scanner_isspace(*nextp))
30953095
nextp++; /* skip trailing whitespace */
30963096

30973097
if (*nextp == separator)
30983098
{
30993099
nextp++;
3100-
while (isspace((unsigned char) *nextp))
3100+
while (scanner_isspace(*nextp))
31013101
nextp++; /* skip leading whitespace for next */
31023102
/* we expect another name, so done remains false */
31033103
}

0 commit comments

Comments
 (0)
0