8000 Code and docs review for commit 3187d6de0e5a9e805b27c48437897e8c39071… · postgres/postgres@d12e5bb · GitHub
[go: up one dir, main page]

Skip to content

Commit d12e5bb

Browse files
committed
Code and docs review for commit 3187d6d.
Fix up check for high-bit-set characters, which provoked "comparison is always true due to limited range of data type" warnings on some compilers, and was unlike the way we do it elsewhere anyway. Fix omission of "$" from the set of valid identifier continuation characters. Get rid of sanitize_text(), which was utterly inconsistent with any other error report anywhere in the system, and wasn't even well designed on its own terms (double-quoting the result string without escaping contained double quotes doesn't seem very well thought out). Fix up error messages, which didn't follow the message style guidelines very well, and were overly specific in situations where the actual mistake might not be what they said. Improve documentation. (I started out just intending to fix the compiler warning, but the more I looked at the patch the less I liked it.)
1 parent 499a505 commit d12e5bb

File tree

3 files changed

+103
-152
lines changed
  • test/regress/expected
  • 3 files changed

    +103
    -152
    lines changed

    doc/src/sgml/func.sgml

    Lines changed: 12 additions & 15 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1823,25 +1823,22 @@
    18231823
    <indexterm>
    18241824
    <primary>parse_ident</primary>
    18251825
    </indexterm>
    1826-
    <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
    1827-
    [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
    1826+
    <literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
    1827+
    [, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
    18281828
    </entry>
    18291829
    <entry><type>text[]</type></entry>
    1830-
    <entry>Split <parameter>qualified identifier</parameter> into array
    1831-
    <parameter>parts</parameter>. When <parameter>strictmode</parameter> is
    1832-
    false, extra characters after the identifier are ignored. This is useful
    1833-
    for parsing identifiers for objects like functions and arrays that may
    1834-
    have trailing characters. By default, extra characters after the last
    1835-
    identifier are considered an error, but if the second parameter is false,
    1836-
    then the characters after the last identifier are ignored. Note that this
    1837-
    function does not truncate quoted identifiers. If you care about that
    1838-
    you should cast the result of this function to name[]. Non-printable
    1839-
    characters (like 0 to 31) are always displayed as hexadecimal codes,
    1840-
    which can be different from PostgreSQL internal SQL identifiers
    1841-
    processing, when the original escaped value is displayed.
    1830+
    <entry>
    1831+
    Split <parameter>qualified_identifier</parameter> into an array of
    1832+
    identifiers, removing any quoting of individual identifiers. By
    1833+
    default, extra characters after the last identifier are considered an
    1834+
    error; but if the second parameter is <literal>false</>, then such
    1835+
    extra characters are ignored. (This behavior is useful for parsing
    1836+
    names for objects like functions.) Note that this function does not
    1837+
    truncate over-length identifiers. If you want truncation you can cast
    1838+
    the result to <type>name[]</>.
    18421839
    </entry>
    18431840
    <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
    1844-
    <entry><literal>"SomeSchema,sometable"</literal></entry>
    1841+
    <entry><literal>{SomeSchema,sometable}</literal></entry>
    18451842
    </row>
    18461843

    18471844
    <row>

    src/backend/utils/adt/misc.c

    Lines changed: 74 additions & 125 deletions
    Original file line numberDiff line numberDiff line change
    @@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
    723723

    724724

    725725
    /*
    726-
    * This simple parser utility are compatible with lexer implementation,
    727-
    * used only in parse_ident function
    726+
    * Is character a valid identifier start?
    727+
    * Must match scan.l's {ident_start} character class.
    728728
    */
    729729
    static bool
    730730
    is_ident_start(unsigned char c)
    731731
    {
    732+
    /* Underscores and ASCII letters are OK */
    732733
    if (c == '_')
    733734
    return true;
    734735
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
    735736
    return true;
    736-
    737-
    if (c >= 0200 && c <= 0377)
    737+
    /* Any high-bit-set character is OK (might be part of a multibyte char) */
    738+
    if (IS_HIGHBIT_SET(c))
    738739
    return true;
    739-
    740740
    return false;
    741741
    }
    742742

    743+
    /*
    744+
    * Is character a valid identifier continuation?
    745+
    * Must match scan.l's {ident_cont} character class.
    746+
    */
    743747
    static bool
    744748
    is_ident_cont(unsigned char c)
    745749
    {
    746-
    if (c >= '0' && c <= '9')
    750+
    /* Can be digit or dollar sign ... */
    751+
    if ((c >= '0' && c <= '9') || c == '$')
    747752
    return true;
    748-
    753+
    /* ... or an identifier start character */
    749754
    return is_ident_start(c);
    750755
    }
    751756

    752757
    /*
    753-
    * Sanitize SQL string for using in error message.
    754-
    */
    755-
    static char *
    756-
    sanitize_text(text *t)
    757-
    {
    758-
    int len = VARSIZE_ANY_EXHDR(t);
    759-
    const char *p = VARDATA_ANY(t);
    760-
    StringInfo dstr;
    761-
    762-
    dstr = makeStringInfo();
    763-
    764-
    appendStringInfoChar(dstr, '"');
    765-
    766-
    while (len--)
    767-
    {
    768-
    switch (*p)
    769-
    {
    770-
    case '\b':
    771-
    appendStringInfoString(dstr, "\\b");
    772-
    break;
    773-
    case '\f':
    774-
    appendStringInfoString(dstr, "\\f");
    775-
    break;
    776-
    case '\n':
    777-
    appendStringInfoString(dstr, "\\n");
    778-
    break;
    779-
    case '\r':
    780-
    appendStringInfoString(dstr, "\\r");
    781-
    break;
    782-
    case '\t':
    783-
    appendStringInfoString(dstr, "\\t");
    784-
    break;
    785-
    case '\'':
    786-
    appendStringInfoString(dstr, "''");
    787-
    break;
    788-
    case '\\':
    789-
    appendStringInfoString(dstr, "\\\\");
    790-
    break;
    791-
    default:
    792-
    if ((unsigned char) *p < ' ')
    793-
    appendStringInfo(dstr, "\\u%04x", (int) *p);
    794-
    else
    795-
    appendStringInfoCharMacro(dstr, *p);
    796-
    break;
    797-
    }
    798-
    p++;
    799-
    }
    800-
    801-
    appendStringInfoChar(dstr, '"');
    802-
    803-
    return dstr->data;
    804-
    }
    805-
    806-
    /*
    807-
    * parse_ident - parse SQL composed identifier to separate identifiers.
    758+
    * parse_ident - parse a SQL qualified identifier into separate identifiers.
    808759
    * When strict mode is active (second parameter), then any chars after
    809-
    * last identifiers are disallowed.
    760+
    * the last identifier are disallowed.
    810761
    */
    811762
    Datum
    812763
    parse_ident(PG_FUNCTION_ARGS)
    813764
    {
    814-
    text *qualname;
    815-
    char *qualname_str;
    816-
    bool strict;
    765+
    text *qualname = PG_GETARG_TEXT_PP(0);
    766+
    bool strict = PG_GETARG_BOOL(1);
    767+
    char *qualname_str = text_to_cstring(qualname);
    768+
    ArrayBuildState *astate = NULL;
    817769
    char *nextp;
    818770
    bool after_dot = false;
    819-
    ArrayBuildState *astate = NULL;
    820-
    821-
    qualname = PG_GETARG_TEXT_PP(0);
    822-
    qualname_str = text_to_cstring(qualname);
    823-
    strict = PG_GETARG_BOOL(1);
    824771

    772+
    /*
    773+
    * The code below scribbles on qualname_str in some cases, so we should
    774+
    * reconvert qualname if we need to show the original string in error
    775+
    * messages.
    776+
    */
    825777
    nextp = qualname_str;
    826778

    827779
    /* skip leading whitespace */
    @@ -830,90 +782,87 @@ parse_ident(PG_FUNCTION_ARGS)
    830782

    831783
    for (;;)
    832784
    {
    833-
    char *curname;
    834-
    char *endp;
    835-
    bool missing_ident;
    836-
    837-
    missing_ident = true;
    785+
    char *curname;
    786+
    bool missing_ident = true;
    838787

    839-
    if (*nextp == '\"')
    788+
    if (*nextp == '"')
    840789
    {
    790+
    char *endp;
    791+
    841792
    curname = nextp + 1;
    842793
    for (;;)
    843794
    {
    844-
    endp F987 = strchr(nextp + 1, '\"');
    795+
    endp = strchr(nextp + 1, '"');
    845796
    if (endp == NULL)
    846797
    ereport(ERROR,
    847-
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    848-
    errmsg("unclosed double quotes"),
    849-
    errdetail("string %s is not valid identifier",
    850-
    sanitize_text(qualname))));
    851-
    if (endp[1] != '\"')
    798+
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    799+
    errmsg("string is not a valid identifier: \"%s\"",
    800+
    text_to_cstring(qualname)),
    801+
    errdetail("String has unclosed double quotes.")));
    802+
    if (endp[1] != '"')
    852803
    break;
    853804
    memmove(endp, endp + 1, strlen(endp));
    854805
    nextp = endp;
    855806
    }
    856807
    nextp = endp + 1;
    857808
    *endp = '\0';
    858809

    859-
    /* Show complete input string in this case. */
    860810
    if (endp - curname == 0)
    861811
    ereport(ERROR,
    862-
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    863-
    errmsg("identifier should not be empty: %s",
    864-
    sanitize_text(qualname))));
    812+
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    813+
    errmsg("string is not a valid identifier: \"%s\"",
    814+
    text_to_cstring(qualname)),
    815+
    errdetail("Quoted identifier must not be empty.")));
    865816

    866817
    astate = accumArrayResult(astate, CStringGetTextDatum(curname),
    867818
    false, TEXTOID, CurrentMemoryContext);
    868819
    missing_ident = false;
    869820
    }
    870-
    else
    821+
    else if (is_ident_start((unsigned char) *nextp))
    871822
    {
    872-
    if (is_ident_start((unsigned char) *nextp))
    873-
    {
    874-
    char *downname;
    875-
    int len;
    876-
    text *part;
    877-
    878-
    curname = nextp++;
    879-
    while (is_ident_cont((unsigned char) *nextp))
    880-
    nextp++;
    881-
    882-
    len = nextp - curname;
    883-
    884-
    /*
    885-
    * Unlike name, we don't implicitly truncate identifiers. This
    886-
    * is useful for allowing the user to check for specific parts
    887-
    * of the identifier being too long. It's easy enough for the
    888-
    * user to get the truncated names by casting our output to
    889-
    * name[].
    890-
    */
    891-
    downname = downcase_identifier(curname, len, false, false);
    892-
    part = cstring_to_text_with_len(downname, len);
    893-
    astate = accumArrayResult(astate, PointerGetDatum(part), false,
    894-
    TEXTOID, CurrentMemoryContext);
    895-
    missing_ident = false;
    896-
    }
    823+
    char *downname;
    824+
    int len;
    825+
    text *part;
    826+
    827+
    curname = nextp++;
    828+
    while (is_ident_cont((unsigned char) *nextp))
    829+
    nextp++;
    830+
    831+
    len = nextp - curname;
    832+
    833+
    /*
    834+
    * We don't implicitly truncate identifiers. This is useful for
    835+
    * allowing the user to check for specific parts of the identifier
    836+
    * being too long. It's easy enough for the user to get the
    837+
    * truncated names by casting our output to name[].
    838+
    */
    839+
    downname = downcase_identifier(curname, len, false, false);
    840+
    part = cstring_to_text_with_len(downname, len);
    841+
    astate = accumArrayResult(astate, PointerGetDatum(part), false,
    842+
    TEXTOID, CurrentMemoryContext);
    843+
    missing_ident = false;
    897844
    }
    898845

    899846
    if (missing_ident)
    900847
    {
    901848
    /* Different error messages based on where we failed. */
    902849
    if (*ne 10000 xtp == '.')
    903850
    ereport(ERROR,
    904-
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    905-
    errmsg("missing valid identifier before \".\" symbol: %s",
    906-
    sanitize_text(qualname))));
    851+
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    852+
    errmsg("string is not a valid identifier: \"%s\"",
    853+
    text_to_cstring(qualname)),
    854+
    errdetail("No valid identifier before \".\" symbol.")));
    907855
    else if (after_dot)
    908856
    ereport(ERROR,
    909-
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    910-
    errmsg("missing valid identifier after \".\" symbol: %s",
    911-
    sanitize_text(qualname))));
    857+
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    858+
    errmsg("string is not a valid identifier: \"%s\"",
    859+
    text_to_cstring(qualname)),
    860+
    errdetail("No valid identifier after \".\" symbol.")));
    912861
    else
    913862
    ereport(ERROR,
    914-
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    915-
    errmsg("missing valid identifier: %s",
    916-
    sanitize_text(qualname))));
    863+
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    864+
    errmsg("string is not a valid identifier: \"%s\"",
    865+
    text_to_cstring(qualname))));
    917866
    }
    918867

    919868
    while (isspace((unsigned char) *nextp))
    @@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
    934883
    {
    935884
    if (strict)
    936885
    ereport(ERROR,
    937-
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    938-
    errmsg("identifier contains disallowed characters: %s",
    939-
    sanitize_text(qualname))));
    886+
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    887+
    errmsg("string is not a valid identifier: \"%s\"",
    888+
    text_to_cstring(qualname))));
    940889
    break;
    941890
    }
    942891
    }

    src/test/regress/expected/name.out

    Lines changed: 17 additions & 12 deletions
    Original file line numberDiff line numberDiff line change
    @@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
    142142
    (1 row)
    143143

    144144
    SELECT parse_ident('foo.boo[]'); -- should fail
    145-
    ERROR: identifier contains disallowed characters: "foo.boo[]"
    145+
    ERROR: string is not a valid identifier: "foo.boo[]"
    146146
    SELECT parse_ident('foo.boo[]', strict => false); -- ok
    147147
    parse_ident
    148148
    -------------
    @@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
    151151

    152152
    -- should fail
    153153
    SELECT parse_ident(' ');
    154-
    ERROR: missing valid identifier: " "
    154+
    ERROR: string is not a valid identifier: " "
    155155
    SELECT parse_ident(' .aaa');
    156-
    ERROR: missing valid identifier before "." symbol: " .aaa"
    156+
    ERROR: string is not a valid identifier: " .aaa"
    157+
    DETAIL: No valid identifier before "." symbol.
    157158
    SELECT parse_ident(' aaa . ');
    158-
    ERROR: missing valid identifier after "." symbol: " aaa . "
    159+
    ERROR: string is not a valid identifier: " aaa . "
    160+
    DETAIL: No valid identifier after "." symbol.
    159161
    SELECT parse_ident('aaa.a%b');
    160-
    ERROR: identifier contains disallowed characters: "aaa.a%b"
    162+
    ERROR: string is not a valid identifier: "aaa.a%b"
    161163
    SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
    162-
    ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
    164+
    ERROR: string is not a valid identifier: "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
    163165
    SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
    164166
    length | length
    165167
    --------+--------
    @@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66)
    179181
    (1 row)
    180182

    181183
    SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
    182-
    ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
    184+
    ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX"
    183185
    SELECT parse_ident('1020');
    184-
    ERROR: missing valid identifier: "1020"
    186+
    ERROR: string is not a valid identifier: "1020"
    185187
    SELECT parse_ident('10.20');
    186-
    ERROR: missing valid identifier: "10.20"
    188+
    ERROR: string is not a valid identifier: "10.20"
    187189
    SELECT parse_ident('.');
    188-
    ERROR: missing valid identifier before "." symbol: "."
    190+
    ERROR: string is not a valid identifier: "."
    191+
    DETAIL: No valid identifier before "." symbol.
    189192
    SELECT parse_ident('.1020');
    190-
    ERROR: missing valid identifier before "." symbol: ".1020"
    193+
    ERROR: string is not a valid identifier: ".1020"
    194+
    DETAIL: No valid identifier before "." symbol.
    191195
    SELECT parse_ident('xxx.1020');
    192-
    ERROR: missing valid identifier after "." symbol: "xxx.1020"
    196+
    ERROR: string is not a valid identifier: "xxx.1020"
    197+
    DETAIL: No valid identifier after "." symbol.

    0 commit comments

    Comments
     (0)
    0