hstore: Tighten key/value parsing check for whitespaces · postgrespro/postgres@d522b05 · GitHub
[go: up one dir, main page]

Skip to content
  • Commit d522b05

    Browse files
    committed
    hstore: Tighten key/value parsing check for whitespaces
    isspace() can be locale-sensitive depending on the platform, causing hstore to treat as whitespace some characters that it should not. For example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would be discarded from the given input. This problem is similar to 9ae2661, though it was missed that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace(). This problem has existed for a long time, so backpatch all the way down. Author: Evan Jones Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com Backpatch-through: 11
    1 parent d088ba5 commit d522b05

    File tree

    6 files changed

    +70
    -5
    lines changed

    6 files changed

    +70
    -5
    lines changed

    contrib/hstore/Makefile

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -22,7 +22,7 @@ PGFILEDESC = "hstore - key/value pair data type"
    2222

    2323
    HEADERS = hstore.h
    2424

    25-
    REGRESS = hstore
    25+
    REGRESS = hstore hstore_utf8
    2626

    2727
    ifdef USE_PGXS
    2828
    PG_CONFIG = pg_config

    contrib/hstore/expected/hstore_utf8.out

    Lines changed: 36 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -0,0 +1,36 @@
    1+
    /*
    2+
    * This test must be run in a database with UTF-8 encoding,
    3+
    * because other encodings don't support all the characters used.
    4+
    */
    5+
    SELECT getdatabaseencoding() <> 'UTF8'
    6+
    AS skip_test \gset
    7+
    \if :skip_test
    8+
    \quit
    9+
    \endif
    10+
    SET client_encoding = utf8;
    11+
    -- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
    12+
    -- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
    13+
    SELECT E'key\u0105=>value\u0105'::hstore;
    14+
    hstore
    15+
    ------------------
    16+
    "keyą"=>"valueą"
    17+
    (1 row)
    18+
    19+
    SELECT 'keyą=>valueą'::hstore;
    20+
    hstore
    21+
    ------------------
    22+
    "keyą"=>"valueą"
    23+
    (1 row)
    24+
    25+
    SELECT 'ą=>ą'::hstore;
    26+
    hstore
    27+
    ----------
    28+
    "ą"=>"ą"
    29+
    (1 row)
    30+
    31+
    SELECT 'keyąfoo=>valueą'::hstore;
    32+
    hstore
    33+
    ---------------------
    34+
    "keyąfoo"=>"valueą"
    35+
    (1 row)
    36+

    contrib/hstore/expected/hstore_utf8_1.out

    Lines changed: 8 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -0,0 +1,8 @@
    1+
    /*
    2+
    * This test must be run in a database with UTF-8 encoding,
    3+
    * because other encodings don't support all the characters used.
    4+
    */
    5+
    SELECT getdatabaseencoding() <> 'UTF8'
    6+
    AS skip_test \gset
    7+
    \if :skip_test
    8+
    \quit

    contrib/hstore/hstore_io.c

    Lines changed: 5 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -13,6 +13,7 @@
    1313
    #include "lib/stringinfo.h"
    1414
    #include "libpq/pqformat.h"
    1515
    #include "nodes/miscnodes.h"
    16+
    #include "parser/scansup.h"
    1617
    #include "utils/builtins.h"
    1718
    #include "utils/json.h"
    1819
    #include "utils/jsonb.h"
    @@ -118,7 +119,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
    118119
    {
    119120
    st = GV_WAITESCIN;
    120121
    }
    121-
    else if (!isspace((unsigned char) *(state->ptr)))
    122+
    else if (!scanner_isspace((unsigned char) *(state->ptr)))
    122123
    {
    123124
    *(state->cur) = *(state->ptr);
    124125
    state->cur++;
    @@ -141,7 +142,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
    141142
    state->ptr--;
    142143
    return true;
    143144
    }
    144-
    else if (isspace((unsigned char) *(state->ptr)))
    145+
    else if (scanner_isspace((unsigned char) *(state->ptr)))
    145146
    {
    146147
    return true;
    147148
    }
    @@ -255,7 +256,7 @@ parse_hstore(HSParser *state)
    255256
    {
    256257
    PRSEOF;
    257258
    }
    258-
    else if (!isspace((unsigned char) *(state->ptr)))
    259+
    else if (!scanner_isspace((unsigned char) *(state->ptr)))
    259260
    {
    260261
    PRSSYNTAXERROR;
    261262
    }
    @@ -309,7 +310,7 @@ parse_hstore(HSParser *state)
    309310
    {
    310311
    return true;
    311312
    }
    312-
    else if (!isspace((unsigned char) *(state->ptr)))
    313+
    else if (!scanner_isspace((unsigned char) *(state->ptr)))
    313314
    {
    314315
    PRSSYNTAXERROR;
    315316
    }

    contrib/hstore/meson.build

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -50,6 +50,7 @@ tests += {
    5050
    'regress': {
    5151
    'sql': [
    5252
    'hstore',
    53+
    'hstore_utf8',
    5354
    ],
    5455
    },
    5556
    }

    contrib/hstore/sql/hstore_utf8.sql

    Lines changed: 19 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -0,0 +1,19 @@
    1+
    /*
    2+
    * This test must be run in a database with UTF-8 encoding,
    3+
    * because other encodings don't support all the characters used.
    4+
    */
    5+
    6+
    SELECT getdatabaseencoding() <> 'UTF8'
    7+
    AS skip_test \gset
    8+
    \if :skip_test
    9+
    \quit
    10+
    \endif
    11+
    12+
    SET client_encoding = utf8;
    13+
    14+
    -- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
    15+
    -- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
    16+
    SELECT E'key\u0105=>value\u0105'::hstore;
    17+
    SELECT 'keyą=>valueą'::hstore;
    18+
    SELECT 'ą=>ą'::hstore;
    19+
    SELECT 'keyąfoo=>valueą'::hstore;

    0 commit comments

    Comments
     (0)
    0