8000 Fix comparison of similarity to threshold in GIST trigram searches. · postgrespro/postgres_cluster@9c85256 · GitHub
[go: up one dir, main page]

Skip to content
  • Insights
  • Commit 9c85256

    Browse files
    committed
    Fix comparison of similarity to threshold in GIST trigram searches.
    There was some very strange code here, dating to commit b525bf7, that purported to work around an ancient gcc bug by forcing a float4 comparison to be done as int instead. Commit 5871b88 broke that when it changed one side of the comparison to "double" but left the comparison code alone. Commit f576b17 doubled down on the weirdness by introducing a "volatile" marker, which had nothing to do with the actual problem. Guess that the gcc bug, even if it's still present in the wild, was triggered by comparison of float4's and can be avoided if we store the result of cnt_sml() into a double before comparing to the double "nlimit". This will at least work correctly on non-broken compilers, and it's way more readable. Per bug #14202 from Greg Navis. Add a regression test based on his example. Report: <20160620115321.5792.10766@wrigleys.postgresql.org>
    1 parent 47981a4 commit 9c85256

    File tree

    3 files changed

    +76
    -9
    lines changed

    3 files changed

    +76
    -9
    lines changed

    contrib/pg_trgm/expected/pg_trgm.out

    Lines changed: 53 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -3838,3 +3838,56 @@ select * from test2 where t ~ ' z foo';
    < 8000 code>38383838
    z foo bar
    38393839
    (1 row)
    38403840

    3841+
    -- Check similarity threshold (bug #14202)
    3842+
    CREATE TEMP TABLE restaurants (city text);
    3843+
    INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
    3844+
    INSERT INTO restaurants SELECT 'Szczecin' FROM generate_series(1, 10000);
    3845+
    CREATE INDEX ON restaurants USING gist(city gist_trgm_ops);
    3846+
    -- Similarity of the two names (for reference).
    3847+
    SELECT similarity('Szczecin', 'Warsaw');
    3848+
    similarity
    3849+
    ------------
    3850+
    0
    3851+
    (1 row)
    3852+
    3853+
    -- Should get only 'Warsaw' for either setting of set_limit.
    3854+
    EXPLAIN (COSTS OFF)
    3855+
    SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
    3856+
    FROM restaurants WHERE city % 'Warsaw';
    3857+
    QUERY PLAN
    3858+
    -------------------------------------------------------------
    3859+
    Unique
    3860+
    -> Sort
    3861+
    Sort Key: city, (similarity(city, 'Warsaw'::text))
    3862+
    -> Bitmap Heap Scan on restaurants
    3863+
    Recheck Cond: (city % 'Warsaw'::text)
    3864+
    -> Bitmap Index Scan on restaurants_city_idx
    3865+
    Index Cond: (city % 'Warsaw'::text)
    3866+
    (7 rows)
    3867+
    3868+
    SELECT set_limit(0.3);
    3869+
    set_limit
    3870+
    -----------
    3871+
    0.3
    3872+
    (1 row)
    3873+
    3874+
    SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
    3875+
    FROM restaurants WHERE city % 'Warsaw';
    3876+
    city | similarity | show_limit
    3877+
    --------+------------+------------
    3878+
    Warsaw | 1 | 0.3
    3879+
    (1 row)
    3880+
    3881+
    SELECT set_limit(0.5);
    3882+
    set_limit
    3883+
    -----------
    3884+
    0.5
    3885+
    (1 row)
    3886+
    3887+
    SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
    3888+
    FROM restaurants WHERE city % 'Warsaw';
    3889+
    city | similarity | show_limit
    3890+
    --------+------------+------------
    3891+
    Warsaw | 1 | 0.5
    3892+
    (1 row)
    3893+

    contrib/pg_trgm/sql/pg_trgm.sql

    Lines changed: 21 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -113,3 +113,24 @@ select * from test2 where t ~ 'z foo bar';
    113113
    select * from test2 where t ~ ' z foo bar';
    114114
    select * from test2 where t ~ ' z foo bar';
    115115
    select * from test2 where t ~ ' z foo';
    116+
    117+
    -- Check similarity threshold (bug #14202)
    118+
    119+
    CREATE TEMP TABLE restaurants (city text);
    120+
    INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
    121+
    INSERT INTO restaurants SELECT 'Szczecin' FROM generate_series(1, 10000);
    122+
    CREATE INDEX ON restaurants USING gist(city gist_trgm_ops);
    123+
    124+
    -- Similarity of the two names (for reference).
    125+
    SELECT similarity('Szczecin', 'Warsaw');
    126+
    127+
    -- Should get only 'Warsaw' for either setting of set_limit.
    128+
    EXPLAIN (COSTS OFF)
    129+
    SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
    130+
    FROM restaurants WHERE city % 'Warsaw';
    131+
    SELECT set_limit(0.3);
    132+
    SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
    133+
    FROM restaurants WHERE city % 'Warsaw';
    134+
    SELECT set_limit(0.5);
    135+
    SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
    136+
    FROM restaurants WHERE city % 'Warsaw';

    contrib/pg_trgm/trgm_gist.c

    Lines changed: 2 additions & 9 deletions
    Original file line numberDiff line numberDiff line change
    @@ -296,16 +296,9 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
    296296

    297297
    if (GIST_LEAF(entry))
    298298
    { /* all leafs contains orig trgm */
    299+
    double tmpsml = cnt_sml(qtrg, key, *recheck);
    299300

    300-
    /*
    301-
    * Prevent gcc optimizing the tmpsml variable using volatile
    302-
    * keyword. Otherwise comparison of nlimit and tmpsml may give
    303-
    * wrong results.
    304-
    */
    305-
    float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck);
    306-
    307-
    /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
    308-
    res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);
    301+
    res = (tmpsml >= nlimit);
    309302
    }
    310303
    else if (ISALLTRUE(key))
    311304
    { /* non-leaf contains signature */

    0 commit comments

    Comments
     (0)
    0