8000 Fix bugs in gin_fuzzy_search_limit processing. · postgrespro/postgres@e41955f · GitHub
[go: up one dir, main page]

Skip to content
  • Commit e41955f

    Browse files
    committed
    Fix bugs in gin_fuzzy_search_limit processing.
    entryGetItem()'s three code paths each contained bugs associated with filtering the entries for gin_fuzzy_search_limit. The posting-tree path failed to advance "advancePast" after having decided to filter an item. If we ran out of items on the current page and needed to advance to the next, what would actually happen is that entryLoadMoreItems() would re-load the same page. Eventually, the random dropItem() test would accept one of the same items it'd previously rejected, and we'd move on --- but it could take awhile with small gin_fuzzy_search_limit. To add insult to injury, this case would inevitably cause entryLoadMoreItems() to decide it needed to re-descend from the root, making things even slower. The posting-list path failed to implement gin_fuzzy_search_limit filtering at all, so that all entries in the posting list would be returned. The bitmap-result path used a "gotitem" variable that it failed to update in the one place where it'd actually make a difference, ie at the one "continue" statement. I think this was unreachable in practice, because if we'd looped around then it shouldn't be the case that the entries on the new page are before advancePast. Still, the "gotitem" variable was contributing nothing to either clarity or correctness, so get rid of it. Refactor all three loops so that the termination conditions are more alike and less unreadable. The code coverage report showed that we had no coverage at all for the re-descend-from-root code path in entryLoadMoreItems(), which seems like a very bad thing, so add a test case that exercises it. We also had exactly no coverage for gin_fuzzy_search_limit, so add a simplistic test case that at least hits those code paths a little bit. Back-patch to all supported branches. Adé Heyward and Tom Lane Discussion: https://postgr.es/m/CAEknJCdS-dE1Heddptm7ay2xTbSeADbkaQ8bU2AXRCVC2LdtKQ@mail.gmail.com
    1 parent c0885c4 commit e41955f

    File tree

    3 files changed

    +86
    -12
    lines changed

    3 files changed

    +86
    -12
    lines changed

    src/backend/access/gin/ginget.c

    Lines changed: 32 additions & 12 deletions
    < 10000 td data-grid-cell-id="diff-b2e57a3b5e8fb458d8d8733ff8c022f81239d69cd0583fd8b9e56fa63a394201-895-893-2" data-line-anchor="diff-b2e57a3b5e8fb458d8d8733ff8c022f81239d69cd0583fd8b9e56fa63a394201R893" data-selected="false" role="gridcell" style="background-color:var(--bgColor-default);padding-right:24px" tabindex="-1" valign="top" class="focusable-grid-cell diff-text-cell right-side-diff-cell left-side">
    entry->matchResult->offsets[entry->offset]);
    Original file line numberDiff line numberDiff line change
    @@ -813,9 +813,8 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
    813813
    /* A bitmap result */
    814814
    BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
    815815
    OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
    816-
    bool gotitem = false;
    817816

    818-
    do
    817+
    for (;;)
    819818
    {
    820819
    /*
    821820
    * If we've exhausted all items on this block, move to next block
    @@ -864,7 +863,6 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
    864863
    * estimate number of results on this page to support correct
    865864
    * reducing of result even if it's enabled.
    866865
    */
    867-
    gotitem = true;
    868866
    break;
    869867
    }
    870868

    @@ -877,7 +875,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
    877875
    /*
    878876
    * First, do a quick check against the last offset on the
    879877
    * page. If that's > advancePast, so are all the other
    880-
    * offsets.
    878+
    * offsets, so just go back to the top to get the next page.
    881879
    */
    882880
    if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
    883881
    {
    @@ -894,16 +892,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
    894892
    entry->matchResult->blockno,
    895893
    896894
    entry->offset++;
    897-
    gotitem = true;
    898-
    } while (!gotitem || (entry->reduceResult == true && dropItem(entry)));
    895+
    896+
    /* Done unless we need to reduce the result */
    897+
    if (!entry->reduceResult || !dropItem(entry))
    898+
    break;
    899+
    }
    899900
    }
    900901
    else if (!BufferIsValid(entry->buffer))
    901902
    {
    902903
    /*
    903904
    * A posting list from an entry tuple, or the last page of a posting
    904905
    * tree.
    905906
    */
    906-
    do
    907+
    for (;;)
    907908
    {
    908909
    if (entry->offset >= entry->nlist)
    909910
    {
    @@ -913,13 +914,20 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
    913914
    }
    914915

    915916
    entry->curItem = entry->list[entry->offset++];
    916-
    } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
    917-
    /* XXX: shouldn't we apply the fuzzy search limit here? */
    917+
    918+
    /* If we're not past advancePast, keep scanning */
    919+
    if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    920+
    continue;
    921+
    922+
    /* Done unless we need to reduce the result */
    923+
    if (!entry->reduceResult || !dropItem(entry))
    924+
    break;
    925+
    }
    918926
    }
    919927
    else
    920928
    {
    921929
    /* A posting tree */
    922-
    do
    930+
    for (;;)
    923931
    {
    924932
    /* If we've processed the current batch, load more items */
    925933
    while (entry->offset >= entry->nlist)
    @@ -935,8 +943,20 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
    935943

    936944
    entry->curItem = entry->list[entry->offset++];
    937945

    938-
    } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 ||
    939-
    (entry->reduceResult == true && dropItem(entry)));
    946+
    /* If we're not past advancePast, keep scanning */
    947+
    if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    948+
    continue;
    949+
    950+
    /* Done unless we need to reduce the result */
    951+
    if (!entry->reduceResult || !dropItem(entry))
    952+
    break;
    953+
    954+
    /*
    955+
    * Advance advancePast (so that entryLoadMoreItems will load the
    956+
    * right data), and keep scanning
    957+
    */
    958+
    advancePast = entry->curItem;
    959+
    }
    940960
    }
    941961
    }
    942962

    src/test/regress/expected/gin.out

    Lines changed: 38 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -35,6 +35,44 @@ insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 1000) g;
    3535
    insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
    3636
    delete from gin_test_tbl where i @> array[2];
    3737
    vacuum gin_test_tbl;
    38+
    -- Test for "rare && frequent" searches
    39+
    explain (costs off)
    40+
    select count(*) from gin_test_tbl where i @> array[1, 999];
    41+
    QUERY PLAN
    42+
    -------------------------------------------------------
    43+
    Aggregate
    44+
    -> Bitmap Heap Scan on gin_test_tbl
    45+
    Recheck Cond: (i @> '{1,999}'::integer[])
    46+
    -> Bitmap Index Scan on gin_test_idx
    47+
    Index Cond: (i @> '{1,999}'::integer[])
    48+
    (5 rows)
    49+
    50+
    select count(*) from gin_test_tbl where i @> array[1, 999];
    51+
    count
    52+
    -------
    53+
    3
    54+
    (1 row)
    55+
    56+
    -- Very weak test for gin_fuzzy_search_limit
    57+
    set gin_fuzzy_search_limit = 1000;
    58+
    explain (costs off)
    59+
    select count(*) > 0 as ok from gin_test_tbl where i @> array[1];
    60+
    QUERY PLAN
    61+
    ---------------------------------------------------
    62+
    Aggregate
    63+
    -> Bitmap Heap Scan on gin_test_tbl
    64+
    Recheck Cond: (i @> '{1}'::integer[])
    65+
    -> Bitmap Index Scan on gin_test_idx
    66+
    Index Cond: (i @> '{1}'::integer[])
    67+
    (5 rows)
    68+
    69+
    select count(*) > 0 as ok from gin_test_tbl where i @> array[1];
    70+
    ok
    71+
    ----
    72+
    t
    73+
    (1 row)
    74+
    75+
    reset gin_fuzzy_search_limit;
    3876
    -- Test optimization of empty queries
    3977
    create temp table t_gin_test_tbl(i int4[], j int4[]);
    4078
    create index on t_gin_test_tbl using gin (i, j);

    src/test/regress/sql/gin.sql

    Lines changed: 16 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -35,6 +35,22 @@ insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
    3535
    delete from gin_test_tbl where i @> array[2];
    3636
    vacuum gin_test_tbl;
    3737

    38+
    -- Test for "rare && frequent" searches
    39+
    explain (costs off)
    40+
    select count(*) from gin_test_tbl where i @> array[1, 999];
    41+
    42+
    select count(*) from gin_test_tbl where i @> array[1, 999];
    43+
    44+
    -- Very weak test for gin_fuzzy_search_limit
    45+
    set gin_fuzzy_search_limit = 1000;
    46+
    47+
    explain (costs off)
    48+
    select count(*) > 0 as ok from gin_test_tbl where i @> array[1];
    49+
    50+
    select count(*) > 0 as ok from gin_test_tbl where i @> array[1];
    51+
    52+
    reset gin_fuzzy_search_limit;
    53+
    3854
    -- Test optimization of empty queries
    3955
    create temp table t_gin_test_tbl(i int4[], j int4[]);
    4056
    create index on t_gin_test_tbl using gin (i, j);

    0 commit comments

    Comments
     (0)
    0