8000 Allow bitmap scans to operate as index-only scans when possible. · postgrespro/postgres@7c70996 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7c70996

Browse files
committed
Allow bitmap scans to operate as index-only scans when possible.
If we don't have to return any columns from heap tuples, and there's no need to recheck qual conditions, and the heap page is all-visible, then we can skip fetching the heap page altogether. Skip prefetching pages too, when possible, on the assumption that the recheck flag will remain the same from one page to the next. While that assumption is hardly bulletproof, it seems like a good bet most of the time, and better than prefetching pages we don't need. This commit installs the executor infrastructure, but doesn't change any planner cost estimates, thus possibly causing bitmap scans to not be chosen in cases where this change renders them the best choice. I (tgl) am not entirely convinced that we need to account for this behavior in the planner, because I think typically the bitmap scan would get chosen anyway if it's the best bet. In any case the submitted patch took way too many shortcuts, resulting in too many clearly-bad choices, to be committable. Alexander Kuzmenkov, reviewed by Alexey Chernyshov, and whacked around rather heavily by me. Discussion: https://postgr.es/m/239a8955-c0fc-f506-026d-c837e86c827b@postgrespro.ru
1 parent ec7ce54 commit 7c70996

File tree

5 files changed

+150
-40
lines changed
  • optimizer/plan
  • include/nodes
  • test/regress
  • 5 files changed

    +150
    -40
    lines changed

    src/backend/executor/nodeBitmapHeapscan.c

    Lines changed: 126 additions & 39 deletions
    Original file line numberDiff line numberDiff line change
    @@ -39,6 +39,7 @@
    3939

    4040
    #include "access/relscan.h"
    4141
    #include "access/transam.h"
    42+
    #include "access/visibilitymap.h"
    4243
    #include "executor/execdebug.h"
    4344
    #include "executor/nodeBitmapHeapscan.h"
    4445
    #include "miscadmin.h"
    @@ -225,9 +226,31 @@ BitmapHeapNext(BitmapHeapScanState *node)
    225226
    }
    226227

    227228
    /*
    228-
    * Fetch the current heap page and identify candidate tuples.
    229+
    * We can skip fetching the heap page if we don't need any fields
    230+
    * from the heap, and the bitmap entries don't need rechecking,
    231+
    * and all tuples on the page are visible to our transaction.
    229232
    */
    230-
    bitgetpage(scan, tbmres);
    233+
    node->skip_fetch = (node->can_skip_fetch &&
    234+
    !tbmres->recheck &&
    235+
    VM_ALL_VISIBLE(node->ss.ss_currentRelation,
    236+
    tbmres->blockno,
    237+
    &node->vmbuffer));
    238+
    239+
    if (node->skip_fetch)
    240+
    {
    241+
    /*
    242+
    * The number of tuples on this page is put into
    243+
    * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
    244+
    */
    245+
    scan->rs_ntuples = tbmres->ntuples;
    246+
    }
    247+
    else
    248+
    {
    249+
    /*
    250+
    * Fetch the current heap page and identify candidate tuples.
    251+
    */
    252+
    bitgetpage(scan, tbmres);
    253+
    }
    231254

    232255
    if (tbmres->ntuples >= 0)
    233256
    node->exact_pages++;
    @@ -289,45 +312,55 @@ BitmapHeapNext(BitmapHeapScanState *node)
    289312
    */
    290313
    BitmapPrefetch(node, scan);
    291314

    292-
    /*
    293-
    * Okay to fetch the tuple
    294-
    */
    295-
    targoffset = scan->rs_vistuples[scan->rs_cindex];
    296-
    dp = (Page) BufferGetPage(scan->rs_cbuf);
    297-
    lp = PageGetItemId(dp, targoffset);
    298-
    Assert(ItemIdIsNormal(lp));
    315+
    if (node->skip_fetch)
    316+
    {
    317+
    /*
    318+
    * If we don't have to fetch the tuple, just return nulls.
    319+
    */
    320+
    ExecStoreAllNullTuple(slot);
    321+
    }
    322+
    else
    323+
    {
    324+
    /*
    325+
    * Okay to fetch the tuple.
    326+
    */
    327+
    targoffset = scan->rs_vistuples[scan->rs_cindex];
    328+
    dp = (Page) BufferGetPage(scan->rs_cbuf);
    329+
    lp = PageGetItemId(dp, targoffset);
    330+
    Assert(ItemIdIsNormal(lp));
    299331

    300-
    scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
    301-
    scan->rs_ctup.t_len = ItemIdGetLength(lp);
    302-
    scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
    303-
    ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
    332+
    scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
    333+
    scan->rs_ctup.t_len = ItemIdGetLength(lp);
    334+
    scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
    335+
    ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
    304336

    305-
    pgstat_count_heap_fetch(scan->rs_rd);
    337+
    pgstat_count_heap_fetch(scan->rs_rd);
    306338

    307-
    /*
    308-
    * Set up the result slot to point to this tuple. Note that the slot
    309-
    * acquires a pin on the buffer.
    310-
    */
    311-
    ExecStoreTuple(&scan->rs_ctup,
    312-
    slot,
    313-
    scan->rs_cbuf,
    314-
    false);
    315-
    316-
    /*
    317-
    * If we are using lossy info, we have to recheck the qual conditions
    318-
    * at every tuple.
    319-
    */
    320-
    if (tbmres->recheck)
    321-
    {
    322-
    econtext->ecxt_scantuple = slot;
    323-
    ResetExprContext(econtext);
    339+
    /*
    340+
    * Set up the result slot to point to this tuple. Note that the
    341+
    * slot acquires a pin on the buffer.
    342+
    */
    343+
    ExecStoreTuple(&scan->rs_ctup,
    344+
    slot,
    345+
    scan->rs_cbuf,
    346+
    false);
    324347

    325-
    if (!ExecQual(node->bitmapqualorig, econtext))
    348+
    /*
    349+
    * If we are using lossy info, we have to recheck the qual
    350+
    * conditions at every tuple.
    351+
    */
    352+
    if (tbmres->recheck)
    326353
    {
    327-
    /* Fails recheck, so drop it and loop back for another */
    328-
    InstrCountFiltered2(node, 1);
    329-
    ExecClearTuple(slot);
    330-
    continue;
    354+
    econtext->ecxt_scantuple = slot;
    355+
    ResetExprContext(econtext);
    356+
    357+
    if (!ExecQual(node->bitmapqualorig, econtext))
    358+
    {
    359+
    /* Fails recheck, so drop it and loop back for another */
    360+
    InstrCountFiltered2(node, 1);
    361+
    ExecClearTuple(slot);
    362+
    continue;
    363+
    }
    331364
    }
    332365
    }
    333366

    @@ -582,6 +615,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
    582615
    while (node->prefetch_pages < node->prefetch_target)
    583616
    {
    584617
    TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
    618+
    bool skip_fetch;
    585619

    586620
    if (tbmpre == NULL)
    587621
    {
    @@ -591,7 +625,26 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
    591625
    break;
    592626
    }
    593627
    node->prefetch_pages++;
    594-
    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
    628+
    629+
    /*
    630+
    * If we expect not to have to actually read this heap page,
    631+
    * skip this prefetch call, but continue to run the prefetch
    632+
    * logic normally. (Would it be better not to increment
    633+
    * prefetch_pages?)
    634+
    *
    635+
    * This depends on the assumption that the index AM will
    636+
    * report the same recheck flag for this future heap page as
    637+
    * it did for the current heap page; which is not a certainty
    638+
    * but is true in many cases.
    639+
    */
    640+
    skip_fetch = (node->can_skip_fetch &&
    641+
    (node->tbmres ? !node->tbmres->recheck : false) &&
    642+
    VM_ALL_VISIBLE(node->ss.ss_currentRelation,
    643+
    tbmpre->blockno,
    644+
    &node->pvmbuffer));
    645+
    646+
    if (!skip_fetch)
    647+
    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
    595648
    }
    596649
    }
    597650

    @@ -608,6 +661,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
    608661
    {
    609662
    TBMIterateResult *tbmpre;
    610663
    bool do_prefetch = false;
    664+
    bool skip_fetch;
    611665

    612666
    /*
    613667
    * Recheck under the mutex. If some other process has already
    @@ -633,7 +687,15 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
    633687
    break;
    634688
    }
    635689

    636-
    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
    690+
    /* As above, skip prefetch if we expect not to need page */
    691+
    skip_fetch = (node->can_skip_fetch &&
    692+
    (node->tbmres ? !node->tbmres->recheck : false) &&
    693+
    VM_ALL_VISIBLE(node->ss.ss_currentRelation,
    694+
    tbmpre->blockno,
    695+
    &node->pvmbuffer));
    696+
    697+
    if (!skip_fetch)
    698+
    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
    637699
    }
    638700
    }
    639701
    }
    @@ -687,6 +749,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
    687749
    /* rescan to release any page pin */
    688750
    heap_rescan(node->ss.ss_currentScanDesc, NULL);
    689751

    752+
    /* release bitmaps and buffers if any */
    690753
    if (node->tbmiterator)
    691754
    tbm_end_iterate(node->tbmiterator);
    < 10000 /td>
    692755
    if (node->prefetch_iterator)
    @@ -697,13 +760,19 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
    697760
    tbm_end_shared_iterate(node->shared_prefetch_iterator);
    698761
    if (node->tbm)
    699762
    tbm_free(node->tbm);
    763+
    if (node->vmbuffer != InvalidBuffer)
    764+
    ReleaseBuffer(node->vmbuffer);
    765+
    if (node->pvmbuffer != InvalidBuffer)
    766+
    ReleaseBuffer(node->pvmbuffer);
    700767
    node->tbm = NULL;
    701768
    node->tbmiterator = NULL;
    702769
    node->tbmres = NULL;
    703770
    node->prefetch_iterator = NULL;
    704771
    node->initialized = false;
    705772
    node->shared_tbmiterator = NULL;
    706773
    node->shared_prefetch_iterator = NULL;
    774+
    node->vmbuffer = InvalidBuffer;
    775+
    node->pvmbuffer = InvalidBuffer;
    707776

    708777
    ExecScanReScan(&node->ss);
    709778

    @@ -748,7 +817,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
    748817
    ExecEndNode(outerPlanState(node));
    749818

    750819
    /*
    751-
    * release bitmap if any
    820+
    * release bitmaps and buffers if any
    752821
    */
    753822
    if (node->tbmiterator)
    754823
    tbm_end_iterate(node->tbmiterator);
    @@ -760,6 +829,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
    760829
    tbm_end_shared_iterate(node->shared_tbmiterator);
    761830
    if (node->shared_prefetch_iterator)
    762831
    tbm_end_shared_iterate(node->shared_prefetch_iterator);
    832+
    if (node->vmbuffer != InvalidBuffer)
    833+
    ReleaseBuffer(node->vmbuffer);
    834+
    if (node->pvmbuffer != InvalidBuffer)
    835+
    ReleaseBuffer(node->pvmbuffer);
    763836

    764837
    /*
    765838
    * close heap scan
    @@ -805,6 +878,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
    805878
    scanstate->tbm = NULL;
    806879
    scanstate->tbmiterator = NULL;
    807880
    scanstate->tbmres = NULL;
    881+
    scanstate->skip_fetch = false;
    882+
    scanstate->vmbuffer = InvalidBuffer;
    883+
    scanstate->pvmbuffer = InvalidBuffer;
    808884
    scanstate->exact_pages = 0;
    809885
    scanstate->lossy_pages = 0;
    810886
    scanstate->prefetch_iterator = NULL;
    @@ -815,8 +891,19 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
    815891
    scanstate->pscan_len = 0;
    816892
    scanstate->initialized = false;
    817893
    scanstate->shared_tbmiterator = NULL;
    894+
    scanstate->shared_prefetch_iterator = NULL;
    818895
    scanstate->pstate = NULL;
    819896

    897+
    /*
    898+
    * We can potentially skip fetching heap pages if we do not need any
    899+
    * columns of the table, either for checking non-indexable quals or for
    900+
    * returning data. This test is a bit simplistic, as it checks the
    901+
    * stronger condition that there's no qual or return tlist at all. But in
    902+
    * most cases it's probably not worth working harder than that.
    903+
    */
    904+
    scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
    905+
    node->scan.plan.targetlist == NIL);
    906+
    820907
    /*
    821908
    * Miscellaneous initialization
    822909
    *

    src/backend/optimizer/plan/createplan.c

    Lines changed: 9 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -807,6 +807,15 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
    807807
    if (IsA(path, CustomPath))
    808808
    return false;
    809809

    810+
    /*
    811+
    * If a bitmap scan's tlist is empty, keep it as-is. This may allow the
    812+
    * executor to skip heap page fetches, and in any case, the benefit of
    813+
    * using a physical tlist instead would be minimal.
    814+
    */
    815+
    if (IsA(path, BitmapHeapPath) &&
    816+
    path->pathtarget->exprs == NIL)
    817+
    return false;
    818+
    810819
    /*
    811820
    * Can't do it if any system columns or whole-row Vars are requested.
    812821
    * (This could possibly be fixed but would take some fragile assumptions

    src/include/nodes/execnodes.h

    Lines changed: 9 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -507,7 +507,7 @@ typedef struct EState
    507507
    bool *es_epqTupleSet; /* true if EPQ tuple is provided */
    508508
    bool *es_epqScanDone; /* true if EPQ tuple has been fetched */
    509509

    510-
    bool es_use_parallel_mode; /* can we use parallel workers? */
    510+
    bool es_use_parallel_mode; /* can we use parallel workers? */
    511511

    512512
    /* The per-query shared memory area to use for parallel execution. */
    513513
    struct dsa_area *es_query_dsa;
    @@ -1331,6 +1331,10 @@ typedef struct ParallelBitmapHeapState
    13311331
    * tbm bitmap obtained from child index scan(s)
    13321332
    * tbmiterator iterator for scanning current pages
    13331333
    * tbmres current-page data
    1334+
    * can_skip_fetch can we potentially skip tuple fetches in this scan?
    1335+
    * skip_fetch are we skipping tuple fetches on this page?
    1336+
    * vmbuffer buffer for visibility-map lookups
    1337+
    * pvmbuffer ditto, for prefetched pages
    13341338
    * exact_pages total number of exact pages retrieved
    13351339
    * lossy_pages total number of lossy pages retrieved
    13361340
    * prefetch_iterator iterator for prefetching ahead of current page
    @@ -1351,6 +1355,10 @@ typedef struct BitmapHeapScanState
    13511355
    TIDBitmap *tbm;
    13521356
    TBMIterator *tbmiterator;
    13531357
    TBMIterateResult *tbmres;
    1358+
    bool can_skip_fetch;
    1359+
    bool skip_fetch;
    1360+
    Buffer vmbuffer;
    1361+
    Buffer pvmbuffer;
    13541362
    long exact_pages;
    13551363
    long lossy_pages;
    13561364
    TBMIterator *prefetch_iterator;

    src/test/regress/expected/stats.out

    Lines changed: 3 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
    136136
    (1 row)
    137137

    138138
    -- do an indexscan
    139+
    -- make sure it is not a bitmap scan, which might skip fetching heap tuples
    140+
    SET enable_bitmapscan TO off;
    139141
    SELECT count(*) FROM tenk2 WHERE unique1 = 1;
    140142
    count
    141143
    -------
    142144
    1
    143145
    (1 row)
    144146

    147+
    RESET enable_bitmapscan;
    145148
    -- We can't just call wait_for_stats() at this point, because we only
    146149
    -- transmit stats when the session goes idle, and we probably didn't
    147150
    -- transmit the last couple of counts yet thanks to the rate-limiting logic

    src/test/regress/sql/stats.sql

    Lines changed: 3 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -138,7 +138,10 @@ ROLLBACK;
    138138
    -- do a seqscan
    139139
    SELECT count(*) FROM tenk2;
    140140
    -- do an indexscan
    141+
    -- make sure it is not a bitmap scan, which might skip fetching heap tuples
    142+
    SET enable_bitmapscan TO off;
    141143
    SELECT count(*) FROM tenk2 WHERE unique1 = 1;
    144+
    RESET enable_bitmapscan;
    142145

    143146
    -- We can't just call wait_for_stats() at this point, because we only
    144147
    -- transmit stats when the session goes idle, and we probably didn't

    0 commit comments

    Comments
     (0)
    0