10000 Repair old performance bug in tuplesort.c/logtape.c. In the case where · postgrespro/postgres_cluster@8db05ba · GitHub
[go: up one dir, main page]

Skip to content

Commit 8db05ba

Browse files
committed
Repair old performance bug in tuplesort.c/logtape.c. In the case where
we are doing the final merge pass on-the-fly, and not writing the data back onto a 'tape', the number of free blocks in the tape set will become large, leading to a lot of time wasted in ltsReleaseBlock(). There is really no need to track the free blocks anymore in this state, so add a simple shutoff switch. Per report from Stefan Kaltenbrunner.
1 parent e6107da commit 8db05ba

File tree

3 files changed

+65
-17
lines changed
  • include/utils
  • 3 files changed

    +65
    -17
    lines changed

    src/backend/utils/sort/logtape.c

    Lines changed: 28 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -64,7 +64,7 @@
    6464
    * Portions Copyright (c) 1994, Regents of the University of California
    6565
    *
    6666
    * IDENTIFICATION
    67-
    * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.19 2006/03/05 15:58:49 momjian Exp $
    67+
    * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.20 2006/03/07 19:06:49 tgl Exp $
    6868
    *
    6969
    *-------------------------------------------------------------------------
    7070
    */
    @@ -146,7 +146,12 @@ struct LogicalTapeSet
    146146
    * When there are no such blocks, we extend the underlying file. Note
    147147
    * that the block numbers in freeBlocks are always in *decreasing* order,
    148148
    * so that removing the last entry gives us the lowest free block.
    149+
    *
    150+
    * If forgetFreeSpace is true then any freed blocks are simply forgotten
    151+
    * rather than being remembered in freeBlocks[]. See notes for
    152+
    * LogicalTapeSetForgetFreeSpace().
    149153
    */
    154+
    bool forgetFreeSpace; /* are we remembering free blocks? */
    150155
    long *freeBlocks; /* resizable array */
    151156
    int nFreeBlocks; /* # of currently free blocks */
    152157
    int freeBlocksLen; /* current allocated length of freeBlocks[] */
    @@ -247,6 +252,12 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
    247252
    int ndx;
    248253
    long *ptr;
    249254

    255+
    /*
    256+
    * Do nothing if we're no longer interested in remembering free space.
    257+
    */
    258+
    if (lts->forgetFreeSpace)
    259+
    return;
    260+
    250261
    /*
    251262
    * Enlarge freeBlocks array if full.
    252263
    */
    @@ -491,6 +502,7 @@ LogicalTapeSetCreate(int ntapes)
    491502
    (ntapes - 1) * sizeof(LogicalTape));
    492503
    lts->pfile = BufFileCreateTemp(false);
    493504
    lts->nFileBlocks = 0L;
    505+
    lts->forgetFreeSpace = false;
    494506
    lts->freeBlocksLen = 32; /* reasonable initial guess */
    495507
    lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
    496508
    lts->nFreeBlocks = 0;
    @@ -546,6 +558,21 @@ LogicalTapeSetClose(LogicalTapeSet *lts)
    546558
    pfree(lts);
    547559
    }
    548560

    561+
    /*
    562+
    * Mark a logical tape set as not needing management of free space anymore.
    563+
    *
    564+
    * This should be called if the caller does not intend to write any more data
    565+
    * into the tape set, but is reading from un-frozen tapes. Since no more
    566+
    * writes are planned, remembering free blocks is no longer useful. Setting
    567+
    * this flag lets us avoid wasting time and space in ltsReleaseBlock(), which
    568+
    * is not designed to handle large numbers of free blocks.
    569+
    */
    570+
    void
    571+
    LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
    572+
    {
    573+
    lts->forgetFreeSpace = true;
    574+
    }
    575+
    549576
    /*
    550577
    * Dump the dirty buffer of a logical tape.
    551578
    */

    src/backend/utils/sort/tuplesort.c

    Lines changed: 35 additions & 15 deletions
    Original file line numberDiff line numberDiff line change
    @@ -91,7 +91,7 @@
    9191
    * Portions Copyright (c) 1994, Regents of the University of California
    9292
    *
    9393
    * IDENTIFICATION
    94-
    * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.62 2006/03/05 15:58:49 momjian Exp $
    94+
    * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.63 2006/03/07 19:06:50 tgl Exp $
    9595
    *
    9696
    *-------------------------------------------------------------------------
    9797
    */
    @@ -1384,7 +1384,8 @@ mergeruns(Tuplesortstate *state)
    13841384
    /*
    13851385
    * If we produced only one initial run (quite likely if the total data
    13861386
    * volume is between 1X and 2X workMem), we can just use that tape as the
    1387-
    * finished output, rather than doing a useless merge.
    1387+
    * finished output, rather than doing a useless merge. (This obvious
    1388+
    * optimization is not in Knuth's algorithm.)
    13881389
    */
    13891390
    if (state->currentRun == 1)
    13901391
    {
    @@ -1401,33 +1402,51 @@ mergeruns(Tuplesortstate *state)
    14011402

    14021403
    for (;;)
    14031404
    {
    1404-
    /* Step D5: merge runs onto tape[T] until tape[P] is empty */
    1405-
    while (state->tp_runs[state->tapeRange - 1] ||
    1406-
    state->tp_dummy[state->tapeRange - 1])
    1405+
    /*
    1406+
    * At this point we know that tape[T] is empty. If there's just one
    1407+
    * (real or dummy) run left on each input tape, then only one merge
    1408+
    * pass remains. If we don't have to produce a materialized sorted
    1409+
    * tape, we can stop at this point and do the final merge on-the-fly.
    1410+
    */
    1411+
    if (!state->randomAccess)
    14071412
    {
    1408-
    bool allDummy = true;
    14091413
    bool allOneRun = true;
    14101414

    1415+
    Assert(state->tp_runs[state->tapeRange] == 0);
    14111416
    for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
    14121417
    {
    1413-
    if (state->tp_dummy[tapenum] == 0)
    1414-
    allDummy = false;
    14151418
    if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1)
    1419+
    {
    14161420
    allOneRun = false;
    1421+
    break;
    1422+
    }
    14171423
    }
    1418-
    1419-
    /*
    1420-
    * If we don't have to produce a materialized sorted tape, quit as
    1421-
    * soon as we're down to one real/dummy run per tape.
    1422-
    */
    1423-
    if (!state->randomAccess && allOneRun)
    1424+
    if (allOneRun)
    14241425
    {
    1425-
    Assert(!allDummy);
    1426+
    /* Tell logtape.c we won't be writing anymore */
    1427+
    LogicalTapeSetForgetFreeSpace(state->tapeset);
    14261428
    /* Initialize for the final merge pass */
    14271429
    beginmerge(state);
    14281430
    state->status = TSS_FINALMERGE;
    14291431
    return;
    14301432
    }
    1433+
    }
    1434+
    1435+
    /* Step D5: merge runs onto tape[T] until tape[P] is empty */
    1436+
    while (state->tp_runs[state->tapeRange - 1] ||
    1437+
    state->tp_dummy[state->tapeRange - 1])
    1438+
    {
    1439+
    bool allDummy = true;
    1440+
    1441+
    for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
    1442+
    {
    1443+
    if (state->tp_dummy[tapenum] == 0)
    1444+
    {
    1445+
    allDummy = false;
    1446+
    break;
    1447+
    }
    1448+
    }
    1449+
    14311450
    if (allDummy)
    14321451
    {
    14331452
    state->tp_dummy[state->tapeRange]++;
    @@ -1437,6 +1456,7 @@ mergeruns(Tuplesortstate *state)
    14371456
    else
    14381457
    mergeonerun(state);
    14391458
    }
    1459+
    14401460
    /* Step D6: decrease level */
    14411461
    if (--state->Level == 0)
    14421462
    break;

    src/include/utils/logtape.h

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -8,7 +8,7 @@
    88
    * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
    99
    * Portions Copyright (c) 1994, Regents of the University of California
    1010
    *
    11-
    * $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.14 2006/03/05 15:59:07 momjian Exp $
    11+
    * $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.15 2006/03/07 19:06:50 tgl Exp $
    1212
    *
    1313
    *-------------------------------------------------------------------------
    1414
    */
    @@ -26,6 +26,7 @@ typedef struct LogicalTapeSet LogicalTapeSet;
    2626

    2727
    extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes);
    2828
    extern void LogicalTapeSetClose(LogicalTapeSet *lts);
    29+
    extern void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts);
    2930
    extern size_t LogicalTapeRead(LogicalTapeSet *lts, int tapenum,
    3031
    void *ptr, size_t size);
    3132
    extern void LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,

    0 commit comments

    Comments
     (0)
    0