8000 Assorted code review for recent ProcArrayLock patch. · postgrespro/postgres@4aec498 · GitHub
[go: up one dir, main page]

Skip to content
  • Commit 4aec498

    Browse files
    committed
    Assorted code review for recent ProcArrayLock patch.
    Post-commit review by Andres Freund discovered a couple of concurrency bugs in the original patch: specifically, if the leader cleared a follower's XID before it reached PGSemaphoreLock, the semaphore would be left in the wrong state; and if another process did PGSemaphoreUnlock for some unrelated reason, we might resume execution before the fact that our XID was cleared was globally visible. Also, improve the wording of some comments, rename nextClearXidElem to firstClearXidElem in PROC_HDR for clarity, and drop some volatile qualifiers that aren't necessary. Amit Kapila, reviewed and slightly revised by me.
    1 parent 1ea5ce5 commit 4aec498

    File tree

    3 files changed

    +34
    -15
    lines changed

    3 files changed

    +34
    -15
    lines changed

    src/backend/storage/ipc/procarray.c

    Lines changed: 29 additions & 12 deletions
    Original file line numberDiff line numberDiff line change
    @@ -470,10 +470,10 @@ ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
    470470
    * commit time, add ourselves to a list of processes that need their XIDs
    471471
    * cleared. The first process to add itself to the list will acquire
    472472
    * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
    473-
    * on behalf of all group members. This avoids a great deal of context
    474-
    * switching when many processes are trying to commit at once, since the lock
    475-
    * only needs to be handed from the last share-locker to one process waiting
    476-
    * for the exclusive lock, rather than to each one in turn.
    473+
    * on behalf of all group members. This avoids a great deal of contention
    474+
    * around ProcArrayLock when many processes are trying to commit at once,
    475+
    * since the lock need not be repeatedly handed off from one committing
    476+
    * process to the next.
    477477
    */ 8000
    478478
    static void
    479479
    ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
    @@ -487,28 +487,39 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
    487487
    Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
    488488

    489489
    /* Add ourselves to the list of processes needing a group XID clear. */
    490+
    proc->clearXid = true;
    490491
    proc->backendLatestXid = latestXid;
    491492
    while (true)
    492493
    {
    493-
    nextidx = pg_atomic_read_u32(&procglobal->nextClearXidElem);
    494+
    nextidx = pg_atomic_read_u32(&procglobal->firstClearXidElem);
    494495
    pg_atomic_write_u32(&proc->nextClearXidElem, nextidx);
    495496

    496-
    if (pg_atomic_compare_exchange_u32(&procglobal->nextClearXidElem,
    497+
    if (pg_atomic_compare_exchange_u32(&procglobal->firstClearXidElem,
    497498
    &nextidx,
    498499
    (uint32) proc->pgprocno))
    499500
    break;
    500501
    }
    501502

    502-
    /* If the list was not empty, the leader will clear our XID. */
    503+
    /*
    504+
    * If the list was not empty, the leader will clear our XID. It is
    505+
    * impossible to have followers without a leader because the first process
    506+
    * that has added itself to the list will always have nextidx as
    507+
    * INVALID_PGPROCNO.
    508+
    */
    503509
    if (nextidx != INVALID_PGPROCNO)
    504510
    {
    505511
    /* Sleep until the leader clears our XID. */
    506-
    while (pg_atomic_read_u32(&proc->nextClearXidElem) != INVALID_PGPROCNO)
    512+
    for (;;)
    507513
    {
    508-
    extraWaits++;
    514+
    /* acts as a read barrier */
    509515
    PGSemaphoreLock(&proc->sem);
    516+
    if (!proc->clearXid)
    517+
    break;
    518+
    extraWaits++;
    510519
    }
    511520

    521+
    Assert(pg_atomic_read_u32(&proc->nextClearXidElem) == INVALID_PGPROCNO);
    522+
    512523
    /* Fix semaphore count for any absorbed wakeups */
    513524
    while (extraWaits-- > 0)
    514525
    PGSemaphoreUnlock(&proc->sem);
    @@ -520,12 +531,13 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
    520531

    521532
    /*
    522533
    * Now that we've got the lock, clear the list of processes waiting for
    523-
    * group XID clearing, saving a pointer to the head of the list.
    534+
    * group XID clearing, saving a pointer to the head of the list. Trying
    535+
    * to pop elements one at a time could lead to an ABA problem.
    524536
    */
    525537
    while (true)
    526538
    {
    527-
    nextidx = pg_atomic_read_u32(&procglobal->nextClearXidElem);
    528-
    if (pg_atomic_compare_exchange_u32(&procglobal->nextClearXidElem,
    539+
    nextidx = pg_atomic_read_u32(&procglobal->firstClearXidElem);
    540+
    if (pg_atomic_compare_exchange_u32(&procglobal->firstClearXidElem,
    529541
    &nextidx,
    530542
    INVALID_PGPROCNO))
    531543
    break;
    @@ -563,6 +575,11 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
    563575
    wakeidx = pg_atomic_read_u32(&proc->nextClearXidElem);
    564576
    pg_atomic_write_u32(&proc->nextClearXidElem, INVALID_PGPROCNO);
    565577

    8000 578+
    /* ensure all previous writes are visible before follower continues. */
    579+
    pg_write_barrier();
    580+
    581+
    proc->clearXid = false;
    582+
    566583
    if (proc != MyProc)
    567584
    PGSemaphoreUnlock(&proc->sem);
    568585
    }

    src/backend/storage/lmgr/proc.c

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -181,7 +181,7 @@ InitProcGlobal(void)
    181181
    ProcGlobal->startupBufferPinWaitBufId = -1;
    182182
    ProcGlobal->walwriterLatch = NULL;
    183183
    ProcGlobal->checkpointerLatch = NULL;
    184-
    pg_atomic_init_u32(&ProcGlobal->nextClearXidElem, INVALID_PGPROCNO);
    184+
    pg_atomic_init_u32(&ProcGlobal->firstClearXidElem, INVALID_PGPROCNO);
    185185

    186186
    /*
    187187
    * Create and initialize all the PGPROC structures we'll need. There are
    @@ -395,6 +395,7 @@ InitProcess(void)
    395395
    SHMQueueElemInit(&(MyProc->syncRepLinks));
    396396

    397397
    /* Initialize fields for group XID clearing. */
    398+
    MyProc->clearXid = false;
    398399
    MyProc->backendLatestXid = InvalidTransactionId;
    399400
    pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO);
    400401

    src/include/storage/proc.h

    Lines changed: 3 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -142,7 +142,8 @@ struct PGPROC
    142142
    struct XidCache subxids; /* cache for subtransaction XIDs */
    143143

    144144
    /* Support for group XID clearing. */
    145-
    volatile pg_atomic_uint32 nextClearXidElem;
    145+
    bool clearXid;
    146+
    pg_atomic_uint32 nextClearXidElem;
    146147
    TransactionId backendLatestXid;
    147148

    148149
    /* Per-backend LWLock. Protects fields below. */
    @@ -207,7 +208,7 @@ typedef struct PROC_HDR
    207208
    /* Head of list of bgworker free PGPROC structures */
    208209
    PGPROC *bgworkerFreeProcs;
    209210
    /* First pgproc waiting for group XID clear */
    210-
    volatile pg_atomic_uint32 nextClearXidElem;
    211+
    pg_atomic_uint32 firstClearXidElem;
    211212
    /* WALWriter process's latch */
    212213
    Latch *walwriterLatch;
    213214
    /* Checkpointer process's latch */

    0 commit comments

    Comments
     (0)
    0