8000 Simplify and improve ProcessStandbyHSFeedbackMessage logic. · jaylevitt/postgres@d1d094e · GitHub
[go: up one dir, main page]

Skip to content

Commit d1d094e

Browse files
committed
Simplify and improve ProcessStandbyHSFeedbackMessage logic.
There's no need to clamp the standby's xmin to be greater than GetOldestXmin's result; if there were any such need this logic would be hopelessly inadequate anyway, because it fails to account for within-database versus cluster-wide values of GetOldestXmin. So get rid of that, and just rely on sanity-checking that the xmin is not wrapped around relative to the nextXid counter. Also, don't reset the walsender's xmin if the current feedback xmin is indeed out of range; that just creates more problems than we already had. Lastly, don't bother to take the ProcArrayLock; there's no need to do that to set xmin. Also improve the comments about this in GetOldestXmin itself.
1 parent 790fa1f commit d1d094e

File tree

2 files changed

+79
-76
lines changed

2 files changed

+79
-76
lines changed

src/backend/replication/walsender.c

Lines changed: 48 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -611,76 +611,67 @@ static void
611611
ProcessStandbyHSFeedbackMessage(void)
612612
{
613613
StandbyHSFeedbackMessage reply;
614-
TransactionId newxmin = InvalidTransactionId;
614+
TransactionId nextXid;
615+
uint32 nextEpoch;
615616

616-
pq_copymsgbytes(&reply_message, (char *) &reply, sizeof(StandbyHSFeedbackMessage));
617+
/* Decipher the reply message */
618+
pq_copymsgbytes(&reply_message, (char *) &reply,
619+
sizeof(StandbyHSFeedbackMessage));
617620

618621
elog(DEBUG2, "hot standby feedback xmin %u epoch %u",
619622
reply.xmin,
620623
reply.epoch);
621624

625+
/* Ignore invalid xmin (can't actually happen with current walreceiver) */
626+
if (!TransactionIdIsNormal(reply.xmin))
627+
return;
628+
622629
/*
623-
* Update the WalSender's proc xmin to allow it to be visible to
624-
* snapshots. This will hold back the removal of dead rows and thereby
625-
* prevent the generation of cleanup conflicts on the standby server.
630+
* Check that the provided xmin/epoch are sane, that is, not in the future
631+
* and not so far back as to be already wrapped around. Ignore if not.
632+
*
633+
* Epoch of nextXid should be same as standby, or if the counter has
634+
* wrapped, then one greater than standby.
626635
*/
627-
if (TransactionIdIsValid(reply.xmin))
628-
{
629-
TransactionId nextXid;
630-
uint32 nextEpoch;
631-
bool epochOK = false;
632-
633-
GetNextXidAndEpoch(&nextXid, &nextEpoch);
634-
635-
/*
636-
* Epoch of oldestXmin should be same as standby or if the counter has
637-
* wrapped, then one less than reply.
638-
*/
639-
if (reply.xmin <= nextXid)
640-
{
641-
if (reply.epoch == nextEpoch)
642-
epochOK = true;
643-
}
644-
else
645-
{
646-
if (nextEpoch > 0 && reply.epoch == nextEpoch - 1)
647-
epochOK = true;
648-
}
649-
650-
/*
651-
* Feedback from standby must not go backwards, nor should it go
652-
* forwards further than our most recent xid.
653-
*/
654-
if (epochOK && TransactionIdPrecedesOrEquals(reply.xmin, nextXid))
655-
{
656-
if (!TransactionIdIsValid(MyProc->xmin))
657-
{
658-
TransactionId oldestXmin = GetOldestXmin(true, true);
636+
GetNextXidAndEpoch(&nextXid, &nextEpoch);
659637

660-
if (TransactionIdPrecedes(oldestXmin, reply.xmin))
661-
newxmin = reply.xmin;
662-
else
663-
newxmin = oldestXmin;
664-
}
665-
else
666-
{
667-
if (TransactionIdPrecedes(MyProc->xmin, reply.xmin))
668-
newxmin = reply.xmin;
669-
else
670-
newxmin = MyProc->xmin; /* stay the same */
671-
}
672-
}
638+
if (reply.xmin <= nextXid)
639+
{
640+
if (reply.epoch != nextEpoch)
641+
return;
673642
}
643+
else
644+
{
645+
if (reply.epoch + 1 != nextEpoch)
646+
return;
647+
}
648+
649+
if (!TransactionIdPrecedesOrEquals(reply.xmin, nextXid))
650+
return; /* epoch OK, but it's wrapped around */
674651

675652
/*
676-
* Grab the ProcArrayLock to set xmin, or invalidate for bad reply
653+
* Set the WalSender's xmin equal to the standby's requested xmin, so that
654+
* the xmin will be taken into account by GetOldestXmin. This will hold
655+
* back the removal of dead rows and thereby prevent the generation of
656+
* cleanup conflicts on the standby server.
657+
*
658+
* There is a small window for a race condition here: although we just
659+
* checked that reply.xmin precedes nextXid, the nextXid could have gotten
660+
* advanced between our fetching it and applying the xmin below, perhaps
661+
* far enough to make reply.xmin wrap around. In that case the xmin we
662+
* set here would be "in the future" and have no effect. No point in
663+
* worrying about this since it's too late to save the desired data
664+
* anyway. Assuming that the standby sends us an increasing sequence of
665+
* xmins, this could only happen during the first reply cycle, else our
666+
* own xmin would prevent nextXid from advancing so far.
667+
*
668+
* We don't bother taking the ProcArrayLock here. Setting the xmin field
669+
* is assumed atomic, and there's no real need to prevent a concurrent
670+
* GetOldestXmin. (If we're moving our xmin forward, this is obviously
671+
* safe, and if we're moving it backwards, well, the data is at risk
672+
* already since a VACUUM could have just finished calling GetOldestXmin.)
677673
*/
678-
if (MyProc->xmin != newxmin)
679-
{
680-
LWLockAcquire(ProcArrayLock, LW_SHARED);
681-
MyProc->xmin = newxmin;
682-
LWLockRelease(ProcArrayLock);
683-
}
674+
MyProc->xmin = reply.xmin;
684675
}
685676

686677
/* Main loop of walsender process */

src/backend/storage/ipc/procarray.c

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -998,22 +998,32 @@ TransactionIdIsActive(TransactionId xid)
998998
* This is also used to determine where to truncate pg_subtrans. allDbs
999999
* must be TRUE for that case, and ignoreVacuum FALSE.
10001000
*
1001-
* Note: it's possible for the calculated value to move backwards on repeated
1002-
* calls. The calculated value is conservative, so that anything older is
1003-
* definitely not considered as running by anyone anymore, but the exact
1004-
* value calculated depends on a number of things. For example, if allDbs is
1005-
* TRUE and there are no transactions running in the current database,
1006-
* GetOldestXmin() returns latestCompletedXid. If a transaction begins after
1007-
* that, its xmin will include in-progress transactions in other databases
1008-
* that started earlier, so another call will return an lower value. The
1009- BE2E
* return value is also adjusted with vacuum_defer_cleanup_age, so increasing
1010-
* that setting on the fly is an easy way to have GetOldestXmin() move
1011-
* backwards.
1012-
*
10131001
* Note: we include all currently running xids in the set of considered xids.
10141002
* This ensures that if a just-started xact has not yet set its snapshot,
10151003
* when it does set the snapshot it cannot set xmin less than what we compute.
10161004
* See notes in src/backend/access/transam/README.
1005+
*
1006+
* Note: despite the above, it's possible for the calculated value to move
1007+
* backwards on repeated calls. The calculated value is conservative, so that
1008+
* anything older is definitely not considered as running by anyone anymore,
1009+
* but the exact value calculated depends on a number of things. For example,
1010+
* if allDbs is FALSE and there are no transactions running in the current
1011+
* database, GetOldestXmin() returns latestCompletedXid. If a transaction
1012+
* begins after that, its xmin will include in-progress transactions in other
1013+
* databases that started earlier, so another call will return a lower value.
1014+
* Nonetheless it is safe to vacuum a table in the current database with the
1015+
* first result. There are also replication-related effects: a walsender
1016+
* process can set its xmin based on transactions that are no longer running
1017+
* in the master but are still being replayed on the standby, thus possibly
1018+
* making the GetOldestXmin reading go backwards. In this case there is a
1019+
* possibility that we lose data that the standby would like to have, but
1020+
* there is little we can do about that --- data is only protected if the
1021+
* walsender runs continuously while queries are executed on the standby.
1022+
* (The Hot Standby code deals with such cases by failing standby queries
1023+
* that needed to access already-removed data, so there's no integrity bug.)
1024+
* The return value is also adjusted with vacuum_defer_cleanup_age, so
1025+
* increasing that setting on the fly is another easy way to make
1026+
* GetOldestXmin() move backwards, with no consequences for data integrity.
10171027
*/
10181028
TransactionId
10191029
GetOldestXmin(bool allDbs, bool ignoreVacuum)
@@ -1046,7 +1056,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
10461056

10471057
if (allDbs ||
10481058
proc->databaseId == MyDatabaseId ||
1049-
proc->databaseId == 0) /* include WalSender */
1059+
proc->databaseId == 0) /* always include WalSender */
10501060
{
10511061
/* Fetch xid just once - see GetNewTransactionId */
10521062
TransactionId xid = proc->xid;
@@ -1092,16 +1102,18 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
10921102
LWLockRelease(ProcArrayLock);
10931103

10941104
/*
1095-
* Compute the cutoff XID, being careful not to generate a "permanent"
1096-
* XID. We need do this only on the primary, never on standby.
1105+
* Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
1106+
* being careful not to generate a "permanent" XID.
10971107
*
10981108
* vacuum_defer_cleanup_age provides some additional "slop" for the
10991109
* benefit of hot standby queries on slave servers. This is quick and
11001110
* dirty, and perhaps not all that useful unless the master has a
1101-
* predictable transaction rate, but it's what we've got. Note that
1102-
* we are assuming vacuum_defer_cleanup_age isn't large enough to
1103-
* cause wraparound --- so guc.c should limit it to no more than the
1104-
* xidStopLimit threshold in varsup.c.
1111+
* predictable transaction rate, but it offers some protection when
1112+
* there's no walsender connection. Note that we are assuming
1113+
* vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1114+
* so guc.c should limit it to no more than the xidStopLimit threshold
1115+
* in varsup.c. Also note that we intentionally don't apply
1116+
* vacuum_defer_cleanup_age on standby servers.
11051117
*/
11061118
result -= vacuum_defer_cleanup_age;
11071119
if (!TransactionIdIsNormal(result))

0 commit comments

Comments
 (0)
0