If recovery.conf is created after "pg_ctl stop -m i", do crash recovery. · danielcode/postgres@ee8b95e · GitHub
[go: up one dir, main page]

Skip to content

Commit ee8b95e

Browse files
committed
If recovery.conf is created after "pg_ctl stop -m i", do crash recovery.
If you create a base backup using an atomic filesystem snapshot, and try to perform PITR starting from that base backup, or if you just kill a master server and create recovery.conf to put it into standby mode, we don't know how far we need to recover before reaching consistency. Normally in crash recovery, we replay all the WAL present in pg_xlog, and assume that we're consistent after that. And normally in archive recovery, minRecoveryPoint, backupEndRequired, or backupEndPoint is set in the control file, indicating how far we need to replay to reach consistency. But if the server was previously up and running normally, and you kill -9 it or take an atomic filesystem snapshot, none of those fields are set in the control file. The solution is to perform crash recovery first, replaying all the WAL in pg_xlog. After that's done, we assume that the system is consistent like in normal crash recovery, and switch to archive recovery mode after that. Per report from Kyotaro HORIGUCHI. In his scenario, recovery.conf was created after "pg_ctl stop -m i". I'm not sure we need to support that exact scenario, but we should support backing up using a filesystem snapshot, which looks identical. This issue goes back to at least 9.0, where hot standby was introduced and we started to track when consistency is reached. In 9.1 and 9.2, we would open up for hot standby too early, and queries could briefly see an inconsistent state. But 9.2 made it more visible, as we started to PANIC if we see a reference to a non-existing page during recovery, if we've already reached consistency. This is a fairly big patch, so back-patch to 9.2 only, where the issue is more visible. We can consider back-patching further after this has received some more testing in 9.2 and master.
1 parent 79f21b3 commit ee8b95e

File tree

1 file changed

+112
-23
lines changed
  • src/backend/access/transam

1 file changed

+112
-23
lines changed

src/backend/access/transam/xlog.c

Lines changed: 112 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,18 @@ static bool LocalHotStandbyActive = false;
188188
*/
189189
static int LocalXLogInsertAllowed = -1;
190190

191-
/* Are we recovering using offline XLOG archives? */
191+
/*
192+
* When ArchiveRecoveryRequested is set, archive recovery was requested,
193+
* i.e. recovery.conf file was present. When InArchiveRecovery is set, we are
194+
* currently recovering using offline XLOG archives. These variables are only
195+
* valid in the startup process.
196+
*
197+
* When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
198+
* currently performing crash recovery using only XLOG files in pg_xlog, but
199+
* will switch to using offline XLOG archives as soon as we reach the end of
200+
* WAL in pg_xlog.
201+
*/
202+
static bool ArchiveRecoveryRequested = false;
192203
static bool InArchiveRecovery = false;
193204

194205
/* Was the last xlog file restored from archive, or local? */
@@ -206,10 +217,13 @@ static TimestampTz recoveryTargetTime;
206217
static char *recoveryTargetName;
207218

208219
/* options taken from recovery.conf for XLOG streaming */
209-
static bool StandbyMode = false;
220+
static bool StandbyModeRequested = false;
210221
static char *PrimaryConnInfo = NULL;
211222
static char *TriggerFile = NULL;
212223

224+
/* are we currently in standby mode? */
225+
bool StandbyMode = false;
226+
213227
/* if recoveryStopsHere returns true, it saves actual stop xid/time/name here */
214228
static TransactionId recoveryStopXid;
215229
static TimestampTz recoveryStopTime;
@@ -4236,6 +4250,43 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
42364250
readFile = -1;
42374251
}
42384252

4253+
/*
4254+
* If archive recovery was requested, but we were still doing crash
4255+
* recovery, switch to archive recovery and retry using the offline
4256+
* archive. We have now replayed all the valid WAL in pg_xlog, so
4257+
* we are presumably now consistent.
4258+
*
4259+
* We require that there's at least some valid WAL present in
4260+
* pg_xlog, however (!fetching_ckpt). We could recover using the WAL
4261+
* from the archive, even if pg_xlog is completely empty, but we'd
4262+
* have no idea how far we'd have to replay to reach consistency.
4263+
* So err on the safe side and give up.
4264+
*/
4265+
if (!InArchiveRecovery && ArchiveRecoveryRequested && !fetching_ckpt)
4266+
{
4267+
ereport(DEBUG1,
4268+
(errmsg_internal("reached end of WAL in pg_xlog, entering archive recovery")));
4269+
InArchiveRecovery = true;
4270+
if (StandbyModeRequested)
4271+
StandbyMode = true;
4272+
4273+
/* initialize minRecoveryPoint to this record */
4274+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
4275+
ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
4276+
if (XLByteLT(ControlFile->minRecoveryPoint, EndRecPtr))
4277+
ControlFile->minRecoveryPoint = EndRecPtr;
4278+
4279+
/* update local copy */
4280+
minRecoveryPoint = ControlFile->minRecoveryPoint;
4281+
4282+
UpdateControlFile();
4283+
LWLockRelease(ControlFileLock);
4284+
4285+
CheckRecoveryConsistency();
4286+
4287+
goto retry;
4288+
}
4289+
42394290
/* In standby-mode, keep trying */
42404291
if (StandbyMode)
42414292
goto retry;
@@ -5631,7 +5682,7 @@ readRecoveryCommandFile(void)
56315682
}
56325683
else if (strcmp(item->name, "standby_mode") == 0)
56335684
{
5634-
if (!parse_bool(item->value, &StandbyMode))
5685+
if (!parse_bool(item->value, &StandbyModeRequested))
56355686
ereport(ERROR,
56365687
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
56375688
errmsg("parameter \"%s\" requires a Boolean value",
@@ -5662,7 +5713,7 @@ readRecoveryCommandFile(void)
56625713
/*
56635714
* Check for compulsory parameters
56645715
*/
5665-
if (StandbyMode)
5716+
if (StandbyModeRequested)
56665717
{
56675718
if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL)
56685719
ereport(WARNING,
@@ -5679,7 +5730,7 @@ readRecoveryCommandFile(void)
56795730
}
56805731

56815732
/* Enable fetching from archive recovery area */
5682-
InArchiveRecovery = true;
5733+
ArchiveRecoveryRequested = true;
56835734

56845735
/*
56855736
* If user specified recovery_target_timeline, validate it or compute the
@@ -5689,6 +5740,11 @@ readRecoveryCommandFile(void)
56895740
*/
56905741
if (rtliGiven)
56915742
{
5743+
/*
5744+
* Temporarily set InArchiveRecovery, so that existsTimeLineHistory
5745+
* or findNewestTimeLine below will check the archive.
5746+
*/
5747+
InArchiveRecovery = true;
56925748
if (rtli)
56935749
{
56945750
/* Timeline 1 does not have a history file, all else should */
@@ -5705,6 +5761,7 @@ readRecoveryCommandFile(void)
57055761
recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
57065762
recoveryTargetIsLatest = true;
57075763
}
5764+
InArchiveRecovery = false;
57085765
}
57095766

57105767
FreeConfigVariables(head);
@@ -6283,9 +6340,9 @@ StartupXLOG(void)
62836340
archiveCleanupCommand ? archiveCleanupCommand : "",
62846341
sizeof(XLogCtl->archiveCleanupCommand));
62856342

6286-
if (InArchiveRecovery)
6343+
if (ArchiveRecoveryRequested)
62876344
{
6288-
if (StandbyMode)
6345+
if (StandbyModeRequested)
62896346
ereport(LOG,
62906347
(errmsg("entering standby mode")));
62916348
else if (recoveryTarget == RECOVERY_TARGET_XID)
@@ -6309,12 +6366,21 @@ StartupXLOG(void)
63096366
* Take ownership of the wakeup latch if we're going to sleep during
63106367
* recovery.
63116368
*/
6312-
if (StandbyMode)
6369+
if (StandbyModeRequested)
63136370
OwnLatch(&XLogCtl->recoveryWakeupLatch);
63146371

63156372
if (read_backup_label(&checkPointLoc, &backupEndRequired,
63166373
&backupFromStandby))
63176374
{
6375+
/*
6376+
* Archive recovery was requested, and thanks to the backup label file,
6377+
* we know how far we need to replay to reach consistency. Enter
6378+
* archive recovery directly.
6379+
*/
6380+
InArchiveRecovery = true;
6381+
if (StandbyModeRequested)
6382+
StandbyMode = true;
6383+
63186384
/*
63196385
* When a backup_label file is present, we want to roll forward from
63206386
* the checkpoint it identifies, rather than using pg_control.
@@ -6355,6 +6421,33 @@ StartupXLOG(void)
63556421
}
63566422
else
63576423
{
6424+
/*
6425+
* It's possible that archive recovery was requested, but we don't
6426+
* know how far we need to replay the WAL before we reach consistency.
6427+
* This can happen for example if a base backup is taken from a running
6428+
* server using an atomic filesystem snapshot, without calling
6429+
* pg_start/stop_backup. Or if you just kill a running master server
6430+
* and put it into archive recovery by creating a recovery.conf file.
6431+
*
6432+
* Our strategy in that case is to perform crash recovery first,
6433+
* replaying all the WAL present in pg_xlog, and only enter archive
6434+
* recovery after that.
6435+
*
6436+
* But usually we already know how far we need to replay the WAL (up to
6437+
* minRecoveryPoint, up to backupEndPoint, or until we see an
6438+
* end-of-backup record), and we can enter archive recovery directly.
6439+
*/
6440+
if (ArchiveRecoveryRequested &&
6441+
(!XLByteEQ(ControlFile->minRecoveryPoint, InvalidXLogRecPtr) ||
6442+
ControlFile->backupEndRequired ||
6443+
!XLByteEQ(ControlFile->backupEndPoint, InvalidXLogRecPtr) ||
6444+
ControlFile->state == DB_SHUTDOWNED))
6445+
{
6446+
InArchiveRecovery = true;
6447+
if (StandbyModeRequested)
6448+
StandbyMode = true;
6449+
}
6450+
63586451
/*
63596452
* Get the last valid checkpoint record. If the latest one according
63606453
* to pg_control is broken, try the next-to-last one.
@@ -6454,7 +6547,7 @@ StartupXLOG(void)
64546547
}
64556548
else if (ControlFile->state != DB_SHUTDOWNED)
64566549
InRecovery = true;
6457-
else if (InArchiveRecovery)
6550+
else if (ArchiveRecoveryRequested)
64586551
{
64596552
/* force recovery due to presence of recovery.conf */
64606553
InRecovery = true;
@@ -6487,12 +6580,6 @@ StartupXLOG(void)
64876580
ControlFile->prevCheckPoint = ControlFile->checkPoint;
64886581
ControlFile->checkPoint = checkPointLoc;
64896582
ControlFile->checkPointCopy = checkPoint;
6490-
if (InArchiveRecovery)
6491-
{
6492-
/* initialize minRecoveryPoint if not set yet */
6493-
if (XLByteLT(ControlFile->minRecoveryPoint, checkPoint.redo))
6494-
ControlFile->minRecoveryPoint = checkPoint.redo;
6495-
}
64966583

64976584
/*
64986585
* Set backupStartPoint if we're starting recovery from a base backup.
@@ -6571,7 +6658,7 @@ StartupXLOG(void)
65716658
* control file and we've established a recovery snapshot from a
65726659
* running-xacts WAL record.
65736660
*/
6574-
if (InArchiveRecovery && EnableHotStandby)
6661+
if (ArchiveRecoveryRequested && EnableHotStandby)
65756662
{
65766663
TransactionId *xids;
65776664
int nxids;
@@ -6669,7 +6756,7 @@ StartupXLOG(void)
66696756
* process in addition to postmaster! Also, fsync requests are
66706757
* subsequently to be handled by the checkpointer, not locally.
66716758
*/
6672-
if (InArchiveRecovery && IsUnderPostmaster)
6759+
if (ArchiveRecoveryRequested && IsUnderPostmaster)
66736760
{
66746761
PublishStartupProcessInformation();
66756762
SetForwardFsyncRequests();
@@ -6873,7 +6960,7 @@ StartupXLOG(void)
68736960
* We don't need the latch anymore. It's not strictly necessary to disown
68746961
* it, but let's do it for the sake of tidiness.
68756962
*/
6876-
if (StandbyMode)
6963+
if (StandbyModeRequested)
68776964
DisownLatch(&XLogCtl->recoveryWakeupLatch);
68786965

68796966
/*
@@ -6918,7 +7005,7 @@ StartupXLOG(void)
69187005
* crashes while an online backup is in progress. We must not treat
69197006
* that as an error, or the database will refuse to start up.
69207007
*/
6921-
if (InArchiveRecovery || ControlFile->backupEndRequired)
7008+
if (ArchiveRecoveryRequested || ControlFile->backupEndRequired)
69227009
{
69237010
if (ControlFile->backupEndRequired)
69247011
ereport(FATAL,
@@ -6948,8 +7035,10 @@ StartupXLOG(void)
69487035
*
69497036
* In a normal crash recovery, we can just extend the timeline we were in.
69507037
*/
6951-
if (InArchiveRecovery)
7038+
if (ArchiveRecoveryRequested)
69527039
{
7040+
Assert(InArchiveRecovery);
7041+
69537042
ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
69547043
ereport(LOG,
69557044
(errmsg("selected new timeline ID: %u", ThisTimeLineID)));
@@ -6966,7 +7055,7 @@ StartupXLOG(void)
69667055
* that we also have a copy of the last block of the old WAL in readBuf;
69677056
* we will use that below.)
69687057
*/
6969-
if (InArchiveRecovery)
7058+
if (ArchiveRecoveryRequested)
69707059
exitArchiveRecovery(curFileTLI, endLogId, endLogSeg);
69717060

69727061
/*
@@ -8799,7 +8888,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
87998888
* record, the backup was canceled and the end-of-backup record will
88008889
* never arrive.
88018890
*/
8802-
if (InArchiveRecovery &&
8891+
if (ArchiveRecoveryRequested &&
88038892
!XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
88048893
XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
88058894
ereport(PANIC,
@@ -10263,7 +10352,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1026310352
* Request a restartpoint if we've replayed too much xlog since the
1026410353
* last one.
1026510354
*/
10266-
if (StandbyMode && bgwriterLaunched)
10355+
if (StandbyModeRequested && bgwriterLaunched)
1026710356
{
1026810357
if (XLogCheckpointNeeded(readId, readSeg))
1026910358
{

0 commit comments

Comments
 (0)
0