@@ -188,7 +188,18 @@ static bool LocalHotStandbyActive = false;
188
188
*/
189
189
static int LocalXLogInsertAllowed = -1 ;
190
190
191
- /* Are we recovering using offline XLOG archives? */
191
+ /*
192
+ * When ArchiveRecoveryRequested is set, archive recovery was requested,
193
+ * i.e. recovery.conf file was present. When InArchiveRecovery is set, we are
194
+ * currently recovering using offline XLOG archives. These variables are only
195
+ * valid in the startup process.
196
+ *
197
+ * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
198
+ * currently performing crash recovery using only XLOG files in pg_xlog, but
199
+ * will switch to using offline XLOG archives as soon as we reach the end of
200
+ * WAL in pg_xlog.
201
+ */
202
+ static bool ArchiveRecoveryRequested = false;
192
203
static bool InArchiveRecovery = false;
193
204
194
205
/* Was the last xlog file restored from archive, or local? */
@@ -206,10 +217,13 @@ static TimestampTz recoveryTargetTime;
206
217
static char * recoveryTargetName ;
207
218
208
219
/* options taken from recovery.conf for XLOG streaming */
209
- static bool StandbyMode = false;
220
+ static bool StandbyModeRequested = false;
210
221
static char * PrimaryConnInfo = NULL ;
211
222
static char * TriggerFile = NULL ;
212
223
224
+ /* are we currently in standby mode? */
225
+ bool StandbyMode = false;
226
+
213
227
/* if recoveryStopsHere returns true, it saves actual stop xid/time/name here */
214
228
static TransactionId recoveryStopXid ;
215
229
static TimestampTz recoveryStopTime ;
@@ -4236,6 +4250,43 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
4236
4250
readFile = -1 ;
4237
4251
}
4238
4252
4253
+ /*
4254
+ * If archive recovery was requested, but we were still doing crash
4255
+ * recovery, switch to archive recovery and retry using the offline
4256
+ * archive. We have now replayed all the valid WAL in pg_xlog, so
4257
+ * we are presumably now consistent.
4258
+ *
4259
+ * We require that there's at least some valid WAL present in
4260
+ * pg_xlog, however (!fetching_ckpt). We could recover using the WAL
4261
+ * from the archive, even if pg_xlog is completely empty, but we'd
4262
+ * have no idea how far we'd have to replay to reach consistency.
4263
+ * So err on the safe side and give up.
4264
+ */
4265
+ if (!InArchiveRecovery && ArchiveRecoveryRequested && !fetching_ckpt )
4266
+ {
4267
+ ereport (DEBUG1 ,
4268
+ (errmsg_internal ("reached end of WAL in pg_xlog, entering archive recovery" )));
4269
+ InArchiveRecovery = true;
4270
+ if (StandbyModeRequested )
4271
+ StandbyMode = true;
4272
+
4273
+ /* initialize minRecoveryPoint to this record */
4274
+ LWLockAcquire (ControlFileLock , LW_EXCLUSIVE );
4275
+ ControlFile -> state = DB_IN_ARCHIVE_RECOVERY ;
4276
+ if (XLByteLT (ControlFile -> minRecoveryPoint , EndRecPtr ))
4277
+ ControlFile -> minRecoveryPoint = EndRecPtr ;
4278
+
4279
+ /* update local copy */
4280
+ minRecoveryPoint = ControlFile -> minRecoveryPoint ;
4281
+
4282
+ UpdateControlFile ();
4283
+ LWLockRelease (ControlFileLock );
4284
+
4285
+ CheckRecoveryConsistency ();
4286
+
4287
+ goto retry ;
4288
+ }
4289
+
4239
4290
/* In standby-mode, keep trying */
4240
4291
if (StandbyMode )
4241
4292
goto retry ;
@@ -5631,7 +5682,7 @@ readRecoveryCommandFile(void)
5631
5682
}
5632
5683
else if (strcmp (item -> name , "standby_mode" ) == 0 )
5633
5684
{
5634
- if (!parse_bool (item -> value , & StandbyMode ))
5685
+ if (!parse_bool (item -> value , & StandbyModeRequested ))
5635
5686
ereport (ERROR ,
5636
5687
(errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
5637
5688
errmsg ("parameter \"%s\" requires a Boolean value" ,
@@ -5662,7 +5713,7 @@ readRecoveryCommandFile(void)
5662
5713
/*
5663
5714
* Check for compulsory parameters
5664
5715
*/
5665
- if (StandbyMode )
5716
+ if (StandbyModeRequested )
5666
5717
{
5667
5718
if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL )
5668
5719
ereport (WARNING ,
@@ -5679,7 +5730,7 @@ readRecoveryCommandFile(void)
5679
5730
}
5680
5731
5681
5732
/* Enable fetching from archive recovery area */
5682
- InArchiveRecovery = true;
5733
+ ArchiveRecoveryRequested = true;
5683
5734
5684
5735
/*
5685
5736
* If user specified recovery_target_timeline, validate it or compute the
@@ -5689,6 +5740,11 @@ readRecoveryCommandFile(void)
5689
5740
*/
5690
5741
if (rtliGiven )
5691
5742
{
5743
+ /*
5744
+ * Temporarily set InArchiveRecovery, so that existsTimeLineHistory
5745
+ * or findNewestTimeLine below will check the archive.
5746
+ */
5747
+ InArchiveRecovery = true;
5692
5748
if (rtli )
5693
5749
{
5694
5750
/* Timeline 1 does not have a history file, all else should */
@@ -5705,6 +5761,7 @@ readRecoveryCommandFile(void)
5705
5761
recoveryTargetTLI = findNewestTimeLine (recoveryTargetTLI );
5706
5762
recoveryTargetIsLatest = true;
5707
5763
}
5764
+ InArchiveRecovery = false;
5708
5765
}
5709
5766
5710
5767
FreeConfigVariables (head );
@@ -6283,9 +6340,9 @@ StartupXLOG(void)
6283
6340
archiveCleanupCommand ? archiveCleanupCommand : "" ,
6284
6341
sizeof (XLogCtl -> archiveCleanupCommand ));
6285
6342
6286
- if (InArchiveRecovery )
6343
+ if (ArchiveRecoveryRequested )
6287
6344
{
6288
- if (StandbyMode )
6345
+ if (StandbyModeRequested )
6289
6346
ereport (LOG ,
6290
6347
(errmsg ("entering standby mode" )));
6291
6348
else if (recoveryTarget == RECOVERY_TARGET_XID )
@@ -6309,12 +6366,21 @@ StartupXLOG(void)
6309
6366
* Take ownership of the wakeup latch if we're going to sleep during
6310
6367
* recovery.
6311
6368
*/
6312
- if (StandbyMode )
6369
+ if (StandbyModeRequested )
6313
6370
OwnLatch (& XLogCtl -> recoveryWakeupLatch );
6314
6371
6315
6372
if (read_backup_label (& checkPointLoc , & backupEndRequired ,
6316
6373
& backupFromStandby ))
6317
6374
{
6375
+ /*
6376
+ * Archive recovery was requested, and thanks to the backup label file,
6377
+ * we know how far we need to replay to reach consistency. Enter
6378
+ * archive recovery directly.
6379
+ */
6380
+ InArchiveRecovery = true;
6381
+ if (StandbyModeRequested )
6382
+ StandbyMode = true;
6383
+
6318
6384
/*
6319
6385
* When a backup_label file is present, we want to roll forward from
6320
6386
* the checkpoint it identifies, rather than using pg_control.
@@ -6355,6 +6421,33 @@ StartupXLOG(void)
6355
6421
}
6356
6422
else
6357
6423
{
6424
+ /*
6425
+ * It's possible that archive recovery was requested, but we don't
6426
+ * know how far we need to replay the WAL before we reach consistency.
6427
+ * This can happen for example if a base backup is taken from a running
6428
+ * server using an atomic filesystem snapshot, without calling
6429
+ * pg_start/stop_backup. Or if you just kill a running master server
6430
+ * and put it into archive recovery by creating a recovery.conf file.
6431
+ *
6432
+ * Our strategy in that case is to perform crash recovery first,
6433
+ * replaying all the WAL present in pg_xlog, and only enter archive
6434
+ * recovery after that.
6435
+ *
6436
+ * But usually we already know how far we need to replay the WAL (up to
6437
+ * minRecoveryPoint, up to backupEndPoint, or until we see an
6438
+ * end-of-backup record), and we can enter archive recovery directly.
6439
+ */
6440
+ if (ArchiveRecoveryRequested &&
6441
+ (!XLByteEQ (ControlFile -> minRecoveryPoint , InvalidXLogRecPtr ) ||
6442
+ ControlFile -> backupEndRequired ||
6443
+ !XLByteEQ (ControlFile -> backupEndPoint , InvalidXLogRecPtr ) ||
6444
+ ControlFile -> state == DB_SHUTDOWNED ))
6445
+ {
6446
+ InArchiveRecovery = true;
6447
+ if (StandbyModeRequested )
6448
+ StandbyMode = true;
6449
+ }
6450
+
6358
6451
/*
6359
6452
* Get the last valid checkpoint record. If the latest one according
6360
6453
* to pg_control is broken, try the next-to-last one.
@@ -6454,7 +6547,7 @@ StartupXLOG(void)
6454
6547
}
6455
6548
else if (ControlFile -> state != DB_SHUTDOWNED )
6456
6549
InRecovery = true;
6457
- else if (InArchiveRecovery )
6550
+ else if (ArchiveRecoveryRequested )
6458
6551
{
6459
6552
/* force recovery due to presence of recovery.conf */
6460
6553
InRecovery = true;
@@ -6487,12 +6580,6 @@ StartupXLOG(void)
6487
6580
ControlFile -> prevCheckPoint = ControlFile -> checkPoint ;
6488
6581
ControlFile -> checkPoint = checkPointLoc ;
6489
6582
ControlFile -> checkPointCopy = checkPoint ;
6490
- if (InArchiveRecovery )
6491
- {
6492
- /* initialize minRecoveryPoint if not set yet */
6493
- if (XLByteLT (ControlFile -> minRecoveryPoint , checkPoint .redo ))
6494
- ControlFile -> minRecoveryPoint = checkPoint .redo ;
6495
- }
6496
6583
6497
6584
/*
6498
6585
* Set backupStartPoint if we're starting recovery from a base backup.
@@ -6571,7 +6658,7 @@ StartupXLOG(void)
6571
6658
* control file and we've established a recovery snapshot from a
6572
6659
* running-xacts WAL record.
6573
6660
*/
6574
- if (InArchiveRecovery && EnableHotStandby )
6661
+ if (ArchiveRecoveryRequested && EnableHotStandby )
6575
6662
{
6576
6663
TransactionId * xids ;
6577
6664
int nxids ;
@@ -6669,7 +6756,7 @@ StartupXLOG(void)
6669
6756
* process in addition to postmaster! Also, fsync requests are
6670
6757
* subsequently to be handled by the checkpointer, not locally.
6671
6758
*/
6672
- if (InArchiveRecovery && IsUnderPostmaster )
6759
+ if (ArchiveRecoveryRequested && IsUnderPostmaster )
6673
6760
{
6674
6761
PublishStartupProcessInformation ();
6675
6762
SetForwardFsyncRequests ();
@@ -6873,7 +6960,7 @@ StartupXLOG(void)
6873
6960
* We don't need the latch anymore. It's not strictly necessary to disown
6874
6961
* it, but let's do it for the sake of tidiness.
6875
6962
*/
6876
- if (StandbyMode )
6963
+ if (StandbyModeRequested )
6877
6964
DisownLatch (& XLogCtl -> recoveryWakeupLatch );
6878
6965
6879
6966
/*
@@ -6918,7 +7005,7 @@ StartupXLOG(void)
6918
7005
* crashes while an online backup is in progress. We must not treat
6919
7006
* that as an error, or the database will refuse to start up.
6920
7007
*/
6921
- if (InArchiveRecovery || ControlFile -> backupEndRequired )
7008
+ if (ArchiveRecoveryRequested || ControlFile -> backupEndRequired )
6922
7009
{
6923
7010
if (ControlFile -> backupEndRequired )
6924
7011
ereport (FATAL ,
@@ -6948,8 +7035,10 @@ StartupXLOG(void)
6948
7035
*
6949
7036
* In a normal crash recovery, we can just extend the timeline we were in.
6950
7037
*/
6951
- if (InArchiveRecovery )
7038
+ if (ArchiveRecoveryRequested )
6952
7039
{
7040
+ Assert (InArchiveRecovery );
7041
+
6953
7042
ThisTimeLineID = findNewestTimeLine (recoveryTargetTLI ) + 1 ;
6954
7043
ereport (LOG ,
6955
7044
(errmsg ("selected new timeline ID: %u" , ThisTimeLineID )));
@@ -6966,7 +7055,7 @@ StartupXLOG(void)
6966
7055
* that we also have a copy of the last block of the old WAL in readBuf;
6967
7056
* we will use that below.)
6968
7057
*/
6969
- if (InArchiveRecovery )
7058
+ if (ArchiveRecoveryRequested )
6970
7059
exitArchiveRecovery (curFileTLI , endLogId , endLogSeg );
6971
7060
6972
7061
/*
@@ -8799,7 +8888,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
8799
8888
* record, the backup was canceled and the end-of-backup record will
8800
8889
* never arrive.
8801
8890
*/
8802
- if (InArchiveRecovery &&
8891
+ if (ArchiveRecoveryRequested &&
8803
8892
!XLogRecPtrIsInvalid (ControlFile -> backupStartPoint ) &&
8804
8893
XLogRecPtrIsInvalid (ControlFile -> backupEndPoint ))
8805
8894
ereport (PANIC ,
@@ -10263,7 +10352,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
10263
10352
* Request a restartpoint if we've replayed too much xlog since the
10264
10353
* last one.
10265
10354
*/
10266
- if (StandbyMode && bgwriterLaunched )
10355
+ if (StandbyModeRequested && bgwriterLaunched )
10267
10356
{
10268
10357
if (XLogCheckpointNeeded (readId , readSeg ))
10269
10358
{
0 commit comments