If backup-end record is not seen, and we reach end of recovery from a · omgwtfun/postgres@59dd479 · GitHub
[go: up one dir, main page]

Skip to content

Commit 59dd479

Browse files
committed
If backup-end record is not seen, and we reach end of recovery from a
streamed backup, throw an error and refuse to start up. The restore has not finished correctly in that case and the data directory is possibly corrupt. We already errored out in case of archive recovery, but could not during crash recovery because we couldn't distinguish between the case that pg_start_backup() was called and the database then crashed (must not error, data is OK), and the case that we're restoring from a backup and not all the needed WAL was replayed (data can be corrupt). To distinguish those cases, add a line to backup_label to indicate whether the backup was taken with pg_start/stop_backup(), or by streaming (i.e. pg_basebackup). This is a different implementation than what I committed to 9.2 a week ago. That implementation was not back-patchable because it required re-initdb. Fujii Masao
1 parent 9dcaf9a commit 59dd479

File tree

1 file changed

+39
-22
lines changed
  • src/backend/access/transam

1 file changed

+39
-22
lines changed

src/backend/access/transam/xlog.c

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,8 @@ static bool CheckForStandbyTrigger(void);
659659
static void xlog_outrec(StringInfo buf, XLogRecord *record);
660660
#endif
661661
static void pg_start_backup_callback(int code, Datum arg);
662-
static bool read_backup_label(XLogRecPtr *checkPointLoc);
662+
static bool read_backup_label(XLogRecPtr *checkPointLoc,
663+
bool *backupEndRequired);
663664
static void rm_redo_error_callback(void *arg);
664665
static int get_sync_bit(int method);
665666

@@ -5995,6 +5996,7 @@ StartupXLOG(void)
59955996
XLogRecord *record;
59965997
uint32 freespace;
59975998
TransactionId oldestActiveXID;
5999+
bool backupEndRequired = false;
59986000

59996001
/*
60006002
* Read control file and check XLOG status looks valid.
@@ -6128,7 +6130,7 @@ StartupXLOG(void)
61286130
if (StandbyMode)
61296131
OwnLatch(&XLogCtl->recoveryWakeupLatch);
61306132

6131-
if (read_backup_label(&checkPointLoc))
6133+
if (read_backup_label(&checkPointLoc, &backupEndRequired))
61326134
{
61336135
/*
61346136
* When a backup_label file is present, we want to roll forward from
@@ -6304,10 +6306,17 @@ StartupXLOG(void)
63046306
}
63056307

63066308
/*
6307-
* set backupStartPoint if we're starting recovery from a base backup
6309+
* Set backupStartPoint if we're starting recovery from a base backup.
6310+
* However, if there was no recovery.conf, and the backup was taken
6311+
* with pg_start_backup(), we don't know if the server crashed before
6312+
* the backup was finished and we're doing crash recovery on the
6313+
* original server, or if we're restoring from the base backup. We
6314+
* have to assume we're doing crash recovery in that case, or the
6315+
* database would refuse to start up after a crash.
63086316
*/
6309-
if (haveBackupLabel)
6317+
if ((InArchiveRecovery && haveBackupLabel) || backupEndRequired)
63106318
ControlFile->backupStartPoint = checkPoint.redo;
6319+
63116320
ControlFile->time = (pg_time_t) time(NULL);
63126321
/* No need to hold ControlFileLock yet, we aren't up far enough */
63136322
UpdateControlFile();
@@ -6670,23 +6679,15 @@ StartupXLOG(void)
66706679

66716680
/*
66726681
* Ran off end of WAL before reaching end-of-backup WAL record, or
6673-
* minRecoveryPoint. That's usually a bad sign, indicating that you
6674-
* tried to recover from an online backup but never called
6675-
* pg_stop_backup(), or you didn't archive all the WAL up to that
6676-
* point. However, this also happens in crash recovery, if the system
6677-
* crashes while an online backup is in progress. We must not treat
6678-
* that as an error, or the database will refuse to start up.
6682+
* minRecoveryPoint.
66796683
*/
6680-
if (InArchiveRecovery)
6681-
{
6682-
if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
6683-
ereport(FATAL,
6684-
(errmsg("WAL ends before end of online backup"),
6685-
errhint("Online backup started with pg_start_backup() must be ended with pg_stop_backup(), and all WAL up to that point must be available at recovery.")));
6686-
else
6687-
ereport(FATAL,
6688-
(errmsg("WAL ends before consistent recovery point")));
6689-
}
6684+
if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
6685+
ereport(FATAL,
6686+
(errmsg("WAL ends before end of online backup"),
6687+
errhint("Online backup started with pg_start_backup() must be ended with pg_stop_backup(), and all WAL up to that point must be available at recovery.")));
6688+
else
6689+
ereport(FATAL,
6690+
(errmsg("WAL ends before consistent recovery point")));
66906691
}
66916692

66926693
/*
@@ -8990,6 +8991,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
89908991
startpoint.xlogid, startpoint.xrecoff, xlogfilename);
89918992
appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n",
89928993
checkpointloc.xlogid, checkpointloc.xrecoff);
8994+
appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n",
8995+
exclusive ? "pg_start_backup" : "streamed");
89938996
appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
89948997
appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
89958998

@@ -9719,15 +9722,19 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
97199722
*
97209723
* Returns TRUE if a backup_label was found (and fills the checkpoint
97219724
* location and its REDO location into *checkPointLoc and RedoStartLSN,
9722-
* respectively); returns FALSE if not.
9725+
* respectively); returns FALSE if not. If this backup_label came from a
9726+
* streamed backup, *backupEndRequired is set to TRUE.
97239727
*/
97249728
static bool
9725-
read_backup_label(XLogRecPtr *checkPointLoc)
9729+
read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired)
97269730
{
97279731
char startxlogfilename[MAXFNAMELEN];
97289732
TimeLineID tli;
97299733
FILE *lfp;
97309734
char ch;
9735+
char backuptype[20];
9736+
9737+
*backupEndRequired = false;
97319738

97329739
/*
97339740
* See if label file is present
@@ -9760,6 +9767,16 @@ read_backup_label(XLogRecPtr *checkPointLoc)
97609767
ereport(FATAL,
97619768
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
97629769
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
9770+
/*
9771+
* BACKUP METHOD line didn't exist in 9.1beta3 and earlier, so don't
9772+
* error out if it doesn't exist.
9773+
*/
9774+
if (fscanf(lfp, "BACKUP METHOD: %19s", backuptype) == 1)
9775+
{
9776+
if (strcmp(backuptype, "streamed") == 0)
9777+
*backupEndRequired = true;
9778+
}
9779+
97639780
if (ferror(lfp) || FreeFile(lfp))
97649781
ereport(FATAL,
97659782
(errcode_for_file_access(),

0 commit comments

Comments
 (0)
0