Fix bug in verifying TLI (timeline ID) in WAL page header during reco… · hackingwu/postgres@38bec18 · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit 38bec18

Browse files
committed
Fix bug in verifying TLI (timeline ID) in WAL page header during recovery.
Previously, ValidXLOGHeader() could not properly handle the case where we re-read a WAL segment after reading its subsequent segment, which has a larger TLI. This can happen, for example, when a WAL record is split across two segments that have different TLIs. In this case, since the segment we're re-reading has a smaller TLI than the subsequent segment we've already read, ValidXLOGHeader() reported an "out-of-sequence TLI" error even though the TLI sequence was valid (i.e., the TLI doesn't go backwards across successive WAL pages and segments). This issue was fixed by commit 7fcbf6a in 9.3 and later, though there is no mention of the bug fix in its commit log. That commit changed the WAL check code so that it verifies the TLI only for pages that are later than the last remembered LSN. This patch applies the same change to 9.2, where the issue still existed. Author: Takayuki Tsunakawa and Amit Kapila Reviewed-By: Robert Haas Discussion: https://postgr.es/m/0A3221C70F24FB45833433255569204D1F5E15E5@G01JPEXMBYT05
1 parent dbaa621 commit 38bec18

File tree

1 file changed

+22
-36
lines changed
  • src/backend/access/transam

1 file changed

+22
-36
lines changed

src/backend/access/transam/xlog.c

Lines changed: 22 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,8 @@ static uint32 readRecordBufSize = 0;
617617
/* State information for XLOG reading */
618618
static XLogRecPtr ReadRecPtr; /* start of last record read */
619619
static XLogRecPtr EndRecPtr; /* end+1 of last record read */
620-
static TimeLineID lastPageTLI = 0;
621-
static TimeLineID lastSegmentTLI = 0;
620+
static XLogRecPtr latestPagePtr; /* start of last page read */
621+
static TimeLineID latestPageTLI = 0;
622622

623623
static XLogRecPtr minRecoveryPoint; /* local copy of
624624
* ControlFile->minRecoveryPoint */
@@ -706,7 +706,7 @@ static void CleanupBackupHistory(void);
706706
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
707707
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
708708
static void CheckRecoveryConsistency(void);
709-
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly);
709+
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
710710
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
711711
static List *readTimeLineHistory(TimeLineID targetTLI);
712712
static bool existsTimeLineHistory(TimeLineID probeTLI);
@@ -4021,14 +4021,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
40214021
(errmsg("invalid record offset at %X/%X",
40224022
RecPtr->xlogid, RecPtr->xrecoff)));
40234023

4024-
/*
4025-
* Since we are going to a random position in WAL, forget any prior
4026-
* state about what timeline we were in, and allow it to be any
4027-
* timeline in expectedTLIs. We also set a flag to allow curFileTLI
4028-
* to go backwards (but we can't reset that variable right here, since
4029-
* we might not change files at all).
4030-
*/
4031-
lastPageTLI = lastSegmentTLI = 0; /* see comment in ValidXLOGHeader */
40324024
randAccess = true; /* allow curFileTLI to go backwards too */
40334025
}
40344026

@@ -4346,7 +4338,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
43464338
* ReadRecord. It's not intended for use from anywhere else.
43474339
*/
43484340
static bool
4349-
ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly)
4341+
ValidXLOGHeader(XLogPageHeader hdr, int emode)
43504342
{
43514343
XLogRecPtr recaddr;
43524344

@@ -4440,31 +4432,25 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly)
44404432
* immediate parent's TLI, we should never see TLI go backwards across
44414433
* successive pages of a consistent WAL sequence.
44424434
*
4443-
* Of course this check should only be applied when advancing sequentially
4444-
* across pages; therefore ReadRecord resets lastPageTLI and
4445-
* lastSegmentTLI to zero when going to a random page.
4446-
*
4447-
* Sometimes we re-open a segment that's already been partially replayed.
4448-
* In that case we cannot perform the normal TLI check: if there is a
4449-
* timeline switch within the segment, the first page has a smaller TLI
4450-
* than later pages following the timeline switch, and we might've read
4451-
* them already. As a weaker test, we still check that it's not smaller
4452-
* than the TLI we last saw at the beginning of a segment. Pass
4453-
* segmentonly = true when re-validating the first page like that, and the
4454-
* page you're actually interested in comes later.
4435+
* Sometimes we re-read a segment that's already been (partially) read.
4436+
* This can happen when we read WAL segments from parent's TLI during
4437+
* archive recovery, refer XLogFileReadAnyTLI. So we only verify TLIs
4438+
* for pages that are later than the last remembered LSN.
44554439
*/
4456-
if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI))
4440+
if (XLByteLT(latestPagePtr, recaddr))
44574441
{
4458-
ereport(emode_for_corrupt_record(emode, recaddr),
4459-
(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
4460-
hdr->xlp_tli,
4461-
segmentonly ? lastSegmentTLI : lastPageTLI,
4462-
readId, readSeg, readOff)));
4463-
return false;
4442+
if (hdr->xlp_tli < latestPageTLI)
4443+
{
4444+
ereport(emode_for_corrupt_record(emode, recaddr),
4445+
(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
4446+
hdr->xlp_tli,
4447+
latestPageTLI,
4448+
readId, readSeg, readOff)));
4449+
return false;
4450+
}
44644451
}
4465-
lastPageTLI = hdr->xlp_tli;
4466-
if (readOff == 0)
4467-
lastSegmentTLI = hdr->xlp_tli;
4452+
latestPagePtr = recaddr;
4453+
latestPageTLI = hdr->xlp_tli;
44684454

44694455
return true;
44704456
}
@@ -10927,7 +10913,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1092710913
readId, readSeg, readOff)));
1092810914
goto next_record_is_invalid;
1092910915
}
10930-
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true))
10916+
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
1093110917
goto next_record_is_invalid;
1093210918
}
1093310919

@@ -10949,7 +10935,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1094910935
readId, readSeg, readOff)));
1095010936
goto next_record_is_invalid;
1095110937
}
10952-
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, false))
10938+
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
1095310939
goto next_record_is_invalid;
1095410940

1095510941
Assert(targetId == readId);

0 commit comments

Comments
 (0)
0