10000 Avoid unlikely data-loss scenarios due to rename() without fsync. · prmdeveloper/postgres@a62714f · GitHub
[go: up one dir, main page]

Skip to content

Commit a62714f

Browse files
committed
Avoid unlikely data-loss scenarios due to rename() without fsync.
Renaming a file using rename(2) is not guaranteed to be durable in face of crashes. Use the previously added durable_rename()/durable_link_or_rename() in various places where we previously just renamed files. Most of the changed call sites are arguably not critical, but it seems better to err on the side of too much durability. The most prominent known case where the previously missing fsyncs could cause data loss is crashes at the end of a checkpoint. After the actual checkpoint has been performed, old WAL files are recycled. When they're filled, their contents are fdatasynced, but we did not fsync the containing directory. An OS/hardware crash in an unfortunate moment could then end up leaving that file with its old name, but new content; WAL replay would thus not replay it. Reported-By: Tomas Vondra Author: Michael Paquier, Tomas Vondra, Andres Freund Discussion: 56583BDD.9060302@2ndquadrant.com Backpatch: All supported branches
1 parent d0e47bc commit a62714f

File tree

3 files changed

+13
-65
lines changed

3 files changed

+13
-65
lines changed

contrib/pg_stat_statements/pg_stat_statements.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -504,11 +504,7 @@ pgss_shmem_shutdown(int code, Datum arg)
504504
/*
505505
* Rename file into place, so we atomically replace the old one.
506506
*/
507-
if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
508-
ereport(LOG,
509-
(errcode_for_file_access(),
510-
errmsg("could not rename pg_stat_statement file \"%s\": %m",
511-
PGSS_DUMP_FILE ".tmp")));
507+
(void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
512508

513509
return;
514510

src/backend/access/transam/xlog.c

Lines changed: 11 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2672,34 +2672,16 @@ InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
26722672
}
26732673

26742674
/*
2675-
* Prefer link() to rename() here just to be really sure that we don't
2676-
* overwrite an existing logfile. However, there shouldn't be one, so
2677-
* rename() is an acceptable substitute except for the truly paranoid.
2675+
* Perform the rename using link if available, paranoidly trying to avoid
2676+
* overwriting an existing file (there shouldn't be one).
26782677
*/
2679-
#if HAVE_WORKING_LINK
2680-
if (link(tmppath, path) < 0)
2678+
if (durable_link_or_rename(tmppath, path, LOG) != 0)
26812679
{
26822680
if (use_lock)
26832681
LWLockRelease(ControlFileLock);
2684-
ereport(LOG,
2685-
(errcode_for_file_access(),
2686-
errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
2687-
tmppath, path, *log, *seg)));
2682+
/* durable_link_or_rename already emitted log message */
26882683
return false;
26892684
}
2690-
unlink(tmppath);
2691-
#else
2692-
if (rename(tmppath, path) < 0)
2693-
{
2694-
if (use_lock)
2695-
LWLockRelease(ControlFileLock);
2696-
ereport(LOG,
2697-
(errcode_for_file_access(),
2698-
errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
2699-
tmppath, path, *log, *seg)));
2700-
return false;
2701-
}
2702-
#endif
27032685

27042686
if (use_lock)
27052687
LWLockRelease(ControlFileLock);
@@ -4620,24 +4602,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
46204602
TLHistoryFilePath(path, newTLI);
46214603

46224604
/*
4623-
* Prefer link() to rename() here just to be really sure that we don't
4624-
* overwrite an existing logfile. However, there shouldn't be one, so
4625-
* rename() is an acceptable substitute except for the truly paranoid.
4605+
* Perform the rename using link if available, paranoidly trying to avoid
4606+
* overwriting an existing file (there shouldn't be one).
46264607
*/
4627-
#if HAVE_WORKING_LINK
4628-
if (link(tmppath, path) < 0)
4629-
ereport(ERROR,
4630-
(errcode_for_file_access(),
4631-
errmsg("could not link file \"%s\" to \"%s\": %m",
4632-
tmppath, path)));
4633-
unlink(tmppath);
4634-
#else
4635-
if (rename(tmppath, path) < 0)
4636-
ereport(ERROR,
4637-
(errcode_for_file_access(),
4638-
errmsg("could not rename file \"%s\" to \"%s\": %m",
4639-
tmppath, path)));
4640-
#endif
4608+
durable_link_or_rename(tmppath, path, ERROR);
46414609

46424610
/* The history file can be archived immediately. */
46434611
if (XLogArchivingActive())
@@ -5595,11 +5563,7 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
55955563
(errmsg_internal("moving last restored xlog to \"%s\"",
55965564
xlogpath)));
55975565
unlink(xlogpath); /* might or might not exist */
5598-
if (rename(recoveryPath, xlogpath) != 0)
5599-
ereport(FATAL,
5600-
(errcode_for_file_access(),
5601-
errmsg("could not rename file \"%s\" to \"%s\": %m",
5602-
recoveryPath, xlogpath)));
5566+
durable_rename(recoveryPath, xlogpath, FATAL);
56035567
/* XXX might we need to fix permissions on the file? */
56045568
}
56055569
else
@@ -5648,11 +5612,7 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
56485612
* re-enter archive recovery mode in a subsequent crash.
56495613
*/
56505614
unlink(RECOVERY_COMMAND_DONE);
5651-
if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
5652-
ereport(FATAL,
5653-
(errcode_for_file_access(),
5654-
errmsg("could not rename file \"%s\" to \"%s\": %m",
5655-
RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
5615+
durable_rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE, FATAL);
56565616

56575617
ereport(LOG,
56585618
(errmsg("archive recovery complete")));
@@ -6548,11 +6508,7 @@ StartupXLOG(void)
65486508
if (haveBackupLabel)
65496509
{
65506510
unlink(BACKUP_LABEL_OLD);
6551-
if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
6552-
ereport(FATAL,
6553-
(errcode_for_file_access(),
6554-
errmsg("could not rename file \" 341A %s\" to \"%s\": %m",
6555-
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
6511+
durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
65566512
}
65576513

65586514
/* Check that the GUCs used to generate the WAL allow recovery */
@@ -10148,7 +10104,7 @@ CancelBackup(void)
1014810104
/* remove leftover file from previously canceled backup if it exists */
1014910105
unlink(BACKUP_LABEL_OLD);
1015010106

10151-
if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) == 0)
10107+
if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) == 0)
1015210108
{
1015310109
ereport(LOG,
1015410110
(errmsg("online backup mode canceled"),

src/backend/postmaster/pgarch.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -709,9 +709,5 @@ pgarch_archiveDone(char *xlog)
709709

710710
StatusFilePath(rlogready, xlog, ".ready");
711711
StatusFilePath(rlogdone, xlog, ".done");
712-
if (rename(rlogready, rlogdone) < 0)
713-
ereport(WARNING,
714-
(errcode_for_file_access(),
715-
errmsg("could not rename file \"%s\" to \"%s\": %m",
716-
rlogready, rlogdone)));
712+
(void) durable_rename(rlogready, rlogdone, WARNING);
717713
}

0 commit comments

Comments
 (0)
0