8000 Avoid unlikely data-loss scenarios due to rename() without fsync. · prmdeveloper/postgres@301cc35 · GitHub
[go: up one dir, main page]

Skip to content

Commit 301cc35

Browse files
committed
Avoid unlikely data-loss scenarios due to rename() without fsync.
Renaming a file using rename(2) is not guaranteed to be durable in face of crashes. Use the previously added durable_rename()/durable_link_or_rename() in various places where we previously just renamed files. Most of the changed call sites are arguably not critical, but it seems better to err on the side of too much durability. The most prominent known case where the previously missing fsyncs could cause data loss is crashes at the end of a checkpoint. After the actual checkpoint has been performed, old WAL files are recycled. When they're filled, their contents are fdatasynced, but we did not fsync the containing directory. An OS/hardware crash in an unfortunate moment could then end up leaving that file with its old name, but new content; WAL replay would thus not replay it. Reported-By: Tomas Vondra Author: Michael Paquier, Tomas Vondra, Andres Freund Discussion: 56583BDD.9060302@2ndquadrant.com Backpatch: All supported branches
1 parent 63b06e8 commit 301cc35

File tree

7 files changed

+24
-126
lines changed

7 files changed

+24
-126
lines changed

contrib/pg_stat_statements/pg_stat_statements.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -741,11 +741,7 @@ pgss_shmem_shutdown(int code, Datum arg)
741741
/*
742742
* Rename file into place, so we atomically replace any old one.
743743
*/
744-
if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
745-
ereport(LOG,
746-
(errcode_for_file_access(),
747-
errmsg("could not rename pg_stat_statement file \"%s\": %m",
748-
PGSS_DUMP_FILE ".tmp")));
744+
(void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
749745

750746
/* Unlink query-texts file; it's not needed while shutdown */
751747
unlink(PGSS_TEXT_FILE);

src/backend/access/transam/timeline.c

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -418,24 +418,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
418418
TLHistoryFilePath(path, newTLI);
419419

420420
/*
421-
* Prefer link() to rename() here just to be really sure that we don't
422-
* overwrite an existing file. However, there shouldn't be one, so
423-
* rename() is an acceptable substitute except for the truly paranoid.
421+
* Perform the rename using link if available, paranoidly trying to avoid
422+
* overwriting an existing file (there shouldn't be one).
424423
*/
425-
#if HAVE_WORKING_LINK
426-
if (link(tmppath, path) < 0)
427-
ereport(ERROR,
428-
(errcode_for_file_access(),
429-
errmsg("could not link file \"%s\" to \"%s\": %m",
430-
tmppath, path)));
431-
unlink(tmppath);
432-
#else
433-
if (rename(tmppath, path) < 0)
434-
ereport(ERROR,
435-
(errcode_for_file_access(),
436-
errmsg("could not rename file \"%s\" to \"%s\": %m",
437-
tmppath, path)));
438-
#endif
424+
durable_link_or_rename(tmppath, path, ERROR);
439425

440426
/* The history file can be archived immediately. */
441427
if (XLogArchivingActive())
@@ -508,24 +494,10 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
508494
TLHistoryFilePath(path, tli);
509495

510496
/*
511-
* Prefer link() to rename() here just to be really sure that we don't
512-
* overwrite an existing logfile. However, there shouldn't be one, so
513-
* rename() is an acceptable substitute except for the truly paranoid.
497+
* Perform the rename using link if available, paranoidly trying to avoid
498+
* overwriting an existing file (there shouldn't be one).
514499
*/
515-
#if HAVE_WORKING_LINK
516-
if (link(tmppath, path) < 0)
517-
ereport(ERROR,
518-
(errcode_for_file_access(),
519-
errmsg("could not link file \"%s\" to \"%s\": %m",
520-
tmppath, path)));
521-
unlink(tmppath);
522-
#else
523-
if (rename(tmppath, path) < 0)
524-
ereport(ERROR,
525-
(errcode_for_file_access(),
526-
errmsg("could not rename file \"%s\" to \"%s\": %m",
527-
tmppath, path)));
528-
#endif
500+
durable_link_or_rename(tmppath, path, ERROR);
529501
}
530502

531503
/*

src/backend/access/transam/xlog.c

Lines changed: 11 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3253,34 +3253,16 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
32533253
}
32543254

32553255
/*
3256-
* Prefer link() to rename() here just to be really sure that we don't
3257-
* overwrite an existing logfile. However, there shouldn't be one, so
3258-
* rename() is an acceptable substitute except for the truly paranoid.
3256+
* Perform the rename using link if available, paranoidly trying to avoid
3257+
* overwriting an existing file (there shouldn't be one).
32593258
*/
3260-
#if HAVE_WORKING_LINK
3261-
if (link(tmppath, path) < 0)
3259+
if (durable_link_or_rename(tmppath, path, LOG) != 0)
32623260
{
32633261
if (use_lock)
32643262
LWLockRelease(ControlFileLock);
3265-
ereport(LOG,
3266-
(errcode_for_file_access(),
3267-
errmsg("could not link file \"%s\" to \"%s\" (initialization of log file): %m",
3268-
tmppath, path)));
3269-
return false;
3270-
}
3271-
unlink(tmppath);
3272-
#else
3273-
if (rename(tmppath, path) < 0)
3274-
{
3275-
if (use_lock)
3276-
LWLockRelease(ControlFileLock);
3277-
ereport(LOG,
3278-
(errcode_for_file_access(),
3279-
errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file): %m",
3280-
tmppath, path)));
3263+
/* durable_link_or_rename already emitted log message */
32813264
return false;
32823265
}
3283-
#endif
32843266

32853267
if (use_lock)
32863268
LWLockRelease(ControlFileLock);
@@ -5296,11 +5278,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
52965278
* re-enter archive recovery mode in a subsequent crash.
52975279
*/
52985280
unlink(RECOVERY_COMMAND_DONE);
5299-
if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
5300-
ereport(FATAL,
5301-
(errcode_for_file_access(),
5302-
errmsg("could not rename file \"%s\" to \"%s\": %m",
5303-
RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
5281+
durable_rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE, FATAL);
53045282

53055283
ereport(LOG,
53065284
(errmsg("archive recovery complete")));
@@ -6147,7 +6125,7 @@ StartupXLOG(void)
61476125
if (stat(TABLESPACE_MAP, &st) == 0)
61486126
{
61496127
unlink(TABLESPACE_MAP_OLD);
6150-
if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
6128+
if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
61516129
ereport(LOG,
61526130
(errmsg("ignoring file \"%s\" because no file \"%s\" exists",
61536131
TABLESPACE_MAP, BACKUP_LABEL_FILE),
@@ -6510,11 +6488,7 @@ StartupXLOG(void)
65106488
if (haveBackupLabel)
65116489
{
65126490
unlink(BACKUP_LABEL_OLD);
6513-
if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
6514-
ereport(FATAL,
6515-
(errcode_for_file_access(),
6516-
errmsg("could not rename file \"%s\" to \"%s\": %m",
6517-
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
6491+
durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
65186492
}
65196493

65206494
/*
@@ -6527,11 +6501,7 @@ StartupXLOG(void)
65276501
if (haveTblspcMap)
65286502
{
65296503
unlink(TABLESPACE_MAP_OLD);
6530-
if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
6531-
ereport(FATAL,
6532-
(errcode_for_file_access(),
6533-
errmsg("could not rename file \"%s\" to \"%s\": %m",
6534-
TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
6504+
durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, FATAL);
65356505
}
65366506

65376507
/* Check that the GUCs used to generate the WAL allow recovery */
@@ -7308,11 +7278,7 @@ StartupXLOG(void)
73087278
*/
73097279
XLogArchiveCleanup(partialfname);
73107280

7311-
if (rename(origpath, partialpath) != 0)
7312-
ereport(ERROR,
7313-
(errcode_for_file_access(),
7314-
errmsg("could not rename file \"%s\" to \"%s\": %m",
7315-
origpath, partialpath)));
7281+
durable_rename(origpath, partialpath, ERROR);
73167282
XLogArchiveNotify(partialfname);
73177283
}
73187284
}
@@ -10874,7 +10840,7 @@ CancelBackup(void)
1087410840
/* remove leftover file from previously canceled backup if it exists */
1087510841
unlink(BACKUP_LABEL_OLD);
1087610842

10877-
if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
10843+
if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) != 0)
1087810844
{
1087910845
ereport(WARNING,
1088010846
(errcode_for_file_access(),
@@ -10897,7 +10863,7 @@ CancelBackup(void)
1089710863
/* remove leftover file from previously canceled backup if it exists */
1089810864
unlink(TABLESPACE_MAP_OLD);
1089910865

10900-
if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
10866+
if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
1090110867
{
1090210868
ereport(LOG,
1090310869
(errmsg("online backup mode canceled"),

src/backend/access/transam/xlogarchive.c

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -470,11 +470,7 @@ KeepFileRestoredFromArchive(char *path, char *xlogfname)
470470
reload = true;
471471
}
472472

473-
if (rename(path, xlogfpath) < 0)
474-
ereport(ERROR,
475-
(errcode_for_file_access(),
476-
errmsg("could not rename file \"%s\" to \"%s\": %m",
477-
path, xlogfpath)));
473+
durable_rename(path, xlogfpath, ERROR);
478474

479475
/*
480476
* Create .done file forcibly to prevent the restored segment from being
@@ -580,12 +576,7 @@ XLogArchiveForceDone(const char *xlog)
580576
StatusFilePath(archiveReady, xlog, ".ready");
581577
if (stat(archiveReady, &stat_buf) == 0)
582578
{
583-
if (rename(archiveReady, archiveDone) < 0)
584-
ereport(WARNING,
585-
(errcode_for_file_access(),
586-
errmsg("could not rename file \"%s\" to \"%s\": %m",
587-
archiveReady, archiveDone)));
588-
579+
(void) durable_rename(archiveReady, archiveDone, WARNING);
589580
return;
590581
}
591582

src/backend/postmaster/pgarch.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -728,9 +728,5 @@ pgarch_archiveDone(char *xlog)
728728

729729
StatusFilePath(rlogready, xlog, ".ready");
730730
StatusFilePath(rlogdone, xlog, ".done");
731-
if (rename(rlogready, rlogdone) < 0)
732-
ereport(WARNING,
733-
(errcode_for_file_access(),
734-
errmsg("could not rename file \"%s\" to \"%s\": %m",
735-
rlogready, rlogdone)));
731+
(void) durable_rename(rlogready, rlogdone, WARNING);
736732
}

src/backend/replication/logical/origin.c

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -604,29 +604,10 @@ CheckPointReplicationOrigin(void)
604604
tmppath)));
605605
}
606606

607-
/* fsync the temporary file */
608-
if (pg_fsync(tmpfd) != 0)
609-
{
610-
CloseTransientFile(tmpfd);
611-
ereport(PANIC,
612-
(errcode_for_file_access(),
613-
errmsg("could not fsync file \"%s\": %m",
614-
tmppath)));
615-
}
616-
617607
CloseTransientFile(tmpfd);
618608

619-
/* rename to permanent file, fsync file and directory */
620-
if (rename(tmppath, path) != 0)
621-
{
622-
ereport(PANIC,
623-
(errcode_for_file_access(),
624-
errmsg("could not rename file \"%s\" to \"%s\": %m",
625-
tmppath, path)));
626-
}
627-
628-
fsync_fname(path, false);
629-
fsync_fname("pg_logical", true);
609+
/* fsync, rename to permanent file, fsync file and directory */
610+
durable_rename(tmppath, path, PANIC);
630611
}
631612

632613 ADD2
/*

src/backend/utils/misc/guc.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6962,11 +6962,7 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt)
69626962
* at worst it can lose the parameters set by last ALTER SYSTEM
69636963
* command.
69646964
*/
6965-
if (rename(AutoConfTmpFileName, AutoConfFileName) < 0)
6966-
ereport(ERROR,
6967-
(errcode_for_file_access(),
6968-
errmsg("could not rename file \"%s\" to \"%s\": %m",
6969-
AutoConfTmpFileName, AutoConfFileName)));
6965+
durable_rename(AutoConfTmpFileName, AutoConfFileName, ERROR);
69706966
}
69716967
PG_CATCH();
69726968
{

0 commit comments

Comments
 (0)
0