8000 Fix sloppy handling of corner-case errors in fd.c. · anrs/postgres@7752275 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7752275

Browse files
committed
Fix sloppy handling of corner-case errors in fd.c.
Several places in fd.c had badly-thought-through handling of error returns from lseek() and close(). The fact that those would seldom fail on valid FDs is probably the reason we've not noticed this up to now; but if they did fail, we'd get quite confused. LruDelete and LruInsert actually just Assert'd that lseek never fails, which is pretty awful on its face. In LruDelete, we indeed can't throw an error, because that's likely to get called during error abort and so throwing an error would probably just lead to an infinite loop. But by the same token, throwing an error from the close() right after that was ill-advised, not to mention that it would've left the LRU state corrupted since we'd already unlinked the VFD from the list. I also noticed that really, most of the time, we should know the current seek position and it shouldn't be necessary to do an lseek here at all. As patched, if we don't have a seek position and an lseek attempt doesn't give us one, we'll close the file but then subsequent re-open attempts will fail (except in the somewhat-unlikely case that a FileSeek(SEEK_SET) call comes between and allows us to re-establish a known target seek position). This isn't great but it won't result in any state corruption. Meanwhile, having an Assert instead of an honest test in LruInsert is really dangerous: if that lseek failed, a subsequent read or write would read or write from the start of the file, not where the caller expected, leading to data corruption. In both LruDelete and FileClose, if close() fails, just LOG that and mark the VFD closed anyway. Possibly leaking an FD is preferable to getting into an infinite loop or corrupting the VFD list. Besides, as far as I can tell from the POSIX spec, it's unspecified whether or not the file has been closed, so treating it as still open could be the wrong thing anyhow. I also fixed a number of other places that were being sloppy about behaving correctly when the seekPos is unknown. Also, I changed FileSeek to return -1 with EINVAL for the cases where it detects a bad offset, rather than throwing a hard elog(ERROR). It seemed pretty inconsistent that some bad-offset cases would get a failure return while others got elog(ERROR). It was missing an offset validity check for the SEEK_CUR case on a closed file, too. Back-patch to all supported branches, since all this code is fundamentally identical in all of them. Discussion: https://postgr.es/m/2982.1487617365@sss.pgh.pa.us
1 parent b9f05be commit 7752275

File tree

1 file changed

+136
-59
lines changed
  • src/backend/storage/file

1 file changed

+136
-59
lines changed

src/backend/storage/file/fd.c

Lines changed: 136 additions & 59 deletions
< 8000 td data-grid-cell-id="diff-c3c830e34267c8ad838b09a48aaca47789d4af47b2c283dff221227f06341048-871-891-2" data-line-anchor="diff-c3c830e34267c8ad838b09a48aaca47789d4af47b2c283dff221227f06341048R891" data-selected="false" role="gridcell" style="background-color:var(--bgColor-default);padding-right:24px" tabindex="-1" valign="top" class="focusable-grid-cell diff-text-cell right-side-diff-cell left-side">
++nfile;
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,14 @@ int max_safe_fds = 32; /* default if not changed */
132132

133133
#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)
134134

135+
/*
136+
* Note: a VFD's seekPos is normally always valid, but if for some reason
137+
* an lseek() fails, it might become set to FileUnknownPos. We can struggle
138+
* along without knowing the seek position in many cases, but in some places
139+
* we have to fail if we don't have it.
140+
*/
135141
#define FileUnknownPos ((off_t) -1)
142+
#define FilePosIsUnknown(pos) ((pos) < 0)
136143

137144
/* these are the assigned bits in fdstate below: */
138145
#define FD_TEMPORARY (1 << 0) /* T = delete when closed */
@@ -146,7 +153,7 @@ typedef struct vfd
146153
File nextFree; /* link to next free VFD, if in freelist */
147154
File lruMoreRecently; /* doubly linked recency-of-use list */
148155
File lruLessRecently;
149-
off_t seekPos; /* current logical file position */
156+
off_t seekPos; /* current logical file position, or -1 */
150157
off_t fileSize; /* current size of file (0 if not temporary) */
151158
char *fileName; /* name of file, or NULL for unused VFD */
152159
/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
@@ -799,19 +806,33 @@ LruDelete(File file)
799806

800807
vfdP = &VfdCache[file];
801808

802-
/* delete the vfd record from the LRU ring */
803-
Delete(file);
804-
805-
/* save the seek position */
806-
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
807-
Assert(vfdP->seekPos != (off_t) -1);
809+
/*
810+
* Normally we should know the seek position, but if for some reason we
811+
* have lost track of it, try again to get it. If we still can't get it,
812+
* we have a problem: we will be unable to restore the file seek position
813+
* when and if the file is re-opened. But we can't really throw an error
814+
* and refuse to close the file, or activities such as transaction cleanup
815+
* will be broken.
816+
*/
817+
if (FilePosIsUnknown(vfdP->seekPos))
818+
{
819+
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
820+
if (FilePosIsUnknown(vfdP->seekPos))
821+
elog(LOG, "could not seek file \"%s\" before closing: %m",
822+
vfdP->fileName);
823+
}
808824

809-
/* close the file */
825+
/*
826+
* Close the file. We aren't expecting this to fail; if it does, better
827+
* to leak the FD than to mess up our internal state.
828+
*/
810829
if (close(vfdP->fd))
811-
elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
812-
813-
--nfile;
830+
elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
814831
vfdP->fd = VFD_CLOSED;
832+
--nfile;
833+
834+
/* delete the vfd record from the LRU ring */
835+
Delete(file);
815836
}
816837

817838
static void
@@ -862,22 +883,39 @@ LruInsert(File file)
862883
vfdP->fileMode);
863884
if (vfdP->fd < 0)
864885
{
865-
DO_DB(elog(LOG, "RE_OPEN FAILED: %d", errno));
886+
DO_DB(elog(LOG, "re-open failed: %m"));
866887
return -1;
867888
}
868889
else
869890
{
870-
DO_DB(elog(LOG, "RE_OPEN SUCCESS"));
871891
872892
}
873893

874-
/* seek to the right position */
894+
/*
895+
* Seek to the right position. We need no special case for seekPos
896+
* equal to FileUnknownPos, as lseek() will certainly reject that
897+
* (thus completing the logic noted in LruDelete() that we will fail
898+
* to re-open a file if we couldn't get its seek position before
899+
* closing).
900+
*/
875901
if (vfdP->seekPos != (off_t) 0)
876902
{
877-
off_t returnValue PG_USED_FOR_ASSERTS_ONLY;
878-
879-
returnValue = lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
880-
Assert(returnValue != (off_t) -1);
903+
if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0)
904+
{
905+
/*
906+
* If we fail to restore the seek position, treat it like an
907+
* open() failure.
908+
*/
909+
int save_errno = errno;
910+
911+
elog(LOG, "could not seek file \"%s\" after re-opening: %m",
912+
vfdP->fileName);
913+
(void) close(vfdP->fd);
914+
vfdP->fd = VFD_CLOSED;
915+
--nfile;
916+
errno = save_errno;
917+
return -1;
918+
}
881919
}
882920
}
883921

@@ -1260,15 +1298,15 @@ FileClose(File file)
12601298

12611299
if (!FileIsNotOpen(file))
12621300
{
1263-
/* remove the file from the lru ring */
1264-
Delete(file);
1265-
12661301
/* close the file */
12671302
if (close(vfdP->fd))
1268-
elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
1303+
elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
12691304

12701305
--nfile;
12711306
vfdP->fd = VFD_CLOSED;
1307+
1308+
/* remove the file from the lru ring */
1309+
Delete(file);
12721310
}
12731311

12741312
/*
@@ -1373,6 +1411,7 @@ int
13731411
FileRead(File file, char *buffer, int amount)
13741412
{
13751413
int returnCode;
1414+
Vfd *vfdP;
13761415

13771416
Assert(FileIsValid(file));
13781417

@@ -1385,11 +1424,17 @@ FileRead(File file, char *buffer, int amount)
13851424
if (returnCode < 0)
13861425
return returnCode;
13871426

1427+
vfdP = &VfdCache[file];
1428+
13881429
retry:
1389-
returnCode = read(VfdCache[file].fd, buffer, amount);
1430+
returnCode = read(vfdP->fd, buffer, amount);
13901431

13911432
if (returnCode >= 0)
1392-
VfdCache[file].seekPos += returnCode;
1433+
{
1434+
/* if seekPos is unknown, leave it that way */
1435+
if (!FilePosIsUnknown(vfdP->seekPos))
1436+
vfdP->seekPos += returnCode;
1437+
}
13931438
else
13941439
{
13951440
/*
@@ -1418,7 +1463,7 @@ FileRead(File file, char *buffer, int amount)
14181463
goto retry;
14191464

14201465
/* Trouble, so assume we don't know the file position anymore */
1421-
VfdCache[file].seekPos = FileUnknownPos;
1466+
vfdP->seekPos = FileUnknownPos;
14221467
}
14231468

14241469
return returnCode;
@@ -1428,6 +1473,7 @@ int
14281473
FileWrite(File file, char *buffer, int amount)
14291474
{
14301475
int returnCode;
1476+
Vfd *vfdP;
14311477

14321478
Assert(FileIsValid(file));
14331479

@@ -1440,6 +1486,8 @@ FileWrite(File file, char *buffer, int amount)
14401486
if (returnCode < 0)
14411487
return returnCode;
14421488

1489+
vfdP = &VfdCache[file];
1490+
14431491
/*
14441492
* If enforcing temp_file_limit and it's a temp file, check to see if the
14451493
* write would overrun temp_file_limit, and throw error if so. Note: it's
@@ -1448,15 +1496,28 @@ FileWrite(File file, char *buffer, int amount)
14481496
* message if we do that. All current callers would just throw error
14491497
* immediately anyway, so this is safe at present.
14501498
*/
1451-
if (temp_file_limit >= 0 && (VfdCache[file].fdstate & FD_TEMPORARY))
1499+
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMPORARY))
14521500
{
1453-
off_t newPos = VfdCache[file].seekPos + amount;
1501+
off_t newPos;
14541502

1455-
if (newPos > VfdCache[file].fileSize)
1503+
/*
1504+
* Normally we should know the seek position, but if for some reason
1505+
* we have lost track of it, try again to get it. Here, it's fine to
1506+
* throw an error if we still can't get it.
1507+
*/
1508+
if (FilePosIsUnknown(vfdP->seekPos))
1509+
{
1510+
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
1511+
if (FilePosIsUnknown(vfdP->seekPos))
1512+
elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName);
1513+
}
1514+
1515+
newPos = vfdP->seekPos + amount;
1516+
if (newPos > vfdP->fileSize)
14561517
{
14571518
uint64 newTotal = temporary_files_size;
14581519

1459-
newTotal += newPos - VfdCache[file].fileSize;
1520+
newTotal += newPos - vfdP->fileSize;
14601521
if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
14611522
ereport(ERROR,
14621523
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
@@ -1467,25 +1528,33 @@ FileWrite(File file, char *buffer, int amount)
14671528

14681529
retry:
14691530
errno = 0;
1470-
returnCode = write(VfdCache[file].fd, buffer, amount);
1531+
returnCode = write(vfdP->fd, buffer, amount);
14711532

14721533
/* if write didn't set errno, assume problem is no disk space */
14731534
if (returnCode != amount && errno == 0)
14741535
errno = ENOSPC;
14751536

14761537
if (returnCode >= 0)
14771538
{
1478-
VfdCache[file].seekPos += returnCode;
1539+
/* if seekPos is unknown, leave it that way */
1540+
if (!FilePosIsUnknown(vfdP->seekPos))
1541+
vfdP->seekPos += returnCode;
14791542

1480-
/* maintain fileSize and temporary_files_size if it's a temp file */
1481-
if (VfdCache[file].fdstate & FD_TEMPORARY)
1543+
/*
1544+
* Maintain fileSize and temporary_files_size if it's a temp file.
1545+
*
1546+
* If seekPos is -1 (unknown), this will do nothing; but we could only
1547+
* get here in that state if we're not enforcing temporary_files_size,
1548+
* so we don't care.
1549+
*/
1550+
if (vfdP->fdstate & FD_TEMPORARY)
14821551
{
1483-
off_t newPos = VfdCache[file].seekPos;
1552+
off_t newPos = vfdP->seekPos;
14841553

1485-
if (newPos > VfdCache[file].fileSize)
1554+
if (newPos > vfdP->fileSize)
14861555
{
1487-
temporary_files_size += newPos - VfdCache[file].fileSize;
1488-
VfdCache[file].fileSize = newPos;
1556+
temporary_files_size += newPos - vfdP->fileSize;
1557+
vfdP->fileSize = newPos;
14891558
}
14901559
}
14911560
}
@@ -1513,7 +1582,7 @@ FileWrite(File file, char *buffer, int amount)
15131582
goto retry;
15141583

15151584
/* Trouble, so assume we don't know the file position anymore */
1516-
VfdCache[file].seekPos = FileUnknownPos;
1585+
vfdP->seekPos = FileUnknownPos;
15171586
}
15181587

15191588
return returnCode;
@@ -1539,7 +1608,7 @@ FileSync(File file)
15391608
off_t
15401609
FileSeek(File file, off_t offset, int whence)
15411610
{
1542-
int returnCode;
1611+
Vfd *vfdP;
15431612

15441613
Assert(FileIsValid(file));
15451614

@@ -1548,25 +1617,33 @@ FileSeek(File file, off_t offset, int whence)
15481617
(int64) VfdCache[file].seekPos,
15491618
(int64) offset, whence));
15501619

1620+
vfdP = &VfdCache[file];
1621+
15511622
if (FileIsNotOpen(file))
15521623
{
15531624
switch (whence)
15541625
{
15551626
case SEEK_SET:
15561627
if (offset < 0)
1557-
elog(ERROR, "invalid seek offset: " INT64_FORMAT,
1558-
(int64) offset);
1559-
VfdCache[file].seekPos = offset;
1628+
{
1629+
errno = EINVAL;
1630+
return (off_t) -1;
1631+
}
1632+
vfdP->seekPos = offset;
15601633
break;
15611634
case SEEK_CUR:
1562-
VfdCache[file].seekPos += offset;
1635+
if (FilePosIsUnknown(vfdP->seekPos) ||
1636+
vfdP->seekPos + offset < 0)
1637+
{
1638+
errno = EINVAL;
1639+
return (off_t) -1;
1640+
}
1641+
vfdP->seekPos += offset;
15631642
break;
15641643
case SEEK_END:
1565-
returnCode = FileAccess(file);
1566-
if (returnCode < 0)
1567-
return returnCode;
1568-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1569-
offset, whence);
1644+
if (FileAccess(file) < 0)
1645+
return (off_t) -1;
1646+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
15701647
break;
15711648
default:
15721649
elog(ERROR, "invalid whence: %d", whence);
@@ -1579,27 +1656,27 @@ FileSeek(File file, off_t offset, int whence)
15791656
{
15801657
case SEEK_SET:
15811658
if (offset < 0)
1582-
elog(ERROR, "invalid seek offset: " INT64_FORMAT,
1583-
(int64) offset);
1584-
if (VfdCache[file].seekPos != offset)
1585-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1586-
offset, whence);
1659+
{
1660+
errno = EINVAL;
1661+
return (off_t) -1;
1662+
}
1663+
if (vfdP->seekPos != offset)
1664+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
15871665
break;
15881666
case SEEK_CUR:
1589-
if (offset != 0 || VfdCache[file].seekPos == FileUnknownPos)
1590-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1591-
offset, whence);
1667+
if (offset != 0 || FilePosIsUnknown(vfdP->seekPos))
1668+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
15921669
break;
15931670
case SEEK_END:
1594-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1595-
offset, whence);
1671+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
15961672
break;
15971673
default:
15981674
elog(ERROR, "invalid whence: %d", whence);
15991675
break;
16001676
}
16011677
}
1602-
return VfdCache[file].seekPos;
1678+
1679+
return vfdP->seekPos;
16031680
}
16041681

16051682
/*

0 commit comments

Comments
 (0)
0