8000 Back-patch assorted latch-related fixes. · omgwtfun/postgres@989f530 · GitHub
[go: up one dir, main page]

Skip to content

Commit 989f530

Browse files
committed
Back-patch assorted latch-related fixes.
Fix a whole bunch of signal handlers that had been hacked to do things that might change errno, without adding the necessary save/restore logic for errno. Also make some minor fixes in unix_latch.c, and clean up bizarre and unsafe scheme for disowning the process's latch. While at it, rename the PGPROC latch field to procLatch for consistency with 9.2. Issues noted while reviewing a patch by Peter Geoghegan.
1 parent 74d0994 commit 989f530

File tree

9 files changed

+124
-39
lines changed

9 files changed

+124
-39
lines changed

src/backend/access/transam/xlog.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9881,34 +9881,50 @@ startupproc_quickdie(SIGNAL_ARGS)
98819881
static void
98829882
StartupProcSigUsr1Handler(SIGNAL_ARGS)
98839883
{
9884+
int save_errno = errno;
9885+
98849886
latch_sigusr1_handler();
9887+
9888+
errno = save_errno;
98859889
}
98869890

98879891
/* SIGUSR2: set flag to finish recovery */
98889892
static void
98899893
StartupProcTriggerHandler(SIGNAL_ARGS)
98909894
{
9895+
int save_errno = errno;
9896+
98919897
promote_triggered = true;
98929898
WakeupRecovery();
9899+
9900+
errno = save_errno;
98939901
}
98949902

98959903
/* SIGHUP: set flag to re-read config file at next convenient time */
98969904
static void
98979905
StartupProcSigHupHandler(SIGNAL_ARGS)
98989906
{
9907+
int save_errno = errno;
9908+
98999909
got_SIGHUP = true;
99009910
WakeupRecovery();
9911+
9912+
errno = save_errno;
99019913
}
99029914

99039915
/* SIGTERM: set flag to abort redo and exit */
99049916
static void
99059917
StartupProcShutdownHandler(SIGNAL_ARGS)
99069918
{
9919+
int save_errno = errno;
9920+
99079921
if (in_restore_command)
99089922
proc_exit(1);
99099923
else
99109924
shutdown_requested = true;
99119925
WakeupRecovery();
9926+
9927+
errno = save_errno;
99129928
}
99139929

99149930
/* Handle SIGHUP and SIGTERM signals of startup process */

src/backend/port/unix_latch.c

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,12 @@ DisownLatch(volatile Latch *latch)
135135
* If the latch is already set, the function returns immediately.
136136
*
137137
* The 'timeout' is given in milliseconds, and -1 means wait forever.
138-
* On some platforms, signals cause the timeout to be restarted, so beware
139-
* that the function can sleep for several times longer than the specified
140-
* timeout.
138+
* On some platforms, signals do not interrupt the wait, or even
139+
* cause the timeout to be restarted, so beware that the function can sleep
140+
* for several times longer than the requested timeout. However, this
141+
* difficulty is not so great as it seems, because the signal handlers for any
142+
* signals that the caller should respond to ought to be programmed to end the
143+
* wait by calling SetLatch. Ideally, the timeout parameter is vestigial.
141144
*
142145
* The latch must be owned by the current process, ie. it must be a
143146
* backend-local latch initialized with InitLatch, or a shared latch
@@ -228,6 +231,7 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
228231
{
229232
if (errno == EINTR)
230233
continue;
234+
waiting = false;
231235
ereport(ERROR,
232236
(errcode_for_socket_access(),
233237
errmsg("select() failed: %m")));
@@ -255,6 +259,10 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
255259
* Sets a latch and wakes up anyone waiting on it.
256260
*
257261
* This is cheap if the latch is already set, otherwise not so much.
262+
*
263+
* NB: when calling this in a signal handler, be sure to save and restore
264+
* errno around it. (That's standard practice in most signal handlers, of
265+
* course, but we used to omit it in handlers that only set a flag.)
258266
*/
259267
void
260268
SetLatch(volatile Latch *latch)
@@ -300,7 +308,10 @@ SetLatch(volatile Latch *latch)
300308
if (owner_pid == 0)
301309
return;
302310
else if (owner_pid == MyProcPid)
303-
sendSelfPipeByte();
311+
{
312+
if (waiting)
313+
sendSelfPipeByte();
314+
}
304315
else
305316
kill(owner_pid, SIGUSR1);
306317
}
@@ -332,7 +343,11 @@ ResetLatch(volatile Latch *latch)
332343
* SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
333344
*
334345
* Wake up WaitLatch, if we're waiting. (We might not be, since SIGUSR1 is
335-
* overloaded for multiple purposes.)
346+
* overloaded for multiple purposes; or we might not have reached WaitLatch
347+
* yet, in which case we don't need to fill the pipe either.)
348+
*
349+
* NB: when calling this in a signal handler, be sure to save and restore
350+
* errno around it.
336351
*/
337352
void
338353
latch_sigusr1_handler(void)
@@ -396,13 +411,19 @@ sendSelfPipeByte(void)
396411
}
397412
}
398413

399-
/* Read all available data from the self-pipe */
414+
/*
415+
* Read all available data from the self-pipe
416+
*
417+
* Note: this is only called when waiting = true. If it fails and doesn't
418+
* return, it must reset that flag first (though ideally, this will never
419+
* happen).
420+
*/
400421
static void
401422
drainSelfPipe(void)
402423
{
403424
/*
404425
* There shouldn't normally be more than one byte in the pipe, or maybe a
405-
* few more if multiple processes run SetLatch at the same instant.
426+
* few bytes if multiple processes run SetLatch at the same instant.
406427
*/
407428
char buf[16];
408429
int rc;
@@ -417,9 +438,21 @@ drainSelfPipe(void)
417438
else if (errno == EINTR)
418439
continue; /* retry */
419440
else
441+
{
442+
waiting = false;
420443
elog(ERROR, "read() on self-pipe failed: %m");
444+
}
421445
}
422446
else if (rc == 0)
447+
{
448+
waiting = false;
423449
elog(ERROR, "unexpected EOF on self-pipe");
450+
}
451+
else if (rc < sizeof(buf))
452+
{
453+
/* we successfully drained the pipe; no need to read() again */
10000 454+
break;
455+
}
456+
/* else buffer wasn't big enough, so read again */
424457
}
425458
}

src/backend/replication/syncrep.c

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,6 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
111111
Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
112112
Assert(WalSndCtl != NULL);
113113

114-
/* Reset the latch before adding ourselves to the queue. */
115-
ResetLatch(&MyProc->waitLatch);
116-
117114
LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
118115
Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);
119116

@@ -167,7 +164,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
167164
int syncRepState;
168165

169166
/* Must reset the latch before testing state. */
170-
ResetLatch(&MyProc->waitLatch);
167+
ResetLatch(&MyProc->procLatch);
171168

172169
/*
173170
* Try checking the state without the lock first. There's no
@@ -247,11 +244,11 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
247244
}
248245

249246
/*
250-
* Wait on latch for up to 60 seconds. This allows us to check for
247+
* Wait on latch for up to 10 seconds. This allows us to check for
251248
* cancel/die signal or postmaster death regularly while waiting. Note
252249
* that timeout here does not necessarily release from loop.
253250
*/
254-
WaitLatch(&MyProc->waitLatch, 60000L);
251+
WaitLatch(&MyProc->procLatch, 10000L);
255252
}
256253

257254
/*
@@ -322,16 +319,14 @@ SyncRepCancelWait(void)
322319
}
323320

324321
void
325-
SyncRepCleanupAtProcExit(int code, Datum arg)
322+
SyncRepCleanupAtProcExit(void)
326323
{
327324
if (!SHMQueueIsDetached(&(MyProc->syncRepLinks)))
328325
{
329326
LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
330327
SHMQueueDelete(&(MyProc->syncRepLinks));
331328
LWLockRelease(SyncRepLock);
332329
}
333-
334-
DisownLatch(&MyProc->waitLatch);
335330
}
336331

337332
/*
@@ -560,9 +555,7 @@ SyncRepWakeQueue(bool all)
560555
/*
561556
* Wake only when we have set state and removed from queue.
562557
*/
563-
Assert(SHMQueueIsDetached(&(thisproc->syncRepLinks)));
564-
Assert(thisproc->syncRepState == SYNC_REP_WAIT_COMPLETE);
565-
SetLatch(&(thisproc->waitLatch));
558+
SetLatch(&(thisproc->procLatch));
566559

567560
numprocs++;
568561
}

src/backend/replication/walreceiver.c

Lines changed: 4 additions & 0 deletions
386
Original file line numberDiff line numberDiff line change
@@ -373,11 +373,15 @@ WalRcvSigHupHandler(SIGNAL_ARGS)
373373
static void
374374
WalRcvShutdownHandler(SIGNAL_ARGS)
375375
{
376+
int save_errno = errno;
377+
376378
got_SIGTERM = true;
377379

378380
/* Don't joggle the elbow of proc_exit */
379381
if (!proc_exit_inprogress && WalRcvImmediateInterruptOK)
380382
ProcessWalRcvInterrupts();
383+
384+
errno = save_errno;
381385
}
382

383387
/*

src/backend/replication/walsender.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,18 +1188,26 @@ XLogSend(char *msgbuf, bool *caughtup)
11881188
static void
11891189
WalSndSigHupHandler(SIGNAL_ARGS)
11901190
{
1191+
int save_errno = errno;
1192+
11911193
got_SIGHUP = true;
11921194
if (MyWalSnd)
11931195
SetLatch(&MyWalSnd->latch);
1196+
1197+
errno = save_errno;
11941198
}
11951199

11961200
/* SIGTERM: set flag to shut down */
11971201
static void
11981202
WalSndShutdownHandler(SIGNAL_ARGS)
11991203
{
1204+
int save_errno = errno;
1205+
12001206
walsender_shutdown_requested = true;
12011207
if (MyWalSnd)
12021208
SetLatch(&MyWalSnd->latch);
1209+
1210+
errno = save_errno;
12031211
}
12041212

12051213
/*
@@ -1238,16 +1246,24 @@ WalSndQuickDieHandler(SIGNAL_ARGS)
12381246
static void
12391247
WalSndXLogSendHandler(SIGNAL_ARGS)
12401248
{
1249+
int save_errno = errno;
1250+
12411251
latch_sigusr1_handler();
1252+
1253+
errno = save_errno;
12421254
}
12431255

12441256
/* SIGUSR2: set flag to do a last cycle and shut down afterwards */
12451257
static void
12461258
WalSndLastCycleHandler(SIGNAL_ARGS)
12471259
{
1260+
int save_errno = errno;
1261+
12481262
walsender_ready_to_stop = true;
12491263
if (MyWalSnd)
12501264
SetLatch(&MyWalSnd->latch);
1265+
1266+
errno = save_errno;
12511267
}
12521268

12531269
/* Set up signal handlers */

0 commit comments

Comments
 (0)
0