33 * unix_latch.c
44 * Routines for inter-process latches
55 *
6- * A latch is a boolean variable, with operations that let you to sleep
7- * until it is set. A latch can be set from another process, or a signal
8- * handler within the same process.
9- *
10- * The latch interface is a reliable replacement for the common pattern of
11- * using pg_usleep() or select() to wait until a signal arrives, where the
12- * signal handler sets a global variable. Because on some platforms, an
13- * incoming signal doesn't interrupt sleep, and even on platforms where it
14- * does there is a race condition if the signal arrives just before
15- * entering the sleep, the common pattern must periodically wake up and
16- * poll the global variable. pselect() system call was invented to solve
17- * the problem, but it is not portable enough. Latches are designed to
18- * overcome these limitations, allowing you to sleep without polling and
19- * ensuring a quick response to signals from other processes.
20- *
21- * There are two kinds of latches: local and shared. A local latch is
22- * initialized by InitLatch, and can only be set from the same process.
23- * A local latch can be used to wait for a signal to arrive, by calling
24- * SetLatch in the signal handler. A shared latch resides in shared memory,
25- * and must be initialized at postmaster startup by InitSharedLatch. Before
26- * a shared latch can be waited on, it must be associated with a process
27- * with OwnLatch. Only the process owning the latch can wait on it, but any
28- * process can set it.
29- *
30- * There are three basic operations on a latch:
31- *
32- * SetLatch - Sets the latch
33- * ResetLatch - Clears the latch, allowing it to be set again
34- * WaitLatch - Waits for the latch to become set
35- *
36- * The correct pattern to wait for an event is:
37- *
38- * for (;;)
39- * {
40- * ResetLatch();
41- * if (work to do)
42- * Do Stuff();
43- *
44- * WaitLatch();
45- * }
46- *
47- * It's important to reset the latch *before* checking if there's work to
48- * do. Otherwise, if someone sets the latch between the check and the
49- * ResetLatch call, you will miss it and Wait will block.
50- *
51- * To wake up the waiter, you must first set a global flag or something
52- * else that the main loop tests in the "if (work to do)" part, and call
53- * SetLatch *after* that. SetLatch is designed to return quickly if the
54- * latch is already set.
55- *
56- *
57- * Implementation
58- * --------------
59- *
 * The Unix implementation uses the so-called self-pipe trick to overcome
 * the race condition involved with select() and setting a global flag
 * in the signal handler. When a latch is set and the current process
 * is waiting for it, the signal handler wakes up the select() in
 * WaitLatch by writing a byte to a pipe. A signal by itself doesn't
 * interrupt select() on all platforms, and even on platforms where it
 * does, a signal that arrives just before the select() call does not
 * prevent the select() from entering sleep. An incoming byte on a pipe
 * however reliably interrupts the sleep, and causes select() to return
 * immediately even if the signal arrives before select() begins.
7016 *
7117 * When SetLatch is called from the same process that owns the latch,
7218 * SetLatch writes the byte directly to the pipe. If it's owned by another
10046/* Are we currently in WaitLatch? The signal handler would like to know. */
10147static volatile sig_atomic_t waiting = false;
10248
103- /* Read and write end of the self-pipe */
49+ /* Read and write ends of the self-pipe */
10450static int selfpipe_readfd = -1 ;
10551static int selfpipe_writefd = -1 ;
10652
@@ -116,7 +62,7 @@ static void sendSelfPipeByte(void);
11662void
11763InitLatch (volatile Latch * latch )
11864{
	/* Initialize the self-pipe if this is our first latch in the process */
12066 if (selfpipe_readfd == -1 )
12167 initSelfPipe ();
12268
@@ -127,13 +73,14 @@ InitLatch(volatile Latch *latch)
12773
12874/*
12975 * Initialize a shared latch that can be set from other processes. The latch
 * is initially owned by no-one; use OwnLatch to associate it with the
13177 * current process.
13278 *
13379 * InitSharedLatch needs to be called in postmaster before forking child
13480 * processes, usually right after allocating the shared memory block
 * containing the latch with ShmemInitStruct. (The Unix implementation
 * doesn't actually require that, but the Windows one does.) Because of
 * this restriction, we have no concurrency issues to worry about here.
13784 */
13885void
13986InitSharedLatch (volatile Latch * latch )
@@ -145,23 +92,30 @@ InitSharedLatch(volatile Latch *latch)
14592
14693/*
14794 * Associate a shared latch with the current process, allowing it to
 * Associate a shared latch with the current process, allowing it to
 * wait on the latch.
 *
 * Although there is a sanity check for latch-already-owned, we don't do
 * any sort of locking here, meaning that we could fail to detect the error
 * if two processes try to own the same latch at about the same time. If
 * there is any risk of that, caller must provide an interlock to prevent it.
 *
 * In any process that calls OwnLatch(), make sure that
 * latch_sigusr1_handler() is called from the SIGUSR1 signal handler,
 * as shared latches use SIGUSR1 for inter-process communication.
152105 */
153106void
154107OwnLatch (volatile Latch * latch )
155108{
156109 Assert (latch -> is_shared );
157110
158- /* Initialize the self pipe if this is our first latch in the process */
111+ /* Initialize the self- pipe if this is our first latch in this process */
159112 if (selfpipe_readfd == -1 )
160113 initSelfPipe ();
161114
162115 /* sanity check */
163116 if (latch -> owner_pid != 0 )
164117 elog (ERROR , "latch already owned" );
118+
165119 latch -> owner_pid = MyProcPid ;
166120}
167121
@@ -173,25 +127,26 @@ DisownLatch(volatile Latch *latch)
173127{
174128 Assert (latch -> is_shared );
175129 Assert (latch -> owner_pid == MyProcPid );
130+
176131 latch -> owner_pid = 0 ;
177132}
178133
179134/*
 * Wait for a given latch to be set, or for postmaster death, or until timeout
 * is exceeded. 'wakeEvents' is a bitmask that specifies which of those events
 * to wait for. If the latch is already set (and WL_LATCH_SET is given), the
 * function returns immediately.
184139 *
 * The 'timeout' is given in microseconds. It must be >= 0 if WL_TIMEOUT flag
 * is given. On some platforms, signals cause the timeout to be restarted,
 * so beware that the function can sleep for several times longer than the
 * specified timeout.
189144 *
190145 * The latch must be owned by the current process, ie. it must be a
191146 * backend-local latch initialized with InitLatch, or a shared latch
192147 * associated with the current process by calling OwnLatch.
193148 *
 * Returns bit mask indicating which condition(s) caused the wake-up. Note
195150 * that if multiple wake-up conditions are true, there is no guarantee that
196151 * we return all of them in one call, but we will return at least one. Also,
197152 * according to the select(2) man page on Linux, select(2) may spuriously
@@ -200,7 +155,7 @@ DisownLatch(volatile Latch *latch)
200155 * readable, or postmaster has died, even when none of the wake conditions
201156 * have been satisfied. That should be rare in practice, but the caller
202157 * should not use the return value for anything critical, re-checking the
 * situation with PostmasterIsAlive() or read() on a socket as necessary.
204159 */
205160int
206161WaitLatch (volatile Latch * latch , int wakeEvents , long timeout )
@@ -247,12 +202,18 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
247202 int hifd ;
248203
249204 /*
	 * Clear the pipe, then check if the latch is set already. If someone
251206 * sets the latch between this and the select() below, the setter will
252207 * write a byte to the pipe (or signal us and the signal handler will
253208 * do that), and the select() will return immediately.
209+ *
210+ * Note: we assume that the kernel calls involved in drainSelfPipe()
211+ * and SetLatch() will provide adequate synchronization on machines
212+ * with weak memory ordering, so that we cannot miss seeing is_set
213+ * if the signal byte is already in the pipe when we drain it.
254214 */
255215 drainSelfPipe ();
216+
256217 if ((wakeEvents & WL_LATCH_SET ) && latch -> is_set )
257218 {
258219 result |= WL_LATCH_SET ;
@@ -263,7 +224,10 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
263224 break ;
264225 }
265226
227+ /* Must wait ... set up the event masks for select() */
266228 FD_ZERO (& input_mask );
229+ FD_ZERO (& output_mask );
230+
267231 FD_SET (selfpipe_readfd , & input_mask );
268232 hifd = selfpipe_readfd ;
269233
@@ -281,7 +245,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
281245 hifd = sock ;
282246 }
283247
284- FD_ZERO (& output_mask );
285248 if (wakeEvents & WL_SOCKET_WRITEABLE )
286249 {
287250 FD_SET (sock , & output_mask );
@@ -320,21 +283,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
320283 {
321284 result |= WL_POSTMASTER_DEATH ;
322285 }
323- } while (result == 0 );
286+ } while (result == 0 );
324287 waiting = false;
325288
326289 return result ;
327290}
328291
329292/*
 * Sets a latch and wakes up anyone waiting on it.
 *
 * This is cheap if the latch is already set, otherwise not so much.
332296 */
333297void
334298SetLatch (volatile Latch * latch )
335299{
336300 pid_t owner_pid ;
337301
302+ /*
303+ * XXX there really ought to be a memory barrier operation right here,
304+ * to ensure that any flag variables we might have changed get flushed
305+ * to main memory before we check/set is_set. Without that, we have to
306+ * require that callers provide their own synchronization for machines
307+ * with weak memory ordering (see latch.h).
308+ */
309+
338310 /* Quick exit if already set */
339311 if (latch -> is_set )
340312 return ;
@@ -346,13 +318,21 @@ SetLatch(volatile Latch *latch)
346318 * we're in a signal handler. We use the self-pipe to wake up the select()
347319 * in that case. If it's another process, send a signal.
348320 *
	 * Fetch owner_pid only once, in case the latch is concurrently getting
	 * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
	 * guaranteed to be true! In practice, the effective range of pid_t fits
	 * in a 32 bit integer, and so should be atomic. In the worst case, we
	 * might end up signaling the wrong process. Even then, you're very
	 * unlucky if a process with that bogus pid exists and belongs to
	 * Postgres; and PG database processes should handle excess SIGUSR1
	 * interrupts without a problem anyhow.
	 *
	 * Another sort of race condition that's possible here is for a new process
	 * to own the latch immediately after we look, so we don't signal it.
	 * This is okay so long as all callers of ResetLatch/WaitLatch follow the
	 * standard coding convention of waiting at the bottom of their loops,
	 * not the top, so that they'll correctly process latch-setting events that
	 * happen before they enter the loop.
356336 */
357337 owner_pid = latch -> owner_pid ;
358338 if (owner_pid == 0 )
@@ -374,11 +354,23 @@ ResetLatch(volatile Latch *latch)
374354 Assert (latch -> owner_pid == MyProcPid );
375355
376356 latch -> is_set = false;
357+
358+ /*
359+ * XXX there really ought to be a memory barrier operation right here, to
360+ * ensure that the write to is_set gets flushed to main memory before we
361+ * examine any flag variables. Otherwise a concurrent SetLatch might
362+ * falsely conclude that it needn't signal us, even though we have missed
363+ * seeing some flag updates that SetLatch was supposed to inform us of.
364+ * For the moment, callers must supply their own synchronization of flag
365+ * variables (see latch.h).
366+ */
377367}
378368
379369/*
 * SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
 *
 * Wake up WaitLatch, if we're waiting. (We might not be, since SIGUSR1 is
 * overloaded for multiple purposes.)
382374 */
383375void
384376latch_sigusr1_handler (void )
0 commit comments