@@ -3,70 +3,16 @@
 * unix_latch.c
 * Routines for inter-process latches
 *
- * A latch is a boolean variable, with operations that let you to sleep
- * until it is set. A latch can be set from another process, or a signal
- * handler within the same process.
- *
- * The latch interface is a reliable replacement for the common pattern of
- * using pg_usleep() or select() to wait until a signal arrives, where the
- * signal handler sets a global variable. Because on some platforms, an
- * incoming signal doesn't interrupt sleep, and even on platforms where it
- * does there is a race condition if the signal arrives just before
- * entering the sleep, the common pattern must periodically wake up and
- * poll the global variable. pselect() system call was invented to solve
- * the problem, but it is not portable enough. Latches are designed to
- * overcome these limitations, allowing you to sleep without polling and
- * ensuring a quick response to signals from other processes.
- *
- * There are two kinds of latches: local and shared. A local latch is
- * initialized by InitLatch, and can only be set from the same process.
- * A local latch can be used to wait for a signal to arrive, by calling
- * SetLatch in the signal handler. A shared latch resides in shared memory,
- * and must be initialized at postmaster startup by InitSharedLatch. Before
- * a shared latch can be waited on, it must be associated with a process
- * with OwnLatch. Only the process owning the latch can wait on it, but any
- * process can set it.
- *
- * There are three basic operations on a latch:
- *
- * SetLatch    - Sets the latch
- * ResetLatch  - Clears the latch, allowing it to be set again
- * WaitLatch   - Waits for the latch to become set
- *
- * The correct pattern to wait for an event is:
- *
- * for (;;)
- * {
- *     ResetLatch();
- *     if (work to do)
- *         Do Stuff();
- *
- *     WaitLatch();
- * }
- *
- * It's important to reset the latch *before* checking if there's work to
- * do. Otherwise, if someone sets the latch between the check and the
- * ResetLatch call, you will miss it and Wait will block.
- *
- * To wake up the waiter, you must first set a global flag or something
- * else that the main loop tests in the "if (work to do)" part, and call
- * SetLatch *after* that. SetLatch is designed to return quickly if the
- * latch is already set.
- *
- *
- * Implementation
- * --------------
- *
 * The Unix implementation uses the so-called self-pipe trick to overcome
 * the race condition involved with select() and setting a global flag
 * in the signal handler. When a latch is set and the current process
 * is waiting for it, the signal handler wakes up the select() in
 * WaitLatch by writing a byte to a pipe. A signal by itself doesn't
 * interrupt select() on all platforms, and even on platforms where it
 * does, a signal that arrives just before the select() call does not
 * prevent the select() from entering sleep. An incoming byte on a pipe
- * however reliably interrupts the sleep, and makes select() to return
- * immediately if the signal arrives just before select() begins.
+ * however reliably interrupts the sleep, and causes select() to return
+ * immediately even if the signal arrives before select() begins.
 *
 * When SetLatch is called from the same process that owns the latch,
 * SetLatch writes the byte directly to the pipe. If it's owned by another
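The removed usage documentation above (the new comments added by this commit point readers to latch.h for those rules) and the surviving implementation notes both revolve around the self-pipe trick. The following standalone program is an illustrative sketch of that trick, not code from this commit: a SIGUSR1 handler sets a flag and writes a byte into a nonblocking pipe that select() watches, so the wakeup is reliable even if the signal arrives just before select() starts.

    /* self_pipe_demo.c - standalone sketch of the self-pipe trick */
    #include <fcntl.h>
    #include <signal.h>
    #include <stdio.h>
    #include <sys/select.h>
    #include <unistd.h>

    static int pipefds[2];                  /* [0] = read end, [1] = write end */
    static volatile sig_atomic_t flag = 0;  /* the "work to do" indicator */

    static void
    handler(int signo)
    {
        (void) signo;
        flag = 1;                           /* set the flag first ... */
        (void) write(pipefds[1], "x", 1);   /* ... then wake up select() */
    }

    int
    main(void)
    {
        char    buf[16];
        fd_set  readfds;

        if (pipe(pipefds) != 0)
            return 1;
        /* Nonblocking, so a full pipe can't block the signal handler */
        fcntl(pipefds[0], F_SETFL, O_NONBLOCK);
        fcntl(pipefds[1], F_SETFL, O_NONBLOCK);
        signal(SIGUSR1, handler);
        printf("send SIGUSR1 to pid %d\n", (int) getpid());

        for (;;)
        {
            /* Drain the pipe, then check the flag (same order as WaitLatch) */
            while (read(pipefds[0], buf, sizeof(buf)) > 0)
                ;
            if (flag)
            {
                flag = 0;
                printf("woken by signal\n");    /* "work" would happen here */
            }

            FD_ZERO(&readfds);
            FD_SET(pipefds[0], &readfds);
            /* Returns on a readable pipe or EINTR; we loop and drain either way */
            (void) select(pipefds[0] + 1, &readfds, NULL, NULL, NULL);
        }
    }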
@@ -99,7 +45,7 @@
 /* Are we currently in WaitLatch? The signal handler would like to know. */
 static volatile sig_atomic_t waiting = false;
 
- /* Read and write end of the self-pipe */
+ /* Read and write ends of the self-pipe */
 static int	selfpipe_readfd = -1;
 static int	selfpipe_writefd = -1;
 
@@ -115,7 +61,7 @@ static void sendSelfPipeByte(void);
 void
 InitLatch(volatile Latch *latch)
 {
-	/* Initialize the self pipe if this is our first latch in the process */
+	/* Initialize the self-pipe if this is our first latch in the process */
 	if (selfpipe_readfd == -1)
 		initSelfPipe();
 
@@ -126,13 +72,14 @@ InitLatch(volatile Latch *latch)
 
 /*
  * Initialize a shared latch that can be set from other processes. The latch
- * is initially owned by no-one, use OwnLatch to associate it with the
+ * is initially owned by no-one; use OwnLatch to associate it with the
  * current process.
  *
  * InitSharedLatch needs to be called in postmaster before forking child
  * processes, usually right after allocating the shared memory block
- * containing the latch with ShmemInitStruct. The Unix implementation
- * doesn't actually require that, but the Windows one does.
+ * containing the latch with ShmemInitStruct. (The Unix implementation
+ * doesn't actually require that, but the Windows one does.) Because of
+ * this restriction, we have no concurrency issues to worry about here.
  */
 void
 InitSharedLatch(volatile Latch *latch)
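As an illustration of the comment above, shared-memory setup at postmaster startup might look roughly like this. This is a sketch, not code from this commit: the MySharedState struct and its name string are hypothetical, while ShmemInitStruct is the real allocator the comment mentions.

    /* Hypothetical postmaster-startup allocation of a latch-bearing struct */
    typedef struct MySharedState
    {
        Latch   latch;
        /* ... other shared fields ... */
    } MySharedState;

    bool    found;
    MySharedState *ss = (MySharedState *)
        ShmemInitStruct("My shared state", sizeof(MySharedState), &found);

    if (!found)
        InitSharedLatch(&ss->latch);    /* before forking any child process */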
@@ -144,23 +91,30 @@ InitSharedLatch(volatile Latch *latch)
 
 /*
  * Associate a shared latch with the current process, allowing it to
- * wait on it.
+ * wait on the latch.
  *
- * Make sure that latch_sigusr1_handler() is called from the SIGUSR1 signal
- * handler, as shared latches use SIGUSR1 to for inter-process communication.
+ * Although there is a sanity check for latch-already-owned, we don't do
+ * any sort of locking here, meaning that we could fail to detect the error
+ * if two processes try to own the same latch at about the same time. If
+ * there is any risk of that, caller must provide an interlock to prevent it.
+ *
+ * In any process that calls OwnLatch(), make sure that
+ * latch_sigusr1_handler() is called from the SIGUSR1 signal handler,
+ * as shared latches use SIGUSR1 for inter-process communication.
  */
 void
 OwnLatch(volatile Latch *latch)
 {
 	Assert(latch->is_shared);
 
-	/* Initialize the self pipe if this is our first latch in the process */
+	/* Initialize the self-pipe if this is our first latch in this process */
 	if (selfpipe_readfd == -1)
 		initSelfPipe();
 
 	/* sanity check */
 	if (latch->owner_pid != 0)
 		elog(ERROR, "latch already owned");
+
 	latch->owner_pid = MyProcPid;
 }
 
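The new comment makes the caller responsible for any interlock on latch ownership. One hedged sketch of how a caller could serialize ownership claims follows; the lock name and the ss pointer are hypothetical, and any mutual exclusion the competing callers agree on would do.

    /* Hypothetical caller-side interlock around OwnLatch() */
    LWLockAcquire(MyLatchAssignmentLock, LW_EXCLUSIVE);
    OwnLatch(&ss->latch);       /* the sanity check cannot be raced now */
    LWLockRelease(MyLatchAssignmentLock);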
@@ -172,6 +126,7 @@ DisownLatch(volatile Latch *latch)
 {
 	Assert(latch->is_shared);
 	Assert(latch->owner_pid == MyProcPid);
+
 	latch->owner_pid = 0;
 }
 
@@ -229,29 +184,38 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
 	int			hifd;
 
 	/*
-	 * Clear the pipe, and check if the latch is set already. If someone
+	 * Clear the pipe, then check if the latch is set already. If someone
 	 * sets the latch between this and the select() below, the setter will
 	 * write a byte to the pipe (or signal us and the signal handler will
 	 * do that), and the select() will return immediately.
+	 *
+	 * Note: we assume that the kernel calls involved in drainSelfPipe()
+	 * and SetLatch() will provide adequate synchronization on machines
+	 * with weak memory ordering, so that we cannot miss seeing is_set
+	 * if the signal byte is already in the pipe when we drain it.
 	 */
 	drainSelfPipe();
+
 	if (latch->is_set)
 	{
 		result = 1;
 		break;
 	}
 
+	/* Must wait ... set up the event masks for select() */
 	FD_ZERO(&input_mask);
+	FD_ZERO(&output_mask);
+
 	FD_SET(selfpipe_readfd, &input_mask);
 	hifd = selfpipe_readfd;
+
 	if (sock != PGINVALID_SOCKET && forRead)
 	{
 		FD_SET(sock, &input_mask);
 		if (sock > hifd)
 			hifd = sock;
 	}
 
-	FD_ZERO(&output_mask);
 	if (sock != PGINVALID_SOCKET && forWrite)
 	{
 		FD_SET(sock, &output_mask);
@@ -288,14 +252,23 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
 	}
 
 /*
- * Sets a latch and wakes up anyone waiting on it. Returns quickly if the
- * latch is already set.
+ * Sets a latch and wakes up anyone waiting on it.
+ *
+ * This is cheap if the latch is already set, otherwise not so much.
 */
 void
 SetLatch(volatile Latch *latch)
 {
 	pid_t		owner_pid;
 
+	/*
+	 * XXX there really ought to be a memory barrier operation right here,
+	 * to ensure that any flag variables we might have changed get flushed
+	 * to main memory before we check/set is_set. Without that, we have to
+	 * require that callers provide their own synchronization for machines
+	 * with weak memory ordering (see latch.h).
+	 */
+
 	/* Quick exit if already set */
 	if (latch->is_set)
 		return;
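The usage rule from the removed header comment still applies on the setter side: update the shared state first, and call SetLatch only afterwards, so the wait loop cannot miss the flag. A minimal hypothetical setter; work_pending is an assumed flag that the waiter's loop tests.

    static volatile sig_atomic_t work_pending = 0;  /* tested by the wait loop */

    static void
    kick_worker(volatile Latch *latch)
    {
        work_pending = 1;   /* publish the work first ... */
        SetLatch(latch);    /* ... then wake the waiter; cheap if already set */
    }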
@@ -307,13 +280,21 @@ SetLatch(volatile Latch *latch)
 	 * we're in a signal handler. We use the self-pipe to wake up the select()
 	 * in that case. If it's another process, send a signal.
 	 *
-	 * Fetch owner_pid only once, in case the owner simultaneously disowns the
-	 * latch and clears owner_pid. XXX: This assumes that pid_t is atomic,
-	 * which isn't guaranteed to be true! In practice, the effective range of
-	 * pid_t fits in a 32 bit integer, and so should be atomic. In the worst
-	 * case, we might end up signaling wrong process if the right one disowns
-	 * the latch just as we fetch owner_pid. Even then, you're very unlucky if
-	 * a process with that bogus pid exists.
+	 * Fetch owner_pid only once, in case the latch is concurrently getting
+	 * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
+	 * guaranteed to be true! In practice, the effective range of pid_t fits
+	 * in a 32 bit integer, and so should be atomic. In the worst case, we
+	 * might end up signaling the wrong process. Even then, you're very
+	 * unlucky if a process with that bogus pid exists and belongs to
+	 * Postgres; and PG database processes should handle excess SIGUSR1
+	 * interrupts without a problem anyhow.
+	 *
+	 * Another sort of race condition that's possible here is for a new process
+	 * to own the latch immediately after we look, so we don't signal it.
+	 * This is okay so long as all callers of ResetLatch/WaitLatch follow the
+	 * standard coding convention of waiting at the bottom of their loops,
+	 * not the top, so that they'll correctly process latch-setting events that
+	 * happen before they enter the loop.
 	 */
 	owner_pid = latch->owner_pid;
 	if (owner_pid == 0)
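The rest of SetLatch falls outside this hunk. Based on the comments above and the declarations shown earlier (the waiting flag and sendSelfPipeByte), the wakeup dispatch is roughly the following; a sketch, not the verbatim body.

    /* Sketch of the wakeup dispatch (simplified) */
    if (owner_pid == 0)
        return;                     /* latch is not owned: no one to wake */
    else if (owner_pid == MyProcPid)
    {
        if (waiting)                /* are we inside WaitLatch ourselves? */
            sendSelfPipeByte();     /* then poke our own select() */
    }
    else
        kill(owner_pid, SIGUSR1);   /* wake the owning process's handler */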
@@ -335,11 +316,23 @@ ResetLatch(volatile Latch *latch)
 	Assert(latch->owner_pid == MyProcPid);
 
 	latch->is_set = false;
+
+	/*
+	 * XXX there really ought to be a memory barrier operation right here, to
+	 * ensure that the write to is_set gets flushed to main memory before we
+	 * examine any flag variables. Otherwise a concurrent SetLatch might
+	 * falsely conclude that it needn't signal us, even though we have missed
+	 * seeing some flag updates that SetLatch was supposed to inform us of.
+	 * For the moment, callers must supply their own synchronization of flag
+	 * variables (see latch.h).
+	 */
 }
 
 /*
- * SetLatch uses SIGUSR1 to wake up the process waiting on the latch. Wake
- * up WaitLatch.
+ * SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
+ *
+ * Wake up WaitLatch, if we're waiting. (We might not be, since SIGUSR1 is
+ * overloaded for multiple purposes.)
 */
 void
 latch_sigusr1_handler(void)
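Per the OwnLatch() comment, every process that owns shared latches must route SIGUSR1 into this function. A hypothetical handler wiring, where SIGNAL_ARGS is PostgreSQL's standard signal-handler parameter macro:

    /* Hypothetical per-process SIGUSR1 handler chaining to the latch code */
    static void
    sigusr1_handler(SIGNAL_ARGS)
    {
        /* ... whatever other SIGUSR1 duties this process has ... */

        latch_sigusr1_handler();    /* wake WaitLatch if we are blocked in it */
    }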