@@ -132,19 +132,69 @@ smgr_bulk_finish(BulkWriteState *bulkstate)
132
132
smgr_bulk_flush (bulkstate );
133
133
134
134
/*
135
- * When we wrote out the pages, we passed skipFsync=true to avoid the
136
- * overhead of registering all the writes with the checkpointer. Register
137
- * the whole relation now.
138
- *
139
- * There is one hole in that idea: If a checkpoint occurred while we were
140
- * writing the pages, it already missed fsyncing the pages we had written
141
- * before the checkpoint started. A crash later on would replay the WAL
142
- * starting from the checkpoint, therefore it wouldn't replay our earlier
143
- * WAL records. So if a checkpoint started after the bulk write, fsync
144
- * the files now.
135
+ * Fsync the relation, or register it for the next checkpoint, if
136
+ * necessary.
145
137
*/
146
- if (! SmgrIsTemp (bulkstate -> smgr ))
138
+ if (SmgrIsTemp (bulkstate -> smgr ))
147
139
{
140
+ /* Temporary relations don't need to be fsync'd, ever */
141
+ }
142
+ else if (!bulkstate -> use_wal )
143
+ {
144
+ /*----------
145
+ * This is either an unlogged relation, or a permanent relation but we
146
+ * skipped WAL-logging because wal_level=minimal:
147
+ *
148
+ * A) Unlogged relation
149
+ *
150
+ * Unlogged relations will go away on crash, but they need to be
151
+ * fsync'd on a clean shutdown. It's sufficient to call
152
+ * smgrregistersync(), which ensures that the checkpointer will
153
+ * flush it at the shutdown checkpoint. (It will flush it on the
154
+ * next online checkpoint too, which is not strictly necessary.)
155
+ *
156
+ * Note that the init-fork of an unlogged relation is not
157
+ * considered unlogged for our purposes. It's treated like a
158
+ * regular permanent relation. The callers will pass use_wal=true
159
+ * for the init fork.
160
+ *
161
+ * B) Permanent relation, WAL-logging skipped because wal_level=minimal
162
+ *
163
+ * This is a new relation, and we didn't WAL-log the pages as we
164
+ * wrote, but they need to be fsync'd before commit.
165
+ *
166
+ * We don't need to do that here, however. The fsync() is done at
167
+ * commit, by smgrDoPendingSyncs() (*).
168
+ *
169
+ * (*) smgrDoPendingSyncs() might decide to WAL-log the whole
170
+ * relation at commit instead of fsyncing it, if the relation was
171
+ * very small, but it's smgrDoPendingSyncs()'s responsibility in any
172
+ * case.
173
+ *
174
+ * We cannot distinguish the two here, so conservatively assume it's
175
+ * an unlogged relation. A permanent relation with wal_level=minimal
176
+ * would require no action; see above.
177
+ */
178
+ smgrregistersync (bulkstate -> smgr , bulkstate -> forknum );
179
+ }
180
+ else
181
+ {
182
+ /*
183
+ * Permanent relation, WAL-logged normally.
184
+ *
185
+ * We already WAL-logged all the pages, so they will be replayed from
186
+ * WAL on crash. However, when we wrote out the pages, we passed
187
+ * skipFsync=true to avoid the overhead of registering all the writes
188
+ * with the checkpointer. Register the whole relation now.
189
+ *
190
+ * There is one hole in that idea: If a checkpoint occurred while we
191
+ * were writing the pages, it already missed fsyncing the pages we had
192
+ * written before the checkpoint started. A crash later on would
193
+ * replay the WAL starting from the checkpoint, therefore it wouldn't
194
+ * replay our earlier WAL records. So if a checkpoint started after
195
+ * the bulk write, fsync the files now.
196
+ */
197
+
148
198
/*
149
199
* Prevent a checkpoint from starting between the GetRedoRecPtr() and
150
200
* smgrregistersync() calls.
0 commit comments