@@ -6135,13 +6135,18 @@ heap_inplace_update_and_unlock(Relation relation,
61356135 HeapTupleHeader htup = oldtup -> t_data ;
61366136 uint32 oldlen ;
61376137 uint32 newlen ;
6138+ char * dst ;
6139+ char * src ;
61386140
61396141 Assert (ItemPointerEquals (& oldtup -> t_self , & tuple -> t_self ));
61406142 oldlen = oldtup -> t_len - htup -> t_hoff ;
61416143 newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
61426144 if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
61436145 elog (ERROR , "wrong tuple length" );
61446146
6147+ dst = (char * ) htup + htup -> t_hoff ;
6148+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6149+
61456150 /*
61466151 * Construct shared cache inval if necessary. Note that because we only
61476152 * pass the new version of the tuple, this mustn't be used for any
@@ -6160,15 +6165,15 @@ heap_inplace_update_and_unlock(Relation relation,
61606165 */
61616166 PreInplace_Inval ();
61626167
6163- /* NO EREPORT(ERROR) from here till changes are logged */
6164- START_CRIT_SECTION ();
6165-
6166- memcpy ((char * ) htup + htup -> t_hoff ,
6167- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6168- newlen );
6169-
61706168 /*----------
6171- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6169+ * NO EREPORT(ERROR) from here till changes are complete
6170+ *
6171+ * Our buffer lock won't stop a reader having already pinned and checked
6172+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6173+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6174+ * checkpoint delay makes that acceptable. With the usual order of
6175+ * changes, a crash after memcpy() and before XLogInsert() could allow
6176+ * datfrozenxid to overtake relfrozenxid:
61726177 *
61736178 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
61746179 * ["R" is a VACUUM tbl]
@@ -6178,31 +6183,57 @@ heap_inplace_update_and_unlock(Relation relation,
61786183 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
61796184 * [crash]
61806185 * [recovery restores datfrozenxid w/o relfrozenxid]
6186+ *
6187+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6188+ * the buffer to the stack before logging. Here, that facilitates a FPI
6189+ * of the post-mutation block before we accept other sessions seeing it.
61816190 */
6182-
6183- MarkBufferDirty (buffer );
6191+ Assert (!MyPgXact -> delayChkpt );
6192+ START_CRIT_SECTION ();
6193+ MyPgXact -> delayChkpt = true;
61846194
61856195 /* XLOG stuff */
61866196 if (RelationNeedsWAL (relation ))
61876197 {
61886198 xl_heap_inplace xlrec ;
6199+ PGAlignedBlock copied_buffer ;
6200+ char * origdata = (char * ) BufferGetBlock (buffer );
6201+ Page page = BufferGetPage (buffer );
6202+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6203+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6204+ uintptr_t dst_offset_in_block ;
6205+ RelFileNode rnode ;
6206+ ForkNumber forkno ;
6207+ BlockNumber blkno ;
61896208 XLogRecPtr recptr ;
61906209
61916210 xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
61926211
61936212 XLogBeginInsert ();
61946213 XLogRegisterData ((char * ) & xlrec , SizeOfHeapInplace );
61956214
6196- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6197- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6215+ /* register block matching what buffer will look like after changes */
6216+ memcpy (copied_buffer .data , origdata , lower );
6217+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6218+ dst_offset_in_block = dst - origdata ;
6219+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6220+ BufferGetTag (buffer , & rnode , & forkno , & blkno );
6221+ Assert (forkno == MAIN_FORKNUM );
6222+ XLogRegisterBlock (0 , & rnode , forkno , blkno , copied_buffer .data ,
6223+ REGBUF_STANDARD );
6224+ XLogRegisterBufData (0 , src , newlen );
61986225
61996226 /* inplace updates aren't decoded atm, don't log the origin */
62006227
62016228 recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
62026229
6203- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6230+ PageSetLSN (page , recptr );
62046231 }
62056232
6233+ memcpy (dst , src , newlen );
6234+
6235+ MarkBufferDirty (buffer );
6236+
62066237 LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
62076238
62086239 /*
@@ -6215,6 +6246,7 @@ heap_inplace_update_and_unlock(Relation relation,
62156246 */
62166247 AtInplace_Inval ();
62176248
6249+ MyPgXact -> delayChkpt = false;
62186250 END_CRIT_SECTION ();
62196251 UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
62206252
0 commit comments