46
46
#include "commands/vacuum.h"
47
47
#include "miscadmin.h"
48
48
#include "pgstat.h"
49
+ #include "portability/instr_time.h"
49
50
#include "postmaster/autovacuum.h"
50
51
#include "storage/bufmgr.h"
51
52
#include "storage/freespace.h"
67
68
#define REL_TRUNCATE_MINIMUM 1000
68
69
#define REL_TRUNCATE_FRACTION 16
69
70
71
+ /*
72
+ * Timing parameters for truncate locking heuristics.
73
+ *
74
+ * These were not exposed as user tunable GUC values because it didn't seem
75
+ * that the potential for improvement was great enough to merit the cost of
76
+ * supporting them.
77
+ */
78
+ #define AUTOVACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
79
+ #define AUTOVACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
80
+ #define AUTOVACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
81
+
70
82
/*
71
83
* Guesstimation of number of dead tuples per page. This is used to
72
84
* provide an upper limit to memory allocated when vacuuming small
@@ -101,6 +113,7 @@ typedef struct LVRelStats
101
113
ItemPointer dead_tuples ; /* array of ItemPointerData */
102
114
int num_index_scans ;
103
115
TransactionId latestRemovedXid ;
116
+ bool lock_waiter_detected ;
104
117
} LVRelStats ;
105
118
106
119
@@ -191,6 +204,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
191
204
vacrelstats -> old_rel_pages = onerel -> rd_rel -> relpages ;
192
205
vacrelstats -> old_rel_tuples = onerel -> rd_rel -> reltuples ;
193
206
vacrelstats -> num_index_scans = 0 ;
207
+ vacrelstats -> pages_removed = 0 ;
208
+ vacrelstats -> lock_waiter_detected = false;
194
209
195
210
/* Open all indexes of the relation */
196
211
vac_open_indexes (onerel , RowExclusiveLock , & nindexes , & Irel );
@@ -257,10 +272,17 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
257
272
vacrelstats -> hasindex ,
258
273
new_frozen_xid );
259
274
260
- /* report results to the stats collector, too */
261
- pgstat_report_vacuum (RelationGetRelid (onerel ),
262
- onerel -> rd_rel -> relisshared ,
263
- new_rel_tuples );
275
+ /*
276
+ * Report results to the stats collector, too. An early terminated
277
+ * lazy_truncate_heap attempt suppresses the message and also cancels the
278
+ * execution of ANALYZE, if that was ordered.
279
+ */
280
+ if (!vacrelstats -> lock_waiter_detected )
281
+ pgstat_report_vacuum (RelationGetRelid (onerel ),
282
+ onerel -> rd_rel -> relisshared ,
283
+ new_rel_tuples );
284
+ else
285
+ vacstmt -> options &= ~VACOPT_ANALYZE ;
264
286
265
287
/* and log the action if appropriate */
266
288
if (IsAutoVacuumWorkerProcess () && Log_autovacuum_min_duration >= 0 )
@@ -1255,80 +1277,124 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
1255
1277
BlockNumber old_rel_pages = vacrelstats -> rel_pages ;
1256
1278
BlockNumber new_rel_pages ;
1257
1279
PGRUsage ru0 ;
1280
+ int lock_retry ;
1258
1281
1259
1282
pg_rusage_init (& ru0 );
1260
1283
1261
1284
/*
1262
- * We need full exclusive lock on the relation in order to do truncation.
1263
- * If we can't get it, give up rather than waiting --- we don't want to
1264
- * block other backends, and we don't want to deadlock (which is quite
1265
- * possible considering we already hold a lower-grade lock).
1266
- */
1267
- if (!ConditionalLockRelation (onerel , AccessExclusiveLock ))
1268
- return ;
1269
-
1270
- /*
1271
- * Now that we have exclusive lock, look to see if the rel has grown
1272
- * whilst we were vacuuming with non-exclusive lock. If so, give up; the
1273
- * newly added pages presumably contain non-deletable tuples.
1285
+ * Loop until no more truncating can be done.
1274
1286
*/
1275
- new_rel_pages = RelationGetNumberOfBlocks (onerel );
1276
- if (new_rel_pages != old_rel_pages )
1287
+ do
1277
1288
{
1278
1289
/*
1279
- * Note: we intentionally don't update vacrelstats->rel_pages with the
1280
- * new rel size here. If we did, it would amount to assuming that the
1281
- * new pages are empty, which is unlikely. Leaving the numbers alone
1282
- * amounts to assuming that the new pages have the same tuple density
1283
- * as existing ones, which is less unlikely .
1290
+ * We need full exclusive lock on the relation in order to do
1291
+ * truncation. Never queue for it: poll with ConditionalLockRelation and
1292
+ * give up once the retry budget is exhausted --- we don't want to block
1293
+ * other backends, and we don't want to deadlock (which is quite possible
1294
+ * considering we already hold a lower-grade lock).
1284
1295
*/
1285
- UnlockRelation (onerel , AccessExclusiveLock );
1286
- return ;
1287
- }
1296
+ vacrelstats -> lock_waiter_detected = false;
1297
+ lock_retry = 0 ;
1298
+ while (true)
1299
+ {
1300
+ if (ConditionalLockRelation (onerel , AccessExclusiveLock ))
1301
+ break ;
1288
1302
1289
- /*
1290
- * Scan backwards from the end to verify that the end pages actually
1291
- * contain no tuples. This is *necessary*, not optional, because other
1292
- * backends could have added tuples to these pages whilst we were
1293
- * vacuuming.
1294
- */
1295
- new_rel_pages = count_nondeletable_pages (onerel , vacrelstats );
1303
+ /*
1304
+ * Check for interrupts while trying to (re-)acquire the exclusive
1305
+ * lock.
1306
+ */
1307
+ CHECK_FOR_INTERRUPTS ();
1296
1308
1297
- if (new_rel_pages >= old_rel_pages )
1298
- {
1299
- /* can't do anything after all */
1300
- UnlockRelation (onerel , AccessExclusiveLock );
1301
- return ;
1302
- }
1309
+ if (++ lock_retry > (AUTOVACUUM_TRUNCATE_LOCK_TIMEOUT /
1310
+ AUTOVACUUM_TRUNCATE_LOCK_WAIT_INTERVAL ))
1311
+ {
1312
+ /*
1313
+ * We failed to establish the lock in the specified number of
1314
+ * retries. This means we give up truncating. Suppress the
1315
+ * ANALYZE step. Doing an ANALYZE at this point will reset the
1316
+ * dead_tuple_count in the stats collector, so we will not get
1317
+ * called by the autovacuum launcher again to do the truncate.
1318
+ */
1319
+ vacrelstats -> lock_waiter_detected = true;
1320
+ ereport (LOG ,
1321
+ (errmsg ("automatic vacuum of table \"%s.%s.%s\": "
1322
+ "cannot (re)acquire exclusive "
1323
+ "lock for truncate scan" ,
1324
+ get_database_name (MyDatabaseId ),
1325
+ get_namespace_name (RelationGetNamespace (onerel )),
1326
+ RelationGetRelationName (onerel ))));
1327
+ return ;
1328
+ }
1303
1329
1304
- /*
1305
- * Okay to truncate.
1306
- */
1307
- RelationTruncate (onerel , new_rel_pages );
1330
+ pg_usleep (AUTOVACUUM_TRUNCATE_LOCK_WAIT_INTERVAL );
1331
+ }
1308
1332
1309
- /*
1310
- * We can release the exclusive lock as soon as we have truncated. Other
1311
- * backends can't safely access the relation until they have processed the
1312
- * smgr invalidation that smgrtruncate sent out ... but that should happen
1313
- * as part of standard invalidation processing once they acquire lock on
1314
- * the relation.
1315
- */
1316
- UnlockRelation (onerel , AccessExclusiveLock );
1333
+ /*
1334
+ * Now that we have exclusive lock, look to see if the rel has grown
1335
+ * whilst we were vacuuming with non-exclusive lock. If so, give up;
1336
+ * the newly added pages presumably contain non-deletable tuples.
1337
+ */
1338
+ new_rel_pages = RelationGetNumberOfBlocks (onerel );
1339
+ if (new_rel_pages != old_rel_pages )
1340
+ {
1341
+ /*
1342
+ * Note: we intentionally don't update vacrelstats->rel_pages with
1343
+ * the new rel size here. If we did, it would amount to assuming
1344
+ * that the new pages are empty, which is unlikely. Leaving the
1345
+ * numbers alone amounts to assuming that the new pages have the
1346
+ * same tuple density as existing ones, which is less unlikely.
1347
+ */
1348
+ UnlockRelation (onerel , AccessExclusiveLock );
1349
+ return ;
1350
+ }
1317
1351
1318
- /*
1319
- * Update statistics. Here, it *is* correct to adjust rel_pages without
1320
- * also touching reltuples, since the tuple count wasn't changed by the
1321
- * truncation.
1322
- */
1323
- vacrelstats -> rel_pages = new_rel_pages ;
1324
- vacrelstats -> pages_removed = old_rel_pages - new_rel_pages ;
1352
+ /*
1353
+ * Scan backwards from the end to verify that the end pages actually
1354
+ * contain no tuples. This is *necessary*, not optional, because
1355
+ * other backends could have added tuples to these pages whilst we
1356
+ * were vacuuming.
1357
+ */
1358
+ new_rel_pages = count_nondeletable_pages ( onerel , vacrelstats ) ;
1325
1359
1326
- ereport (elevel ,
1327
- (errmsg ("\"%s\": truncated %u to %u pages" ,
1328
- RelationGetRelationName (onerel ),
1329
- old_rel_pages , new_rel_pages ),
1330
- errdetail ("%s." ,
1331
- pg_rusage_show (& ru0 ))));
1360
+ if (new_rel_pages >= old_rel_pages )
1361
+ {
1362
+ /* can't do anything after all */
1363
+ UnlockRelation (onerel , AccessExclusiveLock );
1364
+ return ;
1365
+ }
1366
+
1367
+ /*
1368
+ * Okay to truncate.
1369
+ */
1370
+ RelationTruncate (onerel , new_rel_pages );
1371
+
1372
+ /*
1373
+ * We can release the exclusive lock as soon as we have truncated.
1374
+ * Other backends can't safely access the relation until they have
1375
+ * processed the smgr invalidation that smgrtruncate sent out ... but
1376
+ * that should happen as part of standard invalidation processing once
1377
+ * they acquire lock on the relation.
1378
+ */
1379
+ UnlockRelation (onerel , AccessExclusiveLock );
1380
+
1381
+ /*
1382
+ * Update statistics. Here, it *is* correct to adjust rel_pages
1383
+ * without also touching reltuples, since the tuple count wasn't
1384
+ * changed by the truncation.
1385
+ */
1386
+ vacrelstats -> pages_removed += old_rel_pages - new_rel_pages ;
1387
+ vacrelstats -> rel_pages = new_rel_pages ;
1388
+
1389
+ ereport (elevel ,
1390
+ (errmsg ("\"%s\": truncated %u to %u pages" ,
1391
+ RelationGetRelationName (onerel ),
1392
+ old_rel_pages , new_rel_pages ),
1393
+ errdetail ("%s." ,
1394
+ pg_rusage_show (& ru0 ))));
1395
+ old_rel_pages = new_rel_pages ;
1396
+ } while (new_rel_pages > vacrelstats -> nonempty_pages &&
1397
+ vacrelstats -> lock_waiter_detected );
1332
1398
}
1333
1399
1334
1400
/*
@@ -1340,6 +1406,12 @@ static BlockNumber
1340
1406
count_nondeletable_pages (Relation onerel , LVRelStats * vacrelstats )
1341
1407
{
1342
1408
BlockNumber blkno ;
1409
+ instr_time starttime ;
1410
+ instr_time currenttime ;
1411
+ instr_time elapsed ;
1412
+
1413
+ /* Initialize starttime, the reference point for the periodic checks for conflicting lock requests */
1414
+ INSTR_TIME_SET_CURRENT (starttime );
1343
1415
1344
1416
/* Strange coding of loop control is needed because blkno is unsigned */
1345
1417
blkno = vacrelstats -> rel_pages ;
@@ -1351,6 +1423,36 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
1351
1423
maxoff ;
1352
1424
bool hastup ;
1353
1425
1426
+ /*
1427
+ * Check if another process requests a lock on our relation. We are
1428
+ * holding an AccessExclusiveLock here, so they will be waiting. We
1429
+ * only do this in AUTOVACUUM_TRUNCATE_LOCK_CHECK_INTERVAL millisecond
1430
+ * intervals, and we only check if that interval has elapsed once
1431
+ * every 32 blocks to keep the number of system calls and actual
1432
+ * shared lock table lookups to a minimum.
1433
+ */
1434
+ if ((blkno % 32 ) == 0 )
1435
+ {
1436
+ INSTR_TIME_SET_CURRENT (currenttime );
1437
+ elapsed = currenttime ;
1438
+ INSTR_TIME_SUBTRACT (elapsed , starttime );
1439
+ if ((INSTR_TIME_GET_MICROSEC (elapsed ) / 1000 )
1440
+ >= AUTOVACUUM_TRUNCATE_LOCK_CHECK_INTERVAL )
1441
+ {
1442
+ if (LockHasWaitersRelation (onerel , AccessExclusiveLock ))
1443
+ {
1444
+ ereport (elevel ,
1445
+ (errmsg ("\"%s\": suspending truncate "
1446
+ "due to conflicting lock request" ,
1447
+ RelationGetRelationName (onerel ))));
1448
+
1449
+ vacrelstats -> lock_waiter_detected = true;
1450
+ return blkno ;
1451
+ }
1452
+ starttime = currenttime ;
1453
+ }
1454
+ }
1455
+
1354
1456
/*
1355
1457
* We don't insert a vacuum delay point here, because we have an
1356
1458
* exclusive lock on the table which we want to hold for as short a
0 commit comments