8000 Add defenses against integer overflow in dynahash numbuckets calculat… · mwarnock/postgres@b7ef58a · GitHub
[go: up one dir, main page]

Skip to content

Commit b7ef58a

Browse files
committed
Add defenses against integer overflow in dynahash numbuckets calculations.
The dynahash code requires the number of buckets in a hash table to fit in an int; but since we calculate the desired hash table size dynamically, there are various scenarios where we might calculate too large a value. The resulting overflow can lead to infinite loops, division-by-zero crashes, etc. I (tgl) had previously installed some defenses against that in commit 299d171, but that covered only one call path. Moreover it worked by limiting the request size to work_mem, but in a 64-bit machine it's possible to set work_mem high enough that the problem appears anyway. So let's fix the problem at the root by installing limits in the dynahash.c functions themselves. Trouble report and patch by Jeff Davis.
1 parent 175f7a3 commit b7ef58a

File tree

2 files changed

+41
-12
lines changed

2 files changed

+41
-12
lines changed

src/backend/executor/nodeHash.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,9 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
431431
* Both nbuckets and nbatch must be powers of 2 to make
432432
* ExecHashGetBucketAndBatch fast. We already fixed nbatch; now inflate
433433
* nbuckets to the next larger power of 2. We also force nbuckets to not
434-
* be real small, by starting the search at 2^10.
434+
* be real small, by starting the search at 2^10. (Note: above we made
435+
* sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
436+
* overflow, nor can the final shift to recalculate nbuckets.)
435437
*/
436438
i = 10;
437439
while ((1 << i) < nbuckets)

src/backend/utils/hash/dynahash.c

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@
6363

6464
#include "postgres.h"
6565

66+
#include <limits.h>
67+
6668
#include "access/xact.h"
6769
#include "storage/shmem.h"
6870
#include "storage/spin.h"
@@ -199,6 +201,8 @@ static void hdefault(HTAB *hashp);
199201
static int choose_nelem_alloc(Size entrysize);
200202
static bool init_htab(HTAB *hashp, long nelem);
201203
static void hash_corrupted(HTAB *hashp);
204+
static long next_pow2_long(long num);
205+
static int next_pow2_int(long num);
202206
static void register_seq_scan(HTAB *hashp);
203207
static void deregister_seq_scan(HTAB *hashp);
204208
static bool has_seq_scans(HTAB *hashp);
@@ -373,8 +377,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
373377
{
374378
/* Doesn't make sense to partition a local hash table */
375379
Assert(flags & HASH_SHARED_MEM);
376-
/* # of partitions had better be a power of 2 */
377-
Assert(info->num_partitions == (1L << my_log2(info->num_partitions)));
380+
381+
/*
382+
* The number of partitions had better be a power of 2. Also, it must
383+
* be less than INT_MAX (see init_htab()), so call the int version of
384+
* next_pow2.
385+
*/
386+
Assert(info->num_partitions == next_pow2_int(info->num_partitions));
378387

379388
hctl->num_partitions = info->num_partitions;
380389
}
@@ -515,7 +524,6 @@ init_htab(HTAB *hashp, long nelem)
515524
{
516525
HASHHDR *hctl = hashp->hctl;
517526
HASHSEGMENT *segp;
518-
long lnbuckets;
519527
int nbuckets;
520528
int nsegs;
521529

@@ -530,9 +538,7 @@ init_htab(HTAB *hashp, long nelem)
530538
* number of buckets. Allocate space for the next greater power of two
531539
* number of buckets
532540
*/
533-
lnbuckets = (nelem - 1) / hctl->ffactor + 1;
534-
535-
nbuckets = 1 << my_log2(lnbuckets);
541+
nbuckets = next_pow2_int((nelem - 1) / hctl->ffactor + 1);
536542

537543
/*
538544
* In a partitioned table, nbuckets must be at least equal to
@@ -550,7 +556,7 @@ init_htab(HTAB *hashp, long nelem)
550556
* Figure number of directory segments needed, round up to a power of 2
551557
*/
552558
nsegs = (nbuckets - 1) / hctl->ssize + 1;
553-
nsegs = 1 << my_log2(nsegs);
559+
nsegs = next_pow2_int(nsegs);
554560

555561
/*
556562
* Make sure directory is big enough. If pre-allocated directory is too
@@ -620,9 +626,9 @@ hash_estimate_size(long num_entries, Size entrysize)
620626
elementAllocCnt;
621627

622628
/* estimate number of buckets wanted */
623-
nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
629+
nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
624630
/* # of segments needed for nBuckets */
625-
nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
631+
nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
626632
/* directory entries */
627633
nDirEntries = DEF_DIRSIZE;
628634
while (nDirEntries < nSegments)
@@ -663,9 +669,9 @@ hash_select_dirsize(long num_entries)
663669
nDirEntries;
664670

665671
/* estimate number of buckets wanted */
666-
nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
672+
nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
667673
/* # of segments needed for nBuckets */
668-
nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
674+
nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
669675
/* directory entries */
670676
nDirEntries = DEF_DIRSIZE;
671677
while (nDirEntries < nSegments)
@@ -1397,11 +1403,32 @@ my_log2(long num)
13971403
int i;
13981404
long limit;
13991405

1406+
/* guard against too-large input, which would put us into infinite loop */
1407+
if (num > LONG_MAX / 2)
1408+
num = LONG_MAX / 2;
1409+
14001410
for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
14011411
;
14021412
return i;
14031413
}
14041414

1415+
/* calculate first power of 2 >= num, bounded to what will fit in a long */
1416+
static long
1417+
next_pow2_long(long num)
1418+
{
1419+
/* my_log2's internal range check is sufficient */
1420+
return 1L << my_log2(num);
1421+
}
1422+
1423+
/* calculate first power of 2 >= num, bounded to what will fit in an int */
1424+
static int
1425+
next_pow2_int(long num)
1426+
{
1427+
if (num > INT_MAX / 2)
1428+
num = INT_MAX / 2;
1429+
return 1 << my_log2(num);
1430+
}
1431+
14051432

14061433
/************************* SEQ SCAN TRACKING ************************/
14071434

0 commit comments

Comments
 (0)
0