8000 Fix improper repetition of previous results from a hashed aggregate. · s-monk/postgres@08a823e · GitHub
[go: up one dir, main page]

Skip to content

Commit 08a823e

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent 07d4723 commit 08a823e

File tree

8 files changed

+158
-7
lines changed

8 files changed

+158
-7
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2049,13 +2049,14 @@ void
20492049
ExecReScanAgg(AggState *node)
20502050
{
20512051
ExprContext *econtext = node->ss.ps.ps_ExprContext;
2052+
Agg *aggnode = (Agg *) node->ss.ps.plan;
20522053
int aggno;
20532054

20542055
node->agg_done = false;
20552056

20562057
node->ss.ps.ps_TupFromTlist = false;
20572058

2058-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
2059+
if (aggnode->aggstrategy == AGG_HASHED)
20592060
{
20602061
/*
20612062
* In the hashed case, if we haven't yet built the hash table then we
@@ -2067,11 +2068,13 @@ ExecReScanAgg(AggState *node)
20672068
return;
20682069

20692070
/*
2070-
* If we do have the hash table and the subplan does not have any
2071-
* parameter changes, then we can just rescan the existing hash table;
2072-
* no need to build it again.
2071+
* If we do have the hash table, and the subplan does not have any
2072+
* parameter changes, and none of our own parameter changes affect
2073+
* input expressions of the aggregated functions, then we can just
2074+
* rescan the existing hash table; no need to build it again.
20732075
*/
2074-
if (node->ss.ps.lefttree->chgParam == NULL)
2076+
if (node->ss.ps.lefttree->chgParam == NULL &&
2077+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
20752078
{
20762079
ResetTupleHashIterator(node->hashtable, &node->hashiter);
20772080
return;
@@ -2110,7 +2113,7 @@ ExecReScanAgg(AggState *node)
21102113
*/
21112114
MemoryContextResetAndDeleteChildren(node->aggcontext);
21122115

2113-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
2116+
if (aggnode->aggstrategy == AGG_HASHED)
21142117
{
21152118
/* Rebuild an empty hash table */
21162119
build_hash_table(node);

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,7 @@ _copyAgg(const Agg *from)
779779
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
780780
}
781781
COPY_SCALAR_FIELD(numGroups);
782+
COPY_BITMAPSET_FIELD(aggParams);
782783

783784
return newnode;
784785
}

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,7 @@ _outAgg(StringInfo str, const Agg *node)
643643
appendStringInfo(str, " %u", node->grpOperators[i]);
644644

645645
WRITE_LONG_FIELD(numGroups);
646+
WRITE_BITMAPSET_FIELD(aggParams);
646647
}
647648

648649
static void

src/backend/optimizer/plan/createplan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4299,6 +4299,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
42994299
node->grpColIdx = grpColIdx;
43004300
node->grpOperators = grpOperators;
43014301
node->numGroups = numGroups;
4302+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
43024303

43034304
copy_plan_costsize(plan, lefttree); /* only care about copying size */
43044305
cost_agg(&agg_path, root,

src/backend/optimizer/plan/subselect.c

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
8080
Bitmapset *valid_params,
8181
Bitmapset *scan_params);
8282
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
83+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8384

8485

8586
/*
@@ -2380,6 +2381,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23802381
locally_added_param);
23812382
break;
23822383

2384+
case T_Agg:
2385+
{
2386+
Agg *agg = (Agg *) plan;
2387+
2388+
/*
2389+
* AGG_HASHED plans need to know which Params are referenced
2390+
* in aggregate calls. Do a separate scan to identify them.
2391+
*/
2392+
if (agg->aggstrategy == AGG_HASHED)
2393+
{
2394+
finalize_primnode_context aggcontext;
2395+
2396+
aggcontext.root = root;
2397+
aggcontext.paramids = NULL;
2398+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2399+
&aggcontext);
2400+
finalize_agg_primnode((Node *) agg->plan.qual,
2401+
&aggcontext);
2402+
agg->aggParams = aggcontext.paramids;
2403+
}
2404+
}
2405+
break;
2406+
23832407
case T_WindowAgg:
23842408
finalize_primnode(((WindowAgg *) plan)->startOffset,
23852409
&context);
@@ -2388,7 +2412,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23882412
break;
23892413

23902414
case T_Hash:
2391-
case T_Agg:
23922415
case T_Material:
23932416
case T_Sort:
23942417
case T_Unique:
@@ -2532,6 +2555,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
25322555
(void *) context);
25332556
}
25342557

2558+
/*
2559+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2560+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2561+
* arguments to the result set.
2562+
*/
2563+
static bool
2564+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2565+
{
2566+
if (node == NULL)
2567+
return false;
2568+
if (IsA(node, Aggref))
2569+
{
2570+
Aggref *agg = (Aggref *) node;
2571+
2572+
/* we should not consider the direct arguments, if any */
2573+
finalize_primnode((Node *) agg->args, context);
2574+
finalize_primnode((Node *) agg->aggfilter, context);
2575+
return false; /* there can't be any Aggrefs below here */
2576+
}
2577+
return expression_tree_walker(node, finalize_agg_primnode,
2578+
(void *) context);
2579+
}
2580+
25352581
/*
25362582
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
25372583
*

src/include/nodes/plannodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,8 @@ typedef struct Agg
631631
AttrNumber *grpColIdx; /* their indexes in the target list */
632632
Oid *grpOperators; /* equality operators to compare with */
633633
long numGroups; /* estimated number of groups in input */
634+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
635+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
634636
} Agg;
635637

636638
/* ----------------

src/test/regress/expected/aggregates.out

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,81 @@ from tenk1 o;
366366
9999
367367
(1 row)
368368

369+
-- Test handling of Params within aggregate arguments in hashed aggregation.
370+
-- Per bug report from Jeevan Chalke.
371+
explain (verbose, costs off)
372+
select s1, s2, sm
373+
from generate_series(1, 3) s1,
374+
lateral (select s2, sum(s1 + s2) sm
375+
from generate_series(1, 3) s2 group by s2) ss
376+
order by 1, 2;
377+
QUERY PLAN
378+
------------------------------------------------------------------
379+
Sort
380+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
381+
Sort Key: s1.s1, s2.s2
382+
-> Nested Loop
383+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
384+
-> Function Scan on pg_catalog.generate_series s1
385+
Output: s1.s1
386+
Function Call: generate_series(1, 3)
387+
-> HashAggregate
388+
Output: s2.s2, sum((s1.s1 + s2.s2))
389+
Group Key: s2.s2
390+
-> Function Scan on pg_catalog.generate_series s2
391+
Output: s2.s2
392+
Function Call: generate_series(1, 3)
393+
(14 rows)
394+
395+
select s1, s2, sm
396+
from generate_series(1, 3) s1,
397+
lateral (select s2, sum(s1 + s2) sm
398+
from generate_series(1, 3) s2 group by s2) ss
399+
order by 1, 2;
400+
s1 | s2 | sm
401+
----+----+----
402+
1 | 1 | 2
403+
1 | 2 | 3
404+
1 | 3 | 4
405+
2 | 1 | 3
406+
2 | 2 | 4
407+
2 | 3 | 5
408+
3 | 1 | 4
409+
3 | 2 | 5
410+
3 | 3 | 6
411+
(9 rows)
412+
413+
explain (verbose, costs off)
414+
select array(select sum(x+y) s
415+
from generate_series(1,3) y group by y order by s)
416+
from generate_series(1,3) x;
417+
QUERY PLAN
418+
-------------------------------------------------------------------
419+
Function Scan on pg_catalog.generate_series x
420+
Output: (SubPlan 1)
421+
Function Call: generate_series(1, 3)
422+
SubPlan 1
423+
-> Sort
424+
Output: (sum((x.x + y.y))), y.y
425+
Sort Key: (sum((x.x + y.y)))
426+
-> HashAggregate
427+
Output: sum((x.x + y.y)), y.y
428+
Group Key: y.y
429+
-> Function Scan on pg_catalog.generate_series y
430+
Output: y.y
431+
Function Call: generate_series(1, 3)
432+
(13 rows)
433+
434+
select array(select sum(x+y) s
435+
from generate_series(1,3) y group by y order by s)
436+
from generate_series(1,3) x;
437+
array
438+
---------
439+
{2,3,4}
440+
{3,4,5}
441+
{4,5,6}
442+
(3 rows)
443+
369444
--
370445
-- test for bitwise integer aggregates
371446
--

src/test/regress/sql/aggregates.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,28 @@ select
9898
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
9999
from tenk1 o;
100100

101+
-- Test handling of Params within aggregate arguments in hashed aggregation.
102+
-- Per bug report from Jeevan Chalke.
103+
explain (verbose, costs off)
104+
select s1, s2, sm
105+
from generate_series(1, 3) s1,
106+
lateral (select s2, sum(s1 + s2) sm
107+
from generate_series(1, 3) s2 group by s2) ss
108+
order by 1, 2;
109+
select s1, s2, sm
110+
from generate_series(1, 3) s1,
111+
lateral (select s2, sum(s1 + s2) sm
112+
from generate_series(1, 3) s2 group by s2) ss
113+
order by 1, 2;
114+
115+
explain (verbose, costs off)
116+
select array(select sum(x+y) s
117+
from generate_series(1,3) y group by y order by s)
118+
from generate_series(1,3) x;
119+
select array(select sum(x+y) s
120+
from generate_series(1,3) y group by y order by s)
121+
from generate_series(1,3) x;
122+
101123
--
102124
-- test for bitwise integer aggregates
103125
--

0 commit comments

Comments
 (0)
0