8000 Fix improper repetition of previous results from a hashed aggregate. · s-monk/postgres@2376638 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2376638

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail 8000 .com>
1 parent 35982db commit 2376638

File tree

8 files changed

+102
-7
lines changed

8 files changed

+102
-7
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1909,13 +1909,14 @@ void
19091909
ExecReScanAgg(AggState *node)
19101910
{
19111911
ExprContext *econtext = node->ss.ps.ps_ExprContext;
1912+
Agg *aggnode = (Agg *) node->ss.ps.plan;
19121913
int aggno;
19131914

19141915
node->agg_done = false;
19151916

19161917
node->ss.ps.ps_TupFromTlist = false;
19171918

1918-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1919+
if (aggnode->aggstrategy == AGG_HASHED)
19191920
{
19201921
/*
19211922
* In the hashed case, if we haven't yet built the hash table then we
@@ -1927,11 +1928,13 @@ ExecReScanAgg(AggState *node)
19271928
return;
19281929

19291930
/*
1930-
* If we do have the hash table and the subplan does not have any
1931-
* parameter changes, then we can just rescan the existing hash table;
1932-
* no need to build it again.
1931+
* If we do have the hash table, and the subplan does not have any
1932+
* parameter changes, and none of our own parameter changes affect
1933+
* input expressions of the aggregated functions, then we can just
1934+
* rescan the existing hash table; no need to build it again.
19331935
*/
1934-
if (node->ss.ps.lefttree->chgParam == NULL)
1936+
if (node->ss.ps.lefttree->chgParam == NULL &&
1937+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
19351938
{
19361939
ResetTupleHashIterator(node->hashtable, &node->hashiter);
19371940
return;
@@ -1968,7 +1971,7 @@ ExecReScanAgg(AggState *node)
19681971
*/
19691972
MemoryContextResetAndDeleteChildren(node->aggcontext);
19701973

1971-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1974+
if (aggnode->aggstrategy == AGG_HASHED)
19721975
{
19731976
/* Rebuild an empty hash table */
19741977
build_hash_table(node);

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,7 @@ _copyAgg(const Agg *from)
780780
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
781781
}
782782
COPY_SCALAR_FIELD(numGroups);
783+
COPY_BITMAPSET_FIELD(aggParams);
783784

784785
return newnode;
785786
}

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ _outAgg(StringInfo str, const Agg *node)
644644
appendStringInfo(str, " %u", node->grpOperators[i]);
645645

646646
WRITE_LONG_FIELD(numGroups);
647+
WRITE_BITMAPSET_FIELD(aggParams);
647648
}
648649

649650
static void

src/backend/optimizer/plan/createplan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4152,6 +4152,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
41524152
node->grpColIdx = grpColIdx;
41534153
node->grpOperators = grpOperators;
41544154
node->numGroups = numGroups;
4155+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
41554156

41564157
copy_plan_costsize(plan, lefttree); /* only care about copying size */
41574158
cost_agg(&agg_path, root,

src/backend/optimizer/plan/subselect.c

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
7979
Bitmapset *valid_params,
8080
Bitmapset *scan_params);
8181
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
82+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8283

8384

8485
/*
@@ -2354,6 +2355,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23542355
locally_added_param);
23552356
break;
23562357

2358+
case T_Agg:
2359+
{
2360+
Agg *agg = (Agg *) plan;
2361+
2362+
/*
2363+
* AGG_HASHED plans need to know which Params are referenced
2364+
* in aggregate calls. Do a separate scan to identify them.
2365+
*/
2366+
if (agg->aggstrategy == AGG_HASHED)
2367+
{
2368+
finalize_primnode_context aggcontext;
2369+
2370+
aggcontext.root = root;
2371+
aggcontext.paramids = NULL;
2372+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2373+
&aggcontext);
2374+
finalize_agg_primnode((Node *) agg->plan.qual,
2375+
&aggcontext);
2376+
agg->aggParams = aggcontext.paramids;
2377+
}
2378+
}
2379+
break;
2380+
23572381
case T_WindowAgg:
23582382
finalize_primnode(((WindowAgg *) plan)->startOffset,
23592383
&context);
@@ -2362,7 +2386,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23622386
break;
23632387

23642388
case T_Hash:
2365-
case T_Agg:
23662389
case T_Material:
23672390
case T_Sort:
23682391
case T_Unique:
@@ -2506,6 +2529,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
25062529
(void *) context);
25072530
}
25082531

2532+
/*
2533+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2534+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2535+
* arguments to the result set.
2536+
*/
2537+
static bool
2538+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2539+
{
2540+
if (node == NULL)
2541+
return false;
2542+
if (IsA(node, Aggref))
2543+
{
2544+
Aggref *agg = (Aggref *) node;
2545+
2546+
/* we should not consider the direct arguments, if any */
2547+
finalize_primnode((Node *) agg->args, context);
2548+
return false; /* there can't be any Aggrefs below here */
2549+
}
2550+
return expression_tree_walker(node, finalize_agg_primnode,
2551+
(void *) context);
2552+
}
2553+
25092554
/*
25102555
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
25112556
*

src/include/nodes/plannodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,8 @@ typedef struct Agg
632632
AttrNumber *grpColIdx; /* their indexes in the target list */
633633
Oid *grpOperators; /* equality operators to compare with */
634634
long numGroups; /* estimated number of groups in input */
635+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
636+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
635637
} Agg;
636638

637639
/* ----------------

src/test/regress/expected/aggregates.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,38 @@ from tenk1 o;
305305
9999
306306
(1 row)
307307

308+
-- Test handling of Params within aggregate arguments in hashed aggregation.
309+
-- Per bug report from Jeevan Chalke.
310+
explain (verbose, costs off)
311+
select array(select sum(x+y) s
312+
from generate_series(1,3) y group by y order by s)
313+
from generate_series(1,3) x;
314+
QUERY PLAN
315+
-------------------------------------------------------------------
316+
Function Scan on pg_catalog.generate_series x
317+
Output: (SubPlan 1)
318+
Function Call: generate_series(1, 3)
319+
SubPlan 1
320+
-> Sort
321+
Output: (sum(($0 + y.y))), y.y
322+
Sort Key: (sum(($0 + y.y)))
323+
-> HashAggregate
324+
Output: sum((x.x + y.y)), y.y
325+
-> Function Scan on pg_catalog.generate_series y
326+
Output: y.y
327+
Function Call: generate_series(1, 3)
328+
(12 rows)
329+
330+
select array(select sum(x+y) s
331+
from generate_series(1,3) y group by y order by s)
332+
from generate_series(1,3) x;
333+
array
334+
---------
335+
{2,3,4}
336+
{3,4,5}
337+
{4,5,6}
338+
(3 rows)
339+
308340
--
309341
-- test for bitwise integer aggregates
310342
--

src/test/regress/sql/aggregates.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ select
8686
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
8787
from tenk1 o;
8888

89+
-- Test handling of Params within aggregate arguments in hashed aggregation.
90+
-- Per bug report from Jeevan Chalke.
91+
explain (verbose, costs off)
92+
select array(select sum(x+y) s
93+
from generate_series(1,3) y group by y order by s)
94+
from generate_series(1,3) x;
95+
select array(select sum(x+y) s
96+
from generate_series(1,3) y group by y order by s)
97+
from generate_series(1,3) x;
98+
8999
--
90100
-- test for bitwise integer aggregates
91101
--

0 commit comments

Comments
 (0)
0