8000 Fix improper repetition of previous results from a hashed aggregate. · s-monk/postgres@3570ea4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3570ea4

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent 9942376 commit 3570ea4

File tree

8 files changed

+102
-7
lines changed

8 files changed

+102
-7
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,13 +1905,14 @@ void
19051905
ExecReScanAgg(AggState *node)
19061906
{
19071907
ExprContext *econtext = node->ss.ps.ps_ExprContext;
1908+
Agg *aggnode = (Agg *) node->ss.ps.plan;
19081909
int aggno;
19091910

19101911
node->agg_done = false;
19111912

19121913
node->ss.ps.ps_TupFromTlist = false;
19131914

1914-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1915+
if (aggnode->aggstrategy == AGG_HASHED)
19151916
{
19161917
/*
19171918
* In the hashed case, if we haven't yet built the hash table then we
@@ -1923,11 +1924,13 @@ ExecReScanAgg(AggState *node)
19231924
return;
19241925

19251926
/*
1926-
* If we do have the hash table and the subplan does not have any
1927-
* parameter changes, then we can just rescan the existing hash table;
1928-
* no need to build it again.
1927+
* If we do have the hash table, and the subplan does not have any
1928+
* parameter changes, and none of our own parameter changes affect
1929+
* input expressions of the aggregated functions, then we can just
1930+
* rescan the existing hash table; no need to build it again.
19291931
*/
1930-
if (node->ss.ps.lefttree->chgParam == NULL)
1932+
if (node->ss.ps.lefttree->chgParam == NULL &&
1933+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
19311934
{
19321935
ResetTupleHashIterator(node->hashtable, &node->hashiter);
19331936
return;
@@ -1964,7 +1967,7 @@ ExecReScanAgg(AggState *node)
19641967
*/
19651968
MemoryContextResetAndDeleteChildren(node->aggcontext);
19661969

1967-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1970+
if (aggnode->aggstrategy == AGG_HASHED)
19681971
{
19691972
/* Rebuild an empty hash table */
19701973
build_hash_table(node);

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,7 @@ _copyAgg(Agg *from)
769769
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
770770
}
771771
COPY_SCALAR_FIELD(numGroups);
772+
COPY_BITMAPSET_FIELD(aggParams);
772773

773774
return newnode;
774775
}

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ _outAgg(StringInfo str, Agg *node)
642642
appendStringInfo(str, " %u", node->grpOperators[i]);
643643

644644
WRITE_LONG_FIELD(numGroups);
645+
WRITE_BITMAPSET_FIELD(aggParams);
645646
}
646647

647648
static void

src/backend/optimizer/plan/createplan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3940,6 +3940,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
39403940
node->grpColIdx = grpColIdx;
39413941
node->grpOperators = grpOperators;
39423942
node->numGroups = numGroups;
3943+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
39433944

39443945
copy_plan_costsize(plan, lefttree); /* only care about copying size */
39453946
cost_agg(&agg_path, root,

src/backend/optimizer/plan/subselect.c

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
8181
Bitmapset *valid_params,
8282
Bitmapset *scan_params);
8383
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
84+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8485

8586

8687
/*
@@ -2351,6 +2352,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23512352
locally_added_param);
23522353
break;
23532354

2355+
case T_Agg:
2356+
{
2357+
Agg *agg = (Agg *) plan;
2358+
2359+
/*
2360+
* AGG_HASHED plans need to know which Params are referenced
2361+
* in aggregate calls. Do a separate scan to identify them.
2362+
*/
2363+
if (agg->aggstrategy == AGG_HASHED)
2364+
{
2365+
finalize_primnode_context aggcontext;
2366+
2367+
aggcontext.root = root;
2368+
aggcontext.paramids = NULL;
2369+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2370+
&aggcontext);
2371+
finalize_agg_primnode((Node *) agg->plan.qual,
2372+
&aggcontext);
2373+
agg->aggParams = aggcontext.paramids;
2374+
}
2375+
}
2376+
break;
2377+
23542378
case T_WindowAgg:
23552379
finalize_primnode(((WindowAgg *) plan)->startOffset,
23562380
&context);
@@ -2359,7 +2383,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23592383
break;
23602384

23612385
case T_Hash:
2362-
case T_Agg:
23632386
case T_Material:
23642387
case T_Sort:
23652388
case T_Unique:
@@ -2503,6 +2526,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
25032526
(void *) context);
25042527
}
25052528

2529+
/*
2530+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2531+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2532+
* arguments to the result set.
2533+
*/
2534+
static bool
2535+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2536+
{
2537+
if (node == NULL)
2538+
return false;
2539+
if (IsA(node, Aggref))
2540+
{
2541+
Aggref *agg = (Aggref *) node;
2542+
2543+
/* we should not consider the direct arguments, if any */
2544+
finalize_primnode((Node *) agg->args, context);
2545+
return false; /* there can't be any Aggrefs below here */
2546+
}
2547+
return expression_tree_walker(node, finalize_agg_primnode,
2548+
(void *) context);
2549+
}
2550+
25062551
/*
25072552
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
25082553
*

src/include/nodes/plannodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,8 @@ typedef struct Agg
604604
AttrNumber *grpColIdx; /* their indexes in the target list */
605605
Oid *grpOperators; /* equality operators to compare with */
606606
long numGroups; /* estimated number of groups in input */
607+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
608+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
607609
} Agg;
608610

609611
/* ----------------

src/test/regress/expected/aggregates.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,38 @@ from tenk1 o;
305305
9999
306306
(1 row)
307307

308+
-- Test handling of Params within aggregate arguments in hashed aggregation.
309+
-- Per bug report from Jeevan Chalke.
310+
explain (verbose, costs off)
311+
select array(select sum(x+y) s
312+
from generate_series(1,3) y group by y order by s)
313+
from generate_series(1,3) x;
314+
QUERY PLAN
315+
-------------------------------------------------------------------
316+
Function Scan on pg_catalog.generate_series x
317+
Output: (SubPlan 1)
318+
Function Call: generate_series(1, 3)
319+
SubPlan 1
320+
-> Sort
321+
Output: (sum(($0 + y.y))), y.y
322+
Sort Key: (sum(($0 + y.y)))
323+
-> HashAggregate
324+
Output: sum((x.x + y.y)), y.y
325+
-> Function Scan on pg_catalog.generate_series y
326+
Output: y.y
327+
Function Call: generate_series(1, 3)
328+
(12 rows)
329+
330+
select array(select sum(x+y) s
331+
from generate_series(1,3) y group by y order by s)
332+
from generate_series(1,3) x;
333+
?column?
334+
----------
335+
{2,3,4}
336+
{3,4,5}
337+
{4,5,6}
338+
(3 rows)
339+
308340
--
309341
-- test for bitwise integer aggregates
310342
--

src/test/regress/sql/aggregates.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ select
8686
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
8787
from tenk1 o;
8888

89+
-- Test handling of Params within aggregate arguments in hashed aggregation.
90+
-- Per bug report from Jeevan Chalke.
91+
explain (verbose, costs off)
92+
select array(select sum(x+y) s
93+
from generate_series(1,3) y group by y order by s)
94+
from generate_series(1,3) x;
95+
select array(select sum(x+y) s
96+
from generate_series(1,3) y group by y order by s)
97+
from generate_series(1,3) x;
98+
8999
--
90100
-- test for bitwise integer aggregates
91101
--

0 commit comments

Comments
 (0)
0