8000 Fix improper repetition of previous results from a hashed aggregate. · patchsoft/postgres@25fe5f7 · GitHub
[go: up one dir, main page]

Skip to content

Commit 25fe5f7

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; 8000 but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent da9659f commit 25fe5f7

File tree

8 files changed

+155
-6
lines changed
  • include/nodes
  • test/regress
  • 8 files changed

    +155
    -6
    lines changed

    src/backend/executor/nodeAgg.c

    Lines changed: 6 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -2666,11 +2666,13 @@ ExecReScanAgg(AggState *node)
    26662666
    return;
    26672667

    26682668
    /*
    2669-
    * If we do have the hash table and the subplan does not have any
    2670-
    * parameter changes, then we can just rescan the existing hash table;
    2671-
    * no need to build it again.
    2669+
    * If we do have the hash table, and the subplan does not have any
    2670+
    * parameter changes, and none of our own parameter changes affect
    2671+
    * input expressions of the aggregated functions, then we can just
    2672+
    * rescan the existing hash table; no need to build it again.
    26722673
    */
    2673-
    if (outerPlan->chgParam == NULL)
    2674+
    if (outerPlan->chgParam == NULL &&
    2675+
    !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
    26742676
    {
    26752677
    ResetTupleHashIterator(node->hashtable, &node->hashiter);
    26762678
    return;

    src/backend/nodes/copyfuncs.c

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -846,6 +846,7 @@ _copyAgg(const Agg *from)
    846846< 8000 code class="diff-text syntax-highlighted-line">
    COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
    847847
    }
    848848
    COPY_SCALAR_FIELD(numGroups);
    849+
    COPY_BITMAPSET_FIELD(aggParams);
    849850
    COPY_NODE_FIELD(groupingSets);
    850851
    COPY_NODE_FIELD(chain);
    851852

    src/backend/nodes/outfuncs.c

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -683,7 +683,7 @@ _outAgg(StringInfo str, const Agg *node)
    683683
    appendStringInfo(str, " %u", node->grpOperators[i]);
    684684

    685685
    WRITE_LONG_FIELD(numGroups);
    686-
    686+
    WRITE_BITMAPSET_FIELD(aggParams);
    687687
    WRITE_NODE_FIELD(groupingSets);
    688688
    WRITE_NODE_FIELD(chain);
    689689
    }

    src/backend/optimizer/plan/createplan.c

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -4535,6 +4535,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
    45354535
    node->grpColIdx = grpColIdx;
    45364536
    node->grpOperators = grpOperators;
    45374537
    node->numGroups = numGroups;
    4538+
    node->aggParams = NULL; /* SS_finalize_plan() will fill this */
    45384539

    45394540
    copy_plan_costsize(plan, lefttree); /* only care about copying size */
    45404541
    cost_agg(&agg_path, root,

    src/backend/optimizer/plan/subselect.c

    Lines changed: 47 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -81,6 +81,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
    8181
    Bitmapset *valid_params,
    8282
    Bitmapset *scan_params);
    8383
    static bool finalize_primnode(Node *node, finalize_primnode_context *context);
    84+
    static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
    8485

    8586

    8687< 6D40 div class="diff-text-inner">/*
    @@ -2558,6 +2559,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
    25582559
    locally_added_param);
    25592560
    break;
    25602561

    2562+
    case T_Agg:
    2563+
    {
    2564+
    Agg *agg = (Agg *) plan;
    2565+
    2566+
    /*
    2567+
    * AGG_HASHED plans need to know which Params are referenced
    2568+
    * in aggregate calls. Do a separate scan to identify them.
    2569+
    */
    2570+
    if (agg->aggstrategy == AGG_HASHED)
    2571+
    {
    2572+
    finalize_primnode_context aggcontext;
    2573+
    2574+
    aggcontext.root = root;
    2575+
    aggcontext.paramids = NULL;
    2576+
    finalize_agg_primnode((Node *) agg->plan.targetlist,
    2577+
    &aggcontext);
    2578+
    finalize_agg_primnode((Node *) agg->plan.qual,
    2579+
    &aggcontext);
    2580+
    agg->aggParams = aggcontext.paramids;
    2581+
    }
    2582+
    }
    2583+
    break;
    2584+
    25612585
    case T_WindowAgg:
    25622586
    finalize_primnode(((WindowAgg *) plan)->startOffset,
    25632587
    &context);
    @@ -2566,7 +2590,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
    25662590
    break;
    25672591

    25682592
    case T_Hash:
    2569-
    case T_Agg:
    25702593
    case T_Material:
    25712594
    case T_Sort:
    25722595
    case T_Unique:
    @@ -2710,6 +2733,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
    27102733
    (void *) context);
    27112734
    }
    27122735

    2736+
    /*
    2737+
    * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
    2738+
    * and add IDs of all PARAM_EXEC params appearing within their aggregated
    2739+
    * arguments to the result set.
    2740+
    */
    2741+
    static bool
    2742+
    finalize_agg_primnode(Node *node, finalize_primnode_context *context)
    2743+
    {
    2744+
    if (node == NULL)
    2745+
    return false;
    2746+
    if (IsA(node, Aggref))
    2747+
    {
    2748+
    Aggref *agg = (Aggref *) node;
    2749+
    2750+
    /* we should not consider the direct arguments, if any */
    2751+
    finalize_primnode((Node *) agg->args, context);
    2752+
    finalize_primnode((Node *) agg->aggfilter, context);
    2753+
    return false; /* there can't be any Aggrefs below here */
    2754+
    }
    2755+
    return expression_tree_walker(node, finalize_agg_primnode,
    2756+
    (void *) context);
    2757+
    }
    2758+
    27132759
    /*
    27142760
    * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
    27152761
    *

    src/include/nodes/plannodes.h

    Lines changed: 2 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -721,6 +721,8 @@ typedef struct Agg
    721721
    AttrNumber *grpColIdx; /* their indexes in the target list */
    722722
    Oid *grpOperators; /* equality operators to compare with */
    723723
    long numGroups; /* estimated number of groups in input */
    724+
    Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
    725+
    /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
    724726
    List *groupingSets; /* grouping sets to use */
    725727
    List *chain; /* chained Agg/Sort nodes */
    726728
    } Agg;

    src/test/regress/expected/aggregates.out

    Lines changed: 75 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -366,6 +366,81 @@ from tenk1 o;
    366366
    9999
    367367
    (1 row)
    368368

    369+
    -- Test handling of Params within aggregate arguments in hashed aggregation.
    370+
    -- Per bug report from Jeevan Chalke.
    371+
    explain (verbose, costs off)
    372+
    select s1, s2, sm
    373+
    from generate_series(1, 3) s1,
    374+
    lateral (select s2, sum(s1 + s2) sm
    375+
    from generate_series(1, 3) s2 group by s2) ss
    376+
    order by 1, 2;
    377+
    QUERY PLAN
    378+
    ------------------------------------------------------------------
    379+
    Sort
    380+
    Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
    381+
    Sort Key: s1.s1, s2.s2
    382+
    -> Nested Loop
    383+
    Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
    384+
    -> Function Scan on pg_catalog.generate_series s1
    385+
    Output: s1.s1
    386+
    Function Call: generate_series(1, 3)
    387+
    -> HashAggregate
    388+
    Output: s2.s2, sum((s1.s1 + s2.s2))
    389+
    Group Key: s2.s2
    390+
    -> Function Scan on pg_catalog.generate_series s2
    391+
    Output: s2.s2
    392+
    Function Call: generate_series(1, 3)
    393+
    (14 rows)
    394+
    395+
    select s1, s2, sm
    396+
    from generate_series(1, 3) s1,
    397+
    lateral (select s2, sum(s1 + s2) sm
    398+
    from generate_series(1, 3) s2 group by s2) ss
    399+
    order by 1, 2;
    400+
    s1 | s2 | sm
    401+
    ----+----+----
    402+
    1 | 1 | 2
    403+
    1 | 2 | 3
    404+
    1 | 3 | 4
    405+
    2 | 1 | 3
    406+
    2 | 2 | 4
    407+
    2 | 3 | 5
    408+
    3 | 1 | 4
    409+
    3 | 2 | 5
    410+
    3 | 3 | 6
    411+
    (9 rows)
    412+
    413+
    explain (verbose, costs off)
    414+
    select array(select sum(x+y) s
    415+
    from generate_series(1,3) y group by y order by s)
    416+
    from generate_series(1,3) x;
    417+
    QUERY PLAN
    418+
    -------------------------------------------------------------------
    419+
    Function Scan on pg_catalog.generate_series x
    420+
    Output: (SubPlan 1)
    421+
    Function Call: generate_series(1, 3)
    422+
    SubPlan 1
    423+
    -> Sort
    424+
    Output: (sum((x.x + y.y))), y.y
    425+
    Sort Key: (sum((x.x + y.y)))
    426+
    -> HashAggregate
    427+
    Output: sum((x.x + y.y)), y.y
    428+
    Group Key: y.y
    429+
    -> Function Scan on pg_catalog.generate_series y
    430+
    Output: y.y
    431+
    Function Call: generate_series(1, 3)
    432+
    (13 rows)
    433+
    434+
    select array(select sum(x+y) s
    435+
    from generate_series(1,3) y group by y order by s)
    436+
    from generate_series(1,3) x;
    437+
    array
    438+
    ---------
    439+
    {2,3,4}
    440+
    {3,4,5}
    441+
    {4,5,6}
    442+
    (3 rows)
    443+
    369444
    --
    370445
    -- test for bitwise integer aggregates
    371446
    --

    src/test/regress/sql/aggregates.sql

    Lines changed: 22 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -98,6 +98,28 @@ select
    9898
    (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
    9999
    from tenk1 o;
    100100

    101+
    -- Test handling of Params within aggregate arguments in hashed aggregation.
    102+
    -- Per bug report from Jeevan Chalke.
    103+
    explain (verbose, costs off)
    104+
    select s1, s2, sm
    105+
    from generate_series(1, 3) s1,
    106+
    lateral (select s2, sum(s1 + s2) sm
    107+
    from generate_series(1, 3) s2 group by s2) ss
    108+
    order by 1, 2;
    109+
    select s1, s2, sm
    110+
    from generate_series(1, 3) s1,
    111+
    lateral (select s2, sum(s1 + s2) sm
    112+
    from generate_series(1, 3) s2 group by s2) ss
    113+
    order by 1, 2;
    114+
    115+
    explain (verbose, costs off)
    116+
    select array(select sum(x+y) s
    117+
    from generate_series(1,3) y group by y order by s)
    118+
    from generate_series(1,3) x;
    119+
    select array(select sum(x+y) s
    120+
    from generate_series(1,3) y group by y order by s)
    121+
    from generate_series(1,3) x;
    122+
    101123
    --
    102124
    -- test for bitwise integer aggregates
    103125
    --

    0 commit comments

    Comments
     (0)
    0