8000 Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT wil… · postgrespro/postgres_cluster@2d1d96b · GitHub
[go: up one dir, main page]

Skip to content

Commit 2d1d96b

Browse files
committed
Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT will follow,
but seem like a separate patch since most of the remaining work is on the executor side.) I took the opportunity to push selection of the grouping operators for set operations into the parser where it belongs. Otherwise this is just a small exercise in making prepunion.c consider both alternatives. As with the recent DISTINCT patch, this means we can UNION on datatypes that can hash but not sort, and it means that UNION without ORDER BY is no longer certain to produce sorted output.
1 parent 3d40d5e commit 2d1d96b

File tree

20 files changed

+462
-212
lines changed
  • optimizer
  • parser
  • include
  • test/regress
  • 20 files changed

    +462
    -212
    lines changed

    src/backend/catalog/dependency.c

    Lines changed: 10 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -8,7 +8,7 @@
    88
    * Portions Copyright (c) 1994, Regents of the University of California
    99
    *
    1010
    * IDENTIFICATION
    11-
    * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $
    11+
    * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $
    1212
    *
    1313
    *-------------------------------------------------------------------------
    1414
    */
    @@ -1597,6 +1597,15 @@ find_expr_references_walker(Node *node,
    15971597
    context->rtables = list_delete_first(context->rtables);
    15981598
    return result;
    15991599
    }
    1600+
    else if (IsA(node, SetOperationStmt))
    1601+
    {
    1602+
    SetOperationStmt *setop = (SetOperationStmt *) node;
    1603+
    1604+
    /* we need to look at the groupClauses for operator references */
    1605+
    find_expr_references_walker((Node *) setop->groupClauses, context);
    1606+
    /* fall through to examine child nodes */
    1607+
    }
    1608+
    16001609
    return expression_tree_walker(node, find_expr_references_walker,
    16011610
    (void *) context);
    16021611
    }

    src/backend/nodes/copyfuncs.c

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -15,7 +15,7 @@
    1515
    * Portions Copyright (c) 1994, Regents of the University of California
    1616
    *
    1717
    * IDENTIFICATION
    18-
    * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $
    18+
    * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $
    1919
    *
    2020
    *-------------------------------------------------------------------------
    2121
    */
    @@ -1943,6 +1943,7 @@ _copySetOperationStmt(SetOperationStmt *from)
    19431943
    COPY_NODE_FIELD(rarg);
    19441944
    COPY_NODE_FIELD(colTypes);
    19451945
    COPY_NODE_FIELD(colTypmods);
    1946+
    COPY_NODE_FIELD(groupClauses);
    19461947

    19471948
    return newnode;
    19481949
    }

    src/backend/nodes/equalfuncs.c

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -18,7 +18,7 @@
    1818
    * Portions Copyright (c) 1994, Regents of the University of California
    1919
    *
    2020
    * IDENTIFICATION
    21-
    * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $
    21+
    * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $
    2222
    *
    2323
    *-------------------------------------------------------------------------
    2424
    */
    @@ -839,6 +839,7 @@ _equalSetOperationStmt(SetOperationStmt *a, SetOperationStmt *b)
    839839
    COMPARE_NODE_FIELD(rarg);
    840840
    COMPARE_NODE_FIELD(colTypes);
    841841
    COMPARE_NODE_FIELD(colTypmods);
    842+
    COMPARE_NODE_FIELD(groupClauses);
    842843

    843844
    return true;
    844845
    }

    src/backend/nodes/outfuncs.c

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -8,7 +8,7 @@
    88
    *
    99
    *
    1010
    * IDENTIFICATION
    11-
    * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
    11+
    * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $
    1212
    *
    1313
    * NOTES
    1414
    * Every node type that can appear in stored rules' parsetrees *must*
    @@ -1780,6 +1780,7 @@ _outSetOperationStmt(StringInfo str, SetOperationStmt *node)
    17801780
    WRITE_NODE_FIELD(rarg);
    17811781
    WRITE_NODE_FIELD(colTypes);
    17821782
    WRITE_NODE_FIELD(colTypmods);
    1783+
    WRITE_NODE_FIELD(groupClauses);
    17831784
    }
    17841785

    17851786
    static void

    src/backend/nodes/readfuncs.c

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -8,7 +8,7 @@
    88
    *
    99
    *
    1010
    * IDENTIFICATION
    11-
    * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $
    11+
    * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.212 2008/08/07 01:11:49 tgl Exp $
    1212
    *
    1313
    * NOTES
    1414
    * Path and Plan nodes do not have any readfuncs support, because we
    @@ -232,6 +232,7 @@ _readSetOperationStmt(void)
    232232
    READ_NODE_FIELD(rarg);
    233233
    READ_NODE_FIELD(colTypes);
    234234
    READ_NODE_FIELD(colTypmods);
    235+
    READ_NODE_FIELD(groupClauses);
    235236

    236237
    READ_DONE();
    237238
    }

    src/backend/optimizer/plan/planner.c

    Lines changed: 6 additions & 104 deletions
    Original file line numberDiff line numberDiff line change
    @@ -8,7 +8,7 @@
    88
    *
    99
    *
    1010
    * IDENTIFICATION
    11-
    * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $
    11+
    * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
    1212
    *
    1313
    *-------------------------------------------------------------------------
    1414
    */
    @@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root,
    6868
    double tuple_fraction,
    6969
    int64 *offset_est, int64 *count_est);
    7070
    static void preprocess_groupclause(PlannerInfo *root);
    71-
    static Oid *extract_grouping_ops(List *groupClause);
    72-
    static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
    73-
    static bool grouping_is_sortable(List *groupClause);
    74-
    static bool grouping_is_hashable(List *groupClause);
    7571
    static bool choose_hashed_grouping(PlannerInfo *root,
    7672
    double tuple_fraction, double limit_tuples,
    7773
    Path *cheapest_path, Path *sorted_path,
    @@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
    784780

    785781
    /*
    786782
    * If there's a top-level ORDER BY, assume we have to fetch all the
    787-
    * tuples. This might seem too simplistic given all the hackery below
    788-
    * to possibly avoid the sort ... but a nonzero tuple_fraction is only
    789-
    * of use to plan_set_operations() when the setop is UNION ALL, and
    790-
    * the result of UNION ALL is always unsorted.
    783+
    * tuples. This might be too simplistic given all the hackery below
    784+
    * to possibly avoid the sort; but the odds of accurate estimates
    785+
    * here are pretty low anyway.
    791786
    */
    792787
    if (parse->sortClause)
    793788
    tuple_fraction = 0.0;
    @@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
    818813
    */
    819814
    Assert(parse->commandType == CMD_SELECT);
    820815

    821-
    tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
    816+
    tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
    817+
    tlist);
    822818

    823819
    /*
    824820
    * Can't handle FOR UPDATE/SHARE here (parser should have checked
    @@ -1714,100 +1710,6 @@ preprocess_groupclause(PlannerInfo *root)
    17141710
    parse->groupClause = new_groupclause;
    17151711
    }
    17161712

    1717-
    /*
    1718-
    * extract_grouping_ops - make an array of the equality operator OIDs
    1719-
    * for a SortGroupClause list
    1720-
    */
    1721-
    static Oid *
    1722-
    extract_grouping_ops(List *groupClause)
    1723-
    {
    1724-
    int numCols = list_length(groupClause);
    1725-
    int colno = 0;
    1726-
    Oid *groupOperators;
    1727-
    ListCell *glitem;
    1728-
    1729-
    groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
    1730-
    1731-
    foreach(glitem, groupClause)
    1732-
    {
    1733-
    SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
    1734-
    1735-
    groupOperators[colno] = groupcl->eqop;
    1736-
    Assert(OidIsValid(groupOperators[colno]));
    1737-
    colno++;
    1738-
    }
    1739-
    1740-
    return groupOperators;
    1741-
    }
    1742-
    1743-
    /*
    1744-
    * extract_grouping_cols - make an array of the grouping column resnos
    1745-
    * for a SortGroupClause list
    1746-
    */
    1747-
    static AttrNumber *
    1748-
    extract_grouping_cols(List *groupClause, List *tlist)
    1749-
    {
    1750-
    AttrNumber *grpColIdx;
    1751-
    int numCols = list_length(groupClause);
    1752-
    int colno = 0;
    1753-
    ListCell *glitem;
    1754-
    1755-
    grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
    1756-
    1757-
    foreach(glitem, groupClause)
    1758-
    {
    1759-
    SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
    1760-
    TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
    1761-
    1762-
    grpColIdx[colno++] = tle->resno;
    1763-
    }
    1764-
    1765-
    return grpColIdx;
    1766-
    }
    1767-
    1768-
    /*
    1769-
    * grouping_is_sortable - is it possible to implement grouping list by sorting?
    1770-
    *
    1771-
    * This is easy since the parser will have included a sortop if one exists.
    1772-
    */
    1773-
    static bool
    1774-
    grouping_is_sortable(List *groupClause)
    1775-
    {
    1776-
    ListCell *glitem;
    1777-
    1778-
    foreach(glitem, groupClause)
    1779-
    {
    1780-
    SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
    1781-
    1782-
    if (!OidIsValid(groupcl->sortop))
    1783-
    return false;
    1784-
    }
    1785-
    return true;
    1786-
    }
    1787-
    1788-
    /*
    1789-
    * grouping_is_hashable - is it possible to implement grouping list by hashing?
    1790-
    *
    1791-
    * We assume hashing is OK if the equality operators are marked oprcanhash.
    1792-
    * (If there isn't actually a supporting hash function, the executor will
    1793-
    * complain at runtime; but this is a misdeclaration of the operator, not
    1794-
    * a system bug.)
    1795-
    */
    1796-
    static bool
    1797-
    grouping_is_hashable(List *groupClause)
    1798-
    {
    1799-
    ListCell *glitem;
    1800-
    1801-
    foreach(glitem, groupClause)
    1802-
    {
    1803-
    SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
    1804-
    1805-
    if (!op_hashjoinable(groupcl->eqop))
    1806-
    return false;
    1807-
    }
    1808-
    return true;
    1809-
    }
    1810-
    18111713
    /*
    18121714
    * choose_hashed_grouping - should we use hashed grouping?
    18131715
    *

    0 commit comments

    Comments
     (0)
    0