8000 Order active window clauses for greater reuse of Sort nodes. · postgrespro/postgres@728202b · GitHub
[go: up one dir, main page]

Skip to content

Commit 728202b

Browse files
committed
Order active window clauses for greater reuse of Sort nodes.
By sorting the active windo 8000 w list lexicographically by the sort clause list but putting longer clauses before shorter prefixes, we generate more chances to elide Sort nodes when building the path. Author: Daniel Gustafsson (with some editorialization by me) Reviewed-by: Alexander Kuzmenkov, Masahiko Sawada, Tom Lane Discussion: https://postgr.es/m/124A7F69-84CD-435B-BA0E-2695BE21E5C2%40yesql.se
1 parent 75f9c4c commit 728202b

File tree

4 files changed

+177
-60
lines changed

4 files changed

+177
-60
lines changed

src/backend/nodes/list.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,8 +1011,11 @@ list_append_unique_oid(List *list, Oid datum)
10111011
* via equal().
10121012
*
10131013
* This is almost the same functionality as list_union(), but list1 is
1014-
* modified in-place rather than being copied. Note also that list2's cells
1015-
* are not inserted in list1, so the analogy to list_concat() isn't perfect.
1014+
* modified in-place rather than being copied. However, callers of this
1015+
* function may have strict ordering expectations -- i.e. that the relative
1016+
* order of those list2 elements that are not duplicates is preserved. Note
1017+
* also that list2's cells are not inserted in list1, so the analogy to
1018+
* list_concat() isn't perfect.
10161019
*/
10171020
List *
10181021
list_concat_unique(List *list1, List *list2)

src/backend/optimizer/plan/planner.c

Lines changed: 109 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,17 @@ typedef struct
110110
int *tleref_to_colnum_map;
111111
} grouping_sets_data;
112112

113+
/*
114+
* Temporary structure for use during WindowClause reordering in order to be
115+
* be able to sort WindowClauses on partitioning/ordering prefix.
116+
*/
117+
typedef struct
118+
{
119+
WindowClause *wc;
120+
List *uniqueOrder; /* A List of unique ordering/partitioning
121+
* clauses per Window */
122+
} WindowClauseSortData;
123+
113124
/* Local functions */
114125
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
115126
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -236,6 +247,7 @@ static void create_partitionwise_grouping_paths(PlannerInfo *root,
236247
static bool group_by_has_partkey(RelOptInfo *input_rel,
237248
List *targetList,
238249
List *groupClause);
250+
static int common_prefix_cmp(const void *a, const void *b);
239251

240252

241253
/*****************************************************************************
@@ -5259,67 +5271,119 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
52595271
static List *
52605272
select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
52615273
{
5262-
List *result;
5263-
List *actives;
5274+
List *windowClause = root->parse->windowClause;
5275+
List *result = NIL;
52645276
ListCell *lc;
5277+
int nActive = 0;
5278+
WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5279+
* list_length(windowClause));
52655280

5266-
/* First, make a list of the active windows */
5267-
actives = NIL;
5268-
foreach(lc, root->parse->windowClause)
5281+
/* First, construct an array of the active windows */
5282+
foreach(lc, windowClause)
52695283
{
52705284
WindowClause *wc = lfirst_node(WindowClause, lc);
52715285

52725286
/* It's only active if wflists shows some related WindowFuncs */
52735287
Assert(wc->winref <= wflists->maxWinRef);
5274-
if (wflists->windowFuncs[wc->winref] != NIL)
5275-
actives = lappend(actives, wc);
5288+
if (wflists->windowFuncs[wc->winref] == NIL)
5289+
continue;
5290+
5291+
actives[nActive].wc = wc; /* original clause */
5292+
5293+
/*
5294+
* For sorting, we want the list of partition keys followed by the
5295+
* list of sort keys. But pathkeys construction will remove duplicates
5296+
* between the two, so we can as well (even though we can't detect all
5297+
* of the duplicates, since some may come from ECs - that might mean
5298+
* we miss optimization chances here). We must, however, ensure that
5299+
* the order of entries is preserved with respect to the ones we do
5300+
* keep.
5301+
*
5302+
* partitionClause and orderClause had their own duplicates removed in
5303+
* parse analysis, so we're only concerned here with removing
5304+
* orderClause entries that also appear in partitionClause.
5305+
*/
5306+
actives[nActive].uniqueOrder =
5307+
list_concat_unique(list_copy(wc->partitionClause),
5308+
wc->orderClause);
5309+
nActive++;
52765310
}
52775311

52785312
/*
5279-
* Now, ensure that windows with identical partitioning/ordering clauses
5280-
* are adjacent in the list. This is required by the SQL standard, which
5281-
* says that only one sort is to be used for such windows, even if they
5282-
* are otherwise distinct (eg, different names or framing clauses).
5313+
* Sort active windows by their partitioning/ordering clauses, ignoring
5314+
* any framing clauses, so that the windows that need the same sorting are
5315+
* adjacent in the list. When we come to generate paths, this will avoid
5316+
* inserting additional Sort nodes.
52835317
*
5284-
* There is room to be much smarter here, for example detecting whether
5285-
* one window's sort keys are a prefix of another's (so that sorting for
5286-
* the latter would do for the former), or putting windows first that
5287-
* match a sort order available for the underlying query. For the moment
5288-
* we are content with meeting the spec.
5289-
*/
5290-
result = NIL;
5291-
while (actives != NIL)
5292-
{
5293-
WindowClause *wc = linitial_node(WindowClause, actives);
5294-
ListCell *prev;
5295-
ListCell *next;
5296-
5297-
/* Move wc from actives to result */
5298-
actives = list_delete_first(actives);
5299-
result = lappend(result, wc);
5300-
5301-
/* Now move any matching windows from actives to result */
5302-
prev = NULL;
5303-
for (lc = list_head(actives); lc; lc = next)
5304-
{
5305-
WindowClause *wc2 = lfirst_node(WindowClause, lc);
5318+
* This is how we implement a specific requirement from the SQL standard,
5319+
* which says that when two or more windows are order-equivalent (i.e.
5320+
* have matching partition and order clauses, even if their names or
5321+
* framing clauses differ), then all peer rows must be presented in the
5322+
* same order in all of them. If we allowed multiple sort nodes for such
5323+
* cases, we'd risk having the peer rows end up in different orders in
5324+
* equivalent windows due to sort instability. (See General Rule 4 of
5325+
* <window clause> in SQL2008 - SQL2016.)
5326+
*
5327+
* Additionally, if the entire list of clauses of one window is a prefix
5328+
* of another, put first the window with stronger sorting requirements.
5329+
* This way we will first sort for stronger window, and won't have to sort
5330+
* again for the weaker one.
5331+
*/
5332+
qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
53065333

5307-
next = lnext(lc);
5308-
/* framing options are NOT to be compared here! */
5309-
if (equal(wc->partitionClause, wc2->partitionClause) &&
5310-
equal(wc->orderClause, wc2->orderClause))
5311-
{
5312-
actives = list_delete_cell(actives, lc, prev);
5313-
result = lappend(result, wc2);
5314-
}
5315-
else
5316-
prev = lc;
5317-
}
5318-
}
5334+
/* build ordered list of the original WindowClause nodes */
5335+
for (int i = 0; i < nActive; i++)
5336+
result = lappend(result, actives[i].wc);
5337+
5338+
pfree(actives);
53195339

53205340
return result;
53215341
}
53225342

5343+
/*
5344+
* common_prefix_cmp
5345+
* QSort comparison function for WindowClauseSortData
5346+
*
5347+
* Sort the windows by the required sorting clauses. First, compare the sort
5348+
* clauses themselves. Second, if one window's clauses are a prefix of another
5349+
* one's clauses, put the window with more sort clauses first.
5350+
*/
5351+
static int
5352+
common_prefix_cmp(const void *a, const void *b)
5353+
{
5354+
const WindowClauseSortData *wcsa = a;
5355+
const WindowClauseSortData *wcsb = b;
5356+
ListCell *item_a;
5357+
ListCell *item_b;
5358+
5359+
forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5360+
{
5361+
SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5362+
SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5363+
5364+
if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5365+
return -1;
5366+
else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5367+
return 1;
5368+
else if (sca->sortop > scb->sortop)
5369+
return -1;
5370+
else if (sca->sortop < scb->sortop)
5371+
return 1;
5372+
else if (sca->nulls_first && !scb->nulls_first)
5373+
return -1;
5374+
else if (!sca->nulls_first && scb->nulls_first)
5375+
return 1;
5376+
/* no need to compare eqop, since it is fully determined by sortop */
5377+
}
5378+
5379+
if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5380+
return -1;
5381+
else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5382+
return 1;
5383+
5384+
return 0;
5385+
}
5386+
53235387
/*
53245388
* make_window_input_target
53255389
* Generate appropriate PathTarget for initial input to WindowAgg nodes.

src/test/regress/expected/window.out

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -504,9 +504,9 @@ SELECT sum(salary),
504504
FROM empsalary GROUP BY depname;
505505
sum | row_number | sum
506506
-------+------------+-------
507-
14600 | 3 | 14600
508-
7400 | 2 | 22000
509507
25100 | 1 | 47100
508+
7400 | 2 | 22000
509+
14600 | 3 | 14600
510510
(3 rows)
511511

512512
-- identical windows with different names
@@ -2994,9 +2994,9 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum(
29942994
FROM empsalary GROUP BY depname;
29952995
sum | row_number | filtered_sum | depname
29962996
-------+------------+--------------+-----------
2997-
14600 | 3 | | sales
2998-
7400 | 2 | 3500 | personnel
29992997
25100 | 1 | 22600 | develop
2998+
7400 | 2 | 3500 | personnel
2999+
14600 | 3 | | sales
30003000
(3 rows)
30013001

30023002
-- Test pushdown of quals into a subquery containing window functions
@@ -3008,13 +3008,13 @@ SELECT * FROM
30083008
min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary
30093009
FROM empsalary) emp
30103010
WHERE depname = 'sales';
3011-
QUERY PLAN
3012-
---------------------------------------------------------------------
3011+
QUERY PLAN
3012+
--------------------------------------------------------------------------
30133013
Subquery Scan on emp
30143014
-> WindowAgg
3015-
-> Sort
3016-
Sort Key: (((empsalary.depname)::text || 'A'::text))
3017-
-> WindowAgg
3015+
-> WindowAgg
3016+
-> Sort
3017+
Sort Key: (((empsalary.depname)::text || 'A'::text))
30183018
-> Seq Scan on empsalary
30193019
Filter: ((depname)::text = 'sales'::text)
30203020
(7 rows)
@@ -3027,19 +3027,53 @@ SELECT * FROM
30273027
min(salary) OVER (PARTITION BY depname) depminsalary
30283028
FROM empsalary) emp
30293029
WHERE depname = 'sales';
3030-
QUERY PLAN
3031-
-----------------------------------------------------------
3030+
QUERY PLAN
3031+
-------------------------------------------------------
30323032
Subquery Scan on emp
30333033
Filter: ((emp.depname)::text = 'sales'::text)
30343034
-> WindowAgg
30353035
-> Sort
3036-
Sort Key: empsalary.depname
3036+
Sort Key: empsalary.enroll_date
30373037
-> WindowAgg
30383038
-> Sort
3039-
Sort Key: empsalary.enroll_date
3039+
Sort Key: empsalary.depname
30403040
-> Seq Scan on empsalary
30413041
(9 rows)
30423042

3043+
-- Test Sort node collapsing
3044+
EXPLAIN (COSTS OFF)
3045+
SELECT * FROM
3046+
(SELECT depname,
3047+
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
3048+
min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary
3049+
FROM empsalary) emp
3050+
WHERE depname = 'sales';
3051+
QUERY PLAN
3052+
----------------------------------------------------------------------
3053+
Subquery Scan on emp
3054+
-> WindowAgg
3055+
-> WindowAgg
3056+
-> Sort
3057+
Sort Key: empsalary.empno, empsalary.enroll_date
3058+
-> Seq Scan on empsalary
3059+
Filter: ((depname)::text = 'sales'::text)
3060+
(7 rows)
3061+
3062+
-- Test Sort node reordering
3063+
EXPLAIN (COSTS OFF)
3064+
SELECT
3065+
lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date),
3066+
lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno)
3067+
FROM empsalary;
3068+
QUERY PLAN
3069+
-------------------------------------------------------------
3070+
WindowAgg
3071+
-> WindowAgg
3072+
-> Sort
3073+
Sort Key: depname, salary, enroll_date, empno
3074+
-> Seq Scan on empsalary
3075+
(5 rows)
3076+
30433077
-- cleanup
30443078
DROP TABLE empsalary;
30453079
-- test user-defined window function with named args and default args

src/test/regress/sql/window.sql

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,22 @@ SELECT * FROM
892892
FROM empsalary) emp
893893
WHERE depname = 'sales';
894894

895+
-- Test Sort node collapsing
896+
EXPLAIN (COSTS OFF)
897+
SELECT * FROM
898+
(SELECT depname,
899+
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
900+
min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary
901+
FROM empsalary) emp
902+
WHERE depname = 'sales';
903+
904+
-- Test Sort node reordering
905+
EXPLAIN (COSTS OFF)
906+
SELECT
907+
lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date),
908+
lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno)
909+
FROM empsalary;
910+
895911
-- cleanup
896912
DROP TABLE empsalary;
897913

0 commit comments

Comments
 (0)
0