Don't assume a subquery's output is unique if there's a SRF in its tl… · micdev42/postgres@2e7469d · GitHub
[go: up one dir, main page]

Skip to content

Commit 2e7469d

Browse files
committed
Don't assume a subquery's output is unique if there's a SRF in its tlist.
While the x output of "select x from t group by x" can be presumed unique, this does not hold for "select x, generate_series(1,10) from t group by x", because we may expand the set-returning function after the grouping step. (Perhaps that should be re-thought; but considering all the other oddities involved with SRFs in targetlists, it seems unlikely we'll change it.) Put a check in query_is_distinct_for() so it's not fooled by such cases.

Back-patch to all supported branches.

David Rowley
1 parent e78791e commit 2e7469d

File tree

3 files changed

+32
-0
lines changed

3 files changed

+32
-0
lines changed

src/backend/optimizer/util/pathnode.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "catalog/pg_operator.h"
2020
#include "executor/executor.h"
2121
#include "miscadmin.h"
22+
#include "nodes/nodeFuncs.h"
2223
#include "optimizer/clauses.h"
2324
#include "optimizer/cost.h"
2425
#include "optimizer/pathnode.h"
@@ -1093,6 +1094,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
10931094

10941095
Assert(list_length(colnos) == list_length(opids));
10951096

1097+
/*
1098+
* A set-returning function in the query's targetlist can result in
1099+
* returning duplicate rows, if the SRF is evaluated after the
1100+
* de-duplication step; so we play it safe and say "no" if there are any
1101+
* SRFs. (We could be certain that it's okay if SRFs appear only in the
1102+
* specified columns, since those must be evaluated before de-duplication;
1103+
* but it doesn't presently seem worth the complication to check that.)
1104+
*/
1105+
if (expression_returns_set((Node *) query->targetList))
1106+
return false;
1107+
10961108
/*
10971109
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
10981110
* columns in the DISTINCT clause appear in colnos and operator semantics

src/test/regress/expected/subselect.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,3 +650,15 @@ select * from int4_tbl where
650650
0
651651
(1 row)
652652

653+
--
654+
-- Check for incorrect optimization when IN subquery contains a SRF
655+
--
656+
set enable_hashjoin to 0;
657+
select * from int4_tbl o where (f1, f1) in
658+
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
659+
f1
660+
----
661+
0
662+
(1 row)
663+
664+
reset enable_hashjoin;

src/test/regress/sql/subselect.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,11 @@ where a.thousand = b.thousand
396396
select * from int4_tbl where
397397
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
398398
(select ten from tenk1 b);
399+
400+
--
401+
-- Check for incorrect optimization when IN subquery contains a SRF
402+
--
403+
set enable_hashjoin to 0;
404+
select * from int4_tbl o where (f1, f1) in
405+
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
406+
reset enable_hashjoin;

0 commit comments

Comments
 (0)
0