8000 Pull up ANY-SUBLINK with the necessary lateral support. · postgrespro/postgres@9f13376 · GitHub
[go: up one dir, main page]

Skip to content
  • Commit 9f13376

    Browse files
    committed
    Pull up ANY-SUBLINK with the necessary lateral support.
    For ANY-SUBLINK, we adopted a two-stage pull-up approach to handle different types of scenarios. In the first stage, the sublink is pulled up as a subquery. Because of this, when writing this code, we did not have the ability to perform lateral joins, and therefore, we were unable to pull up Var with varlevelsup=1. Now that we have the ability to use lateral joins, we can eliminate this limitation. Author: Andy Fan <zhihui.fan1213@gmail.com> Author: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Richard Guo <guofenglinux@gmail.com> Reviewed-by: Alena Rybakina <lena.ribackina@yandex.ru> Reviewed-by: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
    1 parent 995d400 commit 9f13376

    File tree

    7 files changed

    +192
    -20
    lines changed

    7 files changed

    +192
    -20
    lines changed

    contrib/postgres_fdw/expected/postgres_fdw.out

    Lines changed: 3 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -11894,23 +11894,23 @@ CREATE FOREIGN TABLE foreign_tbl (b int)
    1189411894
    CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
    1189511895
    SERVER loopback OPTIONS (table_name 'base_tbl');
    1189611896
    EXPLAIN (VERBOSE, COSTS OFF)
    11897-
    SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
    11897+
    SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
    1189811898
    QUERY PLAN
    1189911899
    -----------------------------------------------------------------------------
    1190011900
    Seq Scan on public.base_tbl
    1190111901
    Output: base_tbl.a
    1190211902
    Filter: (SubPlan 1)
    1190311903
    SubPlan 1
    1190411904
    -> Result
    11905-
    Output: base_tbl.a
    11905+
    Output: base_tbl.a, (random() > '0'::double precision)
    1190611906
    -> Append
    1190711907
    -> Async Foreign Scan on public.foreign_tbl foreign_tbl_1
    1190811908
    Remote SQL: SELECT NULL FROM public.base_tbl
    1190911909
    -> Async Foreign Scan on public.foreign_tbl2 foreign_tbl_2
    1191011910
    Remote SQL: SELECT NULL FROM public.base_tbl
    1191111911
    (11 rows)
    1191211912

    11913-
    SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
    11913+
    SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
    1191411914
    a
    1191511915
    ---
    1191611916
    1

    contrib/postgres_fdw/sql/postgres_fdw.sql

    Lines changed: 2 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -3988,8 +3988,8 @@ CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
    39883988
    SERVER loopback OPTIONS (table_name 'base_tbl');
    39893989

    39903990
    EXPLAIN (VERBOSE, COSTS OFF)
    3991-
    SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
    3992-
    SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
    3991+
    SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
    3992+
    SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
    39933993

    39943994
    -- Clean up
    39953995
    DROP FOREIGN TABLE foreign_tbl CASCADE;

    src/backend/optimizer/plan/subselect.c

    Lines changed: 13 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1278,14 +1278,23 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
    12781278
    List *subquery_vars;
    12791279
    Node *quals;
    12801280
    ParseState *pstate;
    1281+
    Relids sub_ref_outer_relids;
    1282+
    bool use_lateral;
    12811283

    12821284
    Assert(sublink->subLinkType == ANY_SUBLINK);
    12831285

    12841286
    /*
    1285-
    * The sub-select must not refer to any Vars of the parent query. (Vars of
    1286-
    * higher levels should be okay, though.)
    1287+
    * If the sub-select refers to any Vars of the parent query, we so let's
    1288+
    * considering it as LATERAL. (Vars of higher levels don't matter here.)
    12871289
    */
    1288-
    if (contain_vars_of_level((Node *) subselect, 1))
    1290+
    sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1);
    1291+
    use_lateral = !bms_is_empty(sub_ref_outer_relids);
    1292+
    1293+
    /*
    1294+
    * Check that sub-select refers nothing outside of available_rels of the
    1295+
    * parent query.
    1296+
    */
    1297+
    if (!bms_is_subset(sub_ref_outer_relids, available_rels))
    12891298
    return NULL;
    12901299

    12911300
    /*
    @@ -1323,7 +1332,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
    13231332
    nsitem = addRangeTableEntryForSubquery(pstate,
    13241333
    subselect,
    13251334
    makeAlias("ANY_subquery", NIL),
    1326-
    false,
    1335+
    use_lateral,
    13271336
    false);
    13281337
    rte = nsitem->p_rte;
    13291338
    parse->rtable = lappend(parse->rtable, rte);

    src/test/regress/expected/join.out

    Lines changed: 7 additions & 7 deletions
    Original file line numberDiff line numberDiff line change
    @@ -5277,7 +5277,7 @@ reset enable_nestloop;
    52775277
    explain (costs off)
    52785278
    select a.unique1, b.unique2
    52795279
    from onek a left join onek b on a.unique1 = b.unique2
    5280-
    where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
    5280+
    where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
    52815281
    QUERY PLAN
    52825282
    ----------------------------------------------------------
    52835283
    Hash Join
    @@ -5293,7 +5293,7 @@ select a.unique1, b.unique2
    52935293

    52945294
    select a.unique1, b.unique2
    52955295
    from onek a left join onek b on a.unique1 = b.unique2
    5296-
    where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
    5296+
    where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
    52975297
    unique1 | unique2
    52985298
    ---------+---------
    52995299
    123 | 123
    @@ -8210,12 +8210,12 @@ select * from (values (0), (1)) v(id),
    82108210
    lateral (select * from int8_tbl t1,
    82118211
    lateral (select * from
    82128212
    (select * from int8_tbl t2
    8213-
    where q1 = any (select q2 from int8_tbl t3
    8213+
    where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
    82148214
    where q2 = (select greatest(t1.q1,t2.q2))
    82158215
    and (select v.id=0)) offset 0) ss2) ss
    82168216
    where t1.q1 = ss.q2) ss0;
    8217-
    QUERY PLAN
    8218-
    ----------------------------------------------------------------------
    8217+
    QUERY PLAN
    8218+
    -------------------------------------------------------------------------------
    82198219
    Nested Loop
    82208220
    Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
    82218221
    -> Seq Scan on public.int8_tbl t1
    @@ -8232,7 +8232,7 @@ lateral (select * from int8_tbl t1,
    82328232
    Filter: (SubPlan 3)
    82338233
    SubPlan 3
    82348234
    -> Result
    8235-
    Output: t3.q2
    8235+
    Output: t3.q2, (random() > '0'::double precision)
    82368236
    One-Time Filter: $4
    82378237
    InitPlan 1 (returns $2)
    82388238
    -> Result
    @@ -8249,7 +8249,7 @@ select * from (values (0), (1)) v(id),
    82498249
    lateral (select * from int8_tbl t1,
    82508250
    lateral (select * from
    82518251
    (select * from int8_tbl t2
    8252-
    where q1 = any (select q2 from int8_tbl t3
    8252+
    where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
    82538253
    where q2 = (select greatest(t1.q1,t2.q2))
    82548254
    and (select v.id=0)) offset 0) ss2) ss
    82558255
    where t1.q1 = ss.q2) ss0;

    src/test/regress/expected/subselect.out

    Lines changed: 126 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1926,3 +1926,129 @@ select * from x for update;
    19261926
    Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
    19271927
    (2 rows)
    19281928

    1929+
    -- Pull-up the direct-correlated ANY_SUBLINK
    1930+
    explain (costs off)
    1931+
    select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
    1932+
    QUERY PLAN
    1933+
    ------------------------------------------------------------
    1934+
    Hash Join
    1935+
    Hash Cond: ((a.odd = b.odd) AND (a.hundred = b.hundred))
    1936+
    -> Seq Scan on tenk1 a
    1937+
    -> Hash
    1938+
    -> HashAggregate
    1939+
    Group Key: b.odd, b.hundred
    1940+
    -> Seq Scan on tenk2 b
    1941+
    (7 rows)
    1942+
    1943+
    explain (costs off)
    1944+
    select * from tenk1 A where exists
    1945+
    (select 1 from tenk2 B
    1946+
    where A.hundred in (select C.hundred FROM tenk2 C
    1947+
    WHERE c.odd = b.odd));
    1948+
    QUERY PLAN
    1949+
    ---------------------------------
    1950+
    Nested Loop Semi Join
    1951+
    Join Filter: (SubPlan 1)
    1952+
    -> Seq Scan on tenk1 a
    1953+
    -> Materialize
    1954+
    -> Seq Scan on tenk2 b
    1955+
    SubPlan 1
    1956+
    -> Seq Scan on tenk2 c
    1957+
    Filter: (odd = b.odd)
    1958+
    (8 rows)
    1959+
    1960+
    -- we should only try to pull up the sublink into RHS of a left join
    1961+
    -- but a.hundred is not avaiable.
    1962+
    explain (costs off)
    1963+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    1964+
    ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
    1965+
    QUERY PLAN
    1966+
    ---------------------------------
    1967+
    Nested Loop Left Join
    1968+
    Join Filter: (SubPlan 1)
    1969+
    -> Seq Scan on tenk1 a
    1970+
    -> Materialize
    1971+
    -> Seq Scan on tenk2 b
    1972+
    SubPlan 1
    1973+
    -> Seq Scan on tenk2 c
    1974+
    Filter: (odd = b.odd)
    1975+
    (8 rows)
    1976+
    1977+
    -- we should only try to pull up the sublink into RHS of a left join
    1978+
    -- but a.odd is not avaiable for this.
    1979+
    explain (costs off)
    1980+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    1981+
    ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
    1982+
    QUERY PLAN
    1983+
    ---------------------------------
    1984+
    Nested Loop Left Join
    1985+
    Join Filter: (SubPlan 1)
    1986+
    -> Seq Scan on tenk1 a
    1987+
    -> Materialize
    1988+
    -> Seq Scan on tenk2 b
    1989+
    SubPlan 1
    1990+
    -> Seq Scan on tenk2 c
    1991+
    Filter: (odd = a.odd)
    1992+
    (8 rows)
    1993+
    1994+
    -- should be able to pull up since all the references is available
    1995+
    explain (costs off)
    1996+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    1997+
    ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
    1998+
    QUERY PLAN
    1999+
    ------------------------------------------------------------------------
    2000+
    Nested Loop Left Join
    2001+
    -> Seq Scan on tenk1 a
    2002+
    -> Materialize
    2003+
    -> Hash Join
    2004+
    Hash Cond: ((b.odd = c.odd) AND (b.hundred = c.hundred))
    2005+
    -> Seq Scan on tenk2 b
    2006+
    -> Hash
    2007+
    -> HashAggregate
    2008+
    Group Key: c.odd, c.hundred
    < 1CF5 /td>
    2009+
    -> Seq Scan on tenk2 c
    2010+
    (10 rows)
    2011+
    2012+
    -- we can pull up the sublink into the inner JoinExpr.
    2013+
    explain (costs off)
    2014+
    SELECT * FROM tenk1 A INNER JOIN tenk2 B
    2015+
    ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
    2016+
    QUERY PLAN
    2017+
    -------------------------------------------------
    2018+
    Hash Join
    2019+
    Hash Cond: (c.odd = b.odd)
    2020+
    -> Hash Join
    2021+
    Hash Cond: (a.hundred = c.hundred)
    2022+
    -> Seq Scan on tenk1 a
    2023+
    -> Hash
    2024+
    -> HashAggregate
    2025+
    Group Key: c.odd, c.hundred
    2026+
    -> Seq Scan on tenk2 c
    2027+
    -> Hash
    2028+
    -> Seq Scan on tenk2 b
    2029+
    (11 rows)
    2030+
    2031+
    -- we can pull up the aggregate sublink into RHS of a left join.
    2032+
    explain (costs off)
    2033+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    2034+
    ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
    2035+
    QUERY PLAN
    2036+
    ---------------------------------------------------------------------------------------
    2037+
    Nested Loop Left Join
    2038+
    -> Seq Scan on tenk1 a
    2039+
    -> Materialize
    2040+
    -> Nested Loop
    2041+
    -> Seq Scan on tenk2 b
    2042+
    -> Memoize
    2043+
    Cache Key: b.hundred, b.odd
    2044+
    Cache Mode: binary
    2045+
    -> Subquery Scan on "ANY_subquery"
    2046+
    Filter: (b.hundred = "ANY_subquery".min)
    2047+
    -> Result
    2048+
    InitPlan 1 (returns $1)
    2049+
    -> Limit
    2050+
    -> Index Scan using tenk2_hundred on tenk2 c
    2051+
    Index Cond: (hundred IS NOT NULL)
    2052+
    Filter: (odd = b.odd)
    2053+
    (16 rows)
    2054+

    src/test/regress/sql/join.sql

    Lines changed: 4 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1864,11 +1864,11 @@ reset enable_nestloop;
    18641864
    explain (costs off)
    18651865
    select a.unique1, b.unique2
    18661866
    from onek a left join onek b on a.unique1 = b.unique2
    1867-
    where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
    1867+
    where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
    18681868

    18691869
    select a.unique1, b.unique2
    18701870
    from onek a left join onek b on a.unique1 = b.unique2
    1871-
    where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
    1871+
    where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
    18721872

    18731873
    --
    18741874
    -- test full-join strength reduction
    @@ -3038,7 +3038,7 @@ select * from (values (0), (1)) v(id),
    30383038
    lateral (select * from int8_tbl t1,
    30393039
    lateral (select * from
    30403040
    (select * from int8_tbl t2
    3041-
    where q1 = any (select q2 from int8_tbl t3
    3041+
    where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
    30423042
    where q2 = (select greatest(t1.q1,t2.q2))
    30433043
    and (select v.id=0)) offset 0) ss2) ss
    30443044
    where t1.q1 = ss.q2) ss0;
    @@ -3047,7 +3047,7 @@ select * from (values (0), (1)) v(id),
    30473047
    lateral (select * from int8_tbl t1,
    30483048
    lateral (select * from
    30493049
    (select * from int8_tbl t2
    3050-
    where q1 = any (select q2 from int8_tbl t3
    3050+
    where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
    30513051
    where q2 = (select greatest(t1.q1,t2.q2))
    30523052
    and (select v.id=0)) offset 0) ss2) ss
    30533053
    where t1.q1 = ss.q2) ss0;

    src/test/regress/sql/subselect.sql

    Lines changed: 37 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -968,3 +968,40 @@ select * from (with x as (select 2 as y) select * from x) ss;
    968968
    explain (verbose, costs off)
    969969
    with x as (select * from subselect_tbl)
    970970
    select * from x for update;
    971+
    972+
    -- Pull-up the direct-correlated ANY_SUBLINK
    973+
    explain (costs off)
    974+
    select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
    975+
    976+
    explain (costs off)
    977+
    select * from tenk1 A where exists
    978+
    (select 1 from tenk2 B
    979+
    where A.hundred in (select C.hundred FROM tenk2 C
    980+
    WHERE c.odd = b.odd));
    981+
    982+
    -- we should only try to pull up the sublink into RHS of a left join
    983+
    -- but a.hundred is not avaiable.
    984+
    explain (costs off)
    985+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    986+
    ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
    987+
    988+
    -- we should only try to pull up the sublink into RHS of a left join
    989+
    -- but a.odd is not avaiable for this.
    990+
    explain (costs off)
    991+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    992+
    ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
    993+
    994+
    -- should be able to pull up since all the references is available
    995+
    explain (costs off)
    996+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    997+
    ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
    998+
    999+
    -- we can pull up the sublink into the inner JoinExpr.
    1000+
    explain (costs off)
    1001+
    SELECT * FROM tenk1 A INNER JOIN tenk2 B
    1002+
    ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
    1003+
    1004+
    -- we can pull up the aggregate sublink into RHS of a left join.
    1005+
    explain (costs off)
    1006+
    SELECT * FROM tenk1 A LEFT JOIN tenk2 B
    1007+
    ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);

    0 commit comments

    Comments
     (0)
    0