Fix planner to pass correct collation to operator selectivity estimat… · jcsston/postgres@eb1b488 · GitHub
[go: up one dir, main page]

Skip to content

Commit eb1b488

Browse files
committed
Fix planner to pass correct collation to operator selectivity estimators.
We can do this without creating an API break for estimation functions by passing the collation using the existing fmgr functionality for passing an input collation as a hidden parameter. The need for this was foreseen at the outset, but we didn't get around to making it happen in 9.1 because of the decision to sort all pg_statistic histograms according to the database's default collation. That meant that selectivity estimators generally need to use the default collation too, even if they're estimating for an operator that will do something different. The reason it's suddenly become more interesting is that regexp interpretation also uses a collation (for its LC_CTYPE not LC_COLLATE property), and we no longer want to use the wrong collation when examining regexps during planning. It's not that the selectivity estimate is likely to change much from this; rather that we are thinking of caching compiled regexps during planner estimation, and we won't get the intended benefit if we cache them with a different collation than the executor will use. Back-patch to 9.1, both because the regexp change is likely to get back-patched and because we might as well get this right in all collation-supporting branches, in case any third-party code wants to rely on getting the collation. The patch turns out to be minuscule now that I've done it ...
1 parent 3295387 commit eb1b488

File tree

4 files changed

+83
-51
lines changed

4 files changed

+83
-51
lines changed

src/backend/optimizer/path/clausesel.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ clause_selectivity(PlannerInfo *root,
578578
list_make2(var,
579579
makeBoolConst(true,
580580
false)),
581+
InvalidOid,
581582
varRelid);
582583
}
583584
}
@@ -649,21 +650,24 @@ clause_selectivity(PlannerInfo *root,
649650
}
650651
else if (is_opclause(clause) || IsA(clause, DistinctExpr))
651652
{
652-
Oid opno = ((OpExpr *) clause)->opno;
653+
OpExpr *opclause = (OpExpr *) clause;
654+
Oid opno = opclause->opno;
653655

654656
if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
655657
{
656658
/* Estimate selectivity for a join clause. */
657659
s1 = join_selectivity(root, opno,
658-
((OpExpr *) clause)->args,
660+
opclause->args,
661+
opclause->inputcollid,
659662
jointype,
660663
sjinfo);
661664
}
662665
else
663666
{
664667
/* Estimate selectivity for a restriction clause. */
665668
s1 = restriction_selectivity(root, opno,
666-
((OpExpr *) clause)->args,
669+
opclause->args,
670+
opclause->inputcollid,
667671
varRelid);
668672
}
669673

src/backend/optimizer/util/plancat.c

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ Selectivity
10101010
restriction_selectivity(PlannerInfo *root,
10111011
Oid operatorid,
10121012
List *args,
1013+
Oid inputcollid,
10131014
int varRelid)
10141015
{
10151016
RegProcedure oprrest = get_oprrest(operatorid);
@@ -1022,11 +1023,12 @@ restriction_selectivity(PlannerInfo *root,
10221023
if (!oprrest)
10231024
return (Selectivity) 0.5;
10241025

1025-
result = DatumGetFloat8(OidFunctionCall4(oprrest,
1026-
PointerGetDatum(root),
1027-
ObjectIdGetDatum(operatorid),
1028-
PointerGetDatum(args),
1029-
Int32GetDatum(varRelid)));
1026+
result = DatumGetFloat8(OidFunctionCall4Coll(oprrest,
1027+
inputcollid,
1028+
PointerGetDatum(root),
1029+
ObjectIdGetDatum(operatorid),
1030+
PointerGetDatum(args),
1031+
Int32GetDatum(varRelid)));
10301032

10311033
if (result < 0.0 || result > 1.0)
10321034
elog(ERROR, "invalid restriction selectivity: %f", result);
@@ -1045,6 +1047,7 @@ Selectivity
10451047
join_selectivity(PlannerInfo *root,
10461048
Oid operatorid,
10471049
List *args,
1050+
Oid inputcollid,
10481051
JoinType jointype,
10491052
SpecialJoinInfo *sjinfo)
10501053
{
@@ -1058,12 +1061,13 @@ join_selectivity(PlannerInfo *root,
10581061
if (!oprjoin)
10591062
return (Selectivity) 0.5;
10601063

1061-
result = DatumGetFloat8(OidFunctionCall5(oprjoin,
1062-
PointerGetDatum(root),
1063-
ObjectIdGetDatum(operatorid),
1064-
PointerGetDatum(args),
1065-
Int16GetDatum(jointype),
1066-
PointerGetDatum(sjinfo)));
1064+
result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin,
1065+
inputcollid,
1066+
PointerGetDatum(root),
1067+
ObjectIdGetDatum(operatorid),
1068+
PointerGetDatum(args),
1069+
Int16GetDatum(jointype),
1070+
PointerGetDatum(sjinfo)));
10671071

10681072
if (result < 0.0 || result > 1.0)
10691073
elog(ERROR, "invalid join selectivity: %f", result);

src/backend/utils/adt/selfuncs.c

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,15 @@
8383
* joins, however, the selectivity is defined as the fraction of the left-hand
8484
* side relation's rows that are expected to have a match (ie, at least one
8585
* row with a TRUE result) in the right-hand side.
86+
*
87+
* For both oprrest and oprjoin functions, the operator's input collation OID
88+
* (if any) is passed using the standard fmgr mechanism, so that the estimator
89+
* function can fetch it with PG_GET_COLLATION(). Note, however, that all
90+
* statistics in pg_statistic are currently built using the database's default
91+
* collation. Thus, in most cases where we are looking at statistics, we
92+
* should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
93+
* We expect that the error induced by doing this is usually not large enough
94+
* to justify complicating matters.
8695
*----------
8796
*/
8897

@@ -1097,6 +1106,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
10971106
Oid operator = PG_GETARG_OID(1);
10981107
List *args = (List *) PG_GETARG_POINTER(2);
10991108
int varRelid = PG_GETARG_INT32(3);
1109+
Oid collation = PG_GET_COLLATION();
11001110
VariableStatData vardata;
11011111
Node *other;
11021112
bool varonleft;
@@ -1197,12 +1207,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11971207
}
11981208

11991209
/*
1200-
* Divide pattern into fixed prefix and remainder. XXX we have to assume
1201-
* default collation here, because we don't have access to the actual
1202-
* input collation for the operator. FIXME ...
1210+
* Divide pattern into fixed prefix and remainder. Unlike many of the
1211+
* other functions in this file, we use the pattern operator's actual
1212+
* collation for this step. This is not because we expect the collation
1213+
* to make a big difference in the selectivity estimate (it seldom would),
1214+
* but because we want to be sure we cache compiled regexps under the
1215+
* right cache key, so that they can be re-used at runtime.
12031216
*/
12041217
patt = (Const *) other;
1205-
pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
1218+
pstatus = pattern_fixed_prefix(patt, ptype, collation,
12061219
&prefix, &rest);
12071220

12081221
/*
@@ -1847,18 +1860,20 @@ scalararraysel(PlannerInfo *root,
18471860
elem_nulls[i],
18481861
elmbyval));
18491862
if (is_join_clause)
1850-
s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
1851-
PointerGetDatum(root),
1852-
ObjectIdGetDatum(operator),
1853-
PointerGetDatum(args),
1854-
Int16GetDatum(jointype),
1855-
PointerGetDatum(sjinfo)));
1863+
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1864+
clause->inputcollid,
1865+
PointerGetDatum(root),
1866+
ObjectIdGetDatum(operator),
1867+
PointerGetDatum(args),
1868+
Int16GetDatum(jointype),
1869+
PointerGetDatum(sjinfo)));
18561870
else
1857-
s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
1858-
PointerGetDatum(root),
1859-
ObjectIdGetDatum(operator),
1860-
PointerGetDatum(args),
1861-
Int32GetDatum(varRelid)));
1871+
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1872+
clause->inputcollid,
1873+
PointerGetDatum(root),
1874+
ObjectIdGetDatum(operator),
1875+
PointerGetDatum(args),
1876+
Int32GetDatum(varRelid)));
18621877

18631878
if (useOr)
18641879
{
@@ -1912,18 +1927,20 @@ scalararraysel(PlannerInfo *root,
19121927
*/
19131928
args = list_make2(leftop, elem);
19141929
if (is_join_clause)
1915-
s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
1916-
PointerGetDatum(root),
1917-
ObjectIdGetDatum(operator),
1918-
PointerGetDatum(args),
1919-
Int16GetDatum(jointype),
1920-
PointerGetDatum(sjinfo)));
1930+
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1931+
clause->inputcollid,
1932+
PointerGetDatum(root),
1933+
ObjectIdGetDatum(operator),
1934+
PointerGetDatum(args),
1935+
Int16GetDatum(jointype),
1936+
PointerGetDatum(sjinfo)));
19211937
else
1922-
s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
1923-
PointerGetDatum(root),
1924-
ObjectIdGetDatum(operator),
1925-
PointerGetDatum(args),
1926-
Int32GetDatum(varRelid)));
1938+
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1939+
clause->inputcollid,
1940+
PointerGetDatum(root),
1941+
ObjectIdGetDatum(operator),
1942+
PointerGetDatum(args),
1943+
Int32GetDatum(varRelid)));
19271944

19281945
if (useOr)
19291946
{
@@ -1962,18 +1979,20 @@ scalararraysel(PlannerInfo *root,
19621979
dummyexpr->collation = clause->inputcollid;
19631980
args = list_make2(leftop, dummyexpr);
19641981
if (is_join_clause)
1965-
s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
1966-
PointerGetDatum(root),
1967-
ObjectIdGetDatum(operator),
1968-
PointerGetDatum(args),
1969-
Int16GetDatum(jointype),
1970-
PointerGetDatum(sjinfo)));
1982+
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1983+
clause->inputcollid,
1984+
PointerGetDatum(root),
1985+
ObjectIdGetDatum(operator),
1986+
PointerGetDatum(args),
1987+
Int16GetDatum(jointype),
1988+
PointerGetDatum(sjinfo)));
19711989
else
1972-
s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
1973-
PointerGetDatum(root),
1974-
ObjectIdGetDatum(operator),
1975-
PointerGetDatum(args),
1976-
Int32GetDatum(varRelid)));
1990+
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1991+
clause->inputcollid,
1992+
PointerGetDatum(root),
1993+
ObjectIdGetDatum(operator),
1994+
PointerGetDatum(args),
1995+
Int32GetDatum(varRelid)));
19771996
s1 = useOr ? 0.0 : 1.0;
19781997

19791998
/*
@@ -2046,6 +2065,7 @@ rowcomparesel(PlannerInfo *root,
20462065
{
20472066
Selectivity s1;
20482067
Oid opno = linitial_oid(clause->opnos);
2068+
Oid inputcollid = linitial_oid(clause->inputcollids);
20492069
List *opargs;
20502070
bool is_join_clause;
20512071

@@ -2086,6 +2106,7 @@ rowcomparesel(PlannerInfo *root,
20862106
/* Estimate selectivity for a join clause. */
20872107
s1 = join_selectivity(root, opno,
20882108
opargs,
2109+
inputcollid,
20892110
jointype,
20902111
sjinfo);
20912112
}
@@ -2094,6 +2115,7 @@ rowcomparesel(PlannerInfo *root,
20942115
/* Estimate selectivity for a restriction clause. */
20952116
s1 = restriction_selectivity(root, opno,
20962117
opargs,
2118+
inputcollid,
20972119
varRelid);
20982120
}
20992121

src/include/optimizer/plancat.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno);
4343
extern Selectivity restriction_selectivity(PlannerInfo *root,
4444
Oid operatorid,
4545
List *args,
46+
Oid inputcollid,
4647
int varRelid);
4748

4849
extern Selectivity join_selectivity(PlannerInfo *root,
4950
Oid operatorid,
5051
List *args,
52+
Oid inputcollid,
5153
JoinType jointype,
5254
SpecialJoinInfo *sjinfo);
5355

0 commit comments

Comments
 (0)
0