88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $
11+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -58,6 +58,10 @@ static Node *preprocess_expression(Query *parse, Node *expr, int kind);
5858static void preprocess_qual_conditions (Query * parse , Node * jtnode );
5959static Plan * inheritance_planner (Query * parse , List * inheritlist );
6060static Plan * grouping_planner (Query * parse , double tuple_fraction );
61+ static bool choose_hashed_grouping (Query * parse , double tuple_fraction ,
62+ Path * cheapest_path , Path * sorted_path ,
63+ List * sort_pathkeys , List * group_pathkeys ,
64+ double dNumGroups , AggClauseCounts * agg_counts );
6165static bool hash_safe_grouping (Query * parse );
6266static List * make_subplanTargetList (Query * parse , List * tlist ,
6367 AttrNumber * * groupColIdx , bool * need_tlist_eval );
@@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction)
920924 sort_pathkeys = canonicalize_pathkeys (parse , sort_pathkeys );
921925
922926 /*
923- * Consider whether we might want to use hashed grouping.
927+ * If grouping, estimate the number of groups. (We can't do this
928+ * until after running query_planner(), either.) Then decide
929+ * whether we want to use hashed grouping.
924930 */
925931 if (parse -> groupClause )
926932 {
927933 List * groupExprs ;
928934 double cheapest_path_rows ;
929- int cheapest_path_width ;
930935
931936 /*
932- * Beware in this section of the possibility that
933- * cheapest_path->parent is NULL. This could happen if user
934- * does something silly like SELECT 'foo' GROUP BY 1;
937+ * Beware of the possibility that cheapest_path->parent is NULL.
938+ * This could happen if user does something silly like
939+ * SELECT 'foo' GROUP BY 1;
935940 */
936941 if (cheapest_path -> parent )
937- {
938942 cheapest_path_rows = cheapest_path -> parent -> rows ;
939- cheapest_path_width = cheapest_path -> parent -> width ;
940- }
941943 else
942- {
943944 cheapest_path_rows = 1 ; /* assume non-set result */
944- cheapest_path_width = 100 ; /* arbitrary */
945- }
946945
947- /*
948- * Always estimate the number of groups. We can't do this
949- * until after running query_planner(), either.
950- */
951946 groupExprs = get_sortgrouplist_exprs (parse -> groupClause ,
952947 parse -> targetList );
953948 dNumGroups = estimate_num_groups (parse ,
@@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction)
956951 /* Also want it as a long int --- but 'ware overflow! */
957952 numGroups = (long ) Min (dNumGroups , (double ) LONG_MAX );
958953
959- /*
960- * Check can't-do-it conditions, including whether the
961- * grouping operators are hashjoinable.
962- *
963- * Executor doesn't support hashed aggregation with DISTINCT
964- * aggregates. (Doing so would imply storing *all* the input
965- * values in the hash table, which seems like a certain
966- * loser.)
967- */
968- if (!enable_hashagg || !hash_safe_grouping (parse ))
969- use_hashed_grouping = false;
970- else if (agg_counts .numDistinctAggs != 0 )
971- use_hashed_grouping = false;
972- else
973- {
974- /*
975- * Use hashed grouping if (a) we think we can fit the
976- * hashtable into work_mem, *and* (b) the estimated cost
977- * is no more than doing it the other way. While avoiding
978- * the need for sorted input is usually a win, the fact
979- * that the output won't be sorted may be a loss; so we
980- * need to do an actual cost comparison.
981- */
982- Size hashentrysize ;
983-
984- /* Estimate per-hash-entry space at tuple width... */
985- hashentrysize = cheapest_path_width ;
986- /* plus space for pass-by-ref transition values... */
987- hashentrysize += agg_counts .transitionSpace ;
988- /* plus the per-hash-entry overhead */
989- hashentrysize += hash_agg_entry_size (agg_counts .numAggs );
990-
991- if (hashentrysize * dNumGroups <= work_mem * 1024L )
992- {
993- /*
994- * Okay, do the cost comparison. We need to consider
995- * cheapest_path + hashagg [+ final sort] versus
996- * either cheapest_path [+ sort] + group or agg [+
997- * final sort] or presorted_path + group or agg [+
998- * final sort] where brackets indicate a step that may
999- * not be needed. We assume query_planner() will have
1000- * returned a presorted path only if it's a winner
1001- * compared to cheapest_path for this purpose.
1002- *
1003- * These path variables are dummies that just hold cost
1004- * fields; we don't make actual Paths for these steps.
1005- */
1006- Path hashed_p ;
1007- Path sorted_p ;
1008-
1009- cost_agg (& hashed_p , parse ,
1010- AGG_HASHED , agg_counts .numAggs ,
1011- numGroupCols , dNumGroups ,
1012- cheapest_path -> startup_cost ,
1013- cheapest_path -> total_cost ,
1014- cheapest_path_rows );
1015- /* Result of hashed agg is always unsorted */
1016- if (sort_pathkeys )
1017- cost_sort (& hashed_p , parse , sort_pathkeys ,
1018- hashed_p .total_cost ,
1019- dNumGroups ,
1020- cheapest_path_width );
1021-
1022- if (sorted_path )
1023- {
1024- sorted_p .startup_cost = sorted_path -> startup_cost ;
1025- sorted_p .total_cost = sorted_path -> total_cost ;
1026- current_pathkeys = sorted_path -> pathkeys ;
1027- }
1028- else
1029- {
1030- sorted_p .startup_cost = cheapest_path -> startup_cost ;
1031- sorted_p .total_cost = cheapest_path -> total_cost ;
1032- current_pathkeys = cheapest_path -> pathkeys ;
1033- }
1034- if (!pathkeys_contained_in (group_pathkeys ,
1035- current_pathkeys ))
1036- {
1037- cost_sort (& sorted_p , parse , group_pathkeys ,
1038- sorted_p .total_cost ,
1039- cheapest_path_rows ,
1040- cheapest_path_width );
1041- current_pathkeys = group_pathkeys ;
1042- }
1043- if (parse -> hasAggs )
1044- cost_agg (& sorted_p , parse ,
1045- AGG_SORTED , agg_counts .numAggs ,
1046- numGroupCols , dNumGroups ,
1047- sorted_p .startup_cost ,
1048- sorted_p .total_cost ,
1049- cheapest_path_rows );
1050- else
1051- cost_group (& sorted_p , parse ,
1052- numGroupCols , dNumGroups ,
1053- sorted_p .startup_cost ,
1054- sorted_p .total_cost ,
1055- cheapest_path_rows );
1056- /* The Agg or Group node will preserve ordering */
1057- if (sort_pathkeys &&
1058- !pathkeys_contained_in (sort_pathkeys ,
1059- current_pathkeys ))
1060- {
1061- cost_sort (& sorted_p , parse , sort_pathkeys ,
1062- sorted_p .total_cost ,
1063- dNumGroups ,
1064- cheapest_path_width );
1065- }
1066-
1067- /*
1068- * Now make the decision using the top-level tuple
1069- * fraction. First we have to convert an absolute
1070- * count (LIMIT) into fractional form.
1071- */
1072- if (tuple_fraction >= 1.0 )
1073- tuple_fraction /= dNumGroups ;
1074-
1075- if (compare_fractional_path_costs (& hashed_p , & sorted_p ,
1076- tuple_fraction ) < 0 )
1077- {
1078- /* Hashed is cheaper, so use it */
1079- use_hashed_grouping = true;
1080- }
1081- }
1082- }
954+ use_hashed_grouping =
955+ choose_hashed_grouping (parse , tuple_fraction ,
956+ cheapest_path , sorted_path ,
957+ sort_pathkeys , group_pathkeys ,
958+ dNumGroups , & agg_counts );
1083959 }
1084960
1085961 /*
@@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction)
13311207 return result_plan ;
13321208}
13331209
1210+ /*
1211+ * choose_hashed_grouping - should we use hashed grouping?
1212+ */
1213+ static bool
1214+ choose_hashed_grouping (Query * parse , double tuple_fraction ,
1215+ Path * cheapest_path , Path * sorted_path ,
1216+ List * sort_pathkeys , List * group_pathkeys ,
1217+ double dNumGroups , AggClauseCounts * agg_counts )
1218+ {
1219+ int numGroupCols = list_length (parse -> groupClause );
1220+ double cheapest_path_rows ;
1221+ int cheapest_path_width ;
1222+ Size hashentrysize ;
1223+ List * current_pathkeys ;
1224+ Path hashed_p ;
1225+ Path sorted_p ;
1226+
1227+ /*
1228+ * Check can't-do-it conditions, including whether the grouping operators
1229+ * are hashjoinable.
1230+ *
1231+ * Executor doesn't support hashed aggregation with DISTINCT aggregates.
1232+ * (Doing so would imply storing *all* the input values in the hash table,
1233+ * which seems like a certain loser.)
1234+ */
1235+ if (!enable_hashagg )
1236+ return false;
1237+ if (agg_counts -> numDistinctAggs != 0 )
1238+ return false;
1239+ if (!hash_safe_grouping (parse ))
1240+ return false;
1241+
1242+ /*
1243+ * Don't do it if it doesn't look like the hashtable will fit into
1244+ * work_mem.
1245+ *
1246+ * Beware here of the possibility that cheapest_path->parent is NULL.
1247+ * This could happen if user does something silly like
1248+ * SELECT 'foo' GROUP BY 1;
1249+ */
1250+ if (cheapest_path -> parent )
1251+ {
1252+ cheapest_path_rows = cheapest_path -> parent -> rows ;
1253+ cheapest_path_width = cheapest_path -> parent -> width ;
1254+ }
1255+ else
1256+ {
1257+ cheapest_path_rows = 1 ; /* assume non-set result */
1258+ cheapest_path_width = 100 ; /* arbitrary */
1259+ }
1260+
1261+ /* Estimate per-hash-entry space at tuple width... */
1262+ hashentrysize = cheapest_path_width ;
1263+ /* plus space for pass-by-ref transition values... */
1264+ hashentrysize += agg_counts -> transitionSpace ;
1265+ /* plus the per-hash-entry overhead */
1266+ hashentrysize += hash_agg_entry_size (agg_counts -> numAggs );
1267+
1268+ if (hashentrysize * dNumGroups > work_mem * 1024L )
1269+ return false;
1270+
1271+ /*
1272+ * See if the estimated cost is no more than doing it the other way.
1273+ * While avoiding the need for sorted input is usually a win, the fact
1274+ * that the output won't be sorted may be a loss; so we need to do an
1275+ * actual cost comparison.
1276+ *
1277+ * We need to consider
1278+ * cheapest_path + hashagg [+ final sort]
1279+ * versus either
1280+ * cheapest_path [+ sort] + group or agg [+ final sort]
1281+ * or
1282+ * presorted_path + group or agg [+ final sort]
1283+ * where brackets indicate a step that may not be needed. We assume
1284+ * query_planner() will have returned a presorted path only if it's a
1285+ * winner compared to cheapest_path for this purpose.
1286+ *
1287+ * These path variables are dummies that just hold cost fields; we don't
1288+ * make actual Paths for these steps.
1289+ */
1290+ cost_agg (& hashed_p , parse , AGG_HASHED , agg_counts -> numAggs ,
1291+ numGroupCols , dNumGroups ,
1292+ cheapest_path -> startup_cost , cheapest_path -> total_cost ,
1293+ cheapest_path_rows );
1294+ /* Result of hashed agg is always unsorted */
1295+ if (sort_pathkeys )
1296+ cost_sort (& hashed_p , parse , sort_pathkeys , hashed_p .total_cost ,
1297+ dNumGroups , cheapest_path_width );
1298+
1299+ if (sorted_path )
1300+ {
C46B
tr>1301+ sorted_p .startup_cost = sorted_path -> startup_cost ;
1302+ sorted_p .total_cost = sorted_path -> total_cost ;
1303+ current_pathkeys = sorted_path -> pathkeys ;
1304+ }
1305+ else
1306+ {
1307+ sorted_p .startup_cost = cheapest_path -> startup_cost ;
1308+ sorted_p .total_cost = cheapest_path -> total_cost ;
1309+ current_pathkeys = cheapest_path -> pathkeys ;
1310+ }
1311+ if (!pathkeys_contained_in (group_pathkeys ,
1312+ current_pathkeys ))
1313+ {
1314+ cost_sort (& sorted_p , parse , group_pathkeys , sorted_p .total_cost ,
1315+ cheapest_path_rows , cheapest_path_width );
1316+ current_pathkeys = group_pathkeys ;
1317+ }
1318+
1319+ if (parse -> hasAggs )
1320+ cost_agg (& sorted_p , parse , AGG_SORTED , agg_counts -> numAggs ,
1321+ numGroupCols , dNumGroups ,
1322+ sorted_p .startup_cost , sorted_p .total_cost ,
1323+ cheapest_path_rows );
1324+ else
1325+ cost_group (& sorted_p , parse , numGroupCols , dNumGroups ,
1326+ sorted_p .startup_cost , sorted_p .total_cost ,
1327+ cheapest_path_rows );
1328+ /* The Agg or Group node will preserve ordering */
1329+ if (sort_pathkeys &&
1330+ !pathkeys_contained_in (sort_pathkeys , current_pathkeys ))
1331+ cost_sort (& sorted_p , parse , sort_pathkeys , sorted_p .total_cost ,
1332+ dNumGroups , cheapest_path_width );
1333+
1334+ /*
1335+ * Now make the decision using the top-level tuple fraction. First we
1336+ * have to convert an absolute count (LIMIT) into fractional form.
1337+ */
1338+ if (tuple_fraction >= 1.0 )
1339+ tuple_fraction /= dNumGroups ;
1340+
1341+ if (compare_fractional_path_costs (& hashed_p , & sorted_p ,
1342+ tuple_fraction ) < 0 )
1343+ {
1344+ /* Hashed is cheaper, so use it */
1345+ return true;
1346+ }
1347+ return false;
1348+ }
1349+
13341350/*
13351351 * hash_safe_grouping - are grouping operators hashable?
13361352 *
0 commit comments