Heikki Linnakangas írta: > On 26.10.2010 18:34, Boszormenyi Zoltan wrote: >> thank you very much for pointing me to dynahash, here is the >> next version that finally seems to work. >> >> Two patches are attached, the first is the absolute minimum for >> making it work, this still has the Tree type for canon_pathkeys >> and eq_classes got the same treatment as join_rel_list/join_rel_hash >> has in the current sources: if the list grows larger than 32, a hash >> table >> is created. It seems to be be enough for doing in for >> get_eclass_for_sort_expr() >> only, the other users of eq_classes aren't bothered by this change. > > That's better, but can't you use dynahash for canon_pathkeys as well?
Here's a purely dynahash solution. It's somewhat slower than the tree version, 0.45 vs 0.41 seconds in the cached case for the previously posted test case. Best regards, Zoltán Böszörményi -- ---------------------------------- Zoltán Böszörményi Cybertec Schönig & Schönig GmbH Gröhrmühlgasse 26 A-2700 Wiener Neustadt, Austria Web: http://www.postgresql-support.de http://www.postgresql.at/
diff -durpN postgresql.orig/src/backend/optimizer/path/equivclass.c postgresql.1/src/backend/optimizer/path/equivclass.c --- postgresql.orig/src/backend/optimizer/path/equivclass.c 2010-10-15 10:31:47.000000000 +0200 +++ postgresql.1/src/backend/optimizer/path/equivclass.c 2010-10-26 17:01:57.000000000 +0200 @@ -24,6 +24,7 @@ #include "optimizer/planmain.h" #include "optimizer/prep.h" #include "optimizer/var.h" +#include "utils/hsearch.h" #include "utils/lsyscache.h" @@ -360,75 +361,103 @@ add_eq_member(EquivalenceClass *ec, Expr /* - * get_eclass_for_sort_expr - * Given an expression and opfamily info, find an existing equivalence - * class it is a member of; if none, build a new single-member - * EquivalenceClass for it. - * - * sortref is the SortGroupRef of the originating SortGroupClause, if any, - * or zero if not. (It should never be zero if the expression is volatile!) - * - * This can be used safely both before and after EquivalenceClass merging; - * since it never causes merging it does not invalidate any existing ECs - * or PathKeys. - * - * Note: opfamilies must be chosen consistently with the way - * process_equivalence() would do; that is, generated from a mergejoinable - * equality operator. Else we might fail to detect valid equivalences, - * generating poor (but not incorrect) plans. + * eq_classes_match - matching function for eq_classes_hash in PlannerInfo */ -EquivalenceClass * -get_eclass_for_sort_expr(PlannerInfo *root, - Expr *expr, - Oid expr_datatype, - List *opfamilies, - Index sortref) +static int +eq_classes_match(const void *key1, const void *key2, Size keysize) { - EquivalenceClass *newec; - EquivalenceMember *newem; + EquivalenceClass *ec1 = (EquivalenceClass *) key1; /* this is in the hashtable */ + EquivalenceClass *ec2 = (EquivalenceClass *) key2; /* this is the new matched entry */ ListCell *lc1; - MemoryContext oldcontext; + ListCell *lc2; /* - * Scan through the existing EquivalenceClasses for a match + * Never match to a volatile EC, except when we are looking at another + * reference to the same volatile SortGroupClause. */ - foreach(lc1, root->eq_classes) + if (ec1->ec_has_volatile && + (ec2->ec_sortref == 0 || ec2->ec_sortref != ec1->ec_sortref)) + return 1; + + if (!equal(ec1->ec_opfamilies, ec2->ec_opfamilies)) + return 1; + + foreach(lc1, ec1->ec_members) { - EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); - ListCell *lc2; + EquivalenceMember *em1 = (EquivalenceMember *) lfirst(lc1); /* - * Never match to a volatile EC, except when we are looking at another - * reference to the same volatile SortGroupClause. + * If below an outer join, don't match constants: they're not as + * constant as they look. */ - if (cur_ec->ec_has_volatile && - (sortref == 0 || sortref != cur_ec->ec_sortref)) - continue; - - if (!equal(opfamilies, cur_ec->ec_opfamilies)) + if (ec1->ec_below_outer_join && + em1->em_is_const) continue; - foreach(lc2, cur_ec->ec_members) + foreach(lc2, ec2->ec_members) { - EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); - - /* - * If below an outer join, don't match constants: they're not as - * constant as they look. - */ - if (cur_ec->ec_below_outer_join && - cur_em->em_is_const) - continue; + EquivalenceMember *em2 = (EquivalenceMember *) lfirst(lc2); - if (expr_datatype == cur_em->em_datatype && - equal(expr, cur_em->em_expr)) - return cur_ec; /* Match! */ + if (em1->em_datatype == em2->em_datatype && + equal(em1->em_expr, em2->em_expr)) + return 0; } } + return 1; +} + + +/* + * build_eq_classes_hash + * Build the initial contents of PlannerInfo.eq_classes_hash + * for faster search in PlannerInfo.eq_classes. This is used + * to make get_eclass_for_sort_expr() faster for large + * inheritance trees. + */ +static void +build_eq_classes_hash(PlannerInfo *root) +{ + MemoryContext oldcontext; + HASHCTL info; + + ListCell *lc; + + info.match = eq_classes_match; + info.hcxt = root->planner_cxt; + info.keysize = sizeof(Relids); + info.entrysize = sizeof(EquivalenceClass); + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + root->eq_classes_hash = hash_create("eq_classes", 2048, &info, + HASH_ELEM | HASH_COMPARE | HASH_CONTEXT); + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = lfirst(lc); + bool found; + + hash_search_with_hash_value(root->eq_classes_hash, ec, + bms_hash_value(ec->ec_relids), + HASH_ENTER, &found); + Assert(!found); + } +} + + +static EquivalenceClass * +build_new_ec(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies, + Index sortref) +{ + MemoryContext oldcontext; + EquivalenceClass *newec; + EquivalenceMember *newem; + /* - * No match, so build a new single-member EC - * * Here, we must be sure that we construct the EC in the right context. We * can assume, however, that the passed expr is long-lived. */ @@ -471,13 +500,132 @@ get_eclass_for_sort_expr(PlannerInfo *ro } } - root->eq_classes = lappend(root->eq_classes, newec); - MemoryContextSwitchTo(oldcontext); return newec; } +/* + * get_eclass_for_sort_expr + * Given an expression and opfamily info, find an existing equivalence + * class it is a member of; if none, build a new single-member + * EquivalenceClass for it. + * + * sortref is the SortGroupRef of the originating SortGroupClause, if any, + * or zero if not. (It should never be zero if the expression is volatile!) + * + * This can be used safely both before and after EquivalenceClass merging; + * since it never causes merging it does not invalidate any existing ECs + * or PathKeys. + * + * Note: opfamilies must be chosen consistently with the way + * process_equivalence() would do; that is, generated from a mergejoinable + * equality operator. Else we might fail to detect valid equivalences, + * generating poor (but not incorrect) plans. + */ +EquivalenceClass * +get_eclass_for_sort_expr(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies, + Index sortref) +{ + EquivalenceClass *newec; + ListCell *lc1; + MemoryContext oldcontext; + + if (root->eq_classes_hash == NULL && + list_length(root->eq_classes) > 32) + build_eq_classes_hash(root); + + if (root->eq_classes_hash == NULL) + { + /* + * Scan through the existing EquivalenceClasses for a match + */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Never match to a volatile EC, except when we are looking at another + * reference to the same volatile SortGroupClause. + */ + if (cur_ec->ec_has_volatile && + (sortref == 0 || sortref != cur_ec->ec_sortref)) + continue; + + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* + * If below an outer join, don't match constants: they're not as + * constant as they look. + */ + if (cur_ec->ec_below_outer_join && + cur_em->em_is_const) + continue; + + if (expr_datatype == cur_em->em_datatype && + equal(expr, cur_em->em_expr)) + return cur_ec; /* Match! */ + } + } + + /* + * No match, so build a new single-member EC + */ + newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref); + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + root->eq_classes = lappend(root->eq_classes, newec); + MemoryContextSwitchTo(oldcontext); + + return newec; + } + else + { + EquivalenceClass *ec_found; + bool found; + uint32 hashval; + + /* + * Build the new single-member EC to match against in hash_search() + */ + newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref); + + hashval = bms_hash_value(newec->ec_relids); + + ec_found = hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_FIND, &found); + + if (found) + { + list_free(newec->ec_opfamilies); + list_free_deep(newec->ec_members); + bms_free(newec->ec_relids); + pfree(newec); + return ec_found; + } + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + root->eq_classes = lappend(root->eq_classes, newec); + + hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_ENTER, &found); + + Assert(!found); + + MemoryContextSwitchTo(oldcontext); + + return newec; + } +} + /* * generate_base_implied_equalities diff -durpN postgresql.orig/src/backend/optimizer/path/pathkeys.c postgresql.1/src/backend/optimizer/path/pathkeys.c --- postgresql.orig/src/backend/optimizer/path/pathkeys.c 2010-09-21 13:49:57.000000000 +0200 +++ postgresql.1/src/backend/optimizer/path/pathkeys.c 2010-10-28 11:19:35.000000000 +0200 @@ -17,6 +17,7 @@ */ #include "postgres.h" +#include "access/hash.h" #include "access/skey.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" @@ -27,6 +28,7 @@ #include "optimizer/paths.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" +#include "utils/hsearch.h" #include "utils/lsyscache.h" @@ -72,6 +74,73 @@ makePathKey(EquivalenceClass *eclass, Oi } /* + * pk_hash + * hashtable hash function for PlannerInfo.canon_pathkeys_hash + */ +static uint32 +pk_hash(const void *key, Size keysize) +{ + PathKey *pk = (PathKey *) key; + intptr_t ptr = (intptr_t) pk->pk_eclass; + + return DatumGetUInt32(hash_uint32((uint32)ptr)); +} + +/* + * pk_match + * hashtable match function for PlannerInfo.canon_pathkeys_hash + */ +static int +pk_match(const void *key1, const void *key2, Size keysize) +{ + PathKey *pk1 = (PathKey *)key1; + PathKey *pk2 = (PathKey *)key2; + + if (pk1->pk_eclass == pk2->pk_eclass && + pk1->pk_opfamily == pk2->pk_opfamily && + pk1->pk_strategy == pk2->pk_strategy && + pk1->pk_nulls_first == pk2->pk_nulls_first) + return 0; + return 1; +} + +/* + * build_canonical_pathkey_hash + * + * Build PlannerInfo.canon_pathkeys_hash from canon_pathkeys list. + */ +static void +build_canonical_pathkey_hash(PlannerInfo *root) +{ + MemoryContext oldcontext; + HASHCTL info; + + ListCell *lc; + + info.hash = pk_hash; + info.match = pk_match; + info.hcxt = root->planner_cxt; + info.keysize = sizeof(EquivalenceClass *); + info.entrysize = sizeof(PathKey); + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + root->canon_pathkeys_hash = hash_create("canon_pathkeys", 2048, &info, + HASH_FUNCTION | HASH_ELEM | HASH_COMPARE | HASH_CONTEXT); + + foreach(lc, root->canon_pathkeys) + { + PathKey *pk = lfirst(lc); + bool found; + + hash_search_with_hash_value(root->canon_pathkeys_hash, pk, + DatumGetUInt32(hash_any((const unsigned char *) pk, sizeof(PathKey))), + HASH_ENTER, &found); + Assert(!found); + } +} + +/* * make_canonical_pathkey * Given the parameters for a PathKey, find any pre-existing matching * pathkey in the query's list of "canonical" pathkeys. Make a new @@ -85,7 +154,7 @@ make_canonical_pathkey(PlannerInfo *root EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first) { - PathKey *pk; + PathKey *pk, *pk_found; ListCell *lc; MemoryContext oldcontext; @@ -93,26 +162,64 @@ make_canonical_pathkey(PlannerInfo *root while (eclass->ec_merged) eclass = eclass->ec_merged; - foreach(lc, root->canon_pathkeys) + if (root->canon_pathkeys_hash == NULL && + list_length(root->canon_pathkeys) > 32) + build_canonical_pathkey_hash(root); + + if (root->canon_pathkeys_hash == NULL) { - pk = (PathKey *) lfirst(lc); - if (eclass == pk->pk_eclass && - opfamily == pk->pk_opfamily && - strategy == pk->pk_strategy && - nulls_first == pk->pk_nulls_first) - return pk; + foreach(lc, root->canon_pathkeys) + { + pk = (PathKey *) lfirst(lc); + if (eclass == pk->pk_eclass && + opfamily == pk->pk_opfamily && + strategy == pk->pk_strategy && + nulls_first == pk->pk_nulls_first) + return pk; + } + + /* + * Be sure canonical pathkeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + pk = makePathKey(eclass, opfamily, strategy, nulls_first); + root->canon_pathkeys = lappend(root->canon_pathkeys, pk); + + MemoryContextSwitchTo(oldcontext); } + else + { + uint32 hashval; + bool found; - /* - * Be sure canonical pathkeys are allocated in the main planning context. - * Not an issue in normal planning, but it is for GEQO. - */ - oldcontext = MemoryContextSwitchTo(root->planner_cxt); + /* + * Be sure canonical pathkeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); - pk = makePathKey(eclass, opfamily, strategy, nulls_first); - root->canon_pathkeys = lappend(root->canon_pathkeys, pk); + pk = makePathKey(eclass, opfamily, strategy, nulls_first); - MemoryContextSwitchTo(oldcontext); + hashval = DatumGetInt32(hash_any((const unsigned char *) pk, sizeof(PathKey))); + + + pk_found = hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_FIND, &found); + + if (found) + { + pfree(pk); + pk = pk_found; + } + else + { + root->canon_pathkeys = lappend(root->canon_pathkeys, pk); + hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_ENTER, &found); + } + + MemoryContextSwitchTo(oldcontext); + } return pk; } diff -durpN postgresql.orig/src/backend/optimizer/plan/planmain.c postgresql.1/src/backend/optimizer/plan/planmain.c --- postgresql.orig/src/backend/optimizer/plan/planmain.c 2010-10-08 11:04:23.000000000 +0200 +++ postgresql.1/src/backend/optimizer/plan/planmain.c 2010-10-28 09:35:37.000000000 +0200 @@ -27,6 +27,7 @@ #include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" +#include "utils/hsearch.h" #include "utils/selfuncs.h" @@ -118,6 +119,11 @@ query_planner(PlannerInfo *root, List *t * something like "SELECT 2+2 ORDER BY 1". */ root->canon_pathkeys = NIL; + if (root->canon_pathkeys_hash) + { + hash_destroy(root->canon_pathkeys_hash); + root->canon_pathkeys_hash = NULL; + } root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, @@ -146,6 +152,11 @@ query_planner(PlannerInfo *root, List *t root->join_rel_level = NULL; root->join_cur_level = 0; root->canon_pathkeys = NIL; + if (root->canon_pathkeys_hash) + { + hash_destroy(root->canon_pathkeys_hash); + root->canon_pathkeys_hash = NULL; + } root->left_join_clauses = NIL; root->right_join_clauses = NIL; root->full_join_clauses = NIL; diff -durpN postgresql.orig/src/include/nodes/relation.h postgresql.1/src/include/nodes/relation.h --- postgresql.orig/src/include/nodes/relation.h 2010-10-15 10:31:47.000000000 +0200 +++ postgresql.1/src/include/nodes/relation.h 2010-10-28 11:26:02.000000000 +0200 @@ -159,8 +159,10 @@ typedef struct PlannerInfo List *cte_plan_ids; /* per-CTE-item list of subplan IDs */ List *eq_classes; /* list of active EquivalenceClasses */ + struct HTAB *eq_classes_hash; /* optional hashtable for equivalence classes */ List *canon_pathkeys; /* list of "canonical" PathKeys */ + struct HTAB *canon_pathkeys_hash; /* optional hashtable for "canonical" PathKeys */ List *left_join_clauses; /* list of RestrictInfos for * mergejoinable outer join clauses Binary files postgresql.orig/src/timezone/gmon.out and postgresql.1/src/timezone/gmon.out differ
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers