Boszormenyi Zoltan írta: > Boszormenyi Zoltan írta: > >> Heikki Linnakangas írta: >> >> >>> On 26.10.2010 18:34, Boszormenyi Zoltan wrote: >>> >>> >>>> thank you very much for pointing me to dynahash, here is the >>>> next version that finally seems to work. >>>> >>>> Two patches are attached, the first is the absolute minimum for >>>> making it work, this still has the Tree type for canon_pathkeys >>>> and eq_classes got the same treatment as join_rel_list/join_rel_hash >>>> has in the current sources: if the list grows larger than 32, a hash >>>> table >>>> is created. It seems to be be enough for doing in for >>>> get_eclass_for_sort_expr() >>>> only, the other users of eq_classes aren't bothered by this change. >>>> >>>> >>> That's better, but can't you use dynahash for canon_pathkeys as well? >>> >>> >> Here's a purely dynahash solution. It's somewhat slower than >> the tree version, 0.45 vs 0.41 seconds in the cached case for the >> previously posted test case. >> >> > > And now in context diff, sorry for my affection towards unified diffs. :-) >
A little better version, no need for the heavy hash_any, hash_uint32 on the lower 32 bits on pk_eclass is enough. The profiling runtime is now 0.42 seconds vs the previous 0.41 seconds for the tree version. Best regards, Zoltán Böszörményi -- ---------------------------------- Zoltán Böszörményi Cybertec Schönig & Schönig GmbH Gröhrmühlgasse 26 A-2700 Wiener Neustadt, Austria Web: http://www.postgresql-support.de http://www.postgresql.at/
diff -dcrpN postgresql.orig/src/backend/optimizer/path/equivclass.c postgresql.1/src/backend/optimizer/path/equivclass.c *** postgresql.orig/src/backend/optimizer/path/equivclass.c 2010-10-15 10:31:47.000000000 +0200 --- postgresql.1/src/backend/optimizer/path/equivclass.c 2010-10-28 12:37:47.000000000 +0200 *************** *** 24,29 **** --- 24,30 ---- #include "optimizer/planmain.h" #include "optimizer/prep.h" #include "optimizer/var.h" + #include "utils/hsearch.h" #include "utils/lsyscache.h" *************** add_eq_member(EquivalenceClass *ec, Expr *** 360,434 **** /* ! * get_eclass_for_sort_expr ! * Given an expression and opfamily info, find an existing equivalence ! * class it is a member of; if none, build a new single-member ! * EquivalenceClass for it. ! * ! * sortref is the SortGroupRef of the originating SortGroupClause, if any, ! * or zero if not. (It should never be zero if the expression is volatile!) ! * ! * This can be used safely both before and after EquivalenceClass merging; ! * since it never causes merging it does not invalidate any existing ECs ! * or PathKeys. ! * ! * Note: opfamilies must be chosen consistently with the way ! * process_equivalence() would do; that is, generated from a mergejoinable ! * equality operator. Else we might fail to detect valid equivalences, ! * generating poor (but not incorrect) plans. */ ! EquivalenceClass * ! get_eclass_for_sort_expr(PlannerInfo *root, ! Expr *expr, ! Oid expr_datatype, ! List *opfamilies, ! Index sortref) { ! EquivalenceClass *newec; ! EquivalenceMember *newem; ListCell *lc1; ! MemoryContext oldcontext; /* ! * Scan through the existing EquivalenceClasses for a match */ ! foreach(lc1, root->eq_classes) { ! EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); ! ListCell *lc2; /* ! * Never match to a volatile EC, except when we are looking at another ! * reference to the same volatile SortGroupClause. */ ! if (cur_ec->ec_has_volatile && ! (sortref == 0 || sortref != cur_ec->ec_sortref)) ! continue; ! ! if (!equal(opfamilies, cur_ec->ec_opfamilies)) continue; ! foreach(lc2, cur_ec->ec_members) { ! EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); ! ! /* ! * If below an outer join, don't match constants: they're not as ! * constant as they look. ! */ ! if (cur_ec->ec_below_outer_join && ! cur_em->em_is_const) ! continue; ! if (expr_datatype == cur_em->em_datatype && ! equal(expr, cur_em->em_expr)) ! return cur_ec; /* Match! */ } } /* - * No match, so build a new single-member EC - * * Here, we must be sure that we construct the EC in the right context. We * can assume, however, that the passed expr is long-lived. */ --- 361,463 ---- /* ! * eq_classes_match - matching function for eq_classes_hash in PlannerInfo */ ! static int ! eq_classes_match(const void *key1, const void *key2, Size keysize) { ! EquivalenceClass *ec1 = (EquivalenceClass *) key1; /* this is in the hashtable */ ! EquivalenceClass *ec2 = (EquivalenceClass *) key2; /* this is the new matched entry */ ListCell *lc1; ! ListCell *lc2; /* ! * Never match to a volatile EC, except when we are looking at another ! * reference to the same volatile SortGroupClause. */ ! if (ec1->ec_has_volatile && ! (ec2->ec_sortref == 0 || ec2->ec_sortref != ec1->ec_sortref)) ! return 1; ! ! if (!equal(ec1->ec_opfamilies, ec2->ec_opfamilies)) ! return 1; ! ! foreach(lc1, ec1->ec_members) { ! EquivalenceMember *em1 = (EquivalenceMember *) lfirst(lc1); /* ! * If below an outer join, don't match constants: they're not as ! * constant as they look. */ ! if (ec1->ec_below_outer_join && ! em1->em_is_const) continue; ! foreach(lc2, ec2->ec_members) { ! EquivalenceMember *em2 = (EquivalenceMember *) lfirst(lc2); ! if (em1->em_datatype == em2->em_datatype && ! equal(em1->em_expr, em2->em_expr)) ! return 0; } } + return 1; + } + + + /* + * build_eq_classes_hash + * Build the initial contents of PlannerInfo.eq_classes_hash + * for faster search in PlannerInfo.eq_classes. This is used + * to make get_eclass_for_sort_expr() faster for large + * inheritance trees. + */ + static void + build_eq_classes_hash(PlannerInfo *root) + { + MemoryContext oldcontext; + HASHCTL info; + + ListCell *lc; + + info.match = eq_classes_match; + info.hcxt = root->planner_cxt; + info.keysize = sizeof(Relids); + info.entrysize = sizeof(EquivalenceClass); + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + root->eq_classes_hash = hash_create("eq_classes", 2048, &info, + HASH_ELEM | HASH_COMPARE | HASH_CONTEXT); + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = lfirst(lc); + bool found; + + hash_search_with_hash_value(root->eq_classes_hash, ec, + bms_hash_value(ec->ec_relids), + HASH_ENTER, &found); + Assert(!found); + } + } + + + static EquivalenceClass * + build_new_ec(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies, + Index sortref) + { + MemoryContext oldcontext; + EquivalenceClass *newec; + EquivalenceMember *newem; + /* * Here, we must be sure that we construct the EC in the right context. We * can assume, however, that the passed expr is long-lived. */ *************** get_eclass_for_sort_expr(PlannerInfo *ro *** 471,483 **** } } - root->eq_classes = lappend(root->eq_classes, newec); - MemoryContextSwitchTo(oldcontext); return newec; } /* * generate_base_implied_equalities --- 500,631 ---- } } MemoryContextSwitchTo(oldcontext); return newec; } + /* + * get_eclass_for_sort_expr + * Given an expression and opfamily info, find an existing equivalence + * class it is a member of; if none, build a new single-member + * EquivalenceClass for it. + * + * sortref is the SortGroupRef of the originating SortGroupClause, if any, + * or zero if not. (It should never be zero if the expression is volatile!) + * + * This can be used safely both before and after EquivalenceClass merging; + * since it never causes merging it does not invalidate any existing ECs + * or PathKeys. + * + * Note: opfamilies must be chosen consistently with the way + * process_equivalence() would do; that is, generated from a mergejoinable + * equality operator. Else we might fail to detect valid equivalences, + * generating poor (but not incorrect) plans. + */ + EquivalenceClass * + get_eclass_for_sort_expr(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies, + Index sortref) + { + EquivalenceClass *newec; + ListCell *lc1; + MemoryContext oldcontext; + + if (root->eq_classes_hash == NULL && + list_length(root->eq_classes) > 32) + build_eq_classes_hash(root); + + if (root->eq_classes_hash == NULL) + { + /* + * Scan through the existing EquivalenceClasses for a match + */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Never match to a volatile EC, except when we are looking at another + * reference to the same volatile SortGroupClause. + */ + if (cur_ec->ec_has_volatile && + (sortref == 0 || sortref != cur_ec->ec_sortref)) + continue; + + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* + * If below an outer join, don't match constants: they're not as + * constant as they look. + */ + if (cur_ec->ec_below_outer_join && + cur_em->em_is_const) + continue; + + if (expr_datatype == cur_em->em_datatype && + equal(expr, cur_em->em_expr)) + return cur_ec; /* Match! */ + } + } + + /* + * No match, so build a new single-member EC + */ + newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref); + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + root->eq_classes = lappend(root->eq_classes, newec); + MemoryContextSwitchTo(oldcontext); + + return newec; + } + else + { + EquivalenceClass *ec_found; + bool found; + uint32 hashval; + + /* + * Build the new single-member EC to match against in hash_search() + */ + newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref); + + hashval = bms_hash_value(newec->ec_relids); + + ec_found = hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_FIND, &found); + + if (found) + { + list_free(newec->ec_opfamilies); + list_free_deep(newec->ec_members); + bms_free(newec->ec_relids); + pfree(newec); + return ec_found; + } + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + root->eq_classes = lappend(root->eq_classes, newec); + + hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_ENTER, &found); + + Assert(!found); + + MemoryContextSwitchTo(oldcontext); + + return newec; + } + } + /* * generate_base_implied_equalities diff -dcrpN postgresql.orig/src/backend/optimizer/path/pathkeys.c postgresql.1/src/backend/optimizer/path/pathkeys.c *** postgresql.orig/src/backend/optimizer/path/pathkeys.c 2010-09-21 13:49:57.000000000 +0200 --- postgresql.1/src/backend/optimizer/path/pathkeys.c 2010-10-28 12:39:22.000000000 +0200 *************** *** 17,22 **** --- 17,23 ---- */ #include "postgres.h" + #include "access/hash.h" #include "access/skey.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" *************** *** 27,32 **** --- 28,34 ---- #include "optimizer/paths.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" + #include "utils/hsearch.h" #include "utils/lsyscache.h" *************** makePathKey(EquivalenceClass *eclass, Oi *** 72,77 **** --- 74,144 ---- } /* + * pk_hash + * hashtable hash function for PlannerInfo.canon_pathkeys_hash + */ + static uint32 + pk_hash(const void *key, Size keysize) + { + PathKey *pk = (PathKey *) key; + intptr_t ptr = (intptr_t) pk->pk_eclass; + + return DatumGetUInt32(hash_uint32((uint32)ptr)); + } + + /* + * pk_match + * hashtable match function for PlannerInfo.canon_pathkeys_hash + */ + static int + pk_match(const void *key1, const void *key2, Size keysize) + { + PathKey *pk1 = (PathKey *)key1; + PathKey *pk2 = (PathKey *)key2; + + if (pk1->pk_eclass == pk2->pk_eclass && + pk1->pk_opfamily == pk2->pk_opfamily && + pk1->pk_strategy == pk2->pk_strategy && + pk1->pk_nulls_first == pk2->pk_nulls_first) + return 0; + return 1; + } + + /* + * build_canonical_pathkey_hash + * + * Build PlannerInfo.canon_pathkeys_hash from canon_pathkeys list. + */ + static void + build_canonical_pathkey_hash(PlannerInfo *root) + { + MemoryContext oldcontext; + HASHCTL info; + + ListCell *lc; + + info.hash = pk_hash; + info.match = pk_match; + info.hcxt = root->planner_cxt; + info.keysize = sizeof(EquivalenceClass *); + info.entrysize = sizeof(PathKey); + + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + root->canon_pathkeys_hash = hash_create("canon_pathkeys", 2048, &info, + HASH_FUNCTION | HASH_ELEM | HASH_COMPARE | HASH_CONTEXT); + + foreach(lc, root->canon_pathkeys) + { + PathKey *pk = lfirst(lc); + bool found; + + hash_search(root->canon_pathkeys_hash, pk, HASH_ENTER, &found); + Assert(!found); + } + } + + /* * make_canonical_pathkey * Given the parameters for a PathKey, find any pre-existing matching * pathkey in the query's list of "canonical" pathkeys. Make a new *************** make_canonical_pathkey(PlannerInfo *root *** 85,91 **** EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first) { ! PathKey *pk; ListCell *lc; MemoryContext oldcontext; --- 152,158 ---- EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first) { ! PathKey *pk, *pk_found; ListCell *lc; MemoryContext oldcontext; *************** make_canonical_pathkey(PlannerInfo *root *** 93,118 **** while (eclass->ec_merged) eclass = eclass->ec_merged; ! foreach(lc, root->canon_pathkeys) { ! pk = (PathKey *) lfirst(lc); ! if (eclass == pk->pk_eclass && ! opfamily == pk->pk_opfamily && ! strategy == pk->pk_strategy && ! nulls_first == pk->pk_nulls_first) ! return pk; } ! /* ! * Be sure canonical pathkeys are allocated in the main planning context. ! * Not an issue in normal planning, but it is for GEQO. ! */ ! oldcontext = MemoryContextSwitchTo(root->planner_cxt); ! pk = makePathKey(eclass, opfamily, strategy, nulls_first); ! root->canon_pathkeys = lappend(root->canon_pathkeys, pk); ! MemoryContextSwitchTo(oldcontext); return pk; } --- 160,222 ---- while (eclass->ec_merged) eclass = eclass->ec_merged; ! if (root->canon_pathkeys_hash == NULL && ! list_length(root->canon_pathkeys) > 32) ! build_canonical_pathkey_hash(root); ! ! if (root->canon_pathkeys_hash == NULL) { ! foreach(lc, root->canon_pathkeys) ! { ! pk = (PathKey *) lfirst(lc); ! if (eclass == pk->pk_eclass && ! opfamily == pk->pk_opfamily && ! strategy == pk->pk_strategy && ! nulls_first == pk->pk_nulls_first) ! return pk; ! } ! ! /* ! * Be sure canonical pathkeys are allocated in the main planning context. ! * Not an issue in normal planning, but it is for GEQO. ! */ ! oldcontext = MemoryContextSwitchTo(root->planner_cxt); ! ! pk = makePathKey(eclass, opfamily, strategy, nulls_first); ! root->canon_pathkeys = lappend(root->canon_pathkeys, pk); ! ! MemoryContextSwitchTo(oldcontext); } + else + { + uint32 hashval; + bool found; ! /* ! * Be sure canonical pathkeys are allocated in the main planning context. ! * Not an issue in normal planning, but it is for GEQO. ! */ ! oldcontext = MemoryContextSwitchTo(root->planner_cxt); ! pk = makePathKey(eclass, opfamily, strategy, nulls_first); ! hashval = pk_hash(pk, sizeof(PathKey)); ! ! pk_found = hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_FIND, &found); ! ! if (found) ! { ! pfree(pk); ! pk = pk_found; ! } ! else ! { ! root->canon_pathkeys = lappend(root->canon_pathkeys, pk); ! hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_ENTER, &found); ! } ! ! MemoryContextSwitchTo(oldcontext); ! } return pk; } diff -dcrpN postgresql.orig/src/backend/optimizer/plan/planmain.c postgresql.1/src/backend/optimizer/plan/planmain.c *** postgresql.orig/src/backend/optimizer/plan/planmain.c 2010-10-08 11:04:23.000000000 +0200 --- postgresql.1/src/backend/optimizer/plan/planmain.c 2010-10-28 12:37:47.000000000 +0200 *************** *** 27,32 **** --- 27,33 ---- #include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" + #include "utils/hsearch.h" #include "utils/selfuncs.h" *************** query_planner(PlannerInfo *root, List *t *** 118,123 **** --- 119,129 ---- * something like "SELECT 2+2 ORDER BY 1". */ root->canon_pathkeys = NIL; + if (root->canon_pathkeys_hash) + { + hash_destroy(root->canon_pathkeys_hash); + root->canon_pathkeys_hash = NULL; + } root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, *************** query_planner(PlannerInfo *root, List *t *** 146,151 **** --- 152,162 ---- root->join_rel_level = NULL; root->join_cur_level = 0; root->canon_pathkeys = NIL; + if (root->canon_pathkeys_hash) + { + hash_destroy(root->canon_pathkeys_hash); + root->canon_pathkeys_hash = NULL; + } root->left_join_clauses = NIL; root->right_join_clauses = NIL; root->full_join_clauses = NIL; diff -dcrpN postgresql.orig/src/include/nodes/relation.h postgresql.1/src/include/nodes/relation.h *** postgresql.orig/src/include/nodes/relation.h 2010-10-15 10:31:47.000000000 +0200 --- postgresql.1/src/include/nodes/relation.h 2010-10-28 12:37:47.000000000 +0200 *************** typedef struct PlannerInfo *** 159,166 **** --- 159,168 ---- List *cte_plan_ids; /* per-CTE-item list of subplan IDs */ List *eq_classes; /* list of active EquivalenceClasses */ + struct HTAB *eq_classes_hash; /* optional hashtable for equivalence classes */ List *canon_pathkeys; /* list of "canonical" PathKeys */ + struct HTAB *canon_pathkeys_hash; /* optional hashtable for "canonical" PathKeys */ List *left_join_clauses; /* list of RestrictInfos for * mergejoinable outer join clauses Binary files postgresql.orig/src/test/regress/gmon.out and postgresql.1/src/test/regress/gmon.out differ Binary files postgresql.orig/src/timezone/gmon.out and postgresql.1/src/timezone/gmon.out differ
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers