Horiguchi-san, Thanks for the review.
On 2018/01/30 20:43, Kyotaro HORIGUCHI wrote: > At Tue, 30 Jan 2018 09:57:44 +0900, Amit Langote wrote: >> AFAICS, v22 cleanly applies to HEAD (c12693d8f3 [1]), compiles, and make > I have some random comments on 0002. > > -extract_partition_key_clauses implicitly assumes that the > commutator of inequality operator is the same to the original > operator. (commutation for such operators is an identity > function.) Yeah, it seems so. > I believe it is always true for a sane operator definition, but > perhaps wouldn't it be better using commutator instead of > opclause->opno as the source of negator if any? (Attached diff 1) ISTM, the same thing happens with or without the patch. - pc->opno = OidIsValid(commutator) ? commutator : opclause->opno; + pc->opno = comparator; comparator as added by the patch is effectively equal to the RHS expression in the deleted line. > -get_partitions_from_or_clause_args abandons arg_partset with all > bit set when partition constraint doesn't refute whole the > partition. Finally get_partitions_from_clauses does the same > thing but it's waste of cycles and looks weird. It seems to have > intended to return immediately there. > > > /* Couldn't eliminate any of the partitions. */ > > partdesc = RelationGetPartitionDesc(context->relation); > > - arg_partset = bms_add_range(NULL, 0, partdesc->nparts - 1); > > + return bms_add_range(NULL, 0, partdesc->nparts - 1); > > } > > > > subcontext.rt_index = context->rt_index; > > subcontext.relation = context->relation; > > subcontext.clauseinfo = &partclauseinfo; > !> arg_partset = get_partitions_from_clauses(&subcontext); You're right, fixed. > -get_partitions_from_or_clause_args converts IN (null) into > nulltest and the nulltest doesn't exclude a child that the > partition key column can be null. > > drop table if exists p; > create table p (a int, b int) partition by list (a); > create table c1 partition of p for values in (1, 5, 7); > create table c2 partition of p for values in (4, 6, null); > insert into p values (1, 0), (null, 0); > > explain select tableoid::regclass, * from p where a in (1, null); > > QUERY PLAN > > ----------------------------------------------------------------- > > Result (cost=0.00..76.72 rows=22 width=12) > > -> Append (cost=0.00..76.50 rows=22 width=12) > > -> Seq Scan on c1 (cost=0.00..38.25 rows=11 width=12) > > Filter: (a = ANY ('{1,NULL}'::integer[])) > > -> Seq Scan on c2 (cost=0.00..38.25 rows=11 width=12) > > Filter: (a = ANY ('{1,NULL}'::integer[])) > > Although the clause "a in (null)" doesn't match the (null, 0) > row so it donesn't harm finally, I don't think this is a right > behavior. null in an SAOP rightop should be just ignored on > partition pruning. Or is there any purpose of this behavior? Yeah, it seems that we're better off ignoring null values appearing the IN-list. Framing a IS NULL or IS NOT NULL expression corresponding to a null value in the SAOP rightop array doesn't seem to be semantically correct, as you also pointed out. In ExecEvalScalarArrayOpExpr(), I see that a null in the rightop array (IN-list) doesn't lead to selecting rows containing null in the corresponding column. > - In extract_bounding_datums, clauseinfo is set twice to the same > value. Oops, my bad when merging in David's patch. Update patch set attached. Thanks again. Regards, Amit
>From 42ca7750e2b89caa3aee0c9ab7479a7f29953fff Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 22 Aug 2017 11:33:27 +0900 Subject: [PATCH v23 1/5] Some interface changes for partition_bound_{cmp/bsearch} Introduces a notion of PartitionBoundCmpArg, which replaces the set of arguments void *probe and bool probe_is_bound of both partition_bound_cmp and partition_bound_bsearch. It wasn't possible before to specify the number of datums when a non-bound type of probe is passed. Slightly tweaking the existing interface to allow specifying the same seems awkward. So, instead encapsulate that into PartitionBoundCmpArg. Also, modify partition_rbound_datum_cmp to compare caller-specifed number of datums, instead of key->partnatts datums. --- src/backend/catalog/partition.c | 166 +++++++++++++++++++++++++++++----------- 1 file changed, 123 insertions(+), 43 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index e69bbc0345..de2b53e0c8 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -138,6 +138,31 @@ typedef struct PartitionRangeBound bool lower; /* this is the lower (vs upper) bound */ } PartitionRangeBound; +/* + * PartitionBoundCmpArg - Caller-defined argument to be passed to + * partition_bound_cmp() + * + * The first (fixed) argument involved in a comparison is the partition bound + * found in the catalog, while an instance of the following struct describes + * either a new partition bound being compared against existing bounds + * (caller should set is_bound to true and set bound), or a new tuple's + * partition key specified in datums (caller should set ndatums to the number + * of valid datums that are passed in the array). + */ +typedef struct PartitionBoundCmpArg +{ + bool is_bound; + union + { + PartitionListValue *lbound; + PartitionRangeBound *rbound; + PartitionHashBound *hbound; + } bound; + + Datum *datums; + int ndatums; +} PartitionBoundCmpArg; + static int32 qsort_partition_hbound_cmp(const void *a, const void *b); static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg); @@ -170,14 +195,15 @@ static int32 partition_rbound_cmp(PartitionKey key, bool lower1, PartitionRangeBound *b2); static int32 partition_rbound_datum_cmp(PartitionKey key, Datum *rb_datums, PartitionRangeDatumKind *rb_kind, - Datum *tuple_datums); + Datum *tuple_datums, int n_tuple_datums); static int32 partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, - int offset, void *probe, bool probe_is_bound); + int offset, PartitionBoundCmpArg *arg); static int partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, - void *probe, bool probe_is_bound, bool *is_equal); + PartitionBoundCmpArg *arg, + bool *is_equal); static int get_partition_bound_num_indexes(PartitionBoundInfo b); static int get_greatest_modulus(PartitionBoundInfo b); static uint64 compute_hash_value(PartitionKey key, Datum *values, bool *isnull); @@ -985,6 +1011,8 @@ check_new_partition_bound(char *relname, Relation parent, valid_modulus = true; int prev_modulus, /* Previous largest modulus */ next_modulus; /* Next largest modulus */ + PartitionHashBound hbound; + PartitionBoundCmpArg arg; /* * Check rule that every modulus must be a factor of the @@ -999,8 +1027,14 @@ check_new_partition_bound(char *relname, Relation parent, * less than or equal to spec->modulus and * spec->remainder. */ - offset = partition_bound_bsearch(key, boundinfo, spec, - true, &equal); + memset(&hbound, 0, sizeof(PartitionHashBound)); + hbound.modulus = spec->modulus; + hbound.remainder = spec->remainder; + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.is_bound = true; + arg.bound.hbound = &hbound; + offset = partition_bound_bsearch(key, boundinfo, &arg, + &equal); if (offset < 0) { next_modulus = DatumGetInt32(datums[0][0]); @@ -1073,10 +1107,16 @@ check_new_partition_bound(char *relname, Relation parent, { int offset; bool equal; - + PartitionListValue lbound; + PartitionBoundCmpArg arg; + + memset(&lbound, 0, sizeof(PartitionListValue)); + lbound.value = val->constvalue; + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.is_bound = true; + arg.bound.lbound = &lbound; offset = partition_bound_bsearch(key, boundinfo, - &val->constvalue, - true, &equal); + &arg, &equal); if (offset >= 0 && equal) { overlap = true; @@ -1127,6 +1167,7 @@ check_new_partition_bound(char *relname, Relation parent, PartitionBoundInfo boundinfo = partdesc->boundinfo; int offset; bool equal; + PartitionBoundCmpArg arg; Assert(boundinfo && boundinfo->strategy == PARTITION_STRATEGY_RANGE && @@ -1148,8 +1189,11 @@ check_new_partition_bound(char *relname, Relation parent, * since the index array is initialised with an extra -1 * at the end. */ - offset = partition_bound_bsearch(key, boundinfo, lower, - true, &equal); + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.is_bound = true; + arg.bound.rbound = lower; + offset = partition_bound_bsearch(key, boundinfo, &arg, + &equal); if (boundinfo->indexes[offset + 1] < 0) { @@ -1163,9 +1207,9 @@ check_new_partition_bound(char *relname, Relation parent, { int32 cmpval; + arg.bound.rbound = upper; cmpval = partition_bound_cmp(key, boundinfo, - offset + 1, upper, - true); + offset + 1, &arg); if (cmpval < 0) { /* @@ -2537,12 +2581,15 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull) else { bool equal = false; + PartitionBoundCmpArg arg; + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.is_bound = false; + arg.datums = values; + arg.ndatums = key->partnatts; bound_offset = partition_bound_bsearch(key, partdesc->boundinfo, - values, - false, - &equal); + &arg, &equal); if (bound_offset >= 0 && equal) part_index = partdesc->boundinfo->indexes[bound_offset]; } @@ -2569,11 +2616,15 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull) if (!range_partkey_has_null) { + PartitionBoundCmpArg arg; + + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.is_bound = false; + arg.datums = values; + arg.ndatums = key->partnatts; bound_offset = partition_bound_bsearch(key, partdesc->boundinfo, - values, - false, - &equal); + &arg, &equal); /* * The bound at bound_offset is less than or equal to the @@ -2845,12 +2896,12 @@ partition_rbound_cmp(PartitionKey key, static int32 partition_rbound_datum_cmp(PartitionKey key, Datum *rb_datums, PartitionRangeDatumKind *rb_kind, - Datum *tuple_datums) + Datum *tuple_datums, int n_tuple_datums) { int i; int32 cmpval = -1; - for (i = 0; i < key->partnatts; i++) + for (i = 0; i < n_tuple_datums; i++) { if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE) return -1; @@ -2872,11 +2923,11 @@ partition_rbound_datum_cmp(PartitionKey key, * partition_bound_cmp * * Return whether the bound at offset in boundinfo is <, =, or > the argument - * specified in *probe. + * specified in *arg. */ static int32 partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, - int offset, void *probe, bool probe_is_bound) + int offset, PartitionBoundCmpArg *arg) { Datum *bound_datums = boundinfo->datums[offset]; int32 cmpval = -1; @@ -2885,25 +2936,55 @@ partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, { case PARTITION_STRATEGY_HASH: { - PartitionBoundSpec *spec = (PartitionBoundSpec *) probe; + int modulus, + remainder; + + if (arg->is_bound) + { + modulus = arg->bound.hbound->modulus; + remainder = arg->bound.hbound->remainder; + } + else + { + modulus = DatumGetInt32(arg->datums[0]); + remainder = DatumGetInt32(arg->datums[1]); + } cmpval = partition_hbound_cmp(DatumGetInt32(bound_datums[0]), DatumGetInt32(bound_datums[1]), - spec->modulus, spec->remainder); + modulus, remainder); break; } case PARTITION_STRATEGY_LIST: - cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], - key->partcollation[0], - bound_datums[0], - *(Datum *) probe)); - break; + { + Datum listdatum; + + if (arg->is_bound) + listdatum = arg->bound.lbound->value; + else + { + if (arg->ndatums >= 1) + listdatum = arg->datums[0]; + /* + * If there's no tuple datum to compare with the bound, + * consider the latter to be greater. + */ + else + return 1; + } + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + bound_datums[0], + listdatum)); + break; + } case PARTITION_STRATEGY_RANGE: { PartitionRangeDatumKind *kind = boundinfo->kind[offset]; - if (probe_is_bound) + if (arg->is_bound) { /* * We need to pass whether the existing bound is a lower @@ -2914,12 +2995,13 @@ partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, cmpval = partition_rbound_cmp(key, bound_datums, kind, lower, - (PartitionRangeBound *) probe); + arg->bound.rbound); } else cmpval = partition_rbound_datum_cmp(key, bound_datums, kind, - (Datum *) probe); + arg->datums, + arg->ndatums); break; } @@ -2933,20 +3015,19 @@ partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, /* * Binary search on a collection of partition bounds. Returns greatest - * bound in array boundinfo->datums which is less than or equal to *probe. - * If all bounds in the array are greater than *probe, -1 is returned. + * bound in array boundinfo->datums which is less than or equal to *arg. + * If all bounds in the array are greater than *arg, -1 is returned. * - * *probe could either be a partition bound or a Datum array representing - * the partition key of a tuple being routed; probe_is_bound tells which. - * We pass that down to the comparison function so that it can interpret the - * contents of *probe accordingly. + * *arg may contain either a partition bound struct or a Datum array + * representing the partition key of a tuple being routed. We simply pass + * that down to partition_bound_cmp where it is interpreted appropriately. * - * *is_equal is set to whether the bound at the returned index is equal with - * *probe. + * *is_equal is set to whether the bound at the returned index is exactly + * equal to *arg. */ static int partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, - void *probe, bool probe_is_bound, bool *is_equal) + PartitionBoundCmpArg *arg, bool *is_equal) { int lo, hi, @@ -2959,8 +3040,7 @@ partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, int32 cmpval; mid = (lo + hi + 1) / 2; - cmpval = partition_bound_cmp(key, boundinfo, mid, probe, - probe_is_bound); + cmpval = partition_bound_cmp(key, boundinfo, mid, arg); if (cmpval <= 0) { lo = mid; -- 2.11.0
>From b20b3a3deca2f9469045b2ea89683cb34ad5cc15 Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 22 Aug 2017 13:48:13 +0900 Subject: [PATCH v23 2/5] Introduce a get_partitions_from_clauses() Whereas get_partition_for_tuple() takes a tuple and returns index of the partition of the table that should contain that tuple, get_partitions_from_clauses() will take a list of clauses and return a set of indexes of the partitions that satisfy all of those clauses. Aforementioned list of clauses must be all clauses that were matched to the partition key(s) using populate_partition_clauses() It is meant as a faster alternative to the planner's current method of selecting a table's partitions by running contraint exclusion algorithm against the partition constraint of each of the partitions. Authors: Amit Langote, David Rowley (david.row...@2ndquadrant.com) --- src/backend/catalog/partition.c | 2071 ++++++++++++++++++++++++++++++++++ src/backend/nodes/copyfuncs.c | 22 + src/backend/optimizer/util/clauses.c | 4 +- src/include/catalog/partition.h | 13 + src/include/catalog/pg_opfamily.h | 3 + src/include/nodes/nodes.h | 1 + src/include/nodes/primnodes.h | 31 + src/include/optimizer/clauses.h | 2 + 8 files changed, 2144 insertions(+), 3 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index de2b53e0c8..a5179d177e 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -28,6 +28,8 @@ #include "catalog/pg_inherits.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_type.h" #include "commands/tablecmds.h" @@ -38,6 +40,8 @@ #include "nodes/parsenodes.h" #include "optimizer/clauses.h" #include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/predtest.h" #include "optimizer/prep.h" #include "optimizer/var.h" #include "parser/parse_coerce.h" @@ -163,6 +167,81 @@ typedef struct PartitionBoundCmpArg int ndatums; } PartitionBoundCmpArg; +/* + * Information about a clause matched with a partition key column kept to + * avoid recomputing it in remove_redundant_clauses(). + */ +typedef struct PartClause +{ + Oid opno; /* opno to compare partkey to 'value' */ + Oid inputcollid; /* collation to compare partkey to 'value' */ + Expr *value; /* The value the partition key is being compared to */ + + /* cached info. */ + bool valid_cache; /* Are the following fields populated? */ + int op_strategy; + Oid op_subtype; + FmgrInfo op_func; +} PartClause; + +/* + * Strategy of a partition clause operator per the partitioning operator class + * definition. + */ +typedef enum PartOpStrategy +{ + PART_OP_EQUAL, + PART_OP_LESS, + PART_OP_GREATER +} PartOpStrategy; + +/* + * PartScanKeyInfo + * Information about partition look up keys to be passed to + * get_partitions_for_keys() + * + * Stores Datums and nullness properties found in clauses which match the + * partition key. Properties found are cached and are indexed by the + * partition key index. + */ +typedef struct PartScanKeyInfo +{ + /* + * Equality look up key. Used to store known Datums values from clauses + * compared by an equality operation to the partition key. + */ + Datum eqkeys[PARTITION_MAX_KEYS]; + + /* + * Lower and upper bounds on a sequence of selected partitions. These may + * contain values for only a prefix of the partition keys. + */ + Datum minkeys[PARTITION_MAX_KEYS]; + Datum maxkeys[PARTITION_MAX_KEYS]; + + /* + * Number of values stored in the corresponding array above + */ + int n_eqkeys; + int n_minkeys; + int n_maxkeys; + + /* + * Properties to mark if the clauses found for the corresponding partition + * are inclusive of the stored value or not. + */ + bool min_incl; + bool max_incl; + + /* + * Information about nullness of the partition keys, either specified + * explicitly in the query (in the form of a IS [NOT] NULL clause) or + * implied from strict clauses matching the partition key. + */ + Bitmapset *keyisnull; + Bitmapset *keyisnotnull; +} PartScanKeyInfo; + static int32 qsort_partition_hbound_cmp(const void *a, const void *b); static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg); @@ -211,6 +290,35 @@ static uint64 compute_hash_value(PartitionKey key, Datum *values, bool *isnull); /* SQL-callable function for use in hash partition CHECK constraints */ PG_FUNCTION_INFO_V1(satisfies_hash_partition); +static Bitmapset *get_partitions_excluded_by_ne_clauses( + PartitionPruneContext *context, + List *ne_clauses); +static Bitmapset *get_partitions_from_or_clause_args( + PartitionPruneContext *context, + List *or_clause_args); +static void extract_partition_key_clauses(PartitionKey partkey, List *clauses, + int rt_index, PartitionClauseInfo *partclauses); +static bool extract_bounding_datums(PartitionKey partkey, + PartitionPruneContext *context, + PartScanKeyInfo *keys); +static bool partition_cmp_args(PartitionKey key, int partkeyidx, + PartClause *pc, PartClause *leftarg, PartClause *rightarg, + bool *result); +static PartOpStrategy partition_op_strategy(PartitionKey key, PartClause *pc, + bool *incl); +static bool partkey_datum_from_expr(PartitionKey key, int partkeyidx, + Expr *expr, Datum *value); +static void remove_redundant_clauses(PartitionKey partkey, + PartitionPruneContext *context); +static Bitmapset *get_partitions_for_keys(Relation rel, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_hash(Relation rel, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_list(Relation rel, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_range(Relation rel, + PartScanKeyInfo *keys); + /* * RelationBuildPartitionDesc * Form rel's partition descriptor @@ -1581,9 +1689,1972 @@ get_partition_qual_relid(Oid relid) return result; } +/* + * populate_partition_clauses + * Processes 'clauses' to try to match them to relation's partition + * keys. If any compatible clauses are found which match a partition + * key, then these clauses are stored in 'partclauseinfo'. + * + * The caller must ensure that 'clauses' is not an empty List. Upon return, + * callers must also check if the 'partclauseinfo' constfalse has been set, if + * so, then they must be aware that the 'partclauseinfo' may only be partially + * populated. + */ +void +populate_partition_clauses(Relation relation, + int rt_index, List *clauses, + PartitionClauseInfo *partclauseinfo) +{ + PartitionDesc partdesc; + PartitionKey partkey; + PartitionBoundInfo boundinfo; + + Assert(clauses != NIL); + + partkey = RelationGetPartitionKey(relation); + partdesc = RelationGetPartitionDesc(relation); + + /* Some functions called below modify this list */ + clauses = list_copy(clauses); + boundinfo = partdesc->boundinfo; + + /* + * For sub-partitioned tables there's a corner case where if the + * sub-partitioned table shares any partition keys with its parent, then + * it's possible that the partitioning hierarchy allows the parent + * partition to only contain a narrower range of values than the + * sub-partitioned table does. In this case it is possible that we'd + * include partitions that could not possibly have any tuples matching + * 'clauses'. The possibility of such a partition arrangement + * is perhaps unlikely for non-default partitions, but it may be more + * likely in the case of default partitions, so we'll add the parent + * partition table's partition qual to the clause list in this case only. + * This may result in the default partition being eliminated. + */ + if (partition_bound_has_default(boundinfo)) + { + List *partqual = RelationGetPartitionQual(relation); + + partqual = (List *) expression_planner((Expr *) partqual); + + /* Fix Vars to have the desired varno */ + if (rt_index != 1) + ChangeVarNodes((Node *) partqual, 1, rt_index, 0); + + clauses = list_concat(clauses, partqual); + } + + extract_partition_key_clauses(partkey, clauses, rt_index, partclauseinfo); +} + +/* + * get_partitions_from_clauses + * Determine all partitions of context->relation that could possibly + * contain a record that matches clauses as described in + * context->clauseinfo + * + * Returns a Bitmapset of the matching partition indexes, or NULL if none can + * match. + */ +Bitmapset * +get_partitions_from_clauses(PartitionPruneContext *context) +{ + PartitionClauseInfo *partclauseinfo = context->clauseinfo; + PartitionDesc partdesc; + PartScanKeyInfo keys; + Bitmapset *result; + ListCell *lc; + + Assert(partclauseinfo != NULL); + + /* + * Check if there were proofs that no partitions can match due to some + * clause items contradicting another. + */ + if (partclauseinfo->constfalse) + return NULL; + + partdesc = RelationGetPartitionDesc(context->relation); + + if (!partclauseinfo->foundkeyclauses) + { + /* No interesting clauses were found to eliminate partitions. */ + result = bms_add_range(NULL, 0, partdesc->nparts - 1); + } + else + { + PartitionKey partkey = RelationGetPartitionKey(context->relation); + + /* collapse clauses down to the most restrictive set */ + remove_redundant_clauses(partkey, context); + + /* Did remove_redundant_clauses find any contradicting clauses? */ + if (partclauseinfo->constfalse) + return NULL; + + if (extract_bounding_datums(partkey, context, &keys)) + { + result = get_partitions_for_keys(context->relation, &keys); + + /* + * No point in trying to look at other conjunctive clauses, if we + * got an empty set in the first place. + */ + if (bms_is_empty(result)) + return NULL; + } + else + { + /* + * Looks like we didn't have the values we'd need to eliminate + * partitions using get_partitions_for_keys, likely because + * context->clauseinfo only contained <> clauses and/or OR + * clauses, which are handled further below in this function. + */ + result = bms_add_range(NULL, 0, partdesc->nparts - 1); + } + } + + /* Select partitions by applying the clauses containing <> operators. */ + if (partclauseinfo->ne_clauses) + { + Bitmapset *ne_parts; + + ne_parts = get_partitions_excluded_by_ne_clauses(context, + partclauseinfo->ne_clauses); + + /* Remove any partitions we found to not be needed */ + result = bms_del_members(result, ne_parts); + bms_free(ne_parts); + } + + /* Select partitions by applying OR clauses. */ + foreach(lc, partclauseinfo->or_clauses) + { + List *or_args = (List *) lfirst(lc); + PartitionPruneContext orcontext; + Bitmapset *or_parts; + + orcontext.rt_index = context->rt_index; + orcontext.relation = context->relation; + orcontext.clauseinfo = NULL; + + or_parts = get_partitions_from_or_clause_args(&orcontext, or_args); + + /* + * Clauses in or_clauses are mutually conjunctive and also in + * in conjunction with the rest of the clauses above, so combine the + * partitions thus selected with those in result using set + * intersection. + */ + result = bms_int_members(result, or_parts); + bms_free(or_parts); + } + + return result; +} + /* Module-local functions */ /* + * get_partitions_excluded_by_ne_clauses + * + * Returns a Bitmapset of partition indexes of any partition that can safely + * be removed due to 'ne_clauses' containing not-equal clauses for all + * possible values that the partition can contain. + */ +static Bitmapset * +get_partitions_excluded_by_ne_clauses(PartitionPruneContext *context, + List *ne_clauses) +{ + ListCell *lc; + Bitmapset *excluded_parts; + Bitmapset *foundoffsets = NULL; + Relation relation = context->relation; + PartitionKey partkey = RelationGetPartitionKey(relation); + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + PartitionBoundCmpArg arg; + int *datums_in_part; + int *datums_found; + int i; + + Assert(partkey->strategy == PARTITION_STRATEGY_LIST); + Assert(partkey->partnatts == 1); + + memset(&arg, 0, sizeof(arg)); + + /* + * Build a Bitmapset to record the indexes of all datums of the + * query that are found in boundinfo. + */ + foreach(lc, ne_clauses) + { + PartClause *pc = (PartClause *) lfirst(lc); + Datum datum; + + if (partkey_datum_from_expr(partkey, 0, pc->value, &datum)) + { + int offset; + bool is_equal; + + arg.datums = &datum; + arg.ndatums = 1; + offset = partition_bound_bsearch(partkey, boundinfo, &arg, + &is_equal); + + if (offset >= 0 && is_equal) + { + Assert(boundinfo->indexes[offset] >= 0); + foundoffsets = bms_add_member(foundoffsets, offset); + } + } + } + + /* No partitions can be excluded if none of the datums were found. */ + if (bms_is_empty(foundoffsets)) + return NULL; + + /* + * Since each list partition can permit multiple values, we must ensure + * that we got clauses for all those values before we can eliminate the + * the entire partition. + * + * We'll need two arrays for this, one to count the number of unique + * datums found in the query which belong to each partition, and another + * to record the number of datums permitted in each partition. Once we've + * counted all this, we can eliminate any partition where the number of + * datums found matches the number of datums allowed in the partition. + */ + datums_in_part = (int *) palloc0(sizeof(int) * partdesc->nparts); + datums_found = (int *) palloc0(sizeof(int) * partdesc->nparts); + + i = -1; + while ((i = bms_next_member(foundoffsets, i)) >= 0) + datums_found[boundinfo->indexes[i]]++; + + /* + * Now, in a single pass over all the datums, count the number of datums + * permitted in each partition. + */ + for (i = 0; i < boundinfo->ndatums; i++) + datums_in_part[boundinfo->indexes[i]]++; + + /* + * Now compare the counts and eliminate any partition for which we found + * clauses for all its permitted values. We must be careful here not to + * eliminate the default partition. We can recognize that by it having a + * zero value in both arrays. + */ + excluded_parts = NULL; + + for (i = 0; i < partdesc->nparts; i++) + { + if (datums_found[i] >= datums_in_part[i] && datums_found[i] > 0) + excluded_parts = bms_add_member(excluded_parts, i); + } + + /* + * Because the above clauses are strict, we can also exclude the NULL + * partition, provided it does not also allow non-NULL values. + */ + if (partition_bound_accepts_nulls(boundinfo) && + datums_in_part[boundinfo->null_index] == 0) + excluded_parts = bms_add_member(excluded_parts, + boundinfo->null_index); + + pfree(datums_in_part); + pfree(datums_found); + + return excluded_parts; +} + +/* + * get_partitions_from_or_clause_args + * + * Returns the set of partitions of relation, each of which satisfies some + * clause in or_clause_args. + */ +static Bitmapset * +get_partitions_from_or_clause_args(PartitionPruneContext *context, + List *or_clause_args) +{ + PartitionKey partkey = RelationGetPartitionKey(context->relation); + Bitmapset *result = NULL; + ListCell *lc; + + /* + * When matching an OR expression, it is only checked if at least one of + * its args matches the partition key, not all. For arguments that don't + * match, we cannot eliminate any of its partitions using + * get_partitions_from_clauses(). However, if the table is itself a + * partition, we may be able to prove using constraint exclusion that the + * clause refutes its partition constraint, that is, we can eliminate all + * of its partitions. + */ + foreach(lc, or_clause_args) + { + List *clauses = list_make1(lfirst(lc)); + PartitionClauseInfo partclauseinfo; + PartitionPruneContext subcontext; + Bitmapset *arg_partset; + + extract_partition_key_clauses(partkey, clauses, context->rt_index, + &partclauseinfo); + + if (!partclauseinfo.foundkeyclauses) + { + List *partconstr = RelationGetPartitionQual(context->relation); + PartitionDesc partdesc; + + if (partconstr) + { + partconstr = (List *) expression_planner((Expr *) partconstr); + if (context->rt_index != 1) + ChangeVarNodes((Node *) partconstr, 1, context->rt_index, + 0); + if (predicate_refuted_by(partconstr, clauses, false)) + continue; + } + + /* Couldn't eliminate any of the partitions. */ + partdesc = RelationGetPartitionDesc(context->relation); + return bms_add_range(NULL, 0, partdesc->nparts - 1); + } + + subcontext.rt_index = context->rt_index; + subcontext.relation = context->relation; + subcontext.clauseinfo = &partclauseinfo; + arg_partset = get_partitions_from_clauses(&subcontext); + + result = bms_add_members(result, arg_partset); + bms_free(arg_partset); + } + + return result; +} + +/* Match partition key (partattno/partexpr) to an expression (expr). */ +#define EXPR_MATCHES_PARTKEY(expr, partattno, partexpr) \ + ((partattno) != 0 ? \ + (IsA((expr), Var) && \ + ((Var *) (expr))->varattno == (partattno)) : \ + equal((expr), (partexpr))) + +#define COLLATION_MATCH(partcoll, exprcoll) \ + (!OidIsValid(partcoll) || (partcoll) == (exprcoll)) + +/* + * extract_partition_key_clauses + * Processes 'clauses' to extract clause matching the partition key. + * This adds matched clauses to the list corresponding to particular key + * in 'partclauseinfo'. Also collects other useful clauses to assist + * in partition elimination, such as OR clauses, clauses containing <> + * operator, and IS [NOT] NULL clauses + * + * We may also discover some contradiction in the clauses which means that no + * partition can possibly match. In this case, the function sets + * partclauseinfo's 'constfalse' to true and exits immediately without + * processing any further clauses. In this case, the caller must be careful + * not to assume the PartitionClauseInfo is fully populated with all clauses. + */ +static void +extract_partition_key_clauses(PartitionKey partkey, List *clauses, + int rt_index, + PartitionClauseInfo *partclauseinfo) +{ + int i; + ListCell *lc; + + memset(partclauseinfo, 0, sizeof(PartitionClauseInfo)); + + foreach(lc, clauses) + { + Expr *clause = (Expr *) lfirst(lc); + ListCell *partexprs_item; + + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; + + clause = rinfo->clause; + if (rinfo->pseudoconstant && + !DatumGetBool(((Const *) clause)->constvalue)) + { + partclauseinfo->constfalse = true; + return; + } + } + + /* Get the BoolExpr's out of the way.*/ + if (IsA(clause, BoolExpr)) + { + if (or_clause((Node *) clause)) + { + partclauseinfo->or_clauses = + lappend(partclauseinfo->or_clauses, + ((BoolExpr *) clause)->args); + continue; + } + else if (and_clause((Node *) clause)) + { + clauses = list_concat(clauses, + list_copy(((BoolExpr *) clause)->args)); + continue; + } + /* Fall-through for a NOT clause, which is handled below. */ + } + + partexprs_item = list_head(partkey->partexprs); + for (i = 0; i < partkey->partnatts; i++) + { + PartClause *pc; + Oid partopfamily = partkey->partopfamily[i]; + Oid partcoll = partkey->partcollation[i]; + Oid commutator = InvalidOid; + AttrNumber partattno = partkey->partattrs[i]; + Expr *partexpr = NULL; + + /* + * A zero attno means the partition key is an expression, so grab + * the next expression from the list. + */ + if (partattno == 0) + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + partexpr = (Expr *) lfirst(partexprs_item); + + /* + * Expressions stored for the PartitionKey in the relcache are + * all stored with the dummy varno of 1. Change that to what + * we need. + */ + if (rt_index != 1) + { + /* make a copy so as not to overwrite the relcache */ + partexpr = (Expr *) copyObject(partexpr); + ChangeVarNodes((Node *) partexpr, 1, rt_index, 0); + } + + partexprs_item = lnext(partexprs_item); + } + + if (IsA(clause, OpExpr)) + { + OpExpr *opclause = (OpExpr *) clause; + Expr *leftop, + *rightop, + *valueexpr; + bool is_ne_listp = false; + + leftop = (Expr *) get_leftop(clause); + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + rightop = (Expr *) get_rightop(clause); + if (IsA(rightop, RelabelType)) + rightop = ((RelabelType *) rightop)->arg; + + /* check if the clause matches this partition key */ + if (EXPR_MATCHES_PARTKEY(leftop, partattno, partexpr)) + valueexpr = rightop; + else if (EXPR_MATCHES_PARTKEY(rightop, partattno, partexpr)) + { + valueexpr = leftop; + + commutator = get_commutator(opclause->opno); + + /* nothing we can do unless we can swap the operands */ + if (!OidIsValid(commutator)) + continue; + } + else + /* Clause does not match this partition key. */ + continue; + + /* + * Also, useless, if the clause's collation is different from + * the partitioning collation. + */ + if (!COLLATION_MATCH(partcoll, opclause->inputcollid)) + continue; + + /* + * Only allow strict operators. This will guarantee nulls are + * filtered. + */ + if (!op_strict(opclause->opno)) + continue; + + /* We can't use any volatile value to prune partitions. */ + if (contain_volatile_functions((Node *) valueexpr)) + continue; + + /* + * Handle cases where the clause's operator does not belong to + * the partitioning operator family. We currently handle two + * such cases: 1. Operators named '<>' are not listed in any + * operator family whatsoever, 2. Ordering operators like '<' + * are not listed in the hash operator families. For 1, check + * if list partitioning is in use and if so, proceed to pass + * the clause to the caller without doing any more processing + * ourselves. 2 cannot be handled at all, so the clause is + * simply skipped. + */ + if (!op_in_opfamily(opclause->opno, partopfamily)) + { + Oid negator; + + /* + * To confirm if the operator is really '<>', check if its + * negator is a equality operator. If it's a btree + * equality operator *and* this is a list partitioned + * table, we can use it prune partitions. + */ + negator = get_negator(opclause->opno); + if (OidIsValid(negator) && + op_in_opfamily(negator, partopfamily)) + { + Oid lefttype; + Oid righttype; + int strategy; + + get_op_opfamily_properties(negator, partopfamily, + false, + &strategy, + &lefttype, &righttype); + + if (strategy == BTEqualStrategyNumber && + partkey->strategy == PARTITION_STRATEGY_LIST) + is_ne_listp = true; + } + + /* Cannot handle this clause. */ + if (!is_ne_listp) + continue; + } + + pc = (PartClause *) palloc0(sizeof(PartClause)); + pc->opno = OidIsValid(commutator) ? commutator : opclause->opno; + pc->inputcollid = opclause->inputcollid; + pc->value = valueexpr; + + /* + * We don't turn a <> operator clause into a key right away. + * Instead, the caller will hand over such clauses to + * get_partitions_excluded_by_ne_clauses(). + */ + if (is_ne_listp) + partclauseinfo->ne_clauses = + lappend(partclauseinfo->ne_clauses, + pc); + else + partclauseinfo->keyclauses[i] = + lappend(partclauseinfo->keyclauses[i], + pc); + + /* + * Since we only allow strict operators, check for any + * contradicting IS NULLs. + */ + if (bms_is_member(i, partclauseinfo->keyisnull)) + { + partclauseinfo->constfalse = true; + return; + } + /* Record that a strict clause has been seen for this key */ + partclauseinfo->keyisnotnull = + bms_add_member(partclauseinfo->keyisnotnull, + i); + partclauseinfo->foundkeyclauses = true; + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Oid saop_op = saop->opno; + Oid saop_coll = saop->inputcollid; + Expr *leftop = (Expr *) linitial(saop->args), + *rightop = (Expr *) lsecond(saop->args); + List *elem_exprs, + *elem_clauses; + ListCell *lc1; + + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + + /* Check it matches this partition key */ + if (!EXPR_MATCHES_PARTKEY(leftop, partattno, partexpr)) + continue; + + /* + * Also, useless, if the clause's collation is different from + * the partitioning collation. + */ + if (!COLLATION_MATCH(partcoll, saop->inputcollid)) + continue; + + /* + * Only allow strict operators. This will guarantee null are + * filtered. + */ + if (!op_strict(saop->opno)) + continue; + + /* Useless if the array has any volatile functions. */ + if (contain_volatile_functions((Node *) rightop)) + continue; + + /* + * In case of NOT IN (..), we get a '<>', which while not + * listed as part of any operator family, we are able to + * handle it if its negator is indeed a part of the + * partitioning equality operator. + */ + if (!op_in_opfamily(saop_op, partopfamily)) + { + Oid negator = get_negator(saop_op); + int strategy; + Oid lefttype, + righttype; + + if (!OidIsValid(negator)) + continue; + get_op_opfamily_properties(negator, partopfamily, false, + &strategy, + &lefttype, &righttype); + if (strategy != BTEqualStrategyNumber) + continue; + } + + /* + * First generate a list of Const nodes, one for each array + * element. + */ + elem_exprs = NIL; + if (IsA(rightop, Const)) + { + Const *arr = (Const *) lsecond(saop->args); + ArrayType *arrval = DatumGetArrayTypeP(arr->constvalue); + int16 elemlen; + bool elembyval; + char elemalign; + Datum *elem_values; + bool *elem_nulls; + int num_elems; + + get_typlenbyvalalign(ARR_ELEMTYPE(arrval), + &elemlen, &elembyval, &elemalign); + deconstruct_array(arrval, + ARR_ELEMTYPE(arrval), + elemlen, elembyval, elemalign, + &elem_values, &elem_nulls, + &num_elems); + for (i = 0; i < num_elems; i++) + { + /* Only consider non-null values. */ + if (!elem_nulls[i]) + { + Const *elem_expr = makeConst(ARR_ELEMTYPE(arrval), + -1, arr->constcollid, + elemlen, + elem_values[i], + false, elembyval); + + elem_exprs = lappend(elem_exprs, elem_expr); + } + } + } + else + { + ArrayExpr *arrexpr = castNode(ArrayExpr, rightop); + + /* + * For a nested ArrayExpr, we don't know how to get the + * actual scalar values out into a flat list, so we give + * up doing anything with this ScalarArrayOpExpr. + */ + if (arrexpr->multidims) + continue; + + elem_exprs = arrexpr->elements; + } + + /* + * Now generate a list of clauses, one for each array element, + * of the form: saop_leftop saop_op elem_expr + */ + elem_clauses = NIL; + foreach(lc1, elem_exprs) + { + Expr *rightop = (Expr *) lfirst(lc1), + *elem_clause; + + elem_clause = (Expr *) make_opclause(saop_op, BOOLOID, + false, + leftop, rightop, + InvalidOid, + saop_coll); + elem_clauses = lappend(elem_clauses, elem_clause); + } + + /* + * Build the OR clause if needed or add the clauses to the end + * of the list that's being processed currently. + */ + if (saop->useOr && list_length(elem_clauses) > 1) + partclauseinfo->or_clauses = + lappend(partclauseinfo->or_clauses, + elem_clauses); + else + clauses = list_concat(clauses, elem_clauses); + partclauseinfo->foundkeyclauses = true; + } + else if (IsA(clause, NullTest)) + { + NullTest *nulltest = (NullTest *) clause; + Expr *arg = nulltest->arg; + + if (IsA(arg, RelabelType)) + arg = ((RelabelType *) arg)->arg; + + /* Does leftop match with this partition key column? */ + if (EXPR_MATCHES_PARTKEY(arg, partattno, partexpr)) + { + if (nulltest->nulltesttype == IS_NULL) + { + /* check for conflicting IS NOT NULLs */ + if (bms_is_member(i, partclauseinfo->keyisnotnull)) + { + partclauseinfo->constfalse = true; + return; + } + partclauseinfo->keyisnull = + bms_add_member(partclauseinfo->keyisnull, + i); + } + else + { + /* check for conflicting IS NULLs */ + if (bms_is_member(i, partclauseinfo->keyisnull)) + { + partclauseinfo->constfalse = true; + return; + } + + partclauseinfo->keyisnotnull = + bms_add_member(partclauseinfo->keyisnotnull, + i); + } + partclauseinfo->foundkeyclauses = true; + } + } + /* + * Boolean clauses have a special shape, which would've been + * accepted if the partitioning opfamily accepts Boolean + * conditions. + */ + else if (IsBooleanOpfamily(partopfamily) && + (IsA(clause, BooleanTest) || + IsA(clause, Var) || + not_clause((Node *) clause))) + { + Expr *leftop, + *rightop; + + if (IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + + /* Only IS [NOT] TRUE/FALSE are any good to us */ + if (btest->booltesttype == IS_UNKNOWN || + btest->booltesttype == IS_NOT_UNKNOWN) + continue; + + leftop = btest->arg; + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + + /* Clause does not match this partition key. */ + if (!EXPR_MATCHES_PARTKEY(leftop, partattno, partexpr)) + continue; + + rightop = (btest->booltesttype == IS_TRUE || + btest->booltesttype == IS_NOT_FALSE) + ? (Expr *) makeBoolConst(true, false) + : (Expr *) makeBoolConst(false, false); + } + else + { + leftop = IsA(clause, Var) + ? (Expr *) clause + : (Expr *) get_notclausearg((Expr *) clause); + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + + /* Clause does not match this partition key. */ + if (!EXPR_MATCHES_PARTKEY(leftop, partattno, partexpr)) + continue; + + rightop = IsA(clause, Var) + ? (Expr *) makeBoolConst(true, false) + : (Expr *) makeBoolConst(false, false); + } + + pc = (PartClause *) palloc0(sizeof(PartClause)); + pc->opno = BooleanEqualOperator; + pc->inputcollid = InvalidOid; + pc->value = rightop; + + partclauseinfo->keyclauses[i] = + lappend(partclauseinfo->keyclauses[i], + pc); + partclauseinfo->foundkeyclauses = true; + } + } + } +} + +/* + * extract_bounding_datums + * Process clauses in context->clauseinfo and populate 'keys' with all + * min/max/equal values that we're able to determine. + * + * For RANGE partitioning we do not need to match and find values for all + * partition keys. We may be able to eliminate some partitions with just a + * prefix of the partition keys. HASH partitioning does require all keys are + * matched to with at least some combinations of equality clauses and IS NULL + * clauses. LIST partitions don't support multiple partition keys. + * + * Returns true if at least one key was found; false otherwise. + */ +static bool +extract_bounding_datums(PartitionKey partkey, PartitionPruneContext *context, + PartScanKeyInfo *keys) +{ + PartitionClauseInfo *clauseinfo = context->clauseinfo; + bool need_next_eq, + need_next_min, + need_next_max; + int i; + ListCell *lc; + + /* + * Based on the strategies of the clauses' operators (=, </<=, >/>=), try + * to construct a tuple of those datums that serve as the exact lookup + * tuple or two tuples that serve as minimum and maximum bound. + * + * If we find datums for all partition key columns that appear in = + * operator clauses, then we have the exact match lookup tuple, which will + * be used to match just one partition (although that's required only for + * range partitioning, finding datums for just some columns is fine for + * hash partitioning). + * + * If the last datum in a tuple comes from a clause containing </<= or + * >/>= operator, then that constitutes the minimum or maximum bound tuple, + * respectively. There is one exception -- if we have a tuple containing + * values for only a prefix of partition key columns, where none of its + * values come from a </<= or >/>= operator clause, we still consider such + * tuple as both minimum and maximum bound tuple. + */ + need_next_eq = true; + need_next_min = true; + need_next_max = true; + memset(keys, 0, sizeof(PartScanKeyInfo)); + for (i = 0; i < partkey->partnatts; i++) + { + List *clauselist = clauseinfo->keyclauses[i]; + + /* + * Min and max keys must constitute a prefix of the partition key and + * must appear in the same order as partition keys. Equal keys have + * to satisfy that requirement only for non-hash partitioning. + */ + if (i > keys->n_eqkeys && + partkey->strategy != PARTITION_STRATEGY_HASH) + need_next_eq = false; + + if (i > keys->n_minkeys) + need_next_min = false; + + if (i > keys->n_maxkeys) + need_next_max = false; + + foreach(lc, clauselist) + { + PartClause *clause = (PartClause *) lfirst(lc); + Expr *value = clause->value; + bool incl; + PartOpStrategy op_strategy; + + op_strategy = partition_op_strategy(partkey, clause, &incl); + switch (op_strategy) + { + case PART_OP_EQUAL: + Assert(incl); + if (need_next_eq && + partkey_datum_from_expr(partkey, i, value, + &keys->eqkeys[i])) + keys->n_eqkeys++; + + if (need_next_max && + partkey_datum_from_expr(partkey, i, value, + &keys->maxkeys[i])) + { + keys->n_maxkeys++; + keys->max_incl = true; + } + + if (need_next_min && + partkey_datum_from_expr(partkey, i, value, + &keys->minkeys[i])) + { + keys->n_minkeys++; + keys->min_incl = true; + } + break; + + case PART_OP_LESS: + if (need_next_max && + partkey_datum_from_expr(partkey, i, value, + &keys->maxkeys[i])) + { + keys->n_maxkeys++; + keys->max_incl = incl; + if (!incl) + need_next_eq = need_next_max = false; + } + break; + + case PART_OP_GREATER: + if (need_next_min && + partkey_datum_from_expr(partkey, i, value, + &keys->minkeys[i])) + { + keys->n_minkeys++; + keys->min_incl = incl; + if (!incl) + need_next_eq = need_next_min = false; + } + break; + + default: + Assert(false); + } + } + } + + /* + * To set eqkeys, we must have found matching clauses containing = + * operator for all partition key columns and if present we don't need + * the values in minkeys and maxkeys anymore. In the case hash + * partitioning, we don't require all of eqkeys to be operator clausses. + * In that case, any IS NULL clauses involving partition key columns are + * also considered as equality keys by the code for hash partition pruning, + * which checks that all partition columns are covered before actually + * performing the pruning. + */ + if (keys->n_eqkeys == partkey->partnatts || + partkey->strategy == PARTITION_STRATEGY_HASH) + keys->n_minkeys = keys->n_maxkeys = 0; + else + keys->n_eqkeys = 0; + + /* Finally, also set the keyisnull and keyisnotnull values. */ + keys->keyisnull = clauseinfo->keyisnull; + keys->keyisnotnull = clauseinfo->keyisnotnull; + + return (keys->n_eqkeys > 0 || keys->n_minkeys > 0 || + keys->n_maxkeys > 0 || !bms_is_empty(keys->keyisnull) || + !bms_is_empty(keys->keyisnotnull)); +} + +/* + * partition_op_strategy + * Returns whether the clause in 'pc' contains an =, </<=, or >/>= + * operator and set *incl to true if the operator's strategy is + * inclusive. + */ +static PartOpStrategy +partition_op_strategy(PartitionKey key, PartClause *pc, bool *incl) +{ + *incl = false; /* may be overwritten below */ + + switch (key->strategy) + { + /* Hash partitioning allows only hash equality. */ + case PARTITION_STRATEGY_HASH: + if (pc->op_strategy == HTEqualStrategyNumber) + { + *incl = true; + return PART_OP_EQUAL; + } + elog(ERROR, "unexpected operator strategy number: %d", + pc->op_strategy); + + /* List and range partitioning support all btree operators. */ + case PARTITION_STRATEGY_LIST: + case PARTITION_STRATEGY_RANGE: + switch (pc->op_strategy) + { + case BTLessEqualStrategyNumber: + *incl = true; + /* fall through */ + + case BTLessStrategyNumber: + return PART_OP_LESS; + + case BTEqualStrategyNumber: + *incl = true; + return PART_OP_EQUAL; + + case BTGreaterEqualStrategyNumber: + *incl = true; + /* fall through */ + + case BTGreaterStrategyNumber: + return PART_OP_GREATER; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + return PART_OP_EQUAL; /* keep compiler quiet */ +} + +/* + * partkey_datum_from_expr + * Set *value to the constant value obtained by evaluating 'expr' + * + * Note that we may not be able to evaluate the input expression, in which + * case, the function returns false to indicate that *value has not been + * set. True is returned otherwise. + */ +static bool +partkey_datum_from_expr(PartitionKey key, int partkeyidx, + Expr *expr, Datum *value) +{ + Oid exprtype = exprType((Node *) expr); + + if (exprtype != key->parttypid[partkeyidx]) + { + ParseState *pstate = make_parsestate(NULL); + + expr = (Expr *) coerce_to_target_type(pstate, (Node *) expr, + exprtype, + key->parttypid[partkeyidx], -1, + COERCION_EXPLICIT, + COERCE_IMPLICIT_CAST, -1); + free_parsestate(pstate); + + /* + * If we couldn't coerce to the partition key's type, that is, the + * type of the datums stored in PartitionBoundInfo for this partition + * key, there's no hope of using this expression for anything + * partitioning-related. + */ + if (expr == NULL) + return false; + + /* + * Transform into a form that the following code can do something + * useful with. + */ + expr = evaluate_expr(expr, + exprType((Node *) expr), + exprTypmod((Node *) expr), + exprCollation((Node *) expr)); + } + + /* + * Add more expression types here as needed to support higher-level + * code. + */ + if (IsA(expr, Const)) + { + *value = ((Const *) expr)->constvalue; + return true; + } + + return false; +} + +/* + * remove_redundant_clauses + * Processes the clauses contained in context->clauseinfo to remove the + * ones that are superseeded by other clauses which are more restrictive. + * + * For example, x > 1 AND x > 2 and x >= 5, the latter is the most + * restrictive, so 5 is the best minimum bound for x. + * + * We also look for clauses which contradict one another in a way that proves + * that the clauses cannot possibly match any partition. Impossible clauses + * include things like: x = 1 AND x = 2, x > 0 and x < 10. The function + * returns right after finding such a clause and before returning, sets a field + * in context->clauseinfo to inform the caller that we found such clause. + */ +static void +remove_redundant_clauses(PartitionKey partkey, + PartitionPruneContext *context) +{ + PartClause *hash_clause, + *btree_clauses[BTMaxStrategyNumber]; + PartitionClauseInfo *partclauseinfo = context->clauseinfo; + ListCell *lc; + int s; + int i; + bool test_result; + List *newlist; + + for (i = 0; i < partkey->partnatts; i++) + { + List *keyclauses = partclauseinfo->keyclauses[i]; + + hash_clause = NULL; + newlist = NIL; + + memset(btree_clauses, 0, sizeof(btree_clauses)); + + foreach(lc, keyclauses) + { + PartClause *pc = (PartClause *) lfirst(lc); + + if (!pc->valid_cache) + { + Oid lefttype; + + get_op_opfamily_properties(pc->opno, + partkey->partopfamily[i], + false, + &pc->op_strategy, + &lefttype, + &pc->op_subtype); + fmgr_info(get_opcode(pc->opno), &pc->op_func); + pc->valid_cache = true; + } + + /* + * Hash-partitioning knows only about equality. So, if we've + * matched a clause and found another clause whose constant + * operand doesn't match the constant operand of the former, then + * we have found mutually contradictory clauses. + */ + if (partkey->strategy == PARTITION_STRATEGY_HASH) + { + if (hash_clause == NULL) + hash_clause = pc; + /* check if another clause would contradict the one we have */ + else if (partition_cmp_args(partkey, i, + pc, pc, hash_clause, + &test_result)) + { + if (!test_result) + { + partclauseinfo->constfalse = true; + return; + } + } + /* + * Couldn't compare; keep hash_clause set to the previous value, + * and add this one directly to the result. Caller would + * arbitrarily choose one of the many and perform + * partition-pruning with it. + */ + else + newlist = lappend(newlist, pc); + + /* + * The code below handles btree operators, so not relevant for + * hash partitioning. + */ + continue; + } + + /* + * The code that follows closely mimics similar processing done by + * nbtutils.c: _bt_preprocess_keys(). + * + * btree_clauses[s] points currently best clause containing the + * operator strategy type s+1; it is NULL if we haven't yet found + * such a clause. + */ + s = pc->op_strategy - 1; + if (btree_clauses[s] == NULL) + { + btree_clauses[s] = pc; + } + else + { + /* + * Is this one more restrictive than what we already have? + * + * Consider some examples: 1. If btree_clauses[BTLT] now contains + * a < 5, and cur is a < 3, then because 3 < 5 is true, a < 5 + * currently at btree_clauses[BTLT] will be replaced by a < 3. + * + * 2. If btree_clauses[BTEQ] now contains a = 5 and cur is a = 7, + * then because 5 = 7 is false, we found a mutual contradiction, + * so we set *constfalse to true and return. + * + * 3. If btree_clauses[BTLT] now contains a < 5 and cur is a < 7, + * then because 7 < 5 is false, we leave a < 5 where it is and + * effectively discard a < 7 as being redundant. + */ + if (partition_cmp_args(partkey, i, + pc, pc, btree_clauses[s], + &test_result)) + { + /* cur is more restrictive, so replace the existing. */ + if (test_result) + btree_clauses[s] = pc; + else if (s == BTEqualStrategyNumber - 1) + { + partclauseinfo->constfalse = true; + return; + } + + /* Old one is more restrictive, so keep around. */ + } + else + { + /* + * We couldn't determine which one is more restrictive. Keep + * the previous one in btree_clauses[s] and push this one directly + * to the output list. + */ + newlist = lappend(newlist, pc); + } + } + } + + if (partkey->strategy == PARTITION_STRATEGY_HASH) + { + /* Note we didn't add this one to the result yet. */ + if (hash_clause) + newlist = lappend(newlist, hash_clause); + list_free(partclauseinfo->keyclauses[i]); + partclauseinfo->keyclauses[i] = newlist; + continue; + } + + /* Compare btree operator clauses across strategies. */ + + /* Compare the equality clause with clauses of other strategies. */ + if (btree_clauses[BTEqualStrategyNumber - 1]) + { + PartClause *eq = btree_clauses[BTEqualStrategyNumber - 1]; + + for (s = 0; s < BTMaxStrategyNumber; s++) + { + PartClause *chk = btree_clauses[s]; + + if (!chk || s == (BTEqualStrategyNumber - 1)) + continue; + + /* + * Suppose btree_clauses[BTLT] contained a < 5 and the eq clause + * is a = 5, then because 5 < 5 is false, we found contradiction. + * That is, a < 5 and a = 5 are mutually contradictory. OTOH, if + * eq clause is a = 3, then because 3 < 5, we no longer need + * a < 5, because a = 3 is more restrictive. + */ + if (partition_cmp_args(partkey, i, + chk, eq, chk, + &test_result)) + { + if (!test_result) + { + partclauseinfo->constfalse = true; + return; + } + /* Discard the no longer needed clause. */ + btree_clauses[s] = NULL; + } + } + } + + /* + * Try to keep only one of <, <=. + * + * Suppose btree_clauses[BTLT] contains a < 3 and btree_clauses[BTLE] + * contains a <= 3 (or a <= 4), then because 3 <= 3 (or 3 <= 4) is true, + * we discard the a <= 3 (or a <= 4) as redundant. If the latter contains + * contains a <= 2, then because 3 <= 2 is false, we dicard a < 3 as + * redundant. + */ + if (btree_clauses[BTLessStrategyNumber - 1] && + btree_clauses[BTLessEqualStrategyNumber - 1]) + { + PartClause *lt = btree_clauses[BTLessStrategyNumber - 1], + *le = btree_clauses[BTLessEqualStrategyNumber - 1]; + + if (partition_cmp_args(partkey, i, + le, lt, le, + &test_result)) + { + if (test_result) + btree_clauses[BTLessEqualStrategyNumber - 1] = NULL; + else + btree_clauses[BTLessStrategyNumber - 1] = NULL; + } + } + + /* Try to keep only one of >, >=. See the example above. */ + if (btree_clauses[BTGreaterStrategyNumber - 1] && + btree_clauses[BTGreaterEqualStrategyNumber - 1]) + { + PartClause *gt = btree_clauses[BTGreaterStrategyNumber - 1], + *ge = btree_clauses[BTGreaterEqualStrategyNumber - 1]; + + if (partition_cmp_args(partkey, i, + ge, gt, ge, + &test_result)) + { + if (test_result) + btree_clauses[BTGreaterEqualStrategyNumber - 1] = NULL; + else + btree_clauses[BTGreaterStrategyNumber - 1] = NULL; + } + } + + /* + * btree_clauses now contains the "best" clause or NULL for each btree + * strategy number. Add to the newlist. + */ + for (s = 0; s < BTMaxStrategyNumber; s++) + { + if (btree_clauses[s]) + newlist = lappend(newlist, btree_clauses[s]); + } + + /* + * Replace the old List with the new one with the redundant clauses + * removed. + */ + list_free(partclauseinfo->keyclauses[i]); + partclauseinfo->keyclauses[i] = newlist; + } +} + +/* + * partition_cmp_args + * Try to compare the constant arguments of 'leftarg' and 'rightarg', in + * that order, using the operator of 'op' and set *result to the result + * of this comparison. + * + * Returns true if we could actually perform the comparison; otherwise false. + * + * Note: We may not be able to perform the comparison if operand values are + * unknown in this context or if the type of any of the operands are + * incompatible with the operator. + */ +static bool +partition_cmp_args(PartitionKey key, int partkeyidx, + PartClause *pc, PartClause *leftarg, PartClause *rightarg, + bool *result) +{ + Datum left_value; + Datum right_value; + + Assert(pc->valid_cache && leftarg->valid_cache && rightarg->valid_cache); + + /* + * Try to extract an actual value from each arg. This may fail if the + * value is unknown in this context, in which case we cannot compare. + */ + if (!partkey_datum_from_expr(key, partkeyidx, + leftarg->value, &left_value)) + return false; + + if (!partkey_datum_from_expr(key, partkeyidx, + rightarg->value, &right_value)) + return false; + + /* + * We can compare left_value and right_value using op's operator + * only if both are of the expected type. + */ + if (leftarg->op_subtype == pc->op_subtype && + rightarg->op_subtype == pc->op_subtype) + { + *result = DatumGetBool(FunctionCall2Coll(&pc->op_func, + pc->inputcollid, + left_value, + right_value)); + return true; + } + else + { + Oid partopfamily = key->partopfamily[partkeyidx]; + Oid cmp_op; + + /* Otherwise, look one up in the partitioning operator family. */ + cmp_op = get_opfamily_member(partopfamily, + leftarg->op_subtype, + rightarg->op_subtype, + pc->op_strategy); + if (OidIsValid(cmp_op)) + { + *result = DatumGetBool(OidFunctionCall2Coll(get_opcode(cmp_op), + pc->inputcollid, + left_value, + right_value)); + return true; + } + } + + /* Couldn't do the comparison. */ + *result = false; + return false; +} + +/* + * get_partitions_for_keys + * Returns the partitions of 'rel' that will need to be scanned for the + * given look up keys + * + * Input: + * See the comments above the definition of PartScanKeyInfo to see what + * kind of information is contained in 'keys'. + * + * Outputs: + * Bitmapset containing indexes of the selceted partitions + */ +static Bitmapset * +get_partitions_for_keys(Relation rel, PartScanKeyInfo *keys) +{ + /* Return an empty set if no partitions to see. */ + if (RelationGetPartitionDesc(rel)->nparts == 0) + return NULL; + + switch (RelationGetPartitionKey(rel)->strategy) + { + case PARTITION_STRATEGY_HASH: + return get_partitions_for_keys_hash(rel, keys); + + case PARTITION_STRATEGY_LIST: + return get_partitions_for_keys_list(rel, keys); + + case PARTITION_STRATEGY_RANGE: + return get_partitions_for_keys_range(rel, keys); + + default: + elog(ERROR, "unexpected partition strategy: %d", + RelationGetPartitionKey(rel)->strategy); + } + + return NULL; /* keep compiler quiet */ +} + +/* + * get_partitions_for_keys_hash + * Return partitions of a hash partitioned table for requested + * keys + * + * This interprets the keys and looks up partitions in the partition bound + * descriptor using the hash partitioning semantics. + */ +static Bitmapset * +get_partitions_for_keys_hash(Relation rel, PartScanKeyInfo *keys) +{ + PartitionKey partkey = RelationGetPartitionKey(rel); + PartitionDesc partdesc = RelationGetPartitionDesc(rel); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + bool keyisnull[PARTITION_MAX_KEYS]; + int i; + + Assert(partdesc->nparts > 0); + + /* + * Since tuples with NULL values in the partition key columns are stored + * in regular partitions, we'll treat any IS NULL clauses here as regular + * equality clauses. + */ + memset(keyisnull, false, sizeof(keyisnull)); + for (i = 0; i < partkey->partnatts; i++) + { + if (bms_is_member(i, keys->keyisnull)) + { + keys->n_eqkeys++; + keyisnull[i] = true; + } + } + + /* + * Can only do pruning if we know all the keys and they're all equality + * keys including the nulls that we just counted above. + */ + if (keys->n_eqkeys == partkey->partnatts) + { + uint64 rowHash; + int greatest_modulus = get_greatest_modulus(boundinfo), + result_index; + + rowHash = compute_hash_value(partkey, keys->eqkeys, keyisnull); + result_index = boundinfo->indexes[rowHash % greatest_modulus]; + if (result_index >= 0) + return bms_make_singleton(result_index); + } + else + /* Can't do pruning otherwise, so return all partitions. */ + return bms_add_range(NULL, 0, partdesc->nparts - 1); + + return NULL; +} + +/* + * get_partitions_for_keys_list + * Return partitions of a list partitioned table for requested keys + * + * This interprets the keys and looks up partitions in the partition bound + * descriptor using the list partitioning semantics. + */ +static Bitmapset * +get_partitions_for_keys_list(Relation rel, PartScanKeyInfo *keys) +{ + Bitmapset *result = NULL; + PartitionKey partkey = RelationGetPartitionKey(rel); + PartitionBoundInfo boundinfo = RelationGetPartitionDesc(rel)->boundinfo; + PartitionBoundCmpArg arg; + int i, + eqoff, + minoff, + maxoff; + bool is_equal; + + Assert(RelationGetPartitionDesc(rel)->nparts > 0); + Assert(partkey->partnatts == 1); + + /* + * If the query is looking for null keys, there can only be one such + * partition. Return the same if one exists. + */ + if (!bms_is_empty(keys->keyisnull)) + { + /* + * NULLs may only exist in the NULL partition, or in the + * default, if there's no NULL partition. + */ + if (partition_bound_accepts_nulls(boundinfo)) + return bms_make_singleton(boundinfo->null_index); + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* + * If there are no datums to compare keys with, but there are partitions, + * just return the default partition if one exists. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; /* shouldn't happen */ + } + + /* Equality key. */ + if (keys->n_eqkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->eqkeys; + arg.ndatums = keys->n_eqkeys; + eqoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + if (eqoff >= 0 && is_equal) + { + /* Exactly matching datum exists. */ + Assert(boundinfo->indexes[eqoff] >= 0); + return bms_make_singleton(boundinfo->indexes[eqoff]); + } + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* + * Find the left-most bound that satisfies the query, i.e., the one that + * satisfies minkeys. + */ + minoff = 0; + if (keys->n_minkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->minkeys; + arg.ndatums = keys->n_minkeys; + minoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + if (minoff >= 0) + { + /* + * The bound at minoff is <= minkeys, given the way + * partition_bound_bsearch() works. If it's not equal (<), then + * increment minoff to make it point to the datum on the right + * that necessarily satisfies minkeys. Also do the same if it is + * equal but minkeys is exclusive. + */ + if (!is_equal || !keys->min_incl) + minoff++; + } + else + { + /* + * minoff set to -1 means all datums are greater than minkeys, + * which means all partitions satisfy minkeys. In that case, set + * minoff to the index of the leftmost datum, viz. 0. + */ + minoff = 0; + } + + /* + * minkeys is greater than the datums of all non-default partitions, + * meaning there isn't one to return. Return the default partition if + * one exists. + */ + if (minoff > boundinfo->ndatums - 1) + return partition_bound_has_default(boundinfo) + ? bms_make_singleton(boundinfo->default_index) + : NULL; + } + + /* + * Find the right-most bound that satisfies the query, i.e., the one that + * satisfies maxkeys. + */ + maxoff = boundinfo->ndatums - 1; + if (keys->n_maxkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->maxkeys; + arg.ndatums = keys->n_maxkeys; + maxoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + if (maxoff >= 0) + { + /* + * The bound at maxoff is <= maxkeys, given the way + * partition_bound_bsearch works. If the bound at maxoff exactly + * matches maxkey (is_equal), but the maxkey is exclusive, then + * decrement maxoff to point to the bound on the left. + */ + if (is_equal && !keys->max_incl) + maxoff--; + } + + /* + * maxkeys is smaller than the datums of all non-default partitions, + * meaning there isn't one to return. Return the default partition if + * one exists. + */ + if (maxoff < 0) + return partition_bound_has_default(boundinfo) + ? bms_make_singleton(boundinfo->default_index) + : NULL; + } + + Assert (minoff >= 0 && maxoff >= 0); + + /* + * All datums between those at minoff and maxoff satisfy query's keys, so + * add the corresponding partitions to the result set. + */ + for (i = minoff; i <= maxoff; i++) + result = bms_add_member(result, boundinfo->indexes[i]); + + /* + * For range queries, always include the default list partition, + * because list partitions divide the key space in a discontinuous + * manner, not all values in the given range will have a partition + * assigned. + */ + if (partition_bound_has_default(boundinfo)) + return bms_add_member(result, boundinfo->default_index); + + return result; +} + +/* + * get_partitions_for_keys_range + * Return partitions of a range partitioned table for requested keys + * + * This interprets the keys and looks up partitions in the partition bound + * descriptor using the range partitioning semantics. + */ +static Bitmapset * +get_partitions_for_keys_range(Relation rel, PartScanKeyInfo *keys) +{ + Bitmapset *result = NULL; + PartitionKey partkey = RelationGetPartitionKey(rel); + PartitionBoundInfo boundinfo = RelationGetPartitionDesc(rel)->boundinfo; + PartitionBoundCmpArg arg; + int i, + eqoff, + minoff, + maxoff; + bool is_equal, + include_def = false; + + Assert(RelationGetPartitionDesc(rel)->nparts > 0); + + /* + * We might be able to get the answer sooner based on the nullness of + * keys, so get that out of the way. + */ + for (i = 0; i < partkey->partnatts; i++) + { + if (bms_is_member(i, keys->keyisnull)) + { + /* Only the default partition accepts nulls. */ + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + } + + /* + * If there are no datums to compare keys with, but there are partitions, + * just return the default partition, if one exists. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* Equality keys. */ + if (keys->n_eqkeys > 0) + { + /* Valid iff there are as many as partition key columns. */ + Assert(keys->n_eqkeys == partkey->partnatts); + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->eqkeys; + arg.ndatums = keys->n_eqkeys; + eqoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + /* + * The bound at eqoff is known to be <= eqkeys, given the way + * partition_bound_bsearch works. Considering it as the lower bound + * of the partition that eqkeys falls into, the bound at eqoff + 1 + * would be its upper bound, so use eqoff + 1 to get the desired + * partition's index. + */ + if (eqoff >= 0 && boundinfo->indexes[eqoff + 1] >= 0) + return bms_make_singleton(boundinfo->indexes[eqoff+1]); + /* + * eqkeys falls into a range of values for which no non-default + * partition exists. + */ + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* + * Find the leftmost bound that satisfies the query, that is, make minoff + * point to the datum corresponding to the upper bound of the left-most + * partition to be selected. + */ + minoff = 0; + if (keys->n_minkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->minkeys; + arg.ndatums = keys->n_minkeys; + minoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + /* + * If minkeys does not contain values for all partition key columns, + * that is, only a prefix is specified, then there may be multiple + * bounds in boundinfo that share the same prefix. But + * partition_bound_bsearch would've returned the offset of just one of + * those. If minkey is inclusive, we must decrement minoff until it + * reaches the leftmost of those bound values, so that partitions + * corresponding to all those bound values are selected. If minkeys + * is exclusive, we must increment minoff until it reaches the first + * bound greater than this prefix, so that none of the partitions + * corresponding to those bound values are selected. + */ + if (is_equal && arg.ndatums < partkey->partnatts) + { + while (minoff >= 1 && minoff < boundinfo->ndatums - 1) + { + int32 cmpval; + + cmpval = partition_bound_cmp(partkey, boundinfo, + keys->min_incl + ? minoff - 1 : minoff + 1, + &arg); + if (cmpval != 0) + { + /* Move to the non-equal bound only in this case. */ + if (!keys->min_incl) + minoff += 1; + break; + } + + if (keys->min_incl) + minoff -= 1; + else + minoff += 1; + } + } + /* + * Assuming minoff currently points to the lower bound of the left- + * most selected partition, increment it so that it points to the + * upper bound. + */ + else + minoff += 1; + } + + /* + * Find the rightmost bound that satisfies the query, that is, make maxoff + * maxoff point to the datum corresponding to the upper bound of the + * right-most partition to be selected. + */ + maxoff = boundinfo->ndatums; + if (keys->n_maxkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->maxkeys; + arg.ndatums = keys->n_maxkeys; + maxoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + /* See the comment written above for minkeys. */ + if (is_equal && arg.ndatums < partkey->partnatts) + { + while (maxoff >= 1 && maxoff < boundinfo->ndatums - 1) + { + int32 cmpval; + + cmpval = partition_bound_cmp(partkey, boundinfo, + keys->max_incl + ? maxoff + 1 : maxoff - 1, + &arg); + if (cmpval != 0) + { + /* Move to the non-equal bound only in this case. */ + if (!keys->max_incl) + maxoff -= 1; + break; + } + + if (keys->max_incl) + maxoff += 1; + else + maxoff -= 1; + } + + /* + * Assuming maxoff currently points to the lower bound of the + * right-most partition, increment it so that it points to the + * upper bound. + */ + maxoff += 1; + } + /* + * Assuming maxoff currently points to the lower bound of the right- + * most selected partition, increment it so that it points to the + * upper bound. We do not need to include that partition though if + * maxkeys exactly matched the bound in question and it is exclusive. + * Not incrementing simply means we treat the matched bound itself + * the upper bound of the right-most selected partition. + */ + else if (!is_equal || keys->max_incl) + maxoff += 1; + } + + Assert (minoff >= 0 && maxoff >= 0); + + /* + * At this point, we believe that minoff/maxoff point to the upper bound + * of some partition, but it may not be the case. It might actually be + * the upper bound of an unassigned range of values, which if so, move + * minoff/maxoff to the adjacent bound which must be the upper bound of + * a valid partition. + * + * By doing that, we skip over a portion of values that do indeed satisfy + * the query, but don't have a valid partition assigned. The default + * partition will have to be included to cover those values. Although, if + * the original bound in question contains an infinite value, there would + * not be any unassigned range to speak of, because the range us unbounded + * in that direction by definition, so no need to include the default. + */ + if (boundinfo->indexes[minoff] < 0) + { + int lastkey = partkey->partnatts - 1; + + if (keys->n_minkeys > 0) + lastkey = keys->n_minkeys - 1; + if (minoff >=0 && minoff < boundinfo->ndatums && + boundinfo->kind[minoff][lastkey] == PARTITION_RANGE_DATUM_VALUE) + include_def = true; + minoff += 1; + } + + if (maxoff >= 1 && boundinfo->indexes[maxoff] < 0) + { + int lastkey = partkey->partnatts - 1; + + if (keys->n_maxkeys > 0) + lastkey = keys->n_maxkeys - 1; + if (maxoff >=0 && maxoff <= boundinfo->ndatums && + boundinfo->kind[maxoff - 1][lastkey] == PARTITION_RANGE_DATUM_VALUE) + include_def = true; + maxoff -= 1; + } + + if (minoff <= maxoff) + result = bms_add_range(result, + boundinfo->indexes[minoff], + boundinfo->indexes[maxoff]); + /* + * There may exist a range of values unassigned to any non-default + * partition between the datums at minoff and maxoff. + */ + for (i = minoff; i <= maxoff; i++) + { + if (boundinfo->indexes[i] < 0) + { + include_def = true; + break; + } + } + + /* + * Since partition keys with nulls are mapped to the default range + * partition, we must include the default partition if some keys + * could be null. + */ + if (keys->n_minkeys < partkey->partnatts || + keys->n_maxkeys < partkey->partnatts) + { + for (i = 0; i < partkey->partnatts; i++) + { + if (!bms_is_member(i, keys->keyisnotnull)) + { + include_def = true; + break; + } + } + } + + if (include_def && partition_bound_has_default(boundinfo)) + result = bms_add_member(result, boundinfo->default_index); + + return result; +} + +/* * get_partition_operator * * Return oid of the operator of given strategy for a given partition key diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index fd3001c493..2fc54defbd 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2127,6 +2127,25 @@ _copyOnConflictExpr(const OnConflictExpr *from) return newnode; } +static PartitionClauseInfo * +_copyPartitionClauseInfo(const PartitionClauseInfo *from) +{ + PartitionClauseInfo *newnode = makeNode(PartitionClauseInfo); + + int i; + for (i = 0; i < PARTITION_MAX_KEYS; i++) + COPY_NODE_FIELD(keyclauses[i]); + + COPY_NODE_FIELD(or_clauses); + COPY_NODE_FIELD(ne_clauses); + COPY_BITMAPSET_FIELD(keyisnull); + COPY_BITMAPSET_FIELD(keyisnotnull); + COPY_SCALAR_FIELD(constfalse); + COPY_SCALAR_FIELD(foundkeyclauses); + + return newnode; +} + /* **************************************************************** * relation.h copy functions * @@ -5009,6 +5028,9 @@ copyObjectImpl(const void *from) case T_OnConflictExpr: retval = _copyOnConflictExpr(from); break; + case T_PartitionClauseInfo: + retval = _copyPartitionClauseInfo(from); + break; /* * RELATION NODES diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 89f27ce0eb..0c1f23951a 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -152,8 +152,6 @@ static Node *substitute_actual_parameters(Node *expr, int nargs, List *args, static Node *substitute_actual_parameters_mutator(Node *node, substitute_actual_parameters_context *context); static void sql_inline_error_callback(void *arg); -static Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, - Oid result_collation); static Query *substitute_actual_srf_parameters(Query *expr, int nargs, List *args); static Node *substitute_actual_srf_parameters_mutator(Node *node, @@ -4833,7 +4831,7 @@ sql_inline_error_callback(void *arg) * We use the executor's routine ExecEvalExpr() to avoid duplication of * code and ensure we get the same result as the executor would get. */ -static Expr * +Expr * evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, Oid result_collation) { diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2faf0ca26e..78d43ea07c 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -40,6 +40,13 @@ typedef struct PartitionDescData PartitionBoundInfo boundinfo; /* collection of partition bounds */ } PartitionDescData; +typedef struct PartitionPruneContext +{ + int rt_index; + Relation relation; + PartitionClauseInfo *clauseinfo; +} PartitionPruneContext; + typedef struct PartitionDescData *PartitionDesc; extern void RelationBuildPartitionDesc(Relation relation); @@ -73,4 +80,10 @@ extern List *get_proposed_default_constraint(List *new_part_constaints); extern int get_partition_for_tuple(Relation relation, Datum *values, bool *isnull); +/* For partition-pruning */ +extern void populate_partition_clauses(Relation relation, + int rt_index, List *clauses, + PartitionClauseInfo *partclauseinfo); +extern Bitmapset *get_partitions_from_clauses(PartitionPruneContext *context); + #endif /* PARTITION_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index b544474254..0847df97ff 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -188,4 +188,7 @@ DATA(insert OID = 4104 ( 3580 box_inclusion_ops PGNSP PGUID )); DATA(insert OID = 5000 ( 4000 box_ops PGNSP PGUID )); DATA(insert OID = 5008 ( 4000 poly_ops PGNSP PGUID )); +#define IsBooleanOpfamily(opfamily) \ + ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID) + #endif /* PG_OPFAMILY_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 74b094a9c3..0ac242aeda 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -190,6 +190,7 @@ typedef enum NodeTag T_JoinExpr, T_FromExpr, T_OnConflictExpr, + T_PartitionClauseInfo, T_IntoClause, /* diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 1b4b0d75af..642ea0fbde 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1506,4 +1506,35 @@ typedef struct OnConflictExpr List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } OnConflictExpr; +/*---------- + * PartitionClauseInfo + * + * Stores clauses which were matched to a partition key. Each matching clause + * is stored in the 'keyclauses' list for the partition key index that it was + * matched to. Other details are also stored, such as OR clauses and + * not-equal (<>) clauses. Nullness properties are also stored. + *---------- + */ +typedef struct PartitionClauseInfo +{ + /* Lists of clauses indexed by the partition key */ + List *keyclauses[PARTITION_MAX_KEYS]; + + /* Each members is a List itself of a given OR clauses's arguments. */ + List *or_clauses; + + /* List of clauses containing <> operator. */ + List *ne_clauses; + + /* Nth (0 <= N < partnatts) bit set if the key is NULL or NOT NULL. */ + Bitmapset *keyisnull; + Bitmapset *keyisnotnull; + + /* True if at least one of above fields contains valid information. */ + bool foundkeyclauses; + + /* True if mutually contradictory clauses were found. */ + bool constfalse; +} PartitionClauseInfo; + #endif /* PRIMNODES_H */ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index ba4fa4b68b..3c2f54964b 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -84,5 +84,7 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Query *inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte); +extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, + Oid result_collation); #endif /* CLAUSES_H */ -- 2.11.0
>From 202812a09e1b7500d23a4a29edb8223e32d83555 Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 12 Dec 2017 13:46:26 +0900 Subject: [PATCH v23 3/5] Move some code of set_append_rel_size to separate function The code that initializes basic properties of a partition RelOptInfo from the information in parent's RelOptInfo. It will be needed to be called by the pairwise-join related code to minimally initialize the partitions that earlier planning would have considered pruned and hence left untouched. That's not true currently, because the current pruning method touches each partition (setting its basic properties) before considering it pruned. --- src/backend/optimizer/path/allpaths.c | 80 ++----------------------------- src/backend/optimizer/util/relnode.c | 90 +++++++++++++++++++++++++++++++++++ src/include/optimizer/pathnode.h | 4 ++ 3 files changed, 97 insertions(+), 77 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index fd1a58336b..fd68374e20 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -921,85 +921,11 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, childrel = find_base_rel(root, childRTindex); Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - if (rel->part_scheme) - { - AttrNumber attno; - - /* - * We need attr_needed data for building targetlist of a join - * relation representing join between matching partitions for - * partition-wise join. A given attribute of a child will be - * needed in the same highest joinrel where the corresponding - * attribute of parent is needed. Hence it suffices to use the - * same Relids set for parent and child. - */ - for (attno = rel->min_attr; attno <= rel->max_attr; attno++) - { - int index = attno - rel->min_attr; - Relids attr_needed = rel->attr_needed[index]; - - /* System attributes do not need translation. */ - if (attno <= 0) - { - Assert(rel->min_attr == childrel->min_attr); - childrel->attr_needed[index] = attr_needed; - } - else - { - Var *var = list_nth_node(Var, - appinfo->translated_vars, - attno - 1); - int child_index; - - /* - * Ignore any column dropped from the parent. - * Corresponding Var won't have any translation. It won't - * have attr_needed information, since it can not be - * referenced in the query. - */ - if (var == NULL) - { - Assert(attr_needed == NULL); - continue; - } - - child_index = var->varattno - childrel->min_attr; - childrel->attr_needed[child_index] = attr_needed; - } - } - } - - /* - * Copy/Modify targetlist. Even if this child is deemed empty, we need - * its targetlist in case it falls on nullable side in a child-join - * because of partition-wise join. - * - * NB: the resulting childrel->reltarget->exprs may contain arbitrary - * expressions, which otherwise would not occur in a rel's targetlist. - * Code that might be looking at an appendrel child must cope with - * such. (Normally, a rel's targetlist would only include Vars and - * PlaceHolderVars.) XXX we do not bother to update the cost or width - * fields of childrel->reltarget; not clear if that would be useful. - */ - childrel->reltarget->exprs = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->reltarget->exprs, - 1, &appinfo); - /* - * We have to make child entries in the EquivalenceClass data - * structures as well. This is needed either if the parent - * participates in some eclass joins (because we will want to consider - * inner-indexscan joins on the individual children) or if the parent - * has useful pathkeys (because we should try to build MergeAppend - * paths that produce those sort orderings). Even if this child is - * deemed dummy, it may fall on nullable side in a child-join, which - * in turn may participate in a MergeAppend, where we will need the - * EquivalenceClass data structures. + * Initialize some properties of child rel from the parent rel, such + * target list, equivalence class members, etc. */ - if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) - add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = rel->has_eclass_joins; + set_basic_child_rel_properties(root, rel, childrel, appinfo); /* * We have to copy the parent's quals to the child, with appropriate diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index ac5a7c9553..35345ccbe9 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -1748,3 +1748,93 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, joinrel->nullable_partexprs[cnt] = nullable_partexpr; } } + +/* + * Initialize some basic properties of child rel from the parent rel, such + * target list, equivalence class members, etc. + */ +void +set_basic_child_rel_properties(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *childrel, + AppendRelInfo *appinfo) +{ + /* + * Copy/Modify targetlist. Even if this child is deemed empty, we need + * its targetlist in case it falls on nullable side in a child-join + * because of partition-wise join. + * + * NB: the resulting childrel->reltarget->exprs may contain arbitrary + * expressions, which otherwise would not occur in a rel's targetlist. + * Code that might be looking at an appendrel child must cope with + * such. (Normally, a rel's targetlist would only include Vars and + * PlaceHolderVars.) XXX we do not bother to update the cost or width + * fields of childrel->reltarget; not clear if that would be useful. + */ + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltarget->exprs, + 1, &appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). Even if this child is + * deemed dummy, it may fall on nullable side in a child-join, which + * in turn may participate in a MergeAppend, where we will need the + * EquivalenceClass data structures. + */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; + + if (rel->part_scheme) + { + AttrNumber attno; + + /* + * We need attr_needed data for building targetlist of a join relation + * representing join between matching partitions for partition-wise + * join. A given attribute of a child will be needed in the same + * highest joinrel where the corresponding attribute of parent is + * needed. Hence it suffices to use the same Relids set for parent and + * child. + */ + for (attno = rel->min_attr; attno <= rel->max_attr; attno++) + { + int index = attno - rel->min_attr; + Relids attr_needed = rel->attr_needed[index]; + + /* System attributes do not need translation. */ + if (attno <= 0) + { + Assert(rel->min_attr == childrel->min_attr); + childrel->attr_needed[index] = attr_needed; + } + else + { + Var *var = list_nth_node(Var, + appinfo->translated_vars, + attno - 1); + int child_index; + + /* + * Ignore any column dropped from the parent. Corresponding + * Var won't have any translation. It won't have attr_needed + * information, since it can not be referenced in the query. + */ + if (var == NULL) + { + Assert(attr_needed == NULL); + continue; + } + + child_index = var->varattno - childrel->min_attr; + childrel->attr_needed[child_index] = attr_needed; + } + } + } +} diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index ef7173fbf8..142eecd733 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -301,5 +301,9 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, RelOptInfo *parent_joinrel, List *restrictlist, SpecialJoinInfo *sjinfo, JoinType jointype); +extern void set_basic_child_rel_properties(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *childrel, + AppendRelInfo *appinfo); #endif /* PATHNODE_H */ -- 2.11.0
>From 68a16a921bc67728136b232c7a93af7bdf658478 Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Wed, 13 Sep 2017 18:24:55 +0900 Subject: [PATCH v23 4/5] More refactoring around partitioned table AppendPath creation Instead of going through root->append_rel_list to pick up the child appinfos, store them in an array called part_appinfos that stores partition appinfos in the same order as RelOptInfos are stored in part_rels, right when the latter are created. Further, instead of going through root->pcinfo_list to get the list of partitioned child rels, which ends up including even the rels that are pruned by set_append_rel_size(), build up a list of "live" partitioned child rels and use the same to initialize partitioned_rels field of AppendPath. --- src/backend/optimizer/path/allpaths.c | 120 ++++++++++++++++++++-------------- src/backend/optimizer/plan/planner.c | 19 ++++-- src/backend/optimizer/util/relnode.c | 14 ++++ src/include/nodes/relation.h | 25 ++++++- 4 files changed, 122 insertions(+), 56 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index fd68374e20..8f761a77e8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -861,6 +861,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + List *rel_appinfos = NIL; int parentRTindex = rti; bool has_live_children; double parent_rows; @@ -874,6 +875,27 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + { + int i; + + for (i = 0; i < rel->nparts; i++) + rel_appinfos = lappend(rel_appinfos, rel->part_appinfos[i]); + + rel->live_partitioned_rels = list_make1_int(rti); + } + /* * Initialize to compute size estimates for whole append relation. * @@ -894,7 +916,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, nattrs = rel->max_attr - rel->min_attr + 1; parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -907,10 +929,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ListCell *childvars; ListCell *lc; - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - childRTindex = appinfo->child_relid; childRTE = root->simple_rte_array[childRTindex]; @@ -1090,6 +1108,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, /* We have at least one live child. */ has_live_children = true; + /* Add this child as a live partition of the parent. */ + rel->live_part_appinfos = lappend(rel->live_part_appinfos, appinfo); + /* * If any live child is not parallel-safe, treat the whole appendrel * as not parallel-safe. In future we might be able to generate plans @@ -1186,24 +1207,35 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; - List *live_childrels = NIL; + List *rel_appinfos = NIL, + *live_childrels = NIL; ListCell *l; + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + rel_appinfos = rel->live_part_appinfos; + /* * Generate access paths for each member relation, and remember the * non-dummy children. */ - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; RangeTblEntry *childRTE; RelOptInfo *childrel; - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - /* Re-locate the child RTE and RelOptInfo */ childRTindex = appinfo->child_relid; childRTE = root->simple_rte_array[childRTindex]; @@ -1267,44 +1299,39 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ListCell *l; List *partitioned_rels = NIL; RangeTblEntry *rte; - bool build_partitioned_rels = false; double partial_rows = -1; + /* + * AppendPath generated for partitioned tables must record the RT indexes + * of partitioned tables that are direct or indirect children of this + * Append rel. We can find them in rel->live_partitioned_rels. However, + * it contains only the immediate children, so collect those of the + * children that are partitioned themselves in loop below and concatenate + * all into one list to be passed to the path creation function. + * + * AppendPath may be for a sub-query RTE (UNION ALL), whose child sub- + * queries may contain references to partitioned tables. The loop below + * will look for such children and collect them in a list to be passed to + * the path creation function. (This assumes that we don't need to look + * through multiple levels of subquery RTEs; if we ever do, we could + * consider stuffing the list we generate here into sub-query RTE's + * RelOptInfo, just like we do for partitioned rels, which would be used + * when populating our parent rel with paths. For the present, that + * appears to be unnecessary.) + */ if (IS_SIMPLE_REL(rel)) { - /* - * A root partition will already have a PartitionedChildRelInfo, and a - * non-root partitioned table doesn't need one, because its Append - * paths will get flattened into the parent anyway. For a subquery - * RTE, no PartitionedChildRelInfo exists; we collect all - * partitioned_rels associated with any child. (This assumes that we - * don't need to look through multiple levels of subquery RTEs; if we - * ever do, we could create a PartitionedChildRelInfo with the - * accumulated list of partitioned_rels which would then be found when - * populated our parent rel with paths. For the present, that appears - * to be unnecessary.) - */ rte = planner_rt_fetch(rel->relid, root); - switch (rte->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_PARTITIONED_TABLE) - partitioned_rels = - get_partitioned_child_rels(root, rel->relid, NULL); - break; - case RTE_SUBQUERY: - build_partitioned_rels = true; - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind); - } + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_PARTITIONED_TABLE) + partitioned_rels = rel->live_partitioned_rels; } else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme) { /* - * Associate PartitionedChildRelInfo of the root partitioned tables - * being joined with the root partitioned join (indicated by - * RELOPT_JOINREL). + * For joinrel consisting of partitioned tables, construct the list + * list by combining live_partitioned_rels of the component + * partitioned tables, which is what the following does. */ partitioned_rels = get_partitioned_child_rels_for_join(root, rel->relids); @@ -1322,17 +1349,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, Path *cheapest_partial_path = NULL; /* - * If we need to build partitioned_rels, accumulate the partitioned - * rels for this child. + * Accumulate the live partitioned children of this child, if it's + * itself partitioned rel. */ - if (build_partitioned_rels) - { - List *cprels; - - cprels = get_partitioned_child_rels(root, childrel->relid, NULL); + if (childrel->part_scheme) partitioned_rels = list_concat(partitioned_rels, - list_copy(cprels)); - } + list_copy(childrel->live_partitioned_rels)); /* * If child has an unparameterized cheapest-total path, add that to diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2a4e22b6c8..a81fed6d1d 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5839,14 +5839,23 @@ List * get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids) { List *result = NIL; - ListCell *l; + int relid; - foreach(l, root->pcinfo_list) + relid = -1; + while ((relid = bms_next_member(join_relids, relid)) >= 0) { - PartitionedChildRelInfo *pc = lfirst(l); + RelOptInfo *rel; - if (bms_is_member(pc->parent_relid, join_relids)) - result = list_concat(result, list_copy(pc->child_rels)); + /* Paranoia: ignore bogus relid indexes */ + if (relid >= root->simple_rel_array_size) + continue; + rel = root->simple_rel_array[relid]; + if (rel == NULL) + continue; + Assert(rel->relid == relid); /* sanity check on array */ + Assert(rel->part_scheme != NULL); + Assert(list_length(rel->live_partitioned_rels) >= 1); + result = list_concat(result, list_copy(rel->live_partitioned_rels)); } return result; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 35345ccbe9..f3b9a2be32 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -154,9 +154,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = 0; rel->boundinfo = NULL; + rel->part_appinfos = NULL; rel->part_rels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->live_part_appinfos = NIL; + rel->live_partitioned_rels = NIL; /* * Pass top parent's relids down the inheritance hierarchy. If the parent @@ -233,8 +236,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) int cnt_parts = 0; if (nparts > 0) + { + rel->part_appinfos = (AppendRelInfo **) + palloc(sizeof(AppendRelInfo *) * nparts); rel->part_rels = (RelOptInfo **) palloc(sizeof(RelOptInfo *) * nparts); + } foreach(l, root->append_rel_list) { @@ -258,6 +265,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) * also match the PartitionDesc. See expand_partitioned_rtentry. */ Assert(cnt_parts < nparts); + rel->part_appinfos[cnt_parts] = appinfo; rel->part_rels[cnt_parts] = childrel; cnt_parts++; } @@ -567,9 +575,12 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = 0; joinrel->boundinfo = NULL; + joinrel->part_appinfos = NULL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->live_part_appinfos = NIL; + joinrel->live_partitioned_rels = NIL; /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -734,9 +745,12 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = false; joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; + joinrel->part_appinfos = NULL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->live_part_appinfos = NIL; + joinrel->live_partitioned_rels = NIL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6bf68f31da..25333c5407 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -529,8 +529,12 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_scheme - Partitioning scheme of the relation * boundinfo - Partition bounds * nparts - Number of partitions + * part_appinfos - AppendRelInfo of each partition * part_rels - RelOptInfos for each partition * partexprs, nullable_partexprs - Partition key expressions + * live_part_appinfos - AppendRelInfo of unpruned partitions + * live_partitioned_rels - RT indexes of unpruned partitions that are + * partitioned tables themselves * * Note: A base relation always has only one set of partition keys, but a join * relation may have as many sets of partition keys as the number of relations @@ -657,10 +661,27 @@ typedef struct RelOptInfo PartitionScheme part_scheme; /* Partitioning scheme. */ int nparts; /* number of partitions */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ - struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, - * stored in the same order of bounds */ + struct AppendRelInfo **part_appinfos; /* Array of AppendRelInfos of + * of partitioned, stored in the + * same order as of bounds */ + struct RelOptInfo **part_rels; /* Array of RelOptInfos of *all* + * partitions, stored in the same order as + * of bounds */ List **partexprs; /* Non-nullable partition key expressions. */ List **nullable_partexprs; /* Nullable partition key expressions. */ + + + /* + * List of AppendRelInfo's of the table's partitions that survive a + * query's clauses. + */ + List *live_part_appinfos; + + /* + * RT indexes of live partitions that are partitioned tables themselves. + * This includes the RT index of the table itself. + */ + List *live_partitioned_rels; } RelOptInfo; /* -- 2.11.0
>From 200dffcf2360acd7854397cd3b0b5187a730366f Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 12 Dec 2017 16:17:10 +0900 Subject: [PATCH v23 5/5] Teach planner to use get_partitions_from_clauses() Current method of selecting a table's partitions to be scanned involves applying constraint exclusion against the partition constraint of each partition, which works by comparing a query's clauses against the partition constraint and exclude a partition if the clauses refute the latter. A dummy path is added for each partition that is excluded. This algorithm takes linear time with a big constant, especially given that we repeat the work of matching clauses to the partition constraint for every partition. Instead, we can match clauses only once by comparing them against the (parent) table's partition key using populate_partition_clauses(). Then, if we pass the clauses to get_partitions_from_clauses(), we'll get the set of matching partitions in much less time than determining by running the matching algorithm separately for each partition. Authors: Amit Langote, Dilip Kumar (dilipbal...@gmail.com), David Rowley (david.row...@2ndquadrant.com) --- src/backend/optimizer/path/allpaths.c | 80 ++++- src/backend/optimizer/path/joinrels.c | 24 ++ src/backend/optimizer/util/plancat.c | 41 ++- src/include/nodes/relation.h | 6 +- src/test/regress/expected/inherit.out | 10 +- src/test/regress/expected/partition_prune.out | 434 ++++++++++++++++++++++---- src/test/regress/sql/partition_prune.sql | 77 ++++- 7 files changed, 592 insertions(+), 80 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 8f761a77e8..af9658128e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -20,6 +20,7 @@ #include "access/sysattr.h" #include "access/tsmapi.h" +#include "catalog/partition.h" #include "catalog/pg_class.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" @@ -136,6 +137,9 @@ static void recurse_push_qual(Node *setOp, Query *topquery, static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +static List *get_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte); /* @@ -847,6 +851,77 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* + * get_append_rel_partitions + * Returns a List of AppendRelInfo belonging to the minimum set of + * partitions which must be scanned to satisfy rel's baserestrictinfo + * quals. + */ +static List * +get_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte) +{ + List *result = NIL; + List *clauses = rel->baserestrictinfo; + int i; + + if (!clauses) + { + /* If there are no clauses then include every partition */ + for (i = 0; i < rel->nparts; i++) + result = lappend(result, rel->part_appinfos[i]); + } + else + { + Relation partrel; + Bitmapset *partindexes; + PartitionClauseInfo partclauseinfo; + + partrel = heap_open(rte->relid, NoLock); + + /* Process clauses and populate partclauseinfo */ + populate_partition_clauses(partrel, rel->relid, + clauses, &partclauseinfo); + + if (!partclauseinfo.constfalse) + { + PartitionPruneContext context; + + context.rt_index = rel->relid; + context.relation = partrel; + context.clauseinfo = &partclauseinfo; + + partindexes = get_partitions_from_clauses(&context); + + /* Fetch the partition appinfos. */ + i = -1; + while ((i = bms_next_member(partindexes, i)) >= 0) + { + AppendRelInfo *appinfo = rel->part_appinfos[i]; + +#ifdef USE_ASSERT_CHECKING + PartitionDesc partdesc = RelationGetPartitionDesc(partrel); + RangeTblEntry *childrte; + + childrte = planner_rt_fetch(appinfo->child_relid, root); + + /* + * Must be the intended child's RTE here, because appinfos are ordered + * the same way as partitions in the partition descriptor. + */ + Assert(partdesc->oids[i] == childrte->relid); +#endif + result = lappend(result, appinfo); + } + } + + heap_close(partrel, NoLock); + } + + return result; +} + +/* * set_append_rel_size * Set size estimates for a simple "append relation" * @@ -888,10 +963,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, } else { - int i; - - for (i = 0; i < rel->nparts; i++) - rel_appinfos = lappend(rel_appinfos, rel->part_appinfos[i]); + rel_appinfos = get_append_rel_partitions(root, rel, rte); rel->live_partitioned_rels = list_make1_int(rti); } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index a35d068911..6949886e46 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1395,6 +1395,30 @@ try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, child_rel2->relids); /* + * If either child_rel1 or child_rel2 is not a live partition, they'd + * not have been touched by set_append_rel_size. So, its RelOptInfo + * would be missing some information that set_append_rel_size sets for + * live partitions, such as the target list, child EQ members, etc. + * We need to make the RelOptInfo of even the dead partitions look + * minimally valid and as having a valid dummy path attached to it. + */ + if (IS_SIMPLE_REL(child_rel1) && child_rel1->pathlist == NIL) + { + AppendRelInfo *appinfo = rel1->part_appinfos[cnt_parts]; + + set_basic_child_rel_properties(root, rel1, child_rel1, appinfo); + mark_dummy_rel(child_rel1); + } + + if (IS_SIMPLE_REL(child_rel2) && child_rel2->pathlist == NIL) + { + AppendRelInfo *appinfo = rel2->part_appinfos[cnt_parts]; + + set_basic_child_rel_properties(root, rel2, child_rel2, appinfo); + mark_dummy_rel(child_rel2); + } + + /* * Construct restrictions applicable to the child join from those * applicable to the parent join. */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 60f21711f4..c1d4c7db5b 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root, Index varno = rel->relid; Relation relation; TupleConstr *constr; - List *pcqual; /* * We assume the relation has already been safely locked. @@ -1257,22 +1256,32 @@ get_relation_constraints(PlannerInfo *root, } } - /* Append partition predicates, if any */ - pcqual = RelationGetPartitionQual(relation); - if (pcqual) + /* + * Append partition predicates, if any. + * + * For selects, partition pruning uses the parent table's partition bound + * descriptor, instead of constraint exclusion which is driven by the + * individual partition's partition constraint. + */ + if (root->parse->commandType != CMD_SELECT) { - /* - * Run each expression through const-simplification and - * canonicalization similar to check constraints. - */ - pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); - pcqual = (List *) canonicalize_qual((Expr *) pcqual); + List *pcqual = RelationGetPartitionQual(relation); - /* Fix Vars to have the desired varno */ - if (varno != 1) - ChangeVarNodes((Node *) pcqual, 1, varno, 0); + if (pcqual) + { + /* + * Run each expression through const-simplification and + * canonicalization similar to check constraints. + */ + pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); + pcqual = (List *) canonicalize_qual((Expr *) pcqual); + + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes((Node *) pcqual, 1, varno, 0); - result = list_concat(result, pcqual); + result = list_concat(result, pcqual); + } } heap_close(relation, NoLock); @@ -1930,6 +1939,10 @@ find_partition_scheme(PlannerInfo *root, Relation relation) memcpy(part_scheme->parttypcoll, partkey->parttypcoll, sizeof(Oid) * partnatts); + part_scheme->partcollation = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partcollation, partkey->partcollation, + sizeof(Oid) * partnatts); + part_scheme->parttyplen = (int16 *) palloc(sizeof(int16) * partnatts); memcpy(part_scheme->parttyplen, partkey->parttyplen, sizeof(int16) * partnatts); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 25333c5407..5e1d4151c2 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -342,6 +342,9 @@ typedef struct PlannerInfo * partition bounds. Since partition key data types and the opclass declared * input data types are expected to be binary compatible (per ResolveOpClass), * both of those should have same byval and length properties. + * + * The collation of the partition key can differ from the collation of the + * underlying column, so we must store this separately. */ typedef struct PartitionSchemeData { @@ -349,7 +352,8 @@ typedef struct PartitionSchemeData int16 partnatts; /* number of partition attributes */ Oid *partopfamily; /* OIDs of operator families */ Oid *partopcintype; /* OIDs of opclass declared input data types */ - Oid *parttypcoll; /* OIDs of collations of partition keys. */ + Oid *parttypcoll; /* OIDs of partition key type collation. */ + Oid *partcollation; /* OIDs of partitioning collation */ /* Cached information about partition key data types. */ int16 *parttyplen; diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index a79f891da7..11a259ca25 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1715,11 +1715,7 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd' Append -> Seq Scan on part_ab_cd Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_ef_gh - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_null_xy - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -(7 rows) +(3 rows) explain (costs off) select * from list_parted where a = 'ab'; QUERY PLAN @@ -1906,11 +1902,13 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti Filter: (abs(b) = 5) -> Seq Scan on mcrparted3 Filter: (abs(b) = 5) + -> Seq Scan on mcrparted4 + Filter: (abs(b) = 5) -> Seq Scan on mcrparted5 Filter: (abs(b) = 5) -> Seq Scan on mcrparted_def Filter: (abs(b) = 5) -(13 rows) +(15 rows) explain (costs off) select * from mcrparted where a > -1; -- scans all partitions QUERY PLAN diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index aabb0240a9..bc9ff38253 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -208,16 +208,14 @@ explain (costs off) select * from rlp where 1 > a; /* commuted */ (3 rows) explain (costs off) select * from rlp where a <= 1; - QUERY PLAN ---------------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on rlp1 Filter: (a <= 1) -> Seq Scan on rlp2 Filter: (a <= 1) - -> Seq Scan on rlp_default_default - Filter: (a <= 1) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 1; QUERY PLAN @@ -519,15 +517,13 @@ explain (costs off) select * from rlp where a <= 31; Filter: (a <= 31) -> Seq Scan on rlp5_1 Filter: (a <= 31) - -> Seq Scan on rlp5_default - Filter: (a <= 31) -> Seq Scan on rlp_default_10 Filter: (a <= 31) -> Seq Scan on rlp_default_30 Filter: (a <= 31) -> Seq Scan on rlp_default_default Filter: (a <= 31) -(29 rows) +(27 rows) explain (costs off) select * from rlp where a = 1 or a = 7; QUERY PLAN @@ -575,9 +571,7 @@ explain (costs off) select * from rlp where a > 20 and a < 27; Filter: ((a > 20) AND (a < 27)) -> Seq Scan on rlp4_2 Filter: ((a > 20) AND (a < 27)) - -> Seq Scan on rlp4_default - Filter: ((a > 20) AND (a < 27)) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 29; QUERY PLAN @@ -651,8 +645,6 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = QUERY PLAN ------------------------------------------------------------------- Append - -> Seq Scan on rlp2 - Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3abcd Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3efgh @@ -661,7 +653,7 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3_default Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -(11 rows) +(9 rows) -- multi-column keys create table mc3p (a int, b int, c int) partition by range (a, abs(b), c); @@ -716,9 +708,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; QUERY PLAN @@ -894,6 +884,8 @@ explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p2 Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) + -> Seq Scan on mc3p3 + Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p4 Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p5 @@ -904,7 +896,7 @@ explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p_default Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -(17 rows) +(19 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 10); QUERY PLAN @@ -965,9 +957,11 @@ explain (costs off) select * from mc2p where a = 2 and b < 1; QUERY PLAN --------------------------------------- Append + -> Seq Scan on mc2p2 + Filter: ((b < 1) AND (a = 2)) -> Seq Scan on mc2p3 Filter: ((b < 1) AND (a = 2)) -(3 rows) +(5 rows) explain (costs off) select * from mc2p where a > 1; QUERY PLAN @@ -1009,28 +1003,20 @@ explain (costs off) select * from boolpart where a in (true, false); (5 rows) explain (costs off) select * from boolpart where a = false; - QUERY PLAN ------------------------------------- + QUERY PLAN +------------------------------ Append -> Seq Scan on boolpart_f Filter: (NOT a) - -> Seq Scan on boolpart_t - Filter: (NOT a) - -> Seq Scan on boolpart_default - Filter: (NOT a) -(7 rows) +(3 rows) explain (costs off) select * from boolpart where not a = false; - QUERY PLAN ------------------------------------- + QUERY PLAN +------------------------------ Append - -> Seq Scan on boolpart_f - Filter: a -> Seq Scan on boolpart_t Filter: a - -> Seq Scan on boolpart_default - Filter: a -(7 rows) +(3 rows) explain (costs off) select * from boolpart where a is true or a is not true; QUERY PLAN @@ -1040,33 +1026,22 @@ explain (costs off) select * from boolpart where a is true or a is not true; Filter: ((a IS TRUE) OR (a IS NOT TRUE)) -> Seq Scan on boolpart_t Filter: ((a IS TRUE) OR (a IS NOT TRUE)) - -> Seq Scan on boolpart_default - Filter: ((a IS TRUE) OR (a IS NOT TRUE)) -(7 rows) +(5 rows) explain (costs off) select * from boolpart where a is not true; - QUERY PLAN ------------------------------------- + QUERY PLAN +--------------------------------- Append -> Seq Scan on boolpart_f Filter: (a IS NOT TRUE) - -> Seq Scan on boolpart_t - Filter: (a IS NOT TRUE) - -> Seq Scan on boolpart_default - Filter: (a IS NOT TRUE) -(7 rows) +(3 rows) explain (costs off) select * from boolpart where a is not true and a is not false; - QUERY PLAN --------------------------------------------------------- - Append - -> Seq Scan on boolpart_f - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) - -> Seq Scan on boolpart_t - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) - -> Seq Scan on boolpart_default - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) -(7 rows) + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) explain (costs off) select * from boolpart where a is unknown; QUERY PLAN @@ -1092,4 +1067,355 @@ explain (costs off) select * from boolpart where a is not unknown; Filter: (a IS NOT UNKNOWN) (7 rows) -drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart; +-- hash partitioning +create table hp (a int, b text) partition by hash (a, b); +create table hp0 partition of hp for values with (modulus 4, remainder 0); +create table hp3 partition of hp for values with (modulus 4, remainder 3); +create table hp1 partition of hp for values with (modulus 4, remainder 1); +create table hp2 partition of hp for values with (modulus 4, remainder 2); +insert into hp values (null, null); +insert into hp values (1, null); +insert into hp values (1, 'xxx'); +insert into hp values (null, 'xxx'); +insert into hp values (10, 'xxx'); +insert into hp values (10, 'yyy'); +select tableoid::regclass, * from hp order by 1; + tableoid | a | b +----------+----+----- + hp0 | | + hp0 | 1 | + hp0 | 1 | xxx + hp3 | 10 | yyy + hp1 | | xxx + hp2 | 10 | xxx +(6 rows) + +-- partial keys won't prune, nor would non-equality conditions +explain (costs off) select * from hp where a = 1; + QUERY PLAN +------------------------- + Append + -> Seq Scan on hp0 + Filter: (a = 1) + -> Seq Scan on hp1 + Filter: (a = 1) + -> Seq Scan on hp2 + Filter: (a = 1) + -> Seq Scan on hp3 + Filter: (a = 1) +(9 rows) + +explain (costs off) select * from hp where b = 'xxx'; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on hp0 + Filter: (b = 'xxx'::text) + -> Seq Scan on hp1 + Filter: (b = 'xxx'::text) + -> Seq Scan on hp2 + Filter: (b = 'xxx'::text) + -> Seq Scan on hp3 + Filter: (b = 'xxx'::text) +(9 rows) + +explain (costs off) select * from hp where a is null; + QUERY PLAN +----------------------------- + Append + -> Seq Scan on hp0 + Filter: (a IS NULL) + -> Seq Scan on hp1 + Filter: (a IS NULL) + -> Seq Scan on hp2 + Filter: (a IS NULL) + -> Seq Scan on hp3 + Filter: (a IS NULL) +(9 rows) + +explain (costs off) select * from hp where b is null; + QUERY PLAN +----------------------------- + Append + -> Seq Scan on hp0 + Filter: (b IS NULL) + -> Seq Scan on hp1 + Filter: (b IS NULL) + -> Seq Scan on hp2 + Filter: (b IS NULL) + -> Seq Scan on hp3 + Filter: (b IS NULL) +(9 rows) + +explain (costs off) select * from hp where a < 1 and b = 'xxx'; + QUERY PLAN +------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a < 1) AND (b = 'xxx'::text)) + -> Seq Scan on hp1 + Filter: ((a < 1) AND (b = 'xxx'::text)) + -> Seq Scan on hp2 + Filter: ((a < 1) AND (b = 'xxx'::text)) + -> Seq Scan on hp3 + Filter: ((a < 1) AND (b = 'xxx'::text)) +(9 rows) + +explain (costs off) select * from hp where a <> 1 and b = 'yyy'; + QUERY PLAN +-------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a <> 1) AND (b = 'yyy'::text)) + -> Seq Scan on hp1 + Filter: ((a <> 1) AND (b = 'yyy'::text)) + -> Seq Scan on hp2 + Filter: ((a <> 1) AND (b = 'yyy'::text)) + -> Seq Scan on hp3 + Filter: ((a <> 1) AND (b = 'yyy'::text)) +(9 rows) + +-- pruning should work in all cases below +explain (costs off) select * from hp where a is null and b is null; + QUERY PLAN +----------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a IS NULL) AND (b IS NULL)) +(3 rows) + +explain (costs off) select * from hp where a = 1 and b is null; + QUERY PLAN +------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((b IS NULL) AND (a = 1)) +(3 rows) + +explain (costs off) select * from hp where a = 1 and b = 'xxx'; + QUERY PLAN +------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a = 1) AND (b = 'xxx'::text)) +(3 rows) + +explain (costs off) select * from hp where a is null and b = 'xxx'; + QUERY PLAN +----------------------------------------------------- + Append + -> Seq Scan on hp1 + Filter: ((a IS NULL) AND (b = 'xxx'::text)) +(3 rows) + +explain (costs off) select * from hp where a = 10 and b = 'xxx'; + QUERY PLAN +-------------------------------------------------- + Append + -> Seq Scan on hp2 + Filter: ((a = 10) AND (b = 'xxx'::text)) +(3 rows) + +explain (costs off) select * from hp where a = 10 and b = 'yyy'; + QUERY PLAN +-------------------------------------------------- + Append + -> Seq Scan on hp3 + Filter: ((a = 10) AND (b = 'yyy'::text)) +(3 rows) + +explain (costs off) select * from hp where (a = 10 and b = 'yyy') or (a = 10 and b = 'xxx') or (a is null and b is null); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: (((a = 10) AND (b = 'yyy'::text)) OR ((a = 10) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) + -> Seq Scan on hp2 + Filter: (((a = 10) AND (b = 'yyy'::text)) OR ((a = 10) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) + -> Seq Scan on hp3 + Filter: (((a = 10) AND (b = 'yyy'::text)) OR ((a = 10) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) +(7 rows) + +-- +-- some more cases +-- +-- +-- pruning for partitioned table appearing inside a sub-query +-- +-- pruning won't work for mc3p, because some keys are Params +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = t1.b and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on mc2p0 t1 + Filter: (a = 1) + -> Seq Scan on mc2p1 t1_1 + Filter: (a = 1) + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) + -> Aggregate + -> Append + -> Seq Scan on mc3p0 t2 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p1 t2_1 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p2 t2_2 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p3 t2_3 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p4 t2_4 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p5 t2_5 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p6 t2_6 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p7 t2_7 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p_default t2_8 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) +(30 rows) + +-- pruning should work fine, because prefix of keys is available +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.c = t1.b and abs(t2.b) = 1 and t2.a = 1) s where t1.a = 1; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on mc2p0 t1 + Filter: (a = 1) + -> Seq Scan on mc2p1 t1_1 + Filter: (a = 1) + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) + -> Aggregate + -> Append + -> Seq Scan on mc3p0 t2 + Filter: ((c = t1.b) AND (a = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p1 t2_1 + Filter: ((c = t1.b) AND (a = 1) AND (abs(b) = 1)) +(16 rows) + +-- pruning should work fine in this case, too. +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = 1 and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + QUERY PLAN +-------------------------------------------------------------------- + Nested Loop + -> Aggregate + -> Append + -> Seq Scan on mc3p1 t2 + Filter: ((a = 1) AND (c = 1) AND (abs(b) = 1)) + -> Append + -> Seq Scan on mc2p0 t1 + Filter: (a = 1) + -> Seq Scan on mc2p1 t1_1 + Filter: (a = 1) + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) +(14 rows) + +-- +-- pruning with clauses containing <> operator +-- +-- doesn't prune range or hash partitions +explain (costs off) select * from hp where a <> 1 and b <> 'xxx'; + QUERY PLAN +--------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) + -> Seq Scan on hp1 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) + -> Seq Scan on hp2 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) + -> Seq Scan on hp3 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) +(9 rows) + +create table rp (a int) partition by range (a); +create table rp0 partition of rp for values from (minvalue) to (1); +create table rp1 partition of rp for values from (1) to (2); +create table rp2 partition of rp for values from (2) to (maxvalue); +explain (costs off) select * from rp where a <> 1; + QUERY PLAN +-------------------------- + Append + -> Seq Scan on rp0 + Filter: (a <> 1) + -> Seq Scan on rp1 + Filter: (a <> 1) + -> Seq Scan on rp2 + Filter: (a <> 1) +(7 rows) + +explain (costs off) select * from rp where a <> 1 and a <> 2; + QUERY PLAN +----------------------------------------- + Append + -> Seq Scan on rp0 + Filter: ((a <> 1) AND (a <> 2)) + -> Seq Scan on rp1 + Filter: ((a <> 1) AND (a <> 2)) + -> Seq Scan on rp2 + Filter: ((a <> 1) AND (a <> 2)) +(7 rows) + +-- null partition should be eliminated due to strict <> clause. +explain (costs off) select * from lp where a <> 'a'; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on lp_ad + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_bc + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_ef + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_g + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_default + Filter: (a <> 'a'::bpchar) +(11 rows) + +-- ensure we detect contradictions in clauses; a can't be NULL and NOT NULL. +explain (costs off) select * from lp where a <> 'a' and a is null; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +explain (costs off) select * from lp where (a <> 'a' and a <> 'd') or a is null; + QUERY PLAN +------------------------------------------------------------------------------ + Append + -> Seq Scan on lp_bc + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_ef + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_g + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_null + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_default + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) +(11 rows) + +-- case for list partitioned table that's not root +explain (costs off) select * from rlp where a = 15 and b <> 'ab' and b <> 'cd' and b <> 'xy' and b is not null; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------ + Append + -> Seq Scan on rlp3efgh + Filter: ((b IS NOT NULL) AND ((b)::text <> 'ab'::text) AND ((b)::text <> 'cd'::text) AND ((b)::text <> 'xy'::text) AND (a = 15)) + -> Seq Scan on rlp3_default + Filter: ((b IS NOT NULL) AND ((b)::text <> 'ab'::text) AND ((b)::text <> 'cd'::text) AND ((b)::text <> 'xy'::text) AND (a = 15)) +(5 rows) + +drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, hp, rp; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 514f8e5ce1..b7c5abf378 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -152,4 +152,79 @@ explain (costs off) select * from boolpart where a is not true and a is not fals explain (costs off) select * from boolpart where a is unknown; explain (costs off) select * from boolpart where a is not unknown; -drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart; +-- hash partitioning +create table hp (a int, b text) partition by hash (a, b); +create table hp0 partition of hp for values with (modulus 4, remainder 0); +create table hp3 partition of hp for values with (modulus 4, remainder 3); +create table hp1 partition of hp for values with (modulus 4, remainder 1); +create table hp2 partition of hp for values with (modulus 4, remainder 2); + +insert into hp values (null, null); +insert into hp values (1, null); +insert into hp values (1, 'xxx'); +insert into hp values (null, 'xxx'); +insert into hp values (10, 'xxx'); +insert into hp values (10, 'yyy'); +select tableoid::regclass, * from hp order by 1; + +-- partial keys won't prune, nor would non-equality conditions +explain (costs off) select * from hp where a = 1; +explain (costs off) select * from hp where b = 'xxx'; +explain (costs off) select * from hp where a is null; +explain (costs off) select * from hp where b is null; +explain (costs off) select * from hp where a < 1 and b = 'xxx'; +explain (costs off) select * from hp where a <> 1 and b = 'yyy'; + +-- pruning should work in all cases below +explain (costs off) select * from hp where a is null and b is null; +explain (costs off) select * from hp where a = 1 and b is null; +explain (costs off) select * from hp where a = 1 and b = 'xxx'; +explain (costs off) select * from hp where a is null and b = 'xxx'; +explain (costs off) select * from hp where a = 10 and b = 'xxx'; +explain (costs off) select * from hp where a = 10 and b = 'yyy'; +explain (costs off) select * from hp where (a = 10 and b = 'yyy') or (a = 10 and b = 'xxx') or (a is null and b is null); + +-- +-- some more cases +-- + +-- +-- pruning for partitioned table appearing inside a sub-query +-- + +-- pruning won't work for mc3p, because some keys are Params +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = t1.b and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + +-- pruning should work fine, because prefix of keys is available +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.c = t1.b and abs(t2.b) = 1 and t2.a = 1) s where t1.a = 1; + +-- pruning should work fine in this case, too. +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = 1 and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + +-- +-- pruning with clauses containing <> operator +-- + +-- doesn't prune range or hash partitions +explain (costs off) select * from hp where a <> 1 and b <> 'xxx'; + +create table rp (a int) partition by range (a); +create table rp0 partition of rp for values from (minvalue) to (1); +create table rp1 partition of rp for values from (1) to (2); +create table rp2 partition of rp for values from (2) to (maxvalue); + +explain (costs off) select * from rp where a <> 1; +explain (costs off) select * from rp where a <> 1 and a <> 2; + +-- null partition should be eliminated due to strict <> clause. +explain (costs off) select * from lp where a <> 'a'; + +-- ensure we detect contradictions in clauses; a can't be NULL and NOT NULL. +explain (costs off) select * from lp where a <> 'a' and a is null; + +explain (costs off) select * from lp where (a <> 'a' and a <> 'd') or a is null; + +-- case for list partitioned table that's not root +explain (costs off) select * from rlp where a = 15 and b <> 'ab' and b <> 'cd' and b <> 'xy' and b is not null; + +drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, hp, rp; -- 2.11.0