Hi Vladimir,
On 05.09.2016 16:38, Ildar Musin wrote:
Hi Vladimir,
On 03.09.2016 19:31, Vladimir Sitnikov wrote:
Ildar>The reason why this doesn't work is that '~~' operator (which is a
Ildar>synonym for 'like') isn't supported by operator class for btree.
Since
Ildar>the only operators supported by btree are <, <=, =, >=, >, you
can use
Ildar>it with queries like:
Ildar>And in 3rd query 'OFFSET' statement prevents rewriter from
Ildar>transforming the query, so it is possible to use index only scan on
Ildar>subquery and then filter the result of subquery with '~~' operator.
I'm afraid I do not follow you.
Note: query 3 is 100% equivalent of query 2, however query 3 takes 55
times less reads.
It looks like either an optimizer bug, or some missing feature in the
"index only scan" logic.
Here's quote from "query 2" (note % are at both ends): ... where
type=42) as x where upper_vc like '%ABC%';
Note: I do NOT use "indexed scan" for the like operator. I'm very well
aware
that LIKE patterns with leading % cannot be optimized to a btree range
scan.
What I want is "use the first indexed column as index scan, then use the
second column
for filtering".
As shown in "query 2" vs "query 3", PostgreSQL cannot come up with such
a plan on its own
for some reason.
This is not a theoretical issue, but it is something that I use a lot
with Oracle DB (it just creates a good plan for "query 2").
Vladimir
Thanks, I get it now. The reason why it acts like this is that I used
match_clause_to_index() function to determine if IOS can be used with
the specified clauses. This function among other things checks if
operator matches the index opfamily. Apparently this isn't correct. I
wrote another prototype to test your case and it seems to work. But it's
not ready for public yet, I'll publish it in 1-2 days.
Here is a new patch version. I modified check_index_only_clauses() so
that it doesn't use match_clause_to_indexcol() anymore. Instead it
handles different types of expressions including binary operator
expressions, scalar array expressions, row compare expressions (e.g.
(a,b)<(1,2)) and null tests and tries to match each part of expression
to index regardless an operator. I reproduced your example and was able
to get index only scan on all queries. Could you please try the patch
and tell if it works for you?
--
Ildar Musin
i.mu...@postgrespro.ru
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2952bfb..e81f914 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -25,6 +25,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
@@ -78,6 +79,14 @@ typedef struct
int indexcol; /* index column we want to match to */
} ec_member_matches_arg;
+/* Context for index only expression checker */
+typedef struct
+{
+ IndexOptInfo *index;
+ bool match; /* TRUE if expression matches
+ * index */
+} check_index_only_walker_ctx;
+
static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
IndexOptInfo *index,
@@ -130,7 +139,15 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
-static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only_targetlist(PlannerInfo *root,
+ RelOptInfo *rel,
+ IndexOptInfo *index,
+ Bitmapset *index_canreturn_attrs);
+static bool check_index_only_clauses(List *clauses,
+ IndexOptInfo *index);
+static bool check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx);
+static bool check_index_only_expr(Node *expr, IndexOptInfo *index);
static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
static double adjust_rowcount_for_semijoins(PlannerInfo *root,
Index cur_relid,
@@ -1020,7 +1037,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
* index data retrieval anyway.
*/
index_only_scan = (scantype != ST_BITMAPSCAN &&
- check_index_only(rel, index));
+ check_index_only(root, rel, index));
/*
* 4. Generate an indexscan path if there are relevant restriction clauses
@@ -1786,7 +1803,7 @@ find_list_position(Node *node, List **nodelist)
* Determine whether an index-only scan is possible for this index.
*/
static bool
-check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index)
{
bool result;
Bitmapset *attrs_used = NULL;
@@ -1837,8 +1854,10 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
int attno = index->indexkeys[i];
/*
- * For the moment, we just ignore index expressions. It might be nice
- * to do something with them, later.
+ * We ignore expressions here and only look at plain attributes. Analyzing
+ * expressions is also a bit more expensive, so we first try to match the
+ * expressions to attributes indexed directly, and spend additional CPU time
+ * on analyzing the expressions only if needed.
*/
if (attno == 0)
continue;
@@ -1852,6 +1871,15 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
/* Do we have all the necessary attributes? */
result = bms_is_subset(attrs_used, index_canreturn_attrs);
+ /*
+ * If the directly indexed attributes are not sufficient, it's time to look
+ * at index expressions and try to match them to targetlist and index clauses.
+ */
+ if (!result && index->indexprs)
+ result =
+ check_index_only_targetlist(root, rel, index, index_canreturn_attrs) &&
+ check_index_only_clauses(index->indrestrictinfo, index);
+
bms_free(attrs_used);
bms_free(index_canreturn_attrs);
@@ -1859,6 +1887,225 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
}
/*
+ * check_index_only_expr_walker
+ * Recursive walker that checks if each part of the expression can be
+ * build with index expressions or attributes
+ */
+static bool
+check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx)
+{
+ int i;
+
+ /*
+ * If node is empty or some subexpression has already reported that it
+ * isn't match the index then quit looking any further
+ */
+ if (node == NULL || ctx->match == false)
+ return false;
+
+ /*
+ * Try to match current node to index expressions. If it matches, we're
+ * done with this subtree
+ */
+ for (i = 0; i < ctx->index->ncolumns; i++)
+ if (match_index_to_operand(node, i, ctx->index))
+ return false;
+
+ /*
+ * If after all we still have an unresolved Var that doesn't match index
+ * then we cannot use index only scan
+ */
+ if (IsA(node, Var) && ((Var *) node)->varno == ctx->index->rel->relid)
+ {
+ ctx->match = false;
+ return false;
+ }
+
+ /* Recursive check */
+ return expression_tree_walker(node, check_index_only_expr_walker, (void *) ctx);
+}
+
+/*
+ * check_index_only_expr
+ * Runs check_index_only_expr_walker() to make sure that expression could
+ * be build with index expressions and attributes
+ */
+static bool
+check_index_only_expr(Node *expr, IndexOptInfo *index)
+{
+ bool flag = false;
+ check_index_only_walker_ctx *ctx;
+
+ /* Don't bother to run walker for NULL expressions */
+ if (!expr)
+ return true;
+
+ ctx = (check_index_only_walker_ctx *) palloc(sizeof(check_index_only_walker_ctx));
+ ctx->index = index;
+ ctx->match = true;
+ check_index_only_expr_walker(expr, ctx);
+
+ if (ctx->match)
+ flag = true;
+
+ pfree(ctx);
+
+ return flag;
+}
+
+/*
+ * check_index_only_targetlist
+ * Checks that every target of index->relation can be returned by index
+ *
+ * In this function we're looking through root->parse->targetlist and pick
+ * those targets which contain index->relation's attributes. For each selected
+ * target we use recursive function check_index_only_expr_walker() to check if
+ * target can be build with the index expressions and attributes and none of
+ * the other index->relation's attributes
+ */
+static bool
+check_index_only_targetlist(PlannerInfo *root,
+ RelOptInfo *rel,
+ IndexOptInfo *index,
+ Bitmapset *index_canreturn_attrs)
+{
+ ListCell *lc;
+ bool result = true;
+
+ /* Check expressions from targetlist */
+ foreach(lc, root->parse->targetList)
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(lc);
+ Bitmapset *varnos = pull_varnos((Node *) te->expr);
+
+ /* Ignore targetlist entries not related to the indexed relation. */
+ if (bms_is_member(rel->relid, varnos))
+ {
+ bool is_matched = false; /* assume the expression is not matched */
+ Bitmapset *attrs = NULL;
+
+ pull_varattnos((Node *) te->expr, rel->relid, &attrs);
+
+ if (bms_is_subset(attrs, index_canreturn_attrs) ||
+ check_index_only_expr((Node *) te->expr, index))
+ {
+ is_matched = true;
+ }
+
+ bms_free(attrs);
+
+ /* if the expression is not matched, we don't need to look any further */
+ if (! is_matched)
+ {
+ result = false;
+ break;
+ }
+ }
+
+ bms_free(varnos);
+ }
+
+ return result;
+}
+
+/*
+ * check_index_only_clauses
+ * Recursive function that checks that each clause matches the index
+ *
+ * clauses is a list of RestrictInfos or a BoolExpr containing RestrictInfos
+ */
+static bool
+check_index_only_clauses(List *clauses,
+ IndexOptInfo *index)
+{
+ ListCell *lc;
+
+ foreach(lc, clauses)
+ {
+ bool match = false;
+ Node *node = (Node *) lfirst(lc);
+
+ /*
+ * We expect here either a RestrictInfo or a boolean expression
+ * containing other RestrictInfos
+ */
+ if (IsA(node, RestrictInfo))
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) node;
+ Expr *clause = rinfo->clause;
+
+ if (!restriction_is_or_clause(rinfo))
+ {
+ /*
+ * Clause could be a binary opclause, ScalarArrayOpExpr,
+ * RowCompareExpr or NullTest. We take left and right parts of
+ * binary clauses or single argument of NullTest and make sure
+ * that they match index. Unlike match_clause_to_indexcol()
+ * we do not care here about operator family
+ */
+ Node *left = NULL;
+ Node *right = NULL;
+
+ if (is_opclause(clause))
+ {
+ left = get_leftop(clause);
+ right = get_rightop(clause);
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+
+ left = (Node *) linitial(saop->args);
+ right = (Node *) lsecond(saop->args);
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ RowCompareExpr *rowexpr = (RowCompareExpr *) clause;
+
+ left = (Node *) rowexpr->largs;
+ right = (Node *) rowexpr->rargs;
+ }
+ else if (index->amsearchnulls && IsA(clause, NullTest))
+ {
+ NullTest *nt = (NullTest *) clause;
+
+ left = (Node *) nt->arg;
+ /* Leave right part equal to NULL */
+ }
+
+ if (check_index_only_expr(left, index)
+ && check_index_only_expr(right, index))
+ {
+ match = true;
+ break;
+ }
+ }
+ else
+ {
+ List *subclauses = ((BoolExpr *) rinfo->orclause)->args;
+
+ match = check_index_only_clauses(subclauses, index);
+ }
+ }
+ else if (IsA(node, BoolExpr))
+ {
+ BoolExpr *boolexpr = (BoolExpr *) node;
+
+ match = check_index_only_clauses(boolexpr->args, index);
+ }
+
+ /*
+ * If at least one restriction doesn't match index then return false
+ */
+ if (!match)
+ return false;
+ }
+
+ /* If we got here then everything's fine */
+ return true;
+}
+
+/*
* get_loop_count
* Choose the loop count estimate to use for costing a parameterized path
* with the given set of outer relids.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers