On Sun, Mar 24, 2019 at 11:52 AM Julien Rouhaud <rjuju...@gmail.com> wrote:
>
> Marc (in Cc) reported me a problematic query using a GIN index hit in
> production.  The issue is that even if an GIN opclass says that the
> index can be used for an operator, it's still possible that some
> values aren't really compatible and requires a full index scan.
>
> One simple example is with a GIN pg_trgm index (but other opclasses
> have similar restrictions) , doing a LIKE with wildcard on both side,
> where the pattern is shorter than a trigram, e.g. col LIKE '%a%'.  So,
> a where clause of the form:
>
> WHERE col LIKE '%verylongpattern%' AND col LIKE '%a%'
>
> is much more expensive than
>
> WHERE col LKE '%verylongpattern%'
>
> While there's nothing to do if the unhandled const is the only
> predicate, if there are multiple AND-ed predicates and at least one of
> them doesn't require a full index scan, we can avoid it.
>
> Attached patch tries to fix the issue by detecting such cases and
> dropping the unhandled quals in the BitmapIndexScan, letting the
> recheck in BitmapHeapScan do the proper filtering.  I'm not happy to
> call the extractQuery support functions an additional time, but i
> didn't find a cleaner way.  This is of course intended for pg13.

Patch doesn't apply anymore (thanks cfbot).  Rebased patch attached.
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index b3e709f496..6d96fca65f 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -3498,6 +3498,37 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
   1000
 (1 row)
 
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%azerty%';
+                 QUERY PLAN                  
+---------------------------------------------
+ Bitmap Heap Scan on test_trgm
+   Recheck Cond: (t ~~ '%azerty%'::text)
+   Filter: (t ~~ '%0%'::text)
+   ->  Bitmap Index Scan on trgm_idx
+         Index Cond: (t ~~ '%azerty%'::text)
+(5 rows)
+
+select * from test_trgm where t like '%0%' and t like '%azerty%';
+ t 
+---
+(0 rows)
+
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%az%';
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Bitmap Heap Scan on test_trgm
+   Recheck Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text))
+   ->  Bitmap Index Scan on trgm_idx
+         Index Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text))
+(4 rows)
+
+select * from test_trgm where t like '%0%' and t like '%az%';
+ t 
+---
+(0 rows)
+
 create table test2(t text COLLATE "C");
 insert into test2 values ('abcdef');
 insert into test2 values ('quark');
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index 08459e64c3..9cdb6fda14 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -55,6 +55,13 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098
 select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
 select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
 
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%azerty%';
+select * from test_trgm where t like '%0%' and t like '%azerty%';
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%az%';
+select * from test_trgm where t like '%0%' and t like '%az%';
+
 create table test2(t text COLLATE "C");
 insert into test2 values ('abcdef');
 insert into test2 values ('quark');
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index c208e9bfb0..82fdf44905 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -33,6 +33,7 @@
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
 #include "utils/lsyscache.h"
+#include "utils/index_selfuncs.h"
 #include "utils/selfuncs.h"
 
 
@@ -973,6 +974,24 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 			return NIL;
 	}
 
+	/*
+	 * GIN access method can require a full index scan.  However, if there are
+	 * multiple AND-ed quals and at least one of them doesn't require a full
+	 * index scan, we can avoid the full scan by dropping all the quals
+	 * requiring it and let the recheck do the proper filtering.
+	 */
+	if (index_clauses != NIL && list_length(index_clauses) > 1 &&
+			index->relam == GIN_AM_OID)
+	{
+		Relids old_outer_relids = bms_copy(outer_relids);
+
+		bms_free(outer_relids);
+		outer_relids = bms_copy(rel->lateral_relids);
+
+		index_clauses = gin_get_optimizable_quals(root, index, index_clauses,
+				&outer_relids, old_outer_relids);
+	}
+
 	/* We do not want the index's rel itself listed in outer_relids */
 	outer_relids = bms_del_member(outer_relids, rel->relid);
 	/* Enforce convention that outer_relids is exactly NULL if empty */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d7e3f09f1a..0080b1de4b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6767,6 +6767,97 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	*indexPages = dataPagesFetched;
 }
 
+/*
+ * Given a list of implicitly AND-ed quals, return the cured list of quals that
+ * can be used for a BitmapIndexScan that would not require a full index scan
+ * if any, otherwise the original list of quals.
+ */
+List *gin_get_optimizable_quals(PlannerInfo *root, IndexOptInfo *index,
+		List *index_clauses, Relids *outer_relids, Relids old_outer_relids)
+{
+	List	   *result = NIL;
+	GinQualCounts counts;
+	bool		matchPossible,
+				haveOneFullScan = false;
+	ListCell   *lc;
+
+	Assert(index->relam == GIN_AM_OID);
+
+	foreach(lc, index_clauses)
+	{
+		IndexClause *iclause = lfirst_node(IndexClause, lc);
+		IndexClause *newiclause = makeNode(IndexClause);
+		ListCell   *lc2;
+
+		memset(&counts, 0, sizeof(counts));
+
+		newiclause->rinfo = iclause->rinfo;
+		newiclause->indexquals = NIL;
+		newiclause->lossy = iclause->lossy;
+		newiclause->indexcol = iclause->indexcol;
+		newiclause->indexcols = iclause->indexcols;
+
+		foreach(lc2, iclause->indexquals)
+		{
+			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+			Expr	   *clause = rinfo->clause;
+
+			if (IsA(clause, OpExpr))
+			{
+				matchPossible = gincost_opexpr(root,
+											   index,
+											   iclause->indexcol,
+											   (OpExpr *) clause,
+											   &counts);
+				if (!matchPossible)
+					break;
+			}
+			else if (IsA(clause, ScalarArrayOpExpr))
+			{
+				double numEntries = 1;
+
+				matchPossible = gincost_scalararrayopexpr(root,
+														  index,
+														  iclause->indexcol,
+														  (ScalarArrayOpExpr *) clause,
+														  numEntries,
+														  &counts);
+				if (!matchPossible)
+					break;
+			}
+			else
+			{
+				/* shouldn't be anything else for a GIN index */
+				elog(ERROR, "unsupported GIN indexqual type: %d",
+					 (int) nodeTag(clause));
+			}
+
+			if (counts.haveFullScan)
+				haveOneFullScan = true;
+			else
+			{
+				newiclause->indexquals = lappend(newiclause->indexquals, rinfo);
+				*outer_relids = bms_add_members(*outer_relids,
+											   rinfo->clause_relids);
+			}
+		}
+
+		if (list_length(newiclause->indexquals))
+			result = lappend(result, newiclause);
+	}
+
+	if (!matchPossible || !haveOneFullScan || result == NIL)
+	{
+		bms_free(*outer_relids);
+		*outer_relids = old_outer_relids;
+		list_free(result);
+
+		return index_clauses;
+	}
+
+	return result;
+}
+
 /*
  * BRIN has search behavior completely different from other index types
  */
diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h
index b81556d7a1..085ebd942a 100644
--- a/src/include/utils/index_selfuncs.h
+++ b/src/include/utils/index_selfuncs.h
@@ -20,6 +20,7 @@
 #define INDEX_SELFUNCS_H
 
 #include "access/amapi.h"
+#include "nodes/pathnodes.h"
 
 /* Functions in selfuncs.c */
 extern void brincostestimate(struct PlannerInfo *root,
@@ -70,5 +71,10 @@ extern void gincostestimate(struct PlannerInfo *root,
 							Selectivity *indexSelectivity,
 							double *indexCorrelation,
 							double *indexPages);
+extern List *gin_get_optimizable_quals(struct PlannerInfo *root,
+									   IndexOptInfo *index,
+									   List *index_clauses,
+									   Relids *outer_relids,
+									   Relids old_outer_relids);
 
 #endif							/* INDEX_SELFUNCS_H */

Reply via email to