Hi,

Marc (in Cc) reported me a problematic query using a GIN index hit in
production.  The issue is that even if an GIN opclass says that the
index can be used for an operator, it's still possible that some
values aren't really compatible and requires a full index scan.

One simple example is with a GIN pg_trgm index (but other opclasses
have similar restrictions) , doing a LIKE with wildcard on both side,
where the pattern is shorter than a trigram, e.g. col LIKE '%a%'.  So,
a where clause of the form:

WHERE col LIKE '%verylongpattern%' AND col LIKE '%a%'

is much more expensive than

WHERE col LKE '%verylongpattern%'

While there's nothing to do if the unhandled const is the only
predicate, if there are multiple AND-ed predicates and at least one of
them doesn't require a full index scan, we can avoid it.

Attached patch tries to fix the issue by detecting such cases and
dropping the unhandled quals in the BitmapIndexScan, letting the
recheck in BitmapHeapScan do the proper filtering.  I'm not happy to
call the extractQuery support functions an additional time, but i
didn't find a cleaner way.  This is of course intended for pg13.
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index b3e709f496..6d96fca65f 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -3498,6 +3498,37 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
   1000
 (1 row)
 
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%azerty%';
+                 QUERY PLAN                  
+---------------------------------------------
+ Bitmap Heap Scan on test_trgm
+   Recheck Cond: (t ~~ '%azerty%'::text)
+   Filter: (t ~~ '%0%'::text)
+   ->  Bitmap Index Scan on trgm_idx
+         Index Cond: (t ~~ '%azerty%'::text)
+(5 rows)
+
+select * from test_trgm where t like '%0%' and t like '%azerty%';
+ t 
+---
+(0 rows)
+
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%az%';
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Bitmap Heap Scan on test_trgm
+   Recheck Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text))
+   ->  Bitmap Index Scan on trgm_idx
+         Index Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text))
+(4 rows)
+
+select * from test_trgm where t like '%0%' and t like '%az%';
+ t 
+---
+(0 rows)
+
 create table test2(t text COLLATE "C");
 insert into test2 values ('abcdef');
 insert into test2 values ('quark');
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index 08459e64c3..9cdb6fda14 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -55,6 +55,13 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098
 select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
 select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
 
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%azerty%';
+select * from test_trgm where t like '%0%' and t like '%azerty%';
+explain (costs off)
+  select * from test_trgm where t like '%0%' and t like '%az%';
+select * from test_trgm where t like '%0%' and t like '%az%';
+
 create table test2(t text COLLATE "C");
 insert into test2 values ('abcdef');
 insert into test2 values ('quark');
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 3434219dbd..19e68980c3 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -33,6 +33,7 @@
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
 #include "utils/lsyscache.h"
+#include "utils/index_selfuncs.h"
 #include "utils/selfuncs.h"
 
 
@@ -973,6 +974,24 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 			return NIL;
 	}
 
+	/*
+	 * GIN access method can require a full index scan.  However, if there are
+	 * multiple AND-ed quals and at least one of them doesn't require a full
+	 * index scan, we can avoid the full scan by dropping all the quals
+	 * requiring it and let the recheck do the proper filtering.
+	 */
+	if (index_clauses != NIL && list_length(index_clauses) > 1 &&
+			index->relam == GIN_AM_OID)
+	{
+		Relids old_outer_relids = bms_copy(outer_relids);
+
+		bms_free(outer_relids);
+		outer_relids = bms_copy(rel->lateral_relids);
+
+		index_clauses = gin_get_optimizable_quals(root, index, index_clauses,
+				&outer_relids, old_outer_relids);
+	}
+
 	/* We do not want the index's rel itself listed in outer_relids */
 	outer_relids = bms_del_member(outer_relids, rel->relid);
 	/* Enforce convention that outer_relids is exactly NULL if empty */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 12d30d7d63..dbab64a329 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6671,6 +6671,97 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	*indexPages = dataPagesFetched;
 }
 
+/*
+ * Given a list of implicitly AND-ed quals, return the cured list of quals that
+ * can be used for a BitmapIndexScan that would not require a full index scan
+ * if any, otherwise the original list of quals.
+ */
+List *gin_get_optimizable_quals(PlannerInfo *root, IndexOptInfo *index,
+		List *index_clauses, Relids *outer_relids, Relids old_outer_relids)
+{
+	List	   *result = NIL;
+	GinQualCounts counts;
+	bool		matchPossible,
+				haveOneFullScan = false;
+	ListCell   *lc;
+
+	Assert(index->relam == GIN_AM_OID);
+
+	foreach(lc, index_clauses)
+	{
+		IndexClause *iclause = lfirst_node(IndexClause, lc);
+		IndexClause *newiclause = makeNode(IndexClause);
+		ListCell   *lc2;
+
+		memset(&counts, 0, sizeof(counts));
+
+		newiclause->rinfo = iclause->rinfo;
+		newiclause->indexquals = NIL;
+		newiclause->lossy = iclause->lossy;
+		newiclause->indexcol = iclause->indexcol;
+		newiclause->indexcols = iclause->indexcols;
+
+		foreach(lc2, iclause->indexquals)
+		{
+			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+			Expr	   *clause = rinfo->clause;
+
+			if (IsA(clause, OpExpr))
+			{
+				matchPossible = gincost_opexpr(root,
+											   index,
+											   iclause->indexcol,
+											   (OpExpr *) clause,
+											   &counts);
+				if (!matchPossible)
+					break;
+			}
+			else if (IsA(clause, ScalarArrayOpExpr))
+			{
+				double numEntries = 1;
+
+				matchPossible = gincost_scalararrayopexpr(root,
+														  index,
+														  iclause->indexcol,
+														  (ScalarArrayOpExpr *) clause,
+														  numEntries,
+														  &counts);
+				if (!matchPossible)
+					break;
+			}
+			else
+			{
+				/* shouldn't be anything else for a GIN index */
+				elog(ERROR, "unsupported GIN indexqual type: %d",
+					 (int) nodeTag(clause));
+			}
+
+			if (counts.haveFullScan)
+				haveOneFullScan = true;
+			else
+			{
+				newiclause->indexquals = lappend(newiclause->indexquals, rinfo);
+				*outer_relids = bms_add_members(*outer_relids,
+											   rinfo->clause_relids);
+			}
+		}
+
+		if (list_length(newiclause->indexquals))
+			result = lappend(result, newiclause);
+	}
+
+	if (!matchPossible || !haveOneFullScan || result == NIL)
+	{
+		bms_free(*outer_relids);
+		*outer_relids = old_outer_relids;
+		list_free(result);
+
+		return index_clauses;
+	}
+
+	return result;
+}
+
 /*
  * BRIN has search behavior completely different from other index types
  */
diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h
index c100b4352a..0364e3c88d 100644
--- a/src/include/utils/index_selfuncs.h
+++ b/src/include/utils/index_selfuncs.h
@@ -20,6 +20,7 @@
 #define INDEX_SELFUNCS_H
 
 #include "access/amapi.h"
+#include "nodes/pathnodes.h"
 
 /* Functions in selfuncs.c */
 extern void brincostestimate(struct PlannerInfo *root,
@@ -70,5 +71,10 @@ extern void gincostestimate(struct PlannerInfo *root,
 				Selectivity *indexSelectivity,
 				double *indexCorrelation,
 				double *indexPages);
+extern List *gin_get_optimizable_quals(struct PlannerInfo *root,
+				IndexOptInfo *index,
+				List *index_clauses,
+				Relids *outer_relids,
+				Relids old_outer_relids);
 
 #endif							/* INDEX_SELFUNCS_H */

Reply via email to