Hi, Marc (in Cc) reported me a problematic query using a GIN index hit in production. The issue is that even if an GIN opclass says that the index can be used for an operator, it's still possible that some values aren't really compatible and requires a full index scan.
One simple example is with a GIN pg_trgm index (but other opclasses have similar restrictions) , doing a LIKE with wildcard on both side, where the pattern is shorter than a trigram, e.g. col LIKE '%a%'. So, a where clause of the form: WHERE col LIKE '%verylongpattern%' AND col LIKE '%a%' is much more expensive than WHERE col LKE '%verylongpattern%' While there's nothing to do if the unhandled const is the only predicate, if there are multiple AND-ed predicates and at least one of them doesn't require a full index scan, we can avoid it. Attached patch tries to fix the issue by detecting such cases and dropping the unhandled quals in the BitmapIndexScan, letting the recheck in BitmapHeapScan do the proper filtering. I'm not happy to call the extractQuery support functions an additional time, but i didn't find a cleaner way. This is of course intended for pg13.
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index b3e709f496..6d96fca65f 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -3498,6 +3498,37 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; 1000 (1 row) +explain (costs off) + select * from test_trgm where t like '%0%' and t like '%azerty%'; + QUERY PLAN +--------------------------------------------- + Bitmap Heap Scan on test_trgm + Recheck Cond: (t ~~ '%azerty%'::text) + Filter: (t ~~ '%0%'::text) + -> Bitmap Index Scan on trgm_idx + Index Cond: (t ~~ '%azerty%'::text) +(5 rows) + +select * from test_trgm where t like '%0%' and t like '%azerty%'; + t +--- +(0 rows) + +explain (costs off) + select * from test_trgm where t like '%0%' and t like '%az%'; + QUERY PLAN +------------------------------------------------------------------ + Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text)) +(4 rows) + +select * from test_trgm where t like '%0%' and t like '%az%'; + t +--- +(0 rows) + create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql index 08459e64c3..9cdb6fda14 100644 --- a/contrib/pg_trgm/sql/pg_trgm.sql +++ b/contrib/pg_trgm/sql/pg_trgm.sql @@ -55,6 +55,13 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098 select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; +explain (costs off) + select * from test_trgm where t like '%0%' and t like '%azerty%'; +select * from test_trgm where t like '%0%' and t like '%azerty%'; +explain (costs off) + select * from test_trgm where t like '%0%' and t like '%az%'; +select * from test_trgm where t like '%0%' and t like '%az%'; + create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 3434219dbd..19e68980c3 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -33,6 +33,7 @@ #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "utils/lsyscache.h" +#include "utils/index_selfuncs.h" #include "utils/selfuncs.h" @@ -973,6 +974,24 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, return NIL; } + /* + * GIN access method can require a full index scan. However, if there are + * multiple AND-ed quals and at least one of them doesn't require a full + * index scan, we can avoid the full scan by dropping all the quals + * requiring it and let the recheck do the proper filtering. + */ + if (index_clauses != NIL && list_length(index_clauses) > 1 && + index->relam == GIN_AM_OID) + { + Relids old_outer_relids = bms_copy(outer_relids); + + bms_free(outer_relids); + outer_relids = bms_copy(rel->lateral_relids); + + index_clauses = gin_get_optimizable_quals(root, index, index_clauses, + &outer_relids, old_outer_relids); + } + /* We do not want the index's rel itself listed in outer_relids */ outer_relids = bms_del_member(outer_relids, rel->relid); /* Enforce convention that outer_relids is exactly NULL if empty */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 12d30d7d63..dbab64a329 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6671,6 +6671,97 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexPages = dataPagesFetched; } +/* + * Given a list of implicitly AND-ed quals, return the cured list of quals that + * can be used for a BitmapIndexScan that would not require a full index scan + * if any, otherwise the original list of quals. + */ +List *gin_get_optimizable_quals(PlannerInfo *root, IndexOptInfo *index, + List *index_clauses, Relids *outer_relids, Relids old_outer_relids) +{ + List *result = NIL; + GinQualCounts counts; + bool matchPossible, + haveOneFullScan = false; + ListCell *lc; + + Assert(index->relam == GIN_AM_OID); + + foreach(lc, index_clauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + IndexClause *newiclause = makeNode(IndexClause); + ListCell *lc2; + + memset(&counts, 0, sizeof(counts)); + + newiclause->rinfo = iclause->rinfo; + newiclause->indexquals = NIL; + newiclause->lossy = iclause->lossy; + newiclause->indexcol = iclause->indexcol; + newiclause->indexcols = iclause->indexcols; + + foreach(lc2, iclause->indexquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + Expr *clause = rinfo->clause; + + if (IsA(clause, OpExpr)) + { + matchPossible = gincost_opexpr(root, + index, + iclause->indexcol, + (OpExpr *) clause, + &counts); + if (!matchPossible) + break; + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + double numEntries = 1; + + matchPossible = gincost_scalararrayopexpr(root, + index, + iclause->indexcol, + (ScalarArrayOpExpr *) clause, + numEntries, + &counts); + if (!matchPossible) + break; + } + else + { + /* shouldn't be anything else for a GIN index */ + elog(ERROR, "unsupported GIN indexqual type: %d", + (int) nodeTag(clause)); + } + + if (counts.haveFullScan) + haveOneFullScan = true; + else + { + newiclause->indexquals = lappend(newiclause->indexquals, rinfo); + *outer_relids = bms_add_members(*outer_relids, + rinfo->clause_relids); + } + } + + if (list_length(newiclause->indexquals)) + result = lappend(result, newiclause); + } + + if (!matchPossible || !haveOneFullScan || result == NIL) + { + bms_free(*outer_relids); + *outer_relids = old_outer_relids; + list_free(result); + + return index_clauses; + } + + return result; +} + /* * BRIN has search behavior completely different from other index types */ diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h index c100b4352a..0364e3c88d 100644 --- a/src/include/utils/index_selfuncs.h +++ b/src/include/utils/index_selfuncs.h @@ -20,6 +20,7 @@ #define INDEX_SELFUNCS_H #include "access/amapi.h" +#include "nodes/pathnodes.h" /* Functions in selfuncs.c */ extern void brincostestimate(struct PlannerInfo *root, @@ -70,5 +71,10 @@ extern void gincostestimate(struct PlannerInfo *root, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages); +extern List *gin_get_optimizable_quals(struct PlannerInfo *root, + IndexOptInfo *index, + List *index_clauses, + Relids *outer_relids, + Relids old_outer_relids); #endif /* INDEX_SELFUNCS_H */