On Mon, Oct 26, 2020 at 12:02 PM Julien Rouhaud <rjuju...@gmail.com> wrote: > > On Mon, Oct 26, 2020 at 5:03 AM Tom Lane <t...@sss.pgh.pa.us> wrote: > > > > Julien Rouhaud <rjuju...@gmail.com> writes: > > > A french user recently complained that with an index created using > > > gin_trgm_ops (or gist_trgm_ops), you can use the index with a clause > > > like > > > col LIKE 'something' > > > but not > > > col = 'something' > > > > Huh, I'd supposed we did that already. > > > > > even though both clauses are technically identical. That's clearly > > > not a high priority thing to support, but looking at the code it seems > > > to me that this could be achieved quite simply: just adding a new > > > operator = in the opclass, with an operator strategy number that falls > > > back doing exactly what LikeStrategyNumber is doing and that's it. > > > There shouldn't be any wrong results, even using wildcards as the > > > recheck will remove any incorrect one. > > > > I think you may be overoptimistic about being able to use the identical > > code path without regard for LIKE wildcards; but certainly it should be > > possible to do this with not a lot of new code. +1. > > Well, that's what I was thinking too, but I tried all the possible > wildcard combinations I could think of and I couldn't find any case > yielding wrong results. As far as I can see the index scans return at > least all the required rows, and all extraneous rows are correctly > removed either by heap recheck or index recheck. > > I'm attaching a patch POC pach with regression tests covering those > combinations. I also found a typo in the 1.4--1.5 pg_trgm upgrade > script, so I'm also attaching a patch for that.
Oops, I forgot to git-add the 1.5--1.6.sql upgrade script in the previous patch.
From 3539eb386b5f15dc3c454cef4fee210d014bd91e Mon Sep 17 00:00:00 2001 From: Julien Rouhaud <julien.rouh...@free.fr> Date: Mon, 26 Oct 2020 11:29:45 +0800 Subject: [PATCH v2 1/2] Fix typo in 1.4--1.5 pg_trm upgrade script --- contrib/pg_trgm/pg_trgm--1.4--1.5.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/pg_trgm/pg_trgm--1.4--1.5.sql b/contrib/pg_trgm/pg_trgm--1.4--1.5.sql index 284f88d325..db122fce0f 100644 --- a/contrib/pg_trgm/pg_trgm--1.4--1.5.sql +++ b/contrib/pg_trgm/pg_trgm--1.4--1.5.sql @@ -1,4 +1,4 @@ -/* contrib/pg_trgm/pg_trgm--1.5--1.5.sql */ +/* contrib/pg_trgm/pg_trgm--1.4--1.5.sql */ -- complain if script is sourced in psql, rather than via ALTER EXTENSION \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.5'" to load this file. \quit -- 2.28.0
From 38c6727c16814640fe9e3be0bc019a67642f7c56 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud <julien.rouh...@free.fr> Date: Mon, 26 Oct 2020 11:28:36 +0800 Subject: [PATCH v2 2/2] Handle = operator in pg_trgm. --- contrib/pg_trgm/Makefile | 2 +- contrib/pg_trgm/expected/pg_trgm.out | 204 +++++++++++++++++++++++++- contrib/pg_trgm/pg_trgm--1.5--1.6.sql | 11 ++ contrib/pg_trgm/pg_trgm.control | 2 +- contrib/pg_trgm/sql/pg_trgm.sql | 40 +++++ contrib/pg_trgm/trgm.h | 1 + contrib/pg_trgm/trgm_gin.c | 3 + contrib/pg_trgm/trgm_gist.c | 2 + doc/src/sgml/pgtrgm.sgml | 7 +- 9 files changed, 263 insertions(+), 9 deletions(-) create mode 100644 contrib/pg_trgm/pg_trgm--1.5--1.6.sql diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile index d75e9ada2e..1fbdc9ec1e 100644 --- a/contrib/pg_trgm/Makefile +++ b/contrib/pg_trgm/Makefile @@ -9,7 +9,7 @@ OBJS = \ trgm_regexp.o EXTENSION = pg_trgm -DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \ +DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \ pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \ pg_trgm--1.0--1.1.sql PGFILEDESC = "pg_trgm - trigram matching" diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index 923c326c7b..20141ce7f3 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); insert into test2 values ('/123/-45/'); +insert into test2 values ('line 1'); +insert into test2 values ('%line 2'); +insert into test2 values ('line 3%'); +insert into test2 values ('%line 4%'); +insert into test2 values ('%li%ne 5%'); +insert into test2 values ('li_e 6'); create index test2_idx_gin on test2 using gin (t gin_trgm_ops); set enable_seqscan=off; explain (costs off) @@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$'; quark z foo bar /123/-45/ -(4 rows) + line 1 + %line 2 + line 3% + %line 4% + %li%ne 5% + li_e 6 +(10 rows) select * from test2 where t ~* 'DEF'; t @@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}'; abcdef quark z foo bar -(3 rows) + line 1 + %line 2 + line 3% + %line 4% +(7 rows) select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}'; t @@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d'; /123/-45/ (1 row) +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; + QUERY PLAN +------------------------------------------ + Bitmap Heap Scan on test2 + Recheck Cond: (t = 'abcdef'::text) + -> Bitmap Index Scan on test2_idx_gin + Index Cond: (t = 'abcdef'::text) +(4 rows) + +select * from test2 where t = 'abcdef'; + t +-------- + abcdef +(1 row) + +explain (costs off) + select * from test2 where t = '%line%'; + QUERY PLAN +------------------------------------------ + Bitmap Heap Scan on test2 + Recheck Cond: (t = '%line%'::text) + -> Bitmap Index Scan on test2_idx_gin + Index Cond: (t = '%line%'::text) +(4 rows) + +select * from test2 where t = '%line%'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 1'; + t +--- +(0 rows) + +select * from test2 where t = '%line 2'; + t +--------- + %line 2 +(1 row) + +select * from test2 where t = 'line 3%'; + t +--------- + line 3% +(1 row) + +select * from test2 where t = '%line 3%'; + t +--- +(0 rows) + +select * from test2 where t = '%line 4%'; + t +---------- + %line 4% +(1 row) + +select * from test2 where t = '%line 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li_ne 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li%ne 5%'; + t +----------- + %li%ne 5% +(1 row) + +select * from test2 where t = 'line 6'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 6'; + t +-------- + li_e 6 +(1 row) + drop index test2_idx_gin; create index test2_idx_gist on test2 using gist (t gist_trgm_ops); set enable_seqscan=off; @@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$'; quark z foo bar /123/-45/ -(4 rows) + line 1 + %line 2 + line 3% + %line 4% + %li%ne 5% + li_e 6 +(10 rows) select * from test2 where t ~* 'DEF'; t @@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}'; abcdef quark z foo bar -(3 rows) + line 1 + %line 2 + line 3% + %line 4% +(7 rows) select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}'; t @@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d'; /123/-45/ (1 row) +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; + QUERY PLAN +------------------------------------------ + Index Scan using test2_idx_gist on test2 + Index Cond: (t = 'abcdef'::text) +(2 rows) + +select * from test2 where t = 'abcdef'; + t +-------- + abcdef +(1 row) + +explain (costs off) + select * from test2 where t = '%line%'; + QUERY PLAN +------------------------------------------ + Index Scan using test2_idx_gist on test2 + Index Cond: (t = '%line%'::text) +(2 rows) + +select * from test2 where t = '%line%'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 1'; + t +--- +(0 rows) + +select * from test2 where t = '%line 2'; + t +--------- + %line 2 +(1 row) + +select * from test2 where t = 'line 3%'; + t +--------- + line 3% +(1 row) + +select * from test2 where t = '%line 3%'; + t +--- +(0 rows) + +select * from test2 where t = '%line 4%'; + t +---------- + %line 4% +(1 row) + +select * from test2 where t = '%line 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li_ne 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li%ne 5%'; + t +----------- + %li%ne 5% +(1 row) + +select * from test2 where t = 'line 6'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 6'; + t +-------- + li_e 6 +(1 row) + -- Check similarity threshold (bug #14202) CREATE TEMP TABLE restaurants (city text); INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000); diff --git a/contrib/pg_trgm/pg_trgm--1.5--1.6.sql b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql new file mode 100644 index 0000000000..1e6b571a4a --- /dev/null +++ b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql @@ -0,0 +1,11 @@ +/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit + + +ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD + OPERATOR 11 pg_catalog.= (text, text); + +ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD + OPERATOR 11 pg_catalog.= (text, text); diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control index ed4487e96b..1d6a9ddf25 100644 --- a/contrib/pg_trgm/pg_trgm.control +++ b/contrib/pg_trgm/pg_trgm.control @@ -1,6 +1,6 @@ # pg_trgm extension comment = 'text similarity measurement and index searching based on trigrams' -default_version = '1.5' +default_version = '1.6' module_pathname = '$libdir/pg_trgm' relocatable = true trusted = true diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql index bc2a6d525c..6a9da24d5a 100644 --- a/contrib/pg_trgm/sql/pg_trgm.sql +++ b/contrib/pg_trgm/sql/pg_trgm.sql @@ -101,6 +101,12 @@ insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); insert into test2 values ('/123/-45/'); +insert into test2 values ('line 1'); +insert into test2 values ('%line 2'); +insert into test2 values ('line 3%'); +insert into test2 values ('%line 4%'); +insert into test2 values ('%li%ne 5%'); +insert into test2 values ('li_e 6'); create index test2_idx_gin on test2 using gin (t gin_trgm_ops); set enable_seqscan=off; explain (costs off) @@ -137,6 +143,23 @@ select * from test2 where t ~ ' z foo bar'; select * from test2 where t ~ ' z foo'; select * from test2 where t ~ 'qua(?!foo)'; select * from test2 where t ~ '/\d+/-\d'; +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; +select * from test2 where t = 'abcdef'; +explain (costs off) + select * from test2 where t = '%line%'; +select * from test2 where t = '%line%'; +select * from test2 where t = 'li_e 1'; +select * from test2 where t = '%line 2'; +select * from test2 where t = 'line 3%'; +select * from test2 where t = '%line 3%'; +select * from test2 where t = '%line 4%'; +select * from test2 where t = '%line 5%'; +select * from test2 where t = '%li_ne 5%'; +select * from test2 where t = '%li%ne 5%'; +select * from test2 where t = 'line 6'; +select * from test2 where t = 'li_e 6'; drop index test2_idx_gin; create index test2_idx_gist on test2 using gist (t gist_trgm_ops); @@ -175,6 +198,23 @@ select * from test2 where t ~ ' z foo bar'; select * from test2 where t ~ ' z foo'; select * from test2 where t ~ 'qua(?!foo)'; select * from test2 where t ~ '/\d+/-\d'; +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; +select * from test2 where t = 'abcdef'; +explain (costs off) + select * from test2 where t = '%line%'; +select * from test2 where t = '%line%'; +select * from test2 where t = 'li_e 1'; +select * from test2 where t = '%line 2'; +select * from test2 where t = 'line 3%'; +select * from test2 where t = '%line 3%'; +select * from test2 where t = '%line 4%'; +select * from test2 where t = '%line 5%'; +select * from test2 where t = '%li_ne 5%'; +select * from test2 where t = '%li%ne 5%'; +select * from test2 where t = 'line 6'; +select * from test2 where t = 'li_e 6'; -- Check similarity threshold (bug #14202) diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h index b616953462..405a1d9552 100644 --- a/contrib/pg_trgm/trgm.h +++ b/contrib/pg_trgm/trgm.h @@ -37,6 +37,7 @@ #define WordDistanceStrategyNumber 8 #define StrictWordSimilarityStrategyNumber 9 #define StrictWordDistanceStrategyNumber 10 +#define EqualStrategyNumber 11 typedef char trgm[3]; diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c index 4dbf0ffb68..fc0179b5ca 100644 --- a/contrib/pg_trgm/trgm_gin.c +++ b/contrib/pg_trgm/trgm_gin.c @@ -97,6 +97,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: /* * For wildcard search we extract all the trigrams that every @@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: /* Check if all extracted trigrams are presented. */ res = true; for (i = 0; i < nkeys; i++) @@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: /* Check if all extracted trigrams are presented. */ res = GIN_MAYBE; for (i = 0; i < nkeys; i++) diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 9937ef9253..70e8a8ebcf 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -241,6 +241,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: qtrg = generate_wildcard_trgm(VARDATA(query), querysize - VARHDRSZ); break; @@ -338,6 +339,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: /* Wildcard search is inexact */ *recheck = true; diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml index 5365b0681e..3ce865b076 100644 --- a/doc/src/sgml/pgtrgm.sgml +++ b/doc/src/sgml/pgtrgm.sgml @@ -419,9 +419,10 @@ the purpose of very fast similarity searches. These index types support the above-described similarity operators, and additionally support trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>, - <literal>~</literal> and <literal>~*</literal> queries. (These indexes do not - support equality nor simple comparison operators, so you may need a - regular B-tree index too.) + <literal>~</literal> and <literal>~*</literal> queries. Beginning in + <productname>PostgreSQL</productname> 14, these indexes also support + equality and simple comparison operators, so extraneous regular B-tree + indexes are not needed anymore. </para> <para> -- 2.28.0