On Tue, 6 Mar 2018 19:27:21 +0300 Arthur Zakirov <a.zaki...@postgrespro.ru> wrote:
> On Mon, Feb 19, 2018 at 05:19:15PM +0300, Ildus Kurbangaliev wrote: > > At brief look at this place seems better to move this block into > > pattern_fixed_prefix function. But there is also `vartype` variable > > which used to in prefix construction, and it would require pass this > > variable too. And since pattern_fixed_prefix called in 10 other > > places and vartype is required only for this ptype it seems better > > just keep this block outside of this function. > > Understood. > > > I've added documentation in current version of the patch. > > Thank you. > > Can you rebase the patch due to changes within pg_proc.h? > > Also here > > + <para> > + There is also the prefix operator <literal>^@</literal> and > corresponding > + <literal>text_startswith</literal> function which covers cases > when only > + searching by beginning of the string is needed. > + </para> > > I think text_startswith should be enclosed with the <function> tag. > I'm not sure, but I think <literal> used for operators, keywords, > etc. I haven't found a manual which describes how to use tags, but > after looking at the documentation where <function> is used, I think > that for function <function> should be used. > Hi, thanks for the review. I've fixed documentation as you said and also rebased to current master. -- --- Ildus Kurbangaliev Postgres Professional: http://www.postgrespro.com Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 2f59af25a6..4dc11d8df2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -2274,6 +2274,21 @@ <entry><literal>ph</literal></entry> </row> + <row> + <entry> + <indexterm> + <primary>text_startswith</primary> + </indexterm> + <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal> + </entry> + <entry><type>bool</type></entry> + <entry> + Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>. + </entry> + <entry><literal>text_startswith('alphabet', 'alph')</literal></entry> + <entry><literal>t</literal></entry> + </row> + <row> <entry> <indexterm> @@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation> ILIKE</function>, respectively. All of these operators are <productname>PostgreSQL</productname>-specific. </para> + + <para> + There is also the prefix operator <literal>^@</literal> and corresponding + <function>text_startswith</function> function which covers cases when only + searching by beginning of the string is needed. + </para> </sect2> diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml index e47f70be89..06b7519052 100644 --- a/doc/src/sgml/spgist.sgml +++ b/doc/src/sgml/spgist.sgml @@ -161,6 +161,7 @@ <literal>~<~</literal> <literal>~>=~</literal> <literal>~>~</literal> + <literal>^@</literal> </entry> </row> <row> diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c index f156b2166e..5beb9e33a1 100644 --- a/src/backend/access/spgist/spgtextproc.c +++ b/src/backend/access/spgist/spgtextproc.c @@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS) * well end with a partial multibyte character, so that applying * any encoding-sensitive test to it would be risky anyhow.) */ - if (strategy > 10) + if (strategy > 10 && strategy != RTPrefixStrategyNumber) { if (collate_is_c) strategy -= 10; @@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS) if (r < 0) res = false; break; + case RTPrefixStrategyNumber: + if (r != 0) + res = false; + break; default: elog(ERROR, "unrecognized strategy number: %d", in->scankeys[j].sk_strategy); @@ -601,10 +605,21 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS) for (j = 0; j < in->nkeys; j++) { StrategyNumber strategy = in->scankeys[j].sk_strategy; - text *query = DatumGetTextPP(in->scankeys[j].sk_argument); - int queryLen = VARSIZE_ANY_EXHDR(query); + text *query; + int queryLen; int r; + /* for this strategy collation is not important */ + if (strategy == RTPrefixStrategyNumber) + { + res = DatumGetBool(DirectFunctionCall2(text_startswith, + out->leafValue, in->scankeys[j].sk_argument)); + goto next; + } + + query = DatumGetTextPP(in->scankeys[j].sk_argument); + queryLen = VARSIZE_ANY_EXHDR(query); + if (strategy > 10) { /* Collation-aware comparison */ @@ -655,6 +670,7 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS) break; } +next: if (!res) break; /* no need to consider remaining conditions */ } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index bf240aa9c5..50b2583ca0 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -1317,8 +1317,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) * right cache key, so that they can be re-used at runtime. */ patt = (Const *) other; - pstatus = pattern_fixed_prefix(patt, ptype, collation, - &prefix, &rest_selec); + + if (ptype == Pattern_Type_Prefix) + { + char *s = TextDatumGetCString(constval); + prefix = string_to_const(s, vartype); + pstatus = Pattern_Prefix_Partial; + rest_selec = 1.0; /* all */ + pfree(s); + } + else + pstatus = pattern_fixed_prefix(patt, ptype, collation, + &prefix, &rest_selec); /* * If necessary, coerce the prefix constant to the right type. @@ -1488,6 +1498,16 @@ likesel(PG_FUNCTION_ARGS) } /* + * prefixsel - selectivity of prefix operator + */ +Datum +prefixsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false)); +} + +/* + * * iclikesel - Selectivity of ILIKE pattern match. */ Datum @@ -2906,6 +2926,15 @@ likejoinsel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false)); } +/* + * prefixjoinsel - Join selectivity of prefix operator + */ +Datum +prefixjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false)); +} + /* * iclikejoinsel - Join selectivity of ILIKE pattern match. */ diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 4346410d5a..cb721e9a57 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS) PG_RETURN_BOOL(result); } +Datum +text_startswith(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len2 > len1) + result = false; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len2 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + + PG_RETURN_BOOL(result); +} + Datum bttextcmp(PG_FUNCTION_ARGS) { diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h index bddfac4c10..0db11a1117 100644 --- a/src/include/access/stratnum.h +++ b/src/include/access/stratnum.h @@ -68,8 +68,9 @@ typedef uint16 StrategyNumber; #define RTSubEqualStrategyNumber 25 /* for inet <<= */ #define RTSuperStrategyNumber 26 /* for inet << */ #define RTSuperEqualStrategyNumber 27 /* for inet >>= */ +#define RTPrefixStrategyNumber 28 /* for text ^@ */ -#define RTMaxStrategyNumber 27 +#define RTMaxStrategyNumber 28 #endif /* STRATNUM_H */ diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index 03af581df4..7f7ffcfecb 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -795,6 +795,7 @@ DATA(insert ( 4017 25 25 2 s 2315 4000 0 )); DATA(insert ( 4017 25 25 3 s 98 4000 0 )); DATA(insert ( 4017 25 25 4 s 2317 4000 0 )); DATA(insert ( 4017 25 25 5 s 2318 4000 0 )); +DATA(insert ( 4017 25 25 28 s 315 4000 0 )); DATA(insert ( 4017 25 25 11 s 664 4000 0 )); DATA(insert ( 4017 25 25 12 s 665 4000 0 )); DATA(insert ( 4017 25 25 14 s 667 4000 0 )); diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index e74f963eb5..414a967557 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -134,6 +134,8 @@ DESCR("less than"); DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel )); DESCR("equal"); #define TextEqualOperator 98 +DATA(insert OID = 315 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel )); +DESCR("starts with"); DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - )); DESCR("append element onto end of array"); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0fdb42f639..4e62efbb46 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ )); DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ )); DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ )); +DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ )); DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ )); DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ )); DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ )); @@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 DESCR("join selectivity of NOT LIKE"); DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of case-insensitive regex non-match"); +DATA(insert OID = 2579 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ )); +DESCR("restriction selectivity of exact prefix"); +DATA(insert OID = 2580 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ )); +DESCR("join selectivity of exact prefix"); /* Aggregate-related functions */ DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ )); diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 299c9f846a..95e44280c4 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -87,8 +87,11 @@ typedef struct VariableStatData typedef enum { - Pattern_Type_Like, Pattern_Type_Like_IC, - Pattern_Type_Regex, Pattern_Type_Regex_IC + Pattern_Type_Like, + Pattern_Type_Like_IC, + Pattern_Type_Regex, + Pattern_Type_Regex_IC, + Pattern_Type_Prefix } Pattern_Type; typedef enum diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 057faff2e5..6d32d4cb5b 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth 48 (1 row) +SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; + count +------- + 2 +(1 row) + SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10; f1 ------------------------------------------------- @@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth 48 (1 row) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; + QUERY PLAN +------------------------------------------------------------ + Aggregate + -> Index Only Scan using sp_radix_ind on radix_text_tbl + Index Cond: (t ^@ 'Worth'::text) +(3 rows) + +SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; + count +------- + 2 +(1 row) + EXPLAIN (COSTS OFF) SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10; QUERY PLAN diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 01608d2c04..919248cdd2 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -718,6 +718,7 @@ sha224(bytea) sha256(bytea) sha384(bytea) sha512(bytea) +text_startswith(text,text) macaddr8_eq(macaddr8,macaddr8) macaddr8_lt(macaddr8,macaddr8) macaddr8_le(macaddr8,macaddr8) @@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3; 4000 | 25 | <<= 4000 | 26 | >> 4000 | 27 | >>= -(121 rows) + 4000 | 28 | ^@ +(122 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out index 2d75bbf8dc..0011634b76 100644 --- a/src/test/regress/expected/spgist.out +++ b/src/test/regress/expected/spgist.out @@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t) select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g union all select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g; +explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11'; + QUERY PLAN +------------------------------------------------------- + Bitmap Heap Scan on spgist_text_tbl + Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text) + -> Bitmap Index Scan on spgist_text_idx + Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text) +(4 rows) + -- Do a lot of insertions that have to split an existing node. Hopefully -- one of these will cause the page to run out of space, causing the inner -- tuple to be moved to another page. diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 7f17588b0d..28c71abd74 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St '; +SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; + SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10; SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10; @@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St '; SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St '; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; +SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; + EXPLAIN (COSTS OFF) SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10; SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10; diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql index 77b43a2d3e..2b57fc836c 100644 --- a/src/test/regress/sql/spgist.sql +++ b/src/test/regress/sql/spgist.sql @@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g union all select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g; +explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11'; + -- Do a lot of insertions that have to split an existing node. Hopefully -- one of these will cause the page to run out of space, causing the inner -- tuple to be moved to another page.