On Tue, 6 Mar 2018 19:27:21 +0300
Arthur Zakirov <a.zaki...@postgrespro.ru> wrote:

> On Mon, Feb 19, 2018 at 05:19:15PM +0300, Ildus Kurbangaliev wrote:
> > At brief look at this place seems better to move this block into
> > pattern_fixed_prefix function. But there is also `vartype` variable
> > which used to in prefix construction, and it would require pass this
> > variable too. And since pattern_fixed_prefix called in 10 other
> > places and vartype is required only for this ptype it seems better
> > just keep this block outside of this function.  
> 
> Understood.
> 
> > I've added documentation in current version of the patch.  
> 
> Thank you.
> 
> Can you rebase the patch due to changes within pg_proc.h?
> 
> Also here
> 
> +   <para>
> +    There is also the prefix operator <literal>^@</literal> and
> corresponding
> +    <literal>text_startswith</literal> function which covers cases
> when only
> +    searching by beginning of the string is needed.
> +   </para>
> 
> I think text_startswith should be enclosed with the <function> tag.
> I'm not sure, but I think <literal> used for operators, keywords,
> etc. I haven't found a manual which describes how to use tags, but
> after looking at the documentation where <function> is used, I think
> that for function <function> should be used.
> 

Hi, thanks for the review. I've fixed documentation as you said and
also rebased to current master.

-- 
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2f59af25a6..4dc11d8df2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
        <entry><literal>ph</literal></entry>
       </row>
 
+      <row>
+       <entry>
+        <indexterm>
+         <primary>text_startswith</primary>
+        </indexterm>
+        <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+       </entry>
+       <entry><type>bool</type></entry>
+       <entry>
+        Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+       </entry>
+       <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+       <entry><literal>t</literal></entry>
+      </row>
+
       <row>
        <entry>
         <indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation>
     ILIKE</function>, respectively.  All of these operators are
     <productname>PostgreSQL</productname>-specific.
    </para>
+
+   <para>
+    There is also the prefix operator <literal>^@</literal> and corresponding
+    <function>text_startswith</function> function which covers cases when only
+    searching by beginning of the string is needed.
+   </para>
   </sect2>
 
 
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index e47f70be89..06b7519052 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -161,6 +161,7 @@
        <literal>~&lt;~</literal>
        <literal>~&gt;=~</literal>
        <literal>~&gt;~</literal>
+       <literal>^@</literal>
       </entry>
      </row>
      <row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..5beb9e33a1 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
 			 * well end with a partial multibyte character, so that applying
 			 * any encoding-sensitive test to it would be risky anyhow.)
 			 */
-			if (strategy > 10)
+			if (strategy > 10 && strategy != RTPrefixStrategyNumber)
 			{
 				if (collate_is_c)
 					strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
 					if (r < 0)
 						res = false;
 					break;
+				case RTPrefixStrategyNumber:
+					if (r != 0)
+						res = false;
+					break;
 				default:
 					elog(ERROR, "unrecognized strategy number: %d",
 						 in->scankeys[j].sk_strategy);
@@ -601,10 +605,21 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
 	for (j = 0; j < in->nkeys; j++)
 	{
 		StrategyNumber strategy = in->scankeys[j].sk_strategy;
-		text	   *query = DatumGetTextPP(in->scankeys[j].sk_argument);
-		int			queryLen = VARSIZE_ANY_EXHDR(query);
+		text	   *query;
+		int			queryLen;
 		int			r;
 
+		/* for this strategy collation is not important */
+		if (strategy == RTPrefixStrategyNumber)
+		{
+			res = DatumGetBool(DirectFunctionCall2(text_startswith,
+				out->leafValue, in->scankeys[j].sk_argument));
+			goto next;
+		}
+
+		query = DatumGetTextPP(in->scankeys[j].sk_argument);
+		queryLen = VARSIZE_ANY_EXHDR(query);
+
 		if (strategy > 10)
 		{
 			/* Collation-aware comparison */
@@ -655,6 +670,7 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
 				break;
 		}
 
+next:
 		if (!res)
 			break;				/* no need to consider remaining conditions */
 	}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5..50b2583ca0 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1317,8 +1317,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 	 * right cache key, so that they can be re-used at runtime.
 	 */
 	patt = (Const *) other;
-	pstatus = pattern_fixed_prefix(patt, ptype, collation,
-								   &prefix, &rest_selec);
+
+	if (ptype == Pattern_Type_Prefix)
+	{
+		char	*s = TextDatumGetCString(constval);
+		prefix = string_to_const(s, vartype);
+		pstatus = Pattern_Prefix_Partial;
+		rest_selec = 1.0;	/* all */
+		pfree(s);
+	}
+	else
+		pstatus = pattern_fixed_prefix(patt, ptype, collation,
+									   &prefix, &rest_selec);
 
 	/*
 	 * If necessary, coerce the prefix constant to the right type.
@@ -1488,6 +1498,16 @@ likesel(PG_FUNCTION_ARGS)
 }
 
 /*
+ *		prefixsel			- selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
  *		iclikesel			- Selectivity of ILIKE pattern match.
  */
 Datum
@@ -2906,6 +2926,15 @@ likejoinsel(PG_FUNCTION_ARGS)
 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
 }
 
+/*
+ *		prefixjoinsel			- Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
 /*
  *		iclikejoinsel			- Join selectivity of ILIKE pattern match.
  */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a..cb721e9a57 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(result);
 }
 
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+	Datum		arg1 = PG_GETARG_DATUM(0);
+	Datum		arg2 = PG_GETARG_DATUM(1);
+	bool		result;
+	Size		len1,
+				len2;
+
+	len1 = toast_raw_datum_size(arg1);
+	len2 = toast_raw_datum_size(arg2);
+	if (len2 > len1)
+		result = false;
+	else
+	{
+		text	   *targ1 = DatumGetTextPP(arg1);
+		text	   *targ2 = DatumGetTextPP(arg2);
+
+		result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+						 len2 - VARHDRSZ) == 0);
+
+		PG_FREE_IF_COPY(targ1, 0);
+		PG_FREE_IF_COPY(targ2, 1);
+	}
+
+	PG_RETURN_BOOL(result);
+}
+
 Datum
 bttextcmp(PG_FUNCTION_ARGS)
 {
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
 #define RTSubEqualStrategyNumber		25	/* for inet <<= */
 #define RTSuperStrategyNumber			26	/* for inet << */
 #define RTSuperEqualStrategyNumber		27	/* for inet >>= */
+#define RTPrefixStrategyNumber			28	/* for text ^@ */
 
-#define RTMaxStrategyNumber				27
+#define RTMaxStrategyNumber				28
 
 
 #endif							/* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..7f7ffcfecb 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert (	4017   25 25 2 s	2315 4000 0 ));
 DATA(insert (	4017   25 25 3 s	98	4000 0 ));
 DATA(insert (	4017   25 25 4 s	2317 4000 0 ));
 DATA(insert (	4017   25 25 5 s	2318 4000 0 ));
+DATA(insert (	4017   25 25 28 s	315 4000 0 ));
 DATA(insert (	4017   25 25 11 s	664 4000 0 ));
 DATA(insert (	4017   25 25 12 s	665 4000 0 ));
 DATA(insert (	4017   25 25 14 s	667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..414a967557 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
 DATA(insert OID =  98 ( "="		   PGNSP PGUID b t t	25	25	16	98 531 texteq eqsel eqjoinsel ));
 DESCR("equal");
 #define TextEqualOperator	98
+DATA(insert OID =  315 ( "^@"	   PGNSP PGUID b f f	25	25	16	0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
 
 DATA(insert OID = 349 (  "||"	   PGNSP PGUID b f f 2277 2283 2277 0 0 array_append   -	   -	 ));
 DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 0fdb42f639..4e62efbb46 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -209,6 +209,7 @@ DATA(insert OID =  64 (  int2lt			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
 DATA(insert OID =  65 (  int4eq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
 DATA(insert OID =  66 (  int4lt			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
 DATA(insert OID =  67 (  texteq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID =  3450 ( text_startswith  PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
 DATA(insert OID =  68 (  xideq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
 DATA(insert OID = 3308 (  xidneq		   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
 DATA(insert OID =  69 (  cideq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel		PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
 DESCR("join selectivity of NOT LIKE");
 DATA(insert OID = 1829 ( icregexnejoinsel	PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_  _null_ icregexnejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel			PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel		PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_  _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
 
 /* Aggregate-related functions */
 DATA(insert OID = 1830 (  float8_avg	   PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
 
 typedef enum
 {
-	Pattern_Type_Like, Pattern_Type_Like_IC,
-	Pattern_Type_Regex, Pattern_Type_Regex_IC
+	Pattern_Type_Like,
+	Pattern_Type_Like_IC,
+	Pattern_Type_Regex,
+	Pattern_Type_Regex_IC,
+	Pattern_Type_Prefix
 } Pattern_Type;
 
 typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 057faff2e5..6d32d4cb5b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth
     48
 (1 row)
 
+SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
+ count 
+-------
+     2
+(1 row)
+
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
                        f1                        
 -------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth
     48
 (1 row)
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Aggregate
+   ->  Index Only Scan using sp_radix_ind on radix_text_tbl
+         Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+ count 
+-------
+     2
+(1 row)
+
 EXPLAIN (COSTS OFF)
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
                      QUERY PLAN                      
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 01608d2c04..919248cdd2 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -718,6 +718,7 @@ sha224(bytea)
 sha256(bytea)
 sha384(bytea)
 sha512(bytea)
+text_startswith(text,text)
 macaddr8_eq(macaddr8,macaddr8)
 macaddr8_lt(macaddr8,macaddr8)
 macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
        4000 |           25 | <<=
        4000 |           26 | >>
        4000 |           27 | >>=
-(121 rows)
+       4000 |           28 | ^@
+(122 rows)
 
 -- Check that all opclass search operators have selectivity estimators.
 -- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
 select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
 union all
 select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+   Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+   ->  Bitmap Index Scan on spgist_text_idx
+         Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
 -- Do a lot of insertions that have to split an existing node. Hopefully
 -- one of these will cause the page to run out of space, causing the inner
 -- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7f17588b0d..28c71abd74 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t >    'Worth
 
 SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth                         St  ';
 
+SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
+
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
 
 SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
 SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth                         St  ';
 SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth                         St  ';
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+
 EXPLAIN (COSTS OFF)
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
 union all
 select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
 
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
 -- Do a lot of insertions that have to split an existing node. Hopefully
 -- one of these will cause the page to run out of space, causing the inner
 -- tuple to be moved to another page.

Reply via email to