Hi everyone,

I'd like to share some intermediate results. Here's what has changed:


1. The OR operator is now case-insensitive. Moreover, trailing whitespace is no longer used to identify it:

select websearch_to_tsquery('simple', 'abc or');
 websearch_to_tsquery
----------------------
 'abc' & 'or'
(1 row)

select websearch_to_tsquery('simple', 'abc or(def)');
 websearch_to_tsquery
----------------------
 'abc' | 'def'
(1 row)

select websearch_to_tsquery('simple', 'abc or!def');
 websearch_to_tsquery
----------------------
 'abc' | 'def'
(1 row)


2. AROUND(N) has been dropped. I hope that the <N, M> operator will allow us to implement it with a few lines of code.

3. websearch_to_tsquery() now tolerates various syntax errors, for instance:

Misused operators:

'abc &'
'| abc'
'<- def'

Missing parentheses:

'abc & (def <-> (cat or rat'

Other sorts of nonsense:

'abc &--|| def'  =>  'abc' & !!'def'
'abc:def'  =>  'abc':D & 'ef'

This, however, doesn't mean that the result will always be adequate (who would have thought?). Overall, the current implementation follows the GIGO principle. In theory, this would allow us to use user-supplied websearch strings (but see the gotchas below), even if they don't make much sense. Better than nothing, right?

4. A small refactoring: I've replaced all WAIT* macros with an enum for better debugging (names look much nicer in GDB). Hope this is acceptable.

5. Finally, I've added a few more comments and tests. I haven't checked the code coverage, though.


A few gotchas:

I haven't touched gettoken_tsvector() yet. As a result, the following queries produce errors:

select websearch_to_tsquery('simple', '''');
ERROR:  syntax error in tsquery: "'"

select websearch_to_tsquery('simple', '\');
ERROR:  there is no escaped character: "\"

Maybe there's more. The question is: should we fix those, or is it fine as it is? I don't have a strong opinion about this.

--
Dmitry Ivanov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a8..bdf05236cf 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -390,7 +390,8 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
  * and different variants are ORed together.
  */
 static void
-pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+			  int16 weight, bool prefix, bool force_phrase)
 {
 	int32		count = 0;
 	ParsedText	prs;
@@ -423,7 +424,12 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
 					/* put placeholders for each missing stop word */
 					pushStop(state);
 					if (cntpos)
-						pushOperator(state, data->qoperator, 1);
+					{
+						if (force_phrase)
+							pushOperator(state, OP_PHRASE, 1);
+						else
+							pushOperator(state, data->qoperator, 1);
+					}
 					cntpos++;
 					pos++;
 				}
@@ -464,7 +470,10 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
 			if (cntpos)
 			{
 				/* distance may be useful */
-				pushOperator(state, data->qoperator, 1);
+				if (force_phrase)
+					pushOperator(state, OP_PHRASE, 1);
+				else
+					pushOperator(state, data->qoperator, 1);
 			}
 
 			cntpos++;
@@ -490,6 +499,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
+						  false,
 						  false);
 
 	PG_RETURN_TSQUERY(query);
@@ -520,7 +530,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  true);
+						  true,
+						  false);
 
 	PG_RETURN_POINTER(query);
 }
@@ -551,7 +562,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  true);
+						  true,
+						  false);
 
 	PG_RETURN_TSQUERY(query);
 }
@@ -567,3 +579,36 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
 										ObjectIdGetDatum(cfgId),
 										PointerGetDatum(in)));
 }
+
+Datum
+websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	MorphOpaque	data;
+	TSQuery		query = NULL;
+
+	data.cfg_id = PG_GETARG_OID(0);
+
+	data.qoperator = OP_AND;
+
+	query = parse_tsquery(text_to_cstring(in),
+						  pushval_morph,
+						  PointerGetDatum(&data),
+						  false,
+						  true);
+
+	PG_RETURN_TSQUERY(query);
+}
+
+Datum
+websearch_to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(0);
+	Oid			cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
+										ObjectIdGetDatum(cfgId),
+										PointerGetDatum(in)));
+
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 1ccbf79030..4b7460e5b9 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -32,12 +32,24 @@ const int	tsearch_op_priority[OP_COUNT] =
 	3							/* OP_PHRASE */
 };
 
+/*
+ * parser's states
+ */
+typedef enum
+{
+	WAITOPERAND = 1,
+	WAITOPERATOR = 2,
+	WAITFIRSTOPERAND = 3,
+	WAITSINGLEOPERAND = 4,
+	INQUOTES = 5 /* for quoted phrases in web search */
+} ts_parserstate;
+
 struct TSQueryParserStateData
 {
 	/* State for gettoken_query */
 	char	   *buffer;			/* entire string we are scanning */
 	char	   *buf;			/* current scan point */
-	int			state;
+	ts_parserstate state;
 	int			count;			/* nesting count, incremented by (,
 								 * decremented by ) */
 
@@ -57,12 +69,6 @@ struct TSQueryParserStateData
 	TSVectorParseState valstate;
 };
 
-/* parser's states */
-#define WAITOPERAND 1
-#define WAITOPERATOR	2
-#define WAITFIRSTOPERAND 3
-#define WAITSINGLEOPERAND 4
-
 /*
  * subroutine to parse the modifiers (weight and prefix flag currently)
  * part, like ':AB*' of a query.
@@ -197,6 +203,25 @@ err:
 	return buf;
 }
 
+/*
+ * Parse OR operator used in websearch_to_tsquery().
+ */
+static bool
+parse_or_operator(char *buf, int *len)
+{
+	bool is_or = (t_iseq(&buf[0], 'o') || t_iseq(&buf[0], 'O')) &&
+				 (t_iseq(&buf[1], 'r') || t_iseq(&buf[1], 'R')) &&
+					(buf[2] != '\0' &&
+					 !t_iseq(&buf[2], '-') &&
+					 !t_iseq(&buf[2], '_') &&
+					 !t_isalpha(&buf[2]) &&
+					 !t_isdigit(&buf[2]));
+
+	*len = 2 + pg_mblen(&buf[2]);
+
+	return is_or;
+}
+
 /*
  * token types for parsing
  */
@@ -220,19 +245,22 @@ typedef enum
  */
 static ts_tokentype
 gettoken_query(TSQueryParserState state,
-			   int8 *operator,
-			   int *lenval, char **strval, int16 *weight, bool *prefix)
+			   int8 *operator, int *lenval, char **strval,
+			   int16 *weight, bool *prefix, bool isweb)
 {
 	*weight = 0;
 	*prefix = false;
 
 	while (1)
 	{
+		int oplen = 0;
+
 		switch (state->state)
 		{
 			case WAITFIRSTOPERAND:
 			case WAITOPERAND:
-				if (t_iseq(state->buf, '!'))
+				if (t_iseq(state->buf, '!') ||
+					(isweb && t_iseq(state->buf, '-')))
 				{
 					(state->buf)++; /* can safely ++, t_iseq guarantee that
 									 * pg_mblen()==1 */
@@ -249,11 +277,55 @@ gettoken_query(TSQueryParserState state,
 				}
 				else if (t_iseq(state->buf, ':'))
 				{
+					if (isweb)
+					{
+						/* it doesn't mean anything */
+						(state->buf)++;
+						continue;
+					}
+
 					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("syntax error in tsquery: \"%s\"",
 									state->buffer)));
 				}
+				else if (isweb && t_iseq(state->buf, ')'))
+				{
+					if (state->count == 0)
+					{
+						/* web search tolerates useless closing parantheses */
+						(state->buf)++;
+						continue;
+					}
+					(state->buf)++;
+					state->count--;
+					return PT_CLOSE;
+				}
+				else if (isweb &&
+							(t_iseq(state->buf, '&') ||
+							 t_iseq(state->buf, '|') ||
+							 t_iseq(state->buf, '<')))
+				{
+					/* or else gettoken_tsvector() will raise an error */
+					(state->buf)++;
+					continue;
+				}
+				else if (isweb && t_iseq(state->buf, '"'))
+				{
+					/* quoted text should be ordered (<->) */
+					char *quote = strchr(state->buf + 1, '"');
+					if (quote == NULL)
+					{
+						/* web search tolerates missing quotes too */
+						state->buf++;
+						continue;
+					}
+					*strval = state->buf + 1;
+					*lenval = quote - *strval;
+					state->buf = quote + 1;
+					state->state = INQUOTES;
+					return PT_VAL;
+				}
 				else if (!t_isspace(state->buf))
 				{
 					/*
@@ -269,6 +341,16 @@ gettoken_query(TSQueryParserState state,
 					}
 					else if (state->state == WAITFIRSTOPERAND)
 						return PT_END;
+					else if (isweb)
+					{
+						if (state->count > 0)
+							/* decrement per each parentheses level (see PT_OPEN) */
+							state->count--;
+						else
+							/* finally, we have to provide an operand */
+							pushStop(state);
+						return PT_END;
+					}
 					else
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
@@ -291,26 +373,61 @@ gettoken_query(TSQueryParserState state,
 					(state->buf)++;
 					return PT_OPR;
 				}
-				else if (t_iseq(state->buf, '<'))
+				else if (isweb && parse_or_operator(state->buf, &oplen))
 				{
 					state->state = WAITOPERAND;
-					*operator = OP_PHRASE;
-					/* weight var is used as storage for distance */
-					state->buf = parse_phrase_operator(state->buf, weight);
-					if (*weight < 0)
+					*operator = OP_OR;
+					(state->buf) += oplen;
+					return PT_OPR;
+				}
+				else if (t_iseq(state->buf, '<'))
+				{
+					int16	distance;
+					char   *phrase = parse_phrase_operator(state->buf, &distance);
+					if (distance < 0)
+					{
+						if (isweb)
+						{
+							/* web search tolerates broken phrase operator */
+							(state->buf)++;
+							continue;
+						}
 						return PT_ERR;
+					}
+					state->buf = phrase;
+					*operator = OP_PHRASE;
+					*weight = distance; /* weight var is used as storage for distance */
+					state->state = WAITOPERAND;
 					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, ')'))
 				{
+					if (isweb && state->count == 0)
+					{
+						/* web search tolerates useless closing parentheses */
+						(state->buf)++;
+						continue;
+					}
 					(state->buf)++;
 					state->count--;
 					return (state->count < 0) ? PT_ERR : PT_CLOSE;
 				}
 				else if (*(state->buf) == '\0')
-					return (state->count) ? PT_ERR : PT_END;
+				{
+					/* web search tolerates unexpected end of line */
+					return (!isweb && state->count) ? PT_ERR : PT_END;
+				}
 				else if (!t_isspace(state->buf))
+				{
+					if (isweb)
+					{
+						/* put implicit AND if there's no operator */
+						*operator = OP_AND;
+						state->state = WAITOPERAND;
+						return PT_OPR;
+					}
 					return PT_ERR;
+				}
 				break;
 			case WAITSINGLEOPERAND:
 				if (*(state->buf) == '\0')
@@ -320,9 +437,10 @@ gettoken_query(TSQueryParserState state,
 				state->buf += strlen(state->buf);
 				state->count++;
 				return PT_VAL;
-			default:
-				return PT_ERR;
-				break;
+			case INQUOTES:
+				/* phrase should be followed by an operator */
+				state->state = WAITOPERATOR;
+				continue;
 		}
 		state->buf += pg_mblen(state->buf);
 	}
@@ -475,7 +593,8 @@ cleanOpStack(TSQueryParserState state,
 static void
 makepol(TSQueryParserState state,
 		PushFunction pushval,
-		Datum opaque)
+		Datum opaque,
+		bool isweb)
 {
 	int8		operator = 0;
 	ts_tokentype type;
@@ -489,19 +608,21 @@ makepol(TSQueryParserState state,
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
 
-	while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
+	while ((type = gettoken_query(state, &operator, &lenval, &strval,
+								  &weight, &prefix, isweb)) != PT_END)
 	{
 		switch (type)
 		{
 			case PT_VAL:
-				pushval(opaque, state, strval, lenval, weight, prefix);
+				pushval(opaque, state, strval, lenval, weight, prefix,
+						state->state == INQUOTES /* force phrase operator */);
 				break;
 			case PT_OPR:
 				cleanOpStack(state, opstack, &lenstack, operator);
 				pushOpStack(opstack, &lenstack, operator, weight);
 				break;
 			case PT_OPEN:
-				makepol(state, pushval, opaque);
+				makepol(state, pushval, opaque, isweb);
 				break;
 			case PT_CLOSE:
 				cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@@ -605,7 +726,8 @@ TSQuery
 parse_tsquery(char *buf,
 			  PushFunction pushval,
 			  Datum opaque,
-			  bool isplain)
+			  bool isplain,
+			  bool isweb)
 {
 	struct TSQueryParserStateData state;
 	int			i;
@@ -632,7 +754,7 @@ parse_tsquery(char *buf,
 	*(state.curop) = '\0';
 
 	/* parse query & make polish notation (postfix, but in reverse order) */
-	makepol(&state, pushval, opaque);
+	makepol(&state, pushval, opaque, isweb);
 
 	close_tsvector_parser(state.valstate);
 
@@ -703,7 +825,7 @@ parse_tsquery(char *buf,
 
 static void
 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
-			 int16 weight, bool prefix)
+			 int16 weight, bool prefix, bool isphrase)
 {
 	pushValue(state, strval, lenval, weight, prefix);
 }
@@ -716,7 +838,7 @@ tsqueryin(PG_FUNCTION_ARGS)
 {
 	char	   *in = PG_GETARG_CSTRING(0);
 
-	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
+	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false, false));
 }
 
 /*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..00f1a85ae7 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4950,6 +4950,8 @@ DATA(insert OID = 3747 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f i s
 DESCR("transform to tsquery");
 DATA(insert OID = 5006 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 8889 (  websearch_to_tsquery	PGNSP PGUID 12 100 0 0 0 f f f  t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 3749 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3750 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
@@ -4958,6 +4960,8 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 8890 (  websearch_to_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
 DESCR("transform jsonb to tsvector");
 DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index f8ddce5ecb..098c9b9091 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -44,11 +44,12 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
 							  char *token, int tokenlen,
 							  int16 tokenweights,	/* bitmap as described in
 													 * QueryOperand struct */
-							  bool prefix);
+							  bool prefix,
+							  bool isphrase);
 
 extern TSQuery parse_tsquery(char *buf,
 			  PushFunction pushval,
-			  Datum opaque, bool isplain);
+			  Datum opaque, bool isplain, bool isweb);
 
 /* Functions for use by PushFunction implementations */
 extern void pushValue(TSQueryParserState state,
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index d63fb12f1d..265fd55fcc 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1672,3 +1672,408 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
 (1 row)
 
 set enable_seqscan = on;
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('()');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('(())');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('()()()');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('abc ()');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('() abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc & ()');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('() & abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('(');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('((');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('(((  )) abc or def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' | 'def'
+(1 row)
+
+select websearch_to_tsquery('))');
+NOTICE:  text-search query doesn't contain lexemes: "))"
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery(')');
+NOTICE:  text-search query doesn't contain lexemes: ")"
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery(')(');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('& )( |');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('abc )( def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('abc | )( & def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' | 'def'
+(1 row)
+
+select websearch_to_tsquery('& abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc &');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('| abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc |');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('< abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc <');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('a:::b');
+ websearch_to_tsquery 
+----------------------
+ 'b'
+(1 row)
+
+select websearch_to_tsquery('My brand new smartphone');
+     websearch_to_tsquery      
+-------------------------------
+ 'brand' & 'new' & 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand "new smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand "new -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand:B "new -smartphone"');
+       websearch_to_tsquery        
+-----------------------------------
+ 'brand':B & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand:Z "new -smartphone"');
+         websearch_to_tsquery          
+---------------------------------------
+ 'brand' & 'z' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My & (brand ("new -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My & (brand ("new) -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat or rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' & 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR');
+ websearch_to_tsquery 
+----------------------
+ 'cat' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'OR rat');
+ websearch_to_tsquery 
+----------------------
+ 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' <-> 'or' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+   websearch_to_tsquery    
+---------------------------
+ 'fat' & ( 'cat' | 'rat' )
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat*rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat-rat');
+   websearch_to_tsquery    
+---------------------------
+ 'fat-rat' & 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat_rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', 'fat or(rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or)rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or&rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or|rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or!rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or<rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or>rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or ');
+ websearch_to_tsquery 
+----------------------
+ 'fat'
+(1 row)
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'orange'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc orтест');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'orтест'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR1234');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or1234'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or-abc');
+      websearch_to_tsquery       
+---------------------------------
+ 'abc' & 'or-abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR_abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'or OR or');
+ websearch_to_tsquery 
+----------------------
+ 'or' | 'or'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' & !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+          websearch_to_tsquery          
+----------------------------------------
+ 'fat' <-> 'cat' & ( 'eaten' | !'rat' )
+(1 row)
+
+select websearch_to_tsquery('english', 'this is ----fine');
+ websearch_to_tsquery 
+----------------------
+ !!!!'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -!-fine, "dear friend" OR good');
+           websearch_to_tsquery           
+------------------------------------------
+ !!!'fine' & 'dear' <-> 'friend' | 'good'
+(1 row)
+
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+   websearch_to_tsquery   
+--------------------------
+ 'old' <-> 'cat' & 'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+         websearch_to_tsquery         
+--------------------------------------
+ 'толст' <-> 'кошк' & 'съел' & 'крыс'
+(1 row)
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column? 
+----------
+ t
+(1 row)
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column? 
+----------
+ f
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 1c8520b3e9..1bf9f80a9c 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -539,3 +539,88 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
 set enable_seqscan = on;
+
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('()');
+select websearch_to_tsquery('(())');
+select websearch_to_tsquery('()()()');
+select websearch_to_tsquery('abc ()');
+select websearch_to_tsquery('() abc');
+select websearch_to_tsquery('abc & ()');
+select websearch_to_tsquery('() & abc');
+
+select websearch_to_tsquery('(');
+select websearch_to_tsquery('((');
+select websearch_to_tsquery('(((  )) abc or def');
+select websearch_to_tsquery('))');
+select websearch_to_tsquery(')');
+
+select websearch_to_tsquery(')(');
+select websearch_to_tsquery('& )( |');
+select websearch_to_tsquery('abc )( def');
+select websearch_to_tsquery('abc | )( & def');
+
+select websearch_to_tsquery('& abc');
+select websearch_to_tsquery('abc &');
+select websearch_to_tsquery('| abc');
+select websearch_to_tsquery('abc |');
+select websearch_to_tsquery('< abc');
+select websearch_to_tsquery('abc <');
+select websearch_to_tsquery('a:::b');
+
+select websearch_to_tsquery('My brand new smartphone');
+select websearch_to_tsquery('My brand "new smartphone"');
+select websearch_to_tsquery('My brand "new -smartphone"');
+select websearch_to_tsquery('My brand:B "new -smartphone"');
+select websearch_to_tsquery('My brand:Z "new -smartphone"');
+select websearch_to_tsquery('My & (brand ("new -smartphone"');
+select websearch_to_tsquery('My & (brand ("new) -smartphone"');
+
+select websearch_to_tsquery('simple', 'cat or rat');
+select websearch_to_tsquery('simple', 'cat OR rat');
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+select websearch_to_tsquery('simple', 'cat OR');
+select websearch_to_tsquery('simple', 'OR rat');
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+select websearch_to_tsquery('simple', 'fat*rat');
+select websearch_to_tsquery('simple', 'fat-rat');
+select websearch_to_tsquery('simple', 'fat_rat');
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', 'fat or(rat');
+select websearch_to_tsquery('simple', 'fat or)rat');
+select websearch_to_tsquery('simple', 'fat or&rat');
+select websearch_to_tsquery('simple', 'fat or|rat');
+select websearch_to_tsquery('simple', 'fat or!rat');
+select websearch_to_tsquery('simple', 'fat or<rat');
+select websearch_to_tsquery('simple', 'fat or>rat');
+select websearch_to_tsquery('simple', 'fat or ');
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+select websearch_to_tsquery('simple', 'abc orтест');
+select websearch_to_tsquery('simple', 'abc OR1234');
+select websearch_to_tsquery('simple', 'abc or-abc');
+select websearch_to_tsquery('simple', 'abc OR_abc');
+select websearch_to_tsquery('simple', 'abc or');
+
+select websearch_to_tsquery('simple', 'or OR or');
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+
+select websearch_to_tsquery('english', 'this is ----fine');
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -!-fine, "dear friend" OR good');
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');

Reply via email to