Hello hackers,

On 2018-03-28 12:21, Aleksander Alekseev wrote:
> It doesn't sound right to me to accept any input as a general rule but
> sometimes return errors nevertheless. That API would be complicated for
> the users. Thus I suggest to accept any garbage and try our best to
> interpret it.

I agree with Aleksander about silencing all errors in websearch_to_tsquery().

Attached is a revised patch that attempts to introduce the ability to ignore syntax errors in gettoken_tsvector(). I have also read through the patch, and all the code looks good to me except for one thing: the name of the enum ts_parsestate looks more like the name of a function than the name of a type. In my version it is renamed to QueryParserState, but feel free to change it if you think I'm wrong.

--
Aleksandr Parfenov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a8..bdf05236cf 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -390,7 +390,8 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
  * and different variants are ORed together.
  */
 static void
-pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+			  int16 weight, bool prefix, bool force_phrase)
 {
 	int32		count = 0;
 	ParsedText	prs;
@@ -423,7 +424,12 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
 					/* put placeholders for each missing stop word */
 					pushStop(state);
 					if (cntpos)
-						pushOperator(state, data->qoperator, 1);
+					{
+						if (force_phrase)
+							pushOperator(state, OP_PHRASE, 1);
+						else
+							pushOperator(state, data->qoperator, 1);
+					}
 					cntpos++;
 					pos++;
 				}
@@ -464,7 +470,10 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
 			if (cntpos)
 			{
 				/* distance may be useful */
-				pushOperator(state, data->qoperator, 1);
+				if (force_phrase)
+					pushOperator(state, OP_PHRASE, 1);
+				else
+					pushOperator(state, data->qoperator, 1);
 			}
 
 			cntpos++;
@@ -490,6 +499,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
+						  false,
 						  false);
 
 	PG_RETURN_TSQUERY(query);
@@ -520,7 +530,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  true);
+						  true,
+						  false);
 
 	PG_RETURN_POINTER(query);
 }
@@ -551,7 +562,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  true);
+						  true,
+						  false);
 
 	PG_RETURN_TSQUERY(query);
 }
@@ -567,3 +579,36 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
 										ObjectIdGetDatum(cfgId),
 										PointerGetDatum(in)));
 }
+
+Datum
+websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	MorphOpaque	data;
+	TSQuery		query = NULL;
+
+	data.cfg_id = PG_GETARG_OID(0);
+
+	data.qoperator = OP_AND;
+
+	query = parse_tsquery(text_to_cstring(in),
+						  pushval_morph,
+						  PointerGetDatum(&data),
+						  false,
+						  true);
+
+	PG_RETURN_TSQUERY(query);
+}
+
+Datum
+websearch_to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(0);
+	Oid			cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
+										ObjectIdGetDatum(cfgId),
+										PointerGetDatum(in)));
+
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 1ccbf79030..00e6218691 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -32,12 +32,24 @@ const int	tsearch_op_priority[OP_COUNT] =
 	3							/* OP_PHRASE */
 };
 
+/*
+ * parser's states
+ */
+typedef enum
+{
+	WAITOPERAND = 1,
+	WAITOPERATOR = 2,
+	WAITFIRSTOPERAND = 3,
+	WAITSINGLEOPERAND = 4,
+	INQUOTES = 5 /* for quoted phrases in web search */
+} QueryParserState;
+
 struct TSQueryParserStateData
 {
 	/* State for gettoken_query */
 	char	   *buffer;			/* entire string we are scanning */
 	char	   *buf;			/* current scan point */
-	int			state;
+	QueryParserState state;
 	int			count;			/* nesting count, incremented by (,
 								 * decremented by ) */
 
@@ -57,12 +69,6 @@ struct TSQueryParserStateData
 	TSVectorParseState valstate;
 };
 
-/* parser's states */
-#define WAITOPERAND 1
-#define WAITOPERATOR	2
-#define WAITFIRSTOPERAND 3
-#define WAITSINGLEOPERAND 4
-
 /*
  * subroutine to parse the modifiers (weight and prefix flag currently)
  * part, like ':AB*' of a query.
@@ -197,6 +203,21 @@ err:
 	return buf;
 }
 
+/*
+ * Parse OR operator used in websearch_to_tsquery().
+ */
+static bool
+parse_or_operator(char *buf)
+{
+	return (t_iseq(&buf[0], 'o') || t_iseq(&buf[0], 'O')) &&
+		   (t_iseq(&buf[1], 'r') || t_iseq(&buf[1], 'R')) &&
+		   (buf[2] != '\0' &&
+				!t_iseq(&buf[2], '-') &&
+				!t_iseq(&buf[2], '_') &&
+				!t_isalpha(&buf[2]) &&
+				!t_isdigit(&buf[2]));
+}
+
 /*
  * token types for parsing
  */
@@ -220,8 +241,8 @@ typedef enum
  */
 static ts_tokentype
 gettoken_query(TSQueryParserState state,
-			   int8 *operator,
-			   int *lenval, char **strval, int16 *weight, bool *prefix)
+			   int8 *operator, int *lenval, char **strval,
+			   int16 *weight, bool *prefix, bool isweb)
 {
 	*weight = 0;
 	*prefix = false;
@@ -232,7 +253,8 @@ gettoken_query(TSQueryParserState state,
 		{
 			case WAITFIRSTOPERAND:
 			case WAITOPERAND:
-				if (t_iseq(state->buf, '!'))
+				if (t_iseq(state->buf, '!') ||
+					(isweb && t_iseq(state->buf, '-')))
 				{
 					(state->buf)++; /* can safely ++, t_iseq guarantee that
 									 * pg_mblen()==1 */
@@ -249,11 +271,55 @@ gettoken_query(TSQueryParserState state,
 				}
 				else if (t_iseq(state->buf, ':'))
 				{
+					if (isweb)
+					{
+						/* it doesn't mean anything */
+						(state->buf)++;
+						continue;
+					}
+
 					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("syntax error in tsquery: \"%s\"",
 									state->buffer)));
 				}
+				else if (isweb && t_iseq(state->buf, ')'))
+				{
+					if (state->count == 0)
+					{
+						/* web search tolerates useless closing parentheses */
+						(state->buf)++;
+						continue;
+					}
+					(state->buf)++;
+					state->count--;
+					return PT_CLOSE;
+				}
+				else if (isweb &&
+							(t_iseq(state->buf, '&') ||
+							 t_iseq(state->buf, '|') ||
+							 t_iseq(state->buf, '<')))
+				{
+					/* or else gettoken_tsvector() will raise an error */
+					(state->buf)++;
+					continue;
+				}
+				else if (isweb && t_iseq(state->buf, '"'))
+				{
+					/* quoted text should be ordered (<->) */
+					char *quote = strchr(state->buf + 1, '"');
+					if (quote == NULL)
+					{
+						/* web search tolerates missing quotes too */
+						state->buf++;
+						continue;
+					}
+					*strval = state->buf + 1;
+					*lenval = quote - *strval;
+					state->buf = quote + 1;
+					state->state = INQUOTES;
+					return PT_VAL;
+				}
 				else if (!t_isspace(state->buf))
 				{
 					/*
@@ -269,6 +335,16 @@ gettoken_query(TSQueryParserState state,
 					}
 					else if (state->state == WAITFIRSTOPERAND)
 						return PT_END;
+					else if (isweb)
+					{
+						if (state->count > 0)
+							/* decrement per each parentheses level (see PT_OPEN) */
+							state->count--;
+						else
+							/* finally, we have to provide an operand */
+							pushStop(state);
+						return PT_END;
+					}
 					else
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
@@ -291,26 +367,61 @@ gettoken_query(TSQueryParserState state,
 					(state->buf)++;
 					return PT_OPR;
 				}
-				else if (t_iseq(state->buf, '<'))
+				else if (isweb && parse_or_operator(state->buf))
 				{
 					state->state = WAITOPERAND;
-					*operator = OP_PHRASE;
-					/* weight var is used as storage for distance */
-					state->buf = parse_phrase_operator(state->buf, weight);
-					if (*weight < 0)
+					*operator = OP_OR;
+					(state->buf) += 2; /* strlen("OR") */
+					return PT_OPR;
+				}
+				else if (t_iseq(state->buf, '<'))
+				{
+					int16	distance;
+					char   *phrase = parse_phrase_operator(state->buf, &distance);
+					if (distance < 0)
+					{
+						if (isweb)
+						{
+							/* web search tolerates broken phrase operator */
+							(state->buf)++;
+							continue;
+						}
 						return PT_ERR;
+					}
+					state->buf = phrase;
+					*operator = OP_PHRASE;
+					*weight = distance; /* weight var is used as storage for distance */
+					state->state = WAITOPERAND;
 					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, ')'))
 				{
+					if (isweb && state->count == 0)
+					{
+						/* web search tolerates useless closing parentheses */
+						(state->buf)++;
+						continue;
+					}
 					(state->buf)++;
 					state->count--;
 					return (state->count < 0) ? PT_ERR : PT_CLOSE;
 				}
 				else if (*(state->buf) == '\0')
-					return (state->count) ? PT_ERR : PT_END;
+				{
+					/* web search tolerates unexpected end of line */
+					return (!isweb && state->count) ? PT_ERR : PT_END;
+				}
 				else if (!t_isspace(state->buf))
+				{
+					if (isweb)
+					{
+						/* put implicit AND if there's no operator */
+						*operator = OP_AND;
+						state->state = WAITOPERAND;
+						return PT_OPR;
+					}
 					return PT_ERR;
+				}
 				break;
 			case WAITSINGLEOPERAND:
 				if (*(state->buf) == '\0')
@@ -320,9 +431,10 @@ gettoken_query(TSQueryParserState state,
 				state->buf += strlen(state->buf);
 				state->count++;
 				return PT_VAL;
-			default:
-				return PT_ERR;
-				break;
+			case INQUOTES:
+				/* phrase should be followed by an operator */
+				state->state = WAITOPERATOR;
+				continue;
 		}
 		state->buf += pg_mblen(state->buf);
 	}
@@ -475,7 +587,8 @@ cleanOpStack(TSQueryParserState state,
 static void
 makepol(TSQueryParserState state,
 		PushFunction pushval,
-		Datum opaque)
+		Datum opaque,
+		bool isweb)
 {
 	int8		operator = 0;
 	ts_tokentype type;
@@ -489,19 +602,21 @@ makepol(TSQueryParserState state,
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
 
-	while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
+	while ((type = gettoken_query(state, &operator, &lenval, &strval,
+								  &weight, &prefix, isweb)) != PT_END)
 	{
 		switch (type)
 		{
 			case PT_VAL:
-				pushval(opaque, state, strval, lenval, weight, prefix);
+				pushval(opaque, state, strval, lenval, weight, prefix,
+						state->state == INQUOTES /* force phrase operator */);
 				break;
 			case PT_OPR:
 				cleanOpStack(state, opstack, &lenstack, operator);
 				pushOpStack(opstack, &lenstack, operator, weight);
 				break;
 			case PT_OPEN:
-				makepol(state, pushval, opaque);
+				makepol(state, pushval, opaque, isweb);
 				break;
 			case PT_CLOSE:
 				cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@@ -605,7 +720,8 @@ TSQuery
 parse_tsquery(char *buf,
 			  PushFunction pushval,
 			  Datum opaque,
-			  bool isplain)
+			  bool isplain,
+			  bool isweb)
 {
 	struct TSQueryParserStateData state;
 	int			i;
@@ -623,7 +739,7 @@ parse_tsquery(char *buf,
 	state.polstr = NIL;
 
 	/* init value parser's state */
-	state.valstate = init_tsvector_parser(state.buffer, true, true);
+	state.valstate = init_tsvector_parser(state.buffer, true, true, isweb);
 
 	/* init list of operand */
 	state.sumlen = 0;
@@ -632,7 +748,7 @@ parse_tsquery(char *buf,
 	*(state.curop) = '\0';
 
 	/* parse query & make polish notation (postfix, but in reverse order) */
-	makepol(&state, pushval, opaque);
+	makepol(&state, pushval, opaque, isweb);
 
 	close_tsvector_parser(state.valstate);
 
@@ -703,7 +819,7 @@ parse_tsquery(char *buf,
 
 static void
 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
-			 int16 weight, bool prefix)
+			 int16 weight, bool prefix, bool isphrase)
 {
 	pushValue(state, strval, lenval, weight, prefix);
 }
@@ -716,7 +832,7 @@ tsqueryin(PG_FUNCTION_ARGS)
 {
 	char	   *in = PG_GETARG_CSTRING(0);
 
-	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
+	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false, false));
 }
 
 /*
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 64e02ef434..395a326513 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS)
 	char	   *cur;
 	int			buflen = 256;	/* allocated size of tmpbuf */
 
-	state = init_tsvector_parser(buf, false, false);
+	state = init_tsvector_parser(buf, false, false, false);
 
 	arrlen = 64;
 	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index 7367ba6a40..6994d832d1 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -33,8 +33,22 @@ struct TSVectorParseStateData
 	int			eml;			/* max bytes per character */
 	bool		oprisdelim;		/* treat ! | * ( ) as delimiters? */
 	bool		is_tsquery;		/* say "tsquery" not "tsvector" in errors? */
+	bool		ignore_errors;	/* ignore errors and log them as warnings */
 };
 
+/* State codes used in gettoken_tsvector */
+typedef enum
+{
+	WAITWORD = 1,
+	WAITENDWORD = 2,
+	WAITNEXTCHAR = 3,
+	WAITENDCMPLX = 4,
+	WAITPOSINFO = 5,
+	INPOSINFO = 6,
+	WAITPOSDELIM = 7,
+	WAITCHARCMPLX = 8
+} VectorParseStates;
+
 
 /*
  * Initializes parser for the input string. If oprisdelim is set, the
@@ -42,7 +56,7 @@ struct TSVectorParseStateData
  * ! | & ( )
  */
 TSVectorParseState
-init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
+init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery, bool ignore_errors)
 {
 	TSVectorParseState state;
 
@@ -54,6 +68,7 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
 	state->eml = pg_database_encoding_max_length();
 	state->oprisdelim = oprisdelim;
 	state->is_tsquery = is_tsquery;
+	state->ignore_errors = ignore_errors;
 
 	return state;
 }
@@ -119,27 +134,27 @@ do { \
 	return true; \
 } while(0)
 
-
-/* State codes used in gettoken_tsvector */
-#define WAITWORD		1
-#define WAITENDWORD		2
-#define WAITNEXTCHAR	3
-#define WAITENDCMPLX	4
-#define WAITPOSINFO		5
-#define INPOSINFO		6
-#define WAITPOSDELIM	7
-#define WAITCHARCMPLX	8
-
 #define PRSSYNTAXERROR prssyntaxerror(state)
 
 static void
 prssyntaxerror(TSVectorParseState state)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_SYNTAX_ERROR),
-			 state->is_tsquery ?
-			 errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
-			 errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
+	if (state->ignore_errors)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				state->is_tsquery ?
+				errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
+				errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				state->is_tsquery ?
+				errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
+				errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
+	}
 }
 
 
@@ -165,9 +180,9 @@ gettoken_tsvector(TSVectorParseState state,
 				  WordEntryPos **pos_ptr, int *poslen,
 				  char **endptr)
 {
-	int			oldstate = 0;
+	VectorParseStates oldstate = 0;
 	char	   *curpos = state->word;
-	int			statecode = WAITWORD;
+	VectorParseStates statecode = WAITWORD;
 
 	/*
 	 * pos is for collecting the comma delimited list of positions followed by
@@ -202,10 +217,23 @@ gettoken_tsvector(TSVectorParseState state,
 		else if (statecode == WAITNEXTCHAR)
 		{
 			if (*(state->prsbuf) == '\0')
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("there is no escaped character: \"%s\"",
-								state->bufstart)));
+			{
+				if (state->ignore_errors)
+				{
+					ereport(WARNING,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							errmsg("there is no escaped character: \"%s\"",
+									state->bufstart)));
+					return false;
+				}
+				else
+				{
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							errmsg("there is no escaped character: \"%s\"",
+									state->bufstart)));
+				}
+			}
 			else
 			{
 				RESIZEPRSBUF;
@@ -260,7 +288,15 @@ gettoken_tsvector(TSVectorParseState state,
 				oldstate = WAITENDCMPLX;
 			}
 			else if (*(state->prsbuf) == '\0')
+			{
+				if (state->ignore_errors)
+				{
+					/* Parse as if there were a closing quote character at the end */
+					statecode = WAITCHARCMPLX;
+					continue;
+				}
 				PRSSYNTAXERROR;
+			}
 			else
 			{
 				RESIZEPRSBUF;
@@ -319,10 +355,22 @@ gettoken_tsvector(TSVectorParseState state,
 				WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
 				/* we cannot get here in tsquery, so no need for 2 errmsgs */
 				if (WEP_GETPOS(pos[npos - 1]) == 0)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("wrong position info in tsvector: \"%s\"",
-									state->bufstart)));
+				{
+					if (state->ignore_errors)
+					{
+						ereport(WARNING,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								errmsg("wrong position info in tsvector: \"%s\"",
+										state->bufstart)));
+					}
+					else
+					{
+						ereport(ERROR,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								errmsg("wrong position info in tsvector: \"%s\"",
+										state->bufstart)));
+					}
+				}
 				WEP_SETWEIGHT(pos[npos - 1], 0);
 				statecode = WAITPOSDELIM;
 			}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 90d994c71a..560416636b 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4966,6 +4966,8 @@ DATA(insert OID = 3747 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f i s
 DESCR("transform to tsquery");
 DATA(insert OID = 5006 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 8889 (  websearch_to_tsquery	PGNSP PGUID 12 100 0 0 0 f f f  t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 3749 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3750 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
@@ -4974,6 +4976,8 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 8890 (  websearch_to_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
 DESCR("transform jsonb to tsvector");
 DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index f8ddce5ecb..2e805bfeb0 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -27,7 +27,8 @@ typedef struct TSVectorParseStateData *TSVectorParseState;
 
 extern TSVectorParseState init_tsvector_parser(char *input,
 					 bool oprisdelim,
-					 bool is_tsquery);
+					 bool is_tsquery,
+					 bool ignore_errors);
 extern void reset_tsvector_parser(TSVectorParseState state, char *input);
 extern bool gettoken_tsvector(TSVectorParseState state,
 				  char **token, int *len,
@@ -44,11 +45,12 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
 							  char *token, int tokenlen,
 							  int16 tokenweights,	/* bitmap as described in
 													 * QueryOperand struct */
-							  bool prefix);
+							  bool prefix,
+							  bool isphrase);
 
 extern TSQuery parse_tsquery(char *buf,
 			  PushFunction pushval,
-			  Datum opaque, bool isplain);
+			  Datum opaque, bool isplain, bool isweb);
 
 /* Functions for use by PushFunction implementations */
 extern void pushValue(TSQueryParserState state,
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index d63fb12f1d..d5d1bdda7f 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1672,3 +1672,455 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
 (1 row)
 
 set enable_seqscan = on;
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('()');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('(())');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('()()()');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('abc ()');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('() abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc & ()');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('() & abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('''');
+WARNING:  syntax error in tsquery: "'"
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('\');
+WARNING:  there is no escaped character: "\"
+NOTICE:  text-search query doesn't contain lexemes: "\"
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('\\');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('ab '' abc');
+ websearch_to_tsquery 
+----------------------
+ 'ab' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('ab '' abc''');
+ websearch_to_tsquery 
+----------------------
+ 'ab' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('ab \ abc');
+ websearch_to_tsquery 
+----------------------
+ 'ab' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('ab \\ abc');
+ websearch_to_tsquery 
+----------------------
+ 'ab' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('(');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('((');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('(((  )) abc or def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' | 'def'
+(1 row)
+
+select websearch_to_tsquery('))');
+NOTICE:  text-search query doesn't contain lexemes: "))"
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery(')');
+NOTICE:  text-search query doesn't contain lexemes: ")"
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery(')(');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('& )( |');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('abc )( def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('abc | )( & def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' | 'def'
+(1 row)
+
+select websearch_to_tsquery('& abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc &');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('| abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc |');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('< abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc <');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('a:::b');
+ websearch_to_tsquery 
+----------------------
+ 'b'
+(1 row)
+
+select websearch_to_tsquery('My brand new smartphone');
+     websearch_to_tsquery      
+-------------------------------
+ 'brand' & 'new' & 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand "new smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand "new -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand:B "new -smartphone"');
+       websearch_to_tsquery        
+-----------------------------------
+ 'brand':B & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand:Z "new -smartphone"');
+         websearch_to_tsquery          
+---------------------------------------
+ 'brand' & 'z' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My & (brand ("new -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My & (brand ("new) -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat or rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' & 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR');
+ websearch_to_tsquery 
+----------------------
+ 'cat' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'OR rat');
+ websearch_to_tsquery 
+----------------------
+ 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' <-> 'or' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+   websearch_to_tsquery    
+---------------------------
+ 'fat' & ( 'cat' | 'rat' )
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat*rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat-rat');
+   websearch_to_tsquery    
+---------------------------
+ 'fat-rat' & 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat_rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', 'fat or(rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or)rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or&rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or|rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or!rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or<rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or>rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or ');
+ websearch_to_tsquery 
+----------------------
+ 'fat'
+(1 row)
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'orange'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc orтест');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'orтест'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR1234');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or1234'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or-abc');
+      websearch_to_tsquery       
+---------------------------------
+ 'abc' & 'or-abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR_abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'or OR or');
+ websearch_to_tsquery 
+----------------------
+ 'or' | 'or'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' & !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+          websearch_to_tsquery          
+----------------------------------------
+ 'fat' <-> 'cat' & ( 'eaten' | !'rat' )
+(1 row)
+
+select websearch_to_tsquery('english', 'this is ----fine');
+ websearch_to_tsquery 
+----------------------
+ !!!!'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -!-fine, "dear friend" OR good');
+           websearch_to_tsquery           
+------------------------------------------
+ !!!'fine' & 'dear' <-> 'friend' | 'good'
+(1 row)
+
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+   websearch_to_tsquery   
+--------------------------
+ 'old' <-> 'cat' & 'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+         websearch_to_tsquery         
+--------------------------------------
+ 'толст' <-> 'кошк' & 'съел' & 'крыс'
+(1 row)
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column? 
+----------
+ t
+(1 row)
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column? 
+----------
+ f
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 1c8520b3e9..c7976eb87c 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -539,3 +539,95 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
 set enable_seqscan = on;
+
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('()');
+select websearch_to_tsquery('(())');
+select websearch_to_tsquery('()()()');
+select websearch_to_tsquery('abc ()');
+select websearch_to_tsquery('() abc');
+select websearch_to_tsquery('abc & ()');
+select websearch_to_tsquery('() & abc');
+select websearch_to_tsquery('''');
+select websearch_to_tsquery('\');
+select websearch_to_tsquery('\\');
+select websearch_to_tsquery('ab '' abc');
+select websearch_to_tsquery('ab '' abc''');
+select websearch_to_tsquery('ab \ abc');
+select websearch_to_tsquery('ab \\ abc');
+
+select websearch_to_tsquery('(');
+select websearch_to_tsquery('((');
+select websearch_to_tsquery('(((  )) abc or def');
+select websearch_to_tsquery('))');
+select websearch_to_tsquery(')');
+
+select websearch_to_tsquery(')(');
+select websearch_to_tsquery('& )( |');
+select websearch_to_tsquery('abc )( def');
+select websearch_to_tsquery('abc | )( & def');
+
+select websearch_to_tsquery('& abc');
+select websearch_to_tsquery('abc &');
+select websearch_to_tsquery('| abc');
+select websearch_to_tsquery('abc |');
+select websearch_to_tsquery('< abc');
+select websearch_to_tsquery('abc <');
+select websearch_to_tsquery('a:::b');
+
+select websearch_to_tsquery('My brand new smartphone');
+select websearch_to_tsquery('My brand "new smartphone"');
+select websearch_to_tsquery('My brand "new -smartphone"');
+select websearch_to_tsquery('My brand:B "new -smartphone"');
+select websearch_to_tsquery('My brand:Z "new -smartphone"');
+select websearch_to_tsquery('My & (brand ("new -smartphone"');
+select websearch_to_tsquery('My & (brand ("new) -smartphone"');
+
+select websearch_to_tsquery('simple', 'cat or rat');
+select websearch_to_tsquery('simple', 'cat OR rat');
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+select websearch_to_tsquery('simple', 'cat OR');
+select websearch_to_tsquery('simple', 'OR rat');
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+select websearch_to_tsquery('simple', 'fat*rat');
+select websearch_to_tsquery('simple', 'fat-rat');
+select websearch_to_tsquery('simple', 'fat_rat');
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', 'fat or(rat');
+select websearch_to_tsquery('simple', 'fat or)rat');
+select websearch_to_tsquery('simple', 'fat or&rat');
+select websearch_to_tsquery('simple', 'fat or|rat');
+select websearch_to_tsquery('simple', 'fat or!rat');
+select websearch_to_tsquery('simple', 'fat or<rat');
+select websearch_to_tsquery('simple', 'fat or>rat');
+select websearch_to_tsquery('simple', 'fat or ');
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+select websearch_to_tsquery('simple', 'abc orтест');
+select websearch_to_tsquery('simple', 'abc OR1234');
+select websearch_to_tsquery('simple', 'abc or-abc');
+select websearch_to_tsquery('simple', 'abc OR_abc');
+select websearch_to_tsquery('simple', 'abc or');
+
+select websearch_to_tsquery('simple', 'or OR or');
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+
+select websearch_to_tsquery('english', 'this is ----fine');
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -!-fine, "dear friend" OR good');
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');

Reply via email to