select websearch_to_tsquery('simple', 'abc or!def');
websearch_to_tsquery
----------------------
'abc' | 'def'
(1 row)
This is wrong, of course; I've attached the fixed version.
--
Dmitry Ivanov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a8..bdf05236cf 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -390,7 +390,8 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
* and different variants are ORed together.
*/
static void
-pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+ int16 weight, bool prefix, bool force_phrase)
{
int32 count = 0;
ParsedText prs;
@@ -423,7 +424,12 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
/* put placeholders for each missing stop word */
pushStop(state);
if (cntpos)
- pushOperator(state, data->qoperator, 1);
+ {
+ if (force_phrase)
+ pushOperator(state, OP_PHRASE, 1);
+ else
+ pushOperator(state, data->qoperator, 1);
+ }
cntpos++;
pos++;
}
@@ -464,7 +470,10 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
if (cntpos)
{
/* distance may be useful */
- pushOperator(state, data->qoperator, 1);
+ if (force_phrase)
+ pushOperator(state, OP_PHRASE, 1);
+ else
+ pushOperator(state, data->qoperator, 1);
}
cntpos++;
@@ -490,6 +499,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
+ false,
false);
PG_RETURN_TSQUERY(query);
@@ -520,7 +530,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- true);
+ true,
+ false);
PG_RETURN_POINTER(query);
}
@@ -551,7 +562,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- true);
+ true,
+ false);
PG_RETURN_TSQUERY(query);
}
@@ -567,3 +579,36 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
ObjectIdGetDatum(cfgId),
PointerGetDatum(in)));
}
+
+/*
+ * websearch_to_tsquery_byid
+ *		Convert web-search-style text into a tsquery.
+ *
+ * Argument 0 is the text search configuration OID and argument 1 is the
+ * query text.  The text is handed to parse_tsquery() with isplain = false
+ * and isweb = true.  Lexemes that pushval_morph derives from one operand
+ * are joined with OP_AND unless a phrase operator is forced.
+ */
+Datum
+websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	Oid			cfgId = PG_GETARG_OID(0);
+	text	   *querytext = PG_GETARG_TEXT_PP(1);
+	MorphOpaque morph;
+
+	morph.cfg_id = cfgId;
+	morph.qoperator = OP_AND;
+
+	PG_RETURN_TSQUERY(parse_tsquery(text_to_cstring(querytext),
+									pushval_morph,
+									PointerGetDatum(&morph),
+									false,	/* isplain */
+									true)); /* isweb */
+}
+
+/*
+ * websearch_to_tsquery
+ *		One-argument form: fetch the configuration returned by
+ *		getTSCurrentConfig(true) and delegate to
+ *		websearch_to_tsquery_byid().
+ */
+Datum
+websearch_to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(0);
+	Oid			currentCfg = getTSCurrentConfig(true);
+
+	PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
+										ObjectIdGetDatum(currentCfg),
+										PointerGetDatum(querytext)));
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 1ccbf79030..a5bec3f332 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -32,12 +32,24 @@ const int tsearch_op_priority[OP_COUNT] =
3 /* OP_PHRASE */
};
+/*
+ * parser's states
+ *
+ * These are the case labels gettoken_query() dispatches on; the value is
+ * kept in TSQueryParserStateData.state between calls.
+ */
+typedef enum
+{
+ WAITOPERAND = 1, /* an operand is expected next */
+ WAITOPERATOR = 2, /* an operator is expected next */
+ WAITFIRSTOPERAND = 3, /* shares the WAITOPERAND case, but may yield PT_END where non-web WAITOPERAND raises a syntax error */
+ WAITSINGLEOPERAND = 4, /* the rest of the buffer is consumed and returned as one PT_VAL */
+ INQUOTES = 5 /* for quoted phrases in web search; set when a quoted phrase is returned, next call moves on to WAITOPERATOR */
+} ts_parserstate;
+
struct TSQueryParserStateData
{
/* State for gettoken_query */
char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */
- int state;
+ ts_parserstate state;
int count; /* nesting count, incremented by (,
* decremented by ) */
@@ -57,12 +69,6 @@ struct TSQueryParserStateData
TSVectorParseState valstate;
};
-/* parser's states */
-#define WAITOPERAND 1
-#define WAITOPERATOR 2
-#define WAITFIRSTOPERAND 3
-#define WAITSINGLEOPERAND 4
-
/*
* subroutine to parse the modifiers (weight and prefix flag currently)
* part, like ':AB*' of a query.
@@ -197,6 +203,21 @@ err:
return buf;
}
+/*
+ * parse_or_operator
+ *		Does buf point at the OR keyword of websearch_to_tsquery()?
+ *
+ * Returns true when buf starts with "or" in any letter case and the next
+ * character exists and is none of '-', '_', a letter, or a digit.  So
+ * "or(rat" qualifies, while "orange", "or-abc", "OR_abc" and an "or" at
+ * the very end of the input do not.
+ */
+static bool
+parse_or_operator(char *buf)
+{
+	/* the keyword itself, case-insensitively */
+	if (!(t_iseq(&buf[0], 'o') || t_iseq(&buf[0], 'O')))
+		return false;
+	if (!(t_iseq(&buf[1], 'r') || t_iseq(&buf[1], 'R')))
+		return false;
+
+	/* something must follow, and it must not continue a word */
+	if (buf[2] == '\0' || t_iseq(&buf[2], '-') || t_iseq(&buf[2], '_'))
+		return false;
+
+	return !(t_isalpha(&buf[2]) || t_isdigit(&buf[2]));
+}
+
/*
* token types for parsing
*/
@@ -220,8 +241,8 @@ typedef enum
*/
static ts_tokentype
gettoken_query(TSQueryParserState state,
- int8 *operator,
- int *lenval, char **strval, int16 *weight, bool *prefix)
+ int8 *operator, int *lenval, char **strval,
+ int16 *weight, bool *prefix, bool isweb)
{
*weight = 0;
*prefix = false;
@@ -232,7 +253,8 @@ gettoken_query(TSQueryParserState state,
{
case WAITFIRSTOPERAND:
case WAITOPERAND:
- if (t_iseq(state->buf, '!'))
+ if (t_iseq(state->buf, '!') ||
+ (isweb && t_iseq(state->buf, '-')))
{
(state->buf)++; /* can safely ++, t_iseq guarantee that
* pg_mblen()==1 */
@@ -249,11 +271,55 @@ gettoken_query(TSQueryParserState state,
}
else if (t_iseq(state->buf, ':'))
{
+ if (isweb)
+ {
+ /* it doesn't mean anything */
+ (state->buf)++;
+ continue;
+ }
+
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
}
+ else if (isweb && t_iseq(state->buf, ')'))
+ {
+ if (state->count == 0)
+ {
+ /* web search tolerates useless closing parentheses */
+ (state->buf)++;
+ continue;
+ }
+ (state->buf)++;
+ state->count--;
+ return PT_CLOSE;
+ }
+ else if (isweb &&
+ (t_iseq(state->buf, '&') ||
+ t_iseq(state->buf, '|') ||
+ t_iseq(state->buf, '<')))
+ {
+ /* or else gettoken_tsvector() will raise an error */
+ (state->buf)++;
+ continue;
+ }
+ else if (isweb && t_iseq(state->buf, '"'))
+ {
+ /* quoted text should be ordered (<->) */
+ char *quote = strchr(state->buf + 1, '"');
+ if (quote == NULL)
+ {
+ /* web search tolerates missing quotes too */
+ state->buf++;
+ continue;
+ }
+ *strval = state->buf + 1;
+ *lenval = quote - *strval;
+ state->buf = quote + 1;
+ state->state = INQUOTES;
+ return PT_VAL;
+ }
else if (!t_isspace(state->buf))
{
/*
@@ -269,6 +335,16 @@ gettoken_query(TSQueryParserState state,
}
else if (state->state == WAITFIRSTOPERAND)
return PT_END;
+ else if (isweb)
+ {
+ if (state->count > 0)
+ /* decrement per each parentheses level (see PT_OPEN) */
+ state->count--;
+ else
+ /* finally, we have to provide an operand */
+ pushStop(state);
+ return PT_END;
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -291,26 +367,61 @@ gettoken_query(TSQueryParserState state,
(state->buf)++;
return PT_OPR;
}
- else if (t_iseq(state->buf, '<'))
+ else if (isweb && parse_or_operator(state->buf))
{
state->state = WAITOPERAND;
- *operator = OP_PHRASE;
- /* weight var is used as storage for distance */
- state->buf = parse_phrase_operator(state->buf, weight);
- if (*weight < 0)
+ *operator = OP_OR;
+ (state->buf) += 2; /* strlen("OR") */
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '<'))
+ {
+ int16 distance;
+ char *phrase = parse_phrase_operator(state->buf, &distance);
+ if (distance < 0)
+ {
+ if (isweb)
+ {
+ /* web search tolerates broken phrase operator */
+ (state->buf)++;
+ continue;
+ }
return PT_ERR;
+ }
+ state->buf = phrase;
+ *operator = OP_PHRASE;
+ *weight = distance; /* weight var is used as storage for distance */
+ state->state = WAITOPERAND;
return PT_OPR;
}
else if (t_iseq(state->buf, ')'))
{
+ if (isweb && state->count == 0)
+ {
+ /* web search tolerates useless closing parentheses */
+ (state->buf)++;
+ continue;
+ }
(state->buf)++;
state->count--;
return (state->count < 0) ? PT_ERR : PT_CLOSE;
}
else if (*(state->buf) == '\0')
- return (state->count) ? PT_ERR : PT_END;
+ {
+ /* web search tolerates unexpected end of line */
+ return (!isweb && state->count) ? PT_ERR : PT_END;
+ }
else if (!t_isspace(state->buf))
+ {
+ if (isweb)
+ {
+ /* put implicit AND if there's no operator */
+ *operator = OP_AND;
+ state->state = WAITOPERAND;
+ return PT_OPR;
+ }
return PT_ERR;
+ }
break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0')
@@ -320,9 +431,10 @@ gettoken_query(TSQueryParserState state,
state->buf += strlen(state->buf);
state->count++;
return PT_VAL;
- default:
- return PT_ERR;
- break;
+ case INQUOTES:
+ /* phrase should be followed by an operator */
+ state->state = WAITOPERATOR;
+ continue;
}
state->buf += pg_mblen(state->buf);
}
@@ -475,7 +587,8 @@ cleanOpStack(TSQueryParserState state,
static void
makepol(TSQueryParserState state,
PushFunction pushval,
- Datum opaque)
+ Datum opaque,
+ bool isweb)
{
int8 operator = 0;
ts_tokentype type;
@@ -489,19 +602,21 @@ makepol(TSQueryParserState state,
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
+ while ((type = gettoken_query(state, &operator, &lenval, &strval,
+ &weight, &prefix, isweb)) != PT_END)
{
switch (type)
{
case PT_VAL:
- pushval(opaque, state, strval, lenval, weight, prefix);
+ pushval(opaque, state, strval, lenval, weight, prefix,
+ state->state == INQUOTES /* force phrase operator */);
break;
case PT_OPR:
cleanOpStack(state, opstack, &lenstack, operator);
pushOpStack(opstack, &lenstack, operator, weight);
break;
case PT_OPEN:
- makepol(state, pushval, opaque);
+ makepol(state, pushval, opaque, isweb);
break;
case PT_CLOSE:
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@@ -605,7 +720,8 @@ TSQuery
parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
- bool isplain)
+ bool isplain,
+ bool isweb)
{
struct TSQueryParserStateData state;
int i;
@@ -632,7 +748,7 @@ parse_tsquery(char *buf,
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
- makepol(&state, pushval, opaque);
+ makepol(&state, pushval, opaque, isweb);
close_tsvector_parser(state.valstate);
@@ -703,7 +819,7 @@ parse_tsquery(char *buf,
static void
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
- int16 weight, bool prefix)
+ int16 weight, bool prefix, bool isphrase)
{
pushValue(state, strval, lenval, weight, prefix);
}
@@ -716,7 +832,7 @@ tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
- PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false, false));
}
/*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..00f1a85ae7 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4950,6 +4950,8 @@ DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s
DESCR("transform to tsquery");
DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 8889 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
DESCR("transform to tsvector");
DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
@@ -4958,6 +4960,8 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s
DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 8890 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector");
DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index f8ddce5ecb..098c9b9091 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -44,11 +44,12 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
char *token, int tokenlen,
int16 tokenweights, /* bitmap as described in
* QueryOperand struct */
- bool prefix);
+ bool prefix,
+ bool isphrase);
extern TSQuery parse_tsquery(char *buf,
PushFunction pushval,
- Datum opaque, bool isplain);
+ Datum opaque, bool isplain, bool isweb);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index d63fb12f1d..06100768a7 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1672,3 +1672,408 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
(1 row)
set enable_seqscan = on;
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('()');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('(())');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('()()()');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('abc ()');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('() abc');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc & ()');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('() & abc');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('(');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('((');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('((( )) abc or def');
+ websearch_to_tsquery
+----------------------
+ 'abc' | 'def'
+(1 row)
+
+select websearch_to_tsquery('))');
+NOTICE: text-search query doesn't contain lexemes: "))"
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery(')');
+NOTICE: text-search query doesn't contain lexemes: ")"
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery(')(');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('& )( |');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('abc )( def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('abc | )( & def');
+ websearch_to_tsquery
+----------------------
+ 'abc' | 'def'
+(1 row)
+
+select websearch_to_tsquery('& abc');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc &');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('| abc');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc |');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('< abc');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('abc <');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('a:::b');
+ websearch_to_tsquery
+----------------------
+ 'b'
+(1 row)
+
+select websearch_to_tsquery('My brand new smartphone');
+ websearch_to_tsquery
+-------------------------------
+ 'brand' & 'new' & 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand "new smartphone"');
+ websearch_to_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand "new -smartphone"');
+ websearch_to_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand:B "new -smartphone"');
+ websearch_to_tsquery
+-----------------------------------
+ 'brand':B & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My brand:Z "new -smartphone"');
+ websearch_to_tsquery
+---------------------------------------
+ 'brand' & 'z' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My & (brand ("new -smartphone"');
+ websearch_to_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('My & (brand ("new) -smartphone"');
+ websearch_to_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat or rat');
+ websearch_to_tsquery
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR rat');
+ websearch_to_tsquery
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+ websearch_to_tsquery
+----------------------
+ 'cat' & 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR');
+ websearch_to_tsquery
+----------------------
+ 'cat' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'OR rat');
+ websearch_to_tsquery
+----------------------
+ 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+ websearch_to_tsquery
+------------------------------------
+ 'fat' <-> 'cat' <-> 'or' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+ websearch_to_tsquery
+---------------------------
+ 'fat' & ( 'cat' | 'rat' )
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat*rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat-rat');
+ websearch_to_tsquery
+---------------------------
+ 'fat-rat' & 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat_rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', 'fat or(rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or)rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or&rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or|rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or!rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or<rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or>rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or ');
+ websearch_to_tsquery
+----------------------
+ 'fat'
+(1 row)
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'orange'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc orтест');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'orтест'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR1234');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'or1234'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or-abc');
+ websearch_to_tsquery
+---------------------------------
+ 'abc' & 'or-abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR_abc');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'or OR or');
+ websearch_to_tsquery
+----------------------
+ 'or' | 'or'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+ websearch_to_tsquery
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' & !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+ websearch_to_tsquery
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+ websearch_to_tsquery
+----------------------------------------
+ 'fat' <-> 'cat' & ( 'eaten' | !'rat' )
+(1 row)
+
+select websearch_to_tsquery('english', 'this is ----fine');
+ websearch_to_tsquery
+----------------------
+ !!!!'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -!-fine, "dear friend" OR good');
+ websearch_to_tsquery
+------------------------------------------
+ !!!'fine' & 'dear' <-> 'friend' | 'good'
+(1 row)
+
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+ websearch_to_tsquery
+--------------------------
+ 'old' <-> 'cat' & 'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ websearch_to_tsquery
+--------------------------------------
+ 'толст' <-> 'кошк' & 'съел' & 'крыс'
+(1 row)
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column?
+----------
+ t
+(1 row)
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column?
+----------
+ f
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 1c8520b3e9..1bf9f80a9c 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -539,3 +539,88 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
set enable_seqscan = on;
+
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('()');
+select websearch_to_tsquery('(())');
+select websearch_to_tsquery('()()()');
+select websearch_to_tsquery('abc ()');
+select websearch_to_tsquery('() abc');
+select websearch_to_tsquery('abc & ()');
+select websearch_to_tsquery('() & abc');
+
+select websearch_to_tsquery('(');
+select websearch_to_tsquery('((');
+select websearch_to_tsquery('((( )) abc or def');
+select websearch_to_tsquery('))');
+select websearch_to_tsquery(')');
+
+select websearch_to_tsquery(')(');
+select websearch_to_tsquery('& )( |');
+select websearch_to_tsquery('abc )( def');
+select websearch_to_tsquery('abc | )( & def');
+
+select websearch_to_tsquery('& abc');
+select websearch_to_tsquery('abc &');
+select websearch_to_tsquery('| abc');
+select websearch_to_tsquery('abc |');
+select websearch_to_tsquery('< abc');
+select websearch_to_tsquery('abc <');
+select websearch_to_tsquery('a:::b');
+
+select websearch_to_tsquery('My brand new smartphone');
+select websearch_to_tsquery('My brand "new smartphone"');
+select websearch_to_tsquery('My brand "new -smartphone"');
+select websearch_to_tsquery('My brand:B "new -smartphone"');
+select websearch_to_tsquery('My brand:Z "new -smartphone"');
+select websearch_to_tsquery('My & (brand ("new -smartphone"');
+select websearch_to_tsquery('My & (brand ("new) -smartphone"');
+
+select websearch_to_tsquery('simple', 'cat or rat');
+select websearch_to_tsquery('simple', 'cat OR rat');
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+select websearch_to_tsquery('simple', 'cat OR');
+select websearch_to_tsquery('simple', 'OR rat');
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+select websearch_to_tsquery('simple', 'fat*rat');
+select websearch_to_tsquery('simple', 'fat-rat');
+select websearch_to_tsquery('simple', 'fat_rat');
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', 'fat or(rat');
+select websearch_to_tsquery('simple', 'fat or)rat');
+select websearch_to_tsquery('simple', 'fat or&rat');
+select websearch_to_tsquery('simple', 'fat or|rat');
+select websearch_to_tsquery('simple', 'fat or!rat');
+select websearch_to_tsquery('simple', 'fat or<rat');
+select websearch_to_tsquery('simple', 'fat or>rat');
+select websearch_to_tsquery('simple', 'fat or ');
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+select websearch_to_tsquery('simple', 'abc orтест');
+select websearch_to_tsquery('simple', 'abc OR1234');
+select websearch_to_tsquery('simple', 'abc or-abc');
+select websearch_to_tsquery('simple', 'abc OR_abc');
+select websearch_to_tsquery('simple', 'abc or');
+
+select websearch_to_tsquery('simple', 'or OR or');
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+
+select websearch_to_tsquery('english', 'this is ----fine');
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -!-fine, "dear friend" OR good');
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');