> It seems that everything is settled now, so here's the patch introducing the
> '<->' and '<N>' operators. I've made the necessary changes to docs &
> regression tests.
I noticed that I had accidentally trimmed whitespace in the docs; here is a
better one.
--
Dmitry Ivanov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out
index 972f764..97379e7 100644
--- a/contrib/tsearch2/expected/tsearch2.out
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery;
(1 row)
SELECT '1|(2|(4|(5|6)))'::tsquery;
- tsquery
------------------------------------------
- '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+ tsquery
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
(1 row)
SELECT '1|2|4|5|6'::tsquery;
- tsquery
------------------------------------------
- ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+ tsquery
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
(1 row)
SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
select 'a | f' < 'b & c'::tsquery;
?column?
----------
- t
+ f
(1 row)
select 'a | ff' < 'b & c'::tsquery;
@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york';
set enable_seqscan=on;
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
- rewrite
-----------------------------------------------------------------------------------
- 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
+ rewrite
+------------------------------------------------------------------------------
+ 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
(1 row)
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
(1 row)
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
- rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
(1 row)
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
- rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
select keyword from test_tsquery where keyword @> 'new';
@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
- rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
- rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
- rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
- rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
set enable_seqscan='on';
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 7c3ef92..0b60c61 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats');
<para>
A <type>tsquery</type> value stores lexemes that are to be
searched for, and combines them honoring the Boolean operators
- <literal>&</literal> (AND), <literal>|</literal> (OR), and
- <literal>!</> (NOT). Parentheses can be used to enforce grouping
+ <literal>&</literal> (AND), <literal>|</literal> (OR),
+ <literal>!</> (NOT), as well as the <literal><-></> (FOLLOWED BY) phrase
+ search operator. Parentheses can be used to enforce grouping
of the operators:
<programlisting>
@@ -3946,8 +3947,8 @@ SELECT 'fat & rat & ! cat'::tsquery;
</programlisting>
In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
- and <literal>&</literal> (AND) binds more tightly than
- <literal>|</literal> (OR).
+ and <literal>&</literal> (AND) and <literal><-></literal> (FOLLOWED BY)
+ both bind more tightly than <literal>|</literal> (OR).
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1bc9fbc..7a55ae5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9128,6 +9128,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>!'cat'</literal></entry>
</row>
<row>
+ <entry> <literal><-></literal> </entry>
+ <entry><type>tsquery</> followed by <type>tsquery</></entry>
+ <entry><literal>to_tsquery('fat') <-> to_tsquery('rat')</literal></entry>
+ <entry><literal>'fat' <-> 'rat'</literal></entry>
+ </row>
+ <row>
<entry> <literal>@></literal> </entry>
<entry><type>tsquery</> contains another ?</entry>
<entry><literal>'cat'::tsquery @> 'cat & rat'::tsquery</literal></entry>
@@ -9222,6 +9228,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<row>
<entry>
<indexterm>
+ <primary>phraseto_tsquery</primary>
+ </indexterm>
+ <literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
+ </entry>
+ <entry><type>tsquery</type></entry>
+ <entry>produce <type>tsquery</> that searches for a phrase, ignoring punctuation</entry>
+ <entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
+ <entry><literal>'fat' <-> 'rat'</literal></entry>
+ </row>
+ <row>
+ <entry>
+ <indexterm>
<primary>querytree</primary>
</indexterm>
<literal><function>querytree(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>)</function></literal>
@@ -9424,6 +9442,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<row>
<entry>
<indexterm>
+ <primary>tsquery_phrase</primary>
+ </indexterm>
+ <literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
+ </entry>
+ <entry><type>tsquery</type></entry>
+ <entry>implementation of <literal><-></> (FOLLOWED BY) operator</entry>
+ <entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
+ <entry><literal>'fat' <-> 'cat'</literal></entry>
+ </row>
+ <row>
+ <entry>
+ <literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal>
+ </entry>
+ <entry><type>tsquery</type></entry>
+ <entry>phrase-concatenate with distance</entry>
+ <entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
+ <entry><literal>'fat' <10> 'cat'</literal></entry>
+ </row>
+ <row>
+ <entry>
+ <indexterm>
<primary>tsvector_update_trigger</primary>
</indexterm>
<literal><function>tsvector_update_trigger()</function></literal>
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index ea3abc9..930c8f0 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -263,9 +263,10 @@ SELECT 'fat & cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t
As the above example suggests, a <type>tsquery</type> is not just raw
text, any more than a <type>tsvector</type> is. A <type>tsquery</type>
contains search terms, which must be already-normalized lexemes, and
- may combine multiple terms using AND, OR, and NOT operators.
+ may combine multiple terms using AND, OR, NOT and FOLLOWED BY operators.
(For details see <xref linkend="datatype-textsearch">.) There are
- functions <function>to_tsquery</> and <function>plainto_tsquery</>
+ functions <function>to_tsquery</>, <function>plainto_tsquery</>
+ and <function>phraseto_tsquery</>
that are helpful in converting user-written text into a proper
<type>tsquery</type>, for example by normalizing words appearing in
the text. Similarly, <function>to_tsvector</> is used to parse and
@@ -294,6 +295,35 @@ SELECT 'fat cats ate fat rats'::tsvector @@ to_tsquery('fat & rat');
</para>
<para>
+ Phrase search is made possible with the help of the <literal><-></>
+ (FOLLOWED BY) operator, which enforces lexeme order. This allows you
+ to discard strings not containing the desired phrase, for example:
+
+<programlisting>
+SELECT q @@ to_tsquery('fatal <-> error')
+FROM unnest(array[to_tsvector('fatal error'),
+ to_tsvector('error is not fatal')]) AS q;
+ ?column?
+----------
+ t
+ f
+</programlisting>
+
+ A more general version of the FOLLOWED BY operator takes the form
+ <literal><N></>, where <replaceable>N</> stands for the greatest allowed
+ distance between the specified lexemes. The <function>phraseto_tsquery</>
+ function makes use of this behavior in order to construct a
+ <type>tsquery</> capable of matching the provided phrase:
+
+<programlisting>
+SELECT phraseto_tsquery('cat ate some rats');
+ phraseto_tsquery
+-------------------------------
+ ( 'cat' <-> 'ate' ) <2> 'rat'
+</programlisting>
+ </para>
+
+ <para>
The <literal>@@</literal> operator also
supports <type>text</type> input, allowing explicit conversion of a text
string to <type>tsvector</type> or <type>tsquery</> to be skipped
@@ -709,11 +739,14 @@ UPDATE tt SET ti =
<para>
<productname>PostgreSQL</productname> provides the
- functions <function>to_tsquery</function> and
- <function>plainto_tsquery</function> for converting a query to
- the <type>tsquery</type> data type. <function>to_tsquery</function>
- offers access to more features than <function>plainto_tsquery</function>,
- but is less forgiving about its input.
+ functions <function>to_tsquery</function>,
+ <function>plainto_tsquery</function> and
+ <function>phraseto_tsquery</function>
+ for converting a query to the <type>tsquery</type> data type.
+ <function>to_tsquery</function> offers access to more features
+ than both <function>plainto_tsquery</function> and
+ <function>phraseto_tsquery</function>, but is less forgiving
+ about its input.
</para>
<indexterm>
@@ -728,7 +761,8 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>
<function>to_tsquery</function> creates a <type>tsquery</> value from
<replaceable>querytext</replaceable>, which must consist of single tokens
separated by the Boolean operators <literal>&</literal> (AND),
- <literal>|</literal> (OR) and <literal>!</literal> (NOT). These operators
+ <literal>|</literal> (OR), <literal>!</literal> (NOT), and also the
+ <literal><-></literal> (FOLLOWED BY) phrase search operator. These operators
can be grouped using parentheses. In other words, the input to
<function>to_tsquery</function> must already follow the general rules for
<type>tsquery</> input, as described in <xref
@@ -814,8 +848,8 @@ SELECT plainto_tsquery('english', 'The Fat Rats');
</screen>
Note that <function>plainto_tsquery</> cannot
- recognize Boolean operators, weight labels, or prefix-match labels
- in its input:
+ recognize Boolean and phrase search operators, weight labels,
+ or prefix-match labels in its input:
<screen>
SELECT plainto_tsquery('english', 'The Fat & Rats:C');
@@ -827,6 +861,57 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C');
Here, all the input punctuation was discarded as being space symbols.
</para>
+ <indexterm>
+ <primary>phraseto_tsquery</primary>
+ </indexterm>
+
+<synopsis>
+phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">querytext</replaceable> <type>text</>) returns <type>tsquery</>
+</synopsis>
+
+ <para>
+ <function>phraseto_tsquery</> behaves much like
+ <function>plainto_tsquery</>, with the exception
+ that it utilizes the <literal><-></literal> (FOLLOWED BY) phrase search
+ operator instead of the <literal>&</literal> (AND) Boolean operator.
+ This is particularly useful when searching for exact lexeme sequences,
+ since the phrase search operator helps to maintain lexeme order.
+ </para>
+
+ <para>
+ Example:
+
+<screen>
+SELECT phraseto_tsquery('english', 'The Fat Rats');
+ phraseto_tsquery
+------------------
+ 'fat' <-> 'rat'
+</screen>
+
+ Just like <function>plainto_tsquery</>, the
+ <function>phraseto_tsquery</> function cannot
+ recognize Boolean and phrase search operators, weight labels,
+ or prefix-match labels in its input:
+
+<screen>
+SELECT phraseto_tsquery('english', 'The Fat & Rats:C');
+ phraseto_tsquery
+-----------------------------
+ ( 'fat' <-> 'rat' ) <-> 'c'
+</screen>
+
+ It is possible to specify the configuration to be used to parse the query text;
+ for example, we could create a new one using the hunspell dictionary
+ (namely <literal>eng_hunspell</>) in order to match phrases with different word forms:
+
+<screen>
+SELECT phraseto_tsquery('eng_hunspell', 'developer of the building which collapsed');
+ phraseto_tsquery
+--------------------------------------------------------------------------------------------
+ ( 'developer' <3> 'building' ) <2> 'collapse' | ( 'developer' <3> 'build' ) <2> 'collapse'
+</screen>
+ </para>
+
</sect2>
<sect2 id="textsearch-ranking">
@@ -1390,6 +1475,81 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
<varlistentry>
<term>
+ <literal><type>tsquery</> <-> <type>tsquery</></literal>
+ </term>
+
+ <listitem>
+ <para>
+ Returns the phrase-concatenation of the two given queries.
+
+<screen>
+SELECT to_tsquery('fat') <-> to_tsquery('cat | rat');
+ ?column?
+-----------------------------------
+ 'fat' <-> 'cat' | 'fat' <-> 'rat'
+</screen>
+ </para>
+ </listitem>
+
+ </varlistentry>
+
+ <varlistentry>
+
+ <term>
+ <indexterm>
+ <primary>tsquery_phrase</primary>
+ </indexterm>
+
+ <literal>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</> [, <replaceable class="PARAMETER">distance</replaceable> <type>integer</> ]) returns <type>tsquery</></literal>
+ </term>
+
+ <listitem>
+ <para>
+ Returns the distanced phrase-concatenation of the two given queries.
+ This function underlies the implementation of the <literal><-></> operator.
+
+<screen>
+SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);
+ tsquery_phrase
+------------------
+ 'fat' <10> 'cat'
+</screen>
+ </para>
+ </listitem>
+
+ </varlistentry>
+
+ <varlistentry>
+
+ <term>
+ <indexterm>
+ <primary>setweight</primary>
+ </indexterm>
+
+ <literal>setweight(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>) returns <type>tsquery</></literal>
+ </term>
+
+ <listitem>
+ <para>
+ <function>setweight</> returns a copy of the input query in which every
+ position has been labeled with the given <replaceable>weight</>(s), either
+ <literal>A</literal>, <literal>B</literal>, <literal>C</literal>,
+ <literal>D</literal> or their combination. These labels are retained when
+ queries are concatenated, allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+ </para>
+
+ <para>
+ Note that weight labels apply to <emphasis>positions</>, not
+ <emphasis>lexemes</>. If the input query has been stripped of
+ positions then <function>setweight</> does nothing.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+
+ <term>
<indexterm>
<primary>numnode</primary>
</indexterm>
@@ -2428,7 +2588,7 @@ more sample word(s) : more indexed word(s)
<para>
Specific stop words recognized by the subdictionary cannot be
- specified; instead use <literal>?</> to mark the location where any
+ specified; instead use <literal>?</> to mark the location where any
stop word can appear. For example, assuming that <literal>a</> and
<literal>the</> are stop words according to the subdictionary:
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index aa77ec0..3f69d74 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -18,6 +18,13 @@
#include "utils/builtins.h"
+typedef struct MorphOpaque
+{
+ Oid cfg_id;
+ int qoperator; /* query operator */
+} MorphOpaque;
+
+
Datum
get_current_ts_config(PG_FUNCTION_ARGS)
{
@@ -262,60 +269,81 @@ to_tsvector(PG_FUNCTION_ARGS)
* to the stack.
*
* All words belonging to the same variant are pushed as an ANDed list,
- * and different variants are ORred together.
+ * and different variants are ORed together.
*/
static void
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
{
- int32 count = 0;
- ParsedText prs;
- uint32 variant,
- pos,
- cntvar = 0,
- cntpos = 0,
- cnt = 0;
- Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually
- * an Oid, not a pointer */
+ int32 count = 0;
+ ParsedText prs;
+ uint32 variant,
+ pos = 0,
+ cntvar = 0,
+ cntpos = 0,
+ cnt = 0;
+ MorphOpaque *data = (MorphOpaque *) DatumGetPointer(opaque);
prs.lenwords = 4;
prs.curwords = 0;
prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
- parsetext(cfg_id, &prs, strval, lenval);
+ parsetext(data->cfg_id, &prs, strval, lenval);
if (prs.curwords > 0)
{
-
while (count < prs.curwords)
{
- pos = prs.words[count].pos.pos;
+ /*
+ * Were any stop words removed? If so, fill empty positions
+ * with placeholders linked by an appropriate operator.
+ */
+ if (pos > 0 && pos + 1 < prs.words[count].pos.pos)
+ {
+ while (pos + 1 < prs.words[count].pos.pos)
+ {
+ /* put placeholders for each missing stop word */
+ pushStop(state);
+ if (cntpos)
+ pushOperator(state, data->qoperator, 1);
+ cntpos++;
+ pos++;
+ }
+ }
+
+ pos = prs.words[count].pos.pos; /* save current word's position */
+
+ /* Go through all variants obtained from this token */
cntvar = 0;
while (count < prs.curwords && pos == prs.words[count].pos.pos)
{
variant = prs.words[count].nvariant;
+ /* Push all words belonging to the same variant */
cnt = 0;
- while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
+ while (count < prs.curwords &&
+ pos == prs.words[count].pos.pos &&
+ variant == prs.words[count].nvariant)
{
-
- pushValue(state, prs.words[count].word, prs.words[count].len, weight,
- ((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
+ pushValue(state,
+ prs.words[count].word,
+ prs.words[count].len,
+ weight,
+ ((prs.words[count].flags & TSL_PREFIX) || prefix));
pfree(prs.words[count].word);
if (cnt)
- pushOperator(state, OP_AND);
+ pushOperator(state, OP_AND, 0);
cnt++;
count++;
}
if (cntvar)
- pushOperator(state, OP_OR);
+ pushOperator(state, OP_OR, 0);
cntvar++;
}
if (cntpos)
- pushOperator(state, OP_AND);
-
+ pushOperator(state, data->qoperator, 1); /* distance may be useful */
cntpos++;
}
@@ -329,44 +357,18 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
Datum
to_tsquery_byid(PG_FUNCTION_ARGS)
{
- Oid cfgid = PG_GETARG_OID(0);
- text *in = PG_GETARG_TEXT_P(1);
- TSQuery query;
- QueryItem *res;
- int32 len;
-
- query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
-
- if (query->size == 0)
- PG_RETURN_TSQUERY(query);
-
- /* clean out any stopword placeholders from the tree */
- res = clean_fakeval(GETQUERY(query), &len);
- if (!res)
- {
- SET_VARSIZE(query, HDRSIZETQ);
- query->size = 0;
- PG_RETURN_POINTER(query);
- }
- memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
+ text *in = PG_GETARG_TEXT_P(1);
+ TSQuery query;
+ MorphOpaque data;
- /*
- * Removing the stopword placeholders might've resulted in fewer
- * QueryItems. If so, move the operands up accordingly.
- */
- if (len != query->size)
- {
- char *oldoperand = GETOPERAND(query);
- int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
+ data.cfg_id = PG_GETARG_OID(0);
+ data.qoperator = OP_AND;
- Assert(len < query->size);
-
- query->size = len;
- memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
- SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
- }
+ query = parse_tsquery(text_to_cstring(in),
+ pushval_morph,
+ PointerGetDatum(&data),
+ false);
- pfree(res);
PG_RETURN_TSQUERY(query);
}
@@ -385,55 +387,60 @@ to_tsquery(PG_FUNCTION_ARGS)
Datum
plainto_tsquery_byid(PG_FUNCTION_ARGS)
{
- Oid cfgid = PG_GETARG_OID(0);
- text *in = PG_GETARG_TEXT_P(1);
- TSQuery query;
- QueryItem *res;
- int32 len;
+ text *in = PG_GETARG_TEXT_P(1);
+ TSQuery query;
+ MorphOpaque data;
- query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
+ data.cfg_id = PG_GETARG_OID(0);
+ data.qoperator = OP_AND;
- if (query->size == 0)
- PG_RETURN_TSQUERY(query);
+ query = parse_tsquery(text_to_cstring(in),
+ pushval_morph,
+ PointerGetDatum(&data),
+ true);
- /* clean out any stopword placeholders from the tree */
- res = clean_fakeval(GETQUERY(query), &len);
- if (!res)
- {
- SET_VARSIZE(query, HDRSIZETQ);
- query->size = 0;
- PG_RETURN_POINTER(query);
- }
- memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
+ PG_RETURN_POINTER(query);
+}
- /*
- * Removing the stopword placeholders might've resulted in fewer
- * QueryItems. If so, move the operands up accordingly.
- */
- if (len != query->size)
- {
- char *oldoperand = GETOPERAND(query);
- int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
+Datum
+plainto_tsquery(PG_FUNCTION_ARGS)
+{
+ text *in = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+
+ cfgId = getTSCurrentConfig(true);
+ PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
+ ObjectIdGetDatum(cfgId),
+ PointerGetDatum(in)));
+}
- Assert(len < query->size);
- query->size = len;
- memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
- SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
- }
+Datum
+phraseto_tsquery_byid(PG_FUNCTION_ARGS)
+{
+ text *in = PG_GETARG_TEXT_P(1);
+ TSQuery query;
+ MorphOpaque data;
- pfree(res);
- PG_RETURN_POINTER(query);
+ data.cfg_id = PG_GETARG_OID(0);
+ data.qoperator = OP_PHRASE;
+
+ query = parse_tsquery(text_to_cstring(in),
+ pushval_morph,
+ PointerGetDatum(&data),
+ true);
+
+ PG_RETURN_TSQUERY(query);
}
Datum
-plainto_tsquery(PG_FUNCTION_ARGS)
+phraseto_tsquery(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_P(0);
Oid cfgId;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
+ PG_RETURN_DATUM(DirectFunctionCall2(phraseto_tsquery_byid,
ObjectIdGetDatum(cfgId),
PointerGetDatum(in)));
}
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index 64cf906..f0e4269 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -454,7 +454,7 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
}
static void
-hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
+hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
{
int i;
QueryItem *item = GETQUERY(query);
@@ -467,6 +467,7 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
}
word = &(prs->words[prs->curwords - 1]);
+ word->pos = LIMITPOS(pos);
for (i = 0; i < query->size; i++)
{
if (item->type == QI_VAL &&
@@ -492,17 +493,20 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
{
ParsedLex *tmplexs;
TSLexeme *ptr;
+ int32 savedpos;
while (lexs)
{
-
if (lexs->type > 0)
hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
ptr = norms;
+ savedpos = prs->vectorpos;
while (ptr && ptr->lexeme)
{
- hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
+ if (ptr->flags & TSL_ADDPOS)
+ savedpos++;
+ hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
ptr++;
}
@@ -516,6 +520,8 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
ptr = norms;
while (ptr->lexeme)
{
+ if (ptr->flags & TSL_ADDPOS)
+ prs->vectorpos++;
pfree(ptr->lexeme);
ptr++;
}
@@ -575,7 +581,10 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
do
{
if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
+ {
+ prs->vectorpos++;
addHLParsedLex(prs, query, lexs, norms);
+ }
else
addHLParsedLex(prs, query, lexs, NULL);
} while (norms);
diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
index 7462888..c4118f1 100644
--- a/src/backend/tsearch/ts_selfuncs.c
+++ b/src/backend/tsearch/ts_selfuncs.c
@@ -261,7 +261,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
/*
* Traverse the tsquery in preorder, calculating selectivity as:
*
- * selec(left_oper) * selec(right_oper) in AND nodes,
+ * selec(left_oper) * selec(right_oper) in AND & PHRASE nodes,
*
* selec(left_oper) + selec(right_oper) -
* selec(left_oper) * selec(right_oper) in OR nodes,
@@ -400,6 +400,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
lookup, length, minfreq);
break;
+ case OP_PHRASE:
case OP_AND:
s1 = tsquery_opr_selec(item + 1, operand,
lookup, length, minfreq);
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 4a28ce7..2faa15e 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -2030,15 +2030,36 @@ typedef struct
} hlCheck;
static bool
-checkcondition_HL(void *checkval, QueryOperand *val)
+checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
{
int i;
+ hlCheck *checkval = (hlCheck *) opaque;
- for (i = 0; i < ((hlCheck *) checkval)->len; i++)
+ for (i = 0; i < checkval->len; i++)
{
- if (((hlCheck *) checkval)->words[i].item == val)
- return true;
+ if (checkval->words[i].item == val)
+ {
+ /* don't need to find all positions */
+ if (!data)
+ return true;
+
+ if (!data->pos)
+ {
+ data->pos = palloc(sizeof(WordEntryPos) * checkval->len);
+ data->allocated = true;
+ data->npos = 1;
+ data->pos[0] = checkval->words[i].pos;
+ }
+ else if (data->pos[data->npos - 1] < checkval->words[i].pos)
+ {
+ data->pos[data->npos++] = checkval->words[i].pos;
+ }
+ }
}
+
+ if (data && data->npos > 0)
+ return true;
+
return false;
}
@@ -2400,7 +2421,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
{
- /* best already finded, so try one more cover */
+ /* best already found, so try one more cover */
p++;
continue;
}
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index fef5947..fc0686e 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -179,14 +179,16 @@ typedef struct
} GinChkVal;
static GinTernaryValue
-checkcondition_gin(void *checkval, QueryOperand *val)
+checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
{
- GinChkVal *gcv = (GinChkVal *) checkval;
int j;
- /* if any val requiring a weight is used, set recheck flag */
- if (val->weight != 0)
- *(gcv->need_recheck) = true;
+ /*
+ * if any val requiring a weight is used or caller
+ * needs position information then set recheck flag
+ */
+ if (val->weight != 0 || data != NULL)
+ *gcv->need_recheck = true;
/* convert item's number to corresponding entry's (operand's) number */
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
@@ -196,15 +198,21 @@ checkcondition_gin(void *checkval, QueryOperand *val)
}
/*
+ * Wrapper of check condition function for TS_execute.
+ */
+static bool
+checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+ return checkcondition_gin_internal((GinChkVal *) checkval,
+ val,
+ data) != GIN_FALSE;
+}
+
+/*
* Evaluate tsquery boolean expression using ternary logic.
- *
- * chkcond is a callback function used to evaluate each VAL node in the query.
- * checkval can be used to pass information to the callback. TS_execute doesn't
- * do anything with it.
*/
static GinTernaryValue
-TS_execute_ternary(QueryItem *curitem, void *checkval,
- GinTernaryValue (*chkcond) (void *checkval, QueryOperand *val))
+TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
{
GinTernaryValue val1,
val2,
@@ -214,22 +222,30 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
check_stack_depth();
if (curitem->type == QI_VAL)
- return chkcond(checkval, (QueryOperand *) curitem);
+ return checkcondition_gin_internal(gcv,
+ (QueryOperand *) curitem,
+ NULL /* don't have any position info */);
switch (curitem->qoperator.oper)
{
case OP_NOT:
- result = TS_execute_ternary(curitem + 1, checkval, chkcond);
+ result = TS_execute_ternary(gcv, curitem + 1);
if (result == GIN_MAYBE)
return result;
return !result;
+ case OP_PHRASE:
+ /*
+ * GIN doesn't contain any information about positions,
+ * treat OP_PHRASE as OP_AND with recheck requirement
+ */
+ *gcv->need_recheck = true;
+
case OP_AND:
- val1 = TS_execute_ternary(curitem + curitem->qoperator.left,
- checkval, chkcond);
+ val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
if (val1 == GIN_FALSE)
return GIN_FALSE;
- val2 = TS_execute_ternary(curitem + 1, checkval, chkcond);
+ val2 = TS_execute_ternary(gcv, curitem + 1);
if (val2 == GIN_FALSE)
return GIN_FALSE;
if (val1 == GIN_TRUE && val2 == GIN_TRUE)
@@ -238,11 +254,10 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
return GIN_MAYBE;
case OP_OR:
- val1 = TS_execute_ternary(curitem + curitem->qoperator.left,
- checkval, chkcond);
+ val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
if (val1 == GIN_TRUE)
return GIN_TRUE;
- val2 = TS_execute_ternary(curitem + 1, checkval, chkcond);
+ val2 = TS_execute_ternary(gcv, curitem + 1);
if (val2 == GIN_TRUE)
return GIN_TRUE;
if (val1 == GIN_FALSE && val2 == GIN_FALSE)
@@ -327,9 +342,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck;
- res = TS_execute_ternary(GETQUERY(query),
- &gcv,
- checkcondition_gin);
+ res = TS_execute_ternary(&gcv, GETQUERY(query));
if (res == GIN_TRUE && recheck)
res = GIN_MAYBE;
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 0100cf4..cdd5d43 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -298,7 +298,7 @@ typedef struct
* is there value 'val' in array or not ?
*/
static bool
-checkcondition_arr(void *checkval, QueryOperand *val)
+checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
int32 *StopLow = ((CHKVAL *) checkval)->arrb;
int32 *StopHigh = ((CHKVAL *) checkval)->arre;
@@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val)
}
static bool
-checkcondition_bit(void *checkval, QueryOperand *val)
+checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
/*
* we are not able to find a prefix in signature tree
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 0732060..46f1c00 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -56,7 +56,7 @@ struct TSQueryParserStateData
/*
* subroutine to parse the modifiers (weight and prefix flag currently)
- * part, like ':1AB' of a query.
+ * part, like ':AB*' of a query.
*/
static char *
get_modifiers(char *buf, int16 *weight, bool *prefix)
@@ -101,6 +101,56 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
}
/*
+ * Get phrase operator distance. The operator
+ * may take the following forms:
+ *
+ * a <X> b (distance is no greater than X)
+ * a <-> b (default distance = 1)
+ */
+static char *
+get_phrase_distance(char *buf, int16 *distance)
+{
+	char	   *ptr = buf;
+	char	   *endptr;
+	long		l;
+
+	*distance = -1;		/* stays negative unless a valid operator is parsed */
+
+	/* "<->" is shorthand for distance 1; buf points just past the '<' */
+	if (strncmp(buf, "->", 2) == 0)
+	{
+		*distance = 1;
+		return ptr + 2;
+	}
+
+	while (*ptr && t_isspace(ptr))
+		ptr += pg_mblen(ptr);
+	errno = 0;			/* strtol() never clears errno on success */
+	l = strtol(ptr, &endptr, 10);
+	/* no digits at all: not a phrase operator, hand buf back unconsumed */
+	if (ptr == endptr)
+		return buf;
+
+	if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("distance in phrase operator should be non-negative and less than %d",
+						MAXENTRYPOS)));
+
+	ptr = endptr;
+
+	while (*ptr && t_isspace(ptr))
+		ptr += pg_mblen(ptr);
+
+	if (!t_iseq(ptr, '>'))
+		return buf;
+	ptr++;
+
+	*distance = (int16) l;
+	return ptr;
+}
+
+/*
* token types for parsing
*/
typedef enum
@@ -116,8 +166,10 @@ typedef enum
/*
* get token from query string
*
- * *operator is filled in with OP_* when return values is PT_OPR
+ * *operator is filled in with OP_* when return value is PT_OPR,
+ * but *weight could contain a distance value in case of phrase operator.
* *strval, *lenval and *weight are filled in when return value is PT_VAL
+ *
*/
static ts_tokentype
gettoken_query(TSQueryParserState state,
@@ -185,13 +237,24 @@ gettoken_query(TSQueryParserState state,
(state->buf)++;
return PT_OPR;
}
- if (t_iseq(state->buf, '|'))
+ else if (t_iseq(state->buf, '|'))
{
state->state = WAITOPERAND;
*operator = OP_OR;
(state->buf)++;
return PT_OPR;
}
+ else if (t_iseq(state->buf, '<'))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_PHRASE;
+ (state->buf)++;
+ /* weight var is used as storage for distance */
+ state->buf = get_phrase_distance(state->buf, weight);
+ if (*weight < 0)
+ return PT_ERR;
+ return PT_OPR;
+ }
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
@@ -223,15 +286,16 @@ gettoken_query(TSQueryParserState state,
* Push an operator to state->polstr
*/
void
-pushOperator(TSQueryParserState state, int8 oper)
+pushOperator(TSQueryParserState state, int8 oper, int16 distance)
{
QueryOperator *tmp;
- Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+ Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
tmp->type = QI_OPR;
tmp->oper = oper;
+ tmp->distance = (oper == OP_PHRASE) ? distance : 0;
/* left is filled in later with findoprnd */
state->polstr = lcons(tmp, state->polstr);
@@ -330,14 +394,18 @@ makepol(TSQueryParserState state,
PushFunction pushval,
Datum opaque)
{
- int8 operator = 0;
- ts_tokentype type;
- int lenval = 0;
- char *strval = NULL;
- int8 opstack[STACKDEPTH];
- int lenstack = 0;
- int16 weight = 0;
- bool prefix;
+ int8 operator = 0;
+ ts_tokentype type;
+ int lenval = 0;
+ char *strval = NULL;
+ struct
+ {
+ int8 op;
+ int16 distance;
+ } opstack[STACKDEPTH];
+ int lenstack = 0;
+ int16 weight = 0;
+ bool prefix;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
@@ -348,39 +416,48 @@ makepol(TSQueryParserState state,
{
case PT_VAL:
pushval(opaque, state, strval, lenval, weight, prefix);
- while (lenstack && (opstack[lenstack - 1] == OP_AND ||
- opstack[lenstack - 1] == OP_NOT))
+ while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
+ opstack[lenstack - 1].op == OP_PHRASE ||
+ opstack[lenstack - 1].op == OP_NOT))
{
lenstack--;
- pushOperator(state, opstack[lenstack]);
+ pushOperator(state,
+ opstack[lenstack].op,
+ opstack[lenstack].distance);
}
break;
case PT_OPR:
if (lenstack && operator == OP_OR)
- pushOperator(state, OP_OR);
+ pushOperator(state, OP_OR, 0);
else
{
if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small");
- opstack[lenstack] = operator;
+ opstack[lenstack].op = operator;
+ opstack[lenstack].distance = weight;
lenstack++;
}
break;
case PT_OPEN:
makepol(state, pushval, opaque);
- while (lenstack && (opstack[lenstack - 1] == OP_AND ||
- opstack[lenstack - 1] == OP_NOT))
+ while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
+ opstack[lenstack - 1].op == OP_PHRASE ||
+ opstack[lenstack - 1].op == OP_NOT))
{
lenstack--;
- pushOperator(state, opstack[lenstack]);
+ pushOperator(state,
+ opstack[lenstack].op,
+ opstack[lenstack].distance);
}
break;
case PT_CLOSE:
while (lenstack)
{
lenstack--;
- pushOperator(state, opstack[lenstack]);
+ pushOperator(state,
+ opstack[lenstack].op,
+ opstack[lenstack].distance);
};
return;
case PT_ERR:
@@ -394,12 +471,14 @@ makepol(TSQueryParserState state,
while (lenstack)
{
lenstack--;
- pushOperator(state, opstack[lenstack]);
+ pushOperator(state,
+ opstack[lenstack].op,
+ opstack[lenstack].distance);
}
}
static void
-findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
+findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
@@ -407,33 +486,47 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
if (*pos >= nnodes)
elog(ERROR, "malformed tsquery: operand not found");
- if (ptr[*pos].type == QI_VAL ||
- ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here, they
- * haven't been cleaned away yet. */
+ if (ptr[*pos].type == QI_VAL)
{
(*pos)++;
}
+ else if (ptr[*pos].type == QI_VALSTOP)
+ {
+ *needcleanup = true; /* we'll have to remove stop words */
+ (*pos)++;
+ }
else
{
Assert(ptr[*pos].type == QI_OPR);
if (ptr[*pos].qoperator.oper == OP_NOT)
{
- ptr[*pos].qoperator.left = 1;
+ ptr[*pos].qoperator.left = 1; /* fixed offset */
(*pos)++;
- findoprnd_recurse(ptr, pos, nnodes);
+
+ /* process the only argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
}
else
{
- QueryOperator *curitem = &ptr[*pos].qoperator;
- int tmp = *pos;
+ QueryOperator *curitem = &ptr[*pos].qoperator;
+ int tmp = *pos; /* save current position */
- Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+ Assert(curitem->oper == OP_AND ||
+ curitem->oper == OP_OR ||
+ curitem->oper == OP_PHRASE);
+
+ if (curitem->oper == OP_PHRASE)
+ *needcleanup = true; /* push OP_PHRASE down later */
(*pos)++;
- findoprnd_recurse(ptr, pos, nnodes);
- curitem->left = *pos - tmp;
- findoprnd_recurse(ptr, pos, nnodes);
+
+ /* process RIGHT argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+ curitem->left = *pos - tmp; /* set LEFT arg's offset */
+
+ /* process LEFT argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
}
}
}
@@ -444,12 +537,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
* QueryItems must be in polish (prefix) notation.
*/
static void
-findoprnd(QueryItem *ptr, int size)
+findoprnd(QueryItem *ptr, int size, bool *needcleanup)
{
uint32 pos;
+ *needcleanup = false;
pos = 0;
- findoprnd_recurse(ptr, &pos, size);
+ findoprnd_recurse(ptr, &pos, size, needcleanup);
if (pos != size)
elog(ERROR, "malformed tsquery: extra nodes");
@@ -466,9 +560,6 @@ findoprnd(QueryItem *ptr, int size)
*
* opaque is passed on to pushval as is, pushval can use it to store its
* private state.
- *
- * The returned query might contain QI_STOPVAL nodes. The caller is responsible
- * for cleaning them up (with clean_fakeval)
*/
TSQuery
parse_tsquery(char *buf,
@@ -482,6 +573,7 @@ parse_tsquery(char *buf,
int commonlen;
QueryItem *ptr;
ListCell *cell;
+ bool needcleanup;
/* init state */
state.buffer = buf;
@@ -531,7 +623,7 @@ parse_tsquery(char *buf,
i = 0;
foreach(cell, state.polstr)
{
- QueryItem *item = (QueryItem *) lfirst(cell);
+ QueryItem *item = (QueryItem *) lfirst(cell);
switch (item->type)
{
@@ -555,7 +647,14 @@ parse_tsquery(char *buf,
pfree(state.op);
/* Set left operand pointers for every operator. */
- findoprnd(ptr, query->size);
+ findoprnd(ptr, query->size, &needcleanup);
+
+ /*
+ * QI_VALSTOP nodes should be cleaned
+ * and OP_PHRASE should be pushed down
+ */
+ if (needcleanup)
+ return cleanup_fakeval_and_phrase(query);
return query;
}
@@ -600,12 +699,15 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
(inf)->cur = (inf)->buf + len; \
}
+#define PRINT_PRIORITY(x) \
+ ( (QO_PRIORITY(x) == OP_NOT) ? OP_NOT_PHRASE : QO_PRIORITY(x) )
+
/*
- * recursive walk on tree and print it in
- * infix (human-readable) view
+ * recursively traverse the tree and
+ * print it in infix (human-readable) form
*/
static void
-infix(INFIX *in, bool first)
+infix(INFIX *in, int parentPriority)
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
@@ -674,24 +776,22 @@ infix(INFIX *in, bool first)
}
else if (in->curpol->qoperator.oper == OP_NOT)
{
- bool isopr = false;
-
- RESIZEBUF(in, 1);
- *(in->cur) = '!';
- in->cur++;
- *(in->cur) = '\0';
- in->curpol++;
+ int priority = PRINT_PRIORITY(in->curpol);
- if (in->curpol->type == QI_OPR)
+ if (priority < parentPriority)
{
- isopr = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
- infix(in, isopr);
- if (isopr)
+ infix(in, priority);
+ if (priority < parentPriority)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
@@ -701,11 +801,18 @@ infix(INFIX *in, bool first)
else
{
int8 op = in->curpol->qoperator.oper;
+ int priority = PRINT_PRIORITY(in->curpol);
+ int16 distance = in->curpol->qoperator.distance;
INFIX nrm;
+ bool needParenthesis = false;
in->curpol++;
- if (op == OP_OR && !first)
+ if (priority < parentPriority ||
+ (op == OP_PHRASE &&
+ (priority == parentPriority || /* phrases are not commutative! */
+ parentPriority == OP_PRIORITY(OP_AND))))
{
+ needParenthesis = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
@@ -717,14 +824,14 @@ infix(INFIX *in, bool first)
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
/* get right operand */
- infix(&nrm, false);
+ infix(&nrm, priority);
/* get & print left operand */
in->curpol = nrm.curpol;
- infix(in, false);
+ infix(in, priority);
/* print operator & right operand */
- RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+ RESIZEBUF(in, 3 + (2 + 10 /* distance */) + (nrm.cur - nrm.buf));
switch (op)
{
case OP_OR:
@@ -733,6 +840,12 @@ infix(INFIX *in, bool first)
case OP_AND:
sprintf(in->cur, " & %s", nrm.buf);
break;
+ case OP_PHRASE:
+ if (distance != 1)
+ sprintf(in->cur, " <%d> %s", distance, nrm.buf);
+ else
+ sprintf(in->cur, " <-> %s", nrm.buf);
+ break;
default:
/* OP_NOT is handled in above if-branch */
elog(ERROR, "unrecognized operator type: %d", op);
@@ -740,7 +853,7 @@ infix(INFIX *in, bool first)
in->cur = strchr(in->cur, '\0');
pfree(nrm.buf);
- if (op == OP_OR && !first)
+ if (needParenthesis)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
@@ -749,7 +862,6 @@ infix(INFIX *in, bool first)
}
}
-
Datum
tsqueryout(PG_FUNCTION_ARGS)
{
@@ -768,7 +880,7 @@ tsqueryout(PG_FUNCTION_ARGS)
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
*(nrm.cur) = '\0';
nrm.op = GETOPERAND(query);
- infix(&nrm, true);
+ infix(&nrm, -1 /* lowest priority */);
PG_FREE_IF_COPY(query, 0);
PG_RETURN_CSTRING(nrm.buf);
@@ -789,7 +901,8 @@ tsqueryout(PG_FUNCTION_ARGS)
*
* For each operator:
* uint8 type, QI_OPR
- * uint8 operator, one of OP_AND, OP_OR, OP_NOT.
+ * uint8 operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
+ * uint16 distance (only for OP_PHRASE)
*/
Datum
tsquerysend(PG_FUNCTION_ARGS)
@@ -815,6 +928,9 @@ tsquerysend(PG_FUNCTION_ARGS)
break;
case QI_OPR:
pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
+ if (item->qoperator.oper == OP_PHRASE)
+ pq_sendint(&buf, item->qoperator.distance,
+ sizeof(item->qoperator.distance));
break;
default:
elog(ERROR, "unrecognized tsquery node type: %d", item->type);
@@ -830,15 +946,16 @@ tsquerysend(PG_FUNCTION_ARGS)
Datum
tsqueryrecv(PG_FUNCTION_ARGS)
{
- StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
- TSQuery query;
- int i,
- len;
- QueryItem *item;
- int datalen;
- char *ptr;
- uint32 size;
- const char **operands;
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSQuery query;
+ int i,
+ len;
+ QueryItem *item;
+ int datalen;
+ char *ptr;
+ uint32 size;
+ const char **operands;
+ bool needcleanup;
size = pq_getmsgint(buf, sizeof(uint32));
if (size > (MaxAllocSize / sizeof(QueryItem)))
@@ -907,13 +1024,15 @@ tsqueryrecv(PG_FUNCTION_ARGS)
int8 oper;
oper = (int8) pq_getmsgint(buf, sizeof(int8));
- if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
+ if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
elog(ERROR, "invalid tsquery: unrecognized operator type %d",
(int) oper);
if (i == size - 1)
elog(ERROR, "invalid pointer to right operand");
item->qoperator.oper = oper;
+ if (oper == OP_PHRASE)
+ item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
}
else
elog(ERROR, "unrecognized tsquery node type: %d", item->type);
@@ -930,7 +1049,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
* Fill in the left-pointers. Checks that the tree is well-formed as a
* side-effect.
*/
- findoprnd(item, size);
+ findoprnd(item, size, &needcleanup);
/* Copy operands to output struct */
for (i = 0; i < size; i++)
@@ -949,7 +1068,10 @@ tsqueryrecv(PG_FUNCTION_ARGS)
SET_VARSIZE(query, len + datalen);
- PG_RETURN_TSVECTOR(query);
+ if (needcleanup)
+ PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query));
+
+ PG_RETURN_TSQUERY(query);
}
/*
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
index 333789b..2ab223b 100644
--- a/src/backend/utils/adt/tsquery_cleanup.c
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -25,6 +25,12 @@ typedef struct NODE
QueryItem *valnode;
} NODE;
+/* Non-operator nodes have fake (but highest) priority */
+#define NODE_PRIORITY(x) \
+ ( ((x)->valnode->qoperator.type == QI_OPR) ? \
+ QO_PRIORITY((x)->valnode) : \
+ TOP_PRIORITY )
+
/*
* make query tree from plain view of query
*/
@@ -160,7 +166,8 @@ clean_NOT_intree(NODE *node)
{
NODE *res = node;
- Assert(node->valnode->qoperator.oper == OP_AND);
+ Assert(node->valnode->qoperator.oper == OP_AND ||
+ node->valnode->qoperator.oper == OP_PHRASE);
node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right);
@@ -212,18 +219,20 @@ clean_NOT(QueryItem *ptr, int *len)
#define V_STOP 3 /* the expression is a stop word */
/*
- * Clean query tree from values which is always in
- * text (stopword)
+ * Remove QI_VALSTOP (stopword nodes) from query tree.
*/
static NODE *
-clean_fakeval_intree(NODE *node, char *result)
+clean_fakeval_intree(NODE *node, char *result, int *adddistance)
{
- char lresult = V_UNKNOWN,
- rresult = V_UNKNOWN;
+ char lresult = V_UNKNOWN,
+ rresult = V_UNKNOWN;
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
+ if (adddistance)
+ *adddistance = 0;
+
if (node->valnode->type == QI_VAL)
return node;
else if (node->valnode->type == QI_VALSTOP)
@@ -237,7 +246,7 @@ clean_fakeval_intree(NODE *node, char *result)
if (node->valnode->qoperator.oper == OP_NOT)
{
- node->right = clean_fakeval_intree(node->right, &rresult);
+ node->right = clean_fakeval_intree(node->right, &rresult, NULL);
if (!node->right)
{
*result = V_STOP;
@@ -247,13 +256,30 @@ clean_fakeval_intree(NODE *node, char *result)
}
else
{
- NODE *res = node;
+ NODE *res = node;
+ int ndistance, ldistance = 0, rdistance = 0;
+
+ ndistance = (node->valnode->qoperator.oper == OP_PHRASE) ?
+ node->valnode->qoperator.distance :
+ 0;
- node->left = clean_fakeval_intree(node->left, &lresult);
- node->right = clean_fakeval_intree(node->right, &rresult);
+ node->left = clean_fakeval_intree(node->left,
+ &lresult,
+ ndistance ? &ldistance : NULL);
+
+ node->right = clean_fakeval_intree(node->right,
+ &rresult,
+ ndistance ? &rdistance : NULL);
+
+ /*
+ * ndistance, ldistance and rdistance are greater than zero
+ * if their corresponding nodes are OP_PHRASE
+ */
if (lresult == V_STOP && rresult == V_STOP)
{
+ if (adddistance && ndistance)
+ *adddistance = ldistance + ndistance + rdistance;
freetree(node);
*result = V_STOP;
return NULL;
@@ -261,33 +287,331 @@ clean_fakeval_intree(NODE *node, char *result)
else if (lresult == V_STOP)
{
res = node->right;
+ /*
+ * propagate distance from current node to the
+ * right upper subtree.
+ */
+ if (adddistance && ndistance)
+ *adddistance = rdistance;
pfree(node);
}
else if (rresult == V_STOP)
{
res = node->left;
+ /*
+ * propagate distance from current node to the upper tree.
+ */
+ if (adddistance && ndistance)
+ *adddistance = ndistance + ldistance;
pfree(node);
}
+ else if (ndistance)
+ {
+ node->valnode->qoperator.distance += ldistance;
+ if (adddistance)
+ *adddistance = 0;
+ }
+ else if (adddistance)
+ {
+ *adddistance = 0;
+ }
+
return res;
}
return node;
}
-QueryItem *
-clean_fakeval(QueryItem *ptr, int *len)
+static NODE *
+copyNODE(NODE *node)
{
- NODE *root = maketree(ptr);
+ NODE *cnode = palloc(sizeof(NODE));
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ cnode->valnode = palloc(sizeof(QueryItem));
+ *(cnode->valnode) = *(node->valnode);
+
+ if (node->valnode->type == QI_OPR)
+ {
+ cnode->right = copyNODE(node->right);
+ if (node->valnode->qoperator.oper != OP_NOT)
+ cnode->left = copyNODE(node->left);
+ }
+
+ return cnode;
+}
+
+static NODE *
+makeNODE(int8 op, NODE *left, NODE *right)
+{
+	NODE	   *node = palloc(sizeof(NODE));
+
+	/* build operator node 'op' over left/right; palloc0 keeps distance defined */
+	node->valnode = palloc0(sizeof(QueryItem));
+	node->valnode->qoperator.type = QI_OPR;
+	node->valnode->qoperator.oper = op;
+
+	node->left = left;
+	node->right = right;
+
+	return node;
+}
+
+/*
+ * Move operation with high priority to the leaves. This guarantees
+ * that the phrase operator will be near the bottom of the tree.
+ * Actual transformation will be performed only on subtrees under the
+ * <-> (<n>) operation since it's needed solely for the phrase operator.
+ *
+ * Rules:
+ * a <-> (b | c) => (a <-> b) | (a <-> c)
+ * (a | b) <-> c => (a <-> c) | (b <-> c)
+ * a <-> !b => a & !(a <-> b)
+ * !a <-> b => b & !(a <-> b)
+ *
+ * Warnings for readers:
+ * a <-> b != b <-> a
+ *
+ * a <n> (b <n> c) != (a <n> b) <n> c since the phrase lengths are:
+ * n 2n-1
+ */
+static NODE *
+normalize_phrase_tree(NODE *node)
+{
+	/* there should be no stop words at this point */
+	Assert(node->valnode->type != QI_VALSTOP);
+
+	if (node->valnode->type == QI_VAL)
+		return node;
+
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	Assert(node->valnode->type == QI_OPR);
+
+	if (node->valnode->qoperator.oper == OP_NOT)
+	{
+		/*
+		 * Collapse "!!x" to "x": check that the child really is a NOT
+		 * operator, then normalize whatever node is left from scratch.
+		 */
+		if (node->right->valnode->type == QI_OPR &&
+			node->right->valnode->qoperator.oper == OP_NOT)
+			return normalize_phrase_tree(node->right->right);
+		node->right = normalize_phrase_tree(node->right);
+	}
+	else if (node->valnode->qoperator.oper == OP_PHRASE)
+	{
+		int16		distance;
+		NODE	   *X;
+
+		node->left = normalize_phrase_tree(node->left);
+		node->right = normalize_phrase_tree(node->right);
+
+		if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
+			NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
+			return node;
+
+		/*
+		 * The children can't be swapped, because a <-> b != b <-> a,
+		 * so the right and the left child are each handled separately.
+		 */
+
+		distance = node->valnode->qoperator.distance;
+
+		if (node->right->valnode->type == QI_OPR)
+		{
+			switch (node->right->valnode->qoperator.oper)
+			{
+				case OP_AND:
+					/* a <-> (b & c)  =>  (a <-> b) & (a <-> c) */
+					node = makeNODE(OP_AND,
+									makeNODE(OP_PHRASE,
+											 node->left,
+											 node->right->left),
+									makeNODE(OP_PHRASE,
+											 copyNODE(node->left),
+											 node->right->right));
+					node->left->valnode->qoperator.distance =
+						node->right->valnode->qoperator.distance = distance;
+					break;
+				case OP_OR:
+					/* a <-> (b | c)  =>  (a <-> b) | (a <-> c) */
+					node = makeNODE(OP_OR,
+									makeNODE(OP_PHRASE,
+											 node->left,
+											 node->right->left),
+									makeNODE(OP_PHRASE,
+											 copyNODE(node->left),
+											 node->right->right));
+					node->left->valnode->qoperator.distance =
+						node->right->valnode->qoperator.distance = distance;
+					break;
+				case OP_NOT:
+					/* a <-> !b  =>  a & !(a <-> b) */
+					X = node->right;
+					node->right = node->right->right;
+					X->right = node;
+					node = makeNODE(OP_AND,
+									copyNODE(node->left),
+									X);
+					break;
+				case OP_PHRASE:
+					/* no-op */
+					break;
+				default:
+					elog(ERROR,"Wrong type of tsquery node: %d",
+						 node->right->valnode->qoperator.oper);
+			}
+		}
+
+		if (node->left->valnode->type == QI_OPR &&
+			node->valnode->qoperator.oper == OP_PHRASE)
+		{
+			/*
+			 * if the node is still OP_PHRASE, check the left subtree,
+			 * otherwise the whole node will be transformed later.
+			 */
+			switch(node->left->valnode->qoperator.oper)
+			{
+				case OP_AND:
+					/* (a & b) <-> c  =>  (a <-> c) & (b <-> c) */
+					node = makeNODE(OP_AND,
+									makeNODE(OP_PHRASE,
+											 node->left->left,
+											 node->right),
+									makeNODE(OP_PHRASE,
+											 node->left->right,
+											 copyNODE(node->right)));
+					node->left->valnode->qoperator.distance =
+						node->right->valnode->qoperator.distance = distance;
+					break;
+				case OP_OR:
+					/* (a | b) <-> c  =>  (a <-> c) | (b <-> c) */
+					node = makeNODE(OP_OR,
+									makeNODE(OP_PHRASE,
+											 node->left->left,
+											 node->right),
+									makeNODE(OP_PHRASE,
+											 node->left->right,
+											 copyNODE(node->right)));
+					node->left->valnode->qoperator.distance =
+						node->right->valnode->qoperator.distance = distance;
+					break;
+				case OP_NOT:
+					/* !a <-> b  =>  b & !(a <-> b) */
+					X = node->left;
+					node->left = node->left->right;
+					X->right = node;
+					node = makeNODE(OP_AND,
+									X,
+									copyNODE(node->right));
+					break;
+				case OP_PHRASE:
+					/* no-op */
+					break;
+				default:
+					elog(ERROR,"Wrong type of tsquery node: %d",
+						 node->left->valnode->qoperator.oper);
+			}
+		}
+
+		/* continue transformation */
+		node = normalize_phrase_tree(node);
+	}
+	else /* AND or OR */
+	{
+		node->left = normalize_phrase_tree(node->left);
+		node->right = normalize_phrase_tree(node->right);
+	}
+
+	return node;
+}
+
+/*
+ * Total length of the operand strings in the query tree (each plus its NUL)
+ */
+static int32
+calcstrlen(NODE *node)
+{
+ int32 size = 0;
+
+ if (node->valnode->type == QI_VAL)
+ {
+ size = node->valnode->qoperand.length + 1;
+ }
+ else
+ {
+ Assert(node->valnode->type == QI_OPR);
+
+ size = calcstrlen(node->right);
+ if (node->valnode->qoperator.oper != OP_NOT)
+ size += calcstrlen(node->left);
+ }
+
+ return size;
+}
+
+TSQuery
+cleanup_fakeval_and_phrase(TSQuery in)
+{
+ int32 len,
+ lenstr,
+ commonlen,
+ i;
+ NODE *root;
char result = V_UNKNOWN;
- NODE *resroot;
+ TSQuery out;
+ QueryItem *items;
+ char *operands;
- resroot = clean_fakeval_intree(root, &result);
+ if (in->size == 0)
+ return in;
+
+ /* eliminate stop words */
+ root = clean_fakeval_intree(maketree(GETQUERY(in)), &result, NULL);
if (result != V_UNKNOWN)
{
ereport(NOTICE,
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
- *len = 0;
- return NULL;
+ out = palloc(HDRSIZETQ);
+ out->size = 0;
+ SET_VARSIZE(out, HDRSIZETQ);
+ return out;
+ }
+
+ /* push OP_PHRASE nodes down */
+ root = normalize_phrase_tree(root);
+
+ /*
+ * Build TSQuery from plain view
+ */
+
+ lenstr = calcstrlen(root);
+ items = plaintree(root, &len);
+ commonlen = COMPUTESIZE(len, lenstr);
+
+ out = palloc(commonlen);
+ SET_VARSIZE(out, commonlen);
+ out->size = len;
+
+ memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
+
+ items = GETQUERY(out);
+ operands = GETOPERAND(out);
+ for (i = 0; i < out->size; i++)
+ {
+ QueryOperand *op = (QueryOperand *) &items[i];
+
+ if (op->type != QI_VAL)
+ continue;
+
+ memcpy(operands, GETOPERAND(in) + op->distance, op->length);
+ operands[op->length] = '\0';
+ op->distance = operands - GETOPERAND(out);
+ operands += op->length + 1;
}
- return plaintree(resroot, len);
+ return out;
}
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
index 9cdf1fe..30d3faf 100644
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -27,7 +27,7 @@ tsquery_numnode(PG_FUNCTION_ARGS)
}
static QTNode *
-join_tsqueries(TSQuery a, TSQuery b, int8 operator)
+join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance)
{
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
@@ -36,6 +36,8 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator)
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = QI_OPR;
res->valnode->qoperator.oper = operator;
+ if (operator == OP_PHRASE)
+ res->valnode->qoperator.distance = distance;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
@@ -64,7 +66,7 @@ tsquery_and(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a);
}
- res = join_tsqueries(a, b, OP_AND);
+ res = join_tsqueries(a, b, OP_AND, 0);
query = QTN2QT(res);
@@ -94,7 +96,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a);
}
- res = join_tsqueries(a, b, OP_OR);
+ res = join_tsqueries(a, b, OP_OR, 0);
query = QTN2QT(res);
@@ -106,6 +108,52 @@ tsquery_or(PG_FUNCTION_ARGS)
}
Datum
+tsquery_phrase_distance(PG_FUNCTION_ARGS)
+{
+	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
+	TSQuery		b = PG_GETARG_TSQUERY_COPY(1);
+	QTNode	   *res;
+	TSQuery		query;
+	int32		distance = PG_GETARG_INT32(2);
+
+	if (distance < 0 || distance > MAXENTRYPOS)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("distance in phrase operator should be non-negative and less than %d",
+						MAXENTRYPOS)));
+	if (a->size == 0)
+	{
+		PG_FREE_IF_COPY(a, 0);	/* 'a' is argument 0, not 1 */
+		PG_RETURN_POINTER(b);
+	}
+	else if (b->size == 0)
+	{
+		PG_FREE_IF_COPY(b, 1);
+		PG_RETURN_POINTER(a);
+	}
+
+	res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance);
+
+	query = QTN2QT(res);
+
+	QTNFree(res);
+	PG_FREE_IF_COPY(a, 0);
+	PG_FREE_IF_COPY(b, 1);
+	/* the joined tree still needs its OP_PHRASE nodes pushed down */
+	PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query));
+}
+
+Datum
+tsquery_phrase(PG_FUNCTION_ARGS)
+{
+	/* DirectFunctionCall3() already yields a Datum: return it unchanged */
+	PG_RETURN_DATUM(DirectFunctionCall3(tsquery_phrase_distance,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										Int32GetDatum(1)));
+}
+
+Datum
tsquery_not(PG_FUNCTION_ARGS)
{
TSQuery a = PG_GETARG_TSQUERY_COPY(0);
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
index fe26ad5..0f338aa 100644
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -110,6 +110,10 @@ QTNodeCompare(QTNode *an, QTNode *bn)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res;
}
+
+ if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
+ return (ao->distance > bo->distance) ? -1 : 1;
+
return 0;
}
else if (an->valnode->type == QI_VAL)
@@ -150,7 +154,7 @@ QTNSort(QTNode *in)
for (i = 0; i < in->nchild; i++)
QTNSort(in->child[i]);
- if (in->nchild > 1)
+ if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
}
@@ -190,7 +194,10 @@ QTNTernary(QTNode *in)
{
QTNode *cc = in->child[i];
- if (cc->valnode->type == QI_OPR && in->valnode->qoperator.oper == cc->valnode->qoperator.oper)
+ /* OP_PHRASE isn't associative */
+ if (cc->valnode->type == QI_OPR &&
+ in->valnode->qoperator.oper == cc->valnode->qoperator.oper &&
+ in->valnode->qoperator.oper != OP_PHRASE)
{
int oldnchild = in->nchild;
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 53f678a..ab47b76 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -364,8 +364,10 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
return 0.0;
/* XXX: What about NOT? */
- res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
- calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
+ res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
+ item->qoperator.oper == OP_PHRASE)) ?
+ calc_rank_and(w, t, q) :
+ calc_rank_or(w, t, q);
if (res < 0)
res = 1e-20f;
@@ -496,10 +498,17 @@ ts_rank_tt(PG_FUNCTION_ARGS)
typedef struct
{
- QueryItem **item;
- int16 nitem;
- uint8 wclass;
- int32 pos;
+ union {
+ struct { /* compiled doc representation */
+ QueryItem **items;
+ int16 nitem;
+ } query;
+ struct { /* struct is used for preparing doc representation */
+ QueryItem *item;
+ WordEntry *entry;
+ } map;
+ } data;
+ WordEntryPos pos;
} DocRepresentation;
static int
@@ -508,26 +517,59 @@ compareDocR(const void *va, const void *vb)
const DocRepresentation *a = (const DocRepresentation *) va;
const DocRepresentation *b = (const DocRepresentation *) vb;
- if (a->pos == b->pos)
- return 0;
- return (a->pos > b->pos) ? 1 : -1;
+ if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
+ {
+ if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
+ {
+ if (a->data.map.entry == b->data.map.entry)
+ return 0;
+
+ return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+ }
+
+ return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
+ }
+
+ return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
}
+#define MAXQROPOS MAXENTRYPOS
+typedef struct
+{
+ bool operandexists;
+ bool reverseinsert; /* indicates insert order,
+ true means descending order */
+ uint32 npos;
+ WordEntryPos pos[MAXQROPOS];
+} QueryRepresentationOperand;
+
typedef struct
{
- TSQuery query;
- bool *operandexist;
+ TSQuery query;
+ QueryRepresentationOperand *operandData;
} QueryRepresentation;
-#define QR_GET_OPERAND_EXISTS(q, v) ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
-#define QR_SET_OPERAND_EXISTS(q, v) QR_GET_OPERAND_EXISTS(q,v) = true
+#define QR_GET_OPERAND_DATA(q, v) \
+ ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
static bool
-checkcondition_QueryOperand(void *checkval, QueryOperand *val)
+checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
- QueryRepresentation *qr = (QueryRepresentation *) checkval;
+ QueryRepresentation *qr = (QueryRepresentation *) checkval;
+ QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
- return QR_GET_OPERAND_EXISTS(qr, val);
+ if (!opData->operandexists)
+ return false;
+
+ if (data)
+ {
+ data->npos = opData->npos;
+ data->pos = opData->pos;
+ if (opData->reverseinsert)
+ data->pos += MAXQROPOS - opData->npos;
+ }
+
+ return true;
}
typedef struct
@@ -539,14 +581,65 @@ typedef struct
DocRepresentation *end;
} CoverExt;
+static void
+resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
+{
+    QueryRepresentationOperand *op = qr->operandData;
+
+    for (; op < qr->operandData + qr->query->size; op++)
+    {
+        op->npos = 0;                       /* drop stored positions */
+        op->operandexists = false;          /* not seen in this scan yet */
+        op->reverseinsert = reverseinsert;  /* fill direction of pos[] */
+    }
+}
+
+static void
+fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
+{
+    int         k;
+
+    for (k = 0; k < entry->data.query.nitem; k++)
+    {
+        QueryItem  *qitem = entry->data.query.items[k];
+        QueryRepresentationOperand *op;
+        int         prev;       /* slot of the most recently stored position */
+        int         next;       /* slot the new position would go into */
+
+        if (qitem->type != QI_VAL)
+            continue;
+
+        op = QR_GET_OPERAND_DATA(qr, qitem);
+        op->operandexists = true;
+
+        /* forward fill grows up from slot 0, reverse fill grows down from the top */
+        if (op->reverseinsert)
+        {
+            prev = MAXQROPOS - op->npos;
+            next = prev - 1;
+        }
+        else
+        {
+            prev = (int) op->npos - 1;
+            next = op->npos;
+        }
+
+        /* store unless it repeats the position stored last */
+        if (op->npos == 0 ||
+            WEP_GETPOS(op->pos[prev]) != WEP_GETPOS(entry->pos))
+        {
+            op->pos[next] = entry->pos;
+            op->npos++;
+        }
+    }
+}
static bool
Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
{
- DocRepresentation *ptr;
- int lastpos = ext->pos;
- int i;
- bool found = false;
+ DocRepresentation *ptr;
+ int lastpos = ext->pos;
+ bool found = false;
/*
* since this function recurses, it could be driven to stack overflow.
@@ -554,7 +647,7 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
*/
check_stack_depth();
- memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
+ resetQueryRepresentation(qr, false);
ext->p = INT_MAX;
ext->q = 0;
@@ -563,16 +656,13 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
/* find upper bound of cover from current position, move up */
while (ptr - doc < len)
{
- for (i = 0; i < ptr->nitem; i++)
- {
- if (ptr->item[i]->type == QI_VAL)
- QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
- }
+ fillQueryRepresentationData(qr, ptr);
+
if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
{
- if (ptr->pos > ext->q)
+ if (WEP_GETPOS(ptr->pos) > ext->q)
{
- ext->q = ptr->pos;
+ ext->q = WEP_GETPOS(ptr->pos);
ext->end = ptr;
lastpos = ptr - doc;
found = true;
@@ -585,22 +675,24 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
if (!found)
return false;
- memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
+ resetQueryRepresentation(qr, true);
ptr = doc + lastpos;
/* find lower bound of cover from found upper bound, move down */
while (ptr >= doc + ext->pos)
{
- for (i = 0; i < ptr->nitem; i++)
- if (ptr->item[i]->type == QI_VAL)
- QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
+ /*
+ * we scan doc from right to left, so pos info in reverse order!
+ */
+ fillQueryRepresentationData(qr, ptr);
+
if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
{
- if (ptr->pos < ext->p)
+ if (WEP_GETPOS(ptr->pos) < ext->p)
{
ext->begin = ptr;
- ext->p = ptr->pos;
+ ext->p = WEP_GETPOS(ptr->pos);
}
break;
}
@@ -628,18 +720,20 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
WordEntry *entry,
*firstentry;
WordEntryPos *post;
- int32 dimt,
+ int32 dimt, /* number of 'post' items */
j,
i,
nitem;
int len = qr->query->size * 4,
cur = 0;
DocRepresentation *doc;
- char *operand;
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
- operand = GETOPERAND(qr->query);
+ /*
+ * Iterate through the query to build a DocRepresentation for each word and
+ * its entries that satisfy the query
+ */
for (i = 0; i < qr->query->size; i++)
{
QueryOperand *curoperand;
@@ -649,13 +743,11 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
curoperand = &item[i].qoperand;
- if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
- continue;
-
firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
if (!entry)
continue;
+ /* iterations over entries in tsvector */
while (entry - firstentry < nitem)
{
if (entry->haspos)
@@ -676,53 +768,67 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
}
+ /* iterations over entry's positions */
for (j = 0; j < dimt; j++)
{
- if (j == 0)
- {
- int k;
-
- doc[cur].nitem = 0;
- doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
-
- for (k = 0; k < qr->query->size; k++)
- {
- QueryOperand *kptr = &item[k].qoperand;
- QueryOperand *iptr = &item[i].qoperand;
-
- if (k == i ||
- (item[k].type == QI_VAL &&
- compareQueryOperand(&kptr, &iptr, operand) == 0))
- {
- /*
- * if k == i, we've already checked above that
- * it's type == Q_VAL
- */
- doc[cur].item[doc[cur].nitem] = item + k;
- doc[cur].nitem++;
- QR_SET_OPERAND_EXISTS(qr, item + k);
- }
- }
- }
- else
+ if (curoperand->weight == 0 ||
+ curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
{
- doc[cur].nitem = doc[cur - 1].nitem;
- doc[cur].item = doc[cur - 1].item;
+ doc[cur].pos = post[j];
+ doc[cur].data.map.entry = entry;
+ doc[cur].data.map.item = (QueryItem *) curoperand;
+ cur++;
}
- doc[cur].pos = WEP_GETPOS(post[j]);
- doc[cur].wclass = WEP_GETWEIGHT(post[j]);
- cur++;
}
entry++;
}
}
- *doclen = cur;
-
if (cur > 0)
{
+ DocRepresentation *rptr = doc + 1,
+ *wptr = doc,
+ storage;
+
+ /*
+ * Sort representation in ascending order by pos and entry
+ */
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+
+ /*
+ * Join QueryItems per WordEntry and its position
+ */
+ storage.pos = doc->pos;
+ storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
+ storage.data.query.items[0] = doc->data.map.item;
+ storage.data.query.nitem = 1;
+
+ while (rptr - doc < cur)
+ {
+ if (rptr->pos == (rptr-1)->pos &&
+ rptr->data.map.entry == (rptr-1)->data.map.entry)
+ {
+ storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
+ storage.data.query.nitem++;
+ }
+ else
+ {
+ *wptr = storage;
+ wptr++;
+ storage.pos = rptr->pos;
+ storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
+ storage.data.query.items[0] = rptr->data.map.item;
+ storage.data.query.nitem = 1;
+ }
+
+ rptr++;
+ }
+
+ *wptr = storage;
+ wptr++;
+
+ *doclen = wptr - doc;
return doc;
}
@@ -758,12 +864,13 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
}
qr.query = query;
- qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
+ qr.operandData = (QueryRepresentationOperand *)
+ palloc0(sizeof(QueryRepresentationOperand) * query->size);
doc = get_docrep(txt, &qr, &doclen);
if (!doc)
{
- pfree(qr.operandexist);
+ pfree(qr.operandData);
return 0.0;
}
@@ -777,7 +884,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
while (ptr <= ext.end)
{
- InvSum += invws[ptr->wclass];
+ InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
ptr++;
}
@@ -827,7 +934,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
pfree(doc);
- pfree(qr.operandexist);
+ pfree(qr.operandData);
return (float4) Wdoc;
}
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 12043bf..2a26c46 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -28,7 +28,7 @@ typedef struct
/* Compare two WordEntryPos values for qsort */
-static int
+int
comparePos(const void *a, const void *b)
{
int apos = WEP_GETPOS(*(const WordEntryPos *) a);
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index f6d3fb5..d40c0a7 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1121,35 +1121,124 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
}
/*
- * check weight info
+ * Check weight info and/or fill 'data' with the required positions
*/
static bool
-checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
+checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+ ExecPhraseData *data)
{
- WordEntryPosVector *posvec;
- WordEntryPos *ptr;
- uint16 len;
+ bool result = false;
- posvec = (WordEntryPosVector *)
- (chkval->values + SHORTALIGN(val->pos + val->len));
+ if (entry->haspos && (val->weight || data))
+ {
+ WordEntryPosVector *posvec;
- len = posvec->npos;
- ptr = posvec->pos;
+ /*
+ * We can't use the _POSVECPTR macro here because the pointer to the
+ * tsvector's lexeme storage is already contained in chkval->values.
+ */
+ posvec = (WordEntryPosVector *)
+ (chkval->values + SHORTALIGN(entry->pos + entry->len));
- while (len--)
+ if (val->weight && data)
+ {
+ WordEntryPos *posvec_iter = posvec->pos;
+ WordEntryPos *dptr;
+
+ /*
+ * Filter position information by weights
+ */
+ dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+ data->allocated = true;
+
+ /* Is there a position with a matching weight? */
+ while (posvec_iter < posvec->pos + posvec->npos)
+ {
+ /* If true, append this position to the data->pos */
+ if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
+ {
+ *dptr = WEP_GETPOS(*posvec_iter);
+ dptr++;
+ }
+
+ posvec_iter++;
+ }
+
+ data->npos = dptr - data->pos;
+
+ if (data->npos > 0)
+ result = true;
+ }
+ else if (val->weight)
+ {
+ WordEntryPos *posvec_iter = posvec->pos;
+
+ /* Is there a position with a matching weight? */
+ while (posvec_iter < posvec->pos + posvec->npos)
+ {
+ if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
+ {
+ result = true;
+ break; /* no need to go further */
+ }
+
+ posvec_iter++;
+ }
+ }
+ else /* data != NULL */
+ {
+ data->npos = posvec->npos;
+ data->pos = posvec->pos;
+ data->allocated = false;
+ result = true;
+ }
+ }
+ else
{
- if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
- return true;
- ptr++;
+ result = true;
}
- return false;
+
+ return result;
+}
+
+/*
+ * Sort a position array and drop duplicate positions in place. uniquePos()
+ * from tsvector.c can't be used: the array might be longer than MAXENTRYPOS.
+ *
+ * Returns the new length.
+ */
+static int
+uniqueLongPos(WordEntryPos *pos, int npos)
+{
+    int         kept;           /* index of the last element kept so far */
+    int         scan;
+
+    /* zero or one entry needs neither sorting nor merging */
+    if (npos <= 1)
+        return npos;
+
+    /* sort first so that equal positions end up next to each other */
+    qsort((void *) pos, npos, sizeof(WordEntryPos), comparePos);
+
+    kept = 0;
+    for (scan = 1; scan < npos; scan++)
+    {
+        if (WEP_GETPOS(pos[scan]) == WEP_GETPOS(pos[kept]))
+            continue;           /* repeats the position already kept */
+
+        /* keep it; only the WEP_GETPOS() part is stored for later elements */
+        kept++;
+        pos[kept] = WEP_GETPOS(pos[scan]);
+    }
+
+    return kept + 1;
+}
/*
* is there value 'val' in array or not ?
*/
static bool
-checkcondition_str(void *checkval, QueryOperand *val)
+checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopLow = chkval->arrb;
@@ -1162,14 +1251,16 @@ checkcondition_str(void *checkval, QueryOperand *val)
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- difference = tsCompareString(chkval->operand + val->distance, val->length,
- chkval->values + StopMiddle->pos, StopMiddle->len,
+ difference = tsCompareString(chkval->operand + val->distance,
+ val->length,
+ chkval->values + StopMiddle->pos,
+ StopMiddle->len,
false);
if (difference == 0)
{
- res = (val->weight && StopMiddle->haspos) ?
- checkclass_str(chkval, StopMiddle, val) : true;
+ /* Check weight info & fill 'data' with positions */
+ res = checkclass_str(chkval, StopMiddle, val, data);
break;
}
else if (difference > 0)
@@ -1178,31 +1269,200 @@ checkcondition_str(void *checkval, QueryOperand *val)
StopHigh = StopMiddle;
}
- if (!res && val->prefix)
+ if ((!res || data) && val->prefix)
{
+ WordEntryPos *allpos = NULL;
+ int npos = 0,
+ totalpos = 0;
/*
* there was a failed exact search, so we should scan further to find
- * a prefix match.
+ * a prefix match. We also need to do so if caller needs position info
*/
if (StopLow >= StopHigh)
StopMiddle = StopHigh;
- while (res == false && StopMiddle < chkval->arre &&
- tsCompareString(chkval->operand + val->distance, val->length,
- chkval->values + StopMiddle->pos, StopMiddle->len,
+ while ((!res || data) && StopMiddle < chkval->arre &&
+ tsCompareString(chkval->operand + val->distance,
+ val->length,
+ chkval->values + StopMiddle->pos,
+ StopMiddle->len,
true) == 0)
{
- res = (val->weight && StopMiddle->haspos) ?
- checkclass_str(chkval, StopMiddle, val) : true;
+ if (data)
+ {
+ /*
+ * We need to join position information
+ */
+ res = checkclass_str(chkval, StopMiddle, val, data);
+
+ if (res)
+ {
+ while (npos + data->npos >= totalpos)
+ {
+ if (totalpos == 0)
+ {
+ totalpos = 256;
+ allpos = palloc(sizeof(WordEntryPos) * totalpos);
+ }
+ else
+ {
+ totalpos *= 2;
+ allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
+ }
+ }
+
+ memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
+ npos += data->npos;
+ }
+ }
+ else
+ {
+ res = checkclass_str(chkval, StopMiddle, val, NULL);
+ }
StopMiddle++;
}
+
+ if (res && data)
+ {
+ /* Sort and make unique array of found positions */
+ data->pos = allpos;
+ data->npos = uniqueLongPos(allpos, npos);
+ data->allocated = true;
+ }
}
return res;
}
/*
+ * Check for phrase condition. Fallback to the AND operation
+ * if there is no positional information.
+ */
+static bool
+TS_phrase_execute(QueryItem *curitem,
+                  void *checkval, bool calcnot, ExecPhraseData *data,
+                  bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
+{
+    /* since this function recurses, it could be driven to stack overflow */
+    check_stack_depth();
+
+    if (curitem->type == QI_VAL)
+    {
+        return chkcond(checkval, (QueryOperand *) curitem, data);
+    }
+    else
+    {
+        ExecPhraseData Ldata = {0, false, NULL},
+                    Rdata = {0, false, NULL};
+        WordEntryPos *Lpos,
+                   *Rpos,
+                   *pos_iter = NULL;
+
+        Assert(curitem->qoperator.oper == OP_PHRASE);
+
+        if (!TS_phrase_execute(curitem + curitem->qoperator.left,
+                               checkval, calcnot, &Ldata, chkcond))
+            return false;
+
+        if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
+            return false;
+
+        /*
+         * if at least one of the operands has no position
+         * information, fallback to AND operation.
+         */
+        if (Ldata.npos == 0 || Rdata.npos == 0)
+            return true;
+
+        /*
+         * Result of the operation is a list of the
+         * corresponding positions of RIGHT operand.
+         */
+        if (data)
+        {
+            if (!Rdata.allocated)
+                /*
+                 * Each Rpos yields at most one result, but one Lpos can
+                 * match several Rpos, so only Rdata.npos bounds the result.
+                 */
+                data->pos = palloc(sizeof(WordEntryPos) * Rdata.npos);
+            else
+                data->pos = Rdata.pos;
+
+            data->allocated = true;
+            data->npos = 0;
+            pos_iter = data->pos;
+        }
+
+        Lpos = Ldata.pos;
+        Rpos = Rdata.pos;
+
+        /*
+         * Find matches by distance, WEP_GETPOS() is needed because
+         * ExecPhraseData->pos can point to the tsvector's WordEntryPosVector
+         */
+
+        while (Rpos < Rdata.pos + Rdata.npos)
+        {
+            while (Lpos < Ldata.pos + Ldata.npos)
+            {
+                if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos))
+                {
+                    /*
+                     * Lpos is behind the Rpos, so we have to check the
+                     * distance condition
+                     */
+                    if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance)
+                    {
+                        /* MATCH! */
+                        if (data)
+                        {
+                            *pos_iter = WEP_GETPOS(*Rpos);
+                            pos_iter++;
+
+                            break;      /* We need to build a unique result
+                                         * array, so go to the next Rpos */
+                        }
+                        else
+                        {
+                            /*
+                             * We are in the root of the phrase tree and hence
+                             * we don't have to store the resulting positions
+                             */
+                            return true;
+                        }
+                    }
+                }
+                else
+                {
+                    /*
+                     * Go to the next Rpos, because Lpos
+                     * is ahead of the current Rpos
+                     */
+                    break;
+                }
+
+                Lpos++;
+            }
+
+            Rpos++;
+        }
+
+        if (data)
+        {
+            data->npos = pos_iter - data->pos;
+
+            if (data->npos > 0)
+                return true;
+        }
+    }
+
+    return false;
+}
+
+
+/*
* Evaluate tsquery boolean expression.
*
* chkcond is a callback function used to evaluate each VAL node in the query.
@@ -1213,13 +1473,14 @@ checkcondition_str(void *checkval, QueryOperand *val)
*/
bool
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
- bool (*chkcond) (void *checkval, QueryOperand *val))
+ bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
- return chkcond(checkval, (QueryOperand *) curitem);
+ return chkcond(checkval, (QueryOperand *) curitem,
+ NULL /* we don't need position info */);
switch (curitem->qoperator.oper)
{
@@ -1241,6 +1502,9 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+ case OP_PHRASE:
+ return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
+
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
}
@@ -1277,6 +1541,10 @@ tsquery_requires_match(QueryItem *curitem)
*/
return false;
+ case OP_PHRASE:
+ /*
+ * Treat OP_PHRASE as OP_AND here
+ */
case OP_AND:
/* If either side requires a match, we're good */
if (tsquery_requires_match(curitem + curitem->qoperator.left))
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index e281708..d994058 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -89,7 +89,15 @@ do { \
} \
} while (0)
-#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+/* phrase operator begins with '<' */
+#define ISOPERATOR(x) \
+ ( pg_mblen(x) == 1 && ( *(x) == '!' || \
+ *(x) == '&' || \
+ *(x) == '|' || \
+ *(x) == '(' || \
+ *(x) == ')' || \
+ *(x) == '<' \
+ ) )
/* Fills gettoken_tsvector's output parameters, and returns true */
#define RETURN_TOKEN \
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index b3daff2..a5e4a02 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 ( "&&" PGNSP PGUID b f f 3615 3615 3615 0 0 tsque
DESCR("AND-concatenate");
DATA(insert OID = 3681 ( "||" PGNSP PGUID b f f 3615 3615 3615 0 0 tsquery_or - - ));
DESCR("OR-concatenate");
+/* The <-> operator calls tsquery_phrase, but that function name is overloaded, so refer to it by its OID (5003) */
+DATA(insert OID = 5005 ( "<->" PGNSP PGUID b f f 3615 3615 3615 0 0 5003 - - ));
+DESCR("phrase-concatenate");
DATA(insert OID = 3682 ( "!!" PGNSP PGUID l f f 0 3615 3615 0 0 tsquery_not - - ));
DESCR("NOT tsquery");
DATA(insert OID = 3693 ( "@>" PGNSP PGUID b f f 3615 3615 16 3694 0 tsq_mcontains contsel contjoinsel ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c86b920..b7fd899 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4577,6 +4577,9 @@ DESCR("less-equal-greater");
DATA(insert OID = 3669 ( tsquery_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ ));
DATA(insert OID = 3670 ( tsquery_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ ));
+DATA(insert OID = 5003 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_phrase _null_ _null_ _null_ ));
+DATA(insert OID = 5004 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3615 "3615 3615 23" _null_ _null_ _null_ _null_ _null_ tsquery_phrase_distance _null_ _null_ _null_ ));
+DESCR("phrase-concatenate with distance");
DATA(insert OID = 3671 ( tsquery_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ ));
DATA(insert OID = 3691 ( tsq_mcontains PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ ));
@@ -4696,12 +4699,16 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2
DESCR("make tsquery");
DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
DESCR("transform to tsvector");
DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
DESCR("make tsquery");
DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 6f7a891..9364eee 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -34,16 +34,17 @@ typedef struct
*/
typedef struct
{
- uint32 selected:1,
- in:1,
- replace:1,
- repeated:1,
- skip:1,
- unused:3,
- type:8,
- len:16;
- char *word;
- QueryOperand *item;
+ uint32 selected: 1,
+ in: 1,
+ replace: 1,
+ repeated: 1,
+ skip: 1,
+ unused: 3,
+ type: 8,
+ len: 16;
+ WordEntryPos pos;
+ char *word;
+ QueryOperand *item;
} HeadlineWordEntry;
typedef struct
@@ -51,6 +52,7 @@ typedef struct
HeadlineWordEntry *words;
int32 lenwords;
int32 curwords;
+ int32 vectorpos; /* positions a-la tsvector */
char *startsel;
char *stopsel;
char *fragdelim;
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index bc99524..5f4e596 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -49,6 +49,8 @@ typedef struct
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
+extern int comparePos(const void *a, const void *b);
+
/*
* Equivalent to
* typedef struct {
@@ -213,15 +215,33 @@ typedef struct
} QueryOperand;
-/* Legal values for QueryOperator.operator */
-#define OP_NOT 1
-#define OP_AND 2
-#define OP_OR 3
+/*
+ * Legal values for QueryOperator.operator.
+ * They must be ordered by priority! We assume that phrase
+ * has the highest priority, but this convention holds only
+ * for query transformation; it is needed to simplify
+ * the query transformation algorithm.
+ */
+#define OP_OR 1
+#define OP_AND 2
+#define OP_NOT 3
+#define OP_PHRASE 4
+#define OP_NOT_PHRASE 5 /*
+ * OP_PHRASE negation operations must have greater
+ * priority in order to force infix() to surround
+ * the whole OP_PHRASE expression with parentheses.
+ */
+
+#define TOP_PRIORITY 6 /* highest priority for val nodes */
+
+#define OP_PRIORITY(x) (x)
+#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
typedef struct
{
QueryItemType type;
int8 oper; /* see above */
+ int16 distance; /* distance between args for OP_PHRASE */
uint32 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
@@ -304,6 +324,8 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
extern Datum tsquery_and(PG_FUNCTION_ARGS);
extern Datum tsquery_or(PG_FUNCTION_ARGS);
+extern Datum tsquery_phrase(PG_FUNCTION_ARGS);
+extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS);
extern Datum tsquery_not(PG_FUNCTION_ARGS);
extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 88533a6..855bbfe 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf,
extern void pushValue(TSQueryParserState state,
char *strval, int lenval, int16 weight, bool prefix);
extern void pushStop(TSQueryParserState state);
-extern void pushOperator(TSQueryParserState state, int8 oper);
+extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
/*
* parse plain text and lexize words
@@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs);
/*
* Common check function for tsvector @@ tsquery
*/
+typedef struct ExecPhraseData
+{
+ int npos; /* number of entries in pos */
+ bool allocated; /* true if pos was palloc'd rather than borrowed */
+ WordEntryPos *pos; /* positions found for an operand or sub-phrase */
+} ExecPhraseData;
+
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
- bool (*chkcond) (void *checkval, QueryOperand *val));
+ bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
extern bool tsquery_requires_match(QueryItem *curitem);
/*
@@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum to_tsquery(PG_FUNCTION_ARGS);
extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum plainto_tsquery(PG_FUNCTION_ARGS);
+extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS);
+extern Datum phraseto_tsquery(PG_FUNCTION_ARGS);
/*
* GiST support function
@@ -169,7 +178,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS);
* TSQuery Utilities
*/
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
-extern QueryItem *clean_fakeval(QueryItem *ptr, int32 *len);
+extern TSQuery cleanup_fakeval_and_phrase(TSQuery in);
typedef struct QTNode
{
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out
index ef86295..5ddbe80 100644
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -434,9 +434,9 @@ SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footbal
(1 row)
SELECT to_tsquery('ispell_tst', 'footballklubber');
- to_tsquery
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+ to_tsquery
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
@@ -458,9 +458,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
- to_tsquery
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+ to_tsquery
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
@@ -469,6 +469,18 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
+SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
+ to_tsquery
+-----------------------------------------------------------------------------
+ ( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
+(1 row)
+
+SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
+ phraseto_tsquery
+-----------------------------------------------------------------------
+ ( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
+(1 row)
+
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE hunspell WITH hunspell_long;
@@ -479,9 +491,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
- to_tsquery
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+ to_tsquery
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
@@ -500,9 +512,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
- to_tsquery
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+ to_tsquery
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 3811250..558f00c 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -554,6 +554,235 @@ SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
'foo' & 'bar' & ( 'asd' | 'fg' )
(1 row)
+-- Check stop word deletion, a and s are stop-words
+SELECT to_tsquery('english', '(1 <-> 2) <-> a');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <-> a) <-> 2');
+ to_tsquery
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(a <-> 1) <-> 2');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (a <-> 2)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (2 <-> a)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <-> 2) <3> a');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <-> a) <3> 2');
+ to_tsquery
+-------------
+ '1' <4> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(a <-> 1) <3> 2');
+ to_tsquery
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <3> (a <-> 2)');
+ to_tsquery
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <3> (2 <-> a)');
+ to_tsquery
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <3> 2) <-> a');
+ to_tsquery
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <3> a) <-> 2');
+ to_tsquery
+-------------
+ '1' <4> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(a <3> 1) <-> 2');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
+ to_tsquery
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (a <3> 2)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (2 <3> a)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
+ to_tsquery
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
+ to_tsquery
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
+ to_tsquery
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
+ to_tsquery
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
+ to_tsquery
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
+ to_tsquery
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
+ to_tsquery
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('foo <-> (a <-> (the <-> bar))');
+ to_tsquery
+-----------------
+ 'foo' <-> 'bar'
+(1 row)
+
+SELECT to_tsquery('((foo <-> a) <-> the) <-> bar');
+ to_tsquery
+-----------------
+ 'foo' <3> 'bar'
+(1 row)
+
+SELECT to_tsquery('foo <-> a <-> the <-> bar');
+ to_tsquery
+-----------------
+ 'foo' <3> 'bar'
+(1 row)
+
+SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways');
+ phraseto_tsquery
+-----------------------------------------------------------------------
+ ( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
+(1 row)
+
SELECT ts_rank_cd(to_tsvector('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
@@ -602,6 +831,22 @@ S. T. Coleridge (1772-1834)
0.1
(1 row)
+SELECT ts_rank_cd(to_tsvector('english', '
+Day after day, day after day,
+ We stuck, nor breath nor motion,
+As idle as a painted Ship
+ Upon a painted Ocean.
+Water, water, every where
+ And all the boards did shrink;
+Water, water, every where,
+ Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+'), to_tsquery('english', 'painted <-> Ship'));
+ ts_rank_cd
+------------
+ 0.1
+(1 row)
+
SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
to_tsquery('both & stripped'));
ts_rank_cd
@@ -675,6 +920,44 @@ S. T. Coleridge (1772-1834)
(1 row)
SELECT ts_headline('english', '
+Day after day, day after day,
+ We stuck, nor breath nor motion,
+As idle as a painted Ship
+ Upon a painted Ocean.
+Water, water, every where
+ And all the boards did shrink;
+Water, water, every where,
+ Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'painted Ocean'));
+ ts_headline
+----------------------------------
+ <b>painted</b> <b>Ocean</b>. +
+ Water, water, every where +
+ And all the boards did shrink;+
+ Water, water, every
+(1 row)
+
+SELECT ts_headline('english', '
+Day after day, day after day,
+ We stuck, nor breath nor motion,
+As idle as a painted Ship
+ Upon a painted Ocean.
+Water, water, every where
+ And all the boards did shrink;
+Water, water, every where,
+ Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'idle as a painted Ship'));
+ ts_headline
+---------------------------------------------
+ <b>idle</b> as a <b>painted</b> <b>Ship</b>+
+ Upon a <b>painted</b> Ocean. +
+ Water, water, every where +
+ And all the boards
+(1 row)
+
+SELECT ts_headline('english', '
<html>
<!-- some comment -->
<body>
@@ -703,6 +986,24 @@ to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
</html>
(1 row)
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
+ ts_headline
+-------------------
+ <b>1</b> <b>3</b>
+(1 row)
+
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
+ ts_headline
+------------------------------
+ <b>1</b> 2 <b>3</b> <b>1</b>
+(1 row)
+
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
+ ts_headline
+-------------------
+ <b>1</b> <b>3</b>
+(1 row)
+
--Check if headline fragments work
SELECT ts_headline('english', '
Day after day, day after day,
@@ -805,13 +1106,13 @@ UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york';
count
-------
- 1
+ 2
(1 row)
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york';
count
-------
- 2
+ 3
(1 row)
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
@@ -823,13 +1124,13 @@ SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york';
count
-------
- 3
+ 4
(1 row)
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york';
count
-------
- 2
+ 3
(1 row)
CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
@@ -837,13 +1138,13 @@ SET enable_seqscan=OFF;
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york';
count
-------
- 1
+ 2
(1 row)
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york';
count
-------
- 2
+ 3
(1 row)
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
@@ -855,20 +1156,20 @@ SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york';
count
-------
- 3
+ 4
(1 row)
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york';
count
-------
- 2
+ 3
(1 row)
RESET enable_seqscan;
SELECT ts_rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
- ts_rewrite
-----------------------------------------------------------------------------------
- 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
+ ts_rewrite
+------------------------------------------------------------------------------
+ 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
(1 row)
SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text );
@@ -884,9 +1185,9 @@ SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::
(1 row)
SELECT ts_rewrite('bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'::text );
- ts_rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ ts_rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
@@ -902,9 +1203,33 @@ SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery')
(1 row)
SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
- ts_rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ ts_rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
+(1 row)
+
+SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+ ts_rewrite
+-------------
+ '2' <-> '4'
+(1 row)
+
+SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+ ts_rewrite
+-----------------------
+ '1' & ( '2' <2> '3' )
+(1 row)
+
+SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
+ ts_rewrite
+-----------------------------------------------
+ ( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
+(1 row)
+
+SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
+ ts_rewrite
+---------------------------
+ '5' <-> '7' | '5' <-> '8'
(1 row)
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
@@ -943,9 +1268,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
(1 row)
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
- ts_rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ ts_rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
@@ -961,9 +1286,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
(1 row)
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
- ts_rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ ts_rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
@@ -1004,9 +1329,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
(1 row)
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
- ts_rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ ts_rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
@@ -1022,9 +1347,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
(1 row)
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
- ts_rewrite
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+ ts_rewrite
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row)
RESET enable_seqscan;
@@ -1132,3 +1457,15 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
----
(0 rows)
+--check OP_PHRASE on index
+create temp table phrase_index_test(fts tsvector);
+insert into phrase_index_test values('A fat cat has just eaten a rat.');
+create index phrase_index_test_idx on phrase_index_test using gin(fts);
+set enable_seqscan = off;
+select * from phrase_index_test where fts @@ phraseto_tsquery('fat cat');
+ fts
+-------------------------------------------------
+ 'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
+(1 row)
+
+set enable_seqscan = on;
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index a386a46..c904c1c 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -277,15 +277,15 @@ SELECT '(!1|2)&3'::tsquery;
(1 row)
SELECT '1|(2|(4|(5|6)))'::tsquery;
- tsquery
------------------------------------------
- '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+ tsquery
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
(1 row)
SELECT '1|2|4|5|6'::tsquery;
- tsquery
------------------------------------------
- ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+ tsquery
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
(1 row)
SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -325,11 +325,139 @@ SELECT $$'\\as'$$::tsquery;
(1 row)
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
+ tsquery
+--------------------------------------
+ 'a':* & 'nbb':*AC | 'doo':*A | 'goo'
+(1 row)
+
+-- phrase transformation
+SELECT 'a <-> (b|c)'::tsquery;
+ tsquery
+---------------------------
+ 'a' <-> 'b' | 'a' <-> 'c'
+(1 row)
+
+SELECT '(a|b) <-> c'::tsquery;
+ tsquery
+---------------------------
+ 'a' <-> 'c' | 'b' <-> 'c'
+(1 row)
+
+SELECT '(a|b) <-> (d|c)'::tsquery;
+ tsquery
+-------------------------------------------------------
+ 'a' <-> 'd' | 'b' <-> 'd' | 'a' <-> 'c' | 'b' <-> 'c'
+(1 row)
+
+SELECT 'a <-> (b&c)'::tsquery;
+ tsquery
+-----------------------------------
+ ( 'a' <-> 'b' ) & ( 'a' <-> 'c' )
+(1 row)
+
+SELECT '(a&b) <-> c'::tsquery;
+ tsquery
+-----------------------------------
+ ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
+(1 row)
+
+SELECT '(a&b) <-> (d&c)'::tsquery;
+ tsquery
+-----------------------------------------------------------------------
+ ( 'a' <-> 'd' ) & ( 'b' <-> 'd' ) & ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
+(1 row)
+
+SELECT 'a <-> !b'::tsquery;
+ tsquery
+------------------------
+ 'a' & !( 'a' <-> 'b' )
+(1 row)
+
+SELECT '!a <-> b'::tsquery;
+ tsquery
+------------------------
+ !( 'a' <-> 'b' ) & 'b'
+(1 row)
+
+SELECT '!a <-> !b'::tsquery;
+ tsquery
+------------------------------------
+ !'a' & !( !( 'a' <-> 'b' ) & 'b' )
+(1 row)
+
+SELECT 'a <-> !(b&c)'::tsquery;
+ tsquery
+----------------------------------------------
+ 'a' & !( ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) )
+(1 row)
+
+SELECT 'a <-> !(b|c)'::tsquery;
+ tsquery
+--------------------------------------
+ 'a' & !( 'a' <-> 'b' | 'a' <-> 'c' )
+(1 row)
+
+SELECT '!(a&b) <-> c'::tsquery;
+ tsquery
+----------------------------------------------
+ !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) & 'c'
+(1 row)
+
+SELECT '!(a|b) <-> c'::tsquery;
+ tsquery
+--------------------------------------
+ !( 'a' <-> 'c' | 'b' <-> 'c' ) & 'c'
+(1 row)
+
+SELECT '(!a|b) <-> c'::tsquery;
+ tsquery
+--------------------------------------
+ !( 'a' <-> 'c' ) & 'c' | 'b' <-> 'c'
+(1 row)
+
+SELECT '(!a&b) <-> c'::tsquery;
tsquery
------------------------------------------
- ( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo'
+ !( 'a' <-> 'c' ) & 'c' & ( 'b' <-> 'c' )
+(1 row)
+
+SELECT 'c <-> (!a|b)'::tsquery;
+ tsquery
+--------------------------------------
+ 'c' & !( 'c' <-> 'a' ) | 'c' <-> 'b'
+(1 row)
+
+SELECT 'c <-> (!a&b)'::tsquery;
+ tsquery
+------------------------------------------
+ 'c' & !( 'c' <-> 'a' ) & ( 'c' <-> 'b' )
+(1 row)
+
+SELECT '(a|b) <-> !c'::tsquery;
+ tsquery
+------------------------------------------------
+ ( 'a' | 'b' ) & !( 'a' <-> 'c' | 'b' <-> 'c' )
+(1 row)
+
+SELECT '(a&b) <-> !c'::tsquery;
+ tsquery
+----------------------------------------------------
+ 'a' & 'b' & !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) )
+(1 row)
+
+SELECT '!c <-> (a|b)'::tsquery;
+ tsquery
+-------------------------------------------------
+ !( 'c' <-> 'a' ) & 'a' | !( 'c' <-> 'b' ) & 'b'
+(1 row)
+
+SELECT '!c <-> (a&b)'::tsquery;
+ tsquery
+-------------------------------------------------
+ !( 'c' <-> 'a' ) & 'a' & !( 'c' <-> 'b' ) & 'b'
(1 row)
+--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
true
------
@@ -342,10 +470,10 @@ SELECT 'a' > 'b & c'::tsquery as "false";
f
(1 row)
-SELECT 'a | f' < 'b & c'::tsquery as "true";
- true
-------
- t
+SELECT 'a | f' < 'b & c'::tsquery as "false";
+ false
+-------
+ f
(1 row)
SELECT 'a | ff' < 'b & c'::tsquery as "false";
@@ -360,6 +488,7 @@ SELECT 'a | f | g' < 'b & c'::tsquery as "false";
f
(1 row)
+--concatenation
SELECT numnode( 'new'::tsquery );
numnode
---------
@@ -402,6 +531,36 @@ SELECT 'foo & bar'::tsquery && 'asd | fg';
'foo' & 'bar' & ( 'asd' | 'fg' )
(1 row)
+SELECT 'a' <-> 'b & d'::tsquery;
+ ?column?
+-----------------------------------
+ ( 'a' <-> 'b' ) & ( 'a' <-> 'd' )
+(1 row)
+
+SELECT 'a & g' <-> 'b & d'::tsquery;
+ ?column?
+-----------------------------------------------------------------------
+ ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) & ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
+(1 row)
+
+SELECT 'a & g' <-> 'b | d'::tsquery;
+ ?column?
+-----------------------------------------------------------------------
+ ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) | ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
+(1 row)
+
+SELECT 'a & g' <-> 'b <-> d'::tsquery;
+ ?column?
+-----------------------------------------------------------
+ ( 'a' <-> ( 'b' <-> 'd' ) ) & ( 'g' <-> ( 'b' <-> 'd' ) )
+(1 row)
+
+SELECT tsquery_phrase('a <3> g', 'b & d', 10);
+ tsquery_phrase
+-------------------------------------------------------------
+ ( ( 'a' <3> 'g' ) <10> 'b' ) & ( ( 'a' <3> 'g' ) <10> 'd' )
+(1 row)
+
-- tsvector-tsquery operations
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca' as "true";
true
@@ -499,6 +658,80 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
t
(1 row)
+--phrase search
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+ true
+------
+ t
+(1 row)
+
+--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
ts_rank
-----------
@@ -613,6 +846,120 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
0.1
(1 row)
+SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
+ ts_rank_cd
+------------
+ 0.181818
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
+ ts_rank_cd
+------------
+ 0.133333
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
+ ts_rank_cd
+------------
+ 0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
+ ts_rank_cd
+------------
+ 0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
+ ts_rank_cd
+------------
+ 0.0909091
+(1 row)
+
+SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
+ ts_rank_cd
+------------
+ 0.0909091
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
+ ts_rank_cd
+------------
+ 0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
+ ts_rank_cd
+------------
+ 0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
+ ts_rank_cd
+------------
+ 0.0714286
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
+ ts_rank_cd
+------------
+ 0
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
-- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip
diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql
index d13ce2e..4d0419e 100644
--- a/src/test/regress/sql/tsdicts.sql
+++ b/src/test/regress/sql/tsdicts.sql
@@ -142,6 +142,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
SELECT to_tsquery('hunspell_tst', 'footballklubber');
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
+SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
+
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE hunspell WITH hunspell_long;
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 405278f..ccd1525 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -129,6 +129,52 @@ SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd
SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
+-- Check stop word deletion, a and s are stop-words
+SELECT to_tsquery('english', '(1 <-> 2) <-> a');
+SELECT to_tsquery('english', '(1 <-> a) <-> 2');
+SELECT to_tsquery('english', '(a <-> 1) <-> 2');
+SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
+SELECT to_tsquery('english', '1 <-> (a <-> 2)');
+SELECT to_tsquery('english', '1 <-> (2 <-> a)');
+
+SELECT to_tsquery('english', '(1 <-> 2) <3> a');
+SELECT to_tsquery('english', '(1 <-> a) <3> 2');
+SELECT to_tsquery('english', '(a <-> 1) <3> 2');
+SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
+SELECT to_tsquery('english', '1 <3> (a <-> 2)');
+SELECT to_tsquery('english', '1 <3> (2 <-> a)');
+
+SELECT to_tsquery('english', '(1 <3> 2) <-> a');
+SELECT to_tsquery('english', '(1 <3> a) <-> 2');
+SELECT to_tsquery('english', '(a <3> 1) <-> 2');
+SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
+SELECT to_tsquery('english', '1 <-> (a <3> 2)');
+SELECT to_tsquery('english', '1 <-> (2 <3> a)');
+
+SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
+SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
+SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
+SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
+SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
+SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
+SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
+SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
+
+SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
+SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
+SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
+SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
+SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
+SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
+SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
+SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
+
+SELECT to_tsquery('foo <-> (a <-> (the <-> bar))');
+SELECT to_tsquery('((foo <-> a) <-> the) <-> bar');
+SELECT to_tsquery('foo <-> a <-> the <-> bar');
+SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways');
+
+
SELECT ts_rank_cd(to_tsvector('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
@@ -165,6 +211,18 @@ Water, water, every where,
S. T. Coleridge (1772-1834)
'), to_tsquery('english', 'ocean'));
+SELECT ts_rank_cd(to_tsvector('english', '
+Day after day, day after day,
+ We stuck, nor breath nor motion,
+As idle as a painted Ship
+ Upon a painted Ocean.
+Water, water, every where
+ And all the boards did shrink;
+Water, water, every where,
+ Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+'), to_tsquery('english', 'painted <-> Ship'));
+
SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
to_tsquery('both & stripped'));
@@ -209,6 +267,30 @@ S. T. Coleridge (1772-1834)
', to_tsquery('english', 'ocean'));
SELECT ts_headline('english', '
+Day after day, day after day,
+ We stuck, nor breath nor motion,
+As idle as a painted Ship
+ Upon a painted Ocean.
+Water, water, every where
+ And all the boards did shrink;
+Water, water, every where,
+ Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'painted Ocean'));
+
+SELECT ts_headline('english', '
+Day after day, day after day,
+ We stuck, nor breath nor motion,
+As idle as a painted Ship
+ Upon a painted Ocean.
+Water, water, every where
+ And all the boards did shrink;
+Water, water, every where,
+ Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'idle as a painted Ship'));
+
+SELECT ts_headline('english', '
<html>
<!-- some comment -->
<body>
@@ -222,6 +304,10 @@ ff-bg
</html>',
to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
+
--Check if headline fragments work
SELECT ts_headline('english', '
Day after day, day after day,
@@ -283,6 +369,8 @@ CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
Moscow moskva | moscow
'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
'foo bar qq' foo & (bar | qq) & city
+1 & (2 <-> 3) 2 <-> 4
+5 <-> 6 5 <-> 7
\.
\set ECHO all
@@ -320,6 +408,11 @@ SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
+SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
+SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
+
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
@@ -386,3 +479,11 @@ select * from pendtest where 'ipsa:*'::tsquery @@ ts;
select * from pendtest where 'ips:*'::tsquery @@ ts;
select * from pendtest where 'ipt:*'::tsquery @@ ts;
select * from pendtest where 'ipi:*'::tsquery @@ ts;
+
+--check OP_PHRASE on index
+create temp table phrase_index_test(fts tsvector);
+insert into phrase_index_test values('A fat cat has just eaten a rat.');
+create index phrase_index_test_idx on phrase_index_test using gin(fts);
+set enable_seqscan = off;
+select * from phrase_index_test where fts @@ phraseto_tsquery('fat cat');
+set enable_seqscan = on;
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index 38b7f65..ecc71c8 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -58,12 +58,42 @@ SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
SELECT $$'\\as'$$::tsquery;
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
+-- phrase transformation
+SELECT 'a <-> (b|c)'::tsquery;
+SELECT '(a|b) <-> c'::tsquery;
+SELECT '(a|b) <-> (d|c)'::tsquery;
+
+SELECT 'a <-> (b&c)'::tsquery;
+SELECT '(a&b) <-> c'::tsquery;
+SELECT '(a&b) <-> (d&c)'::tsquery;
+
+SELECT 'a <-> !b'::tsquery;
+SELECT '!a <-> b'::tsquery;
+SELECT '!a <-> !b'::tsquery;
+
+SELECT 'a <-> !(b&c)'::tsquery;
+SELECT 'a <-> !(b|c)'::tsquery;
+SELECT '!(a&b) <-> c'::tsquery;
+SELECT '!(a|b) <-> c'::tsquery;
+
+SELECT '(!a|b) <-> c'::tsquery;
+SELECT '(!a&b) <-> c'::tsquery;
+SELECT 'c <-> (!a|b)'::tsquery;
+SELECT 'c <-> (!a&b)'::tsquery;
+
+SELECT '(a|b) <-> !c'::tsquery;
+SELECT '(a&b) <-> !c'::tsquery;
+SELECT '!c <-> (a|b)'::tsquery;
+SELECT '!c <-> (a&b)'::tsquery;
+
+--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false";
-SELECT 'a | f' < 'b & c'::tsquery as "true";
+SELECT 'a | f' < 'b & c'::tsquery as "false";
SELECT 'a | ff' < 'b & c'::tsquery as "false";
SELECT 'a | f | g' < 'b & c'::tsquery as "false";
+--concatenation
SELECT numnode( 'new'::tsquery );
SELECT numnode( 'new & york'::tsquery );
SELECT numnode( 'new & york | qwery'::tsquery );
@@ -72,6 +102,11 @@ SELECT 'foo & bar'::tsquery && 'asd';
SELECT 'foo & bar'::tsquery || 'asd & fg';
SELECT 'foo & bar'::tsquery || !!'asd & fg'::tsquery;
SELECT 'foo & bar'::tsquery && 'asd | fg';
+SELECT 'a' <-> 'b & d'::tsquery;
+SELECT 'a & g' <-> 'b & d'::tsquery;
+SELECT 'a & g' <-> 'b | d'::tsquery;
+SELECT 'a & g' <-> 'b <-> d'::tsquery;
+SELECT tsquery_phrase('a <3> g', 'b & d', 10);
-- tsvector-tsquery operations
@@ -93,6 +128,23 @@ SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+--phrase search
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
+
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+
+--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
@@ -114,6 +166,27 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
+SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
+SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
+SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
+SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
+SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
+SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
+SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
+SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
+
-- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers