Hi Artur,

I've made an attempt to fix some of the issues you've listed, although there's still much work to be done. I'll add some comments later.

> This function has the duplicated piece from the tsvector_setweight()
> from tsvector_op.c. You can define new function for it.

I'm not sure it's worth the trouble. IMO these functions are relatively small and we won't benefit from extracting the duplicate code.

> These functions was removed in 9acb9007de30b3daaa9efc16763c3bc6e3e0a92d.
> It seems that tsvector_op.c was not synchronized with the master.

Got it, thanks!

-- 
Dmitry Ivanov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out index 972f764..97379e7 100644 --- a/contrib/tsearch2/expected/tsearch2.out +++ b/contrib/tsearch2/expected/tsearch2.out @@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery; (1 row) SELECT '1|(2|(4|(5|6)))'::tsquery; - tsquery ------------------------------------------ - '1' | ( '2' | ( '4' | ( '5' | '6' ) ) ) + tsquery +----------------------------- + '1' | '2' | '4' | '5' | '6' (1 row) SELECT '1|2|4|5|6'::tsquery; - tsquery ------------------------------------------ - ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6' + tsquery +----------------------------- + '1' | '2' | '4' | '5' | '6' (1 row) SELECT '1&(2&(4&(5&6)))'::tsquery; @@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery; select 'a | f' < 'b & c'::tsquery; ?column? ---------- - t + f (1 row) select 'a | ff' < 'b & c'::tsquery; @@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york'; set enable_seqscan=on; select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city'); - rewrite ----------------------------------------------------------------------------------- - 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) ) + rewrite +------------------------------------------------------------------------------ + 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' ) (1 row) select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); @@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex (1 row) select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text ); - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + rewrite 
+--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery; @@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery; (1 row) select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) select keyword from test_tsquery where keyword @> 'new'; @@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e (1 row) select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword; @@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e (1 row) select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' 
& ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); @@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e (1 row) select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword; @@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e (1 row) select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) set enable_seqscan='on'; diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 7c3ef92..8568567 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats'); <para> A 
<type>tsquery</type> value stores lexemes that are to be searched for, and combines them honoring the Boolean operators - <literal>&</literal> (AND), <literal>|</literal> (OR), and - <literal>!</> (NOT). Parentheses can be used to enforce grouping + <literal>&</literal> (AND), <literal>|</literal> (OR), + <literal>!</> (NOT) and <literal>?</> (FOLLOWED BY) phrase search + operator. Parentheses can be used to enforce grouping of the operators: <programlisting> @@ -3946,8 +3947,8 @@ SELECT 'fat & rat & ! cat'::tsquery; </programlisting> In the absence of parentheses, <literal>!</> (NOT) binds most tightly, - and <literal>&</literal> (AND) binds more tightly than - <literal>|</literal> (OR). + and <literal>&</literal> (AND) and <literal>?</literal> (FOLLOWED BY) + both bind more tightly than <literal>|</literal> (OR). </para> <para> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 000489d..f98e312 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -9103,6 +9103,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple <entry><literal>!'cat'</literal></entry> </row> <row> + <entry> <literal>??</literal> </entry> + <entry><type>tsquery</> followed by <type>tsquery</></entry> + <entry><literal>to_tsquery('fat') ?? to_tsquery('rat')</literal></entry> + <entry><literal>'fat' ? 
'rat'</literal></entry> + </row> + <row> <entry> <literal>@></literal> </entry> <entry><type>tsquery</> contains another ?</entry> <entry><literal>'cat'::tsquery @> 'cat & rat'::tsquery</literal></entry> @@ -9197,6 +9203,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple <row> <entry> <indexterm> + <primary>phraseto_tsquery</primary> + </indexterm> + <literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal> + </entry> + <entry><type>tsquery</type></entry> + <entry>produce <type>tsquery</> ignoring punctuation</entry> + <entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry> + <entry><literal>'fat' ? 'rat'</literal></entry> + </row> + <row> + <entry> + <indexterm> <primary>querytree</primary> </indexterm> <literal><function>querytree(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>)</function></literal> @@ -9220,6 +9238,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple </row> <row> <entry> + <literal><function>setweight(<type>tsquery</>, <type>"char"</>)</function></literal> + </entry> + <entry><type>tsquery</type></entry> + <entry>add weight to each element of <type>tsquery</></entry> + <entry><literal>setweight('fat ? cat & rat:B'::tsquery, 'A')</literal></entry> + <entry><literal>( 'fat':A ? 
'cat':A ) & 'rat':AB</literal></entry> + </row> + <row> + <entry> <indexterm> <primary>setweight</primary> <secondary>setweight by filter</secondary> @@ -9399,6 +9426,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple <row> <entry> <indexterm> + <primary>tsquery_phrase</primary> + </indexterm> + <literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal> + </entry> + <entry><type>tsquery</type></entry> + <entry>implementation of <literal>??</> (FOLLOWED BY) operator</entry> + <entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry> + <entry><literal>'fat' ? 'cat'</literal></entry> + </row> + <row> + <entry> + <literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal> + </entry> + <entry><type>tsquery</type></entry> + <entry>phrase-concatenate with distance</entry> + <entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry> + <entry><literal>'fat' ?[10] 'cat'</literal></entry> + </row> + <row> + <entry> + <indexterm> <primary>tsvector_update_trigger</primary> </indexterm> <literal><function>tsvector_update_trigger()</function></literal> diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index ea3abc9..74b17d9 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -263,9 +263,10 @@ SELECT 'fat & cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t As the above example suggests, a <type>tsquery</type> is not just raw text, any more than a <type>tsvector</type> is. 
A <type>tsquery</type> contains search terms, which must be already-normalized lexemes, and - may combine multiple terms using AND, OR, and NOT operators. + may combine multiple terms using AND, OR, NOT and FOLLOWED BY operators. (For details see <xref linkend="datatype-textsearch">.) There are - functions <function>to_tsquery</> and <function>plainto_tsquery</> + functions <function>to_tsquery</>, <function>plainto_tsquery</> + and <function>phraseto_tsquery</> that are helpful in converting user-written text into a proper <type>tsquery</type>, for example by normalizing words appearing in the text. Similarly, <function>to_tsvector</> is used to parse and @@ -294,6 +295,35 @@ SELECT 'fat cats ate fat rats'::tsvector @@ to_tsquery('fat & rat'); </para> <para> + Phrase search is made possible with the help of the <literal>?</> + (FOLLOWED BY) operator, which enforces lexeme order. This allows you + to discard strings not containing the desired phrase, for example: + +<programlisting> +SELECT q @@ to_tsquery('fatal ? error') +FROM unnest(array[to_tsvector('fatal error'), + to_tsvector('error is not fatal')]) AS q; +?column? +---------- +t +f +</programlisting> + + A more generic version of the FOLLOWED BY operator takes form of + <literal>?[N]</>, where N stands for the greatest allowed distance + between the specified lexemes. The <literal>phraseto_tsquery</> + function makes use of this behavior in order to construct a + <literal>tsquery</> capable of matching the provided phrase: + +<programlisting> +SELECT phraseto_tsquery('cat ate some rats'); + phraseto_tsquery +------------------------------ + ( 'cat' ? 
'ate' ) ?[2] 'rat' +</programlisting> + </para> + + <para> The <literal>@@</literal> operator also supports <type>text</type> input, allowing explicit conversion of a text string to <type>tsvector</type> or <type>tsquery</> to be skipped @@ -709,11 +739,14 @@ UPDATE tt SET ti = <para> <productname>PostgreSQL</productname> provides the - functions <function>to_tsquery</function> and - <function>plainto_tsquery</function> for converting a query to - the <type>tsquery</type> data type. <function>to_tsquery</function> - offers access to more features than <function>plainto_tsquery</function>, - but is less forgiving about its input. + functions <function>to_tsquery</function>, + <function>plainto_tsquery</function> and + <function>phraseto_tsquery</function> + for converting a query to the <type>tsquery</type> data type. + <function>to_tsquery</function> offers access to more features + than both <function>plainto_tsquery</function> and + <function>phraseto_tsquery</function>, but is less forgiving + about its input. </para> <indexterm> @@ -728,7 +761,8 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type> <function>to_tsquery</function> creates a <type>tsquery</> value from <replaceable>querytext</replaceable>, which must consist of single tokens separated by the Boolean operators <literal>&</literal> (AND), - <literal>|</literal> (OR) and <literal>!</literal> (NOT). These operators + <literal>|</literal> (OR), <literal>!</literal> (NOT), and also the + <literal>?</literal> (FOLLOWED BY) phrase search operator. These operators can be grouped using parentheses. 
In other words, the input to <function>to_tsquery</function> must already follow the general rules for <type>tsquery</> input, as described in <xref @@ -814,8 +848,8 @@ SELECT plainto_tsquery('english', 'The Fat Rats'); </screen> Note that <function>plainto_tsquery</> cannot - recognize Boolean operators, weight labels, or prefix-match labels - in its input: + recognize Boolean and phrase search operators, weight labels, + or prefix-match labels in its input: <screen> SELECT plainto_tsquery('english', 'The Fat & Rats:C'); @@ -827,6 +861,57 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C'); Here, all the input punctuation was discarded as being space symbols. </para> + <indexterm> + <primary>phraseto_tsquery</primary> + </indexterm> + +<synopsis> +phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">querytext</replaceable> <type>text</>) returns <type>tsquery</> +</synopsis> + + <para> + <function>phraseto_tsquery</> behaves much like + <function>plainto_tsquery</>, with the exception + that it utilizes the <literal>?</literal> (FOLLOWED BY) phrase search + operator instead of the <literal>&</literal> (AND) Boolean operator. + This is particularly useful when searching for exact lexeme sequences, + since the phrase search operator helps to maintain lexeme order. + </para> + + <para> + Example: + +<screen> +SELECT phraseto_tsquery('english', 'The Fat Rats'); + phraseto_tsquery +------------------ + 'fat' ? 'rat' +</screen> + + Just like the <function>plainto_tsquery</>, the + <function>phraseto_tsquery</> function cannot + recognize Boolean and phrase search operators, weight labels, + or prefix-match labels in its input: + +<screen> +SELECT phraseto_tsquery('english', 'The Fat & Rats:C'); + phraseto_tsquery +------------------------- + ( 'fat' ? 'rat' ) ? 
'c' +</screen> + + It is possible to specify the configuration to be used to parse the document, + for example, we could create a new one using the hunspell dictionary + (namely 'eng_hunspell') in order to match phrases with different word forms: + +<screen> +SELECT phraseto_tsquery('eng_hunspell', 'developer of the building which collapsed'); + phraseto_tsquery +------------------------------------------------------------------------------------------------ + ( 'developer' ?[3] 'building' ) ?[2] 'collapse' | ( 'developer' ?[3] 'build' ) ?[2] 'collapse' +</screen> + </para> + </sect2> <sect2 id="textsearch-ranking"> @@ -1390,6 +1475,81 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank <varlistentry> <term> + <literal><type>tsquery</> ?? <type>tsquery</></literal> + </term> + + <listitem> + <para> + Returns the phrase-concatenation of the two given queries. + +<screen> +SELECT to_tsquery('fat') ?? to_tsquery('cat | rat'); + ?column? +------------------------------- + 'fat' ? 'cat' | 'fat' ? 'rat' +</screen> + </para> + </listitem> + + </varlistentry> + + <varlistentry> + + <term> + <indexterm> + <primary>tsquery_phrase</primary> + </indexterm> + + <literal>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</> [, <replaceable class="PARAMETER">distance</replaceable> <type>integer</> ]) returns <type>tsquery</></literal> + </term> + + <listitem> + <para> + Returns the distanced phrase-concatenation of the two given queries. + This function lies in the implementation of the <literal>??</> operator. 
+ +<screen> +SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10); + tsquery_phrase +------------------- + 'fat' ?[10] 'cat' +</screen> + </para> + </listitem> + + </varlistentry> + + <varlistentry> + + <term> + <indexterm> + <primary>setweight</primary> + </indexterm> + + <literal>setweight(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>) returns <type>tsquery</></literal> + </term> + + <listitem> + <para> + <function>setweight</> returns a copy of the input query in which every + position has been labeled with the given <replaceable>weight</>(s), either + <literal>A</literal>, <literal>B</literal>, <literal>C</literal>, + <literal>D</literal> or their combination. These labels are retained when + queries are concatenated, allowing words from different parts of a document + to be weighted differently by ranking functions. + </para> + + <para> + Note that weight labels apply to <emphasis>positions</>, not + <emphasis>lexemes</>. If the input query has been stripped of + positions then <function>setweight</> does nothing. + </para> + </listitem> + </varlistentry> + + <varlistentry> + + <term> <indexterm> <primary>numnode</primary> </indexterm> diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index aa77ec0..7a74411 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -18,6 +18,13 @@ #include "utils/builtins.h" +typedef struct MorphOpaque +{ + Oid cfg_id; + int qoperator; /* query operator */ +} MorphOpaque; + + Datum get_current_ts_config(PG_FUNCTION_ARGS) { @@ -253,7 +260,6 @@ to_tsvector(PG_FUNCTION_ARGS) * to_tsquery */ - /* * This function is used for morph parsing. 
* @@ -267,28 +273,39 @@ to_tsvector(PG_FUNCTION_ARGS) static void pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix) { - int32 count = 0; - ParsedText prs; - uint32 variant, - pos, - cntvar = 0, - cntpos = 0, - cnt = 0; - Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually - * an Oid, not a pointer */ + int32 count = 0; + ParsedText prs; + uint32 variant, + pos = 0, + cntvar = 0, + cntpos = 0, + cnt = 0; + MorphOpaque *data = (MorphOpaque *) DatumGetPointer(opaque); prs.lenwords = 4; prs.curwords = 0; prs.pos = 0; prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); - parsetext(cfg_id, &prs, strval, lenval); + parsetext(data->cfg_id, &prs, strval, lenval); if (prs.curwords > 0) { - while (count < prs.curwords) { + if (pos > 0 && pos + 1 < prs.words[count].pos.pos) + { + while (pos + 1 < prs.words[count].pos.pos) + { + /* put placeholders for each stop word */ + pushStop(state); + if (cntpos) + pushOperator(state, data->qoperator, 1); + cntpos++; + pos++; + } + } + pos = prs.words[count].pos.pos; cntvar = 0; while (count < prs.curwords && pos == prs.words[count].pos.pos) @@ -296,31 +313,33 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, variant = prs.words[count].nvariant; cnt = 0; - while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) + while (count < prs.curwords && + pos == prs.words[count].pos.pos && + variant == prs.words[count].nvariant) { - - pushValue(state, prs.words[count].word, prs.words[count].len, weight, - ((prs.words[count].flags & TSL_PREFIX) || prefix) ? 
true : false); + pushValue(state, + prs.words[count].word, + prs.words[count].len, + weight, + ((prs.words[count].flags & TSL_PREFIX) || prefix)); pfree(prs.words[count].word); if (cnt) - pushOperator(state, OP_AND); + pushOperator(state, OP_AND, 0); cnt++; count++; } if (cntvar) - pushOperator(state, OP_OR); + pushOperator(state, OP_OR, 0); cntvar++; } if (cntpos) - pushOperator(state, OP_AND); - + pushOperator(state, data->qoperator, 1); cntpos++; } pfree(prs.words); - } else pushStop(state); @@ -329,44 +348,18 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, Datum to_tsquery_byid(PG_FUNCTION_ARGS) { - Oid cfgid = PG_GETARG_OID(0); - text *in = PG_GETARG_TEXT_P(1); - TSQuery query; - QueryItem *res; - int32 len; - - query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false); - - if (query->size == 0) - PG_RETURN_TSQUERY(query); - - /* clean out any stopword placeholders from the tree */ - res = clean_fakeval(GETQUERY(query), &len); - if (!res) - { - SET_VARSIZE(query, HDRSIZETQ); - query->size = 0; - PG_RETURN_POINTER(query); - } - memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem)); + text *in = PG_GETARG_TEXT_P(1); + TSQuery query; + MorphOpaque data; - /* - * Removing the stopword placeholders might've resulted in fewer - * QueryItems. If so, move the operands up accordingly. 
- */ - if (len != query->size) - { - char *oldoperand = GETOPERAND(query); - int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query); - - Assert(len < query->size); + data.cfg_id = PG_GETARG_OID(0); + data.qoperator = OP_AND; - query->size = len; - memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query)); - SET_VARSIZE(query, COMPUTESIZE(len, lenoperand)); - } + query = parse_tsquery(text_to_cstring(in), + pushval_morph, + PointerGetDatum(&data), + false); - pfree(res); PG_RETURN_TSQUERY(query); } @@ -385,55 +378,60 @@ to_tsquery(PG_FUNCTION_ARGS) Datum plainto_tsquery_byid(PG_FUNCTION_ARGS) { - Oid cfgid = PG_GETARG_OID(0); - text *in = PG_GETARG_TEXT_P(1); - TSQuery query; - QueryItem *res; - int32 len; + text *in = PG_GETARG_TEXT_P(1); + TSQuery query; + MorphOpaque data; - query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true); + data.cfg_id = PG_GETARG_OID(0); + data.qoperator = OP_AND; - if (query->size == 0) - PG_RETURN_TSQUERY(query); + query = parse_tsquery(text_to_cstring(in), + pushval_morph, + PointerGetDatum(&data), + true); - /* clean out any stopword placeholders from the tree */ - res = clean_fakeval(GETQUERY(query), &len); - if (!res) - { - SET_VARSIZE(query, HDRSIZETQ); - query->size = 0; - PG_RETURN_POINTER(query); - } - memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem)); + PG_RETURN_POINTER(query); +} - /* - * Removing the stopword placeholders might've resulted in fewer - * QueryItems. If so, move the operands up accordingly. 
- */ - if (len != query->size) - { - char *oldoperand = GETOPERAND(query); - int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query); +Datum +plainto_tsquery(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(0); + Oid cfgId; - Assert(len < query->size); + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(in))); +} - query->size = len; - memmove((void *) GETOPERAND(query), oldoperand, lenoperand); - SET_VARSIZE(query, COMPUTESIZE(len, lenoperand)); - } - pfree(res); - PG_RETURN_POINTER(query); +Datum +phraseto_tsquery_byid(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(1); + TSQuery query; + MorphOpaque data; + + data.cfg_id = PG_GETARG_OID(0); + data.qoperator = OP_PHRASE; + + query = parse_tsquery(text_to_cstring(in), + pushval_morph, + PointerGetDatum(&data), + true); + + PG_RETURN_TSQUERY(query); } Datum -plainto_tsquery(PG_FUNCTION_ARGS) +phraseto_tsquery(PG_FUNCTION_ARGS) { text *in = PG_GETARG_TEXT_P(0); Oid cfgId; cfgId = getTSCurrentConfig(true); - PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid, + PG_RETURN_DATUM(DirectFunctionCall2(phraseto_tsquery_byid, ObjectIdGetDatum(cfgId), PointerGetDatum(in))); } diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c index 64cf906..b19f118 100644 --- a/src/backend/tsearch/ts_parse.c +++ b/src/backend/tsearch/ts_parse.c @@ -454,7 +454,7 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type) } static void -hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) +hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen) { int i; QueryItem *item = GETQUERY(query); @@ -467,6 +467,7 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) } word = &(prs->words[prs->curwords - 1]); + word->pos = LIMITPOS(pos); for (i = 0; i < query->size; i++) { if (item->type == QI_VAL && @@ -492,6 +493,7 @@ 
addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme { ParsedLex *tmplexs; TSLexeme *ptr; + int32 savedpos; while (lexs) { @@ -500,9 +502,12 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type); ptr = norms; + savedpos = prs->vectorpos; while (ptr && ptr->lexeme) { - hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme)); + if (ptr->flags & TSL_ADDPOS) + savedpos++; + hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme)); ptr++; } @@ -516,6 +521,8 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme ptr = norms; while (ptr->lexeme) { + if (ptr->flags & TSL_ADDPOS) + prs->vectorpos++; pfree(ptr->lexeme); ptr++; } @@ -575,7 +582,10 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu do { if ((norms = LexizeExec(&ldata, &lexs)) != NULL) + { + prs->vectorpos++; addHLParsedLex(prs, query, lexs, norms); + } else addHLParsedLex(prs, query, lexs, NULL); } while (norms); diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c index 7462888..133c937 100644 --- a/src/backend/tsearch/ts_selfuncs.c +++ b/src/backend/tsearch/ts_selfuncs.c @@ -400,6 +400,7 @@ tsquery_opr_selec(QueryItem *item, char *operand, lookup, length, minfreq); break; + case OP_PHRASE: case OP_AND: s1 = tsquery_opr_selec(item + 1, operand, lookup, length, minfreq); diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index c921294..731419a 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2027,15 +2027,36 @@ typedef struct } hlCheck; static bool -checkcondition_HL(void *checkval, QueryOperand *val) +checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) { int i; + hlCheck *checkval = (hlCheck *) opaque; - for (i = 0; i < ((hlCheck *) checkval)->len; i++) + for (i = 0; i < checkval->len; i++) { - if 
(((hlCheck *) checkval)->words[i].item == val) - return true; + if (checkval->words[i].item == val) + { + /* don't need to find all positions */ + if (!data) + return true; + + if (!data->pos) + { + data->pos = palloc(sizeof(WordEntryPos) * checkval->len); + data->allocated = true; + data->npos = 1; + data->pos[0] = checkval->words[i].pos; + } + else if (data->pos[data->npos-1] < checkval->words[i].pos) + { + data->pos[data->npos++] = checkval->words[i].pos; + } + } } + + if (data && data->npos > 0) + return true; + return false; } diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index fef5947..d04689b 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -179,14 +179,16 @@ typedef struct } GinChkVal; static GinTernaryValue -checkcondition_gin(void *checkval, QueryOperand *val) +checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data) { - GinChkVal *gcv = (GinChkVal *) checkval; int j; - /* if any val requiring a weight is used, set recheck flag */ - if (val->weight != 0) - *(gcv->need_recheck) = true; + /* + * if any val requiring a weight is used or caller + * needs position information then set recheck flag + */ + if (val->weight != 0 || data != NULL) + *gcv->need_recheck = true; /* convert item's number to corresponding entry's (operand's) number */ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; @@ -196,15 +198,21 @@ checkcondition_gin(void *checkval, QueryOperand *val) } /* + * Wrapper of check condition function for TS_execute. + */ +static bool +checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) +{ + return checkcondition_gin_internal((GinChkVal *) checkval, + val, + data) != GIN_FALSE; +} + +/* * Evaluate tsquery boolean expression using ternary logic. - * - * chkcond is a callback function used to evaluate each VAL node in the query. - * checkval can be used to pass information to the callback. 
TS_execute doesn't - * do anything with it. */ static GinTernaryValue -TS_execute_ternary(QueryItem *curitem, void *checkval, - GinTernaryValue (*chkcond) (void *checkval, QueryOperand *val)) +TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) { GinTernaryValue val1, val2, @@ -214,22 +222,30 @@ TS_execute_ternary(QueryItem *curitem, void *checkval, check_stack_depth(); if (curitem->type == QI_VAL) - return chkcond(checkval, (QueryOperand *) curitem); + return checkcondition_gin_internal(gcv, + (QueryOperand*) curitem, + NULL /* don't have any position info */); switch (curitem->qoperator.oper) { case OP_NOT: - result = TS_execute_ternary(curitem + 1, checkval, chkcond); + result = TS_execute_ternary(gcv, curitem + 1); if (result == GIN_MAYBE) return result; return !result; + case OP_PHRASE: + /* + * GIN doesn't contain any information about postions, + * treat OP_PHRASE as OP_AND with recheck requirement + */ + *gcv->need_recheck = true; + case OP_AND: - val1 = TS_execute_ternary(curitem + curitem->qoperator.left, - checkval, chkcond); + val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left); if (val1 == GIN_FALSE) return GIN_FALSE; - val2 = TS_execute_ternary(curitem + 1, checkval, chkcond); + val2 = TS_execute_ternary(gcv, curitem + 1); if (val2 == GIN_FALSE) return GIN_FALSE; if (val1 == GIN_TRUE && val2 == GIN_TRUE) @@ -238,11 +254,10 @@ TS_execute_ternary(QueryItem *curitem, void *checkval, return GIN_MAYBE; case OP_OR: - val1 = TS_execute_ternary(curitem + curitem->qoperator.left, - checkval, chkcond); + val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left); if (val1 == GIN_TRUE) return GIN_TRUE; - val2 = TS_execute_ternary(curitem + 1, checkval, chkcond); + val2 = TS_execute_ternary(gcv, curitem + 1); if (val2 == GIN_TRUE) return GIN_TRUE; if (val1 == GIN_FALSE && val2 == GIN_FALSE) @@ -327,9 +342,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; - res = 
TS_execute_ternary(GETQUERY(query), - &gcv, - checkcondition_gin); + res = TS_execute_ternary(&gcv, GETQUERY(query)); if (res == GIN_TRUE && recheck) res = GIN_MAYBE; diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index 0100cf4..cdd5d43 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -298,7 +298,7 @@ typedef struct * is there value 'val' in array or not ? */ static bool -checkcondition_arr(void *checkval, QueryOperand *val) +checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) { int32 *StopLow = ((CHKVAL *) checkval)->arrb; int32 *StopHigh = ((CHKVAL *) checkval)->arre; @@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val) } static bool -checkcondition_bit(void *checkval, QueryOperand *val) +checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) { /* * we are not able to find a prefix in signature tree diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 0732060..931861d 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -56,7 +56,7 @@ struct TSQueryParserStateData /* * subroutine to parse the modifiers (weight and prefix flag currently) - * part, like ':1AB' of a query. + * part, like ':AB*' of a query. 
*/ static char * get_modifiers(char *buf, int16 *weight, bool *prefix) @@ -101,6 +101,52 @@ get_modifiers(char *buf, int16 *weight, bool *prefix) } /* + * Get distance for phrase node like ?[123] + */ +static char * +get_phrase_distance(char *buf, int16 *distance) +{ + char *ptr = buf; + char *endptr; + long l; + + *distance = 1; + + while (ptr && t_isspace(ptr)) + ptr += pg_mblen(ptr); + + if (!t_iseq(ptr, '[')) + return buf; + ptr++; + + while (*ptr && t_isspace(ptr)) + ptr += pg_mblen(ptr); + + l = strtol(ptr, &endptr, 10); + + if (ptr == endptr) + return buf; + + if (errno == ERANGE || l < 0 || l > MAXENTRYPOS) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("distance in phrase operator should be non-negative and less than %d", + MAXENTRYPOS))); + + ptr = endptr; + + while (*ptr && t_isspace(ptr)) + ptr += pg_mblen(ptr); + + if (!t_iseq(ptr, ']')) + return buf; + ptr++; + + *distance = (int16) l; + return ptr; +} + +/* * token types for parsing */ typedef enum @@ -192,6 +238,15 @@ gettoken_query(TSQueryParserState state, (state->buf)++; return PT_OPR; } + if (t_iseq(state->buf, '?')) + { + state->state = WAITOPERAND; + *operator = OP_PHRASE; + (state->buf)++; + state->buf = get_phrase_distance(state->buf, + weight); /* use as storage for distance */ + return PT_OPR; + } else if (t_iseq(state->buf, ')')) { (state->buf)++; @@ -223,15 +278,16 @@ gettoken_query(TSQueryParserState state, * Push an operator to state->polstr */ void -pushOperator(TSQueryParserState state, int8 oper) +pushOperator(TSQueryParserState state, int8 oper, int16 distance) { QueryOperator *tmp; - Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR); + Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE); tmp = (QueryOperator *) palloc0(sizeof(QueryOperator)); tmp->type = QI_OPR; tmp->oper = oper; + tmp->distance = (oper == OP_PHRASE) ? 
distance : 0; /* left is filled in later with findoprnd */ state->polstr = lcons(tmp, state->polstr); @@ -320,6 +376,11 @@ pushStop(TSQueryParserState state) #define STACKDEPTH 32 +typedef struct { + int8 op; + int16 distance; +} stack_op; + /* * Make polish (prefix) notation of query. * @@ -334,7 +395,7 @@ makepol(TSQueryParserState state, ts_tokentype type; int lenval = 0; char *strval = NULL; - int8 opstack[STACKDEPTH]; + stack_op opstack[STACKDEPTH]; int lenstack = 0; int16 weight = 0; bool prefix; @@ -348,39 +409,48 @@ makepol(TSQueryParserState state, { case PT_VAL: pushval(opaque, state, strval, lenval, weight, prefix); - while (lenstack && (opstack[lenstack - 1] == OP_AND || - opstack[lenstack - 1] == OP_NOT)) + while (lenstack && (opstack[lenstack - 1].op == OP_AND || + opstack[lenstack - 1].op == OP_PHRASE || + opstack[lenstack - 1].op == OP_NOT)) { lenstack--; - pushOperator(state, opstack[lenstack]); + pushOperator(state, + opstack[lenstack].op, + opstack[lenstack].distance); } break; case PT_OPR: if (lenstack && operator == OP_OR) - pushOperator(state, OP_OR); + pushOperator(state, OP_OR, 0); else { if (lenstack == STACKDEPTH) /* internal error */ elog(ERROR, "tsquery stack too small"); - opstack[lenstack] = operator; + opstack[lenstack].op = operator; + opstack[lenstack].distance = weight; lenstack++; } break; case PT_OPEN: makepol(state, pushval, opaque); - while (lenstack && (opstack[lenstack - 1] == OP_AND || - opstack[lenstack - 1] == OP_NOT)) + while (lenstack && (opstack[lenstack - 1].op == OP_AND || + opstack[lenstack - 1].op == OP_PHRASE || + opstack[lenstack - 1].op == OP_NOT)) { lenstack--; - pushOperator(state, opstack[lenstack]); + pushOperator(state, + opstack[lenstack].op, + opstack[lenstack].distance); } break; case PT_CLOSE: while (lenstack) { lenstack--; - pushOperator(state, opstack[lenstack]); + pushOperator(state, + opstack[lenstack].op, + opstack[lenstack].distance); }; return; case PT_ERR: @@ -394,12 +464,14 @@ 
makepol(TSQueryParserState state, while (lenstack) { lenstack--; - pushOperator(state, opstack[lenstack]); + pushOperator(state, + opstack[lenstack].op, + opstack[lenstack].distance); } } static void -findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes) +findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) { /* since this function recurses, it could be driven to stack overflow. */ check_stack_depth(); @@ -407,10 +479,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes) if (*pos >= nnodes) elog(ERROR, "malformed tsquery: operand not found"); - if (ptr[*pos].type == QI_VAL || - ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here, they - * haven't been cleaned away yet. */ + if (ptr[*pos].type == QI_VAL) + { + (*pos)++; + } + else if (ptr[*pos].type == QI_VALSTOP) { + *needcleanup=true; (*pos)++; } else @@ -421,19 +496,24 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes) { ptr[*pos].qoperator.left = 1; (*pos)++; - findoprnd_recurse(ptr, pos, nnodes); + findoprnd_recurse(ptr, pos, nnodes, needcleanup); } else { QueryOperator *curitem = &ptr[*pos].qoperator; int tmp = *pos; - Assert(curitem->oper == OP_AND || curitem->oper == OP_OR); + Assert(curitem->oper == OP_AND || + curitem->oper == OP_OR || + curitem->oper == OP_PHRASE); + + if ( curitem->oper == OP_PHRASE ) + *needcleanup = true; (*pos)++; - findoprnd_recurse(ptr, pos, nnodes); + findoprnd_recurse(ptr, pos, nnodes, needcleanup); curitem->left = *pos - tmp; - findoprnd_recurse(ptr, pos, nnodes); + findoprnd_recurse(ptr, pos, nnodes, needcleanup); } } } @@ -444,12 +524,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes) * QueryItems must be in polish (prefix) notation. 
*/ static void -findoprnd(QueryItem *ptr, int size) +findoprnd(QueryItem *ptr, int size, bool *needcleanup) { uint32 pos; + *needcleanup = false; pos = 0; - findoprnd_recurse(ptr, &pos, size); + findoprnd_recurse(ptr, &pos, size, needcleanup); if (pos != size) elog(ERROR, "malformed tsquery: extra nodes"); @@ -466,9 +547,6 @@ findoprnd(QueryItem *ptr, int size) * * opaque is passed on to pushval as is, pushval can use it to store its * private state. - * - * The returned query might contain QI_STOPVAL nodes. The caller is responsible - * for cleaning them up (with clean_fakeval) */ TSQuery parse_tsquery(char *buf, @@ -482,6 +560,7 @@ parse_tsquery(char *buf, int commonlen; QueryItem *ptr; ListCell *cell; + bool needcleanup; /* init state */ state.buffer = buf; @@ -555,7 +634,14 @@ parse_tsquery(char *buf, pfree(state.op); /* Set left operand pointers for every operator. */ - findoprnd(ptr, query->size); + findoprnd(ptr, query->size, &needcleanup); + + /* + * QI_VALSTOP nodes should be cleaned and + * OP_PHRASE should be pushed down + */ + if ( needcleanup ) + return cleanup_fakeval_and_phrase(query); return query; } @@ -600,12 +686,15 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ (inf)->cur = (inf)->buf + len; \ } +#define PRINT_PRIORITY(x) \ + ( (QO_PRIORITY(x) == OP_NOT) ? OP_NOT_PHRASE : QO_PRIORITY(x) ) + /* * recursive walk on tree and print it in * infix (human-readable) view */ static void -infix(INFIX *in, bool first) +infix(INFIX *in, int parentPriority) { /* since this function recurses, it could be driven to stack overflow. 
*/ check_stack_depth(); @@ -674,24 +763,22 @@ infix(INFIX *in, bool first) } else if (in->curpol->qoperator.oper == OP_NOT) { - bool isopr = false; - - RESIZEBUF(in, 1); - *(in->cur) = '!'; - in->cur++; - *(in->cur) = '\0'; - in->curpol++; + int priority = PRINT_PRIORITY(in->curpol); - if (in->curpol->type == QI_OPR) + if (priority < parentPriority) { - isopr = true; RESIZEBUF(in, 2); sprintf(in->cur, "( "); in->cur = strchr(in->cur, '\0'); } + RESIZEBUF(in, 1); + *(in->cur) = '!'; + in->cur++; + *(in->cur) = '\0'; + in->curpol++; - infix(in, isopr); - if (isopr) + infix(in, priority); + if (priority < parentPriority) { RESIZEBUF(in, 2); sprintf(in->cur, " )"); @@ -701,11 +788,18 @@ infix(INFIX *in, bool first) else { int8 op = in->curpol->qoperator.oper; + int priority = PRINT_PRIORITY(in->curpol); + int16 distance = in->curpol->qoperator.distance; INFIX nrm; + bool needParenthesis = false; in->curpol++; - if (op == OP_OR && !first) + if (priority < parentPriority || + (op == OP_PHRASE && + (priority == parentPriority || /* phrases are not commutative! */ + parentPriority == OP_PRIORITY(OP_AND)))) { + needParenthesis = true; RESIZEBUF(in, 2); sprintf(in->cur, "( "); in->cur = strchr(in->cur, '\0'); @@ -717,14 +811,14 @@ infix(INFIX *in, bool first) nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); /* get right operand */ - infix(&nrm, false); + infix(&nrm, priority); /* get & print left operand */ in->curpol = nrm.curpol; - infix(in, false); + infix(in, priority); /* print operator & right operand */ - RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); + RESIZEBUF(in, 3 + (2 + 10 /* distance */) + (nrm.cur - nrm.buf)); switch (op) { case OP_OR: @@ -733,6 +827,12 @@ infix(INFIX *in, bool first) case OP_AND: sprintf(in->cur, " & %s", nrm.buf); break; + case OP_PHRASE: + if (distance != 1) + sprintf(in->cur, " ?[%d] %s", distance, nrm.buf); + else + sprintf(in->cur, " ? 
%s", nrm.buf); + break; default: /* OP_NOT is handled in above if-branch */ elog(ERROR, "unrecognized operator type: %d", op); @@ -740,7 +840,7 @@ infix(INFIX *in, bool first) in->cur = strchr(in->cur, '\0'); pfree(nrm.buf); - if (op == OP_OR && !first) + if (needParenthesis) { RESIZEBUF(in, 2); sprintf(in->cur, " )"); @@ -749,7 +849,6 @@ infix(INFIX *in, bool first) } } - Datum tsqueryout(PG_FUNCTION_ARGS) { @@ -768,7 +867,7 @@ tsqueryout(PG_FUNCTION_ARGS) nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); *(nrm.cur) = '\0'; nrm.op = GETOPERAND(query); - infix(&nrm, true); + infix(&nrm, -1 /* lowest priority */); PG_FREE_IF_COPY(query, 0); PG_RETURN_CSTRING(nrm.buf); @@ -789,7 +888,8 @@ tsqueryout(PG_FUNCTION_ARGS) * * For each operator: * uint8 type, QI_OPR - * uint8 operator, one of OP_AND, OP_OR, OP_NOT. + * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT. + * uint16 distance (only for OP_PHRASE) */ Datum tsquerysend(PG_FUNCTION_ARGS) @@ -815,6 +915,8 @@ tsquerysend(PG_FUNCTION_ARGS) break; case QI_OPR: pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper)); + if (item->qoperator.oper == OP_PHRASE) + pq_sendint(&buf, item->qoperator.distance, sizeof(item->qoperator.distance)); break; default: elog(ERROR, "unrecognized tsquery node type: %d", item->type); @@ -830,15 +932,16 @@ tsquerysend(PG_FUNCTION_ARGS) Datum tsqueryrecv(PG_FUNCTION_ARGS) { - StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - TSQuery query; - int i, - len; - QueryItem *item; - int datalen; - char *ptr; - uint32 size; - const char **operands; + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + TSQuery query; + int i, + len; + QueryItem *item; + int datalen; + char *ptr; + uint32 size; + const char **operands; + bool needcleanup; size = pq_getmsgint(buf, sizeof(uint32)); if (size > (MaxAllocSize / sizeof(QueryItem))) @@ -907,13 +1010,15 @@ tsqueryrecv(PG_FUNCTION_ARGS) int8 oper; oper = (int8) pq_getmsgint(buf, sizeof(int8)); - if (oper != OP_NOT && oper 
!= OP_OR && oper != OP_AND) + if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE) elog(ERROR, "invalid tsquery: unrecognized operator type %d", (int) oper); if (i == size - 1) elog(ERROR, "invalid pointer to right operand"); item->qoperator.oper = oper; + if (oper == OP_PHRASE) + item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16)); } else elog(ERROR, "unrecognized tsquery node type: %d", item->type); @@ -930,7 +1035,7 @@ tsqueryrecv(PG_FUNCTION_ARGS) * Fill in the left-pointers. Checks that the tree is well-formed as a * side-effect. */ - findoprnd(item, size); + findoprnd(item, size, &needcleanup); /* Copy operands to output struct */ for (i = 0; i < size; i++) @@ -949,7 +1054,10 @@ tsqueryrecv(PG_FUNCTION_ARGS) SET_VARSIZE(query, len + datalen); - PG_RETURN_TSVECTOR(query); + if (needcleanup) + PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query)); + + PG_RETURN_TSQUERY(query); } /* diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index 333789b..0d994d9 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -25,6 +25,10 @@ typedef struct NODE QueryItem *valnode; } NODE; +/* Non-operator nodes have fake (but highest) priority */ +#define NODE_PRIORITY(x) \ + ( ((x)->valnode->qoperator.type == QI_OPR) ? 
QO_PRIORITY((x)->valnode) : 16 ) + /* * make query tree from plain view of query */ @@ -160,7 +164,8 @@ clean_NOT_intree(NODE *node) { NODE *res = node; - Assert(node->valnode->qoperator.oper == OP_AND); + Assert(node->valnode->qoperator.oper == OP_AND || + node->valnode->qoperator.oper == OP_PHRASE); node->left = clean_NOT_intree(node->left); node->right = clean_NOT_intree(node->right); @@ -216,7 +221,7 @@ clean_NOT(QueryItem *ptr, int *len) * text (stopword) */ static NODE * -clean_fakeval_intree(NODE *node, char *result) +clean_fakeval_intree(NODE *node, char *result, int *adddistance) { char lresult = V_UNKNOWN, rresult = V_UNKNOWN; @@ -224,6 +229,9 @@ clean_fakeval_intree(NODE *node, char *result) /* since this function recurses, it could be driven to stack overflow. */ check_stack_depth(); + if (adddistance) + *adddistance = 0; + if (node->valnode->type == QI_VAL) return node; else if (node->valnode->type == QI_VALSTOP) @@ -237,7 +245,7 @@ clean_fakeval_intree(NODE *node, char *result) if (node->valnode->qoperator.oper == OP_NOT) { - node->right = clean_fakeval_intree(node->right, &rresult); + node->right = clean_fakeval_intree(node->right, &rresult, NULL); if (!node->right) { *result = V_STOP; @@ -248,12 +256,29 @@ clean_fakeval_intree(NODE *node, char *result) else { NODE *res = node; + int ndistance, ldistance = 0, rdistance = 0; + + ndistance = (node->valnode->qoperator.oper == OP_PHRASE) ? + node->valnode->qoperator.distance : + 0; - node->left = clean_fakeval_intree(node->left, &lresult); - node->right = clean_fakeval_intree(node->right, &rresult); + node->left = clean_fakeval_intree(node->left, + &lresult, + ndistance ? &ldistance : NULL); + + node->right = clean_fakeval_intree(node->right, + &rresult, + ndistance ? 
&rdistance : NULL); + + /* + * ndistance, ldistance and rdistance are greater than zero if + * corresponding node are OP_PHRASE + */ if (lresult == V_STOP && rresult == V_STOP) { + if (adddistance && ndistance) + *adddistance = ldistance + ndistance + rdistance; freetree(node); *result = V_STOP; return NULL; @@ -261,33 +286,316 @@ clean_fakeval_intree(NODE *node, char *result) else if (lresult == V_STOP) { res = node->right; + /* + * propagate distance from current node to the + * right upper sub-tree + */ + if (adddistance && ndistance) + *adddistance = rdistance; pfree(node); } else if (rresult == V_STOP) { res = node->left; + /* + * propagate distance to the upper tree to current node + */ + if (adddistance && ndistance) + *adddistance = ndistance + ldistance; pfree(node); } + else if (ndistance) + { + node->valnode->qoperator.distance += ldistance; + if (adddistance) + *adddistance = 0; + } + else if (adddistance) + { + *adddistance = 0; + } + return res; } return node; } -QueryItem * -clean_fakeval(QueryItem *ptr, int *len) +static NODE* +copyNODE(NODE *node) { - NODE *root = maketree(ptr); + NODE *cnode = palloc(sizeof(NODE)); + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + cnode->valnode = palloc(sizeof(QueryItem)); + *(cnode->valnode) = *(node->valnode); + + if (node->valnode->type == QI_OPR) + { + cnode->right = copyNODE(node->right); + if (node->valnode->qoperator.oper != OP_NOT) + cnode->left = copyNODE(node->left); + } + + return cnode; +} + +static NODE* +makeNODE(int8 op, NODE *left, NODE *right) +{ + NODE *node = palloc(sizeof(NODE)); + + node->valnode = palloc(sizeof(QueryItem)); + + node->valnode->qoperator.type = QI_OPR; + node->valnode->qoperator.oper = op; + + node->left = left; + node->right = right; + + return node; +} + +/* + * Move operation with high priority to the leaves. + * That guarantees that ? operation will be near the bottom of tree. + * But it's needed only for ? 
operation, so actual conversion + * will be done only for subtrees under ? operation. + * Rules: + * a ? (b|c) => (a?b) | (a?c) + * (a|b) ? c => (a?c) | (b?c) + * a ? !b => a & !(a?b) + * !a ? b => b & !(a?b) + * Warnings: + * - a?b != b?a + * - a ?[n] ( b ?[n] c ) != (a ?[n] b) ?[n] c , because phrase's length is: + * n 2n-1 + */ +static NODE* +normalize_phrase_tree(NODE *node) +{ + if (node->valnode->type == QI_VAL || node->valnode->type == QI_VALSTOP) + return node; + + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + Assert(node->valnode->type == QI_OPR); + + if (node->valnode->qoperator.oper == OP_NOT) + { + /* eliminate NOT sequence */ + while (node->valnode->type == QI_OPR && + node->valnode->qoperator.oper == node->right->valnode->qoperator.oper) + { + node = node->right->right; + } + + node->right = normalize_phrase_tree(node->right); + } + else if (node->valnode->qoperator.oper == OP_PHRASE) + { + int16 distance; + NODE *X; + + node->left = normalize_phrase_tree(node->left); + node->right = normalize_phrase_tree(node->right); + + if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) && + NODE_PRIORITY(node) <= NODE_PRIORITY(node->left)) + return node; + + /* + * We can't swap left-right and works only with left child + * because of a?b != b?a + */ + + distance = node->valnode->qoperator.distance; + + if (node->right->valnode->type == QI_OPR) + { + switch (node->right->valnode->qoperator.oper) + { + case OP_AND: + /* a ? (b&c) => (a?b) & (a?c) */ + node = makeNODE(OP_AND, + makeNODE(OP_PHRASE, + node->left, + node->right->left), + makeNODE(OP_PHRASE, + copyNODE(node->left), + node->right->right)); + node->left->valnode->qoperator.distance = + node->right->valnode->qoperator.distance = distance; + break; + case OP_OR: + /* a ? 
(b|c) => (a?b) | (a?c) */ + node = makeNODE(OP_OR, + makeNODE(OP_PHRASE, + node->left, + node->right->left), + makeNODE(OP_PHRASE, + copyNODE(node->left), + node->right->right)); + node->left->valnode->qoperator.distance = + node->right->valnode->qoperator.distance = distance; + break; + case OP_NOT: + /* a ? !b => a & !(a?b) */ + X = node->right; + node->right = node->right->right; + X->right = node; + node = makeNODE(OP_AND, + copyNODE(node->left), + X); + break; + case OP_PHRASE: + /* no-op */ + break; + default: + elog(ERROR,"Wrong type of tsquery node: %d", + node->right->valnode->qoperator.oper); + } + } + + if (node->left->valnode->type == QI_OPR && + node->valnode->qoperator.oper == OP_PHRASE) + { + /* + * if node is still OP_PHRASE, check the left subtree + * else the whole node will be done later. + */ + switch(node->left->valnode->qoperator.oper) + { + case OP_AND: + /* (a&b) ? c => (a?c) & (b?c) */ + node = makeNODE(OP_AND, + makeNODE(OP_PHRASE, + node->left->left, + node->right), + makeNODE(OP_PHRASE, + node->left->right, + copyNODE(node->right))); + node->left->valnode->qoperator.distance = + node->right->valnode->qoperator.distance = distance; + break; + case OP_OR: + /* (a|b) ? c => (a?c) | (b?c) */ + node = makeNODE(OP_OR, + makeNODE(OP_PHRASE, + node->left->left, + node->right), + makeNODE(OP_PHRASE, + node->left->right, + copyNODE(node->right))); + node->left->valnode->qoperator.distance = + node->right->valnode->qoperator.distance = distance; + break; + case OP_NOT: + /* !a ? 
b => b & !(a?b) */ + X = node->left; + node->left = node->left->right; + X->right = node; + node = makeNODE(OP_AND, + X, + copyNODE(node->right)); + break; + case OP_PHRASE: + /* no-op */ + break; + default: + elog(ERROR,"Wrong type of tsquery node: %d", + node->left->valnode->qoperator.oper); + } + } + + node = normalize_phrase_tree(node); + } + else /* AND or OR */ + { + node->left = normalize_phrase_tree(node->left); + node->right = normalize_phrase_tree(node->right); + } + + + return node; +} + +static int32 +calcstrlen(NODE *node) +{ + int32 size = 0; + + if (node->valnode->type == QI_VAL) + { + size = node->valnode->qoperand.length + 1; + } + else + { + Assert(node->valnode->type == QI_OPR); + + size = calcstrlen(node->right); + if (node->valnode->qoperator.oper != OP_NOT) + size += calcstrlen(node->left); + } + + return size; +} + +TSQuery +cleanup_fakeval_and_phrase(TSQuery in) +{ + int32 len, + lenstr, + commonlen, + i; + NODE *root; char result = V_UNKNOWN; - NODE *resroot; + TSQuery out; + QueryItem *items; + char *ptr; - resroot = clean_fakeval_intree(root, &result); + if (in->size == 0) + return in; + + root = clean_fakeval_intree(maketree(GETQUERY(in)), &result, NULL); if (result != V_UNKNOWN) { ereport(NOTICE, (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored"))); - *len = 0; - return NULL; + out = palloc(HDRSIZETQ); + out->size = 0; + SET_VARSIZE(out, HDRSIZETQ); + return out; + } + + root = normalize_phrase_tree(root); + + lenstr = calcstrlen(root); + items = plaintree(root, &len); + commonlen = COMPUTESIZE(len, lenstr); + + out = palloc(commonlen); + SET_VARSIZE(out, commonlen); + out->size = len; + + memcpy(GETQUERY(out), items, len * sizeof(QueryItem)); + + items = GETQUERY(out); + ptr = GETOPERAND(out); + for (i = 0; i < out->size; i++) + { + QueryOperand *op = (QueryOperand *) (items + i); + + if (op->type != QI_VAL) + continue; + + memcpy(ptr, GETOPERAND(in) + op->distance, op->length); + ptr[op->length] 
='\0'; + op->distance = ptr - GETOPERAND(out); + ptr += op->length + 1; } - return plaintree(resroot, len); + return out; } diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c index 9cdf1fe..4596f3f 100644 --- a/src/backend/utils/adt/tsquery_op.c +++ b/src/backend/utils/adt/tsquery_op.c @@ -27,7 +27,7 @@ tsquery_numnode(PG_FUNCTION_ARGS) } static QTNode * -join_tsqueries(TSQuery a, TSQuery b, int8 operator) +join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance) { QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); @@ -36,6 +36,8 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator) res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); res->valnode->type = QI_OPR; res->valnode->qoperator.oper = operator; + if (operator == OP_PHRASE) + res->valnode->qoperator.distance = distance; res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); @@ -64,7 +66,7 @@ tsquery_and(PG_FUNCTION_ARGS) PG_RETURN_POINTER(a); } - res = join_tsqueries(a, b, OP_AND); + res = join_tsqueries(a, b, OP_AND, 0); query = QTN2QT(res); @@ -94,7 +96,7 @@ tsquery_or(PG_FUNCTION_ARGS) PG_RETURN_POINTER(a); } - res = join_tsqueries(a, b, OP_OR); + res = join_tsqueries(a, b, OP_OR, 0); query = QTN2QT(res); @@ -106,6 +108,52 @@ tsquery_or(PG_FUNCTION_ARGS) } Datum +tsquery_phrase_distance(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + QTNode *res; + TSQuery query; + int32 distance = PG_GETARG_INT32(2); + + if (distance < 0 || distance > MAXENTRYPOS) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("distance in phrase operator should be non-negative and less than %d", + MAXENTRYPOS))); + if (a->size == 0) + { + PG_FREE_IF_COPY(a, 1); + PG_RETURN_POINTER(b); + } + else if (b->size == 0) + { + PG_FREE_IF_COPY(b, 1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance); + + query = QTN2QT(res); 
+ + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query)); +} + +Datum +tsquery_phrase(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(DirectFunctionCall3( + tsquery_phrase_distance, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + Int32GetDatum(1))); +} + +Datum tsquery_not(PG_FUNCTION_ARGS) { TSQuery a = PG_GETARG_TSQUERY_COPY(0); @@ -333,3 +381,52 @@ tsq_mcontained(PG_FUNCTION_ARGS) ) ); } + +Datum +tsquery_setweight(PG_FUNCTION_ARGS) +{ + TSQuery in = PG_GETARG_TSQUERY(0); + char cw = PG_GETARG_CHAR(1); + TSQuery out; + QueryItem *item; + int w = 0; + + switch (cw) + { + case 'A': + case 'a': + w = 3; + break; + case 'B': + case 'b': + w = 2; + break; + case 'C': + case 'c': + w = 1; + break; + case 'D': + case 'd': + w = 0; + break; + default: + /* internal error */ + elog(ERROR, "unrecognized weight: %d", cw); + } + + out = (TSQuery) palloc(VARSIZE(in)); + memcpy(out, in, VARSIZE(in)); + item = GETQUERY(out); + + while(item - GETQUERY(out) < out->size) + { + if (item->type == QI_VAL) + item->qoperand.weight |= (1 << w); + + item++; + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c index fe26ad5..0f338aa 100644 --- a/src/backend/utils/adt/tsquery_util.c +++ b/src/backend/utils/adt/tsquery_util.c @@ -110,6 +110,10 @@ QTNodeCompare(QTNode *an, QTNode *bn) if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0) return res; } + + if (ao->oper == OP_PHRASE && ao->distance != bo->distance) + return (ao->distance > bo->distance) ? 
-1 : 1; + return 0; } else if (an->valnode->type == QI_VAL) @@ -150,7 +154,7 @@ QTNSort(QTNode *in) for (i = 0; i < in->nchild; i++) QTNSort(in->child[i]); - if (in->nchild > 1) + if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE) qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN); } @@ -190,7 +194,10 @@ QTNTernary(QTNode *in) { QTNode *cc = in->child[i]; - if (cc->valnode->type == QI_OPR && in->valnode->qoperator.oper == cc->valnode->qoperator.oper) + /* OP_Phrase isn't associative */ + if (cc->valnode->type == QI_OPR && + in->valnode->qoperator.oper == cc->valnode->qoperator.oper && + in->valnode->qoperator.oper != OP_PHRASE) { int oldnchild = in->nchild; diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 53f678a..73ed8b5 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -364,8 +364,10 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method) return 0.0; /* XXX: What about NOT? */ - res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ? - calc_rank_and(w, t, q) : calc_rank_or(w, t, q); + res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND || + item->qoperator.oper == OP_PHRASE)) ? + calc_rank_and(w, t, q) : + calc_rank_or(w, t, q); if (res < 0) res = 1e-20f; @@ -496,10 +498,17 @@ ts_rank_tt(PG_FUNCTION_ARGS) typedef struct { - QueryItem **item; - int16 nitem; - uint8 wclass; - int32 pos; + union { + struct { /* compiled doc representation */ + QueryItem **items; + int16 nitem; + } query; + struct { /* struct is used for preparing doc representation */ + QueryItem *item; + WordEntry *entry; + } map; + } data; + WordEntryPos pos; } DocRepresentation; static int @@ -508,26 +517,59 @@ compareDocR(const void *va, const void *vb) const DocRepresentation *a = (const DocRepresentation *) va; const DocRepresentation *b = (const DocRepresentation *) vb; - if (a->pos == b->pos) - return 0; - return (a->pos > b->pos) ? 
1 : -1; + if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos)) + { + if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos)) + { + if (a->data.map.entry == b->data.map.entry) + return 0; + + return (a->data.map.entry > b->data.map.entry) ? 1 : -1; + } + + return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1; + } + + return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1; } +#define MAXQROPOS MAXENTRYPOS +typedef struct +{ + bool operandexist; + bool reverseinsert; /* indicates order to insert, + true means descending order */ + uint32 npos; + WordEntryPos pos[MAXQROPOS]; +} QueryRepresentationOperand; + typedef struct { - TSQuery query; - bool *operandexist; + TSQuery query; + QueryRepresentationOperand *operandData; } QueryRepresentation; -#define QR_GET_OPERAND_EXISTS(q, v) ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] ) -#define QR_SET_OPERAND_EXISTS(q, v) QR_GET_OPERAND_EXISTS(q,v) = true +#define QR_GET_OPERAND_DATA(q, v) \ + ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) ) static bool -checkcondition_QueryOperand(void *checkval, QueryOperand *val) +checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data) { - QueryRepresentation *qr = (QueryRepresentation *) checkval; + QueryRepresentation *qr = (QueryRepresentation *) checkval; + QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val); - return QR_GET_OPERAND_EXISTS(qr, val); + if (!opData->operandexist) + return false; + + if (data) + { + data->npos = opData->npos; + data->pos = opData->pos; + if (opData->reverseinsert) + data->pos += MAXQROPOS - opData->npos; + } + + return true; } typedef struct @@ -539,14 +581,65 @@ typedef struct DocRepresentation *end; } CoverExt; +static void +resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert) +{ + int i; + + for(i = 0; i < qr->query->size; i++) + { + qr->operandData[i].operandexist = false; + qr->operandData[i].reverseinsert = reverseinsert; + qr->operandData[i].npos = 0; + 
} +} + +static void +fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry) +{ + int i; + int lastPos; + QueryRepresentationOperand *opData; + + for (i = 0; i < entry->data.query.nitem; i++) + { + if (entry->data.query.items[i]->type != QI_VAL) + continue; + + opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]); + + opData->operandexist = true; + + if (opData->npos == 0) + { + lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0; + opData->pos[lastPos] = entry->pos; + opData->npos++; + continue; + } + + lastPos = opData->reverseinsert ? + (MAXQROPOS - opData->npos) : + (opData->npos - 1); + + if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos)) + { + lastPos = opData->reverseinsert ? + (MAXQROPOS - 1 - opData->npos) : + (opData->npos); + + opData->pos[lastPos] = entry->pos; + opData->npos++; + } + } +} static bool Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) { - DocRepresentation *ptr; - int lastpos = ext->pos; - int i; - bool found = false; + DocRepresentation *ptr; + int lastpos = ext->pos; + bool found = false; /* * since this function recurses, it could be driven to stack overflow. 
@@ -554,7 +647,7 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) */ check_stack_depth(); - memset(qr->operandexist, 0, sizeof(bool) * qr->query->size); + resetQueryRepresentation(qr, false); ext->p = INT_MAX; ext->q = 0; @@ -563,16 +656,13 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) /* find upper bound of cover from current position, move up */ while (ptr - doc < len) { - for (i = 0; i < ptr->nitem; i++) - { - if (ptr->item[i]->type == QI_VAL) - QR_SET_OPERAND_EXISTS(qr, ptr->item[i]); - } + fillQueryRepresentationData(qr, ptr); + if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand)) { - if (ptr->pos > ext->q) + if (WEP_GETPOS(ptr->pos) > ext->q) { - ext->q = ptr->pos; + ext->q = WEP_GETPOS(ptr->pos); ext->end = ptr; lastpos = ptr - doc; found = true; @@ -585,22 +675,24 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) if (!found) return false; - memset(qr->operandexist, 0, sizeof(bool) * qr->query->size); + resetQueryRepresentation(qr, true); ptr = doc + lastpos; /* find lower bound of cover from found upper bound, move down */ while (ptr >= doc + ext->pos) { - for (i = 0; i < ptr->nitem; i++) - if (ptr->item[i]->type == QI_VAL) - QR_SET_OPERAND_EXISTS(qr, ptr->item[i]); + /* + * we scan doc from right to left, so pos info in reverse order! 
+ */ + fillQueryRepresentationData(qr, ptr); + if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand)) { - if (ptr->pos < ext->p) + if (WEP_GETPOS(ptr->pos) < ext->p) { ext->begin = ptr; - ext->p = ptr->pos; + ext->p = WEP_GETPOS(ptr->pos); } break; } @@ -635,11 +727,13 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) int len = qr->query->size * 4, cur = 0; DocRepresentation *doc; - char *operand; doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); - operand = GETOPERAND(qr->query); + /* + * Iterate through query to make DocRepresentaion for words and it's entries + * satisfied by query + */ for (i = 0; i < qr->query->size; i++) { QueryOperand *curoperand; @@ -649,13 +743,11 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) curoperand = &item[i].qoperand; - if (QR_GET_OPERAND_EXISTS(qr, &item[i])) - continue; - firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem); if (!entry) continue; + /* iterations over entries in tsvector */ while (entry - firstentry < nitem) { if (entry->haspos) @@ -676,53 +768,66 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); } + /* iterations over entry's positions */ for (j = 0; j < dimt; j++) { - if (j == 0) - { - int k; - - doc[cur].nitem = 0; - doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); - - for (k = 0; k < qr->query->size; k++) - { - QueryOperand *kptr = &item[k].qoperand; - QueryOperand *iptr = &item[i].qoperand; - - if (k == i || - (item[k].type == QI_VAL && - compareQueryOperand(&kptr, &iptr, operand) == 0)) - { - /* - * if k == i, we've already checked above that - * it's type == Q_VAL - */ - doc[cur].item[doc[cur].nitem] = item + k; - doc[cur].nitem++; - QR_SET_OPERAND_EXISTS(qr, item + k); - } - } - } - else + if ( curoperand->weight == 0 || (curoperand->weight & (1 << WEP_GETWEIGHT(post[j]))) ) { - 
doc[cur].nitem = doc[cur - 1].nitem; - doc[cur].item = doc[cur - 1].item; + doc[cur].pos = post[j]; + doc[cur].data.map.entry = entry; + doc[cur].data.map.item = (QueryItem*)curoperand; + cur++; } - doc[cur].pos = WEP_GETPOS(post[j]); - doc[cur].wclass = WEP_GETWEIGHT(post[j]); - cur++; } entry++; } } - *doclen = cur; - if (cur > 0) { + DocRepresentation *rptr = doc + 1, + *wptr = doc, + storage; + + /* + * Sort representation in ascending order by pos and entry + */ qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); + + /* + * Join QueryItem per WordEntry and it's position + */ + storage.pos = doc->pos; + storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size); + storage.data.query.items[0] = doc->data.map.item; + storage.data.query.nitem = 1; + + while (rptr - doc < cur) + { + if (rptr->pos == (rptr-1)->pos && + rptr->data.map.entry == (rptr-1)->data.map.entry) + { + storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item; + storage.data.query.nitem++; + } + else + { + *wptr = storage; + wptr++; + storage.pos = rptr->pos; + storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size); + storage.data.query.items[0] = rptr->data.map.item; + storage.data.query.nitem = 1; + } + + rptr++; + } + + *wptr = storage; + wptr++; + + *doclen = wptr - doc; return doc; } @@ -758,12 +863,13 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method) } qr.query = query; - qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size); + qr.operandData = (QueryRepresentationOperand *) + palloc0(sizeof(QueryRepresentationOperand) * query->size); doc = get_docrep(txt, &qr, &doclen); if (!doc) { - pfree(qr.operandexist); + pfree(qr.operandData); return 0.0; } @@ -777,7 +883,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method) while (ptr <= ext.end) { - InvSum += invws[ptr->wclass]; + InvSum += invws[WEP_GETWEIGHT(ptr->pos)]; ptr++; } @@ -827,7 +933,7 @@ calc_rank_cd(const 
float4 *arrdata, TSVector txt, TSQuery query, int method) pfree(doc); - pfree(qr.operandexist); + pfree(qr.operandData); return (float4) Wdoc; } diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c index 12043bf..2a26c46 100644 --- a/src/backend/utils/adt/tsvector.c +++ b/src/backend/utils/adt/tsvector.c @@ -28,7 +28,7 @@ typedef struct /* Compare two WordEntryPos values for qsort */ -static int +int comparePos(const void *a, const void *b) { int apos = WEP_GETPOS(*(const WordEntryPos *) a); diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index f6d3fb5..243f96f 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -68,6 +68,7 @@ typedef struct static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column); static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len); + /* * Order: haspos, len, word, for all positions (pos, weight) */ @@ -1121,35 +1122,118 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) } /* - * check weight info + * check weight info or/and fill data by needed positions */ static bool -checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item) +checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, + ExecPhraseData *data) { - WordEntryPosVector *posvec; - WordEntryPos *ptr; - uint16 len; + bool res = false; - posvec = (WordEntryPosVector *) - (chkval->values + SHORTALIGN(val->pos + val->len)); + if (entry->haspos && (val->weight || data)) + { + WordEntryPosVector *posvec; - len = posvec->npos; - ptr = posvec->pos; + posvec = (WordEntryPosVector *) + (chkval->values + SHORTALIGN(entry->pos + entry->len)); - while (len--) + if (val->weight && data) + { + WordEntryPos *ptr = posvec->pos; + WordEntryPos *dptr; + + /* + * Filter position information by weights + */ + dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos); + data->allocated = true; + + while (ptr - 
posvec->pos < posvec->npos) + { + if (val->weight & (1 << WEP_GETWEIGHT(*ptr))) + { + *dptr = WEP_GETPOS(*ptr); + dptr++; + } + + ptr++; + } + + data->npos = dptr - data->pos; + + if (data->npos > 0) + res = true; + } + else if (val->weight) + { + WordEntryPos *ptr = posvec->pos; + + while (ptr - posvec->pos < posvec->npos) + { + if (val->weight & (1 << WEP_GETWEIGHT(*ptr))) + { + res = true; + break; + } + + ptr++; + } + } + else /* data != NULL */ + { + data->npos = posvec->npos; + data->pos = posvec->pos; + data->allocated = false; + res = true; + } + } + else { - if (item->weight & (1 << WEP_GETWEIGHT(*ptr))) - return true; + res = true; + } + + return res; +} + +/* + * Removes duplicate pos entries. We can't use uniquePos() from + * tsvector.c because array could be longer than MAXENTRYPOS + * + * Returns new length. + */ + +static int +uniqueLongPos(WordEntryPos *a, int l) +{ + WordEntryPos *ptr, + *res; + + if (l <= 1) + return l; + + qsort((void *) a, l, sizeof(WordEntryPos), comparePos); + + res = a; + ptr = a + 1; + while (ptr - a < l) + { + if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res)) + { + res++; + *res = WEP_GETPOS(*ptr); + } + ptr++; } - return false; + + return res + 1 - a; } /* * is there value 'val' in array or not ? */ static bool -checkcondition_str(void *checkval, QueryOperand *val) +checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) { CHKVAL *chkval = (CHKVAL *) checkval; WordEntry *StopLow = chkval->arrb; @@ -1168,8 +1252,7 @@ checkcondition_str(void *checkval, QueryOperand *val) if (difference == 0) { - res = (val->weight && StopMiddle->haspos) ? 
- checkclass_str(chkval, StopMiddle, val) : true; + res = checkclass_str(chkval, StopMiddle, val, data); break; } else if (difference > 0) @@ -1178,31 +1261,201 @@ checkcondition_str(void *checkval, QueryOperand *val) StopHigh = StopMiddle; } - if (!res && val->prefix) + if ((res == false || data) && val->prefix == true) { + WordEntryPos *allpos = NULL; + int npos = 0, + totalpos = 0; /* * there was a failed exact search, so we should scan further to find - * a prefix match. + * a prefix match. We also need to do so if caller needs position info */ if (StopLow >= StopHigh) StopMiddle = StopHigh; - while (res == false && StopMiddle < chkval->arre && + while ((res == false || data) && StopMiddle < chkval->arre && tsCompareString(chkval->operand + val->distance, val->length, - chkval->values + StopMiddle->pos, StopMiddle->len, + chkval->values + StopMiddle->pos, StopMiddle->len, true) == 0) { - res = (val->weight && StopMiddle->haspos) ? - checkclass_str(chkval, StopMiddle, val) : true; + if (data) + { + /* + * We need to join position information + */ + res = checkclass_str(chkval, StopMiddle, val, data); + + if (res) + { + while (npos + data->npos >= totalpos) + { + if (totalpos == 0) + { + totalpos = 256; + allpos = palloc(sizeof(WordEntryPos) * totalpos); + } + else + { + totalpos *= 2; + allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos); + } + } + + memcpy(allpos+npos, data->pos, sizeof(WordEntryPos) * data->npos); + npos += data->npos; + } + } + else + { + res = checkclass_str(chkval, StopMiddle, val, NULL); + } StopMiddle++; } + + if (res && data) + { + /* Sort and make unique array of found positions */ + data->pos = allpos; + data->npos = uniqueLongPos(allpos, npos); + data->allocated = true; + } } return res; } /* + * check for phrase condition. 
Fallback to the AND operation + * if there is no position information + */ +static bool +TS_phrase_execute(QueryItem *curitem, void *checkval, bool calcnot, ExecPhraseData *data, + bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)) +{ + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (curitem->type == QI_VAL) + { + return chkcond(checkval, (QueryOperand *) curitem, data); + } + else + { + ExecPhraseData Ldata = {0, false, NULL}, + Rdata = {0, false, NULL}; + WordEntryPos *Lpos, + *Rpos, + *pos = NULL; + + Assert(curitem->qoperator.oper == OP_PHRASE); + + if (data) + data->npos = 0; + + if (TS_phrase_execute(curitem + curitem->qoperator.left, + checkval, calcnot, &Ldata, chkcond) == false) + return false; + + if (TS_phrase_execute(curitem + 1, + checkval, calcnot, &Rdata, chkcond) == false) + return false; + + /* + * if at least one of operand has not a position information then + * fallback to AND operation. + */ + if (Ldata.npos == 0 || Rdata.npos == 0) + return true; + + /* + * Result of operation is a list of corresponding positions of RIGHT + * operand + */ + if (data) + { + if (Rdata.allocated == false) + /* + * OP_PHRASE is a modificated OP_AND, so number of resulting + * positions could not be greater than any of operands + */ + data->pos = palloc(sizeof(WordEntryPos) * + Min(Ldata.npos, Rdata.npos)); + else + data->pos = Rdata.pos; + + data->allocated = true; + data->npos = 0; + pos = data->pos; + } + + Lpos = Ldata.pos; + Rpos = Rdata.pos; + + /* + * Find matches by distance, WEP_GETPOS() is needed because + * ExecPhraseData->data could point to the tsvector's WordEntryPosVector + */ + + while (Rpos - Rdata.pos < Rdata.npos) + { + while (Lpos - Ldata.pos < Ldata.npos) + { + if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos)) + { + /* + * Lpos is lefter that Rpos, so we need to check distance + * condition + */ + if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance) + { + /* MATCH! 
*/ + if (data) + { + *pos = WEP_GETPOS(*Rpos); + pos++; + /* We need to make unique result array, so go to next Rpos */ + break; + } + else + { + /* + * we are on the root of phrase tree and hence + * we don't need to store resulting positions + */ + return true; + } + } + } + else + { + /* + * go to next Rpos, because current Lpos is righter + * then current Rpos + */ + break; + } + + Lpos++; + } + + Rpos++; + } + + if (data) + { + data->npos = pos - data->pos; + + if (data->npos > 0) + return true; + } + } + + return false; +} + + +/* * Evaluate tsquery boolean expression. * * chkcond is a callback function used to evaluate each VAL node in the query. @@ -1213,13 +1466,15 @@ checkcondition_str(void *checkval, QueryOperand *val) */ bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, - bool (*chkcond) (void *checkval, QueryOperand *val)) + bool (*chkcond) (void *checkval, QueryOperand *val, + ExecPhraseData *data)) { /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (curitem->type == QI_VAL) - return chkcond(checkval, (QueryOperand *) curitem); + return chkcond(checkval, (QueryOperand *) curitem, + NULL /* we don't need a position infos */); switch (curitem->qoperator.oper) { @@ -1241,6 +1496,9 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot, else return TS_execute(curitem + 1, checkval, calcnot, chkcond); + case OP_PHRASE: + return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond); + default: elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); } @@ -1277,6 +1535,10 @@ tsquery_requires_match(QueryItem *curitem) */ return false; + case OP_PHRASE: + /* + * Treat OP_PHRASE as OP_AND here + */ case OP_AND: /* If either side requires a match, we're good */ if (tsquery_requires_match(curitem + curitem->qoperator.left)) diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c index e281708..4ea4a5b 100644 --- 
a/src/backend/utils/adt/tsvector_parser.c +++ b/src/backend/utils/adt/tsvector_parser.c @@ -89,7 +89,14 @@ do { \ } \ } while (0) -#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) +#define ISOPERATOR(x) \ + ( pg_mblen(x) == 1 && ( *(x) == '!' || \ + *(x) == '&' || \ + *(x) == '|' || \ + *(x) == '?' || \ + *(x) == '(' || \ + *(x) == ')' \ + ) ) /* Fills gettoken_tsvector's output parameters, and returns true */ #define RETURN_TOKEN \ diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index b3daff2..1f59b7d 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 ( "&&" PGNSP PGUID b f f 3615 3615 3615 0 0 tsque DESCR("AND-concatenate"); DATA(insert OID = 3681 ( "||" PGNSP PGUID b f f 3615 3615 3615 0 0 tsquery_or - - )); DESCR("OR-concatenate"); +/* The ?? operator calls tsquery_phrase, but that function name is overloaded, so the two-argument variant is referenced by its OID */ +DATA(insert OID = 5005 ( "??" PGNSP PGUID b f f 3615 3615 3615 0 0 5003 - - )); +DESCR("phrase-concatenate"); DATA(insert OID = 3682 ( "!!" 
PGNSP PGUID l f f 0 3615 3615 0 0 tsquery_not - - )); DESCR("NOT tsquery"); DATA(insert OID = 3693 ( "@>" PGNSP PGUID b f f 3615 3615 16 3694 0 tsq_mcontains contsel contjoinsel )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index ceb8129..6366ab7 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4574,7 +4574,12 @@ DESCR("less-equal-greater"); DATA(insert OID = 3669 ( tsquery_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ )); DATA(insert OID = 3670 ( tsquery_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ )); +DATA(insert OID = 5003 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_phrase _null_ _null_ _null_ )); +DATA(insert OID = 5004 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3615 "3615 3615 23" _null_ _null_ _null_ _null_ _null_ tsquery_phrase_distance _null_ _null_ _null_ )); +DESCR("phrase-concatenate with distance"); DATA(insert OID = 3671 ( tsquery_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ )); +DATA(insert OID = 5002 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 18" _null_ _null_ _null_ _null_ _null_ tsquery_setweight _null_ _null_ _null_ )); +DESCR("add weight class"); DATA(insert OID = 3691 ( tsq_mcontains PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ )); DATA(insert OID = 3692 ( tsq_mcontained PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontained _null_ _null_ _null_ )); @@ -4693,12 +4698,16 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 DESCR("make tsquery"); 
DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 5000 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ )); DESCR("transform to tsvector"); DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ )); DESCR("make tsquery"); DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ )); DESCR("trigger for automatic update of tsvector column"); diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 6f7a891..6157094 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -34,16 +34,17 @@ typedef struct */ typedef struct { - uint32 selected:1, - in:1, - replace:1, - repeated:1, - skip:1, - unused:3, - type:8, - len:16; - char *word; - QueryOperand *item; + uint32 selected: 1, + in: 1, + replace: 1, + repeated: 1, + skip: 1, + unused: 3, + type: 8, + len: 16; + WordEntryPos pos; + char *word; + QueryOperand 
*item; } HeadlineWordEntry; typedef struct @@ -51,6 +52,7 @@ typedef struct HeadlineWordEntry *words; int32 lenwords; int32 curwords; + int32 vectorpos; /* positions a la tsvector */ char *startsel; char *stopsel; char *fragdelim; diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h index bc99524..b670abb 100644 --- a/src/include/tsearch/ts_type.h +++ b/src/include/tsearch/ts_type.h @@ -49,6 +49,8 @@ typedef struct #define MAXSTRLEN ( (1<<11) - 1) #define MAXSTRPOS ( (1<<20) - 1) +int comparePos(const void *a, const void *b); + /* * Equivalent to * typedef struct { @@ -213,15 +215,26 @@ typedef struct } QueryOperand; -/* Legal values for QueryOperator.operator */ -#define OP_NOT 1 -#define OP_AND 2 -#define OP_OR 3 - +/* + * Legal values for QueryOperator.operator. + * They should be ordered by priority! We assume that phrase + * has highest priority, but this convention is only + * for query transformation! This is needed to simplify + * the algorithm of query transformation. + */ +#define OP_OR 1 +#define OP_AND 2 +#define OP_NOT 3 +#define OP_PHRASE 4 +#define OP_NOT_PHRASE 5 /* OP_PHRASE negation */ + +#define OP_PRIORITY(x) (x) +#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper) typedef struct { QueryItemType type; int8 oper; /* see above */ + int16 distance; /* distance between args for OP_PHRASE */ uint32 left; /* pointer to left operand. 
Right operand is * item + 1, left operand is placed * item+item->left */ @@ -304,7 +317,10 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS); extern Datum tsquery_and(PG_FUNCTION_ARGS); extern Datum tsquery_or(PG_FUNCTION_ARGS); +extern Datum tsquery_phrase(PG_FUNCTION_ARGS); +extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS); extern Datum tsquery_not(PG_FUNCTION_ARGS); +extern Datum tsquery_setweight(PG_FUNCTION_ARGS); extern Datum tsquery_rewrite(PG_FUNCTION_ARGS); extern Datum tsquery_rewrite_query(PG_FUNCTION_ARGS); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index 88533a6..163ae4c 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf, extern void pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix); extern void pushStop(TSQueryParserState state); -extern void pushOperator(TSQueryParserState state, int8 oper); +extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance); /* * parse plain text and lexize words @@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs); /* * Common check function for tsvector @@ tsquery */ +typedef struct ExecPhraseData +{ + int npos; + bool allocated; + WordEntryPos *pos; +} ExecPhraseData; + extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, - bool (*chkcond) (void *checkval, QueryOperand *val)); + bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data)); extern bool tsquery_requires_match(QueryItem *curitem); /* @@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS); extern Datum to_tsquery(PG_FUNCTION_ARGS); extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS); extern Datum plainto_tsquery(PG_FUNCTION_ARGS); +extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS); +extern Datum phraseto_tsquery(PG_FUNCTION_ARGS); /* * GiST support function @@ -169,7 +178,7 @@ extern Datum 
gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS); * TSQuery Utilities */ extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); -extern QueryItem *clean_fakeval(QueryItem *ptr, int32 *len); +extern TSQuery cleanup_fakeval_and_phrase(TSQuery in); typedef struct QTNode { diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index ef86295..1c1d338 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -434,9 +434,9 @@ SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footbal (1 row) SELECT to_tsquery('ispell_tst', 'footballklubber'); - to_tsquery ------------------------------------------------------------------------------- - ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' + to_tsquery +-------------------------------------------------------------------------- + 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber' (1 row) SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); @@ -458,9 +458,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb (1 row) SELECT to_tsquery('hunspell_tst', 'footballklubber'); - to_tsquery ------------------------------------------------------------------------------- - ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' + to_tsquery +-------------------------------------------------------------------------- + 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber' (1 row) SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); @@ -469,6 +469,18 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' (1 row) +SELECT to_tsquery('hunspell_tst', 'footballyklubber:b ? sky'); + to_tsquery +----------------------------------------------------------------------- + ( 'foot':B ? 
'sky' ) & ( 'ball':B ? 'sky' ) & ( 'klubber':B ? 'sky' ) +(1 row) + +SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky'); + phraseto_tsquery +----------------------------------------------------------------- + ( 'foot' ? 'sky' ) & ( 'ball' ? 'sky' ) & ( 'klubber' ? 'sky' ) +(1 row) + -- Test ispell dictionary with hunspell affix with FLAG long in configuration ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING REPLACE hunspell WITH hunspell_long; @@ -479,9 +491,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb (1 row) SELECT to_tsquery('hunspell_tst', 'footballklubber'); - to_tsquery ------------------------------------------------------------------------------- - ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' + to_tsquery +-------------------------------------------------------------------------- + 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber' (1 row) SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); @@ -500,9 +512,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb (1 row) SELECT to_tsquery('hunspell_tst', 'footballklubber'); - to_tsquery ------------------------------------------------------------------------------- - ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' + to_tsquery +-------------------------------------------------------------------------- + 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber' (1 row) SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index d22d345..5d1a46e 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -548,6 +548,235 @@ SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg'; 'foo' & 'bar' & ( 'asd' | 'fg' ) (1 row) +-- Check stop word 
deletion, a and s are stop-words +SELECT to_tsquery('english', '(1 ? 2) ? a'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '(1 ? a) ? 2'); + to_tsquery +-------------- + '1' ?[2] '2' +(1 row) + +SELECT to_tsquery('english', '(a ? 1) ? 2'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', 'a ? (1 ? 2)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '1 ? (a ? 2)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '1 ? (2 ? a)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '(1 ? 2) ?[3] a'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '(1 ? a) ?[3] 2'); + to_tsquery +-------------- + '1' ?[4] '2' +(1 row) + +SELECT to_tsquery('english', '(a ? 1) ?[3] 2'); + to_tsquery +-------------- + '1' ?[3] '2' +(1 row) + +SELECT to_tsquery('english', 'a ?[3] (1 ? 2)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '1 ?[3] (a ? 2)'); + to_tsquery +-------------- + '1' ?[3] '2' +(1 row) + +SELECT to_tsquery('english', '1 ?[3] (2 ? a)'); + to_tsquery +-------------- + '1' ?[3] '2' +(1 row) + +SELECT to_tsquery('english', '(1 ?[3] 2) ? a'); + to_tsquery +-------------- + '1' ?[3] '2' +(1 row) + +SELECT to_tsquery('english', '(1 ?[3] a) ? 2'); + to_tsquery +-------------- + '1' ?[4] '2' +(1 row) + +SELECT to_tsquery('english', '(a ?[3] 1) ? 2'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', 'a ? (1 ?[3] 2)'); + to_tsquery +-------------- + '1' ?[3] '2' +(1 row) + +SELECT to_tsquery('english', '1 ? (a ?[3] 2)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '1 ? (2 ?[3] a)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '((a ? 1) ? 2) ? s'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '(2 ? (a ? 1)) ? 
s'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', '((1 ? a) ? 2) ? s'); + to_tsquery +-------------- + '1' ?[2] '2' +(1 row) + +SELECT to_tsquery('english', '(2 ? (1 ? a)) ? s'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', 's ? ((a ? 1) ? 2)'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', 's ? (2 ? (a ? 1))'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', 's ? ((1 ? a) ? 2)'); + to_tsquery +-------------- + '1' ?[2] '2' +(1 row) + +SELECT to_tsquery('english', 's ? (2 ? (1 ? a))'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', '((a ? 1) ? s) ? 2'); + to_tsquery +-------------- + '1' ?[2] '2' +(1 row) + +SELECT to_tsquery('english', '(s ? (a ? 1)) ? 2'); + to_tsquery +------------ + '1' ? '2' +(1 row) + +SELECT to_tsquery('english', '((1 ? a) ? s) ? 2'); + to_tsquery +-------------- + '1' ?[3] '2' +(1 row) + +SELECT to_tsquery('english', '(s ? (1 ? a)) ? 2'); + to_tsquery +-------------- + '1' ?[2] '2' +(1 row) + +SELECT to_tsquery('english', '2 ? ((a ? 1) ? s)'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', '2 ? (s ? (a ? 1))'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', '2 ? ((1 ? a) ? s)'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('english', '2 ? (s ? (1 ? a))'); + to_tsquery +------------ + '2' ? '1' +(1 row) + +SELECT to_tsquery('foo ? (a ? (the ? bar))'); + to_tsquery +--------------- + 'foo' ? 'bar' +(1 row) + +SELECT to_tsquery('((foo ? a) ? the) ? bar'); + to_tsquery +------------------ + 'foo' ?[3] 'bar' +(1 row) + +SELECT to_tsquery('foo ? a ? the ? 
bar'); + to_tsquery +------------------ + 'foo' ?[3] 'bar' +(1 row) + +SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways'); + phraseto_tsquery +------------------------------------------------------------------------ + ( ( ( 'postgresql' ?[3] 'extend' ) ?[3] 'user' ) ?[2] 'mani' ) ? 'way' +(1 row) + SELECT ts_rank_cd(to_tsvector('english', ' Day after day, day after day, We stuck, nor breath nor motion, @@ -596,6 +825,22 @@ S. T. Coleridge (1772-1834) 0.1 (1 row) +SELECT ts_rank_cd(to_tsvector('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +'), to_tsquery('english', 'painted ? Ship')); + ts_rank_cd +------------ + 0.1 +(1 row) + SELECT ts_rank_cd(strip(to_tsvector('both stripped')), to_tsquery('both & stripped')); ts_rank_cd @@ -669,6 +914,44 @@ S. T. Coleridge (1772-1834) (1 row) SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', phraseto_tsquery('english', 'painted Ocean')); + ts_headline +---------------------------------- + <b>painted</b> <b>Ocean</b>. + + Water, water, every where + + And all the boards did shrink;+ + Water, water, every +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. 
Coleridge (1772-1834) +', phraseto_tsquery('english', 'idle as a painted Ship')); + ts_headline +--------------------------------------------- + <b>idle</b> as a <b>painted</b> <b>Ship</b>+ + Upon a <b>painted</b> Ocean. + + Water, water, every where + + And all the boards +(1 row) + +SELECT ts_headline('english', ' <html> <!-- some comment --> <body> @@ -697,6 +980,24 @@ to_tsquery('english', 'sea&foo'), 'HighlightAll=true'); </html> (1 row) +SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 ? 3', 'MaxWords=2, MinWords=1'); + ts_headline +------------------- + <b>1</b> <b>3</b> +(1 row) + +SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1'); + ts_headline +------------------------------ + <b>1</b> 2 <b>3</b> <b>1</b> +(1 row) + +SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 ? 3', 'MaxWords=4, MinWords=1'); + ts_headline +------------------- + <b>1</b> <b>3</b> +(1 row) + --Check if headline fragments work SELECT ts_headline('english', ' Day after day, day after day, @@ -799,13 +1100,13 @@ UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text); SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york'; count ------- - 1 + 2 (1 row) SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york'; count ------- - 2 + 3 (1 row) SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york'; @@ -817,13 +1118,13 @@ SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york'; count ------- - 3 + 4 (1 row) SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york'; count ------- - 2 + 3 (1 row) CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword); @@ -831,13 +1132,13 @@ SET enable_seqscan=OFF; SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york'; count ------- - 1 + 2 (1 row) SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york'; count ------- - 2 + 3 (1 row) SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york'; 
@@ -849,20 +1150,20 @@ SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york'; count ------- - 3 + 4 (1 row) SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york'; count ------- - 2 + 3 (1 row) RESET enable_seqscan; SELECT ts_rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city'); - ts_rewrite ----------------------------------------------------------------------------------- - 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) ) + ts_rewrite +------------------------------------------------------------------------------ + 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' ) (1 row) SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text ); @@ -878,9 +1179,9 @@ SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery':: (1 row) SELECT ts_rewrite('bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'::text ); - ts_rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + ts_rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery'); @@ -896,9 +1197,33 @@ SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery') (1 row) SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'); - ts_rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + ts_rewrite +--------------------------------------------------------------------------------- + 
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) +(1 row) + +SELECT ts_rewrite('1 & (2 ? 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); + ts_rewrite +------------ + '2' ? '4' +(1 row) + +SELECT ts_rewrite('1 & (2 ?[2] 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); + ts_rewrite +------------------------ + '1' & ( '2' ?[2] '3' ) +(1 row) + +SELECT ts_rewrite('5 ? (1 & (2 ? 3))', 'SELECT keyword, sample FROM test_tsquery'::text ); + ts_rewrite +----------------------------------------- + ( '5' ? '1' ) & ( '5' ? ( '2' ? '3' ) ) +(1 row) + +SELECT ts_rewrite('5 ? (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text ); + ts_rewrite +----------------------- + '5' ? '7' | '5' ? '8' (1 row) SELECT keyword FROM test_tsquery WHERE keyword @> 'new'; @@ -937,9 +1262,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t (1 row) SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; - ts_rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + ts_rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query; @@ -955,9 +1280,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t (1 row) SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; - ts_rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + ts_rewrite 
+--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops); @@ -998,9 +1323,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t (1 row) SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; - ts_rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + ts_rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query; @@ -1016,9 +1341,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t (1 row) SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; - ts_rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) + ts_rewrite +--------------------------------------------------------------------------------- + ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' ) (1 row) RESET enable_seqscan; @@ -1126,3 +1451,15 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts; ---- (0 rows) +--check OP_PHRASE on index +create temp table phrase_index_test(fts tsvector); +insert into phrase_index_test values('A fat cat has just eaten a rat.'); +create index phrase_index_test_idx on phrase_index_test using gin(fts); +set enable_seqscan = off; +select * from phrase_index_test 
where fts @@ phraseto_tsquery('fat cat'); + fts +------------------------------------------------- + 'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.' +(1 row) + +set enable_seqscan = on; diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out index a386a46..4a55f56 100644 --- a/src/test/regress/expected/tstypes.out +++ b/src/test/regress/expected/tstypes.out @@ -277,15 +277,15 @@ SELECT '(!1|2)&3'::tsquery; (1 row) SELECT '1|(2|(4|(5|6)))'::tsquery; - tsquery ------------------------------------------ - '1' | ( '2' | ( '4' | ( '5' | '6' ) ) ) + tsquery +----------------------------- + '1' | '2' | '4' | '5' | '6' (1 row) SELECT '1|2|4|5|6'::tsquery; - tsquery ------------------------------------------ - ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6' + tsquery +----------------------------- + '1' | '2' | '4' | '5' | '6' (1 row) SELECT '1&(2&(4&(5&6)))'::tsquery; @@ -325,11 +325,145 @@ SELECT $$'\\as'$$::tsquery; (1 row) SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery; + tsquery +-------------------------------------- + 'a':* & 'nbb':*AC | 'doo':*A | 'goo' +(1 row) + +SELECT setweight('a:d & b:a & f'::tsquery, 'c'); + setweight +------------------------- + 'a':CD & 'b':AC & 'f':C +(1 row) + +-- phrase transformation +SELECT 'a ? (b|c)'::tsquery; + tsquery +----------------------- + 'a' ? 'b' | 'a' ? 'c' +(1 row) + +SELECT '(a|b) ? c'::tsquery; + tsquery +----------------------- + 'a' ? 'c' | 'b' ? 'c' +(1 row) + +SELECT '(a|b) ? (d|c)'::tsquery; + tsquery +----------------------------------------------- + 'a' ? 'd' | 'b' ? 'd' | 'a' ? 'c' | 'b' ? 'c' +(1 row) + +SELECT 'a ? (b&c)'::tsquery; + tsquery +------------------------------- + ( 'a' ? 'b' ) & ( 'a' ? 'c' ) +(1 row) + +SELECT '(a&b) ? c'::tsquery; + tsquery +------------------------------- + ( 'a' ? 'c' ) & ( 'b' ? 'c' ) +(1 row) + +SELECT '(a&b) ? (d&c)'::tsquery; + tsquery +--------------------------------------------------------------- + ( 'a' ? 'd' ) & ( 'b' ? 'd' ) & ( 'a' ? 
'c' ) & ( 'b' ? 'c' ) +(1 row) + +SELECT 'a ? !b'::tsquery; + tsquery +---------------------- + 'a' & !( 'a' ? 'b' ) +(1 row) + +SELECT '!a ? b'::tsquery; + tsquery +---------------------- + !( 'a' ? 'b' ) & 'b' +(1 row) + +SELECT '!a ? !b'::tsquery; + tsquery +---------------------------------- + !'a' & !( !( 'a' ? 'b' ) & 'b' ) +(1 row) + +SELECT 'a ? !(b&c)'::tsquery; tsquery ------------------------------------------ - ( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo' + 'a' & !( ( 'a' ? 'b' ) & ( 'a' ? 'c' ) ) +(1 row) + +SELECT 'a ? !(b|c)'::tsquery; + tsquery +---------------------------------- + 'a' & !( 'a' ? 'b' | 'a' ? 'c' ) +(1 row) + +SELECT '!(a&b) ? c'::tsquery; + tsquery +------------------------------------------ + !( ( 'a' ? 'c' ) & ( 'b' ? 'c' ) ) & 'c' +(1 row) + +SELECT '!(a|b) ? c'::tsquery; + tsquery +---------------------------------- + !( 'a' ? 'c' | 'b' ? 'c' ) & 'c' +(1 row) + +SELECT '(!a|b) ? c'::tsquery; + tsquery +---------------------------------- + !( 'a' ? 'c' ) & 'c' | 'b' ? 'c' (1 row) +SELECT '(!a&b) ? c'::tsquery; + tsquery +-------------------------------------- + !( 'a' ? 'c' ) & 'c' & ( 'b' ? 'c' ) +(1 row) + +SELECT 'c ? (!a|b)'::tsquery; + tsquery +---------------------------------- + 'c' & !( 'c' ? 'a' ) | 'c' ? 'b' +(1 row) + +SELECT 'c ? (!a&b)'::tsquery; + tsquery +-------------------------------------- + 'c' & !( 'c' ? 'a' ) & ( 'c' ? 'b' ) +(1 row) + +SELECT '(a|b) ? !c'::tsquery; + tsquery +-------------------------------------------- + ( 'a' | 'b' ) & !( 'a' ? 'c' | 'b' ? 'c' ) +(1 row) + +SELECT '(a&b) ? !c'::tsquery; + tsquery +------------------------------------------------ + 'a' & 'b' & !( ( 'a' ? 'c' ) & ( 'b' ? 'c' ) ) +(1 row) + +SELECT '!c ? (a|b)'::tsquery; + tsquery +--------------------------------------------- + !( 'c' ? 'a' ) & 'a' | !( 'c' ? 'b' ) & 'b' +(1 row) + +SELECT '!c ? (a&b)'::tsquery; + tsquery +--------------------------------------------- + !( 'c' ? 'a' ) & 'a' & !( 'c' ? 
'b' ) & 'b' +(1 row) + +--comparisons SELECT 'a' < 'b & c'::tsquery as "true"; true ------ @@ -342,10 +476,10 @@ SELECT 'a' > 'b & c'::tsquery as "false"; f (1 row) -SELECT 'a | f' < 'b & c'::tsquery as "true"; - true ------- - t +SELECT 'a | f' < 'b & c'::tsquery as "false"; + false +------- + f (1 row) SELECT 'a | ff' < 'b & c'::tsquery as "false"; @@ -360,6 +494,7 @@ SELECT 'a | f | g' < 'b & c'::tsquery as "false"; f (1 row) +--concatenation SELECT numnode( 'new'::tsquery ); numnode --------- @@ -402,6 +537,36 @@ SELECT 'foo & bar'::tsquery && 'asd | fg'; 'foo' & 'bar' & ( 'asd' | 'fg' ) (1 row) +SELECT 'a' ?? 'b & d'::tsquery; + ?column? +------------------------------- + ( 'a' ? 'b' ) & ( 'a' ? 'd' ) +(1 row) + +SELECT 'a & g' ?? 'b & d'::tsquery; + ?column? +--------------------------------------------------------------- + ( 'a' ? 'b' ) & ( 'g' ? 'b' ) & ( 'a' ? 'd' ) & ( 'g' ? 'd' ) +(1 row) + +SELECT 'a & g' ?? 'b | d'::tsquery; + ?column? +--------------------------------------------------------------- + ( 'a' ? 'b' ) & ( 'g' ? 'b' ) | ( 'a' ? 'd' ) & ( 'g' ? 'd' ) +(1 row) + +SELECT 'a & g' ?? 'b ? d'::tsquery; + ?column? +--------------------------------------------------- + ( 'a' ? ( 'b' ? 'd' ) ) & ( 'g' ? ( 'b' ? 'd' ) ) +(1 row) + +SELECT tsquery_phrase('a ?[3] g', 'b & d', 10); + tsquery_phrase +----------------------------------------------------------------- + ( ( 'a' ?[3] 'g' ) ?[10] 'b' ) & ( ( 'a' ?[3] 'g' ) ?[10] 'd' ) +(1 row) + -- tsvector-tsquery operations SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca' as "true"; true @@ -499,6 +664,80 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; t (1 row) +--phrase search +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ? 2' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ?[2] 2' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ? 
3' AS "false"; + false +------- + f +(1 row) + +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ?[2] 3' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 11 3') @@ '1 ? 3' AS "false"; + false +------- + f +(1 row) + +SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* ? 3' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 ? 2 ? 3' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 ? 2) ? 3' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 ? (2 ? 3)' AS "false"; + false +------- + f +(1 row) + +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 ?[2] (2 ? 3)' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 ? 2) ? 3' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 ? 2 ? 3' AS "true"; + true +------ + t +(1 row) + +--ranking SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); ts_rank ----------- @@ -613,6 +852,120 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s'); 0.1 (1 row) +SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a ? s'); + ts_rank_cd +------------ + 0.181818 +(1 row) + +SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a ? s'); + ts_rank_cd +------------ + 0.133333 +(1 row) + +SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a ? s'); + ts_rank_cd +------------ + 0.1 +(1 row) + +SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a ? s'); + ts_rank_cd +------------ + 0.1 +(1 row) + +SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a ?[2] s:A'); + ts_rank_cd +------------ + 0.0909091 +(1 row) + +SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a ?[2] s:A'); + ts_rank_cd +------------ + 0.0909091 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a ? s:*'); + ts_rank_cd +------------ + 0.1 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a ? 
s:*'); + ts_rank_cd +------------ + 0.1 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a ? s:* ? sa:A'); + ts_rank_cd +------------ + 0.0714286 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a ? s:* ? sa:B'); + ts_rank_cd +------------ + 0 +(1 row) + +SELECT 'a:1 b:2'::tsvector @@ 'a ? b'::tsquery AS "true"; + true +------ + t +(1 row) + +SELECT 'a:1 b:2'::tsvector @@ 'a ?[0] b'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'a:1 b:2'::tsvector @@ 'a ?[1] b'::tsquery AS "true"; + true +------ + t +(1 row) + +SELECT 'a:1 b:2'::tsvector @@ 'a ?[2] b'::tsquery AS "true"; + true +------ + t +(1 row) + +SELECT 'a:1 b:3'::tsvector @@ 'a ? b'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'a:1 b:3'::tsvector @@ 'a ?[0] b'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'a:1 b:3'::tsvector @@ 'a ?[1] b'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'a:1 b:3'::tsvector @@ 'a ?[2] b'::tsquery AS "true"; + true +------ + t +(1 row) + +SELECT 'a:1 b:3'::tsvector @@ 'a ?[3] b'::tsquery AS "true"; + true +------ + t +(1 row) + -- tsvector editing operations SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); strip diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql index d13ce2e..0ab461a 100644 --- a/src/test/regress/sql/tsdicts.sql +++ b/src/test/regress/sql/tsdicts.sql @@ -142,6 +142,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb SELECT to_tsquery('hunspell_tst', 'footballklubber'); SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); +SELECT to_tsquery('hunspell_tst', 'footballyklubber:b ? 
sky'); +SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky'); + -- Test ispell dictionary with hunspell affix with FLAG long in configuration ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING REPLACE hunspell WITH hunspell_long; diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index fa460cd..77d5c7c 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -129,6 +129,52 @@ SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg'); SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg'; +-- Check stop word deletion, a and s are stop-words +SELECT to_tsquery('english', '(1 ? 2) ? a'); +SELECT to_tsquery('english', '(1 ? a) ? 2'); +SELECT to_tsquery('english', '(a ? 1) ? 2'); +SELECT to_tsquery('english', 'a ? (1 ? 2)'); +SELECT to_tsquery('english', '1 ? (a ? 2)'); +SELECT to_tsquery('english', '1 ? (2 ? a)'); + +SELECT to_tsquery('english', '(1 ? 2) ?[3] a'); +SELECT to_tsquery('english', '(1 ? a) ?[3] 2'); +SELECT to_tsquery('english', '(a ? 1) ?[3] 2'); +SELECT to_tsquery('english', 'a ?[3] (1 ? 2)'); +SELECT to_tsquery('english', '1 ?[3] (a ? 2)'); +SELECT to_tsquery('english', '1 ?[3] (2 ? a)'); + +SELECT to_tsquery('english', '(1 ?[3] 2) ? a'); +SELECT to_tsquery('english', '(1 ?[3] a) ? 2'); +SELECT to_tsquery('english', '(a ?[3] 1) ? 2'); +SELECT to_tsquery('english', 'a ? (1 ?[3] 2)'); +SELECT to_tsquery('english', '1 ? (a ?[3] 2)'); +SELECT to_tsquery('english', '1 ? (2 ?[3] a)'); + +SELECT to_tsquery('english', '((a ? 1) ? 2) ? s'); +SELECT to_tsquery('english', '(2 ? (a ? 1)) ? s'); +SELECT to_tsquery('english', '((1 ? a) ? 2) ? s'); +SELECT to_tsquery('english', '(2 ? (1 ? a)) ? s'); +SELECT to_tsquery('english', 's ? ((a ? 1) ? 2)'); +SELECT to_tsquery('english', 's ? (2 ? (a ? 1))'); +SELECT to_tsquery('english', 's ? ((1 ? a) ? 
2)'); +SELECT to_tsquery('english', 's ? (2 ? (1 ? a))'); + +SELECT to_tsquery('english', '((a ? 1) ? s) ? 2'); +SELECT to_tsquery('english', '(s ? (a ? 1)) ? 2'); +SELECT to_tsquery('english', '((1 ? a) ? s) ? 2'); +SELECT to_tsquery('english', '(s ? (1 ? a)) ? 2'); +SELECT to_tsquery('english', '2 ? ((a ? 1) ? s)'); +SELECT to_tsquery('english', '2 ? (s ? (a ? 1))'); +SELECT to_tsquery('english', '2 ? ((1 ? a) ? s)'); +SELECT to_tsquery('english', '2 ? (s ? (1 ? a))'); + +SELECT to_tsquery('foo ? (a ? (the ? bar))'); +SELECT to_tsquery('((foo ? a) ? the) ? bar'); +SELECT to_tsquery('foo ? a ? the ? bar'); +SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways'); + + SELECT ts_rank_cd(to_tsvector('english', ' Day after day, day after day, We stuck, nor breath nor motion, @@ -165,6 +211,18 @@ Water, water, every where, S. T. Coleridge (1772-1834) '), to_tsquery('english', 'ocean')); +SELECT ts_rank_cd(to_tsvector('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +'), to_tsquery('english', 'painted ? Ship')); + SELECT ts_rank_cd(strip(to_tsvector('both stripped')), to_tsquery('both & stripped')); @@ -209,6 +267,30 @@ S. T. Coleridge (1772-1834) ', to_tsquery('english', 'ocean')); SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', phraseto_tsquery('english', 'painted Ocean')); + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. 
+Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', phraseto_tsquery('english', 'idle as a painted Ship')); + +SELECT ts_headline('english', ' <html> <!-- some comment --> <body> @@ -222,6 +304,10 @@ ff-bg </html>', to_tsquery('english', 'sea&foo'), 'HighlightAll=true'); +SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 ? 3', 'MaxWords=2, MinWords=1'); +SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1'); +SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 ? 3', 'MaxWords=4, MinWords=1'); + --Check if headline fragments work SELECT ts_headline('english', ' Day after day, day after day, @@ -283,6 +369,8 @@ CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); Moscow moskva | moscow 'Sanct Peter' Peterburg | peter | 'Sanct Peterburg' 'foo bar qq' foo & (bar | qq) & city +1 & (2 ? 3) 2 ? 4 +5 ? 6 5 ? 7 \. \set ECHO all @@ -320,6 +408,11 @@ SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'); +SELECT ts_rewrite('1 & (2 ? 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); +SELECT ts_rewrite('1 & (2 ?[2] 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); +SELECT ts_rewrite('5 ? (1 & (2 ? 3))', 'SELECT keyword, sample FROM test_tsquery'::text ); +SELECT ts_rewrite('5 ? 
(6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text ); + SELECT keyword FROM test_tsquery WHERE keyword @> 'new'; SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow'; @@ -386,3 +479,11 @@ select * from pendtest where 'ipsa:*'::tsquery @@ ts; select * from pendtest where 'ips:*'::tsquery @@ ts; select * from pendtest where 'ipt:*'::tsquery @@ ts; select * from pendtest where 'ipi:*'::tsquery @@ ts; + +--check OP_PHRASE on index +create temp table phrase_index_test(fts tsvector); +insert into phrase_index_test values('A fat cat has just eaten a rat.'); +create index phrase_index_test_idx on phrase_index_test using gin(fts); +set enable_seqscan = off; +select * from phrase_index_test where fts @@ phraseto_tsquery('fat cat'); +set enable_seqscan = on; diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql index 38b7f65..271359c 100644 --- a/src/test/regress/sql/tstypes.sql +++ b/src/test/regress/sql/tstypes.sql @@ -57,13 +57,44 @@ SELECT '1&(2&(4&(5|!6)))'::tsquery; SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery; SELECT $$'\\as'$$::tsquery; SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery; +SELECT setweight('a:d & b:a & f'::tsquery, 'c'); +-- phrase transformation +SELECT 'a ? (b|c)'::tsquery; +SELECT '(a|b) ? c'::tsquery; +SELECT '(a|b) ? (d|c)'::tsquery; + +SELECT 'a ? (b&c)'::tsquery; +SELECT '(a&b) ? c'::tsquery; +SELECT '(a&b) ? (d&c)'::tsquery; + +SELECT 'a ? !b'::tsquery; +SELECT '!a ? b'::tsquery; +SELECT '!a ? !b'::tsquery; + +SELECT 'a ? !(b&c)'::tsquery; +SELECT 'a ? !(b|c)'::tsquery; +SELECT '!(a&b) ? c'::tsquery; +SELECT '!(a|b) ? c'::tsquery; + +SELECT '(!a|b) ? c'::tsquery; +SELECT '(!a&b) ? c'::tsquery; +SELECT 'c ? (!a|b)'::tsquery; +SELECT 'c ? (!a&b)'::tsquery; + +SELECT '(a|b) ? !c'::tsquery; +SELECT '(a&b) ? !c'::tsquery; +SELECT '!c ? (a|b)'::tsquery; +SELECT '!c ? 
(a&b)'::tsquery; + +--comparisons SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' > 'b & c'::tsquery as "false"; -SELECT 'a | f' < 'b & c'::tsquery as "true"; +SELECT 'a | f' < 'b & c'::tsquery as "false"; SELECT 'a | ff' < 'b & c'::tsquery as "false"; SELECT 'a | f | g' < 'b & c'::tsquery as "false"; +--concatenation SELECT numnode( 'new'::tsquery ); SELECT numnode( 'new & york'::tsquery ); SELECT numnode( 'new & york | qwery'::tsquery ); @@ -72,6 +103,11 @@ SELECT 'foo & bar'::tsquery && 'asd'; SELECT 'foo & bar'::tsquery || 'asd & fg'; SELECT 'foo & bar'::tsquery || !!'asd & fg'::tsquery; SELECT 'foo & bar'::tsquery && 'asd | fg'; +SELECT 'a' ?? 'b & d'::tsquery; +SELECT 'a & g' ?? 'b & d'::tsquery; +SELECT 'a & g' ?? 'b | d'::tsquery; +SELECT 'a & g' ?? 'b ? d'::tsquery; +SELECT tsquery_phrase('a ?[3] g', 'b & d', 10); -- tsvector-tsquery operations @@ -93,6 +129,23 @@ SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true"; SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; +--phrase search +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ? 2' AS "true"; +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ?[2] 2' AS "true"; +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ? 3' AS "false"; +SELECT to_tsvector('simple', '1 2 3 1') @@ '1 ?[2] 3' AS "true"; + +SELECT to_tsvector('simple', '1 2 11 3') @@ '1 ? 3' AS "false"; +SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* ? 3' AS "true"; + +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 ? 2 ? 3' AS "true"; +SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 ? 2) ? 3' AS "true"; +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 ? (2 ? 3)' AS "false"; +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 ?[2] (2 ? 3)' AS "true"; +SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 ? 2) ? 3' AS "true"; +SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 ? 2 ? 
3' AS "true"; + +--ranking SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*'); @@ -114,6 +167,27 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s'); +SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a ? s'); +SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a ? s'); +SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a ? s'); +SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a ? s'); +SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a ?[2] s:A'); +SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a ?[2] s:A'); +SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a ? s:*'); +SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a ? s:*'); +SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a ? s:* ? sa:A'); +SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a ? s:* ? sa:B'); + +SELECT 'a:1 b:2'::tsvector @@ 'a ? b'::tsquery AS "true"; +SELECT 'a:1 b:2'::tsvector @@ 'a ?[0] b'::tsquery AS "false"; +SELECT 'a:1 b:2'::tsvector @@ 'a ?[1] b'::tsquery AS "true"; +SELECT 'a:1 b:2'::tsvector @@ 'a ?[2] b'::tsquery AS "true"; +SELECT 'a:1 b:3'::tsvector @@ 'a ? b'::tsquery AS "false"; +SELECT 'a:1 b:3'::tsvector @@ 'a ?[0] b'::tsquery AS "false"; +SELECT 'a:1 b:3'::tsvector @@ 'a ?[1] b'::tsquery AS "false"; +SELECT 'a:1 b:3'::tsvector @@ 'a ?[2] b'::tsquery AS "true"; +SELECT 'a:1 b:3'::tsvector @@ 'a ?[3] b'::tsquery AS "true"; + -- tsvector editing operations SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers