Le dim. 26 sept. 2021 à 22:41, Jean-Christophe Arnu <jca...@gmail.com> a écrit :
> > > Le dim. 26 sept. 2021 à 15:55, Artur Zakirov <zaar...@gmail.com> a écrit : > >> Nice catch! The patch looks good to me. >> Can you also add a more general test case: >> >> =# SELECT $$'' '1' '2'$$::tsvector; >> ERROR: syntax error in tsvector: "'' '1' '2'" >> LINE 1: SELECT $$'' '1' '2'$$::tsvector; >> >> > Thank you, Artur, for suggesting this test. > It is now included in this patch. > > > Two more things: * I updated the documentation for the array_to_tsvector(), ts_delete() and setweight() functions (so here's a new patch); * I should mention François Ferry from Logilab, who first reported the backup/restore problem that led to this patch. I think this should be OK now that the documentation is up to date. Kind regards. -- Jean-Christophe Arnu
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 78812b2dbe..01f0b870ca 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -12896,7 +12896,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple </para> <para> Converts an array of lexemes to a <type>tsvector</type>. - The given strings are used as-is without further processing. + The given strings are used as-is. Some checks are performed + on array elements. Empty strings and <literal>NULL</literal> values + will cause an error to be raised. </para> <para> <literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal> @@ -13079,6 +13081,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple Assigns the specified <parameter>weight</parameter> to elements of the <parameter>vector</parameter> that are listed in <parameter>lexemes</parameter>. + Some checks are performed on <parameter>lexemes</parameter>. + Empty strings and <literal>NULL</literal> values + will cause an error to be raised. </para> <para> <literal>setweight('fat:2,4 cat:3 rat:5,6B'::tsvector, 'A', '{cat,rat}')</literal> @@ -13256,6 +13261,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple Removes any occurrences of the lexemes in <parameter>lexemes</parameter> from the <parameter>vector</parameter>. + Some checks are performed on <parameter>lexemes</parameter>. + Empty strings and <literal>NULL</literal> values + will cause an error to be raised. 
</para> <para> <literal>ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat'])</literal> diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 9236ebcc8f..00f80ffcbc 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -329,6 +329,12 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS) lex = VARDATA(dlexemes[i]); lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + + if (lex_len == 0) + ereport(ERROR, + (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING), + errmsg("lexeme array may not contain empty strings"))); + lex_pos = tsvector_bsearch(tsout, lex, lex_len); if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0) @@ -609,6 +615,12 @@ tsvector_delete_arr(PG_FUNCTION_ARGS) lex = VARDATA(dlexemes[i]); lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + + if (lex_len == 0) + ereport(ERROR, + (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING), + errmsg("lexeme array may not contain empty strings"))); + lex_pos = tsvector_bsearch(tsin, lex, lex_len); if (lex_pos >= 0) @@ -761,13 +773,18 @@ array_to_tsvector(PG_FUNCTION_ARGS) deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems); - /* Reject nulls (maybe we should just ignore them, instead?) */ + /* Reject nulls and zero length strings (maybe we should just ignore them, instead?) */ for (i = 0; i < nitems; i++) { if (nulls[i]) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("lexeme array may not contain nulls"))); + + if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0) + ereport(ERROR, + (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING), + errmsg("lexeme array may not contain empty strings"))); } /* Sort and de-dup, because this is required for a valid tsvector. 
*/ diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out index 2601e312df..f8bf9c6051 100644 --- a/src/test/regress/expected/tstypes.out +++ b/src/test/regress/expected/tstypes.out @@ -85,6 +85,10 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; 'a':3A,4B 'b':2A 'ba':1237 (1 row) +SELECT $$'' '1' '2'$$::tsvector; +ERROR: syntax error in tsvector: "'' '1' '2'" +LINE 1: SELECT $$'' '1' '2'$$::tsvector; + ^ --Base tsquery test SELECT '1'::tsquery; tsquery @@ -1260,6 +1264,8 @@ SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceshi SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]); ERROR: lexeme array may not contain nulls +SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '']); +ERROR: lexeme array may not contain empty strings SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); unnest --------------------------------------------- @@ -1330,6 +1336,8 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']); SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]); ERROR: lexeme array may not contain nulls +SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']); +ERROR: lexeme array may not contain empty strings -- array_to_tsvector must sort and de-dup SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']); array_to_tsvector @@ -1375,6 +1383,8 @@ SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}'); SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]); ERROR: lexeme array may not contain nulls +SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '']); +ERROR: lexeme array may not contain empty strings SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, 
'{a}'); ts_filter ------------------------------------------------------------- diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql index 30c8c702f0..79a7777407 100644 --- a/src/test/regress/sql/tstypes.sql +++ b/src/test/regress/sql/tstypes.sql @@ -17,6 +17,7 @@ SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector; SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector)); SELECT '''w'':4A,3B,2C,1D,5 a:8'; SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; +SELECT $$'' '1' '2'$$::tsvector; --Base tsquery test SELECT '1'::tsquery; @@ -240,6 +241,7 @@ SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3': SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']); SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']); SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]); +SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '']); SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); SELECT unnest('base hidden rebel spaceship strike'::tsvector); @@ -252,6 +254,7 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector); SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']); SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]); +SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']); -- array_to_tsvector must sort and de-dup SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']); @@ -262,6 +265,7 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', ' SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}'); SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}'); SELECT setweight('a asd w:5,6,12B,13A 
zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]); +SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '']); SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}'); SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a}');