> On 4 April 2018 at 16:09, Teodor Sigaev <teo...@sigaev.ru> wrote: > >>> Hm, seems, it's useful feature, but I suggest to make separate function >>> jsonb_any_to_tsvector and add support for boolean too (if you know better >>> name for function, do not hide it). Changing behavior of existing >>> function >>> is not obvious for users and, seems, should not backpatched. >> >> >> What do you think about having not a separate function, but a flag >> argument to >> the existing one (like `create` in `jsonb_set`), that will have false as >> default value? The result would be the same, but without an extra function >> with >> almost the same implementation. > > > tsvector jsonb_to_tsvector(jsonb[, bool]) ? > Agreed. Second arg should be optional.
Unfortunately, this idea with a flag argument can't be implemented easily (related discussion is here [1]). So I've modified the patch accordingly to your original suggestion about having separate functions `json(b)_all_to_tsvector`. 1: https://www.postgresql.org/message-id/flat/CA%2Bq6zcVJ%2BWx%2B-%3DkkN5UC0T-LtsJWnx0g9S0xSnn3jUWkriufDA%40mail.gmail.com
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 5abb1c4..895b60a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -9696,6 +9696,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple </row> <row> <entry> + <literal><function>json(b)_all_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type> , </optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>)</function></literal> + </entry> + <entry><type>tsvector</type></entry> + <entry> + reduce each string, numeric or boolean value in the document to a <type>tsvector</type>, + and then concatenate those in document order to produce a single <type>tsvector</type> + </entry> + <entry><literal>json_all_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json)</literal></entry> + <entry><literal>'123':5 'fat':2 'rat':3</literal></entry> + </row> + <row> + <entry> <indexterm> <primary>ts_delete</primary> </indexterm> diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index ea5947a..02c2b00 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS) PointerGetDatum(in))); } -Datum -jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) +/* + * Worker function for jsonb(_all)_to_tsvector(_byid) + */ +static TSVector +jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, bool allTypes) { - Oid cfgId = PG_GETARG_OID(0); - Jsonb *jb = PG_GETARG_JSONB_P(1); - TSVector result; TSVectorBuildState state; ParsedText prs; @@ -281,11 +281,24 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) state.prs = &prs; state.cfgId = cfgId; - iterate_jsonb_string_values(jb, &state, add_to_tsvector); + if (allTypes) + iterate_jsonb_all_values(jb, &state, add_to_tsvector); + else + iterate_jsonb_string_values(jb, &state, add_to_tsvector); - PG_FREE_IF_COPY(jb, 1); - result = make_tsvector(&prs); + return make_tsvector(&prs); +} + +Datum +jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + Jsonb *jb = PG_GETARG_JSONB_P(1); + TSVector result; + + result = jsonb_to_tsvector_worker(cfgId, jb, false); + PG_FREE_IF_COPY(jb, 1); PG_RETURN_TSVECTOR(result); } @@ -295,19 +308,48 @@ jsonb_to_tsvector(PG_FUNCTION_ARGS) { Jsonb *jb = PG_GETARG_JSONB_P(0); Oid cfgId; + TSVector result; cfgId = getTSCurrentConfig(true); - PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid, - ObjectIdGetDatum(cfgId), - JsonbPGetDatum(jb))); + result = jsonb_to_tsvector_worker(cfgId, jb, false); + PG_FREE_IF_COPY(jb, 1); + + PG_RETURN_TSVECTOR(result); } Datum -json_to_tsvector_byid(PG_FUNCTION_ARGS) +jsonb_all_to_tsvector_byid(PG_FUNCTION_ARGS) { Oid cfgId = PG_GETARG_OID(0); - text *json = PG_GETARG_TEXT_P(1); + Jsonb *jb = PG_GETARG_JSONB_P(1); TSVector result; + + result = jsonb_to_tsvector_worker(cfgId, jb, true); + PG_FREE_IF_COPY(jb, 1); + + PG_RETURN_TSVECTOR(result); +} + +Datum +jsonb_all_to_tsvector(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + Oid cfgId; + TSVector result; + + cfgId = getTSCurrentConfig(true); + result = jsonb_to_tsvector_worker(cfgId, jb, true); + PG_FREE_IF_COPY(jb, 1); + + PG_RETURN_TSVECTOR(result); +} + +/* + * Worker function for json(_all)_to_tsvector(_byid) + */ +static TSVector +json_to_tsvector_worker(Oid cfgId, text *json, bool allTypes) +{ TSVectorBuildState state; ParsedText prs; @@ -316,11 +358,20 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS) state.prs = &prs; state.cfgId = cfgId; - iterate_json_string_values(json, &state, add_to_tsvector); + iterate_json_values(json, allTypes, &state, add_to_tsvector); - PG_FREE_IF_COPY(json, 1); + return make_tsvector(&prs); +} + +Datum +json_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *json = PG_GETARG_TEXT_P(1); + TSVector result; - result = make_tsvector(&prs); + result = json_to_tsvector_worker(cfgId, json, false); + PG_FREE_IF_COPY(json, 1); PG_RETURN_TSVECTOR(result); } @@ -330,11 +381,40 @@ json_to_tsvector(PG_FUNCTION_ARGS) { text *json = PG_GETARG_TEXT_P(0); Oid cfgId; + TSVector result; cfgId = getTSCurrentConfig(true); - PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid, - ObjectIdGetDatum(cfgId), - PointerGetDatum(json))); + result = json_to_tsvector_worker(cfgId, json, false); + PG_FREE_IF_COPY(json, 1); + + PG_RETURN_TSVECTOR(result); +} + +Datum +json_all_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *json = PG_GETARG_TEXT_P(1); + TSVector result; + + result = json_to_tsvector_worker(cfgId, json, true); + PG_FREE_IF_COPY(json, 1); + + PG_RETURN_TSVECTOR(result); +} + +Datum +json_all_to_tsvector(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + Oid cfgId; + TSVector result; + + cfgId = getTSCurrentConfig(true); + result = json_to_tsvector_worker(cfgId, json, true); + PG_FREE_IF_COPY(json, 1); + + PG_RETURN_TSVECTOR(result); } /* diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index fa78451..624185e 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -474,8 +474,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems, int level, Jsonb *newval, uint32 nelems, int op_type); static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb); -/* function supporting iterate_json_string_values */ +/* function supporting iterate_json_values */ static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype); +static void iterate_all_values_scalar(void *state, char *token, JsonTokenType tokentype); /* functions supporting transform_json_string_values */ static void transform_string_values_object_start(void *state); @@ -4939,8 +4940,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, } /* - * Iterate over jsonb string values or elements, and pass them together with an - * iteration state to a specified JsonIterateStringValuesAction. + * Iterate over jsonb string values or elements, and pass them together + * with an iteration state to a specified JsonIterateStringValuesAction. */ void iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action) @@ -4961,11 +4962,50 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio } /* - * Iterate over json string values or elements, and pass them together with an + * Iterate over jsonb string/numeric/boolean values or elements, and pass them + * together with an iteration state to a specified JsonIterateStringValuesAction. + */ +void +iterate_jsonb_all_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action) +{ + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken type; + char *val; + + it = JsonbIteratorInit(&jb->root); + + while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + if (type == WJB_VALUE || type == WJB_ELEM) + { + + if (v.type == jbvString) + action(state, v.val.string.val, v.val.string.len); + + if (v.type == jbvNumeric) + { + val = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(v.val.numeric))); + action(state, val, strlen(val)); + } + + if (v.type == jbvNumeric) + { + val = v.val.boolean ? "true" : "false"; + action(state, val, strlen(val)); + } + } + } +} + +/* + * Iterate over json values and elements, and pass them together with an * iteration state to a specified JsonIterateStringValuesAction. */ void -iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action) +iterate_json_values(text *json, bool all_types, void *action_state, + JsonIterateStringValuesAction action) { JsonLexContext *lex = makeJsonLexContext(json, true); JsonSemAction *sem = palloc0(sizeof(JsonSemAction)); @@ -4976,14 +5016,17 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu state->action_state = action_state; sem->semstate = (void *) state; - sem->scalar = iterate_string_values_scalar; + if (all_types) + sem->scalar = iterate_all_values_scalar; + else + sem->scalar = iterate_string_values_scalar; pg_parse_json(lex, sem); } /* - * An auxiliary function for iterate_json_string_values to invoke a specified - * JsonIterateStringValuesAction. + * An auxiliary function for iterate_json_values to invoke a specified + * JsonIterateStringValuesAction for string values. */ static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype) @@ -4995,6 +5038,20 @@ iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype) } /* + * An auxiliary function for iterate_json_values to invoke a specified + * JsonIterateStringValuesAction for string/numeric/boolean values. + */ +static void +iterate_all_values_scalar(void *state, char *token, JsonTokenType tokentype) +{ + IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state; + + if (tokentype == JSON_TOKEN_STRING || tokentype == JSON_TOKEN_NUMBER || + tokentype == JSON_TOKEN_TRUE || tokentype == JSON_TOKEN_FALSE) + _state->action(_state->action_state, token, strlen(token)); +} + +/* * Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction * to every string value or element. Any necessary context for a * JsonTransformStringValuesAction can be passed in the action_state variable. diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index ec50afc..8b1a80e 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4975,13 +4975,21 @@ DESCR("transform to tsquery"); DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ )); -DESCR("transform jsonb to tsvector"); +DESCR("transform string values from jsonb to tsvector"); +DATA(insert OID = 4213 ( jsonb_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_all_to_tsvector _null_ _null_ _null_ )); +DESCR("transform all values from jsonb to tsvector"); DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ )); -DESCR("transform json to tsvector"); +DESCR("transform string values from json to tsvector"); +DATA(insert OID = 4215 ( json_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_all_to_tsvector _null_ _null_ _null_ )); +DESCR("transform all values from json to tsvector"); DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ )); DESCR("transform jsonb to tsvector"); +DATA(insert OID = 4214 ( jsonb_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_all_to_tsvector_byid _null_ _null_ _null_ )); +DESCR("transform jsonb to tsvector"); DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ )); -DESCR("transform json to tsvector"); +DESCR("transform string values from json to tsvector"); +DATA(insert OID = 4216 ( json_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_all_to_tsvector_byid _null_ _null_ _null_ )); +DESCR("transform all values from json to tsvector"); DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ )); DESCR("trigger for automatic update of tsvector column"); diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h index e39572e..6f5fb85 100644 --- a/src/include/utils/jsonapi.h +++ b/src/include/utils/jsonapi.h @@ -140,7 +140,9 @@ typedef text *(*JsonTransformStringValuesAction) (void *state, char *elem_value, extern void iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action); -extern void iterate_json_string_values(text *json, void *action_state, +extern void iterate_jsonb_all_values(Jsonb *jb, void *state, + JsonIterateStringValuesAction action); +extern void iterate_json_values(text *json, bool allTypes, void *action_state, JsonIterateStringValuesAction action); extern Jsonb *transform_jsonb_string_values(Jsonb *jsonb, void *action_state, JsonTransformStringValuesAction transform_action); diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 06c728e..8ce7874 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -2324,6 +2324,19 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13 (1 row) +-- json to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json); + to_tsvector +--------------------------------- + 'aaa':1 'bbb':3 'ccc':5 'ddd':4 +(1 row) + +select json_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json); + json_all_to_tsvector +------------------------------------------------- + '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4 +(1 row) + -- ts_vector corner cases select to_tsvector('""'::json); to_tsvector @@ -2331,24 +2344,48 @@ select to_tsvector('""'::json); (1 row) +select json_all_to_tsvector('""'::json); + json_all_to_tsvector +---------------------- + +(1 row) + select to_tsvector('{}'::json); to_tsvector ------------- (1 row) +select json_all_to_tsvector('{}'::json); + json_all_to_tsvector +---------------------- + +(1 row) + select to_tsvector('[]'::json); to_tsvector ------------- (1 row) +select json_all_to_tsvector('[]'::json); + json_all_to_tsvector +---------------------- + +(1 row) + select to_tsvector('null'::json); to_tsvector ------------- (1 row) +select json_all_to_tsvector('null'::json); + json_all_to_tsvector +---------------------- + +(1 row) + -- ts_headline for json select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); ts_headline diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index f8d6e6f..25cce08 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -4122,6 +4122,19 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13 (1 row) +-- json to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb); + to_tsvector +--------------------------------- + 'aaa':1 'bbb':3 'ccc':5 'ddd':4 +(1 row) + +select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb); + jsonb_all_to_tsvector +-------------------------------------------------------------- + '123':7 '456':11 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'true':9,13 +(1 row) + -- ts_vector corner cases select to_tsvector('""'::jsonb); to_tsvector @@ -4129,24 +4142,48 @@ select to_tsvector('""'::jsonb); (1 row) +select jsonb_all_to_tsvector('""'::jsonb); + jsonb_all_to_tsvector +----------------------- + +(1 row) + select to_tsvector('{}'::jsonb); to_tsvector ------------- (1 row) +select jsonb_all_to_tsvector('{}'::jsonb); + jsonb_all_to_tsvector +----------------------- + +(1 row) + select to_tsvector('[]'::jsonb); to_tsvector ------------- (1 row) +select jsonb_all_to_tsvector('[]'::jsonb); + jsonb_all_to_tsvector +----------------------- + +(1 row) + select to_tsvector('null'::jsonb); to_tsvector ------------- (1 row) +select jsonb_all_to_tsvector('null'::jsonb); + jsonb_all_to_tsvector +----------------------- + +(1 row) + -- ts_headline for jsonb select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); ts_headline diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 256652c..f22c0db 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -763,11 +763,19 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c" -- json to tsvector with stop words select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json); +-- json to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json); +select json_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json); + -- ts_vector corner cases select to_tsvector('""'::json); +select json_all_to_tsvector('""'::json); select to_tsvector('{}'::json); +select json_all_to_tsvector('{}'::json); select to_tsvector('[]'::json); +select json_all_to_tsvector('[]'::json); select to_tsvector('null'::json); +select json_all_to_tsvector('null'::json); -- ts_headline for json select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 2439f94..e16e7b4 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1089,11 +1089,19 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c" -- jsonb to tsvector with stop words select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb); +-- json to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb); +select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb); + -- ts_vector corner cases select to_tsvector('""'::jsonb); +select jsonb_all_to_tsvector('""'::jsonb); select to_tsvector('{}'::jsonb); +select jsonb_all_to_tsvector('{}'::jsonb); select to_tsvector('[]'::jsonb); +select jsonb_all_to_tsvector('[]'::jsonb); select to_tsvector('null'::jsonb); +select jsonb_all_to_tsvector('null'::jsonb); -- ts_headline for jsonb select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));