> On Sun, Aug 02, 2020 at 12:50:12PM +0200, Pavel Stehule wrote: > > > > > Maybe this could be salvaged by flushing 0005 in its current form and > > > having the jsonb subscript executor do something like "if the current > > > value-to-be-subscripted is a JSON array, then try to convert the textual > > > subscript value to an integer". Not sure about what the error handling > > > rules ought to be like, though. > > > > I'm fine with the idea of separating 0005 patch and potentially pursuing > > it as an independent item. Just need to rebase 0006, since Pavel > > mentioned that it's a reasonable change he would like to see in the > > final result. > > > > +1
Here is what I had in mind. Worth noting that, like the original patch, the attached implementation keeps the same behaviour for negative indices. Also, I've removed a strange inconsistency one could notice with the original implementation, where one extra gap was introduced when appending something at the beginning of an array.
>From ed8036ffd1fd65f5779f408fd0a4080357b29df2 Mon Sep 17 00:00:00 2001 From: erthalion <9erthali...@gmail.com> Date: Thu, 31 Jan 2019 22:37:19 +0100 Subject: [PATCH v33 1/5] Base implementation of subscripting mechanism Introduce all the required machinery for generalizing the subscripting operation for different data types: * subscripting handler procedure, to set up a relation between data type and corresponding subscripting logic. * subscripting routines, that help generalize a subscripting logic, since it involves several stages, namely preparation (e.g. to figure out required types), validation (to check the input and return meaningful error message), fetch (executed when we extract a value using subscripting), assign (executed when we update a data type with a new value using subscripting). Without this it would be necessary to introduce more new fields to pg_type, which would be too invasive. All ArrayRef related logic was removed and landed as a separate subscripting implementation in the following patch from this series. The rest of the code was rearranged, to e.g. store a type of assigned value for an assign operation. 
Reviewed-by: Tom Lane, Arthur Zakirov --- .../pg_stat_statements/pg_stat_statements.c | 1 + src/backend/catalog/heap.c | 6 +- src/backend/catalog/pg_type.c | 15 +- src/backend/commands/typecmds.c | 77 +++++++++- src/backend/executor/execExpr.c | 25 +--- src/backend/executor/execExprInterp.c | 124 +++------------ src/backend/nodes/copyfuncs.c | 2 + src/backend/nodes/equalfuncs.c | 2 + src/backend/nodes/outfuncs.c | 2 + src/backend/nodes/readfuncs.c | 2 + src/backend/parser/parse_expr.c | 54 ++++--- src/backend/parser/parse_node.c | 141 ++++-------------- src/backend/parser/parse_target.c | 88 +++++------ src/backend/utils/adt/ruleutils.c | 21 +-- src/backend/utils/cache/lsyscache.c | 23 +++ src/include/c.h | 2 + src/include/catalog/pg_type.h | 9 +- src/include/executor/execExpr.h | 13 +- src/include/nodes/primnodes.h | 6 + src/include/nodes/subscripting.h | 42 ++++++ src/include/parser/parse_node.h | 6 +- src/include/utils/lsyscache.h | 1 + 22 files changed, 336 insertions(+), 326 deletions(-) create mode 100644 src/include/nodes/subscripting.h diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 14cad19afb..bf19507d32 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2793,6 +2793,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) JumbleExpr(jstate, (Node *) sbsref->reflowerindexpr); JumbleExpr(jstate, (Node *) sbsref->refexpr); JumbleExpr(jstate, (Node *) sbsref->refassgnexpr); + APP_JUMB(sbsref->refnestedfunc); } break; case T_FuncExpr: diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 3985326df6..911e2a1ffe 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1056,7 +1056,8 @@ AddNewRelationType(const char *typeName, -1, /* typmod */ 0, /* array dimensions for typBaseType */ false, /* Type NOT NULL */ - InvalidOid); /* rowtypes never have a collation */ + InvalidOid, /* rowtypes 
never have a collation */ + InvalidOid); /* typsubshandler - none */ } /* -------------------------------- @@ -1335,7 +1336,8 @@ heap_create_with_catalog(const char *relname, -1, /* typmod */ 0, /* array dimensions for typBaseType */ false, /* Type NOT NULL */ - InvalidOid); /* rowtypes never have a collation */ + InvalidOid, /* rowtypes never have a collation */ + 0); /* array implementation */ pfree(relarrayname); } diff --git a/src/backend/catalog/pg_type.c b/src/backend/catalog/pg_type.c index 79ffe317dd..bf353878f5 100644 --- a/src/backend/catalog/pg_type.c +++ b/src/backend/catalog/pg_type.c @@ -118,6 +118,7 @@ TypeShellMake(const char *typeName, Oid typeNamespace, Oid ownerId) values[Anum_pg_type_typtypmod - 1] = Int32GetDatum(-1); values[Anum_pg_type_typndims - 1] = Int32GetDatum(0); values[Anum_pg_type_typcollation - 1] = ObjectIdGetDatum(InvalidOid); + values[Anum_pg_type_typsubshandler - 1] = ObjectIdGetDatum(InvalidOid); nulls[Anum_pg_type_typdefaultbin - 1] = true; nulls[Anum_pg_type_typdefault - 1] = true; nulls[Anum_pg_type_typacl - 1] = true; @@ -158,10 +159,10 @@ TypeShellMake(const char *typeName, Oid typeNamespace, Oid ownerId) GenerateTypeDependencies(tup, pg_type_desc, NULL, - NULL, 0, false, false, + InvalidOid, false); /* Post creation hook for new shell type */ @@ -219,7 +220,8 @@ TypeCreate(Oid newTypeOid, int32 typeMod, int32 typNDims, /* Array dimensions for baseType */ bool typeNotNull, - Oid typeCollation) + Oid typeCollation, + Oid subscriptingHandlerProcedure) { Relation pg_type_desc; Oid typeObjectId; @@ -372,6 +374,7 @@ TypeCreate(Oid newTypeOid, values[Anum_pg_type_typtypmod - 1] = Int32GetDatum(typeMod); values[Anum_pg_type_typndims - 1] = Int32GetDatum(typNDims); values[Anum_pg_type_typcollation - 1] = ObjectIdGetDatum(typeCollation); + values[Anum_pg_type_typsubshandler - 1] = ObjectIdGetDatum(subscriptingHandlerProcedure); /* * initialize the default binary value for this type. 
Check for nulls of @@ -695,6 +698,14 @@ GenerateTypeDependencies(HeapTuple typeTuple, /* Normal dependency on the default expression. */ if (defaultExpr) recordDependencyOnExpr(&myself, defaultExpr, NIL, DEPENDENCY_NORMAL); + + if (OidIsValid(typeForm->typsubshandler)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = typeForm->typsubshandler; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } } /* diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 9e5938b10e..bf1e6fdc5c 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -115,6 +115,7 @@ static Oid findTypeSendFunction(List *procname, Oid typeOid); static Oid findTypeTypmodinFunction(List *procname); static Oid findTypeTypmodoutFunction(List *procname); static Oid findTypeAnalyzeFunction(List *procname, Oid typeOid); +static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid, bool parseFunc); static Oid findRangeSubOpclass(List *opcname, Oid subtype); static Oid findRangeCanonicalFunction(List *procname, Oid typeOid); static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype); @@ -148,6 +149,7 @@ DefineType(ParseState *pstate, List *names, List *parameters) List *typmodinName = NIL; List *typmodoutName = NIL; List *analyzeName = NIL; + List *subscriptingParseName = NIL; char category = TYPCATEGORY_USER; bool preferred = false; char delimiter = DEFAULT_TYPDELIM; @@ -166,6 +168,7 @@ DefineType(ParseState *pstate, List *names, List *parameters) DefElem *typmodinNameEl = NULL; DefElem *typmodoutNameEl = NULL; DefElem *analyzeNameEl = NULL; + DefElem *subscriptingParseNameEl = NULL; DefElem *categoryEl = NULL; DefElem *preferredEl = NULL; DefElem *delimiterEl = NULL; @@ -187,6 +190,7 @@ DefineType(ParseState *pstate, List *names, List *parameters) Oid typoid; ListCell *pl; ObjectAddress address; + Oid subscriptingParseOid = InvalidOid; /* * As of Postgres 8.4, we 
require superuser privilege to create a base @@ -287,6 +291,8 @@ DefineType(ParseState *pstate, List *names, List *parameters) else if (strcmp(defel->defname, "analyze") == 0 || strcmp(defel->defname, "analyse") == 0) defelp = &analyzeNameEl; + else if (strcmp(defel->defname, "subscripting_handler") == 0) + defelp = &subscriptingParseNameEl; else if (strcmp(defel->defname, "category") == 0) defelp = &categoryEl; else if (strcmp(defel->defname, "preferred") == 0) @@ -357,6 +363,8 @@ DefineType(ParseState *pstate, List *names, List *parameters) typmodoutName = defGetQualifiedName(typmodoutNameEl); if (analyzeNameEl) analyzeName = defGetQualifiedName(analyzeNameEl); + if (subscriptingParseNameEl) + subscriptingParseName = defGetQualifiedName(subscriptingParseNameEl); if (categoryEl) { char *p = defGetString(categoryEl); @@ -481,6 +489,10 @@ DefineType(ParseState *pstate, List *names, List *parameters) if (analyzeName) analyzeOid = findTypeAnalyzeFunction(analyzeName, typoid); + if (subscriptingParseName) + subscriptingParseOid = findTypeSubscriptingFunction(subscriptingParseName, + typoid, true); + /* * Check permissions on functions. We choose to require the creator/owner * of a type to also own the underlying functions. 
Since creating a type @@ -562,7 +574,8 @@ DefineType(ParseState *pstate, List *names, List *parameters) -1, /* typMod (Domains only) */ 0, /* Array Dimensions of typbasetype */ false, /* Type NOT NULL */ - collation); /* type's collation */ + collation, /* type's collation */ + subscriptingParseOid); /* subscripting procedure */ Assert(typoid == address.objectId); /* @@ -603,7 +616,8 @@ DefineType(ParseState *pstate, List *names, List *parameters) -1, /* typMod (Domains only) */ 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ - collation); /* type's collation */ + collation, /* type's collation */ + 0); pfree(array_type); @@ -666,6 +680,7 @@ DefineDomain(CreateDomainStmt *stmt) Oid receiveProcedure; Oid sendProcedure; Oid analyzeProcedure; + Oid subscriptingHandlerProcedure; bool byValue; char category; char delimiter; @@ -799,6 +814,9 @@ DefineDomain(CreateDomainStmt *stmt) /* Analysis function */ analyzeProcedure = baseType->typanalyze; + /* Subscripting functions */ + subscriptingHandlerProcedure = baseType->typsubshandler; + /* Inherited default value */ datum = SysCacheGetAttr(TYPEOID, typeTup, Anum_pg_type_typdefault, &isnull); @@ -1004,7 +1022,8 @@ DefineDomain(CreateDomainStmt *stmt) basetypeMod, /* typeMod value */ typNDims, /* Array dimensions for base type */ typNotNull, /* Type NOT NULL */ - domaincoll); /* type's collation */ + domaincoll, /* type's collation */ + subscriptingHandlerProcedure); /* subscripting procedure */ /* * Create the array type that goes with it. 
@@ -1044,7 +1063,8 @@ DefineDomain(CreateDomainStmt *stmt) -1, /* typMod (Domains only) */ 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ - domaincoll); /* type's collation */ + domaincoll, /* type's collation */ + 0); /* array subscripting implementation */ pfree(domainArrayName); @@ -1159,7 +1179,8 @@ DefineEnum(CreateEnumStmt *stmt) -1, /* typMod (Domains only) */ 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ - InvalidOid); /* type's collation */ + InvalidOid, /* type's collation */ + InvalidOid); /* typsubshandler - none */ /* Enter the enum's values into pg_enum */ EnumValuesCreate(enumTypeAddr.objectId, stmt->vals); @@ -1199,7 +1220,8 @@ DefineEnum(CreateEnumStmt *stmt) -1, /* typMod (Domains only) */ 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ - InvalidOid); /* type's collation */ + InvalidOid, /* type's collation */ + 0); /* array subscripting implementation */ pfree(enumArrayName); @@ -1487,7 +1509,8 @@ DefineRange(CreateRangeStmt *stmt) -1, /* typMod (Domains only) */ 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ - InvalidOid); /* type's collation (ranges never have one) */ + InvalidOid, /* type's collation (ranges never have one) */ + InvalidOid); /* typsubshandler - none */ Assert(typoid == InvalidOid || typoid == address.objectId); typoid = address.objectId; @@ -1530,7 +1553,8 @@ DefineRange(CreateRangeStmt *stmt) -1, /* typMod (Domains only) */ 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ - InvalidOid); /* typcollation */ + InvalidOid, /* typcollation */ + 0); /* array subscripting implementation */ pfree(rangeArrayName); @@ -1881,6 +1905,43 @@ findTypeAnalyzeFunction(List *procname, Oid typeOid) return procOid; } +static Oid +findTypeSubscriptingFunction(List *procname, Oid typeOid, bool parseFunc) +{ + Oid argList[2]; + Oid procOid; + int nargs; + + if (parseFunc) + { + /* + * Subscripting parse functions always take two INTERNAL arguments 
and + * return INTERNAL. + */ + argList[0] = INTERNALOID; + nargs = 1; + } + else + { + /* + * Subscripting fetch/assign functions always take one typeOid + * argument, one INTERNAL argument and return typeOid. + */ + argList[0] = typeOid; + argList[1] = INTERNALOID; + nargs = 2; + } + + procOid = LookupFuncName(procname, nargs, argList, true); + if (!OidIsValid(procOid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(procname, nargs, NIL, argList)))); + + return procOid; +} + /* * Find suitable support functions and opclasses for a range type. */ diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 236413f62a..ee1077ebe9 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -2545,18 +2545,16 @@ ExecInitSubscriptingRef(ExprEvalStep *scratch, SubscriptingRef *sbsref, { bool isAssignment = (sbsref->refassgnexpr != NULL); SubscriptingRefState *sbsrefstate = palloc0(sizeof(SubscriptingRefState)); - List *adjust_jumps = NIL; - ListCell *lc; - int i; + List *adjust_jumps = NIL; + ListCell *lc; + int i; + RegProcedure typsubshandler = get_typsubsprocs(sbsref->refcontainertype); /* Fill constant fields of SubscriptingRefState */ sbsrefstate->isassignment = isAssignment; sbsrefstate->refelemtype = sbsref->refelemtype; sbsrefstate->refattrlength = get_typlen(sbsref->refcontainertype); - get_typlenbyvalalign(sbsref->refelemtype, - &sbsrefstate->refelemlength, - &sbsrefstate->refelembyval, - &sbsrefstate->refelemalign); + sbsrefstate->sbsroutines = (SubscriptRoutines *) OidFunctionCall0(typsubshandler); /* * Evaluate array input. 
It's safe to do so into resv/resnull, because we @@ -2580,19 +2578,6 @@ ExecInitSubscriptingRef(ExprEvalStep *scratch, SubscriptingRef *sbsref, state->steps_len - 1); } - /* Verify subscript list lengths are within limit */ - if (list_length(sbsref->refupperindexpr) > MAXDIM) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", - list_length(sbsref->refupperindexpr), MAXDIM))); - - if (list_length(sbsref->reflowerindexpr) > MAXDIM) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", - list_length(sbsref->reflowerindexpr), MAXDIM))); - /* Evaluate upper subscripts */ i = 0; foreach(lc, sbsref->refupperindexpr) diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index b812bbacee..838bb4d005 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3146,8 +3146,8 @@ bool ExecEvalSubscriptingRef(ExprState *state, ExprEvalStep *op) { SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state; - int *indexes; - int off; + Datum *indexes; + int off; /* If any index expr yields NULL, result is NULL or error */ if (sbsrefstate->subscriptnull) @@ -3167,7 +3167,7 @@ ExecEvalSubscriptingRef(ExprState *state, ExprEvalStep *op) indexes = sbsrefstate->lowerindex; off = op->d.sbsref_subscript.off; - indexes[off] = DatumGetInt32(sbsrefstate->subscriptvalue); + indexes[off] = sbsrefstate->subscriptvalue; return true; } @@ -3181,36 +3181,14 @@ void ExecEvalSubscriptingRefFetch(ExprState *state, ExprEvalStep *op) { SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + SubscriptRoutines *sbsroutines = sbsrefstate->sbsroutines; /* Should not get here if source container (or any subscript) is null */ Assert(!(*op->resnull)); - if (sbsrefstate->numlower == 0) - { - /* Scalar case */ - *op->resvalue = array_get_element(*op->resvalue, 
- sbsrefstate->numupper, - sbsrefstate->upperindex, - sbsrefstate->refattrlength, - sbsrefstate->refelemlength, - sbsrefstate->refelembyval, - sbsrefstate->refelemalign, - op->resnull); - } - else - { - /* Slice case */ - *op->resvalue = array_get_slice(*op->resvalue, - sbsrefstate->numupper, - sbsrefstate->upperindex, - sbsrefstate->lowerindex, - sbsrefstate->upperprovided, - sbsrefstate->lowerprovided, - sbsrefstate->refattrlength, - sbsrefstate->refelemlength, - sbsrefstate->refelembyval, - sbsrefstate->refelemalign); - } + + *op->resvalue = sbsroutines->fetch(*op->resvalue, sbsrefstate); + *op->resnull = sbsrefstate->resnull; } /* @@ -3223,40 +3201,20 @@ void ExecEvalSubscriptingRefOld(ExprState *state, ExprEvalStep *op) { SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + SubscriptRoutines *sbsroutines = sbsrefstate->sbsroutines; if (*op->resnull) { - /* whole array is null, so any element or slice is too */ + /* whole container is null, so any element or slice is too */ sbsrefstate->prevvalue = (Datum) 0; sbsrefstate->prevnull = true; } - else if (sbsrefstate->numlower == 0) - { - /* Scalar case */ - sbsrefstate->prevvalue = array_get_element(*op->resvalue, - sbsrefstate->numupper, - sbsrefstate->upperindex, - sbsrefstate->refattrlength, - sbsrefstate->refelemlength, - sbsrefstate->refelembyval, - sbsrefstate->refelemalign, - &sbsrefstate->prevnull); - } else { - /* Slice case */ - /* this is currently unreachable */ - sbsrefstate->prevvalue = array_get_slice(*op->resvalue, - sbsrefstate->numupper, - sbsrefstate->upperindex, - sbsrefstate->lowerindex, - sbsrefstate->upperprovided, - sbsrefstate->lowerprovided, - sbsrefstate->refattrlength, - sbsrefstate->refelemlength, - sbsrefstate->refelembyval, - sbsrefstate->refelemalign); - sbsrefstate->prevnull = false; + sbsrefstate->prevvalue = sbsroutines->fetch(*op->resvalue, sbsrefstate); + + if (sbsrefstate->numlower != 0) + sbsrefstate->prevnull = false; } } @@ -3270,59 +3228,11 @@ void 
ExecEvalSubscriptingRefAssign(ExprState *state, ExprEvalStep *op) { SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state; + SubscriptRoutines *sbsroutines = sbsrefstate->sbsroutines; - /* - * For an assignment to a fixed-length container type, both the original - * container and the value to be assigned into it must be non-NULL, else - * we punt and return the original container. - */ - if (sbsrefstate->refattrlength > 0) - { - if (*op->resnull || sbsrefstate->replacenull) - return; - } - - /* - * For assignment to varlena arrays, we handle a NULL original array by - * substituting an empty (zero-dimensional) array; insertion of the new - * element will result in a singleton array value. It does not matter - * whether the new element is NULL. - */ - if (*op->resnull) - { - *op->resvalue = PointerGetDatum(construct_empty_array(sbsrefstate->refelemtype)); - *op->resnull = false; - } - - if (sbsrefstate->numlower == 0) - { - /* Scalar case */ - *op->resvalue = array_set_element(*op->resvalue, - sbsrefstate->numupper, - sbsrefstate->upperindex, - sbsrefstate->replacevalue, - sbsrefstate->replacenull, - sbsrefstate->refattrlength, - sbsrefstate->refelemlength, - sbsrefstate->refelembyval, - sbsrefstate->refelemalign); - } - else - { - /* Slice case */ - *op->resvalue = array_set_slice(*op->resvalue, - sbsrefstate->numupper, - sbsrefstate->upperindex, - sbsrefstate->lowerindex, - sbsrefstate->upperprovided, - sbsrefstate->lowerprovided, - sbsrefstate->replacevalue, - sbsrefstate->replacenull, - sbsrefstate->refattrlength, - sbsrefstate->refelemlength, - sbsrefstate->refelembyval, - sbsrefstate->refelemalign); - } + sbsrefstate->resnull = *op->resnull; + *op->resvalue = sbsroutines->assign(*op->resvalue, sbsrefstate); + *op->resnull = sbsrefstate->resnull; } /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 89c409de66..ec9b0dd97f 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1548,8 +1548,10 
@@ _copySubscriptingRef(const SubscriptingRef *from) COPY_SCALAR_FIELD(refcontainertype); COPY_SCALAR_FIELD(refelemtype); + COPY_SCALAR_FIELD(refassgntype); COPY_SCALAR_FIELD(reftypmod); COPY_SCALAR_FIELD(refcollid); + COPY_SCALAR_FIELD(refnestedfunc); COPY_NODE_FIELD(refupperindexpr); COPY_NODE_FIELD(reflowerindexpr); COPY_NODE_FIELD(refexpr); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e3f33c40be..48b436f7f7 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -274,8 +274,10 @@ _equalSubscriptingRef(const SubscriptingRef *a, const SubscriptingRef *b) { COMPARE_SCALAR_FIELD(refcontainertype); COMPARE_SCALAR_FIELD(refelemtype); + COMPARE_SCALAR_FIELD(refassgntype); COMPARE_SCALAR_FIELD(reftypmod); COMPARE_SCALAR_FIELD(refcollid); + COMPARE_SCALAR_FIELD(refnestedfunc); COMPARE_NODE_FIELD(refupperindexpr); COMPARE_NODE_FIELD(reflowerindexpr); COMPARE_NODE_FIELD(refexpr); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e2f177515d..70c9736c46 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1194,8 +1194,10 @@ _outSubscriptingRef(StringInfo str, const SubscriptingRef *node) WRITE_OID_FIELD(refcontainertype); WRITE_OID_FIELD(refelemtype); + WRITE_OID_FIELD(refassgntype); WRITE_INT_FIELD(reftypmod); WRITE_OID_FIELD(refcollid); + WRITE_OID_FIELD(refnestedfunc); WRITE_NODE_FIELD(refupperindexpr); WRITE_NODE_FIELD(reflowerindexpr); WRITE_NODE_FIELD(refexpr); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 42050ab719..1c9752c771 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -669,8 +669,10 @@ _readSubscriptingRef(void) READ_OID_FIELD(refcontainertype); READ_OID_FIELD(refelemtype); + READ_OID_FIELD(refassgntype); READ_INT_FIELD(reftypmod); READ_OID_FIELD(refcollid); + READ_OID_FIELD(refnestedfunc); READ_NODE_FIELD(refupperindexpr); READ_NODE_FIELD(reflowerindexpr); 
READ_NODE_FIELD(refexpr); diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index f69976cc8c..ea9d35429c 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -433,11 +433,13 @@ unknown_attribute(ParseState *pstate, Node *relref, const char *attname, static Node * transformIndirection(ParseState *pstate, A_Indirection *ind) { - Node *last_srf = pstate->p_last_srf; - Node *result = transformExprRecurse(pstate, ind->arg); - List *subscripts = NIL; - int location = exprLocation(result); - ListCell *i; + Node *last_srf = pstate->p_last_srf; + Node *result = transformExprRecurse(pstate, ind->arg); + SubscriptRoutines *sbsroutines; + SubscriptingRef *sbsref; + List *subscripts = NIL; + int location = exprLocation(result); + ListCell *i; /* * We have to split any field-selection operations apart from @@ -465,13 +467,20 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) /* process subscripts before this field selection */ if (subscripts) - result = (Node *) transformContainerSubscripts(pstate, - result, - exprType(result), - InvalidOid, - exprTypmod(result), - subscripts, - NULL); + { + sbsref = transformContainerSubscripts(pstate, + result, + exprType(result), + InvalidOid, + exprTypmod(result), + subscripts, + NULL); + + sbsroutines = getSubscriptingRoutines(sbsref->refcontainertype); + sbsref = sbsroutines->prepare(false, sbsref); + sbsroutines->validate(false, sbsref, pstate); + result = (Node *) sbsref; + } subscripts = NIL; newresult = ParseFuncOrColumn(pstate, @@ -488,13 +497,20 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) } /* process trailing subscripts, if any */ if (subscripts) - result = (Node *) transformContainerSubscripts(pstate, - result, - exprType(result), - InvalidOid, - exprTypmod(result), - subscripts, - NULL); + { + sbsref = transformContainerSubscripts(pstate, + result, + exprType(result), + InvalidOid, + exprTypmod(result), + subscripts, + NULL); + + 
sbsroutines = getSubscriptingRoutines(sbsref->refcontainertype); + sbsref = sbsroutines->prepare(false, sbsref); + sbsroutines->validate(false, sbsref, pstate); + result = (Node *) sbsref; + } return result; } diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 6e98fe55fc..4f46d6310a 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -184,21 +184,12 @@ pcb_error_callback(void *arg) * transformContainerType() * Identify the types involved in a subscripting operation for container * - * - * On entry, containerType/containerTypmod identify the type of the input value - * to be subscripted (which could be a domain type). These are modified if - * necessary to identify the actual container type and typmod, and the - * container's element type is returned. An error is thrown if the input isn't - * an array type. + * On entry, containerType/containerTypmod are modified if necessary to + * identify the actual container type and typmod. */ -Oid +void transformContainerType(Oid *containerType, int32 *containerTypmod) { - Oid origContainerType = *containerType; - Oid elementType; - HeapTuple type_tuple_container; - Form_pg_type type_struct_container; - /* * If the input is a domain, smash to base type, and extract the actual * typmod to be applied to the base type. 
Subscripting a domain is an @@ -219,25 +210,6 @@ transformContainerType(Oid *containerType, int32 *containerTypmod) *containerType = INT2ARRAYOID; else if (*containerType == OIDVECTOROID) *containerType = OIDARRAYOID; - - /* Get the type tuple for the container */ - type_tuple_container = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*containerType)); - if (!HeapTupleIsValid(type_tuple_container)) - elog(ERROR, "cache lookup failed for type %u", *containerType); - type_struct_container = (Form_pg_type) GETSTRUCT(type_tuple_container); - - /* needn't check typisdefined since this will fail anyway */ - - elementType = type_struct_container->typelem; - if (elementType == InvalidOid) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("cannot subscript type %s because it is not an array", - format_type_be(origContainerType)))); - - ReleaseSysCache(type_tuple_container); - - return elementType; } /* @@ -254,10 +226,15 @@ transformContainerType(Oid *containerType, int32 *containerTypmod) * container. We produce an expression that represents the new container value * with the source data inserted into the right part of the container. * - * For both cases, if the source container is of a domain-over-array type, - * the result is of the base array type or its element type; essentially, - * we must fold a domain to its base type before applying subscripting. - * (Note that int2vector and oidvector are treated as domains here.) + * For both cases, this function contains only general subscripting logic while + * type-specific logic (e.g. type verifications and coercion) is placed in + * separate procedures indicated by typsubshandler. There is only one exception + * for now about domain-over-container: if the source container is of a + * domain-over-container type, the result is of the base container type or its + * element type; essentially, we must fold a domain to its base type before + * applying subscripting. 
(Note that int2vector and oidvector are treated as + * domains here.) An error will appear in the case the current container type + * doesn't have a subscripting procedure. * * pstate Parse state * containerBase Already-transformed expression for the container as a whole @@ -284,16 +261,12 @@ transformContainerSubscripts(ParseState *pstate, bool isSlice = false; List *upperIndexpr = NIL; List *lowerIndexpr = NIL; + List *indexprSlice = NIL; ListCell *idx; SubscriptingRef *sbsref; - /* - * Caller may or may not have bothered to determine elementType. Note - * that if the caller did do so, containerType/containerTypMod must be as - * modified by transformContainerType, ie, smash domain to base type. - */ - if (!OidIsValid(elementType)) - elementType = transformContainerType(&containerType, &containerTypMod); + /* Identify the actual container type and element type involved */ + transformContainerType(&containerType, &containerTypMod); /* * A list containing only simple subscripts refers to a single container @@ -327,29 +300,6 @@ transformContainerSubscripts(ParseState *pstate, if (ai->lidx) { subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind); - /* If it's not int4 already, try to coerce */ - subexpr = coerce_to_target_type(pstate, - subexpr, exprType(subexpr), - INT4OID, -1, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (subexpr == NULL) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array subscript must have type integer"), - parser_errposition(pstate, exprLocation(ai->lidx)))); - } - else if (!ai->is_slice) - { - /* Make a constant 1 */ - subexpr = (Node *) makeConst(INT4OID, - -1, - InvalidOid, - sizeof(int32), - Int32GetDatum(1), - false, - true); /* pass by value */ } else { @@ -357,63 +307,23 @@ transformContainerSubscripts(ParseState *pstate, subexpr = NULL; } lowerIndexpr = lappend(lowerIndexpr, subexpr); + indexprSlice = lappend(indexprSlice, ai); } else Assert(ai->lidx == NULL && !ai->is_slice); if (ai->uidx) - { 
subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); - /* If it's not int4 already, try to coerce */ - subexpr = coerce_to_target_type(pstate, - subexpr, exprType(subexpr), - INT4OID, -1, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (subexpr == NULL) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array subscript must have type integer"), - parser_errposition(pstate, exprLocation(ai->uidx)))); - } else { /* Slice with omitted upper bound, put NULL into the list */ Assert(isSlice && ai->is_slice); subexpr = NULL; } + subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); upperIndexpr = lappend(upperIndexpr, subexpr); } - /* - * If doing an array store, coerce the source value to the right type. - * (This should agree with the coercion done by transformAssignedExpr.) - */ - if (assignFrom != NULL) - { - Oid typesource = exprType(assignFrom); - Oid typeneeded = isSlice ? containerType : elementType; - Node *newFrom; - - newFrom = coerce_to_target_type(pstate, - assignFrom, typesource, - typeneeded, containerTypMod, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (newFrom == NULL) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array assignment requires type %s" - " but expression is of type %s", - format_type_be(typeneeded), - format_type_be(typesource)), - errhint("You will need to rewrite or cast the expression."), - parser_errposition(pstate, exprLocation(assignFrom)))); - assignFrom = newFrom; - } - /* * Ready to build the SubscriptingRef node. 
*/ @@ -422,17 +332,30 @@ transformContainerSubscripts(ParseState *pstate, sbsref->refassgnexpr = (Expr *) assignFrom; sbsref->refcontainertype = containerType; - sbsref->refelemtype = elementType; sbsref->reftypmod = containerTypMod; /* refcollid will be set by parse_collate.c */ sbsref->refupperindexpr = upperIndexpr; sbsref->reflowerindexpr = lowerIndexpr; + sbsref->refindexprslice = indexprSlice; sbsref->refexpr = (Expr *) containerBase; - sbsref->refassgnexpr = (Expr *) assignFrom; return sbsref; } +SubscriptRoutines* +getSubscriptingRoutines(Oid containerType) +{ + RegProcedure typsubshandler = get_typsubsprocs(containerType); + + if (!OidIsValid(typsubshandler)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot subscript type %s because it does not support subscripting", + format_type_be(containerType)))); + + return (SubscriptRoutines *) OidFunctionCall0(typsubshandler); +} + /* * make_const * diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 566c517837..d7483c6538 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -685,7 +685,7 @@ transformAssignmentIndirection(ParseState *pstate, Node *rhs, int location) { - Node *result; + Node *result = NULL; List *subscripts = NIL; bool isSlice = false; ListCell *i; @@ -848,27 +848,21 @@ transformAssignmentIndirection(ParseState *pstate, location); } - /* base case: just coerce RHS to match target type ID */ - - result = coerce_to_target_type(pstate, - rhs, exprType(rhs), - targetTypeId, targetTypMod, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (result == NULL) + /* + * Base case: just coerce RHS to match target type ID. + * It's necessary only for field selection, since for + * subscripting its custom code should define types. 
+ */ + if (!targetIsSubscripting) { - if (targetIsSubscripting) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array assignment to \"%s\" requires type %s" - " but expression is of type %s", - targetName, - format_type_be(targetTypeId), - format_type_be(exprType(rhs))), - errhint("You will need to rewrite or cast the expression."), - parser_errposition(pstate, location))); - else + result = coerce_to_target_type(pstate, + rhs, exprType(rhs), + targetTypeId, targetTypMod, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + + if (result == NULL) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("subfield \"%s\" is of type %s" @@ -900,29 +894,42 @@ transformAssignmentSubscripts(ParseState *pstate, Node *rhs, int location) { - Node *result; - Oid containerType; - int32 containerTypMod; - Oid elementTypeId; - Oid typeNeeded; - Oid collationNeeded; + Node *result; + Oid containerType; + int32 containerTypMod; + Oid collationNeeded; + SubscriptingRef *sbsref; + SubscriptRoutines *sbsroutines; Assert(subscripts != NIL); /* Identify the actual array type and element type involved */ containerType = targetTypeId; containerTypMod = targetTypMod; - elementTypeId = transformContainerType(&containerType, &containerTypMod); - /* Identify type that RHS must provide */ - typeNeeded = isSlice ? containerType : elementTypeId; + /* process subscripts */ + sbsref = transformContainerSubscripts(pstate, + basenode, + containerType, + exprType(rhs), + containerTypMod, + subscripts, + rhs); + + sbsroutines = getSubscriptingRoutines(sbsref->refcontainertype); + + /* + * Let custom code provide necessary information about required types: + * refelemtype and refassgntype + */ + sbsref = sbsroutines->prepare(rhs != NULL, sbsref); /* * container normally has same collation as elements, but there's an * exception: we might be subscripting a domain over a container type. In * that case use collation of the base type. 
*/ - if (containerType == targetTypeId) + if (sbsref->refcontainertype == containerType) collationNeeded = targetCollation; else collationNeeded = get_typcollation(containerType); @@ -932,25 +939,22 @@ transformAssignmentSubscripts(ParseState *pstate, NULL, targetName, true, - typeNeeded, - containerTypMod, + sbsref->refassgntype, + sbsref->reftypmod, collationNeeded, indirection, next_indirection, rhs, location); - /* process subscripts */ - result = (Node *) transformContainerSubscripts(pstate, - basenode, - containerType, - elementTypeId, - containerTypMod, - subscripts, - rhs); + /* Provide fully prepared subscripting information for custom validation */ + sbsref->refassgnexpr = (Expr *) rhs; + sbsroutines->validate(rhs != NULL, sbsref, pstate); + + result = (Node *) sbsref; /* If target was a domain over container, need to coerce up to the domain */ - if (containerType != targetTypeId) + if (sbsref->refcontainertype != targetTypeId) { Oid resulttype = exprType(result); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 2cbcb4b85e..fb298c11c2 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -7994,17 +7994,18 @@ get_rule_expr(Node *node, deparse_context *context, if (need_parens) appendStringInfoChar(buf, ')'); - /* - * If there's a refassgnexpr, we want to print the node in the - * format "container[subscripts] := refassgnexpr". This is - * not legal SQL, so decompilation of INSERT or UPDATE - * statements should always use processIndirection as part of - * the statement-level syntax. We should only see this when - * EXPLAIN tries to print the targetlist of a plan resulting - * from such a statement. - */ - if (sbsref->refassgnexpr) + if (IsAssignment(sbsref)) { + /* + * If there's a refassgnexpr, we want to print the node in the + * format "container[subscripts] := refassgnexpr". 
This is not + * legal SQL, so decompilation of INSERT or UPDATE statements + * should always use processIndirection as part of the + * statement-level syntax. We should only see this when + * EXPLAIN tries to print the targetlist of a plan resulting + * from such a statement. + */ + Node *refassgnexpr; /* diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index f3bf413829..fb264a1e57 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -3273,6 +3273,29 @@ get_range_collation(Oid rangeOid) return InvalidOid; } +/* + * get_typsubsprocs + * + * Given the type OID, return the type's subscripting handler procedure, + * or InvalidOid if there is none. + */ +RegProcedure +get_typsubsprocs(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + RegProcedure handler = ((Form_pg_type) GETSTRUCT(tp))->typsubshandler; + ReleaseSysCache(tp); + + return handler; + } + else + return InvalidOid; +} + /* ---------- PG_INDEX CACHE ---------- */ /* diff --git a/src/include/c.h b/src/include/c.h index f242e32edb..108e424df7 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -545,6 +545,8 @@ typedef struct int indx[MAXDIM]; } IntArray; +#define MAX_SUBSCRIPT_DEPTH 12 + /* ---------------- * Variable-length datatypes all share the 'struct varlena' header. * diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 7b37562648..ea237ec61d 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -221,6 +221,12 @@ CATALOG(pg_type,1247,TypeRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71,TypeRelati */ Oid typcollation BKI_DEFAULT(0) BKI_LOOKUP(pg_collation); + /* + * Type specific subscripting logic. If typsubshandler is NULL, it means + * that this type doesn't support subscripting. 
+ */ + regproc typsubshandler BKI_DEFAULT(-) BKI_LOOKUP(pg_proc); + #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* @@ -349,7 +355,8 @@ extern ObjectAddress TypeCreate(Oid newTypeOid, int32 typeMod, int32 typNDims, bool typeNotNull, - Oid typeCollation); + Oid typeCollation, + Oid subscriptingParseProcedure); extern void GenerateTypeDependencies(HeapTuple typeTuple, Relation typeCatalog, diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index dbe8649a57..52c357b2aa 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -19,7 +19,7 @@ /* forward references to avoid circularity */ struct ExprEvalStep; -struct SubscriptingRefState; +struct SubscriptRoutines; /* Bits in ExprState->flags (see also execnodes.h for public flag bits): */ /* expression's interpreter has been initialized */ @@ -658,13 +658,13 @@ typedef struct SubscriptingRefState /* numupper and upperprovided[] are filled at compile time */ /* at runtime, extracted subscript datums get stored in upperindex[] */ int numupper; - bool upperprovided[MAXDIM]; - int upperindex[MAXDIM]; + bool upperprovided[MAX_SUBSCRIPT_DEPTH]; + Datum upperindex[MAX_SUBSCRIPT_DEPTH]; /* similarly for lower indexes, if any */ int numlower; - bool lowerprovided[MAXDIM]; - int lowerindex[MAXDIM]; + bool lowerprovided[MAX_SUBSCRIPT_DEPTH]; + Datum lowerindex[MAX_SUBSCRIPT_DEPTH]; /* subscript expressions get evaluated into here */ Datum subscriptvalue; @@ -677,6 +677,9 @@ typedef struct SubscriptingRefState /* if we have a nested assignment, SBSREF_OLD puts old value here */ Datum prevvalue; bool prevnull; + + bool resnull; + struct SubscriptRoutines *sbsroutines; } SubscriptingRefState; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index d73be2ad46..5991f437cd 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -417,13 +417,17 @@ typedef struct SubscriptingRef Expr xpr; Oid refcontainertype; /* 
type of the container proper */ Oid refelemtype; /* type of the container elements */ + Oid refassgntype; /* type of assignment expr that is required */ int32 reftypmod; /* typmod of the container (and elements too) */ Oid refcollid; /* OID of collation, or InvalidOid if none */ + Oid refnestedfunc; /* OID of type-specific function to handle nested assignment */ List *refupperindexpr; /* expressions that evaluate to upper * container indexes */ List *reflowerindexpr; /* expressions that evaluate to lower * container indexes, or NIL for single * container element */ + List *refindexprslice; /* whether or not related indexpr from + * reflowerindexpr is a slice */ Expr *refexpr; /* the expression that evaluates to a * container value */ @@ -431,6 +435,8 @@ typedef struct SubscriptingRef * fetch */ } SubscriptingRef; +#define IsAssignment(expr) ( ((SubscriptingRef*) expr)->refassgnexpr != NULL ) + /* * CoercionContext - distinguishes the allowed set of type casts * diff --git a/src/include/nodes/subscripting.h b/src/include/nodes/subscripting.h new file mode 100644 index 0000000000..1800d5ecf5 --- /dev/null +++ b/src/include/nodes/subscripting.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------- + * + * subscripting.h + * API for generic type subscripting + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/nodes/subscripting.h + * + *------------------------------------------------------------------------- + */ +#ifndef SUBSCRIPTING_H +#define SUBSCRIPTING_H + +#include "parser/parse_node.h" +#include "nodes/primnodes.h" + +struct ParseState; +struct SubscriptingRefState; + +/* Callback function signatures --- see xsubscripting.sgml for more info. 
*/ +typedef SubscriptingRef * (*SubscriptingPrepare) (bool isAssignment, SubscriptingRef *sbsef); + +typedef SubscriptingRef * (*SubscriptingValidate) (bool isAssignment, SubscriptingRef *sbsef, + struct ParseState *pstate); + +typedef Datum (*SubscriptingFetch) (Datum source, struct SubscriptingRefState *sbsrefstate); + +typedef Datum (*SubscriptingAssign) (Datum source, struct SubscriptingRefState *sbrsefstate); + +typedef struct SubscriptRoutines +{ + SubscriptingPrepare prepare; + SubscriptingValidate validate; + SubscriptingFetch fetch; + SubscriptingAssign assign; + +} SubscriptRoutines; + + +#endif /* SUBSCRIPTING_H */ diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index d25819aa28..b4736206d1 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -15,6 +15,7 @@ #define PARSE_NODE_H #include "nodes/parsenodes.h" +#include "nodes/subscripting.h" #include "utils/queryenvironment.h" #include "utils/relcache.h" @@ -313,7 +314,9 @@ extern void setup_parser_errposition_callback(ParseCallbackState *pcbstate, ParseState *pstate, int location); extern void cancel_parser_errposition_callback(ParseCallbackState *pcbstate); -extern Oid transformContainerType(Oid *containerType, int32 *containerTypmod); +extern Var *make_var(ParseState *pstate, RangeTblEntry *rte, int attrno, + int location); +extern void transformContainerType(Oid *containerType, int32 *containerTypmod); extern SubscriptingRef *transformContainerSubscripts(ParseState *pstate, Node *containerBase, @@ -322,6 +325,7 @@ extern SubscriptingRef *transformContainerSubscripts(ParseState *pstate, int32 containerTypMod, List *indirection, Node *assignFrom); +extern SubscriptRoutines *getSubscriptingRoutines(Oid containerType); extern Const *make_const(ParseState *pstate, Value *value, int location); #endif /* PARSE_NODE_H */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index fecfe1f4f6..8fc570d2e1 100644 --- 
a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -183,6 +183,7 @@ extern char *get_namespace_name(Oid nspid); extern char *get_namespace_name_or_temp(Oid nspid); extern Oid get_range_subtype(Oid rangeOid); extern Oid get_range_collation(Oid rangeOid); +extern RegProcedure get_typsubsprocs(Oid typid); extern Oid get_index_column_opclass(Oid index_oid, int attno); extern bool get_index_isreplident(Oid index_oid); extern bool get_index_isvalid(Oid index_oid); -- 2.21.0
>From 083064ebdd084bb14560efedb8676f1585fe014c Mon Sep 17 00:00:00 2001 From: erthalion <9erthali...@gmail.com> Date: Fri, 1 Feb 2019 11:38:52 +0100 Subject: [PATCH v33 2/5] Subscripting for array Subscripting implementation for array data types. It includes all array specific parts, that were removed from the generalized code. Note, that for some array-like data types it's not necessary to assign array_subscript_handler explicitly, since it's done in Catalog.pm Reviewed-by: Tom Lane, Arthur Zakirov --- src/backend/catalog/Catalog.pm | 1 + src/backend/catalog/heap.c | 2 +- src/backend/commands/typecmds.c | 8 +- src/backend/executor/execExprInterp.c | 15 +- src/backend/nodes/nodeFuncs.c | 2 +- src/backend/parser/parse_node.c | 11 - src/backend/parser/parse_target.c | 4 +- src/backend/utils/adt/arrayfuncs.c | 289 ++++++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 7 + src/include/catalog/pg_type.dat | 30 ++- src/test/regress/expected/arrays.out | 12 +- src/test/regress/sql/arrays.sql | 4 +- 12 files changed, 346 insertions(+), 39 deletions(-) diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm index dd39a086ce..b4dfa26518 100644 --- a/src/backend/catalog/Catalog.pm +++ b/src/backend/catalog/Catalog.pm @@ -384,6 +384,7 @@ sub GenerateArrayTypes # Arrays require INT alignment, unless the element type requires # DOUBLE alignment. $array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i'; + $array_type{typsubshandler} = 'array_subscript_handler'; # Fill in the rest of the array entry's fields. 
foreach my $column (@$pgtype_schema) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 911e2a1ffe..734f8fe55a 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1337,7 +1337,7 @@ heap_create_with_catalog(const char *relname, 0, /* array dimensions for typBaseType */ false, /* Type NOT NULL */ InvalidOid, /* rowtypes never have a collation */ - 0); /* array implementation */ + F_ARRAY_SUBSCRIPT_HANDLER); /* array implementation */ pfree(relarrayname); } diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index bf1e6fdc5c..cd6855643f 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -617,7 +617,7 @@ DefineType(ParseState *pstate, List *names, List *parameters) 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ collation, /* type's collation */ - 0); + F_ARRAY_SUBSCRIPT_HANDLER); pfree(array_type); @@ -1064,7 +1064,7 @@ DefineDomain(CreateDomainStmt *stmt) 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ domaincoll, /* type's collation */ - 0); /* array subscripting implementation */ + F_ARRAY_SUBSCRIPT_HANDLER); /* array subscripting implementation */ pfree(domainArrayName); @@ -1221,7 +1221,7 @@ DefineEnum(CreateEnumStmt *stmt) 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ InvalidOid, /* type's collation */ - 0); /* array subscripting implementation */ + F_ARRAY_SUBSCRIPT_HANDLER); /* array subscripting implementation */ pfree(enumArrayName); @@ -1554,7 +1554,7 @@ DefineRange(CreateRangeStmt *stmt) 0, /* Array dimensions of typbasetype */ false, /* Type NOT NULL */ InvalidOid, /* typcollation */ - 0); /* array subscripting implementation */ + F_ARRAY_SUBSCRIPT_HANDLER); /* array subscripting implementation */ pfree(rangeArrayName); diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 838bb4d005..b87daf65e0 100644 --- 
a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3155,7 +3155,7 @@ ExecEvalSubscriptingRef(ExprState *state, ExprEvalStep *op) if (sbsrefstate->isassignment) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), - errmsg("array subscript in assignment must not be null"))); + errmsg("subscript in assignment must not be null"))); *op->resnull = true; return false; } @@ -3227,9 +3227,20 @@ ExecEvalSubscriptingRefOld(ExprState *state, ExprEvalStep *op) void ExecEvalSubscriptingRefAssign(ExprState *state, ExprEvalStep *op) { - SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state; + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; SubscriptRoutines *sbsroutines = sbsrefstate->sbsroutines; + /* + * For an assignment to a fixed-length container type, both the original + * container and the value to be assigned into it must be non-NULL, else we + * punt and return the original container. + */ + if (sbsrefstate->refattrlength > 0) + { + if (*op->resnull || sbsrefstate->replacenull) + return; + } + sbsrefstate->resnull = *op->resnull; *op->resvalue = sbsroutines->assign(*op->resvalue, sbsrefstate); *op->resnull = sbsrefstate->resnull; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index d85ca9f7c5..7e995a66d0 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -70,7 +70,7 @@ exprType(const Node *expr) const SubscriptingRef *sbsref = (const SubscriptingRef *) expr; /* slice and/or store operations yield the container type */ - if (sbsref->reflowerindexpr || sbsref->refassgnexpr) + if (IsAssignment(sbsref) || sbsref->reflowerindexpr) type = sbsref->refcontainertype; else type = sbsref->refelemtype; diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 4f46d6310a..d1c4ea8573 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -309,17 +309,6 @@ transformContainerSubscripts(ParseState *pstate, 
lowerIndexpr = lappend(lowerIndexpr, subexpr); indexprSlice = lappend(indexprSlice, ai); } - else - Assert(ai->lidx == NULL && !ai->is_slice); - - if (ai->uidx) - subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); - else - { - /* Slice with omitted upper bound, put NULL into the list */ - Assert(isSlice && ai->is_slice); - subexpr = NULL; - } subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); upperIndexpr = lappend(upperIndexpr, subexpr); } diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index d7483c6538..0161d0f540 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -685,7 +685,7 @@ transformAssignmentIndirection(ParseState *pstate, Node *rhs, int location) { - Node *result = NULL; + Node *result; List *subscripts = NIL; bool isSlice = false; ListCell *i; @@ -873,6 +873,8 @@ transformAssignmentIndirection(ParseState *pstate, errhint("You will need to rewrite or cast the expression."), parser_errposition(pstate, location))); } + else + result = rhs; return result; } diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 800107d4e7..db23eab661 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -25,14 +25,20 @@ #include "nodes/supportnodes.h" #include "optimizer/optimizer.h" #include "port/pg_bitutils.h" +#include "nodes/makefuncs.h" +#include "executor/execExpr.h" #include "utils/array.h" #include "utils/arrayaccess.h" #include "utils/builtins.h" #include "utils/datum.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/selfuncs.h" +#include "utils/syscache.h" #include "utils/typcache.h" +#include "parser/parse_node.h" +#include "parser/parse_coerce.h" /* @@ -159,7 +165,14 @@ static int width_bucket_array_variable(Datum operand, ArrayType *thresholds, Oid collation, TypeCacheEntry *typentry); +static SubscriptingRef 
*array_subscript_prepare(bool isAssignment, SubscriptingRef *sbsref); +static SubscriptingRef *array_subscript_validate(bool isAssignment, SubscriptingRef *sbsref, + ParseState *pstate); +static Datum array_subscript_fetch(Datum containerSource, + SubscriptingRefState *sbstate); +static Datum array_subscript_assign(Datum containerSource, + SubscriptingRefState *sbstate); /* * array_in : @@ -6605,3 +6618,279 @@ width_bucket_array_variable(Datum operand, return left; } + +/* + * Perform an actual data extraction or modification for the array + * subscripting. As a result the extracted Datum or the modified containers + * value will be returned. + */ +Datum +array_subscript_assign(Datum containerSource, SubscriptingRefState *sbstate) +{ + bool is_slice = (sbstate->numlower != 0); + IntArray u_index, l_index; + bool eisnull = sbstate->resnull; + int i = 0; + + if (sbstate->refelemlength == 0) + { + /* do one-time catalog lookups for type info */ + get_typlenbyvalalign(sbstate->refelemtype, + &sbstate->refelemlength, + &sbstate->refelembyval, + &sbstate->refelemalign); + } + + for(i = 0; i < sbstate->numupper; i++) + u_index.indx[i] = DatumGetInt32(sbstate->upperindex[i]); + + if (is_slice) + { + for(i = 0; i < sbstate->numlower; i++) + l_index.indx[i] = DatumGetInt32(sbstate->lowerindex[i]); + } + + /* + * For assignment to varlena arrays, we handle a NULL original array + * by substituting an empty (zero-dimensional) array; insertion of the + * new element will result in a singleton array value. It does not + * matter whether the new element is NULL. 
+ */ + if (eisnull) + { + containerSource = PointerGetDatum(construct_empty_array(sbstate->refelemtype)); + sbstate->resnull = false; + eisnull = false; + } + + if (!is_slice) + return array_set_element(containerSource, sbstate->numupper, + u_index.indx, + sbstate->replacevalue, + sbstate->replacenull, + sbstate->refattrlength, + sbstate->refelemlength, + sbstate->refelembyval, + sbstate->refelemalign); + else + return array_set_slice(containerSource, sbstate->numupper, + u_index.indx, l_index.indx, + sbstate->upperprovided, + sbstate->lowerprovided, + sbstate->replacevalue, + sbstate->replacenull, + sbstate->refattrlength, + sbstate->refelemlength, + sbstate->refelembyval, + sbstate->refelemalign); +} + +Datum +array_subscript_fetch(Datum containerSource, SubscriptingRefState *sbstate) +{ + bool is_slice = (sbstate->numlower != 0); + IntArray u_index, l_index; + int i = 0; + + if (sbstate->refelemlength == 0) + { + /* do one-time catalog lookups for type info */ + get_typlenbyvalalign(sbstate->refelemtype, + &sbstate->refelemlength, + &sbstate->refelembyval, + &sbstate->refelemalign); + } + + for(i = 0; i < sbstate->numupper; i++) + u_index.indx[i] = DatumGetInt32(sbstate->upperindex[i]); + + if (is_slice) + { + for(i = 0; i < sbstate->numlower; i++) + l_index.indx[i] = DatumGetInt32(sbstate->lowerindex[i]); + } + + if (!is_slice) + return array_get_element(containerSource, sbstate->numupper, + u_index.indx, + sbstate->refattrlength, + sbstate->refelemlength, + sbstate->refelembyval, + sbstate->refelemalign, + &sbstate->resnull); + else + return array_get_slice(containerSource, sbstate->numupper, + u_index.indx, l_index.indx, + sbstate->upperprovided, + sbstate->lowerprovided, + sbstate->refattrlength, + sbstate->refelemlength, + sbstate->refelembyval, + sbstate->refelemalign); +} + +/* + * Handle array-type subscripting logic. 
+ */ +Datum +array_subscript_handler(PG_FUNCTION_ARGS) +{ + SubscriptRoutines *sbsroutines = (SubscriptRoutines *) + palloc(sizeof(SubscriptRoutines)); + + sbsroutines->prepare = array_subscript_prepare; + sbsroutines->validate = array_subscript_validate; + sbsroutines->fetch = array_subscript_fetch; + sbsroutines->assign = array_subscript_assign; + + PG_RETURN_POINTER(sbsroutines); +} + +SubscriptingRef * +array_subscript_prepare(bool isAssignment, SubscriptingRef *sbsref) +{ + Oid array_type = sbsref->refcontainertype; + HeapTuple type_tuple_container; + Form_pg_type type_struct_container; + bool is_slice = sbsref->reflowerindexpr != NIL; + + /* Get the type tuple for the container */ + type_tuple_container = SearchSysCache1(TYPEOID, ObjectIdGetDatum(array_type)); + if (!HeapTupleIsValid(type_tuple_container)) + elog(ERROR, "cache lookup failed for type %u", array_type); + type_struct_container = (Form_pg_type) GETSTRUCT(type_tuple_container); + + /* needn't check typisdefined since this will fail anyway */ + sbsref->refelemtype = type_struct_container->typelem; + + /* Identify type that RHS must provide */ + if (isAssignment) + sbsref->refassgntype = is_slice ? 
sbsref->refcontainertype : sbsref->refelemtype; + + ReleaseSysCache(type_tuple_container); + + return sbsref; +} + +SubscriptingRef * +array_subscript_validate(bool isAssignment, SubscriptingRef *sbsref, + ParseState *pstate) +{ + bool is_slice = sbsref->reflowerindexpr != NIL; + Oid typeneeded = InvalidOid, + typesource = InvalidOid; + Node *new_from; + Node *subexpr; + List *upperIndexpr = NIL; + List *lowerIndexpr = NIL; + ListCell *u, *l, *s; + + foreach(u, sbsref->refupperindexpr) + { + subexpr = (Node *) lfirst(u); + + if (subexpr == NULL) + { + upperIndexpr = lappend(upperIndexpr, subexpr); + continue; + } + + subexpr = coerce_to_target_type(pstate, + subexpr, exprType(subexpr), + INT4OID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array subscript must have type integer"), + parser_errposition(pstate, exprLocation(subexpr)))); + + upperIndexpr = lappend(upperIndexpr, subexpr); + } + + sbsref->refupperindexpr = upperIndexpr; + + forboth(l, sbsref->reflowerindexpr, s, sbsref->refindexprslice) + { + A_Indices *ai = (A_Indices *) lfirst(s); + subexpr = (Node *) lfirst(l); + + if (subexpr == NULL && !ai->is_slice) + { + /* Make a constant 1 */ + subexpr = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(1), + false, + true); /* pass by value */ + } + + if (subexpr == NULL) + { + lowerIndexpr = lappend(lowerIndexpr, subexpr); + continue; + } + + subexpr = coerce_to_target_type(pstate, + subexpr, exprType(subexpr), + INT4OID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array subscript must have type integer"), + parser_errposition(pstate, exprLocation(subexpr)))); + + lowerIndexpr = lappend(lowerIndexpr, subexpr); + } + + sbsref->reflowerindexpr = lowerIndexpr; + + if (isAssignment) + { + SubscriptingRef *assignRef = 
(SubscriptingRef *) sbsref; + Node *assignExpr = (Node *) assignRef->refassgnexpr; + + typesource = exprType(assignExpr); + typeneeded = is_slice ? sbsref->refcontainertype : sbsref->refelemtype; + new_from = coerce_to_target_type(pstate, + assignExpr, typesource, + typeneeded, sbsref->reftypmod, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (new_from == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array assignment requires type %s" + " but expression is of type %s", + format_type_be(sbsref->refelemtype), + format_type_be(typesource)), + errhint("You will need to rewrite or cast the expression."), + parser_errposition(pstate, exprLocation(assignExpr)))); + assignRef->refassgnexpr = (Expr *) new_from; + } + + sbsref->refnestedfunc = F_ARRAY_SUBSCRIPT_HANDLER; + + /* Verify subscript list lengths are within limit */ + if (list_length(sbsref->refupperindexpr) > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + list_length(sbsref->refupperindexpr), MAXDIM))); + + if (list_length(sbsref->reflowerindexpr) > MAXDIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", + list_length(sbsref->reflowerindexpr), MAXDIM))); + + return sbsref; +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 4b5af32440..8761597e12 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10879,6 +10879,13 @@ proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}', prosrc => 'pg_control_init' }, +{ oid => '6099', + descr => 'Array subscripting logic', + proname => 'array_subscript_handler', + prorettype => 'internal', + 
proargtypes => 'internal', + prosrc => 'array_subscript_handler' }, + # collation management functions { oid => '3445', descr => 'import collations from operating system', proname => 'pg_import_system_collations', procost => '100', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index b2cec07416..21489a02ae 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -54,7 +54,8 @@ typname => 'name', typlen => 'NAMEDATALEN', typbyval => 'f', typcategory => 'S', typelem => 'char', typinput => 'namein', typoutput => 'nameout', typreceive => 'namerecv', typsend => 'namesend', - typalign => 'c', typcollation => 'C' }, + typalign => 'c', typcollation => 'C', + typsubshandler => 'array_subscript_handler' }, { oid => '20', array_type_oid => '1016', descr => '~18 digit integer, 8-byte storage', typname => 'int8', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', @@ -70,7 +71,7 @@ typname => 'int2vector', typlen => '-1', typbyval => 'f', typcategory => 'A', typelem => 'int2', typinput => 'int2vectorin', typoutput => 'int2vectorout', typreceive => 'int2vectorrecv', typsend => 'int2vectorsend', - typalign => 'i' }, + typalign => 'i', typsubshandler => 'array_subscript_handler' }, { oid => '23', array_type_oid => '1007', descr => '-2 billion to 2 billion integer, 4-byte storage', typname => 'int4', typlen => '4', typbyval => 't', typcategory => 'N', @@ -109,7 +110,8 @@ descr => 'array of oids, used in system tables', typname => 'oidvector', typlen => '-1', typbyval => 'f', typcategory => 'A', typelem => 'oid', typinput => 'oidvectorin', typoutput => 'oidvectorout', - typreceive => 'oidvectorrecv', typsend => 'oidvectorsend', typalign => 'i' }, + typreceive => 'oidvectorrecv', typsend => 'oidvectorsend', typalign => 'i', + typsubshandler => 'array_subscript_handler' }, # hand-built rowtype entries for bootstrapped catalogs # NB: OIDs assigned here must match the BKI_ROWTYPE_OID declarations @@ -188,32 +190,37 @@ descr => 
'geometric point \'(x, y)\'', typname => 'point', typlen => '16', typbyval => 'f', typcategory => 'G', typelem => 'float8', typinput => 'point_in', typoutput => 'point_out', - typreceive => 'point_recv', typsend => 'point_send', typalign => 'd' }, + typreceive => 'point_recv', typsend => 'point_send', typalign => 'd', + typsubshandler => 'array_subscript_handler' }, { oid => '601', array_type_oid => '1018', descr => 'geometric line segment \'(pt1,pt2)\'', typname => 'lseg', typlen => '32', typbyval => 'f', typcategory => 'G', typelem => 'point', typinput => 'lseg_in', typoutput => 'lseg_out', - typreceive => 'lseg_recv', typsend => 'lseg_send', typalign => 'd' }, + typreceive => 'lseg_recv', typsend => 'lseg_send', typalign => 'd', + typsubshandler => 'array_subscript_handler' }, { oid => '602', array_type_oid => '1019', descr => 'geometric path \'(pt1,...)\'', typname => 'path', typlen => '-1', typbyval => 'f', typcategory => 'G', typinput => 'path_in', typoutput => 'path_out', typreceive => 'path_recv', - typsend => 'path_send', typalign => 'd', typstorage => 'x' }, + typsend => 'path_send', typalign => 'd', typstorage => 'x', + typsubshandler => 'array_subscript_handler' }, { oid => '603', array_type_oid => '1020', descr => 'geometric box \'(lower left,upper right)\'', typname => 'box', typlen => '32', typbyval => 'f', typcategory => 'G', typdelim => ';', typelem => 'point', typinput => 'box_in', typoutput => 'box_out', typreceive => 'box_recv', typsend => 'box_send', - typalign => 'd' }, + typalign => 'd', typsubshandler => 'array_subscript_handler' }, { oid => '604', array_type_oid => '1027', descr => 'geometric polygon \'(pt1,...)\'', typname => 'polygon', typlen => '-1', typbyval => 'f', typcategory => 'G', typinput => 'poly_in', typoutput => 'poly_out', typreceive => 'poly_recv', - typsend => 'poly_send', typalign => 'd', typstorage => 'x' }, + typsend => 'poly_send', typalign => 'd', typstorage => 'x', + typsubshandler => 'array_subscript_handler' }, { oid 
=> '628', array_type_oid => '629', descr => 'geometric line', typname => 'line', typlen => '24', typbyval => 'f', typcategory => 'G', typelem => 'float8', typinput => 'line_in', typoutput => 'line_out', - typreceive => 'line_recv', typsend => 'line_send', typalign => 'd' }, + typreceive => 'line_recv', typsend => 'line_send', typalign => 'd', + typsubshandler => 'array_subscript_handler' }, # OIDS 700 - 799 @@ -272,7 +279,7 @@ { oid => '1033', array_type_oid => '1034', descr => 'access control list', typname => 'aclitem', typlen => '12', typbyval => 'f', typcategory => 'U', typinput => 'aclitemin', typoutput => 'aclitemout', typreceive => '-', - typsend => '-', typalign => 'i' }, + typsend => '-', typalign => 'i', typsubshandler => 'array_subscript_handler' }, { oid => '1042', array_type_oid => '1014', descr => 'char(length), blank-padded string, fixed storage length', typname => 'bpchar', typlen => '-1', typbyval => 'f', typcategory => 'S', @@ -311,7 +318,8 @@ typcategory => 'D', typispreferred => 't', typinput => 'timestamptz_in', typoutput => 'timestamptz_out', typreceive => 'timestamptz_recv', typsend => 'timestamptz_send', typmodin => 'timestamptztypmodin', - typmodout => 'timestamptztypmodout', typalign => 'd' }, + typmodout => 'timestamptztypmodout', typalign => 'd', + typsubshandler => 'array_subscript_handler' }, { oid => '1186', array_type_oid => '1187', descr => '@ <number> <units>, time interval', typname => 'interval', typlen => '16', typbyval => 'f', typcategory => 'T', diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index c730563f03..f54b73c9da 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -190,9 +190,9 @@ select ('[0:2][0:2]={{1,2,3},{4,5,6},{7,8,9}}'::int[])[1:2][2]; -- -- check subscription corner cases -- --- More subscripts than MAXDIMS(6) -SELECT ('{}'::int[])[1][2][3][4][5][6][7]; -ERROR: number of array dimensions (7) exceeds the maximum allowed (6) +-- 
More subscripts than MAXDIMS(12) +SELECT ('{}'::int[])[1][2][3][4][5][6][7][8][9][10][11][12][13]; +ERROR: number of array dimensions (13) exceeds the maximum allowed (6) -- NULL index yields NULL when selecting SELECT ('{{{1},{2},{3}},{{4},{5},{6}}}'::int[])[1][NULL][1]; int4 @@ -216,15 +216,15 @@ SELECT ('{{{1},{2},{3}},{{4},{5},{6}}}'::int[])[1][1:NULL][1]; UPDATE arrtest SET c[NULL] = '{"can''t assign"}' WHERE array_dims(c) is not null; -ERROR: array subscript in assignment must not be null +ERROR: subscript in assignment must not be null UPDATE arrtest SET c[NULL:1] = '{"can''t assign"}' WHERE array_dims(c) is not null; -ERROR: array subscript in assignment must not be null +ERROR: subscript in assignment must not be null UPDATE arrtest SET c[1:NULL] = '{"can''t assign"}' WHERE array_dims(c) is not null; -ERROR: array subscript in assignment must not be null +ERROR: subscript in assignment must not be null -- test slices with empty lower and/or upper index CREATE TEMP TABLE arrtest_s ( a int2[], diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 25dd4e2c6d..fb7a319118 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -106,8 +106,8 @@ select ('[0:2][0:2]={{1,2,3},{4,5,6},{7,8,9}}'::int[])[1:2][2]; -- -- check subscription corner cases -- --- More subscripts than MAXDIMS(6) -SELECT ('{}'::int[])[1][2][3][4][5][6][7]; +-- More subscripts than MAXDIMS(12) +SELECT ('{}'::int[])[1][2][3][4][5][6][7][8][9][10][11][12][13]; -- NULL index yields NULL when selecting SELECT ('{{{1},{2},{3}},{{4},{5},{6}}}'::int[])[1][NULL][1]; SELECT ('{{{1},{2},{3}},{{4},{5},{6}}}'::int[])[1][NULL:1][1]; -- 2.21.0
>From ea655d01f987c68eb24e8b4ad7e7038390298915 Mon Sep 17 00:00:00 2001 From: erthalion <9erthali...@gmail.com> Date: Fri, 1 Feb 2019 11:41:45 +0100 Subject: [PATCH v33 3/5] Subscripting for jsonb Subscripting implementation for jsonb. For the sake of code reuse, some parts of the jsonb functionality were rearranged so that the same functions can be used both for jsonb_set and for the subscripting assignment operation. Reviewed-by: Tom Lane, Arthur Zakirov --- src/backend/utils/adt/jsonb.c | 27 ++- src/backend/utils/adt/jsonb_util.c | 76 ++++++- src/backend/utils/adt/jsonfuncs.c | 325 ++++++++++++++++++++-------- src/include/catalog/pg_proc.dat | 8 + src/include/catalog/pg_type.dat | 3 +- src/include/utils/jsonb.h | 2 + src/test/regress/expected/jsonb.out | 233 +++++++++++++++++++- src/test/regress/sql/jsonb.sql | 68 +++++- 8 files changed, 632 insertions(+), 110 deletions(-) diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c index 1e9ca046c6..f66642def3 100644 --- a/src/backend/utils/adt/jsonb.c +++ b/src/backend/utils/adt/jsonb.c @@ -1134,23 +1134,34 @@ to_jsonb(PG_FUNCTION_ARGS) { Datum val = PG_GETARG_DATUM(0); Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0); - JsonbInState result; - JsonbTypeCategory tcategory; - Oid outfuncoid; + JsonbValue *res = to_jsonb_worker(val, val_type, false); + PG_RETURN_POINTER(JsonbValueToJsonb(res)); +} - if (val_type == InvalidOid) +/* + * Do the actual conversion to jsonb for to_jsonb function. This logic is + * separated because it can be useful not only in here (e.g. 
we use it in + * jsonb subscripting) + */ +JsonbValue * +to_jsonb_worker(Datum source, Oid source_type, bool is_null) +{ + JsonbInState result; + JsonbTypeCategory tcategory; + Oid outfuncoid; + + if (source_type == InvalidOid) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not determine input data type"))); - jsonb_categorize_type(val_type, + jsonb_categorize_type(source_type, &tcategory, &outfuncoid); memset(&result, 0, sizeof(JsonbInState)); - datum_to_jsonb(val, false, &result, tcategory, outfuncoid, false); - - PG_RETURN_POINTER(JsonbValueToJsonb(result.res)); + datum_to_jsonb(source, is_null, &result, tcategory, outfuncoid, false); + return result.res; } /* diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index 4eeffa1424..41a1c1f9bb 100644 --- a/src/backend/utils/adt/jsonb_util.c +++ b/src/backend/utils/adt/jsonb_util.c @@ -68,18 +68,29 @@ static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq, JsonbValue *scalarVal); +JsonbValue * +JsonbToJsonbValue(Jsonb *jsonb) +{ + JsonbValue *val = (JsonbValue *) palloc(sizeof(JsonbValue)); + + val->type = jbvBinary; + val->val.binary.data = &jsonb->root; + val->val.binary.len = VARSIZE(jsonb) - VARHDRSZ; + + return val; +} + /* * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. * - * There isn't a JsonbToJsonbValue(), because generally we find it more - * convenient to directly iterate through the Jsonb representation and only - * really convert nested scalar values. JsonbIteratorNext() does this, so that - * clients of the iteration code don't have to directly deal with the binary - * representation (JsonbDeepContains() is a notable exception, although all - * exceptions are internal to this module). 
In general, functions that accept - * a JsonbValue argument are concerned with the manipulation of scalar values, - * or simple containers of scalar values, where it would be inconvenient to - * deal with a great amount of other state. + * Generally we find it more convenient to directly iterate through the Jsonb + * representation and only really convert nested scalar values. + * JsonbIteratorNext() does this, so that clients of the iteration code don't + * have to directly deal with the binary representation (JsonbDeepContains() is + * a notable exception, although all exceptions are internal to this module). + * In general, functions that accept a JsonbValue argument are concerned with + * the manipulation of scalar values, or simple containers of scalar values, + * where it would be inconvenient to deal with a great amount of other state. */ Jsonb * JsonbValueToJsonb(JsonbValue *val) @@ -563,6 +574,30 @@ pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, JsonbValue *res = NULL; JsonbValue v; JsonbIteratorToken tok; + int i; + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvObject) + { + pushJsonbValue(pstate, WJB_BEGIN_OBJECT, NULL); + for (i = 0; i < jbval->val.object.nPairs; i++) + { + pushJsonbValue(pstate, WJB_KEY, &jbval->val.object.pairs[i].key); + pushJsonbValue(pstate, WJB_VALUE, &jbval->val.object.pairs[i].value); + } + + return pushJsonbValue(pstate, WJB_END_OBJECT, NULL); + } + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvArray) + { + pushJsonbValue(pstate, WJB_BEGIN_ARRAY, NULL); + for (i = 0; i < jbval->val.array.nElems; i++) + { + pushJsonbValue(pstate, WJB_ELEM, &jbval->val.array.elems[i]); + } + + return pushJsonbValue(pstate, WJB_END_ARRAY, NULL); + } if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) || jbval->type != jbvBinary) @@ -573,9 +608,30 @@ pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, /* unpack the binary and add each piece to the pstate */ it = 
JsonbIteratorInit(jbval->val.binary.data); + + if ((jbval->val.binary.data->header & JB_FSCALAR) && *pstate) + { + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_BEGIN_ARRAY); + Assert(v.type == jbvArray && v.val.array.rawScalar); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_ELEM); + + res = pushJsonbValueScalar(pstate, seq, &v); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_END_ARRAY); + Assert(it == NULL); + + return res; + } + while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) res = pushJsonbValueScalar(pstate, tok, - tok < WJB_BEGIN_ARRAY ? &v : NULL); + tok < WJB_BEGIN_ARRAY || + (tok == WJB_BEGIN_ARRAY && + v.val.array.rawScalar) ? &v : NULL); return res; } diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 5a09d65fdc..f46a2828b3 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -21,12 +21,16 @@ #include "common/jsonapi.h" #include "fmgr.h" #include "funcapi.h" +#include "executor/execExpr.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_coerce.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/fmgroids.h" #include "utils/json.h" #include "utils/jsonb.h" #include "utils/jsonfuncs.h" @@ -460,18 +464,22 @@ static Datum populate_domain(DomainIOData *io, Oid typid, const char *colname, /* functions supporting jsonb_delete, jsonb_set and jsonb_concat */ static JsonbValue *IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, JsonbParseState **state); +static Datum jsonb_set_element(Datum datum, Datum *path, int path_len, + Datum sourceData, Oid source_type, bool is_null); +static Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath, + bool *isnull, bool as_text); static JsonbValue *setPath(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, - JsonbParseState **st, int 
level, Jsonb *newval, + JsonbParseState **st, int level, JsonbValue *newval, int op_type); static void setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, int level, - Jsonb *newval, uint32 npairs, int op_type); + JsonbValue *newval, uint32 npairs, int op_type); static void setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, - int level, Jsonb *newval, uint32 nelems, int op_type); -static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb); + int level, + JsonbValue *newval, uint32 nelems, int op_type); /* function supporting iterate_json_values */ static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype); @@ -486,6 +494,15 @@ static void transform_string_values_object_field_start(void *state, char *fname, static void transform_string_values_array_element_start(void *state, bool isnull); static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype); +static SubscriptingRef *jsonb_subscript_prepare(bool isAssignment, + SubscriptingRef *sbsref); + +static SubscriptingRef *jsonb_subscript_validate(bool isAssignment, + SubscriptingRef *sbsref, + ParseState *pstate); +static Datum jsonb_subscript_fetch(Datum containerSource, SubscriptingRefState *sbstate); +static Datum jsonb_subscript_assign(Datum containerSource, SubscriptingRefState *sbstate); + /* * pg_parse_json_or_ereport * @@ -1447,13 +1464,9 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); Datum *pathtext; bool *pathnulls; + bool isnull; int npath; - int i; - bool have_object = false, - have_array = false; - JsonbValue *jbvp = NULL; - JsonbValue jbvbuf; - JsonbContainer *container; + Datum res; /* * If the array contains any null elements, return NULL, on the grounds @@ -1468,9 +1481,28 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) deconstruct_array(path, TEXTOID, -1, 
false, TYPALIGN_INT, &pathtext, &pathnulls, &npath); - /* Identify whether we have object, array, or scalar at top-level */ - container = &jb->root; + res = jsonb_get_element(jb, pathtext, npath, &isnull, as_text); + if (isnull) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(res); +} + +static Datum +jsonb_get_element(Jsonb *jb, Datum *path, int npath, bool *isnull, bool as_text) +{ + Jsonb *res; + JsonbContainer *container = &jb->root; + JsonbValue *jbvp = NULL; + JsonbValue tv; + int i; + bool have_object = false, + have_array = false; + + *isnull = false; + + /* Identify whether we have object, array, or scalar at top-level */ if (JB_ROOT_IS_OBJECT(jb)) have_object = true; else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb)) @@ -1495,7 +1527,7 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) { if (as_text) { - PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL, + return PointerGetDatum(cstring_to_text(JsonbToCString(NULL, container, VARSIZE(jb)))); } @@ -1511,22 +1543,25 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) if (have_object) { jbvp = getKeyJsonValueFromContainer(container, - VARDATA(pathtext[i]), - VARSIZE(pathtext[i]) - VARHDRSZ, - &jbvbuf); + VARDATA(path[i]), + VARSIZE(path[i]) - VARHDRSZ, + NULL); } else if (have_array) { long lindex; uint32 index; - char *indextext = TextDatumGetCString(pathtext[i]); + char *indextext = TextDatumGetCString(path[i]); char *endptr; errno = 0; lindex = strtol(indextext, &endptr, 10); if (endptr == indextext || *endptr != '\0' || errno != 0 || lindex > INT_MAX || lindex < INT_MIN) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } if (lindex >= 0) { @@ -1544,7 +1579,10 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) nelements = JsonContainerSize(container); if (-lindex > nelements) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } else index = nelements + lindex; } @@ -1554,11 +1592,15 @@ 
get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) else { /* scalar, extraction yields a null */ - PG_RETURN_NULL(); + *isnull = true; + return PointerGetDatum(NULL); } if (jbvp == NULL) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } else if (i == npath - 1) break; @@ -1580,9 +1622,12 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) if (as_text) { if (jbvp->type == jbvNull) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } - PG_RETURN_TEXT_P(JsonbValueAsText(jbvp)); + return PointerGetDatum(JsonbValueAsText(jbvp)); } else { @@ -1593,6 +1638,32 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) } } +Datum +jsonb_set_element(Datum jsonbdatum, Datum *path, int path_len, + Datum sourceData, Oid source_type, bool is_null) +{ + Jsonb *jb = DatumGetJsonbP(jsonbdatum); + JsonbValue *newval, + *res; + JsonbParseState *state = NULL; + JsonbIterator *it; + bool *path_nulls = palloc0(path_len * sizeof(bool)); + + newval = to_jsonb_worker(sourceData, source_type, is_null); + + if (newval->type == jbvArray && newval->val.array.rawScalar) + *newval = newval->val.array.elems[0]; + + it = JsonbIteratorInit(&jb->root); + + res = setPath(&it, path, path_nulls, path_len, &state, 0, + newval, JB_PATH_CREATE); + + pfree(path_nulls); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + /* * Return the text representation of the given JsonbValue. */ @@ -4152,58 +4223,6 @@ jsonb_strip_nulls(PG_FUNCTION_ARGS) PG_RETURN_POINTER(JsonbValueToJsonb(res)); } -/* - * Add values from the jsonb to the parse state. - * - * If the parse state container is an object, the jsonb is pushed as - * a value, not a key. - * - * This needs to be done using an iterator because pushJsonbValue doesn't - * like getting jbvBinary values, so we can't just push jb as a whole. 
- */ -static void -addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb) -{ - JsonbIterator *it; - JsonbValue *o = &(*jbps)->contVal; - JsonbValue v; - JsonbIteratorToken type; - - it = JsonbIteratorInit(&jb->root); - - Assert(o->type == jbvArray || o->type == jbvObject); - - if (JB_ROOT_IS_SCALAR(jb)) - { - (void) JsonbIteratorNext(&it, &v, false); /* skip array header */ - Assert(v.type == jbvArray); - (void) JsonbIteratorNext(&it, &v, false); /* fetch scalar value */ - - switch (o->type) - { - case jbvArray: - (void) pushJsonbValue(jbps, WJB_ELEM, &v); - break; - case jbvObject: - (void) pushJsonbValue(jbps, WJB_VALUE, &v); - break; - default: - elog(ERROR, "unexpected parent of nested structure"); - } - } - else - { - while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) - { - if (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM) - (void) pushJsonbValue(jbps, type, &v); - else - (void) pushJsonbValue(jbps, type, NULL); - } - } - -} - /* * SQL function jsonb_pretty (jsonb) * @@ -4475,7 +4494,8 @@ jsonb_set(PG_FUNCTION_ARGS) { Jsonb *in = PG_GETARG_JSONB_P(0); ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); - Jsonb *newval = PG_GETARG_JSONB_P(2); + Jsonb *newjsonb = PG_GETARG_JSONB_P(2); + JsonbValue *newval = JsonbToJsonbValue(newjsonb); bool create = PG_GETARG_BOOL(3); JsonbValue *res = NULL; Datum *path_elems; @@ -4633,7 +4653,8 @@ jsonb_insert(PG_FUNCTION_ARGS) { Jsonb *in = PG_GETARG_JSONB_P(0); ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); - Jsonb *newval = PG_GETARG_JSONB_P(2); + Jsonb *newjsonb = PG_GETARG_JSONB_P(2); + JsonbValue *newval = JsonbToJsonbValue(newjsonb); bool after = PG_GETARG_BOOL(3); JsonbValue *res = NULL; Datum *path_elems; @@ -4796,7 +4817,7 @@ IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, static JsonbValue * setPath(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, - JsonbParseState **st, int level, Jsonb *newval, int op_type) + JsonbParseState **st, int level, JsonbValue *newval, int 
op_type) { JsonbValue v; JsonbIteratorToken r; @@ -4849,11 +4870,11 @@ setPath(JsonbIterator **it, Datum *path_elems, static void setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, int level, - Jsonb *newval, uint32 npairs, int op_type) + JsonbValue *newval, uint32 npairs, int op_type) { - JsonbValue v; int i; - JsonbValue k; + JsonbValue k, + v; bool done = false; if (level >= path_len || path_nulls[level]) @@ -4870,7 +4891,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, newkey.val.string.val = VARDATA_ANY(path_elems[level]); (void) pushJsonbValue(st, WJB_KEY, &newkey); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_VALUE, newval); } for (i = 0; i < npairs; i++) @@ -4901,7 +4922,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (!(op_type & JB_PATH_DELETE)) { (void) pushJsonbValue(st, WJB_KEY, &k); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_VALUE, newval); } done = true; } @@ -4924,7 +4945,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, newkey.val.string.val = VARDATA_ANY(path_elems[level]); (void) pushJsonbValue(st, WJB_KEY, &newkey); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_VALUE, newval); } (void) pushJsonbValue(st, r, &k); @@ -4956,7 +4977,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, static void setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, int level, - Jsonb *newval, uint32 nelems, int op_type) + JsonbValue *newval, uint32 nelems, int op_type) { JsonbValue v; int idx, @@ -5004,7 +5025,7 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, (op_type & JB_PATH_CREATE_OR_INSERT)) { Assert(newval != NULL); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_ELEM, newval); done = true; } @@ -5020,7 +5041,7 @@ 
setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, r = JsonbIteratorNext(it, &v, true); /* skip */ if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_CREATE)) - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_ELEM, newval); /* * We should keep current value only in case of @@ -5031,7 +5052,7 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, (void) pushJsonbValue(st, r, &v); if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE)) - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_ELEM, newval); done = true; } @@ -5065,12 +5086,138 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1 && i == nelems - 1) { - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_ELEM, newval); } } } } +/* + * Perform the actual data extraction for jsonb subscripting: look up the + * element addressed by the upper subscripts and return it as a Datum, + * setting sbstate->resnull when no such element is found. + */ +Datum +jsonb_subscript_fetch(Datum containerSource, SubscriptingRefState *sbstate) +{ + return jsonb_get_element(DatumGetJsonbP(containerSource), + sbstate->upperindex, + sbstate->numupper, + &sbstate->resnull, + false); +} + + + +/* + * Perform the actual data modification for jsonb subscripting: store the + * replacement value at the position addressed by the upper subscripts and + * return the modified jsonb value. + */ +Datum +jsonb_subscript_assign(Datum containerSource, SubscriptingRefState *sbstate) +{ + /* + * the original jsonb must be non-NULL, else we punt and return the + * original value unchanged. + */ + if (sbstate->resnull) + return containerSource; + + return jsonb_set_element(containerSource, + sbstate->upperindex, + sbstate->numupper, + sbstate->replacevalue, + sbstate->refelemtype, + sbstate->replacenull); +} + +/* + * Perform preparation for the jsonb subscripting. 
Since there are not any + * particular restrictions for this kind of subscripting, we will verify that + * it is not a slice operation. This function produces an expression that + * represents the result of extracting a single container element or the new + * container value with the source data inserted into the right part of the + * container. If you have read until this point, and will submit a meaningful + * review of this patch series, I'll owe you a beer at the next PGConfEU. + */ + +/* + * Handle jsonb-type subscripting logic. + */ +Datum +jsonb_subscript_handler(PG_FUNCTION_ARGS) +{ + SubscriptRoutines *sbsroutines = (SubscriptRoutines *) + palloc(sizeof(SubscriptRoutines)); + + sbsroutines->prepare = jsonb_subscript_prepare; + sbsroutines->validate = jsonb_subscript_validate; + sbsroutines->fetch = jsonb_subscript_fetch; + sbsroutines->assign = jsonb_subscript_assign; + + PG_RETURN_POINTER(sbsroutines); +} + +SubscriptingRef * +jsonb_subscript_prepare(bool isAssignment, SubscriptingRef *sbsref) +{ + if (isAssignment) + { + sbsref->refelemtype = exprType((Node *) sbsref->refassgnexpr); + sbsref->refassgntype = exprType((Node *) sbsref->refassgnexpr); + } + else + sbsref->refelemtype = JSONBOID; + + return sbsref; +} + +SubscriptingRef * +jsonb_subscript_validate(bool isAssignment, SubscriptingRef *sbsref, + ParseState *pstate) +{ + List *upperIndexpr = NIL; + ListCell *l; + + if (sbsref->reflowerindexpr != NIL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript does not support slices"), + parser_errposition(pstate, exprLocation( + ((Node *) linitial(sbsref->reflowerindexpr)))))); + + foreach(l, sbsref->refupperindexpr) + { + Node *subexpr = (Node *) lfirst(l); + + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript does not support slices"), + parser_errposition(pstate, exprLocation( + ((Node *) linitial(sbsref->refupperindexpr)))))); + + subexpr = 
coerce_to_target_type(pstate, + subexpr, exprType(subexpr), + TEXTOID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript must have text type"), + parser_errposition(pstate, exprLocation(subexpr)))); + + upperIndexpr = lappend(upperIndexpr, subexpr); + } + + sbsref->refupperindexpr = upperIndexpr; + + return sbsref; +} + /* * Parse information about what elements of a jsonb document we want to iterate * in functions iterate_json(b)_values. This information is presented in jsonb diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8761597e12..bf0d4ef743 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10879,6 +10879,14 @@ proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}', prosrc => 'pg_control_init' }, +# type subscripting support +{ oid => '6098', + descr => 'Jsonb subscripting logic', + proname => 'jsonb_subscript_handler', + prorettype => 'internal', + proargtypes => 'internal', + prosrc => 'jsonb_subscript_handler' }, + { oid => '6099', descr => 'Array subscripting logic', proname => 'array_subscript_handler', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 21489a02ae..fa7e1d22a5 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -456,7 +456,8 @@ { oid => '3802', array_type_oid => '3807', descr => 'Binary JSON', typname => 'jsonb', typlen => '-1', typbyval => 'f', typcategory => 'U', typinput => 'jsonb_in', typoutput => 'jsonb_out', typreceive => 'jsonb_recv', - typsend => 'jsonb_send', typalign => 'i', typstorage => 'x' }, + typsend => 'jsonb_send', typalign => 'i', typstorage => 'x', + typsubshandler => 
'jsonb_subscript_handler' }, { oid => '4072', array_type_oid => '4073', descr => 'JSON path', typname => 'jsonpath', typlen => '-1', typbyval => 'f', typcategory => 'U', typinput => 'jsonpath_in', typoutput => 'jsonpath_out', diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index 5860011693..6e3b75d56a 100644 --- a/src/include/utils/jsonb.h +++ b/src/include/utils/jsonb.h @@ -392,6 +392,7 @@ extern JsonbValue *pushJsonbValue(JsonbParseState **pstate, extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container); extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested); +extern JsonbValue *JsonbToJsonbValue(Jsonb *jsonb); extern Jsonb *JsonbValueToJsonb(JsonbValue *val); extern bool JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained); @@ -407,5 +408,6 @@ extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in, extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res); extern const char *JsonbTypeName(JsonbValue *jb); +extern JsonbValue *to_jsonb_worker(Datum source, Oid source_type, bool is_null); #endif /* __JSONB_H__ */ diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index a70cd0b7c1..04a146a7d0 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -4567,7 +4567,7 @@ select jsonb_set_lax('{"a":1,"b":2}', '{b}', null, null_value_treatment => 'use_ {"a": 1, "b": null} (1 row) -\pset null +\pset null '' -- jsonb_insert select jsonb_insert('{"a": [0,1,2]}', '{a, 1}', '"new_value"'); jsonb_insert @@ -4697,6 +4697,237 @@ HINT: Try using the function jsonb_set to replace key value. select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true); ERROR: cannot replace existing key HINT: Try using the function jsonb_set to replace key value. 
+-- jsonb subscript +select ('123'::jsonb)['a']; + jsonb +------- + +(1 row) + +select ('123'::jsonb)[0]; + jsonb +------- + +(1 row) + +select ('123'::jsonb)[NULL]; + jsonb +------- + +(1 row) + +select ('{"a": 1}'::jsonb)['a']; + jsonb +------- + 1 +(1 row) + +select ('{"a": 1}'::jsonb)[0]; + jsonb +------- + +(1 row) + +select ('{"a": 1}'::jsonb)['not_exist']; + jsonb +------- + +(1 row) + +select ('{"a": 1}'::jsonb)[NULL]; + jsonb +------- + +(1 row) + +select ('[1, "2", null]'::jsonb)['a']; + jsonb +------- + +(1 row) + +select ('[1, "2", null]'::jsonb)[0]; + jsonb +------- + 1 +(1 row) + +select ('[1, "2", null]'::jsonb)['1']; + jsonb +------- + "2" +(1 row) + +select ('[1, "2", null]'::jsonb)[1.0]; + jsonb +------- + +(1 row) + +select ('[1, "2", null]'::jsonb)[2]; + jsonb +------- + null +(1 row) + +select ('[1, "2", null]'::jsonb)[3]; + jsonb +------- + +(1 row) + +select ('[1, "2", null]'::jsonb)[-2]; + jsonb +------- + "2" +(1 row) + +select ('[1, "2", null]'::jsonb)[1]['a']; + jsonb +------- + +(1 row) + +select ('[1, "2", null]'::jsonb)[1][0]; + jsonb +------- + +(1 row) + +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['b']; + jsonb +------- + "c" +(1 row) + +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['d']; + jsonb +----------- + [1, 2, 3] +(1 row) + +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['d'][1]; + jsonb +------- + 2 +(1 row) + +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['d']['a']; + jsonb +------- + +(1 row) + +select ('{"a": {"a1": {"a2": "aaa"}}, "b": "bbb", "c": "ccc"}'::jsonb)['a']['a1']; + jsonb +--------------- + {"a2": "aaa"} +(1 row) + +select ('{"a": {"a1": {"a2": "aaa"}}, "b": "bbb", "c": "ccc"}'::jsonb)['a']['a1']['a2']; + jsonb +------- + "aaa" +(1 row) + +select ('{"a": {"a1": {"a2": "aaa"}}, "b": "bbb", "c": "ccc"}'::jsonb)['a']['a1']['a2']['a3']; + jsonb +------- + +(1 row) + +select ('{"a": ["a1", {"b1": ["aaa", "bbb", "ccc"]}], "b": "bb"}'::jsonb)['a'][1]['b1']; + jsonb 
+----------------------- + ["aaa", "bbb", "ccc"] +(1 row) + +select ('{"a": ["a1", {"b1": ["aaa", "bbb", "ccc"]}], "b": "bb"}'::jsonb)['a'][1]['b1'][2]; + jsonb +------- + "ccc" +(1 row) + +create TEMP TABLE test_jsonb_subscript ( + id int, + test_json jsonb +); +insert into test_jsonb_subscript values +(1, '{}'), -- empty jsonb +(2, '{"key": "value"}'); -- jsonb with data +-- update empty jsonb +update test_jsonb_subscript set test_json['a'] = 1 where id = 1; +select * from test_jsonb_subscript; + id | test_json +----+------------------ + 2 | {"key": "value"} + 1 | {"a": 1} +(2 rows) + +-- update jsonb with some data +update test_jsonb_subscript set test_json['a'] = 1 where id = 2; +select * from test_jsonb_subscript; + id | test_json +----+-------------------------- + 1 | {"a": 1} + 2 | {"a": 1, "key": "value"} +(2 rows) + +-- replace jsonb +update test_jsonb_subscript set test_json['a'] = 'test'; +select * from test_jsonb_subscript; + id | test_json +----+------------------------------- + 1 | {"a": "test"} + 2 | {"a": "test", "key": "value"} +(2 rows) + +-- replace by object +update test_jsonb_subscript set test_json['a'] = '{"b": 1}'::jsonb; +select * from test_jsonb_subscript; + id | test_json +----+--------------------------------- + 1 | {"a": {"b": 1}} + 2 | {"a": {"b": 1}, "key": "value"} +(2 rows) + +-- replace by array +update test_jsonb_subscript set test_json['a'] = '[1, 2, 3]'::jsonb; +select * from test_jsonb_subscript; + id | test_json +----+---------------------------------- + 1 | {"a": [1, 2, 3]} + 2 | {"a": [1, 2, 3], "key": "value"} +(2 rows) + +-- use jsonb subscription in where clause +select * from test_jsonb_subscript where test_json['key'] = '"value"'; + id | test_json +----+---------------------------------- + 2 | {"a": [1, 2, 3], "key": "value"} +(1 row) + +select * from test_jsonb_subscript where test_json['key_doesnt_exists'] = '"value"'; + id | test_json +----+----------- +(0 rows) + +select * from test_jsonb_subscript where 
test_json['key'] = '"wrong_value"'; + id | test_json +----+----------- +(0 rows) + +-- NULL +update test_jsonb_subscript set test_json[NULL] = 1; +ERROR: subscript in assignment must not be null +update test_jsonb_subscript set test_json['another_key'] = NULL; +select * from test_jsonb_subscript; + id | test_json +----+------------------------------------------------------- + 1 | {"a": [1, 2, 3], "another_key": null} + 2 | {"a": [1, 2, 3], "key": "value", "another_key": null} +(2 rows) + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); to_tsvector diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 3e2b8f66df..12541e7e50 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1172,7 +1172,7 @@ select jsonb_set_lax('{"a":1,"b":2}', '{b}', null, null_value_treatment => 'retu select jsonb_set_lax('{"a":1,"b":2}', '{b}', null, null_value_treatment => 'delete_key') as delete_key; select jsonb_set_lax('{"a":1,"b":2}', '{b}', null, null_value_treatment => 'use_json_null') as use_json_null; -\pset null +\pset null '' -- jsonb_insert select jsonb_insert('{"a": [0,1,2]}', '{a, 1}', '"new_value"'); @@ -1203,6 +1203,72 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true); select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"'); select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true); +-- jsonb subscript +select ('123'::jsonb)['a']; +select ('123'::jsonb)[0]; +select ('123'::jsonb)[NULL]; +select ('{"a": 1}'::jsonb)['a']; +select ('{"a": 1}'::jsonb)[0]; +select ('{"a": 1}'::jsonb)['not_exist']; +select ('{"a": 1}'::jsonb)[NULL]; +select ('[1, "2", null]'::jsonb)['a']; +select ('[1, "2", null]'::jsonb)[0]; +select ('[1, "2", null]'::jsonb)['1']; +select ('[1, "2", null]'::jsonb)[1.0]; +select ('[1, "2", null]'::jsonb)[2]; +select ('[1, "2", null]'::jsonb)[3]; +select ('[1, "2", 
null]'::jsonb)[-2]; +select ('[1, "2", null]'::jsonb)[1]['a']; +select ('[1, "2", null]'::jsonb)[1][0]; +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['b']; +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['d']; +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['d'][1]; +select ('{"a": 1, "b": "c", "d": [1, 2, 3]}'::jsonb)['d']['a']; +select ('{"a": {"a1": {"a2": "aaa"}}, "b": "bbb", "c": "ccc"}'::jsonb)['a']['a1']; +select ('{"a": {"a1": {"a2": "aaa"}}, "b": "bbb", "c": "ccc"}'::jsonb)['a']['a1']['a2']; +select ('{"a": {"a1": {"a2": "aaa"}}, "b": "bbb", "c": "ccc"}'::jsonb)['a']['a1']['a2']['a3']; +select ('{"a": ["a1", {"b1": ["aaa", "bbb", "ccc"]}], "b": "bb"}'::jsonb)['a'][1]['b1']; +select ('{"a": ["a1", {"b1": ["aaa", "bbb", "ccc"]}], "b": "bb"}'::jsonb)['a'][1]['b1'][2]; + +create TEMP TABLE test_jsonb_subscript ( + id int, + test_json jsonb +); + +insert into test_jsonb_subscript values +(1, '{}'), -- empty jsonb +(2, '{"key": "value"}'); -- jsonb with data + +-- update empty jsonb +update test_jsonb_subscript set test_json['a'] = 1 where id = 1; +select * from test_jsonb_subscript; + +-- update jsonb with some data +update test_jsonb_subscript set test_json['a'] = 1 where id = 2; +select * from test_jsonb_subscript; + +-- replace jsonb +update test_jsonb_subscript set test_json['a'] = 'test'; +select * from test_jsonb_subscript; + +-- replace by object +update test_jsonb_subscript set test_json['a'] = '{"b": 1}'::jsonb; +select * from test_jsonb_subscript; + +-- replace by array +update test_jsonb_subscript set test_json['a'] = '[1, 2, 3]'::jsonb; +select * from test_jsonb_subscript; + +-- use jsonb subscription in where clause +select * from test_jsonb_subscript where test_json['key'] = '"value"'; +select * from test_jsonb_subscript where test_json['key_doesnt_exists'] = '"value"'; +select * from test_jsonb_subscript where test_json['key'] = '"wrong_value"'; + +-- NULL +update test_jsonb_subscript set test_json[NULL] = 1; +update 
test_jsonb_subscript set test_json['another_key'] = NULL; +select * from test_jsonb_subscript; + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); -- 2.21.0
>From da8123e0c16b195a4cc1d82b4dd3628b11bebe17 Mon Sep 17 00:00:00 2001 From: erthalion <9erthali...@gmail.com> Date: Fri, 1 Feb 2019 11:47:37 +0100 Subject: [PATCH v33 4/5] Subscripting documentation Supporting documentation for generalized subscripting. It includes the description of a new field in pg_type, the new section for jsonb documentation about subscripting feature on this data type, and also the tutorial about how to write subscripting operator for a custom data type. Reviewed-by: Tom Lane, Arthur Zakirov, Oleksandr Shulgin --- doc/src/sgml/catalogs.sgml | 8 ++ doc/src/sgml/extend.sgml | 6 + doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/json.sgml | 39 ++++++ doc/src/sgml/ref/create_type.sgml | 33 ++++- doc/src/sgml/xsubscripting.sgml | 111 +++++++++++++++++ src/tutorial/Makefile | 4 +- src/tutorial/subscripting.c | 201 ++++++++++++++++++++++++++++++ src/tutorial/subscripting.source | 71 +++++++++++ 9 files changed, 470 insertions(+), 4 deletions(-) create mode 100644 doc/src/sgml/xsubscripting.sgml create mode 100644 src/tutorial/subscripting.c create mode 100644 src/tutorial/subscripting.source diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 048ff284f7..b17e9ee382 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8971,6 +8971,14 @@ SCRAM-SHA-256$<replaceable><iteration count></replaceable>:<replaceable>&l </para></entry> </row> + <row> + <entry><structfield>typsubshandler</structfield></entry> + <entry><type>regproc</type></entry> + <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> + <entry>Custom subscripting function with type-specific logic for parsing + and validation, or 0 if this type doesn't support subscripting.</entry> + </row> + <row> <entry role="catalog_table_entry"><para role="column_definition"> <structfield>typdefault</structfield> <type>text</type> diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml index 
890ff97b7a..65cf5706b6 100644 --- a/doc/src/sgml/extend.sgml +++ b/doc/src/sgml/extend.sgml @@ -33,6 +33,11 @@ operators (starting in <xref linkend="xoper"/>) </para> </listitem> + <listitem> + <para> + subscripting procedure (starting in <xref linkend="xsubscripting"/>) + </para> + </listitem> <listitem> <para> operator classes for indexes (starting in <xref linkend="xindex"/>) @@ -481,6 +486,7 @@ RETURNS anycompatible AS ... &xaggr; &xtypes; &xoper; + &xsubscripting; &xindex; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 64b5da0070..4c3fd37fc1 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -69,6 +69,7 @@ <!ENTITY xplang SYSTEM "xplang.sgml"> <!ENTITY xoper SYSTEM "xoper.sgml"> <!ENTITY xtypes SYSTEM "xtypes.sgml"> +<!ENTITY xsubscripting SYSTEM "xsubscripting.sgml"> <!ENTITY plperl SYSTEM "plperl.sgml"> <!ENTITY plpython SYSTEM "plpython.sgml"> <!ENTITY plsql SYSTEM "plpgsql.sgml"> diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml index c0a6554d4d..5c538dca05 100644 --- a/doc/src/sgml/json.sgml +++ b/doc/src/sgml/json.sgml @@ -602,6 +602,45 @@ SELECT jdoc->'guid', jdoc->'name' FROM api WHERE jdoc @> '{"tags": ["qu </para> </sect2> + <sect2 id="jsonb-subscripting"> + <title><type>jsonb</type> Subscripting</title> + <para> + <type>jsonb</type> data type supports array-style subscripting expressions + to extract or update particular elements. It's possible to use multiple + subscripting expressions to extract nested values. In this case, a chain of + subscripting expressions follows the same rules as the + <literal>path</literal> argument in <literal>jsonb_set</literal> function, + e.g. in case of arrays it is a 0-based operation or that negative integers + that appear in <literal>path</literal> count from the end of JSON arrays. + The result of subscripting expressions is always jsonb data type. 
An + example of subscripting syntax: +<programlisting> +-- Extract value by key +SELECT ('{"a": 1}'::jsonb)['a']; + +-- Extract nested value by key path +SELECT ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c']; + +-- Extract element by index +SELECT ('[1, "2", null]'::jsonb)[1]; + +-- Update value by key +UPDATE table_name SET jsonb_field['key'] = 1; + +-- Select records using where clause with subscripting. Since the result of +-- subscripting is jsonb and we basically want to compare two jsonb objects, we +-- need to put the value in double quotes to be able to convert it to jsonb. +SELECT * FROM table_name WHERE jsonb_field['key'] = '"value"'; +</programlisting> + + There is no special indexing support for such kind of expressions, but you + can always create a functional index that includes it +<programlisting> +CREATE INDEX idx ON table_name ((jsonb_field['key'])); +</programlisting> + </para> + </sect2> + <sect2> <title>Transforms</title> diff --git a/doc/src/sgml/ref/create_type.sgml b/doc/src/sgml/ref/create_type.sgml index 111f8e65d2..a34df4d247 100644 --- a/doc/src/sgml/ref/create_type.sgml +++ b/doc/src/sgml/ref/create_type.sgml @@ -54,6 +54,7 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> ( [ , ELEMENT = <replaceable class="parameter">element</replaceable> ] [ , DELIMITER = <replaceable class="parameter">delimiter</replaceable> ] [ , COLLATABLE = <replaceable class="parameter">collatable</replaceable> ] + [ , SUBSCRIPTING_HANDLER = <replaceable class="parameter">subscripting_handler_function</replaceable> ] ) CREATE TYPE <replaceable class="parameter">name</replaceable> @@ -196,8 +197,9 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> <replaceable class="parameter">receive_function</replaceable>, <replaceable class="parameter">send_function</replaceable>, <replaceable class="parameter">type_modifier_input_function</replaceable>, - <replaceable class="parameter">type_modifier_output_function</replaceable> and - 
<replaceable class="parameter">analyze_function</replaceable> + <replaceable class="parameter">type_modifier_output_function</replaceable>, + <replaceable class="parameter">analyze_function</replaceable> and + <replaceable class="parameter">subscripting_handler_function</replaceable> are optional. Generally these functions have to be coded in C or another low-level language. </para> @@ -454,6 +456,22 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> make use of the collation information; this does not happen automatically merely by marking the type collatable. </para> + + <para> + The optional + <replaceable class="parameter">subscripting_handler_function</replaceable> + contains type-specific logic for subscripting of the data type. + By default, there is no such function provided, which means that the data + type doesn't support subscripting. The subscripting function must be + declared to take a single argument of type <type>internal</type>, and return + an <type>internal</type> result. There are two examples of implementation for + subscripting functions in case of array + (<replaceable class="parameter">array_subscripting_handler</replaceable>) + and jsonb + (<replaceable class="parameter">jsonb_subscripting_handler</replaceable>) + types in <filename>src/backend/utils/adt/arrayfuncs.c</filename> and + <filename>src/backend/utils/adt/jsonfuncs.c</filename>, respectively. + </para> </refsect2> <refsect2> @@ -769,6 +787,17 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> </para> </listitem> </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">subscripting_handler_function</replaceable></term> + <listitem> + <para> + The name of a function that returns a list of type-specific callback functions to + support subscripting logic for the data type.
+ </para> + </listitem> + </varlistentry> + </variablelist> </refsect1> diff --git a/doc/src/sgml/xsubscripting.sgml b/doc/src/sgml/xsubscripting.sgml new file mode 100644 index 0000000000..7224e81fa2 --- /dev/null +++ b/doc/src/sgml/xsubscripting.sgml @@ -0,0 +1,111 @@ +<!-- doc/src/sgml/xsubscripting.sgml --> + + <sect1 id="xsubscripting"> + <title>User-defined subscripting procedure</title> + + <indexterm zone="xsubscripting"> + <primary>custom subscripting</primary> + </indexterm> + <para> + When you define a new base type, you can also specify a custom procedure to + handle subscripting expressions. It must contain logic for verification and + evaluation of this expression, i.e. fetching or updating some data in this + data type. For instance: +</para> +<programlisting><![CDATA[ +typedef struct Custom +{ + int first; + int second; +} Custom; + +PG_FUNCTION_INFO_V1(custom_subscripting_handler); + +Datum +custom_subscripting_handler(PG_FUNCTION_ARGS) +{ + SubscriptRoutines *sbsroutines = (SubscriptRoutines *) + palloc(sizeof(SubscriptRoutines)); + sbsroutines->prepare = custom_subscript_prepare; + sbsroutines->validate = custom_subscript_validate; + sbsroutines->fetch = custom_subscript_fetch; + sbsroutines->assign = custom_subscript_assign; + + PG_RETURN_POINTER(sbsroutines); +} + +SubscriptingRef * +custom_subscript_prepare(bool isAssignment, SubscriptingRef *sbsref) +{ + sbsref->refelemtype = someType; + sbsref->refassgntype = someType; + + return sbsref; +} + +SubscriptingRef * +custom_subscript_validate(bool isAssignment, SubscriptingRef *sbsref, + ParseState *pstate) +{ + // some validation and coercion logic + + return sbsref; +} + +Datum +custom_subscript_assign(Datum containerSource, SubscriptingRefState *sbstate) +{ + // Some assignment logic + + return newContainer; +} + +Datum +custom_subscript_fetch(Datum containerSource, SubscriptingRefState *sbstate) +{ + // Some fetch logic based on sbstate +}]]> +</programlisting> + +<para> + Then you can define 
a subscripting procedure and a custom data type: +</para> +<programlisting> +CREATE FUNCTION custom_subscripting_handler(internal) + RETURNS internal + AS '<replaceable>filename</replaceable>' + LANGUAGE C IMMUTABLE STRICT; + +CREATE TYPE custom ( + internallength = 8, + input = custom_in, + output = custom_out, + subscripting_handler = custom_subscripting_handler +); +</programlisting> + +<para> + and use it as usual: +</para> +<programlisting> +CREATE TABLE test_subscripting ( + data custom +); + +INSERT INTO test_subscripting VALUES ('(1, 2)'); + +SELECT data[0] from test_subscripting; + +UPDATE test_subscripting SET data[1] = 3; +</programlisting> + + + <para> + The examples of custom subscripting implementation can be found in + <filename>subscripting.sql</filename> and <filename>subscripting.c</filename> + in the <filename>src/tutorial</filename> directory of the source distribution. + See the <filename>README</filename> file in that directory for instructions + about running the examples. + </para> + +</sect1> diff --git a/src/tutorial/Makefile b/src/tutorial/Makefile index 16dc390f71..0ead60c2d4 100644 --- a/src/tutorial/Makefile +++ b/src/tutorial/Makefile @@ -13,8 +13,8 @@ # #------------------------------------------------------------------------- -MODULES = complex funcs -DATA_built = advanced.sql basics.sql complex.sql funcs.sql syscat.sql +MODULES = complex funcs subscripting +DATA_built = advanced.sql basics.sql complex.sql funcs.sql syscat.sql subscripting.sql ifdef NO_PGXS subdir = src/tutorial diff --git a/src/tutorial/subscripting.c b/src/tutorial/subscripting.c new file mode 100644 index 0000000000..1eb8c45652 --- /dev/null +++ b/src/tutorial/subscripting.c @@ -0,0 +1,201 @@ +/* + * src/tutorial/subscripting.c + * + ****************************************************************************** + This file contains routines that can be bound to a Postgres backend and + called by the backend in the process of processing queries.
The calling + format for these routines is dictated by Postgres architecture. +******************************************************************************/ + +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "executor/execExpr.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_coerce.h" +#include "utils/builtins.h" +#include "utils/fmgrprotos.h" + +PG_MODULE_MAGIC; + +typedef struct Custom +{ + int first; + int second; +} Custom; + +SubscriptingRef * custom_subscript_prepare(bool isAssignment, SubscriptingRef *sbsref); +SubscriptingRef * custom_subscript_validate(bool isAssignment, SubscriptingRef *sbsref, + ParseState *pstate); +Datum custom_subscript_fetch(Datum containerSource, SubscriptingRefState *sbstate); +Datum custom_subscript_assign(Datum containerSource, SubscriptingRefState *sbstate); + +PG_FUNCTION_INFO_V1(custom_in); +PG_FUNCTION_INFO_V1(custom_out); +PG_FUNCTION_INFO_V1(custom_subscripting_handler); + +/***************************************************************************** + * Input/Output functions + *****************************************************************************/ + +Datum +custom_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + int firstValue, + secondValue; + Custom *result; + + if (sscanf(str, " ( %d , %d )", &firstValue, &secondValue) != 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for complex: \"%s\"", + str))); + + + result = (Custom *) palloc(sizeof(Custom)); + result->first = firstValue; + result->second = secondValue; + PG_RETURN_POINTER(result); +} + +Datum +custom_out(PG_FUNCTION_ARGS) +{ + Custom *custom = (Custom *) PG_GETARG_POINTER(0); + char *result; + + result = psprintf("(%d, %d)", custom->first, custom->second); + PG_RETURN_CSTRING(result); +} + +/***************************************************************************** + * Custom subscripting logic functions + 
*****************************************************************************/ + +Datum +custom_subscripting_handler(PG_FUNCTION_ARGS) +{ + SubscriptRoutines *sbsroutines = (SubscriptRoutines *) + palloc(sizeof(SubscriptRoutines)); + + sbsroutines->prepare = custom_subscript_prepare; + sbsroutines->validate = custom_subscript_validate; + sbsroutines->fetch = custom_subscript_fetch; + sbsroutines->assign = custom_subscript_assign; + + PG_RETURN_POINTER(sbsroutines); +} + +SubscriptingRef * +custom_subscript_prepare(bool isAssignment, SubscriptingRef *sbsref) +{ + sbsref->refelemtype = INT4OID; + sbsref->refassgntype = INT4OID; + return sbsref; +} + +SubscriptingRef * +custom_subscript_validate(bool isAssignment, SubscriptingRef *sbsref, + ParseState *pstate) +{ + List *upperIndexpr = NIL; + ListCell *l; + + if (sbsref->reflowerindexpr != NIL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("custom subscript does not support slices"), + parser_errposition(pstate, exprLocation( + ((Node *)lfirst(sbsref->reflowerindexpr->head)))))); + + foreach(l, sbsref->refupperindexpr) + { + Node *subexpr = (Node *) lfirst(l); + + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("custom subscript does not support slices"), + parser_errposition(pstate, exprLocation( + ((Node *) lfirst(sbsref->refupperindexpr->head)))))); + + subexpr = coerce_to_target_type(pstate, + subexpr, exprType(subexpr), + INT4OID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (subexpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("custom subscript must have integer type"), + parser_errposition(pstate, exprLocation(subexpr)))); + + upperIndexpr = lappend(upperIndexpr, subexpr); + + if (isAssignment) + { + Node *assignExpr = (Node *) sbsref->refassgnexpr; + Node *new_from; + + new_from = coerce_to_target_type(pstate, + assignExpr, exprType(assignExpr), + INT4OID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + 
-1); + if (new_from == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("custom assignment requires int type"), + errhint("You will need to rewrite or cast the expression."), + parser_errposition(pstate, exprLocation(assignExpr)))); + sbsref->refassgnexpr = (Expr *)new_from; + } + } + + sbsref->refupperindexpr = upperIndexpr; + + return sbsref; +} + +Datum +custom_subscript_fetch(Datum containerSource, SubscriptingRefState *sbstate) +{ + Custom *container= (Custom *) containerSource; + int index; + + if (sbstate->numupper != 1) + ereport(ERROR, (errmsg("custom does not support nested subscripting"))); + + index = DatumGetInt32(sbstate->upperindex[0]); + + if (index == 1) + return (Datum) container->first; + else + return (Datum) container->second; +} + +Datum +custom_subscript_assign(Datum containerSource, SubscriptingRefState *sbstate) +{ + int index; + Custom *container = (Custom *) containerSource; + + if (sbstate->resnull) + return containerSource; + + if (sbstate->numupper != 1) + ereport(ERROR, (errmsg("custom does not support nested subscripting"))); + + index = DatumGetInt32(sbstate->upperindex[0]); + + if (index == 1) + container->first = DatumGetInt32(sbstate->replacevalue); + else + container->second = DatumGetInt32(sbstate->replacevalue); + + return (Datum) container; +} diff --git a/src/tutorial/subscripting.source b/src/tutorial/subscripting.source new file mode 100644 index 0000000000..837cf30612 --- /dev/null +++ b/src/tutorial/subscripting.source @@ -0,0 +1,71 @@ +--------------------------------------------------------------------------- +-- +-- subscripting.sql- +-- This file shows how to create a new subscripting procedure for +-- user-defined type. 
+-- +-- +-- Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group +-- Portions Copyright (c) 1994, Regents of the University of California +-- +-- src/tutorial/subscripting.source +-- +--------------------------------------------------------------------------- + +----------------------------- +-- Creating a new type: +-- We are going to create a new type called 'custom' which holds a +-- pair of integers and supports subscripting. +-- A user-defined type must have an input and an output function, and +-- optionally can have binary input and output functions. All of these +-- are usually user-defined C functions. +----------------------------- + +-- Assume the user defined functions are in _OBJWD_/subscripting$DLSUFFIX +-- (we do not want to assume this is in the dynamic loader search path). +-- Look at $PWD/subscripting.c for the source. Note that we declare all of +-- them as STRICT, so we do not need to cope with NULL inputs in the +-- C code. We also mark them IMMUTABLE, since they always return the +-- same outputs given the same inputs. + +-- the input function 'custom_in' takes a null-terminated string (the +-- textual representation of the type) and turns it into the internal +-- (in memory) representation. You will get a message telling you 'custom' +-- does not exist yet but that's okay. + +CREATE FUNCTION custom_in(cstring) + RETURNS custom + AS '_OBJWD_/subscripting' + LANGUAGE C IMMUTABLE STRICT; + +-- the output function 'custom_out' takes the internal representation and +-- converts it into the textual representation.
+ +CREATE FUNCTION custom_out(custom) + RETURNS cstring + AS '_OBJWD_/subscripting' + LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION custom_subscripting_handler(internal) + RETURNS internal + AS '_OBJWD_/subscripting' + LANGUAGE C IMMUTABLE STRICT; + +CREATE TYPE custom ( + internallength = 8, + input = custom_in, + output = custom_out, + subscripting_handler = custom_subscripting_handler +); + +-- we can use it in a table + +CREATE TABLE test_subscripting ( + data custom +); + +INSERT INTO test_subscripting VALUES ('(1, 2)'); + +SELECT data[0] from test_subscripting; + +UPDATE test_subscripting SET data[1] = 3; -- 2.21.0
>From 153a23282a7427c33018b7ce54a3bf9a09aa7dc8 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Tue, 4 Aug 2020 17:41:42 +0200 Subject: [PATCH v33 5/5] Filling gaps in jsonb arrays Appending or prepending array elements on the specified position, gaps filled with nulls (similar to JavaScript behavior). Originally proposed by Nikita Glukhov based on polymorphic subscripting patch, but transformed into an independent change. --- src/backend/utils/adt/jsonfuncs.c | 43 +++++++++++++++++++++++++---- src/test/regress/expected/jsonb.out | 24 ++++++++++++++++ src/test/regress/sql/jsonb.sql | 13 +++++++++ 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index f46a2828b3..6bb81f4c40 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -47,6 +47,7 @@ #define JB_PATH_INSERT_AFTER 0x0010 #define JB_PATH_CREATE_OR_INSERT \ (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE) +#define JB_PATH_FILL_GAPS 0x0020 /* state for json_object_keys */ typedef struct OkeysState @@ -1492,10 +1493,8 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) static Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath, bool *isnull, bool as_text) { - Jsonb *res; JsonbContainer *container = &jb->root; JsonbValue *jbvp = NULL; - JsonbValue tv; int i; bool have_object = false, have_array = false; @@ -1657,13 +1656,24 @@ jsonb_set_element(Datum jsonbdatum, Datum *path, int path_len, it = JsonbIteratorInit(&jb->root); res = setPath(&it, path, path_nulls, path_len, &state, 0, - newval, JB_PATH_CREATE); + newval, JB_PATH_CREATE | JB_PATH_FILL_GAPS); pfree(path_nulls); PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); } +static void +push_null_elements(JsonbParseState **ps, int num) +{ + JsonbValue null; + + null.type = jbvNull; + + while (num-- > 0) + pushJsonbValue(ps, WJB_ELEM, &null); +} + /* * Return the text representation of the given 
JsonbValue. */ @@ -4811,6 +4821,13 @@ IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, * Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type * behave as JB_PATH_CREATE if new value is inserted in JsonbObject. * + * If JB_PATH_FILL_GAPS bit is set, this changes the assignment logic when + * the target is an array. The assignment index will not be restricted by the + * number of elements in the array, and if there are any empty slots between + * the last element of the array and a new one they will be filled with nulls. If + * the index is negative, it will still be considered an index from the end + * of the array. + * * All path elements before the last must already exist * whatever bits in op_type are set, or nothing is done. */ @@ -5012,15 +5029,21 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, idx = nelems + idx; } - if (idx > 0 && idx > nelems) - idx = nelems; + /* + * Filling the gaps means no limits are imposed on the positive index, + * so we can set any element. Otherwise limit the index by nelems. + */ + if (!(op_type & JB_PATH_FILL_GAPS)) + { + if (idx > 0 && idx > nelems) + idx = nelems; + } /* * if we're creating, and idx == INT_MIN, we prepend the new value to the * array also if the array is empty - in which case we don't really care * what the idx value is */ - if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) && (op_type & JB_PATH_CREATE_OR_INSERT)) { @@ -5086,10 +5109,18 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1 && i == nelems - 1) { + /* + * If asked to fill the gaps, idx could be bigger than nelems, + * so prepend the new element with nulls if that's the case.
+ */ + if (op_type & JB_PATH_FILL_GAPS && idx > nelems) + push_null_elements(st, idx - nelems); + (void) pushJsonbValue(st, WJB_ELEM, newval); } } } + } /* diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 04a146a7d0..4d52652688 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -4928,6 +4928,30 @@ select * from test_jsonb_subscript; 2 | {"a": [1, 2, 3], "key": "value", "another_key": null} (2 rows) +-- Fill the gaps logic +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[0]'); +update test_jsonb_subscript set test_json[5] = 1; +select * from test_jsonb_subscript; + id | test_json +----+-------------------------------- + 1 | [0, null, null, null, null, 1] +(1 row) + +update test_jsonb_subscript set test_json[-4] = 1; +select * from test_jsonb_subscript; + id | test_json +----+----------------------------- + 1 | [0, null, 1, null, null, 1] +(1 row) + +update test_jsonb_subscript set test_json[-8] = 1; +select * from test_jsonb_subscript; + id | test_json +----+-------------------------------- + 1 | [1, 0, null, 1, null, null, 1] +(1 row) + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); to_tsvector diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 12541e7e50..584f145bab 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1269,6 +1269,19 @@ update test_jsonb_subscript set test_json[NULL] = 1; update test_jsonb_subscript set test_json['another_key'] = NULL; select * from test_jsonb_subscript; +-- Fill the gaps logic +delete from test_jsonb_subscript; +insert into test_jsonb_subscript values (1, '[0]'); + +update test_jsonb_subscript set test_json[5] = 1; +select * from test_jsonb_subscript; + +update test_jsonb_subscript set test_json[-4] = 1; +select * from test_jsonb_subscript; + +update test_jsonb_subscript set 
test_json[-8] = 1; +select * from test_jsonb_subscript; + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); -- 2.21.0