Hi, Here is an updated version; the main changes are:
1. A shared_detoast_datum.org file which shows the latest design and the pending items from the discussion. 2. I removed slot->pre_detoast_attrs entirely. 3. Handle some pg_detoast_datum_slice use cases. 4. Some implementation improvements. commit 66c64c197a5dab97a563be5a291127e4c5d6841d (HEAD -> shared_detoast_value) Author: yizhi.fzh <yizhi....@alibaba-inc.com> Date: Sun Mar 3 13:48:25 2024 +0800 shared detoast datum See the overall design & alternative design & testing in shared_detoast_datum.org In the shared_detoast_datum.org, I added the alternative design part for the idea of TOAST cache. -- Best Regards Andy Fan
>From 66c64c197a5dab97a563be5a291127e4c5d6841d Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" <yizhi....@alibaba-inc.com> Date: Sun, 3 Mar 2024 13:48:25 +0800 Subject: [PATCH v9 1/1] shared detoast datum See the overall design & alternative design & testing in shared_detoast_datum.org --- src/backend/access/common/detoast.c | 68 +- src/backend/access/common/toast_compression.c | 10 +- src/backend/executor/execExpr.c | 60 +- src/backend/executor/execExprInterp.c | 179 ++++++ src/backend/executor/execTuples.c | 127 ++++ src/backend/executor/execUtils.c | 2 + src/backend/executor/nodeHashjoin.c | 2 + src/backend/executor/nodeMergejoin.c | 2 + src/backend/executor/nodeNestloop.c | 1 + src/backend/executor/shared_detoast_datum.org | 203 ++++++ src/backend/jit/llvm/llvmjit_expr.c | 26 +- src/backend/jit/llvm/llvmjit_types.c | 1 + src/backend/optimizer/plan/createplan.c | 107 +++- src/backend/optimizer/plan/setrefs.c | 590 +++++++++++++++--- src/include/access/detoast.h | 3 + src/include/access/toast_compression.h | 4 +- src/include/executor/execExpr.h | 12 + src/include/executor/tuptable.h | 14 + src/include/nodes/execnodes.h | 14 + src/include/nodes/plannodes.h | 53 ++ src/tools/pgindent/typedefs.list | 2 + 21 files changed, 1342 insertions(+), 138 deletions(-) create mode 100644 src/backend/executor/shared_detoast_datum.org diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 3547cdba56..acc9644689 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -22,11 +22,11 @@ #include "utils/expandeddatum.h" #include "utils/rel.h" -static struct varlena *toast_fetch_datum(struct varlena *attr); +static struct varlena *toast_fetch_datum(struct varlena *attr, MemoryContext ctx); static struct varlena *toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 slicelength); -static struct varlena *toast_decompress_datum(struct varlena *attr); +static struct varlena 
*toast_decompress_datum(struct varlena *attr, MemoryContext ctx); static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); /* ---------- @@ -42,7 +42,7 @@ static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 * ---------- */ struct varlena * -detoast_external_attr(struct varlena *attr) +detoast_external_attr_ext(struct varlena *attr, MemoryContext ctx) { struct varlena *result; @@ -51,7 +51,7 @@ detoast_external_attr(struct varlena *attr) /* * This is an external stored plain value */ - result = toast_fetch_datum(attr); + result = toast_fetch_datum(attr, ctx); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -68,13 +68,13 @@ detoast_external_attr(struct varlena *attr) /* recurse if value is still external in some other way */ if (VARATT_IS_EXTERNAL(attr)) - return detoast_external_attr(attr); + return detoast_external_attr_ext(attr, ctx); /* * Copy into the caller's memory context, in case caller tries to * pfree the result. */ - result = (struct varlena *) palloc(VARSIZE_ANY(attr)); + result = (struct varlena *) MemoryContextAlloc(ctx, VARSIZE_ANY(attr)); memcpy(result, attr, VARSIZE_ANY(attr)); } else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) @@ -87,7 +87,7 @@ detoast_external_attr(struct varlena *attr) eoh = DatumGetEOHP(PointerGetDatum(attr)); resultsize = EOH_get_flat_size(eoh); - result = (struct varlena *) palloc(resultsize); + result = (struct varlena *) MemoryContextAlloc(ctx, resultsize); EOH_flatten_into(eoh, (void *) result, resultsize); } else @@ -101,32 +101,45 @@ detoast_external_attr(struct varlena *attr) return result; } +struct varlena * +detoast_external_attr(struct varlena *attr) +{ + return detoast_external_attr_ext(attr, CurrentMemoryContext); +} + /* ---------- - * detoast_attr - + * detoast_attr_ext - * * Public entry point to get back a toasted value from compression * or external storage. The result is always non-extended varlena form. 
* + * ctx: The memory context which the final value belongs to. + * * Note some callers assume that if the input is an EXTERNAL or COMPRESSED * datum, the result will be a pfree'able chunk. * ---------- */ -struct varlena * -detoast_attr(struct varlena *attr) + +extern struct varlena * +detoast_attr_ext(struct varlena *attr, MemoryContext ctx) { if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * This is an externally stored datum --- fetch it back from there */ - attr = toast_fetch_datum(attr); + attr = toast_fetch_datum(attr, ctx); /* If it's compressed, decompress it */ if (VARATT_IS_COMPRESSED(attr)) { struct varlena *tmp = attr; - attr = toast_decompress_datum(tmp); + attr = toast_decompress_datum(tmp, ctx); + /* + * XXX: this pfree block us from using BumpContext directly + * we need some extra effort to make it happen at least. + */ pfree(tmp); } } @@ -144,14 +157,14 @@ detoast_attr(struct varlena *attr) Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); /* recurse in case value is still extended in some other way */ - attr = detoast_attr(attr); + attr = detoast_attr_ext(attr, ctx); /* if it isn't, we'd better copy it */ if (attr == (struct varlena *) redirect.pointer) { struct varlena *result; - result = (struct varlena *) palloc(VARSIZE_ANY(attr)); + result = (struct varlena *) MemoryContextAlloc(ctx, VARSIZE_ANY(attr)); memcpy(result, attr, VARSIZE_ANY(attr)); attr = result; } @@ -161,7 +174,7 @@ detoast_attr(struct varlena *attr) /* * This is an expanded-object pointer --- get flat format */ - attr = detoast_external_attr(attr); + attr = detoast_external_attr_ext(attr, ctx); /* flatteners are not allowed to produce compressed/short output */ Assert(!VARATT_IS_EXTENDED(attr)); } @@ -170,7 +183,7 @@ detoast_attr(struct varlena *attr) /* * This is a compressed value inside of the main tuple */ - attr = toast_decompress_datum(attr); + attr = toast_decompress_datum(attr, ctx); } else if (VARATT_IS_SHORT(attr)) { @@ -181,7 +194,7 @@ detoast_attr(struct varlena *attr) Size 
new_size = data_size + VARHDRSZ; struct varlena *new_attr; - new_attr = (struct varlena *) palloc(new_size); + new_attr = (struct varlena *) MemoryContextAlloc(ctx, new_size); SET_VARSIZE(new_attr, new_size); memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); attr = new_attr; @@ -190,6 +203,11 @@ detoast_attr(struct varlena *attr) return attr; } +struct varlena * +detoast_attr(struct varlena *attr) +{ + return detoast_attr_ext(attr, CurrentMemoryContext); +} /* ---------- * detoast_attr_slice - @@ -262,7 +280,7 @@ detoast_attr_slice(struct varlena *attr, preslice = toast_fetch_datum_slice(attr, 0, max_size); } else - preslice = toast_fetch_datum(attr); + preslice = toast_fetch_datum(attr, CurrentMemoryContext); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -294,7 +312,7 @@ detoast_attr_slice(struct varlena *attr, if (slicelimit >= 0) preslice = toast_decompress_datum_slice(tmp, slicelimit); else - preslice = toast_decompress_datum(tmp); + preslice = toast_decompress_datum(tmp, CurrentMemoryContext); if (tmp != attr) pfree(tmp); @@ -340,7 +358,7 @@ detoast_attr_slice(struct varlena *attr, * ---------- */ static struct varlena * -toast_fetch_datum(struct varlena *attr) +toast_fetch_datum(struct varlena *attr, MemoryContext ctx) { Relation toastrel; struct varlena *result; @@ -355,7 +373,7 @@ toast_fetch_datum(struct varlena *attr) attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); - result = (struct varlena *) palloc(attrsize + VARHDRSZ); + result = (struct varlena *) MemoryContextAlloc(ctx, attrsize + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ); @@ -468,7 +486,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, * Decompress a compressed version of a varlena datum */ static struct varlena * -toast_decompress_datum(struct varlena *attr) +toast_decompress_datum(struct varlena *attr, MemoryContext ctx) { ToastCompressionId cmid; @@ -482,9 +500,9 @@ 
toast_decompress_datum(struct varlena *attr) switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: - return pglz_decompress_datum(attr); + return pglz_decompress_datum(attr, ctx); case TOAST_LZ4_COMPRESSION_ID: - return lz4_decompress_datum(attr); + return lz4_decompress_datum(attr, ctx); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ @@ -515,7 +533,7 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) * more than the data's true decompressed size. */ if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr)) - return toast_decompress_datum(attr); + return toast_decompress_datum(attr, CurrentMemoryContext); /* * Fetch the compression method id stored in the compression header and diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 09d05d97c5..323cf013da 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -81,13 +81,13 @@ pglz_compress_datum(const struct varlena *value) * Decompress a varlena that was compressed using PGLZ. */ struct varlena * -pglz_decompress_datum(const struct varlena *value) +pglz_decompress_datum(const struct varlena *value, MemoryContext ctx) { struct varlena *result; int32 rawsize; /* allocate memory for the uncompressed data */ - result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); + result = (struct varlena *) MemoryContextAlloc(ctx, VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); /* decompress the data */ rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, @@ -181,7 +181,7 @@ lz4_compress_datum(const struct varlena *value) * Decompress a varlena that was compressed using LZ4. 
*/ struct varlena * -lz4_decompress_datum(const struct varlena *value) +lz4_decompress_datum(const struct varlena *value, MemoryContext ctx) { #ifndef USE_LZ4 NO_LZ4_SUPPORT(); @@ -191,7 +191,7 @@ lz4_decompress_datum(const struct varlena *value) struct varlena *result; /* allocate memory for the uncompressed data */ - result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); + result = (struct varlena *) MemoryContextAlloc(ctx, VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); /* decompress the data */ rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED, @@ -225,7 +225,7 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) /* slice decompression not supported prior to 1.8.3 */ if (LZ4_versionNumber() < 10803) - return lz4_decompress_datum(value); + return lz4_decompress_datum(value, CurrentMemoryContext); /* allocate memory for the uncompressed data */ result = (struct varlena *) palloc(slicelength + VARHDRSZ); diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 3181b1136a..45c2c625b2 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -932,22 +932,76 @@ ExecInitExprRec(Expr *node, ExprState *state, } else { + int attnum; + Plan *plan = state->parent ? 
state->parent->plan : NULL; + /* regular user column */ scratch.d.var.attnum = variable->varattno - 1; scratch.d.var.vartype = variable->vartype; + attnum = scratch.d.var.attnum; + switch (variable->varno) { case INNER_VAR: - scratch.opcode = EEOP_INNER_VAR; + + if (is_join_plan(plan) && + bms_is_member(attnum, + ((JoinState *) state->parent)->inner_pre_detoast_attrs)) + { + scratch.opcode = EEOP_INNER_VAR_TOAST; +#ifdef DEBUG_PRE_DETOAST_DATUM + elog(INFO, + "EEOP_INNER_VAR_TOAST: flags = %d costs=%.2f..%.2f, attnum: %d", + state->flags, + plan->startup_cost, + plan->total_cost, + attnum); +#endif + } + else + { + scratch.opcode = EEOP_INNER_VAR; + } break; case OUTER_VAR: - scratch.opcode = EEOP_OUTER_VAR; + if (is_join_plan(plan) && + bms_is_member(attnum, + ((JoinState *) state->parent)->outer_pre_detoast_attrs)) + { + scratch.opcode = EEOP_OUTER_VAR_TOAST; +#ifdef DEBUG_PRE_DETOAST_DATUM + elog(INFO, + "EEOP_OUTER_VAR_TOAST: flags = %u costs=%.2f..%.2f, attnum: %d", + state->flags, + plan->startup_cost, + plan->total_cost, + attnum); +#endif + } + else + scratch.opcode = EEOP_OUTER_VAR; break; /* INDEX_VAR is handled by default case */ default: - scratch.opcode = EEOP_SCAN_VAR; + if (is_scan_plan(plan) && bms_is_member( + attnum, + ((ScanState *) state->parent)->scan_pre_detoast_attrs)) + { + scratch.opcode = EEOP_SCAN_VAR_TOAST; +#ifdef DEBUG_PRE_DETOAST_DATUM + elog(INFO, + "EEOP_SCAN_VAR_TOAST: flags = %u costs=%.2f..%.2f, scanId: %d, attnum: %d", + state->flags, + plan->startup_cost, + plan->total_cost, + ((Scan *) plan)->scanrelid, + attnum); +#endif + } + else + scratch.opcode = EEOP_SCAN_VAR; break; } } diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 3f20f1dd31..7ebaca36bb 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -57,6 +57,7 @@ #include "postgres.h" #include "access/heaptoast.h" +#include "access/detoast.h" #include "catalog/pg_type.h" #include 
"commands/sequence.h" #include "executor/execExpr.h" @@ -158,6 +159,9 @@ static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op, static Datum ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustInnerVarToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustOuterVarToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustScanVarToast(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull); @@ -166,6 +170,9 @@ static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustInnerVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustOuterVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustScanVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); @@ -181,6 +188,42 @@ static pg_attribute_always_inline void ExecAggPlainTransByRef(AggState *aggstate 
AggStatePerGroup pergroup, ExprContext *aggcontext, int setno); +static inline void +ExecSlotDetoastDatum(TupleTableSlot *slot, int attnum) +{ + if (!slot->tts_isnull[attnum] && + VARATT_IS_EXTENDED(slot->tts_values[attnum])) + { + if (unlikely(slot->tts_data_mctx == NULL)) + slot->tts_data_mctx = GenerationContextCreate(slot->tts_mcxt, + "tts_value_ctx", + ALLOCSET_START_SMALL_SIZES); + slot->tts_values[attnum] = PointerGetDatum(detoast_attr_ext( + (struct varlena *) slot->tts_values[attnum], + /* save the detoast value to the given MemoryContext. */ + slot->tts_data_mctx)); + Assert(slot->tts_nvalid > attnum); + } +} + +/* JIT requires a non-static (and external?) function */ +void +ExecSlotDetoastDatumExternal(TupleTableSlot *slot, int attnum) +{ + return ExecSlotDetoastDatum(slot, attnum); +} + + +static inline void +ExecEvalToastVar(TupleTableSlot *slot, + ExprEvalStep *op, + int attnum) +{ + ExecSlotDetoastDatum(slot, attnum); + + *op->resvalue = slot->tts_values[attnum]; + *op->resnull = slot->tts_isnull[attnum]; +} /* * ScalarArrayOpExprHashEntry @@ -296,6 +339,24 @@ ExecReadyInterpretedExpr(ExprState *state) state->evalfunc_private = (void *) ExecJustScanVar; return; } + if (step0 == EEOP_INNER_FETCHSOME && + step1 == EEOP_INNER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustInnerVarToast; + return; + } + else if (step0 == EEOP_OUTER_FETCHSOME && + step1 == EEOP_OUTER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustOuterVarToast; + return; + } + else if (step0 == EEOP_SCAN_FETCHSOME && + step1 == EEOP_SCAN_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustScanVarToast; + return; + } else if (step0 == EEOP_INNER_FETCHSOME && step1 == EEOP_ASSIGN_INNER_VAR) { @@ -346,6 +407,21 @@ ExecReadyInterpretedExpr(ExprState *state) state->evalfunc_private = (void *) ExecJustScanVarVirt; return; } + else if (step0 == EEOP_INNER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustInnerVarVirtToast; + return; + } + else if 
(step0 == EEOP_OUTER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustOuterVarVirtToast; + return; + } + else if (step0 == EEOP_SCAN_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustScanVarVirtToast; + return; + } else if (step0 == EEOP_ASSIGN_INNER_VAR) { state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt; @@ -413,6 +489,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_INNER_VAR, &&CASE_EEOP_OUTER_VAR, &&CASE_EEOP_SCAN_VAR, + &&CASE_EEOP_INNER_VAR_TOAST, + &&CASE_EEOP_OUTER_VAR_TOAST, + &&CASE_EEOP_SCAN_VAR_TOAST, &&CASE_EEOP_INNER_SYSVAR, &&CASE_EEOP_OUTER_SYSVAR, &&CASE_EEOP_SCAN_SYSVAR, @@ -597,6 +676,25 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) Assert(attnum >= 0 && attnum < scanslot->tts_nvalid); *op->resvalue = scanslot->tts_values[attnum]; *op->resnull = scanslot->tts_isnull[attnum]; + EEO_NEXT(); + } + + EEO_CASE(EEOP_INNER_VAR_TOAST) + { + ExecEvalToastVar(innerslot, op, op->d.var.attnum); + EEO_NEXT(); + } + + EEO_CASE(EEOP_OUTER_VAR_TOAST) + { + ExecEvalToastVar(outerslot, op, op->d.var.attnum); + + EEO_NEXT(); + } + + EEO_CASE(EEOP_SCAN_VAR_TOAST) + { + ExecEvalToastVar(scanslot, op, op->d.var.attnum); EEO_NEXT(); } @@ -2137,6 +2235,42 @@ ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull) return ExecJustVarImpl(state, econtext->ecxt_scantuple, isnull); } +static pg_attribute_always_inline Datum +ExecJustVarImplToast(ExprState *state, TupleTableSlot *slot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[1]; + int attnum = op->d.var.attnum; + + CheckOpSlotCompatibility(&state->steps[0], slot); + + slot_getattr(slot, attnum + 1, isnull); + + ExecSlotDetoastDatum(slot, attnum); + + return slot->tts_values[attnum]; +} + +/* Simple reference to inner Var */ +static Datum +ExecJustInnerVarToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarImplToast(state, econtext->ecxt_innertuple, isnull); +} + +/* 
Simple reference to outer Var */ +static Datum +ExecJustOuterVarToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarImplToast(state, econtext->ecxt_outertuple, isnull); +} + +/* Simple reference to scan Var */ +static Datum +ExecJustScanVarToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarImplToast(state, econtext->ecxt_scantuple, isnull); +} + /* implementation of ExecJustAssign(Inner|Outer|Scan)Var */ static pg_attribute_always_inline Datum ExecJustAssignVarImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) @@ -2275,6 +2409,51 @@ ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull); } +/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */ +static pg_attribute_always_inline Datum +ExecJustVarVirtImplToast(ExprState *state, TupleTableSlot *slot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + int attnum = op->d.var.attnum; + + /* + * As it is guaranteed that a virtual slot is used, there never is a need + * to perform tuple deforming (nor would it be possible). Therefore + * execExpr.c has not emitted an EEOP_*_FETCHSOME step. Verify, as much as + * possible, that that determination was accurate. 
+ */ + Assert(TTS_IS_VIRTUAL(slot)); + Assert(TTS_FIXED(slot)); + Assert(attnum >= 0 && attnum < slot->tts_nvalid); + + *isnull = slot->tts_isnull[attnum]; + + ExecSlotDetoastDatum(slot, attnum); + + return slot->tts_values[attnum]; +} + +/* Like ExecJustInnerVar, optimized for virtual slots */ +static Datum +ExecJustInnerVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImplToast(state, econtext->ecxt_innertuple, isnull); +} + +/* Like ExecJustOuterVar, optimized for virtual slots */ +static Datum +ExecJustOuterVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImplToast(state, econtext->ecxt_outertuple, isnull); +} + +/* Like ExecJustScanVar, optimized for virtual slots */ +static Datum +ExecJustScanVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImplToast(state, econtext->ecxt_scantuple, isnull); +} + /* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */ static pg_attribute_always_inline Datum ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index a7aa2ee02b..584f9f1fd1 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -79,6 +79,9 @@ static inline void tts_buffer_heap_store_tuple(TupleTableSlot *slot, bool transfer_pin); static void tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree); +static Bitmapset *cal_final_pre_detoast_attrs(Bitmapset *reference_attrs, + TupleDesc tupleDesc, + List *forbid_pre_detoast_vars); const TupleTableSlotOps TTSOpsVirtual; const TupleTableSlotOps TTSOpsHeapTuple; @@ -392,6 +395,12 @@ tts_heap_materialize(TupleTableSlot *slot) slot->tts_flags |= TTS_FLAG_SHOULDFREE; MemoryContextSwitchTo(oldContext); + + /* + * tts_values is treated invalidated since tts_nvalid will is set to 0, + * so let's free the pre-detoast 
datum. + */ + ExecFreePreDetoastDatum(slot); } static void @@ -449,6 +458,9 @@ tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree) tts_heap_clear(slot); + /* slot_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); + slot->tts_nvalid = 0; hslot->tuple = tuple; hslot->off = 0; @@ -535,6 +547,7 @@ tts_minimal_materialize(TupleTableSlot *slot) oldContext = MemoryContextSwitchTo(slot->tts_mcxt); + /* * Have to deform from scratch, otherwise tts_values[] entries could point * into the non-materialized tuple (which might be gone when accessed). @@ -567,6 +580,9 @@ tts_minimal_materialize(TupleTableSlot *slot) mslot->minhdr.t_data = (HeapTupleHeader) ((char *) mslot->mintuple - MINIMAL_TUPLE_OFFSET); MemoryContextSwitchTo(oldContext); + + /* slot_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); } static void @@ -626,6 +642,9 @@ tts_minimal_store_tuple(TupleTableSlot *slot, MinimalTuple mtup, bool shouldFree Assert(TTS_EMPTY(slot)); slot->tts_flags &= ~TTS_FLAG_EMPTY; + + /* tts_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); slot->tts_nvalid = 0; mslot->off = 0; @@ -733,6 +752,10 @@ tts_buffer_heap_materialize(TupleTableSlot *slot) * into the non-materialized tuple (which might be gone when accessed). 
*/ bslot->base.off = 0; + + /* slot_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); + slot->tts_nvalid = 0; if (!bslot->base.tuple) @@ -870,6 +893,10 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, } slot->tts_flags &= ~TTS_FLAG_EMPTY; + + /* tts_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); + slot->tts_nvalid = 0; bslot->base.tuple = tuple; bslot->base.off = 0; @@ -1137,6 +1164,7 @@ MakeTupleTableSlot(TupleDesc tupleDesc, slot->tts_flags |= TTS_FLAG_FIXED; slot->tts_tupleDescriptor = tupleDesc; slot->tts_mcxt = CurrentMemoryContext; + slot->tts_data_mctx = NULL; slot->tts_nvalid = 0; if (tupleDesc != NULL) @@ -1215,6 +1243,8 @@ ExecResetTupleTable(List *tupleTable, /* tuple table */ if (slot->tts_isnull) pfree(slot->tts_isnull); } + if (slot->tts_data_mctx != NULL) + MemoryContextDelete(slot->tts_data_mctx); pfree(slot); } } @@ -1265,6 +1295,9 @@ ExecDropSingleTupleTableSlot(TupleTableSlot *slot) if (slot->tts_isnull) pfree(slot->tts_isnull); } + if (slot->tts_data_mctx != NULL) + MemoryContextDelete(slot->tts_data_mctx); + pfree(slot); } @@ -1810,12 +1843,26 @@ void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops) { + Scan *splan = (Scan *) scanstate->ps.plan; + scanstate->ss_ScanTupleSlot = ExecAllocTableSlot(&estate->es_tupleTable, tupledesc, tts_ops); scanstate->ps.scandesc = tupledesc; scanstate->ps.scanopsfixed = tupledesc != NULL; scanstate->ps.scanops = tts_ops; scanstate->ps.scanopsset = true; + + if (is_scan_plan((Plan *) splan)) + { + /* + * We may run detoast in Qual or Projection, but all of them happen at + * the ss_ScanTupleSlot rather than ps_ResultTupleSlot. So we can only + * take care of the ss_ScanTupleSlot. 
+ */ + scanstate->scan_pre_detoast_attrs = cal_final_pre_detoast_attrs(splan->reference_attrs, + tupledesc, + splan->plan.forbid_pre_detoast_vars); + } } /* ---------------- @@ -2336,3 +2383,83 @@ end_tup_output(TupOutputState *tstate) ExecDropSingleTupleTableSlot(tstate->slot); pfree(tstate); } + +/* + * cal_final_pre_detoast_attrs + * Calculate the final attributes which pre-detoast be helpful. + * + * reference_attrs: the attributes which will be detoast at this plan level. + * due to the implementation issue, some non-toast attribute may be included + * which should be filtered out with tupleDesc. + * + * forbid_pre_detoast_vars: the vars which should not be pre-detoast as the + * small_tlist reason. + */ +static Bitmapset * +cal_final_pre_detoast_attrs(Bitmapset *reference_attrs, + TupleDesc tupleDesc, + List *forbid_pre_detoast_vars) +{ + Bitmapset *final = NULL, + *toast_attrs = NULL, + *forbid_pre_detoast_attrs = NULL; + + int i; + ListCell *lc; + + if (bms_is_empty(reference_attrs)) + return NULL; + + /* + * there is no exact data type in create_plan or set_plan_refs stage, so + * reference_attrs may have some attribute which is not toast attrs at + * all, which should be removed. + */ + for (i = 0; i < tupleDesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupleDesc, i); + + if (attr->attlen == -1 && attr->attstorage != TYPSTORAGE_PLAIN) + toast_attrs = bms_add_member(toast_attrs, attr->attnum - 1); + } + + /* Filter out the non-toastable attributes. */ + final = bms_intersect(reference_attrs, toast_attrs); + + /* + * Due to the fact of detoast-datum will make the tuple bigger which is + * bad for some nodes like Sort/Hash, to avoid performance regression, + * such attribute should be removed as well. 
+ */ + foreach(lc, forbid_pre_detoast_vars) + { + Var *var = lfirst_node(Var, lc); + + forbid_pre_detoast_attrs = bms_add_member(forbid_pre_detoast_attrs, var->varattno - 1); + } + + final = bms_del_members(final, forbid_pre_detoast_attrs); + + bms_free(toast_attrs); + bms_free(forbid_pre_detoast_attrs); + + return final; +} + + +void +SetPredetoastAttrsForJoin(JoinState *j) +{ + PlanState *outerstate = outerPlanState(j); + PlanState *innerstate = innerPlanState(j); + + j->outer_pre_detoast_attrs = cal_final_pre_detoast_attrs( + ((Join *) j->ps.plan)->outer_reference_attrs, + outerstate->ps_ResultTupleDesc, + outerstate->plan->forbid_pre_detoast_vars); + + j->inner_pre_detoast_attrs = cal_final_pre_detoast_attrs( + ((Join *) j->ps.plan)->inner_reference_attrs, + innerstate->ps_ResultTupleDesc, + innerstate->plan->forbid_pre_detoast_vars); +} diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index cff5dc723e..a8646ded02 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -572,6 +572,8 @@ ExecConditionalAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc, planstate->resultopsset = planstate->scanopsset; planstate->resultopsfixed = planstate->scanopsfixed; planstate->resultops = planstate->scanops; + + Assert(planstate->ps_ResultTupleDesc != NULL); } else { diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 1cbec4647c..19a05ed624 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -756,6 +756,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags); innerDesc = ExecGetResultType(innerPlanState(hjstate)); + SetPredetoastAttrsForJoin((JoinState *) hjstate); + /* * Initialize result slot, type and projection. 
*/ diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index c1a8ca2464..be7cbd7f30 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -1497,6 +1497,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) (eflags | EXEC_FLAG_MARK)); innerDesc = ExecGetResultType(innerPlanState(mergestate)); + SetPredetoastAttrsForJoin((JoinState *) mergestate); + /* * For certain types of inner child nodes, it is advantageous to issue * MARK every time we advance past an inner tuple we will never return to. diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 06fa0a9b31..2d40d19192 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -306,6 +306,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) */ ExecInitResultTupleSlotTL(&nlstate->js.ps, &TTSOpsVirtual); ExecAssignProjectionInfo(&nlstate->js.ps, NULL); + SetPredetoastAttrsForJoin((JoinState *) nlstate); /* * initialize child expressions diff --git a/src/backend/executor/shared_detoast_datum.org b/src/backend/executor/shared_detoast_datum.org new file mode 100644 index 0000000000..f63392ef4f --- /dev/null +++ b/src/backend/executor/shared_detoast_datum.org @@ -0,0 +1,203 @@ +The problem: +------------- + +In the current expression engine, a toasted datum is detoasted when +required, but the result is discarded immediately, either by pfree'ing it +right away or by leaving it for ResetExprContext. There are sometimes arguments about which one to +use. A more serious problem is that detoasting is expensive, +especially for data types like jsonb or array, whose values might +be very large. In the example below, the detoasting happens twice. + +SELECT jb_col->'a', jb_col->'b' FROM t; + +With shared-detoast-datum, we just need to detoast once for each +tuple, and discard the result as soon as the tuple is not needed any +more. 
FWIW this issue may exist for small numeric and text values as well +because of the SHORT_TOAST feature, where the toast's length uses 1 byte rather +than 4 bytes. + +Current Design +-------------- + +The high level design is to let createplan.c and setrefs.c decide which +Vars can use this feature, and then the executor saves the detoasted +datum back to slot->tts_values[*] during the ExprEvalStep of +EEOP_{INNER|OUTER|SCAN}_VAR_TOAST. The reasons include: + +- The existing expression engine reads datums from tts_values[*], so no + extra work needs to be done. +- Reuse the lifespan of the TupleTableSlot system to manage memory. It + is natural to think of the detoasted datum as a tts_value that just happens to be + in a detoasted format. Since we already have a clear lifespan for TupleTableSlot, + e.g. ExecClearTuple, ExecCopySlot etc., it is easy to reuse + them for managing the detoasted datum's memory. +- The existing projection method can copy the detoasted datum (int64) + automatically to the next node's slot while keeping the ownership + unchanged, so only the slot where the detoast really happens takes + charge of its lifespan. + +Assume that which Vars should use this feature has already been decided in +createplan.c and setrefs.c. The 3 new ExprEvalSteps +EEOP_{INNER,OUTER,SCAN}_VAR_TOAST are used. While evaluating these +steps, the code below is used. + +static inline void +ExecSlotDetoastDatum(TupleTableSlot *slot, int attnum) +{ + if (!slot->tts_isnull[attnum] && + VARATT_IS_EXTENDED(slot->tts_values[attnum])) + { + if (unlikely(slot->tts_data_mctx == NULL)) + slot->tts_data_mctx = GenerationContextCreate(slot->tts_mcxt, + "tts_value_ctx", + ALLOCSET_START_SMALL_SIZES); + slot->tts_values[attnum] = PointerGetDatum(detoast_attr_ext((struct varlena *) slot->tts_values[attnum], + /* save the detoast value to the given MemoryContext. */ + slot->tts_data_mctx)); + } +} + +Since I don't want an extra run-time check to see whether a detoast +should happen, 3 new steps are introduced. 
+ +When to free the detoasted datum? It depends on when the slot's +tts_values[*] is invalidated. ExecClearTuple is the obvious case, but any +TupleTableSlotOps which sets tts_nvalid = 0 tells us no one will use +the datums in tts_values[*], so it is time to release them. This is an +important consideration for memory usage. Since we use a +dedicated MemoryContext for it, all we need to do is: + +/* + * ExecFreePreDetoastDatum - free the memory which is allocated in tts_data_mctx. + */ +static inline void +ExecFreePreDetoastDatum(TupleTableSlot *slot) +{ + if (slot->tts_data_mctx) + MemoryContextResetOnly(slot->tts_data_mctx); +} + + +Now we come to the createplan.c/setrefs.c part, which decides which Vars +should use the shared detoast feature. The guidelines are: + +1. It needs a detoast for a given expression in the previous logic. +2. It should not break the CP_SMALL_TLIST design. Since we save the + detoasted datum back to tts_values[*], the tuple becomes bigger; if we + do this blindly, it would be harmful to the ORDER / HASH style nodes. + +The high-level data flow is: + +1. In createplan.c, we walk the plan tree to gather the + CP_SMALL_TLIST information required by SORT/HASH style nodes, and save + it to Plan.forbid_pre_detoast_vars via the function + set_plan_forbid_pre_detoast_vars_recurse. + +2. In setrefs.c, fix_{scan|join}_expr will recurse to the Var for each + expression, so it is a good time to track the attribute number and + see if the Var is directly or indirectly accessed. Usually, + indirect access to a Var means a detoast would happen, for + example in an expression like a > 3. However, some known expressions are + ignored, for example: NullTest, pg_column_compression which needs the + raw datum, and starts_with/substring which need only a slice + detoast. Currently these are hard-coded; we may need a + pg_proc.detoasting_requirement flag to make this generic.
The + output is {Scan|Join}.xxx_reference_attrs. + +Note that I used '_reference_' rather than '_detoast_' here because +at this point I still don't know if it is a toastable attribute, which +is known at the MakeTupleTableSlot stage. + +3. At the InitPlan stage, we calculate the final xxx_pre_detoast_attrs + in ScanState & JoinState, which will be passed into the expression + engine in the ExecInitExprRec stage, where the + EEOP_{INNER|OUTER|SCAN}_VAR_TOAST steps are finally generated; then everything is connected + with ExecSlotDetoastDatum! + + +Testing +------- + +Case 1: small numeric testing. +=============================== + +create table t (a numeric); +insert into t select i from generate_series(1, 100000)i; + +cat 1.sql + +select * from t where a > 0; + +In this test, the current master runs detoast twice for each datum: once +in numeric_gt and once in numeric_out. This feature makes it detoast only once. + +pgbench -f 1.sql -n postgres -T 10 -M prepared + +master: 30.218 ms +patched: 26.957 ms + + +Case 2: Big jsonbs test: +============================= + + +create table b(blog jsonb); + +INSERT INTO b +SELECT jsonb_build_object( + 'title', 'title ' || s.i::text, + 'content', substring(repeat(md5(random()::text), 100), 1, 3000), + 'subscriber', (random() * 100)::int, + 'reader', (random() * 100)::int + ) +FROM generate_series(1, 10000) s(i); + + +explain analyze +select blog from b +where cast(blog->'reader' as numeric) > 10 and +cast(blog->'subscriber' as numeric) > 0; + +Dump and restore the above data into the current master: + +master: 24.588 ms +patched: 17.664 ms + +Memory usage test: + +I ran the TPC-H scale 10 workload against both master and patched +versions; the memory usage looks stable and the performance shows no +noticeable improvement or regression. + +An alternative design: toast cache +---------------------------------- + +This method was proposed by Tomas during the review process.
IIUC, this +method would maintain a local HTAB which maps a toasted datum to a detoasted +datum, and the entries are maintained / used in the detoast_attr +function. With this method, the overall design is pretty clear and the +code modifications can be confined to the toasting system. + +I assumed that releasing all of the memory at once at the end of the executor +is not an option since it may consume too much memory. Then when, and +which entry, to release becomes a problem for me. For example: + + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on t1 + -> Seq Scan on t2 +(4 rows) + +In this case t1.a needs a longer lifespan than t2.a since it is +in the outer relation. Without the help of the slot's life-cycle system, I +can't think of an answer to the above question. + +Another difference between the 2 methods is that my method has many +modifications in createplan.c/setrefs.c/execExpr.c/execExprInterp.c, but +it can save some run-time effort, like the hash_search find / enter calls +in method 2, since I put the datums directly into tts_values[*]. + +I'm not sure factor 2 makes a measurable difference in real +cases, so my current concern mainly comes from factor 1.
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 0c448422e2..74563c3454 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -396,30 +396,52 @@ llvm_compile_expr(ExprState *state) case EEOP_INNER_VAR: case EEOP_OUTER_VAR: case EEOP_SCAN_VAR: + case EEOP_INNER_VAR_TOAST: + case EEOP_OUTER_VAR_TOAST: + case EEOP_SCAN_VAR_TOAST: { LLVMValueRef value, isnull; LLVMValueRef v_attnum; LLVMValueRef v_values; LLVMValueRef v_nulls; + LLVMValueRef v_slot; - if (opcode == EEOP_INNER_VAR) + if (opcode == EEOP_INNER_VAR || opcode == EEOP_INNER_VAR_TOAST) { + v_slot = v_innerslot; v_values = v_innervalues; v_nulls = v_innernulls; } - else if (opcode == EEOP_OUTER_VAR) + else if (opcode == EEOP_OUTER_VAR || opcode == EEOP_OUTER_VAR_TOAST) { + v_slot = v_outerslot; v_values = v_outervalues; v_nulls = v_outernulls; } else { + v_slot = v_scanslot; v_values = v_scanvalues; v_nulls = v_scannulls; } v_attnum = l_int32_const(lc, op->d.var.attnum); + + if (opcode == EEOP_INNER_VAR_TOAST || + opcode == EEOP_OUTER_VAR_TOAST || + opcode == EEOP_SCAN_VAR_TOAST) + { + LLVMValueRef params[2]; + + params[0] = v_slot; + params[1] = l_int32_const(lc, op->d.var.attnum); + l_call(b, + llvm_pg_var_func_type("ExecSlotDetoastDatumExternal"), + llvm_pg_func(mod, "ExecSlotDetoastDatumExternal"), + params, lengthof(params), ""); + } + value = l_load_gep1(b, TypeSizeT, v_values, v_attnum, ""); isnull = l_load_gep1(b, TypeStorageBool, v_nulls, v_attnum, ""); LLVMBuildStore(b, value, v_resvaluep); diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index 47c9daf402..1dcf0c2fd8 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -178,4 +178,5 @@ void *referenced_functions[] = strlen, varsize_any, ExecInterpExprStillValid, + ExecSlotDetoastDatumExternal, }; diff --git a/src/backend/optimizer/plan/createplan.c 
b/src/backend/optimizer/plan/createplan.c index 610f4a56d6..8acb48240e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -314,7 +314,9 @@ static ModifyTable *make_modifytable(PlannerInfo *root, Plan *subplan, List *mergeActionLists, int epqParam); static GatherMerge *create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path); - +static void set_plan_forbid_pre_detoast_vars_recurse(Plan *plan, + List *small_tlist); +static void set_plan_not_pre_detoast_vars(Plan *plan, List *small_tlist); /* * create_plan @@ -346,6 +348,12 @@ create_plan(PlannerInfo *root, Path *best_path) /* Recursively process the path tree, demanding the correct tlist result */ plan = create_plan_recurse(root, best_path, CP_EXACT_TLIST); + /* + * After the plan tree is built completed, we start to walk for which + * expressions should not used the shared-detoast feature. + */ + set_plan_forbid_pre_detoast_vars_recurse(plan, NIL); + /* * Make sure the topmost plan node's targetlist exposes the original * column names and other decorative info. Targetlists generated within @@ -378,6 +386,101 @@ create_plan(PlannerInfo *root, Path *best_path) return plan; } +/* + * set_plan_forbid_pre_detoast_vars_recurse + * Walking the Plan tree in the top-down manner to gather the vars which + * should be as small as possible and record them in Plan.forbid_pre_detoast_vars + * + * plan: the plan node to walk right now. + * small_tlist: a list of nodes which its subplan should provide them as + * small as possible. + */ +static void +set_plan_forbid_pre_detoast_vars_recurse(Plan *plan, List *small_tlist) +{ + if (plan == NULL) + return; + + set_plan_not_pre_detoast_vars(plan, small_tlist); + + /* Recurse to its subplan.. 
*/ + if (IsA(plan, Sort) || IsA(plan, Memoize) || IsA(plan, WindowAgg) || + IsA(plan, Hash) || IsA(plan, Material) || IsA(plan, IncrementalSort)) + { + List *small_tlist = get_tlist_exprs(plan->lefttree->targetlist, true); + + /* + * For the sort-like nodes, we want the output of its subplan as small + * as possible, but the subplan's other expressions like Qual doesn't + * have this restriction since they are not output to the upper nodes. + * so we set the small_tlist to the subplan->targetlist. + */ + set_plan_forbid_pre_detoast_vars_recurse(plan->lefttree, small_tlist); + } + else if (IsA(plan, HashJoin) && castNode(HashJoin, plan)->left_small_tlist) + { + List *small_tlist = get_tlist_exprs(plan->lefttree->targetlist, true); + + /* + * If the left_small_tlist wants a as small as possible tlist, set it + * in a way like sort for the left node. + */ + set_plan_forbid_pre_detoast_vars_recurse(plan->lefttree, small_tlist); + + /* + * The righttree is a Hash node, it can be set with its own rule, so + * the small_tlist provided is not important, we just need to recuse + * to its subplan. + */ + set_plan_forbid_pre_detoast_vars_recurse(plan->righttree, plan->forbid_pre_detoast_vars); + } + else + { + /* + * Recurse to its children, just push down the forbid_pre_detoast_vars + * to its children. + */ + set_plan_forbid_pre_detoast_vars_recurse(plan->lefttree, plan->forbid_pre_detoast_vars); + set_plan_forbid_pre_detoast_vars_recurse(plan->righttree, plan->forbid_pre_detoast_vars); + } +} + +/* + * set_plan_not_pre_detoast_vars + * + * Set the Plan.forbid_pre_detoast_vars according the small_tlist information. + * + * small_tlist = NIL means nothing is forbidden, or else if a Var belongs to the + * small_tlist, then it must not be pre-detoasted. + */ +static void +set_plan_not_pre_detoast_vars(Plan *plan, List *small_tlist) +{ + ListCell *lc; + Var *var; + + /* + * fast path, if we don't have a small_tlist, the var in targetlist is + * impossible member of it. 
and this case might be a pretty common case. + */ + if (small_tlist == NIL) + return; + + foreach(lc, plan->targetlist) + { + TargetEntry *te = lfirst_node(TargetEntry, lc); + + if (!IsA(te->expr, Var)) + continue; + var = castNode(Var, te->expr); + if (var->varattno <= 0) + continue; + if (list_member(small_tlist, var)) + /* pass the recheck */ + plan->forbid_pre_detoast_vars = lappend(plan->forbid_pre_detoast_vars, var); + } +} + /* * create_plan_recurse * Recursive guts of create_plan(). @@ -4893,6 +4996,8 @@ create_hashjoin_plan(PlannerInfo *root, copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); + join_plan->left_small_tlist = (best_path->num_batches > 1); + return join_plan; } diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 22a1fa29f3..9b9e2b4345 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -27,6 +27,7 @@ #include "optimizer/tlist.h" #include "parser/parse_relation.h" #include "tcop/utility.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -55,11 +56,48 @@ typedef struct tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */ } indexed_tlist; +/* + * Decide which attrs are detoasted in a expressions level, this is judged + * at the fix_scan/join_expr stage. The recursed level is tracked when we + * walk to a Var, if the level is greater than 1, then it means the + * var needs an detoast in this expression list, there are some exceptions + * here, see increase_level_for_pre_detoast for details. + */ +typedef struct +{ + /* if the level is added during a certain walk. */ + bool level_added; + /* the current level during the walk. */ + int level; +} intermediate_level_context; + +/* + * Context to hold the detoast attribute within a expression. 
+ * + * XXX: this design was intent to avoid the pre-detoast-logic if the var + * only need to be detoasted *once*, but for now, this context is only + * maintained at the expression level rather than plan tree level, so it + * can't detect if a Var will be detoasted 2+ time at the plan level. + * Recording the times of a Var is detoasted in the plan tree level is + * complex, so before we decide it is a must, I am not willing to do too + * many changes here. + */ +typedef struct +{ + /* var is accessed for the first time. */ + Bitmapset *existing_attrs; + /* var is accessed for the 2+ times. */ + Bitmapset **final_ref_attrs; +} intermediate_var_ref_context; + + typedef struct { PlannerInfo *root; int rtoffset; double num_exec; + intermediate_level_context level_ctx; + intermediate_var_ref_context scan_reference_attrs; } fix_scan_expr_context; typedef struct @@ -71,6 +109,9 @@ typedef struct int rtoffset; NullingRelsMatch nrm_match; double num_exec; + intermediate_level_context level_ctx; + intermediate_var_ref_context outer_reference_attrs; + intermediate_var_ref_context inner_reference_attrs; } fix_join_expr_context; typedef struct @@ -127,8 +168,8 @@ typedef struct (((con)->consttype == REGCLASSOID || (con)->consttype == OIDOID) && \ !(con)->constisnull) -#define fix_scan_list(root, lst, rtoffset, num_exec) \ - ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset, num_exec)) +#define fix_scan_list(root, lst, rtoffset, num_exec, pre_detoast_attrs) \ + ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset, num_exec, pre_detoast_attrs)) static void add_rtes_to_flat_rtable(PlannerInfo *root, bool recursing); static void flatten_unplanned_rtes(PlannerGlobal *glob, RangeTblEntry *rte); @@ -158,7 +199,8 @@ static Plan *set_mergeappend_references(PlannerInfo *root, static void set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset); static Relids offset_relid_set(Relids relids, int rtoffset); static Node *fix_scan_expr(PlannerInfo *root, Node *node, - int 
rtoffset, double num_exec); + int rtoffset, double num_exec, + Bitmapset **scan_reference_attrs); static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context); static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context); static void set_join_references(PlannerInfo *root, Join *join, int rtoffset); @@ -190,7 +232,10 @@ static List *fix_join_expr(PlannerInfo *root, Index acceptable_rel, int rtoffset, NullingRelsMatch nrm_match, - double num_exec); + double num_exec, + Bitmapset **outer_reference_attrs, + Bitmapset **inner_reference_attrs); + static Node *fix_join_expr_mutator(Node *node, fix_join_expr_context *context); static Node *fix_upper_expr(PlannerInfo *root, @@ -211,6 +256,38 @@ static List *set_windowagg_runcondition_references(PlannerInfo *root, List *runcondition, Plan *plan); +/* + * func_use_slice_detoast + * Check if a func just needs a pg_detoast_datum_slice, if so we should + * not pre detoast it. For now, the known function ID is hard-coded. but + * it'd be good that pg_proc can have a attribute like 'detoast_requirement' + * the value can be either of: + * - full + * - first_n (0 ~ N, the current slice method). + * - any. (for the incoming of partial detoast feature. + * + * I think adding this attribute to pg_proc has a stronger reason if partial + * detoast patch is accepted. + */ +static inline bool +func_use_slice_detoast(Oid funcOid) +{ + /* hard code for now, and it is not used in a hot path yet. 
*/ + const Oid oids[] = {F_STARTS_WITH, + + F_OVERLAY_BYTEA_BYTEA_INT4, F_OVERLAY_BYTEA_BYTEA_INT4_INT4, + F_OVERLAY_TEXT_TEXT_INT4, F_OVERLAY_TEXT_TEXT_INT4_INT4, + + F_SUBSTRING_BYTEA_INT4, F_SUBSTRING_BYTEA_INT4_INT4, + F_SUBSTRING_TEXT_INT4, F_SUBSTRING_TEXT_INT4_INT4}; + + for (int i = 0; i < sizeof(oids) /sizeof(Oid); i++) + { + if (funcOid == oids[i]) + return true; + } + return false; +} /***************************************************************************** * @@ -628,10 +705,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_SampleScan: @@ -641,13 +724,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs + ); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->tablesample = (TableSampleClause *) fix_scan_expr(root, (Node *) splan->tablesample, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_IndexScan: @@ -657,28 +747,40 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int 
rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); + splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->indexqual = fix_scan_list(root, splan->indexqual, - rtoffset, 1); + rtoffset, 1, &splan->scan.reference_attrs); splan->indexqualorig = fix_scan_list(root, splan->indexqualorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->indexorderby = fix_scan_list(root, splan->indexorderby, - rtoffset, 1); + rtoffset, 1, &splan->scan.reference_attrs); splan->indexorderbyorig = fix_scan_list(root, splan->indexorderbyorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_IndexOnlyScan: { IndexOnlyScan *splan = (IndexOnlyScan *) plan; + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); + return set_indexonlyscan_references(root, splan, rtoffset); } break; @@ -691,10 +793,15 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) Assert(splan->scan.plan.targetlist == NIL); Assert(splan->scan.plan.qual == NIL); splan->indexqual = - fix_scan_list(root, splan->indexqual, rtoffset, 1); + fix_scan_list(root, splan->indexqual, rtoffset, 1, + &splan->scan.reference_attrs); splan->indexqualorig = fix_scan_list(root, splan->indexqualorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, 
splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_BitmapHeapScan: @@ -704,13 +811,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->bitmapqualorig = fix_scan_list(root, splan->bitmapqualorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_TidScan: @@ -720,13 +834,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->tidquals = fix_scan_list(root, splan->tidquals, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_TidRangeScan: @@ -736,13 +857,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); 
splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->tidrangequals = fix_scan_list(root, splan->tidrangequals, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_SubqueryScan: @@ -757,12 +885,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->functions = - fix_scan_list(root, splan->functions, rtoffset, 1); + fix_scan_list(root, splan->functions, rtoffset, 1, + &splan->scan.reference_attrs); + } break; case T_TableFuncScan: @@ -772,13 +904,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->tablefunc = (TableFunc *) fix_scan_expr(root, (Node *) splan->tablefunc, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); } break; case T_ValuesScan: @@ -788,13 +924,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, 
NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->values_lists = fix_scan_list(root, splan->values_lists, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); } break; case T_CteScan: @@ -804,10 +943,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_NamedTuplestoreScan: @@ -817,10 +962,12 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); } break; case T_WorkTableScan: @@ -830,10 +977,12 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + 
&splan->scan.reference_attrs); } break; case T_ForeignScan: @@ -873,7 +1022,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) mplan->param_exprs = fix_scan_list(root, mplan->param_exprs, rtoffset, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), + NULL); break; } @@ -933,9 +1083,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) Assert(splan->plan.qual == NIL); splan->limitOffset = - fix_scan_expr(root, splan->limitOffset, rtoffset, 1); + fix_scan_expr(root, splan->limitOffset, rtoffset, 1, NULL); splan->limitCount = - fix_scan_expr(root, splan->limitCount, rtoffset, 1); + fix_scan_expr(root, splan->limitCount, rtoffset, 1, NULL); } break; case T_Agg: @@ -988,17 +1138,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) * variable refs, so fix_scan_expr works for them. */ wplan->startOffset = - fix_scan_expr(root, wplan->startOffset, rtoffset, 1); + fix_scan_expr(root, wplan->startOffset, rtoffset, 1, NULL); wplan->endOffset = - fix_scan_expr(root, wplan->endOffset, rtoffset, 1); + fix_scan_expr(root, wplan->endOffset, rtoffset, 1, NULL); wplan->runCondition = fix_scan_list(root, wplan->runCondition, rtoffset, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), NULL); wplan->runConditionOrig = fix_scan_list(root, wplan->runConditionOrig, rtoffset, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), NULL); } break; case T_Result: @@ -1038,14 +1188,14 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->plan.targetlist = fix_scan_list(root, splan->plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), NULL); splan->plan.qual = fix_scan_list(root, splan->plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), NULL); } /* resconstantqual can't contain any subplan variable refs */ splan->resconstantqual = - fix_scan_expr(root, splan->resconstantqual, rtoffset, 1); + fix_scan_expr(root, splan->resconstantqual, rtoffset, 1, NULL); } break; case T_ProjectSet: @@ -1061,7 
+1211,7 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->withCheckOptionLists = fix_scan_list(root, splan->withCheckOptionLists, - rtoffset, 1); + rtoffset, 1, NULL); if (splan->returningLists) { @@ -1118,18 +1268,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_join_expr(root, splan->onConflictSet, NULL, itlist, linitial_int(splan->resultRelations), - rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan)); + rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan), + NULL, NULL); splan->onConflictWhere = (Node *) fix_join_expr(root, (List *) splan->onConflictWhere, NULL, itlist, linitial_int(splan->resultRelations), - rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan)); + rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan), + NULL, NULL); pfree(itlist); splan->exclRelTlist = - fix_scan_list(root, splan->exclRelTlist, rtoffset, 1); + fix_scan_list(root, splan->exclRelTlist, rtoffset, 1, NULL); } /* @@ -1182,7 +1334,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) resultrel, rtoffset, NRM_EQUAL, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), + NULL, NULL); /* Fix quals too. 
*/ action->qual = (Node *) fix_join_expr(root, @@ -1191,7 +1344,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) resultrel, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL(plan)); + NUM_EXEC_QUAL(plan), + NULL, NULL); } } } @@ -1356,13 +1510,16 @@ set_indexonlyscan_references(PlannerInfo *root, NUM_EXEC_QUAL((Plan *) plan)); /* indexqual is already transformed to reference index columns */ plan->indexqual = fix_scan_list(root, plan->indexqual, - rtoffset, 1); + rtoffset, 1, + &plan->scan.reference_attrs); /* indexorderby is already transformed to reference index columns */ plan->indexorderby = fix_scan_list(root, plan->indexorderby, - rtoffset, 1); + rtoffset, 1, + &plan->scan.reference_attrs); /* indextlist must NOT be transformed to reference index columns */ plan->indextlist = fix_scan_list(root, plan->indextlist, - rtoffset, NUM_EXEC_TLIST((Plan *) plan)); + rtoffset, NUM_EXEC_TLIST((Plan *) plan), + &plan->scan.reference_attrs); pfree(index_itlist); @@ -1409,10 +1566,10 @@ set_subqueryscan_references(PlannerInfo *root, plan->scan.scanrelid += rtoffset; plan->scan.plan.targetlist = fix_scan_list(root, plan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST((Plan *) plan)); + rtoffset, NUM_EXEC_TLIST((Plan *) plan), NULL); plan->scan.plan.qual = fix_scan_list(root, plan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL((Plan *) plan)); + rtoffset, NUM_EXEC_QUAL((Plan *) plan), NULL); result = (Plan *) plan; } @@ -1612,7 +1769,7 @@ set_foreignscan_references(PlannerInfo *root, /* fdw_scan_tlist itself just needs fix_scan_list() adjustments */ fscan->fdw_scan_tlist = fix_scan_list(root, fscan->fdw_scan_tlist, - rtoffset, NUM_EXEC_TLIST((Plan *) fscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) fscan), NULL); } else { @@ -1622,16 +1779,16 @@ set_foreignscan_references(PlannerInfo *root, */ fscan->scan.plan.targetlist = fix_scan_list(root, fscan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST((Plan *) fscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) fscan), NULL); 
fscan->scan.plan.qual = fix_scan_list(root, fscan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) fscan), NULL); fscan->fdw_exprs = fix_scan_list(root, fscan->fdw_exprs, - rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) fscan), NULL); fscan->fdw_recheck_quals = fix_scan_list(root, fscan->fdw_recheck_quals, - rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) fscan), NULL); } fscan->fs_relids = offset_relid_set(fscan->fs_relids, rtoffset); @@ -1690,20 +1847,20 @@ set_customscan_references(PlannerInfo *root, /* custom_scan_tlist itself just needs fix_scan_list() adjustments */ cscan->custom_scan_tlist = fix_scan_list(root, cscan->custom_scan_tlist, - rtoffset, NUM_EXEC_TLIST((Plan *) cscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) cscan), NULL); } else { /* Adjust tlist, qual, custom_exprs in the standard way */ cscan->scan.plan.targetlist = fix_scan_list(root, cscan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST((Plan *) cscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) cscan), NULL); cscan->scan.plan.qual = fix_scan_list(root, cscan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL((Plan *) cscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) cscan), NULL); cscan->custom_exprs = fix_scan_list(root, cscan->custom_exprs, - rtoffset, NUM_EXEC_QUAL((Plan *) cscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) cscan), NULL); } /* Adjust child plan-nodes recursively, if needed */ @@ -2111,6 +2268,102 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, return (Node *) bestplan; } + +static inline void +setup_intermediate_level_ctx(intermediate_level_context *ctx) +{ + ctx->level = 0; + ctx->level_added = false; +} + +static inline void +setup_intermediate_var_ref_ctx(intermediate_var_ref_context *ctx, Bitmapset **final_ref_attrs) +{ + ctx->existing_attrs = NULL; + ctx->final_ref_attrs = final_ref_attrs; +} + +/* + * increase_level_for_pre_detoast + * Check if the given Expr 
could detoast a Var directly; if so, + * increase the level and set level_added, otherwise clear level_added. + */ +static inline void +increase_level_for_pre_detoast(Node *node, intermediate_level_context *ctx) +{ + /* The following nodes cannot detoast a Var directly. */ + if (IsA(node, List) || IsA(node, TargetEntry) || IsA(node, NullTest)) + { + ctx->level_added = false; + } + else if (IsA(node, FuncExpr)) + { + Oid funcOid = castNode(FuncExpr, node)->funcid; + + if (funcOid == F_PG_COLUMN_COMPRESSION || func_use_slice_detoast(funcOid)) + ctx->level_added = false; + else + { + ctx->level_added = true; + ctx->level += 1; + } + } + else + { + ctx->level_added = true; + ctx->level += 1; + } +} + +static inline void +decreased_level_for_pre_detoast(intermediate_level_context *ctx) +{ + if (ctx->level_added) + ctx->level -= 1; + + ctx->level_added = false; +} + +/* + * add_pre_detoast_vars + * add the var's attno to the final reference attrs when the checks pass. + */ +static inline void +add_pre_detoast_vars(intermediate_level_context *level_ctx, + intermediate_var_ref_context *ctx, + Var *var) +{ + int attno; + + if (level_ctx->level <= 1 || ctx->final_ref_attrs == NULL || var->varattno <= 0) + return; + + attno = var->varattno - 1; + if (bms_is_member(attno, ctx->existing_attrs)) + { + /* not the first access; add it to the final result. */ + *ctx->final_ref_attrs = bms_add_member(*ctx->final_ref_attrs, attno); + } + else + { + /* first time. */ + ctx->existing_attrs = bms_add_member(ctx->existing_attrs, attno); + + /* + * XXX: + * + * The above strategy doesn't help to detect whether a Var is detoasted + * twice. Reasons are: 1. the context is not maintained at the Plan node + * level, so if it is detoasted in both targetlist and qual, we can't detect + * it. 2. even if we maintained it at the plan node level, it still wouldn't + * help for the across-nodes case. + * + * So for now this check is disabled: the attr is added on first access too.
+ */ + *ctx->final_ref_attrs = bms_add_member(*ctx->final_ref_attrs, attno); + } +} + /* * fix_scan_expr * Do set_plan_references processing on a scan-level expression @@ -2125,18 +2378,23 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, * 'node': the expression to be modified * 'rtoffset': how much to increment varnos by * 'num_exec': estimated number of executions of expression + * 'scan_reference_attrs': gather which vars are potential to run the detoast + * on this expr, NULL means the caller doesn't have interests on this. * * The expression tree is either copied-and-modified, or modified in-place * if that seems safe. */ static Node * -fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, double num_exec) +fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, + double num_exec, Bitmapset **scan_reference_attrs) { fix_scan_expr_context context; context.root = root; context.rtoffset = rtoffset; context.num_exec = num_exec; + setup_intermediate_level_ctx(&context.level_ctx); + setup_intermediate_var_ref_ctx(&context.scan_reference_attrs, scan_reference_attrs); if (rtoffset != 0 || root->multiexpr_params != NIL || @@ -2167,8 +2425,13 @@ fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, double num_exec) static Node * fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) { + Node *n; + if (node == NULL) return NULL; + + increase_level_for_pre_detoast(node, &context->level_ctx); + if (IsA(node, Var)) { Var *var = copyVar((Var *) node); @@ -2186,10 +2449,16 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) var->varno += context->rtoffset; if (var->varnosyn > 0) var->varnosyn += context->rtoffset; + + add_pre_detoast_vars(&context->level_ctx, &context->scan_reference_attrs, var); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) var; } if (IsA(node, Param)) + { + decreased_level_for_pre_detoast(&context->level_ctx); return fix_param_node(context->root, (Param *) node); + } 
if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; @@ -2199,8 +2468,10 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) aggparam = find_minmax_agg_replacement_param(context->root, aggref); if (aggparam != NULL) { + decreased_level_for_pre_detoast(&context->level_ctx); /* Make a copy of the Param for paranoia's sake */ return (Node *) copyObject(aggparam); + } /* If no match, just fall through to process it normally */ } @@ -2210,6 +2481,7 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) Assert(!IS_SPECIAL_VARNO(cexpr->cvarno)); cexpr->cvarno += context->rtoffset; + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) cexpr; } if (IsA(node, PlaceHolderVar)) @@ -2218,29 +2490,52 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) PlaceHolderVar *phv = (PlaceHolderVar *) node; /* XXX can we assert something about phnullingrels? */ - return fix_scan_expr_mutator((Node *) phv->phexpr, context); + Node *n2 = fix_scan_expr_mutator((Node *) phv->phexpr, context); + + decreased_level_for_pre_detoast(&context->level_ctx); + return n2; } if (IsA(node, AlternativeSubPlan)) - return fix_scan_expr_mutator(fix_alternative_subplan(context->root, - (AlternativeSubPlan *) node, - context->num_exec), - context); + { + Node *n2 = fix_scan_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + + decreased_level_for_pre_detoast(&context->level_ctx); + return n2; + } fix_expr_common(context->root, node); - return expression_tree_mutator(node, fix_scan_expr_mutator, - (void *) context); + n = expression_tree_mutator(node, fix_scan_expr_mutator, (void *) context); + decreased_level_for_pre_detoast(&context->level_ctx); + return n; } static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context) { + bool ret; + if (node == NULL) return false; + + increase_level_for_pre_detoast(node, &context->level_ctx); + + if (IsA(node, Var)) + { + 
add_pre_detoast_vars(&context->level_ctx, + &context->scan_reference_attrs, + castNode(Var, node)); + } Assert(!(IsA(node, Var) && ((Var *) node)->varno == ROWID_VAR)); Assert(!IsA(node, PlaceHolderVar)); Assert(!IsA(node, AlternativeSubPlan)); fix_expr_common(context->root, node); - return expression_tree_walker(node, fix_scan_expr_walker, - (void *) context); + ret = expression_tree_walker(node, fix_scan_expr_walker, + (void *) context); + + decreased_level_for_pre_detoast(&context->level_ctx); + return ret; } /* @@ -2276,7 +2571,10 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL((Plan *) join)); + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs + ); /* Now do join-type-specific stuff */ if (IsA(join, NestLoop)) @@ -2323,7 +2621,9 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL((Plan *) join)); + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); } else if (IsA(join, HashJoin)) { @@ -2336,7 +2636,9 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL((Plan *) join)); + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); /* * HashJoin's hashkeys are used to look for matching tuples from its @@ -2368,7 +2670,9 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET), - NUM_EXEC_TLIST((Plan *) join)); + NUM_EXEC_TLIST((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); join->plan.qual = fix_join_expr(root, join->plan.qual, outer_itlist, @@ -2376,8 +2680,20 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, (join->jointype == JOIN_INNER ? 
NRM_EQUAL : NRM_SUPERSET), - NUM_EXEC_QUAL((Plan *) join)); - + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); + + join->plan.forbid_pre_detoast_vars = fix_join_expr(root, + join->plan.forbid_pre_detoast_vars, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET), + NUM_EXEC_TLIST((Plan *) join), + NULL, + NULL); pfree(outer_itlist); pfree(inner_itlist); } @@ -3010,9 +3326,12 @@ fix_join_expr(PlannerInfo *root, Index acceptable_rel, int rtoffset, NullingRelsMatch nrm_match, - double num_exec) + double num_exec, + Bitmapset **outer_reference_attrs, + Bitmapset **inner_reference_attrs) { fix_join_expr_context context; + List *ret; context.root = root; context.outer_itlist = outer_itlist; @@ -3021,16 +3340,30 @@ fix_join_expr(PlannerInfo *root, context.rtoffset = rtoffset; context.nrm_match = nrm_match; context.num_exec = num_exec; - return (List *) fix_join_expr_mutator((Node *) clauses, &context); + + setup_intermediate_level_ctx(&context.level_ctx); + setup_intermediate_var_ref_ctx(&context.outer_reference_attrs, outer_reference_attrs); + setup_intermediate_var_ref_ctx(&context.inner_reference_attrs, inner_reference_attrs); + + ret = (List *) fix_join_expr_mutator((Node *) clauses, &context); + + bms_free(context.outer_reference_attrs.existing_attrs); + bms_free(context.inner_reference_attrs.existing_attrs); + + return ret; } static Node * fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { Var *newvar; + Node *ret_node; if (node == NULL) return NULL; + + increase_level_for_pre_detoast(node, &context->level_ctx); + if (IsA(node, Var)) { Var *var = (Var *) node; @@ -3044,7 +3377,13 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->rtoffset, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->outer_reference_attrs, + newvar); + 
decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* then in the inner. */ @@ -3056,7 +3395,13 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->rtoffset, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->inner_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* If it's for acceptable_rel, adjust and return it */ @@ -3066,6 +3411,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) var->varno += context->rtoffset; if (var->varnosyn > 0) var->varnosyn += context->rtoffset; + /* XXX acceptable_rel? we can ignore it for safety. */ + decreased_level_for_pre_detoast(&context->level_ctx); + return (Node *) var; } @@ -3084,22 +3432,38 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) OUTER_VAR, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->outer_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } if (context->inner_itlist && context->inner_itlist->has_ph_vars) { + newvar = search_indexed_tlist_for_phv(phv, context->inner_itlist, INNER_VAR, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->inner_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* If not supplied by input plans, evaluate the contained expr */ /* XXX can we assert something about phnullingrels? 
*/ - return fix_join_expr_mutator((Node *) phv->phexpr, context); + ret_node = fix_join_expr_mutator((Node *) phv->phexpr, context); + decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; } + /* Try matching more complex expressions too, if tlists have any */ if (context->outer_itlist && context->outer_itlist->has_non_vars) { @@ -3107,7 +3471,13 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->outer_itlist, OUTER_VAR); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->outer_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } if (context->inner_itlist && context->inner_itlist->has_non_vars) { @@ -3115,20 +3485,36 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->inner_itlist, INNER_VAR); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->inner_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* Special cases (apply only AFTER failing to match to lower tlist) */ if (IsA(node, Param)) - return fix_param_node(context->root, (Param *) node); + { + ret_node = fix_param_node(context->root, (Param *) node); + decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; + } if (IsA(node, AlternativeSubPlan)) - return fix_join_expr_mutator(fix_alternative_subplan(context->root, - (AlternativeSubPlan *) node, - context->num_exec), - context); + { + ret_node = fix_join_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; + } fix_expr_common(context->root, node); - return expression_tree_mutator(node, - fix_join_expr_mutator, - (void *) context); + ret_node = expression_tree_mutator(node, + fix_join_expr_mutator, + (void *) context); + 
decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; } /* @@ -3163,7 +3549,8 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) * varno = newvarno, varattno = resno of corresponding targetlist element. * The original tree is not modified. */ -static Node * +static Node * /* XXX: shall I care about this for shared + * detoast optimization? */ fix_upper_expr(PlannerInfo *root, Node *node, indexed_tlist *subplan_itlist, @@ -3318,7 +3705,10 @@ set_returning_clause_references(PlannerInfo *root, resultRelation, rtoffset, NRM_EQUAL, - NUM_EXEC_TLIST(topplan)); + NUM_EXEC_TLIST(topplan), + NULL, + NULL + ); pfree(itlist); diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h index 12d8cdb356..9ddc05604e 100644 --- a/src/include/access/detoast.h +++ b/src/include/access/detoast.h @@ -42,6 +42,7 @@ do { \ * ---------- */ extern struct varlena *detoast_external_attr(struct varlena *attr); +extern struct varlena *detoast_external_attr_ext(struct varlena *attr, MemoryContext ctx); /* ---------- * detoast_attr() - @@ -51,6 +52,8 @@ extern struct varlena *detoast_external_attr(struct varlena *attr); * ---------- */ extern struct varlena *detoast_attr(struct varlena *attr); +extern struct varlena *detoast_attr_ext(struct varlena *attr, MemoryContext ctx); + /* ---------- * detoast_attr_slice() - diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 64d5e079fa..00ea153e4e 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -55,13 +55,13 @@ typedef enum ToastCompressionId /* pglz compression/decompression routines */ extern struct varlena *pglz_compress_datum(const struct varlena *value); -extern struct varlena *pglz_decompress_datum(const struct varlena *value); +extern struct varlena *pglz_decompress_datum(const struct varlena *value, MemoryContext ctx); extern struct varlena *pglz_decompress_datum_slice(const struct varlena *value, 
int32 slicelength); /* lz4 compression/decompression routines */ extern struct varlena *lz4_compress_datum(const struct varlena *value); -extern struct varlena *lz4_decompress_datum(const struct varlena *value); +extern struct varlena *lz4_decompress_datum(const struct varlena *value, MemoryContext ctx); extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength); diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index a28ddcdd77..9304786bb2 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -78,6 +78,17 @@ typedef enum ExprEvalOp EEOP_OUTER_VAR, EEOP_SCAN_VAR, + /* + * compute non-system Var value with shared-detoast-datum logic. Dedicated + * steps are used, rather than adding extra logic to the existing steps, + * for performance: this way we decide whether the extra logic is needed + * once at the ExecInitExpr stage rather than on every ExecInterpExpr + * call. + */ + EEOP_INNER_VAR_TOAST, + EEOP_OUTER_VAR_TOAST, + EEOP_SCAN_VAR_TOAST, + /* compute system Var value */ EEOP_INNER_SYSVAR, EEOP_OUTER_SYSVAR, @@ -830,5 +841,6 @@ extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op, ExprContext *econtext); +extern void ExecSlotDetoastDatumExternal(TupleTableSlot *slot, int attnum); #endif /* EXEC_EXPR_H */ diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h index 6133dbcd0a..9edb843b15 100644 --- a/src/include/executor/tuptable.h +++ b/src/include/executor/tuptable.h @@ -19,6 +19,7 @@ #include "access/sysattr.h" #include "access/tupdesc.h" #include "storage/buf.h" +#include "utils/memutils.h" /*---------- * The executor stores tuples in a "tuple table" which is a List of @@ -126,6 +127,7 @@ typedef struct TupleTableSlot #define FIELDNO_TUPLETABLESLOT_ISNULL 6 bool *tts_isnull; /* current per-attribute isnull
flags */ MemoryContext tts_mcxt; /* slot itself is in this context */ + MemoryContext tts_data_mctx; /* The external content of tts_values[*] */ ItemPointerData tts_tid; /* stored tuple's tid */ Oid tts_tableOid; /* table oid of tuple */ } TupleTableSlot; @@ -426,12 +428,24 @@ slot_getsysattr(TupleTableSlot *slot, int attnum, bool *isnull) return slot->tts_ops->getsysattr(slot, attnum, isnull); } +/* + * ExecFreePreDetoastDatum - free the memory which is allocated in tts_data_mctx. + */ +static inline void +ExecFreePreDetoastDatum(TupleTableSlot *slot) +{ + if (slot->tts_data_mctx) + MemoryContextResetOnly(slot->tts_data_mctx); +} + /* * ExecClearTuple - clear the slot's contents */ static inline TupleTableSlot * ExecClearTuple(TupleTableSlot *slot) { + ExecFreePreDetoastDatum(slot); + slot->tts_ops->clear(slot); return slot; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 444a5f0fd5..30fdb37d1c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1481,6 +1481,12 @@ typedef struct ScanState Relation ss_currentRelation; struct TableScanDescData *ss_currentScanDesc; TupleTableSlot *ss_ScanTupleSlot; + + /* + * The final attributes which should apply the pre-detoast-attrs logic on + * the Scan nodes. + */ + Bitmapset *scan_pre_detoast_attrs; } ScanState; /* ---------------- @@ -2010,6 +2016,13 @@ typedef struct JoinState bool single_match; /* True if we should skip to next outer tuple * after finding one inner match */ ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */ + + /* + * The final attributes which should apply the pre-detoast-attrs logic on + * the join nodes.
+ */ + Bitmapset *outer_pre_detoast_attrs; + Bitmapset *inner_pre_detoast_attrs; } JoinState; /* ---------------- @@ -2771,4 +2784,5 @@ typedef struct LimitState TupleTableSlot *last_slot; /* slot for evaluation of ties */ } LimitState; +extern void SetPredetoastAttrsForJoin(JoinState *joinstate); #endif /* EXECNODES_H */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b4ef6bc44c..ea5033aaa0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -169,6 +169,13 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + + /* + * A list of Vars which should not apply the shared-detoast-datum logic + * since the upper nodes like Sort/Hash want them as small as possible. + * It's a subset of targetlist in each Plan node. + */ + List *forbid_pre_detoast_vars; } Plan; /* ---------------- @@ -385,6 +392,16 @@ typedef struct Scan Plan plan; Index scanrelid; /* relid is index into the range table */ + + /* + * Records of var's varattno - 1 where the Var is accessed indirectly by + * any expression, like a > 3. However, a IS [NOT] NULL is not included + * since it doesn't access the tts_values[*] at all. + * + * This is essential information to figure out which attrs should use + * the pre-detoast-attrs logic. + */ + Bitmapset *reference_attrs; } Scan; /* ---------------- @@ -789,6 +806,17 @@ typedef struct Join JoinType jointype; bool inner_unique; List *joinqual; /* JOIN quals (in addition to plan.qual) */ + + /* + * Records of var's varattno - 1 where the Var is accessed indirectly by + * any expression, like a > 3. However, a IS [NOT] NULL is not included + * since it doesn't access the tts_values[*] at all. + * + * This is essential information to figure out which attrs should use + * the pre-detoast-attrs logic.
+ */ + Bitmapset *outer_reference_attrs; + Bitmapset *inner_reference_attrs; } Join; /* ---------------- @@ -869,6 +897,11 @@ typedef struct HashJoin * perform lookups in the hashtable over the inner plan. */ List *hashkeys; + + /* + * Whether the left plan tree should use a SMALL_TLIST. + */ + bool left_small_tlist; } HashJoin; /* ---------------- @@ -1588,4 +1621,24 @@ typedef enum MonotonicFunction MONOTONICFUNC_BOTH = MONOTONICFUNC_INCREASING | MONOTONICFUNC_DECREASING, } MonotonicFunction; +static inline bool +is_join_plan(Plan *plan) +{ + return (plan != NULL) && (IsA(plan, NestLoop) || IsA(plan, HashJoin) || IsA(plan, MergeJoin)); +} + +static inline bool +is_scan_plan(Plan *plan) +{ + return (plan != NULL) && + (IsA(plan, SeqScan) || + IsA(plan, SampleScan) || + IsA(plan, IndexScan) || + IsA(plan, IndexOnlyScan) || + IsA(plan, BitmapIndexScan) || + IsA(plan, BitmapHeapScan) || + IsA(plan, TidScan) || + IsA(plan, SubqueryScan)); +} + #endif /* PLANNODES_H */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ee40a341d3..2335000e18 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4048,6 +4048,8 @@ cb_cleanup_dir cb_options cb_tablespace cb_tablespace_mapping +intermediate_var_ref_context +intermediate_level_context manifest_data manifest_writer rfile -- 2.34.1