Andy Fan <zhihuifan1...@163.com> writes: > > Some Known issues: > ------------------ > > 1. Currently only Scan & Join nodes are considered for this feature. > 2. JIT is not adapted for this purpose yet.
JIT has been adapted for this feature in v2, which resolves known issue 2 quoted above. Any feedback is welcome. -- Best Regards Andy Fan
>From ee44d4721147589dbba8366382a18adeee05419b Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" <yizhi....@alibaba-inc.com> Date: Wed, 27 Dec 2023 18:43:56 +0800 Subject: [PATCH v2 1/1] shared detoast feature. --- src/backend/executor/execExpr.c | 65 ++- src/backend/executor/execExprInterp.c | 181 +++++++ src/backend/executor/execTuples.c | 130 +++++ src/backend/executor/execUtils.c | 5 + src/backend/executor/nodeHashjoin.c | 2 + src/backend/executor/nodeMergejoin.c | 2 + src/backend/executor/nodeNestloop.c | 1 + src/backend/jit/llvm/llvmjit_expr.c | 22 + src/backend/jit/llvm/llvmjit_types.c | 1 + src/backend/nodes/bitmapset.c | 13 + src/backend/optimizer/plan/createplan.c | 73 ++- src/backend/optimizer/plan/setrefs.c | 536 +++++++++++++++---- src/include/executor/execExpr.h | 6 + src/include/executor/tuptable.h | 60 +++ src/include/nodes/bitmapset.h | 1 + src/include/nodes/execnodes.h | 5 + src/include/nodes/plannodes.h | 50 ++ src/test/regress/sql/shared_detoast_slow.sql | 70 +++ 18 files changed, 1117 insertions(+), 106 deletions(-) create mode 100644 src/test/regress/sql/shared_detoast_slow.sql diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 2c62b0c9c8..749bcc9023 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -935,22 +935,81 @@ ExecInitExprRec(Expr *node, ExprState *state, } else { + int attnum; + Plan *plan = state->parent ? state->parent->plan : NULL; + /* regular user column */ scratch.d.var.attnum = variable->varattno - 1; scratch.d.var.vartype = variable->vartype; + attnum = scratch.d.var.attnum; + switch (variable->varno) { case INNER_VAR: - scratch.opcode = EEOP_INNER_VAR; + + if (is_join_plan(plan) && + bms_is_member(attnum, + ((JoinState *) state->parent)->inner_pre_detoast_attrs)) + { + /* debug purpose. 
*/ + if (!jit_enabled) + { + elog(INFO, + "EEOP_INNER_VAR_TOAST: flags = %d costs=%.2f..%.2f, attnum: %d", + state->flags, + plan->startup_cost, + plan->total_cost, + attnum); + } + scratch.opcode = EEOP_INNER_VAR_TOAST; + } + else + { + scratch.opcode = EEOP_INNER_VAR; + } break; case OUTER_VAR: - scratch.opcode = EEOP_OUTER_VAR; + if (is_join_plan(plan) && + bms_is_member(attnum, + ((JoinState *) state->parent)->outer_pre_detoast_attrs)) + { + /* debug purpose. */ + if (!jit_enabled) + { + elog(INFO, + "EEOP_OUTER_VAR_TOAST: flags = %u costs=%.2f..%.2f, attnum: %d", + state->flags, + plan->startup_cost, + plan->total_cost, + attnum); + } + scratch.opcode = EEOP_OUTER_VAR_TOAST; + } + else + scratch.opcode = EEOP_OUTER_VAR; break; /* INDEX_VAR is handled by default case */ default: - scratch.opcode = EEOP_SCAN_VAR; + if (is_scan_plan(plan) && bms_is_member( + attnum, + ((ScanState *) state->parent)->scan_pre_detoast_attrs)) + { + if (!jit_enabled) + { + elog(INFO, + "EEOP_SCAN_VAR_TOAST: flags = %u costs=%.2f..%.2f, scanId: %d, attnum: %d", + state->flags, + plan->startup_cost, + plan->total_cost, + ((Scan *) plan)->scanrelid, + attnum); + } + scratch.opcode = EEOP_SCAN_VAR_TOAST; + } + else + scratch.opcode = EEOP_SCAN_VAR; break; } } diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 24c2b60c62..b5a464bf80 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -57,6 +57,7 @@ #include "postgres.h" #include "access/heaptoast.h" +#include "access/detoast.h" #include "catalog/pg_type.h" #include "commands/sequence.h" #include "executor/execExpr.h" @@ -157,6 +158,9 @@ static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op, static Datum ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustScanVar(ExprState *state, ExprContext *econtext, bool 
*isnull); +static Datum ExecJustInnerVarToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustOuterVarToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustScanVarToast(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull); @@ -165,6 +169,9 @@ static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustInnerVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustOuterVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustScanVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); @@ -180,6 +187,43 @@ static pg_attribute_always_inline void ExecAggPlainTransByRef(AggState *aggstate AggStatePerGroup pergroup, ExprContext *aggcontext, int setno); +static inline void +ExecSlotDetoastDatum(TupleTableSlot *slot, int attnum) +{ + if (!slot->tts_isnull[attnum] && + VARATT_IS_EXTENDED(slot->tts_values[attnum])) + { + Datum oldDatum; + MemoryContext old = MemoryContextSwitchTo(slot->tts_mcxt); + + oldDatum = slot->tts_values[attnum]; + slot->tts_values[attnum] = 
PointerGetDatum(detoast_attr( + (struct varlena *) oldDatum)); + Assert(slot->tts_nvalid > attnum); + if (oldDatum != slot->tts_values[attnum]) + slot->pre_detoasted_attrs = bms_add_member(slot->pre_detoasted_attrs, attnum); + MemoryContextSwitchTo(old); + } +} + +/* JIT requires a non-static (and external?) function */ +void +ExecSlotDetoastDatumExternal(TupleTableSlot *slot, int attnum) +{ + return ExecSlotDetoastDatum(slot, attnum); +} + + +static inline void +ExecEvalToastVar(TupleTableSlot *slot, + ExprEvalStep *op, + int attnum) +{ + ExecSlotDetoastDatum(slot, attnum); + + *op->resvalue = slot->tts_values[attnum]; + *op->resnull = slot->tts_isnull[attnum]; +} /* * ScalarArrayOpExprHashEntry @@ -295,6 +339,24 @@ ExecReadyInterpretedExpr(ExprState *state) state->evalfunc_private = (void *) ExecJustScanVar; return; } + if (step0 == EEOP_INNER_FETCHSOME && + step1 == EEOP_INNER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustInnerVarToast; + return; + } + else if (step0 == EEOP_OUTER_FETCHSOME && + step1 == EEOP_OUTER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustOuterVarToast; + return; + } + else if (step0 == EEOP_SCAN_FETCHSOME && + step1 == EEOP_SCAN_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustScanVarToast; + return; + } else if (step0 == EEOP_INNER_FETCHSOME && step1 == EEOP_ASSIGN_INNER_VAR) { @@ -330,6 +392,7 @@ ExecReadyInterpretedExpr(ExprState *state) state->evalfunc_private = (void *) ExecJustConst; return; } + /* ???? 
*/ else if (step0 == EEOP_INNER_VAR) { state->evalfunc_private = (void *) ExecJustInnerVarVirt; @@ -345,6 +408,21 @@ ExecReadyInterpretedExpr(ExprState *state) state->evalfunc_private = (void *) ExecJustScanVarVirt; return; } + else if (step0 == EEOP_INNER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustInnerVarVirtToast; + return; + } + else if (step0 == EEOP_OUTER_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustOuterVarVirtToast; + return; + } + else if (step0 == EEOP_SCAN_VAR_TOAST) + { + state->evalfunc_private = (void *) ExecJustScanVarVirtToast; + return; + } else if (step0 == EEOP_ASSIGN_INNER_VAR) { state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt; @@ -412,6 +490,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_INNER_VAR, &&CASE_EEOP_OUTER_VAR, &&CASE_EEOP_SCAN_VAR, + &&CASE_EEOP_INNER_VAR_TOAST, + &&CASE_EEOP_OUTER_VAR_TOAST, + &&CASE_EEOP_SCAN_VAR_TOAST, &&CASE_EEOP_INNER_SYSVAR, &&CASE_EEOP_OUTER_SYSVAR, &&CASE_EEOP_SCAN_SYSVAR, @@ -595,6 +676,25 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) Assert(attnum >= 0 && attnum < scanslot->tts_nvalid); *op->resvalue = scanslot->tts_values[attnum]; *op->resnull = scanslot->tts_isnull[attnum]; + EEO_NEXT(); + } + + EEO_CASE(EEOP_INNER_VAR_TOAST) + { + ExecEvalToastVar(innerslot, op, op->d.var.attnum); + EEO_NEXT(); + } + + EEO_CASE(EEOP_OUTER_VAR_TOAST) + { + ExecEvalToastVar(outerslot, op, op->d.var.attnum); + + EEO_NEXT(); + } + + EEO_CASE(EEOP_SCAN_VAR_TOAST) + { + ExecEvalToastVar(scanslot, op, op->d.var.attnum); EEO_NEXT(); } @@ -2126,6 +2226,42 @@ ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull) return ExecJustVarImpl(state, econtext->ecxt_scantuple, isnull); } +static pg_attribute_always_inline Datum +ExecJustVarImplToast(ExprState *state, TupleTableSlot *slot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[1]; + int attnum = op->d.var.attnum; + + 
CheckOpSlotCompatibility(&state->steps[0], slot); + + slot_getattr(slot, attnum + 1, isnull); + + ExecSlotDetoastDatum(slot, attnum); + + return slot->tts_values[attnum]; +} + +/* Simple reference to inner Var */ +static Datum +ExecJustInnerVarToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarImplToast(state, econtext->ecxt_innertuple, isnull); +} + +/* Simple reference to outer Var */ +static Datum +ExecJustOuterVarToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarImplToast(state, econtext->ecxt_outertuple, isnull); +} + +/* Simple reference to scan Var */ +static Datum +ExecJustScanVarToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarImplToast(state, econtext->ecxt_scantuple, isnull); +} + /* implementation of ExecJustAssign(Inner|Outer|Scan)Var */ static pg_attribute_always_inline Datum ExecJustAssignVarImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) @@ -2264,6 +2400,51 @@ ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull); } +/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */ +static pg_attribute_always_inline Datum +ExecJustVarVirtImplToast(ExprState *state, TupleTableSlot *slot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + int attnum = op->d.var.attnum; + + /* + * As it is guaranteed that a virtual slot is used, there never is a need + * to perform tuple deforming (nor would it be possible). Therefore + * execExpr.c has not emitted an EEOP_*_FETCHSOME step. Verify, as much as + * possible, that that determination was accurate. 
+ */ + Assert(TTS_IS_VIRTUAL(slot)); + Assert(TTS_FIXED(slot)); + Assert(attnum >= 0 && attnum < slot->tts_nvalid); + + *isnull = slot->tts_isnull[attnum]; + + ExecSlotDetoastDatum(slot, attnum); + + return slot->tts_values[attnum]; +} + +/* Like ExecJustInnerVar, optimized for virtual slots */ +static Datum +ExecJustInnerVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImplToast(state, econtext->ecxt_innertuple, isnull); +} + +/* Like ExecJustOuterVar, optimized for virtual slots */ +static Datum +ExecJustOuterVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImplToast(state, econtext->ecxt_outertuple, isnull); +} + +/* Like ExecJustScanVar, optimized for virtual slots */ +static Datum +ExecJustScanVarVirtToast(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImplToast(state, econtext->ecxt_scantuple, isnull); +} + /* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */ static pg_attribute_always_inline Datum ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 2c2712ceac..68cc3a2f2e 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -79,6 +79,9 @@ static inline void tts_buffer_heap_store_tuple(TupleTableSlot *slot, bool transfer_pin); static void tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree); +static Bitmapset *cal_final_pre_detoast_attrs(Bitmapset *plan_pre_detoast_attrs, + TupleDesc tupleDesc, + List *not_pre_detoast_vars); const TupleTableSlotOps TTSOpsVirtual; const TupleTableSlotOps TTSOpsHeapTuple; @@ -176,6 +179,10 @@ tts_virtual_materialize(TupleTableSlot *slot) if (att->attbyval || slot->tts_isnull[natt]) continue; + if (bms_is_member(natt, slot->pre_detoasted_attrs)) + /* it has been in slot->tts_mcxt already. 
*/ + continue; + val = slot->tts_values[natt]; if (att->attlen == -1 && @@ -394,6 +401,13 @@ tts_heap_materialize(TupleTableSlot *slot) slot->tts_flags |= TTS_FLAG_SHOULDFREE; MemoryContextSwitchTo(oldContext); + + /* + * tts_values is treated as non valid (tts_nvalid = 0), so let free the + * pre-detoast datum. + */ + ExecFreePreDetoastDatum(slot); + } static void @@ -459,6 +473,9 @@ tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree) if (shouldFree) slot->tts_flags |= TTS_FLAG_SHOULDFREE; + + /* slot_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); } @@ -569,6 +586,12 @@ tts_minimal_materialize(TupleTableSlot *slot) mslot->minhdr.t_data = (HeapTupleHeader) ((char *) mslot->mintuple - MINIMAL_TUPLE_OFFSET); MemoryContextSwitchTo(oldContext); + + /* + * tts_values is treated as non valid (tts_nvalid = 0), free the + * pre-detoast datum. + */ + ExecFreePreDetoastDatum(slot); } static void @@ -639,6 +662,9 @@ tts_minimal_store_tuple(TupleTableSlot *slot, MinimalTuple mtup, bool shouldFree if (shouldFree) slot->tts_flags |= TTS_FLAG_SHOULDFREE; + + /* tts_nvalid = 0 */ + ExecFreePreDetoastDatum(slot); } @@ -773,6 +799,12 @@ tts_buffer_heap_materialize(TupleTableSlot *slot) slot->tts_flags |= TTS_FLAG_SHOULDFREE; MemoryContextSwitchTo(oldContext); + + /* + * tts_nvalid = 0 means tts_values will be not reliable, so clear the + * information about pre-detoast-datum. 
+ */ + ExecFreePreDetoastDatum(slot); } static void @@ -862,6 +894,7 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, { BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + if (TTS_SHOULDFREE(slot)) { /* materialized slot shouldn't have a buffer to release */ @@ -906,6 +939,8 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, */ ReleaseBuffer(buffer); } + + ExecFreePreDetoastDatum(slot); } /* @@ -1140,6 +1175,7 @@ MakeTupleTableSlot(TupleDesc tupleDesc, slot->tts_tupleDescriptor = tupleDesc; slot->tts_mcxt = CurrentMemoryContext; slot->tts_nvalid = 0; + slot->pre_detoasted_attrs = NULL; if (tupleDesc != NULL) { @@ -1812,12 +1848,30 @@ void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops) { + Scan *splan = (Scan *) scanstate->ps.plan; + scanstate->ss_ScanTupleSlot = ExecAllocTableSlot(&estate->es_tupleTable, tupledesc, tts_ops); scanstate->ps.scandesc = tupledesc; scanstate->ps.scanopsfixed = tupledesc != NULL; scanstate->ps.scanops = tts_ops; scanstate->ps.scanopsset = true; + + if (is_scan_plan((Plan *) splan)) + { + /* + * We may run detoast in Qual or Projection, but all of them happen at + * the ss_ScanTupleSlot rather than ps_ResultTupleSlot. So we can only + * take care of the ss_ScanTupleSlot. + * + * the ps_ResultTupleSlot may also have detoast on its parent node, + * like as a inner or outer slot in join case, the pre_detoast on + * these slot. 
+ */ + scanstate->scan_pre_detoast_attrs = cal_final_pre_detoast_attrs(splan->reference_attrs, + tupledesc, + splan->plan.forbid_pre_detoast_vars); + } } /* ---------------- @@ -2338,3 +2392,79 @@ end_tup_output(TupOutputState *tstate) ExecDropSingleTupleTableSlot(tstate->slot); pfree(tstate); } + +static Bitmapset * +cal_final_pre_detoast_attrs(Bitmapset *plan_pre_detoast_attrs, + TupleDesc tupleDesc, + List *not_pre_detoast_vars) +{ + Bitmapset *final = NULL, + *toast_attrs = NULL, + *forbid_pre_detoast_attrs = NULL; + + int i; + ListCell *lc; + + if (bms_is_empty(plan_pre_detoast_attrs)) + return NULL; + + /* + * there is no exact data type in create_plan or set_plan_refs stage, so + * plan_pre_detoast_attrs may have some attribute which is not toast attrs + * at all, which should be removed. + */ + for (i = 0; i < tupleDesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupleDesc, i); + + if (attr->attlen == -1 && attr->attstorage != TYPSTORAGE_PLAIN) + toast_attrs = bms_add_member(toast_attrs, attr->attnum - 1); + } + + final = bms_intersect(plan_pre_detoast_attrs, toast_attrs); + + /* + * Due to the fact of detoast-datum will make the tuple bigger which is + * bad for some nodes like Sort/Hash, to avoid performance regression, + * such attribute should be removed as well. + */ + foreach(lc, not_pre_detoast_vars) + { + Var *var = lfirst_node(Var, lc); + + forbid_pre_detoast_attrs = bms_add_member(forbid_pre_detoast_attrs, var->varattno - 1); + } + + final = bms_del_members(final, forbid_pre_detoast_attrs); + + bms_free(toast_attrs); + bms_free(forbid_pre_detoast_attrs); + + return final; +} + + +/* Input slot, result slot. scan slot? */ +void +SetPredetoastAttrsForScan(ScanState *scanstate) +{ +} + +void +SetPredetoastAttrsForJoin(JoinState *j) +{ + PlanState *outerstate = outerPlanState(j); + PlanState *innerstate = innerPlanState(j); + + /* Input slot, result slot. scan slot? 
*/ + + j->outer_pre_detoast_attrs = cal_final_pre_detoast_attrs( + ((Join *) j->ps.plan)->outer_reference_attrs, + outerstate->ps_ResultTupleDesc, + outerstate->plan->forbid_pre_detoast_vars); + + j->inner_pre_detoast_attrs = cal_final_pre_detoast_attrs( + ((Join *) j->ps.plan)->inner_reference_attrs, + innerstate->ps_ResultTupleDesc, + innerstate->plan->forbid_pre_detoast_vars); +} diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 16704c0c2f..3817979b37 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -572,6 +572,11 @@ ExecConditionalAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc, planstate->resultopsset = planstate->scanopsset; planstate->resultopsfixed = planstate->scanopsfixed; planstate->resultops = planstate->scanops; + + /* + * XXX: can I make sure the ps_ResultTupleDesc is set all the time? + */ + Assert(planstate->ps_ResultTupleDesc != NULL); } else { diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 25a2d78f15..e3801d3b20 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -756,6 +756,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags); innerDesc = ExecGetResultType(innerPlanState(hjstate)); + SetPredetoastAttrsForJoin((JoinState *) hjstate); + /* * Initialize result slot, type and projection. 
*/ diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 3cdab77dfc..648ab6903d 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -1497,6 +1497,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) (eflags | EXEC_FLAG_MARK)); innerDesc = ExecGetResultType(innerPlanState(mergestate)); + SetPredetoastAttrsForJoin((JoinState *) mergestate); + /* * For certain types of inner child nodes, it is advantageous to issue * MARK every time we advance past an inner tuple we will never return to. diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index ebd1406843..c869a516ec 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -306,6 +306,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) */ ExecInitResultTupleSlotTL(&nlstate->js.ps, &TTSOpsVirtual); ExecAssignProjectionInfo(&nlstate->js.ps, NULL); + SetPredetoastAttrsForJoin((JoinState *) nlstate); /* * initialize child expressions diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index a3a0876bff..2fcc8dad36 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -396,30 +396,52 @@ llvm_compile_expr(ExprState *state) case EEOP_INNER_VAR: case EEOP_OUTER_VAR: case EEOP_SCAN_VAR: + case EEOP_INNER_VAR_TOAST: + case EEOP_OUTER_VAR_TOAST: + case EEOP_SCAN_VAR_TOAST: { LLVMValueRef value, isnull; LLVMValueRef v_attnum; LLVMValueRef v_values; LLVMValueRef v_nulls; + LLVMValueRef v_slot; if (opcode == EEOP_INNER_VAR) { + v_slot = v_innerslot; v_values = v_innervalues; v_nulls = v_innernulls; } else if (opcode == EEOP_OUTER_VAR) { + v_slot = v_outerslot; v_values = v_outervalues; v_nulls = v_outernulls; } else { + v_slot = v_scanslot; v_values = v_scanvalues; v_nulls = v_scannulls; } v_attnum = l_int32_const(lc, op->d.var.attnum); + + if (opcode == EEOP_INNER_VAR_TOAST 
|| + opcode == EEOP_OUTER_VAR_TOAST || + opcode == EEOP_SCAN_VAR_TOAST) + { + LLVMValueRef params[2]; + + params[0] = v_slot; + params[1] = l_int32_const(lc, op->d.var.attnum); + l_call(b, + llvm_pg_var_func_type("ExecSlotDetoastDatumExternal"), + llvm_pg_func(mod, "ExecSlotDetoastDatumExternal"), + params, lengthof(params), ""); + } + value = l_load_gep1(b, TypeSizeT, v_values, v_attnum, ""); isnull = l_load_gep1(b, TypeStorageBool, v_nulls, v_attnum, ""); LLVMBuildStore(b, value, v_resvaluep); diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index 791902ff1f..7aee794100 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -177,4 +177,5 @@ void *referenced_functions[] = strlen, varsize_any, ExecInterpExprStillValid, + ExecSlotDetoastDatumExternal, }; diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 704879f566..d81f8afc33 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -743,6 +743,19 @@ bms_membership(const Bitmapset *a) * foo = bms_add_member(foo, x); */ +/* + * does this break commit 00b41463c21615f9bf3927f207e37f9e215d32e6? + * but I just found alloc memory and free the memory is too bad + * for this current feature. So let see ...; + */ +void +bms_zero(Bitmapset *a) +{ + if (a == NULL) + return; + + memset(a->words, 0, a->nwords * sizeof(bitmapword)); +} /* * bms_add_member - add a specified member to set diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 34ca6d4ac2..4d0e1b8eb1 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -42,6 +42,7 @@ #include "partitioning/partprune.h" #include "utils/lsyscache.h" +extern bool jit_enabled; /* * Flag bits that can appear in the flags argument of create_plan_recurse(). 
@@ -314,7 +315,8 @@ static ModifyTable *make_modifytable(PlannerInfo *root, Plan *subplan, List *mergeActionLists, int epqParam); static GatherMerge *create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path); - +static void set_plan_not_detoast_attrs_recurse(Plan *plan, + List *recheck_list); /* * create_plan @@ -346,6 +348,8 @@ create_plan(PlannerInfo *root, Path *best_path) /* Recursively process the path tree, demanding the correct tlist result */ plan = create_plan_recurse(root, best_path, CP_EXACT_TLIST); + set_plan_not_detoast_attrs_recurse(plan, NIL); + /* * Make sure the topmost plan node's targetlist exposes the original * column names and other decorative info. Targetlists generated within @@ -378,6 +382,68 @@ create_plan(PlannerInfo *root, Path *best_path) return plan; } +/* + * set_plan_not_pre_detoast_vars + * + * set the toast_attrs according recheck_list. + * + * recheck_list = NIL means we need to do thing. + */ +static void +set_plan_not_pre_detoast_vars(Plan *plan, List *recheck_list) +{ + ListCell *lc; + Var *var; + + if (recheck_list == NIL) + return; + + foreach(lc, plan->targetlist) + { + TargetEntry *te = lfirst_node(TargetEntry, lc); + + if (!IsA(te->expr, Var)) + continue; + var = castNode(Var, te->expr); + if (var->varattno <= 0) + continue; + if (list_member(recheck_list, var)) + /* pass the recheck */ + plan->forbid_pre_detoast_vars = lappend(plan->forbid_pre_detoast_vars, var); + } +} + + +static void +set_plan_not_detoast_attrs_recurse(Plan *plan, List *recheck_list) +{ + if (plan == NULL) + return; + + set_plan_not_pre_detoast_vars(plan, recheck_list); + + if (IsA(plan, Sort) || IsA(plan, Memoize) || IsA(plan, WindowAgg) || + IsA(plan, Hash) || IsA(plan, Material) || IsA(plan, IncrementalSort)) + { + List *subplan_exprs = get_tlist_exprs(plan->lefttree->targetlist, true); + + set_plan_not_pre_detoast_vars(plan, subplan_exprs); + set_plan_not_detoast_attrs_recurse(plan->lefttree, subplan_exprs); + } + else if (IsA(plan, 
HashJoin) && castNode(HashJoin, plan)->left_small_tlist) + { + List *subplan_exprs = get_tlist_exprs(plan->lefttree->targetlist, true); + + set_plan_not_detoast_attrs_recurse(plan->lefttree, subplan_exprs); + set_plan_not_detoast_attrs_recurse(plan->righttree, plan->forbid_pre_detoast_vars); + } + else + { + set_plan_not_detoast_attrs_recurse(plan->lefttree, plan->forbid_pre_detoast_vars); + set_plan_not_detoast_attrs_recurse(plan->righttree, plan->forbid_pre_detoast_vars); + } +} + /* * create_plan_recurse * Recursive guts of create_plan(). @@ -4884,6 +4950,11 @@ create_hashjoin_plan(PlannerInfo *root, copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); + join_plan->left_small_tlist = (best_path->num_batches > 1); + + if (!jit_enabled) + elog(INFO, "num_batches = %d", best_path->num_batches); + return join_plan; } diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 4bb68ac90e..df8da328a0 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -27,6 +27,7 @@ #include "optimizer/tlist.h" #include "parser/parse_relation.h" #include "tcop/utility.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -55,11 +56,27 @@ typedef struct tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */ } indexed_tlist; +typedef struct +{ + /* var is added into existing_attrs for the first time. */ + Bitmapset *existing_attrs; + /* following add to the final_ref_attrs. 
*/ + Bitmapset **final_ref_attrs; +} intermediate_var_ref_context; + +typedef struct +{ + int level_added; + int level; +} intermediate_level_context; + typedef struct { PlannerInfo *root; int rtoffset; double num_exec; + intermediate_level_context level_ctx; + intermediate_var_ref_context scan_reference_attrs; } fix_scan_expr_context; typedef struct @@ -71,6 +88,9 @@ typedef struct int rtoffset; NullingRelsMatch nrm_match; double num_exec; + intermediate_level_context level_ctx; + intermediate_var_ref_context outer_reference_attrs; + intermediate_var_ref_context inner_reference_attrs; } fix_join_expr_context; typedef struct @@ -127,8 +147,8 @@ typedef struct (((con)->consttype == REGCLASSOID || (con)->consttype == OIDOID) && \ !(con)->constisnull) -#define fix_scan_list(root, lst, rtoffset, num_exec) \ - ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset, num_exec)) +#define fix_scan_list(root, lst, rtoffset, num_exec, pre_detoast_attrs) \ + ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset, num_exec, pre_detoast_attrs)) static void add_rtes_to_flat_rtable(PlannerInfo *root, bool recursing); static void flatten_unplanned_rtes(PlannerGlobal *glob, RangeTblEntry *rte); @@ -158,7 +178,8 @@ static Plan *set_mergeappend_references(PlannerInfo *root, static void set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset); static Relids offset_relid_set(Relids relids, int rtoffset); static Node *fix_scan_expr(PlannerInfo *root, Node *node, - int rtoffset, double num_exec); + int rtoffset, double num_exec, + Bitmapset **scan_reference_attrs); static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context); static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context); static void set_join_references(PlannerInfo *root, Join *join, int rtoffset); @@ -190,7 +211,10 @@ static List *fix_join_expr(PlannerInfo *root, Index acceptable_rel, int rtoffset, NullingRelsMatch nrm_match, - double num_exec); + double num_exec, + Bitmapset 
**outer_reference_attrs, + Bitmapset **inner_reference_attrs); + static Node *fix_join_expr_mutator(Node *node, fix_join_expr_context *context); static Node *fix_upper_expr(PlannerInfo *root, @@ -628,10 +652,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_SampleScan: @@ -641,13 +671,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs + ); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->tablesample = (TableSampleClause *) fix_scan_expr(root, (Node *) splan->tablesample, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_IndexScan: @@ -657,28 +694,40 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); + splan->scan.plan.qual = fix_scan_list(root, 
splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->indexqual = fix_scan_list(root, splan->indexqual, - rtoffset, 1); + rtoffset, 1, &splan->scan.reference_attrs); splan->indexqualorig = fix_scan_list(root, splan->indexqualorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->indexorderby = fix_scan_list(root, splan->indexorderby, - rtoffset, 1); + rtoffset, 1, &splan->scan.reference_attrs); splan->indexorderbyorig = fix_scan_list(root, splan->indexorderbyorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_IndexOnlyScan: { IndexOnlyScan *splan = (IndexOnlyScan *) plan; + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); + return set_indexonlyscan_references(root, splan, rtoffset); } break; @@ -691,10 +740,15 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) Assert(splan->scan.plan.targetlist == NIL); Assert(splan->scan.plan.qual == NIL); splan->indexqual = - fix_scan_list(root, splan->indexqual, rtoffset, 1); + fix_scan_list(root, splan->indexqual, rtoffset, 1, + &splan->scan.reference_attrs); splan->indexqualorig = fix_scan_list(root, splan->indexqualorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), NULL); } break; case T_BitmapHeapScan: @@ -704,13 +758,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, 
splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->bitmapqualorig = fix_scan_list(root, splan->bitmapqualorig, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_TidScan: @@ -720,13 +781,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->tidquals = fix_scan_list(root, splan->tidquals, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_TidRangeScan: @@ -736,17 +804,25 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->tidrangequals = fix_scan_list(root, splan->tidrangequals, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); 
+ splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_SubqueryScan: /* Needs special treatment, see comments below */ + /* XXX: shall I do anything? */ return set_subqueryscan_references(root, (SubqueryScan *) plan, rtoffset); @@ -757,12 +833,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->functions = - fix_scan_list(root, splan->functions, rtoffset, 1); + fix_scan_list(root, splan->functions, rtoffset, 1, + &splan->scan.reference_attrs); + } break; case T_TableFuncScan: @@ -772,13 +852,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->tablefunc = (TableFunc *) fix_scan_expr(root, (Node *) splan->tablefunc, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); } break; case T_ValuesScan: @@ -788,13 +872,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, 
- rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); splan->values_lists = fix_scan_list(root, splan->values_lists, - rtoffset, 1); + rtoffset, 1, + &splan->scan.reference_attrs); } break; case T_CteScan: @@ -804,10 +891,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); + splan->scan.plan.forbid_pre_detoast_vars = + fix_scan_list(root, splan->scan.plan.forbid_pre_detoast_vars, + rtoffset, NUM_EXEC_TLIST(plan), + NULL); } break; case T_NamedTuplestoreScan: @@ -817,10 +910,12 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); } break; case T_WorkTableScan: @@ -830,10 +925,12 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), + &splan->scan.reference_attrs); splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), + &splan->scan.reference_attrs); } break; case T_ForeignScan: @@ -873,7 +970,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) mplan->param_exprs = 
fix_scan_list(root, mplan->param_exprs, rtoffset, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), + NULL); break; } @@ -933,9 +1031,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) Assert(splan->plan.qual == NIL); splan->limitOffset = - fix_scan_expr(root, splan->limitOffset, rtoffset, 1); + fix_scan_expr(root, splan->limitOffset, rtoffset, 1, NULL); splan->limitCount = - fix_scan_expr(root, splan->limitCount, rtoffset, 1); + fix_scan_expr(root, splan->limitCount, rtoffset, 1, NULL); } break; case T_Agg: @@ -988,17 +1086,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) * variable refs, so fix_scan_expr works for them. */ wplan->startOffset = - fix_scan_expr(root, wplan->startOffset, rtoffset, 1); + fix_scan_expr(root, wplan->startOffset, rtoffset, 1, NULL); wplan->endOffset = - fix_scan_expr(root, wplan->endOffset, rtoffset, 1); + fix_scan_expr(root, wplan->endOffset, rtoffset, 1, NULL); wplan->runCondition = fix_scan_list(root, wplan->runCondition, rtoffset, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), NULL); wplan->runConditionOrig = fix_scan_list(root, wplan->runConditionOrig, rtoffset, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), NULL); } break; case T_Result: @@ -1038,14 +1136,14 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->plan.targetlist = fix_scan_list(root, splan->plan.targetlist, - rtoffset, NUM_EXEC_TLIST(plan)); + rtoffset, NUM_EXEC_TLIST(plan), NULL); splan->plan.qual = fix_scan_list(root, splan->plan.qual, - rtoffset, NUM_EXEC_QUAL(plan)); + rtoffset, NUM_EXEC_QUAL(plan), NULL); } /* resconstantqual can't contain any subplan variable refs */ splan->resconstantqual = - fix_scan_expr(root, splan->resconstantqual, rtoffset, 1); + fix_scan_expr(root, splan->resconstantqual, rtoffset, 1, NULL); } break; case T_ProjectSet: @@ -1061,7 +1159,7 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->withCheckOptionLists = fix_scan_list(root, splan->withCheckOptionLists, - rtoffset, 
1); + rtoffset, 1, NULL); if (splan->returningLists) { @@ -1118,18 +1216,20 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_join_expr(root, splan->onConflictSet, NULL, itlist, linitial_int(splan->resultRelations), - rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan)); + rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan), + NULL, NULL); splan->onConflictWhere = (Node *) fix_join_expr(root, (List *) splan->onConflictWhere, NULL, itlist, linitial_int(splan->resultRelations), - rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan)); + rtoffset, NRM_EQUAL, NUM_EXEC_QUAL(plan), + NULL, NULL); pfree(itlist); splan->exclRelTlist = - fix_scan_list(root, splan->exclRelTlist, rtoffset, 1); + fix_scan_list(root, splan->exclRelTlist, rtoffset, 1, NULL); } /* @@ -1182,7 +1282,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) resultrel, rtoffset, NRM_EQUAL, - NUM_EXEC_TLIST(plan)); + NUM_EXEC_TLIST(plan), + NULL, NULL); /* Fix quals too. */ action->qual = (Node *) fix_join_expr(root, @@ -1191,7 +1292,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) resultrel, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL(plan)); + NUM_EXEC_QUAL(plan), + NULL, NULL); } } } @@ -1356,13 +1458,16 @@ set_indexonlyscan_references(PlannerInfo *root, NUM_EXEC_QUAL((Plan *) plan)); /* indexqual is already transformed to reference index columns */ plan->indexqual = fix_scan_list(root, plan->indexqual, - rtoffset, 1); + rtoffset, 1, + &plan->scan.reference_attrs); /* indexorderby is already transformed to reference index columns */ plan->indexorderby = fix_scan_list(root, plan->indexorderby, - rtoffset, 1); + rtoffset, 1, + &plan->scan.reference_attrs); /* indextlist must NOT be transformed to reference index columns */ plan->indextlist = fix_scan_list(root, plan->indextlist, - rtoffset, NUM_EXEC_TLIST((Plan *) plan)); + rtoffset, NUM_EXEC_TLIST((Plan *) plan), + &plan->scan.reference_attrs); pfree(index_itlist); @@ -1409,10 +1514,10 @@ set_subqueryscan_references(PlannerInfo *root, 
plan->scan.scanrelid += rtoffset; plan->scan.plan.targetlist = fix_scan_list(root, plan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST((Plan *) plan)); + rtoffset, NUM_EXEC_TLIST((Plan *) plan), NULL); plan->scan.plan.qual = fix_scan_list(root, plan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL((Plan *) plan)); + rtoffset, NUM_EXEC_QUAL((Plan *) plan), NULL); result = (Plan *) plan; } @@ -1612,7 +1717,7 @@ set_foreignscan_references(PlannerInfo *root, /* fdw_scan_tlist itself just needs fix_scan_list() adjustments */ fscan->fdw_scan_tlist = fix_scan_list(root, fscan->fdw_scan_tlist, - rtoffset, NUM_EXEC_TLIST((Plan *) fscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) fscan), NULL); } else { @@ -1622,16 +1727,16 @@ set_foreignscan_references(PlannerInfo *root, */ fscan->scan.plan.targetlist = fix_scan_list(root, fscan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST((Plan *) fscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) fscan), NULL); fscan->scan.plan.qual = fix_scan_list(root, fscan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) fscan), NULL); fscan->fdw_exprs = fix_scan_list(root, fscan->fdw_exprs, - rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) fscan), NULL); fscan->fdw_recheck_quals = fix_scan_list(root, fscan->fdw_recheck_quals, - rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) fscan), NULL); } fscan->fs_relids = offset_relid_set(fscan->fs_relids, rtoffset); @@ -1690,20 +1795,20 @@ set_customscan_references(PlannerInfo *root, /* custom_scan_tlist itself just needs fix_scan_list() adjustments */ cscan->custom_scan_tlist = fix_scan_list(root, cscan->custom_scan_tlist, - rtoffset, NUM_EXEC_TLIST((Plan *) cscan)); + rtoffset, NUM_EXEC_TLIST((Plan *) cscan), NULL); } else { /* Adjust tlist, qual, custom_exprs in the standard way */ cscan->scan.plan.targetlist = fix_scan_list(root, cscan->scan.plan.targetlist, - rtoffset, NUM_EXEC_TLIST((Plan *) cscan)); + 
rtoffset, NUM_EXEC_TLIST((Plan *) cscan), NULL); cscan->scan.plan.qual = fix_scan_list(root, cscan->scan.plan.qual, - rtoffset, NUM_EXEC_QUAL((Plan *) cscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) cscan), NULL); cscan->custom_exprs = fix_scan_list(root, cscan->custom_exprs, - rtoffset, NUM_EXEC_QUAL((Plan *) cscan)); + rtoffset, NUM_EXEC_QUAL((Plan *) cscan), NULL); } /* Adjust child plan-nodes recursively, if needed */ @@ -2111,6 +2216,95 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, return (Node *) bestplan; } + +static inline void +setup_intermediate_level_ctx(intermediate_level_context * ctx) +{ + ctx->level = 0; + ctx->level_added = false; +} + +static inline void +setup_intermediate_var_ref_ctx(intermediate_var_ref_context * ctx, Bitmapset **final_ref_attrs) +{ + ctx->existing_attrs = NULL; + ctx->final_ref_attrs = final_ref_attrs; +} + +/* + * increase_level_for_pre_detoast + * Check if the given Expr could detoast a Var directly, if yes, + * increase the level and return true. otherwise return false; + */ +static inline void +increase_level_for_pre_detoast(Node *node, intermediate_level_context * ctx) +{ + /* The following nodes is impossible to detoast a Var directly. */ + if (IsA(node, List) || IsA(node, TargetEntry) || IsA(node, NullTest)) + { + ctx->level_added = false; + } + else if (IsA(node, FuncExpr) && castNode(FuncExpr, node)->funcid == F_PG_COLUMN_COMPRESSION) + { + /* let's not detoast first so that pg_column_compression works. */ + ctx->level_added = false; + } + else + { + ctx->level_added = true; + ctx->level += 1; + } +} + +static inline void +decreased_level_for_pre_detoast(intermediate_level_context * ctx) +{ + if (ctx->level_added) + ctx->level -= 1; + + ctx->level_added = false; +} + +/* + * add_pre_detoast_vars + * add the var's information into pre_detoast_attrs when the check is pass. 
+ */ +static inline void +add_pre_detoast_vars(intermediate_level_context * level_ctx, + intermediate_var_ref_context * ctx, + Var *var) +{ + int attno; + + if (level_ctx->level <= 1 || ctx->final_ref_attrs == NULL || var->varattno <= 0) + return; + + attno = var->varattno - 1; + if (bms_is_member(attno, ctx->existing_attrs)) + { + /* not the first time to access it, add it to final result. */ + *ctx->final_ref_attrs = bms_add_member(*ctx->final_ref_attrs, attno); + } + else + { + /* first time. */ + ctx->existing_attrs = bms_add_member(ctx->existing_attrs, attno); + + /* + * XXX: + * + * The above strategy doesn't help to detect if a Var is detoast + * twice. Reasons are: 1. the context is not maintain in Plan node + * level. so if it is detoast at targetlist and qual, we can't detect + * it. 2. even we can make it at plan node, it still doesn't help for + * the among-nodes case. + * + * So for now, I just disable it. + */ + *ctx->final_ref_attrs = bms_add_member(*ctx->final_ref_attrs, attno); + } +} + /* * fix_scan_expr * Do set_plan_references processing on a scan-level expression @@ -2130,13 +2324,16 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, * if that seems safe. 
*/ static Node * -fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, double num_exec) +fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, + double num_exec, Bitmapset **scan_reference_attrs) { fix_scan_expr_context context; context.root = root; context.rtoffset = rtoffset; context.num_exec = num_exec; + setup_intermediate_level_ctx(&context.level_ctx); + setup_intermediate_var_ref_ctx(&context.scan_reference_attrs, scan_reference_attrs); if (rtoffset != 0 || root->multiexpr_params != NIL || @@ -2167,8 +2364,13 @@ fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, double num_exec) static Node * fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) { + Node *n; + if (node == NULL) return NULL; + + increase_level_for_pre_detoast(node, &context->level_ctx); + if (IsA(node, Var)) { Var *var = copyVar((Var *) node); @@ -2186,10 +2388,18 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) var->varno += context->rtoffset; if (var->varnosyn > 0) var->varnosyn += context->rtoffset; + + add_pre_detoast_vars(&context->level_ctx, &context->scan_reference_attrs, var); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) var; } if (IsA(node, Param)) - return fix_param_node(context->root, (Param *) node); + { + Node *n = fix_param_node(context->root, (Param *) node); + + decreased_level_for_pre_detoast(&context->level_ctx); + return n; + } if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; @@ -2200,7 +2410,10 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) if (aggparam != NULL) { /* Make a copy of the Param for paranoia's sake */ - return (Node *) copyObject(aggparam); + Node *n = (Node *) copyObject(aggparam); + + decreased_level_for_pre_detoast(&context->level_ctx); + return n; } /* If no match, just fall through to process it normally */ } @@ -2210,6 +2423,7 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) Assert(!IS_SPECIAL_VARNO(cexpr->cvarno)); 
cexpr->cvarno += context->rtoffset; + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) cexpr; } if (IsA(node, PlaceHolderVar)) @@ -2218,29 +2432,52 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) PlaceHolderVar *phv = (PlaceHolderVar *) node; /* XXX can we assert something about phnullingrels? */ - return fix_scan_expr_mutator((Node *) phv->phexpr, context); + Node *n = fix_scan_expr_mutator((Node *) phv->phexpr, context); + + decreased_level_for_pre_detoast(&context->level_ctx); + return n; } if (IsA(node, AlternativeSubPlan)) - return fix_scan_expr_mutator(fix_alternative_subplan(context->root, - (AlternativeSubPlan *) node, - context->num_exec), - context); + { + Node *n = fix_scan_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + + decreased_level_for_pre_detoast(&context->level_ctx); + return n; + } fix_expr_common(context->root, node); - return expression_tree_mutator(node, fix_scan_expr_mutator, - (void *) context); + n = expression_tree_mutator(node, fix_scan_expr_mutator, (void *) context); + decreased_level_for_pre_detoast(&context->level_ctx); + return n; } static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context) { + bool ret; + if (node == NULL) return false; + + increase_level_for_pre_detoast(node, &context->level_ctx); + + if (IsA(node, Var)) + { + add_pre_detoast_vars(&context->level_ctx, + &context->scan_reference_attrs, + castNode(Var, node)); + } Assert(!(IsA(node, Var) && ((Var *) node)->varno == ROWID_VAR)); Assert(!IsA(node, PlaceHolderVar)); Assert(!IsA(node, AlternativeSubPlan)); fix_expr_common(context->root, node); - return expression_tree_walker(node, fix_scan_expr_walker, - (void *) context); + ret = expression_tree_walker(node, fix_scan_expr_walker, + (void *) context); + + decreased_level_for_pre_detoast(&context->level_ctx); + return ret; } /* @@ -2276,7 +2513,10 @@ set_join_references(PlannerInfo *root, 
Join *join, int rtoffset) (Index) 0, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL((Plan *) join)); + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs + ); /* Now do join-type-specific stuff */ if (IsA(join, NestLoop)) @@ -2323,7 +2563,9 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL((Plan *) join)); + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); } else if (IsA(join, HashJoin)) { @@ -2336,7 +2578,9 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, NRM_EQUAL, - NUM_EXEC_QUAL((Plan *) join)); + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); /* * HashJoin's hashkeys are used to look for matching tuples from its @@ -2368,7 +2612,9 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET), - NUM_EXEC_TLIST((Plan *) join)); + NUM_EXEC_TLIST((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); join->plan.qual = fix_join_expr(root, join->plan.qual, outer_itlist, @@ -2376,8 +2622,20 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) (Index) 0, rtoffset, (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET), - NUM_EXEC_QUAL((Plan *) join)); - + NUM_EXEC_QUAL((Plan *) join), + &join->outer_reference_attrs, + &join->inner_reference_attrs); + + join->plan.forbid_pre_detoast_vars = fix_join_expr(root, + join->plan.forbid_pre_detoast_vars, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + (join->jointype == JOIN_INNER ? 
NRM_EQUAL : NRM_SUPERSET), + NUM_EXEC_TLIST((Plan *) join), + NULL, + NULL); pfree(outer_itlist); pfree(inner_itlist); } @@ -3010,9 +3268,12 @@ fix_join_expr(PlannerInfo *root, Index acceptable_rel, int rtoffset, NullingRelsMatch nrm_match, - double num_exec) + double num_exec, + Bitmapset **outer_reference_attrs, + Bitmapset **inner_reference_attrs) { fix_join_expr_context context; + List *ret; context.root = root; context.outer_itlist = outer_itlist; @@ -3021,16 +3282,30 @@ fix_join_expr(PlannerInfo *root, context.rtoffset = rtoffset; context.nrm_match = nrm_match; context.num_exec = num_exec; - return (List *) fix_join_expr_mutator((Node *) clauses, &context); + + setup_intermediate_level_ctx(&context.level_ctx); + setup_intermediate_var_ref_ctx(&context.outer_reference_attrs, outer_reference_attrs); + setup_intermediate_var_ref_ctx(&context.inner_reference_attrs, inner_reference_attrs); + + ret = (List *) fix_join_expr_mutator((Node *) clauses, &context); + + bms_free(context.outer_reference_attrs.existing_attrs); + bms_free(context.inner_reference_attrs.existing_attrs); + + return ret; } static Node * fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { Var *newvar; + Node *ret_node; if (node == NULL) return NULL; + + increase_level_for_pre_detoast(node, &context->level_ctx); + if (IsA(node, Var)) { Var *var = (Var *) node; @@ -3044,7 +3319,13 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->rtoffset, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->outer_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* then in the inner. 
*/ @@ -3056,7 +3337,13 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->rtoffset, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->inner_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* If it's for acceptable_rel, adjust and return it */ @@ -3066,6 +3353,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) var->varno += context->rtoffset; if (var->varnosyn > 0) var->varnosyn += context->rtoffset; + /* XXX acceptable_rel? we can ignore it for safety. */ + decreased_level_for_pre_detoast(&context->level_ctx); + return (Node *) var; } @@ -3084,22 +3374,38 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) OUTER_VAR, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->outer_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } if (context->inner_itlist && context->inner_itlist->has_ph_vars) { + newvar = search_indexed_tlist_for_phv(phv, context->inner_itlist, INNER_VAR, context->nrm_match); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->inner_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* If not supplied by input plans, evaluate the contained expr */ /* XXX can we assert something about phnullingrels? 
*/ - return fix_join_expr_mutator((Node *) phv->phexpr, context); + ret_node = fix_join_expr_mutator((Node *) phv->phexpr, context); + decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; } + /* Try matching more complex expressions too, if tlists have any */ if (context->outer_itlist && context->outer_itlist->has_non_vars) { @@ -3107,7 +3413,13 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->outer_itlist, OUTER_VAR); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->outer_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } if (context->inner_itlist && context->inner_itlist->has_non_vars) { @@ -3115,20 +3427,36 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) context->inner_itlist, INNER_VAR); if (newvar) + { + add_pre_detoast_vars(&context->level_ctx, + &context->inner_reference_attrs, + newvar); + decreased_level_for_pre_detoast(&context->level_ctx); return (Node *) newvar; + } } /* Special cases (apply only AFTER failing to match to lower tlist) */ if (IsA(node, Param)) - return fix_param_node(context->root, (Param *) node); + { + ret_node = fix_param_node(context->root, (Param *) node); + decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; + } if (IsA(node, AlternativeSubPlan)) - return fix_join_expr_mutator(fix_alternative_subplan(context->root, - (AlternativeSubPlan *) node, - context->num_exec), - context); + { + ret_node = fix_join_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; + } fix_expr_common(context->root, node); - return expression_tree_mutator(node, - fix_join_expr_mutator, - (void *) context); + ret_node = expression_tree_mutator(node, + fix_join_expr_mutator, + (void *) context); + 
decreased_level_for_pre_detoast(&context->level_ctx); + return ret_node; } /* @@ -3163,7 +3491,8 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) * varno = newvarno, varattno = resno of corresponding targetlist element. * The original tree is not modified. */ -static Node * +static Node * /* XXX: shall I care about this for shared + * detoast optimization? */ fix_upper_expr(PlannerInfo *root, Node *node, indexed_tlist *subplan_itlist, @@ -3318,7 +3647,10 @@ set_returning_clause_references(PlannerInfo *root, resultRelation, rtoffset, NRM_EQUAL, - NUM_EXEC_TLIST(topplan)); + NUM_EXEC_TLIST(topplan), + NULL, + NULL + ); pfree(itlist); diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index 048573c2bc..39c33e51b1 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -77,6 +77,11 @@ typedef enum ExprEvalOp EEOP_OUTER_VAR, EEOP_SCAN_VAR, + /* compute non-system Var value with shared-detoast-datum logic */ + EEOP_INNER_VAR_TOAST, + EEOP_OUTER_VAR_TOAST, + EEOP_SCAN_VAR_TOAST, + /* compute system Var value */ EEOP_INNER_SYSVAR, EEOP_OUTER_SYSVAR, @@ -826,5 +831,6 @@ extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op, ExprContext *econtext); +extern void ExecSlotDetoastDatumExternal(TupleTableSlot *slot, int attnum); #endif /* EXEC_EXPR_H */ diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h index 4210d6d838..258ff8906e 100644 --- a/src/include/executor/tuptable.h +++ b/src/include/executor/tuptable.h @@ -18,6 +18,7 @@ #include "access/htup_details.h" #include "access/sysattr.h" #include "access/tupdesc.h" +#include "nodes/bitmapset.h" #include "storage/buf.h" /*---------- @@ -128,6 +129,11 @@ typedef struct TupleTableSlot MemoryContext tts_mcxt; /* slot itself is in this context */ ItemPointerData tts_tid; /* stored tuple's tid */ Oid 
tts_tableOid; /* table oid of tuple */ + + /* + * The attributes populated by EEOP_{INNER/OUTER/SCAN}_VAR_TOAST step. + */ + Bitmapset *pre_detoasted_attrs; } TupleTableSlot; /* routines for a TupleTableSlot implementation */ @@ -425,12 +431,38 @@ slot_getsysattr(TupleTableSlot *slot, int attnum, bool *isnull) return slot->tts_ops->getsysattr(slot, attnum, isnull); } +static inline void +ExecFreePreDetoastDatum(TupleTableSlot *slot) +{ + int attnum; + + if (bms_is_empty(slot->pre_detoasted_attrs)) + return; + + attnum = -1; + /* free the memory used by pre-detoasted datum and reset the flags. */ + while ((attnum = bms_next_member(slot->pre_detoasted_attrs, attnum)) >= 0) + { + pfree((void *) slot->tts_values[attnum]); + } + + /* + * bms_free each time cost too much, so just zero these bits and keep its + * memory, just like what we did for TupleTableSlot. but.. see the + * comments about the bms_zero. + */ + bms_zero(slot->pre_detoasted_attrs); +} + + /* * ExecClearTuple - clear the slot's contents */ static inline TupleTableSlot * ExecClearTuple(TupleTableSlot *slot) { + ExecFreePreDetoastDatum(slot); + slot->tts_ops->clear(slot); return slot; @@ -449,6 +481,10 @@ ExecClearTuple(TupleTableSlot *slot) static inline void ExecMaterializeSlot(TupleTableSlot *slot) { + /* + * XXX: pre_detoasted_attrs doesn't dependent on any external storage, so + * nothing should be done here. + */ slot->tts_ops->materialize(slot); } @@ -486,6 +522,30 @@ ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot) dstslot->tts_ops->copyslot(dstslot, srcslot); + /* Assert this assumption the below code depends on. 
*/ + Assert(dstslot->tts_nvalid == 0 || + dstslot->tts_nvalid == srcslot->tts_nvalid); + + if (dstslot->tts_nvalid == srcslot->tts_nvalid + && !bms_is_empty(srcslot->pre_detoasted_attrs)) + { + int attnum = -1; + MemoryContext old = MemoryContextSwitchTo(dstslot->tts_mcxt); + + dstslot->pre_detoasted_attrs = bms_copy(srcslot->pre_detoasted_attrs); + + while ((attnum = bms_next_member(dstslot->pre_detoasted_attrs, attnum)) >= 0) + { + struct varlena *datum = (struct varlena *) srcslot->tts_values[attnum]; + Size len; + + Assert(!VARATT_IS_EXTENDED(datum)); + len = VARSIZE(datum); + dstslot->tts_values[attnum] = (Datum) palloc(len); + memcpy((void *) dstslot->tts_values[attnum], datum, len); + } + MemoryContextSwitchTo(old); + } return dstslot; } diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 161243b2d0..402ff02027 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -113,6 +113,7 @@ extern Bitmapset *bms_add_range(Bitmapset *a, int lower, int upper); extern Bitmapset *bms_int_members(Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_del_members(Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_join(Bitmapset *a, Bitmapset *b); +extern void bms_zero(Bitmapset *a); /* support for iterating through the integer elements of a set: */ extern int bms_next_member(const Bitmapset *a, int prevbit); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5d7f17dee0..c3f7d19ba2 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1474,6 +1474,7 @@ typedef struct ScanState Relation ss_currentRelation; struct TableScanDescData *ss_currentScanDesc; TupleTableSlot *ss_ScanTupleSlot; + Bitmapset *scan_pre_detoast_attrs; } ScanState; /* ---------------- @@ -2003,6 +2004,8 @@ typedef struct JoinState bool single_match; /* True if we should skip to next outer tuple * after finding one inner match */ ExprState *joinqual; /* JOIN quals (in addition to 
ps.qual) */ + Bitmapset *outer_pre_detoast_attrs; + Bitmapset *inner_pre_detoast_attrs; } JoinState; /* ---------------- @@ -2764,4 +2767,6 @@ typedef struct LimitState TupleTableSlot *last_slot; /* slot for evaluation of ties */ } LimitState; +extern void SetPredetoastAttrsForJoin(JoinState *joinstate); +extern void SetPredetoastAttrsForScan(ScanState *scanstate); #endif /* EXECNODES_H */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index d40af8e59f..de4f3f190a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -169,6 +169,13 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + + /* + * A list of Vars which should not apply the shared-detoast-datum logic + * since the upper nodes like Sort/Hash want the tuples as small as + * possible. Its a subset of targetlist in each Plan node. + */ + List *forbid_pre_detoast_vars; } Plan; /* ---------------- @@ -385,6 +392,13 @@ typedef struct Scan Plan plan; Index scanrelid; /* relid is index into the range table */ + + /* + * Records of var's varattno - 1 where the Var is accessed indirectly by + * any expression, like a > 3. However a IS [NOT] NULL is not included + * since it doesn't access the tts_values[*] at all. + */ + Bitmapset *reference_attrs; } Scan; /* ---------------- @@ -789,6 +803,14 @@ typedef struct Join JoinType jointype; bool inner_unique; List *joinqual; /* JOIN quals (in addition to plan.qual) */ + + /* + * Records of var's varattno - 1 where the Var is accessed indirectly by + * any expression, like a > 3. However a IS [NOT] NULL is not included + * since it doesn't access the tts_values[*] at all. + */ + Bitmapset *outer_reference_attrs; + Bitmapset *inner_reference_attrs; } Join; /* ---------------- @@ -869,6 +891,14 @@ typedef struct HashJoin * perform lookups in the hashtable over the inner plan. */ List *hashkeys; + + /* + * keep the small tlist information in plan for the shared-detoast datum + * logic. 
If left_small_tlist is true, then all the datums in outerPlan + * should not apply that logic; this is used for maintaining the + * forbid_pre_detoast_vars field in Plan. + */ + bool left_small_tlist; } HashJoin; /* ---------------- @@ -1588,4 +1618,24 @@ typedef enum MonotonicFunction MONOTONICFUNC_BOTH = MONOTONICFUNC_INCREASING | MONOTONICFUNC_DECREASING, } MonotonicFunction; +static inline bool +is_join_plan(Plan *plan) +{ + return (plan != NULL) && (IsA(plan, NestLoop) || IsA(plan, HashJoin) || IsA(plan, MergeJoin)); +} + +static inline bool +is_scan_plan(Plan *plan) +{ + return (plan != NULL) && + (IsA(plan, SeqScan) || + IsA(plan, SampleScan) || + IsA(plan, IndexScan) || + IsA(plan, IndexOnlyScan) || + IsA(plan, BitmapIndexScan) || + IsA(plan, BitmapHeapScan) || + IsA(plan, TidScan) || + IsA(plan, SubqueryScan)); +} + #endif /* PLANNODES_H */ diff --git a/src/test/regress/sql/shared_detoast_slow.sql b/src/test/regress/sql/shared_detoast_slow.sql new file mode 100644 index 0000000000..beecaa6bc5 --- /dev/null +++ b/src/test/regress/sql/shared_detoast_slow.sql @@ -0,0 +1,70 @@ +create table t1(a text, b text, c text); +create table t2(a text, b text, c text); +create table t3(a text, b text, c text); + +insert into t1 select i, i, i from generate_series(1, 1000000)i; +insert into t2 select i, i, i from generate_series(1, 1000000)i; +insert into t3 select i, i, i from generate_series(1, 1000000)i; + +create index on t1(c); + +analyze t1; +analyze t2; +analyze t3; + +-- Turn off jit first, reasons: +-- 1. JIT is not adapted for this feature, it may cause crash on jit. +-- 2. more logging for this feature is enabled when jit=off +set jit to off; + +explain (verbose) select * from t1 where b > 'a'; + + +-- NullTest has nothing to do with tts_values, so its access to a toasted value +-- should be ignored. +explain (verbose) select * from t1 where b is NULL and c is not null; + +-- b can't be shared-detoasted since it would make the work_mem bigger.
+explain (verbose) select * from t1 where b > 'a' order by c; + +-- b CAN be shared-detoasted since it would NOT make the work_mem bigger. +-- But compared with the old behavior, it makes the lifespan of the +-- 'detoast datum' +-- longer; in the old behavior, it is reset because of ExecQualAndReset. +explain (verbose) select a, c from t1 where b > 'a' order by c; + +-- The detoast only happens at the join stage. +explain (verbose) select * from t1 join t2 using(b); + +-- +explain (verbose) select * from t1 join t2 using(b) where t1.c > '3'; + +explain (verbose) +select t3.* +from t1, t2, t3 +where t2.c > '999999999999999' +and t2.c = t1.c +and t3.b = t1.b; + + +-- t2.b (the inner Hash side) can't be pre-detoasted due to Hash. +-- t1.b (the outerPlan side) can't be pre-detoasted due to num_batch. +explain (verbose) select * from t1 join t2 using(b); + +-- Increase the work_mem so that the num_batch = 1, then +-- the t1.b in outerPlan can be pre-detoasted. + +set work_mem to '1GB'; +explain (verbose) select * from t1 join t2 using(b); +reset work_mem; + +-- Show that even if a datum is under a Hash node, as long as it is +-- not a DIRECT input of the Hash, it can still be +-- pre-detoasted. +explain (verbose) +select t3.* +from t1, t2, t3 +where t2.c > '999999999999999' +and t2.c = t1.c +and t3.b = t1.b +and t1.b > '0'; -- 2.34.1