Hi, On 2019-09-25 22:11:51 -0700, Soumyadeep Chakraborty wrote: > Thank you very much for reviewing my patch! > > On Wed, Sep 25, 2019 at 1:02 PM Andres Freund <and...@anarazel.de> wrote: > > IOW, wherever ExecComputeSlotInfo() is called, we should only actually > > push the expression step, when ExecComputeSlotInfo does not determine > > that a) the slot is virtual, b) and fixed (i.e. guaranteed to always be > > the same type of slot). > > > > Does that make sense? > > That is a great suggestion and I totally agree. I have attached a patch > that achieves this.
I think as done, this has the slight disadvantage of removing the fast-path for small interpreted expressions, because that explicitly matches for seeing the EEOP_*_FETCHSOME ops. Look at execExprInterp.c, around: /* * Select fast-path evalfuncs for very simple expressions. "Starting up" * the full interpreter is a measurable overhead for these, and these * patterns occur often enough to be worth optimizing. */ if (state->steps_len == 3) { So I think we'd have to add a separate fastpath for virtual slots for that. What do you think about the attached? Greetings, Andres Freund
>From 26b963f2989cdeec963f30c5d9b6c0e9b942741f Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Thu, 26 Sep 2019 11:14:46 -0700 Subject: [PATCH v1 1/2] Reduce code duplication for ExecJust*Var operations. This is mainly in preparation for introducing a few additional fastpath implementations. Also reorder ExecJust*Var functions to be consistent with the order in which they're used. Author: Andres Freund Discussion: https://postgr.es/m/cae-ml+9oksn71+mhtfmd-l24odp8dgtfavjdu6u+j+fnaw5...@mail.gmail.com --- src/backend/executor/execExprInterp.c | 94 ++++++++++----------------- 1 file changed, 35 insertions(+), 59 deletions(-) diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 293bfb61c36..e876160a0e7 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -155,11 +155,11 @@ static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op, static Datum ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull); -static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull); /* @@ -1966,13 +1966,12 @@ ShutdownTupleDescRef(Datum arg) * Fast-path functions, for very simple expressions */ -/* Simple reference to inner Var */ -static Datum -ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull) +/* implementation of ExecJust(Inner|Outer|Scan)Var */ +static pg_attribute_always_inline Datum +ExecJustVarImpl(ExprState *state, TupleTableSlot *slot, bool *isnull) { ExprEvalStep *op = &state->steps[1]; int attnum = op->d.var.attnum + 1; - TupleTableSlot *slot = econtext->ecxt_innertuple; CheckOpSlotCompatibility(&state->steps[0], slot); @@ -1984,52 +1983,34 @@ ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull) return slot_getattr(slot, attnum, isnull); } -/* Simple reference to outer Var */ +/* Simple reference to inner Var */ static Datum -ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull) +ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull) { - ExprEvalStep *op = &state->steps[1]; - int attnum = op->d.var.attnum + 1; - TupleTableSlot *slot = econtext->ecxt_outertuple; - - CheckOpSlotCompatibility(&state->steps[0], slot); - - /* See comments in ExecJustInnerVar */ - return slot_getattr(slot, attnum, isnull); + return ExecJustVarImpl(state, econtext->ecxt_innertuple, isnull); } -/* Simple reference to scan Var */ +/* Simple reference to outer Var */ static Datum -ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull) +ExecJustOuterVar(ExprState *state, ExprContext *econtext, bool *isnull) { - ExprEvalStep *op = &state->steps[1]; - int attnum = op->d.var.attnum + 1; - TupleTableSlot *slot = econtext->ecxt_scantuple; - - CheckOpSlotCompatibility(&state->steps[0], slot); - - /* See comments in ExecJustInnerVar */ - return slot_getattr(slot, attnum, isnull); + return ExecJustVarImpl(state, econtext->ecxt_outertuple, isnull); } -/* Simple Const expression */ +/* Simple reference to scan Var */ static Datum -ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull) +ExecJustScanVar(ExprState *state, ExprContext *econtext, bool *isnull) { - ExprEvalStep *op = &state->steps[0]; - - *isnull = op->d.constval.isnull; - return op->d.constval.value; + return ExecJustVarImpl(state, econtext->ecxt_scantuple, isnull); } -/* Evaluate inner Var and assign to appropriate column of result tuple */ -static Datum -ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull) +/* implementation of ExecJustAssign(Inner|Outer|Scan)Var */ +static pg_attribute_always_inline Datum +ExecJustAssignVarImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) { ExprEvalStep *op = &state->steps[1]; int attnum = op->d.assign_var.attnum + 1; int resultnum = op->d.assign_var.resultnum; - TupleTableSlot *inslot = econtext->ecxt_innertuple; TupleTableSlot *outslot = state->resultslot; CheckOpSlotCompatibility(&state->steps[0], inslot); @@ -2047,40 +2028,25 @@ ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull) return 0; } +/* Evaluate inner Var and assign to appropriate column of result tuple */ +static Datum +ExecJustAssignInnerVar(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarImpl(state, econtext->ecxt_innertuple, isnull); +} + /* Evaluate outer Var and assign to appropriate column of result tuple */ static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, bool *isnull) { - ExprEvalStep *op = &state->steps[1]; - int attnum = op->d.assign_var.attnum + 1; - int resultnum = op->d.assign_var.resultnum; - TupleTableSlot *inslot = econtext->ecxt_outertuple; - TupleTableSlot *outslot = state->resultslot; - - CheckOpSlotCompatibility(&state->steps[0], inslot); - - /* See comments in ExecJustAssignInnerVar */ - outslot->tts_values[resultnum] = - slot_getattr(inslot, attnum, &outslot->tts_isnull[resultnum]); - return 0; + return ExecJustAssignVarImpl(state, econtext->ecxt_outertuple, isnull); } /* Evaluate scan Var and assign to appropriate column of result tuple */ static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull) { - ExprEvalStep *op = &state->steps[1]; - int attnum = op->d.assign_var.attnum + 1; - int resultnum = op->d.assign_var.resultnum; - TupleTableSlot *inslot = econtext->ecxt_scantuple; - TupleTableSlot *outslot = state->resultslot; - - CheckOpSlotCompatibility(&state->steps[0], inslot); - - /* See comments in ExecJustAssignInnerVar */ - outslot->tts_values[resultnum] = - slot_getattr(inslot, attnum, &outslot->tts_isnull[resultnum]); - return 0; + return ExecJustAssignVarImpl(state, econtext->ecxt_scantuple, isnull); } /* Evaluate CASE_TESTVAL and apply a strict function to it */ @@ -2120,6 +2086,16 @@ ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull) return d; } +/* Simple Const expression */ +static Datum +ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + + *isnull = op->d.constval.isnull; + return op->d.constval.value; +} + #if defined(EEO_USE_COMPUTED_GOTO) /* * Comparator used when building address->opcode lookup table for -- 2.23.0.162.gf1d4a28250
>From 09b8c12569671525de5741e77710a905b1bbd1a0 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Thu, 26 Sep 2019 11:23:43 -0700 Subject: [PATCH v1 2/2] Don't generate EEOP_*_FETCHSOME operations for slots know to be virtual. That avoids unnecessary work during both interpreted and JIT compiled expression evaluation. Author: Soumyadeep Chakraborty, Andres Freund Discussion: https://postgr.es/m/cae-ml+9oksn71+mhtfmd-l24odp8dgtfavjdu6u+j+fnaw5...@mail.gmail.com --- src/backend/executor/execExpr.c | 43 ++++++--- src/backend/executor/execExprInterp.c | 133 +++++++++++++++++++++++++- src/backend/jit/llvm/llvmjit_expr.c | 6 +- 3 files changed, 160 insertions(+), 22 deletions(-) diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 39442f8866f..1ff13d1079a 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -65,7 +65,7 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, static void ExecInitExprSlots(ExprState *state, Node *node); static void ExecPushExprSlots(ExprState *state, LastAttnumInfo *info); static bool get_last_attnums_walker(Node *node, LastAttnumInfo *info); -static void ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op); +static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op); static void ExecInitWholeRowVar(ExprEvalStep *scratch, Var *variable, ExprState *state); static void ExecInitSubscriptingRef(ExprEvalStep *scratch, @@ -2285,8 +2285,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) scratch.d.fetch.fixed = false; scratch.d.fetch.kind = NULL; scratch.d.fetch.known_desc = NULL; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); } if (info->last_outer > 0) { @@ -2295,8 +2295,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) scratch.d.fetch.fixed = false; scratch.d.fetch.kind = NULL; scratch.d.fetch.known_desc = NULL; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); } if (info->last_scan > 0) { @@ -2305,8 +2305,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) scratch.d.fetch.fixed = false; scratch.d.fetch.kind = NULL; scratch.d.fetch.known_desc = NULL; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); } } @@ -2364,14 +2364,21 @@ get_last_attnums_walker(Node *node, LastAttnumInfo *info) * The goal is to determine whether a slot is 'fixed', that is, every * evaluation of the expression will have the same type of slot, with an * equivalent descriptor. + * + * Returns true if the the deforming step is required, false otherwise. */ -static void +static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) { PlanState *parent = state->parent; TupleDesc desc = NULL; const TupleTableSlotOps *tts_ops = NULL; bool isfixed = false; + ExprEvalOp opcode = op->opcode; + + Assert(opcode == EEOP_INNER_FETCHSOME || + opcode == EEOP_OUTER_FETCHSOME || + opcode == EEOP_SCAN_FETCHSOME); if (op->d.fetch.known_desc != NULL) { @@ -2383,7 +2390,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) { isfixed = false; } - else if (op->opcode == EEOP_INNER_FETCHSOME) + else if (opcode == EEOP_INNER_FETCHSOME) { PlanState *is = innerPlanState(parent); @@ -2402,7 +2409,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) desc = ExecGetResultType(is); } } - else if (op->opcode == EEOP_OUTER_FETCHSOME) + else if (opcode == EEOP_OUTER_FETCHSOME) { PlanState *os = outerPlanState(parent); @@ -2421,7 +2428,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) desc = ExecGetResultType(os); } } - else if (op->opcode == EEOP_SCAN_FETCHSOME) + else if (opcode == EEOP_SCAN_FETCHSOME) { desc = parent->scandesc; @@ -2444,6 +2451,12 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) op->d.fetch.kind = NULL; op->d.fetch.known_desc = NULL; } + + /* if the slot is known to always virtual we never need to deform */ + if (op->d.fetch.fixed && op->d.fetch.kind == &TTSOpsVirtual) + return false; + + return true; } /* @@ -3358,16 +3371,16 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, scratch.d.fetch.fixed = false; scratch.d.fetch.known_desc = ldesc; scratch.d.fetch.kind = lops; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); scratch.opcode = EEOP_OUTER_FETCHSOME; scratch.d.fetch.last_var = maxatt; scratch.d.fetch.fixed = false; scratch.d.fetch.known_desc = rdesc; scratch.d.fetch.kind = rops; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); /* * Start comparing at the last field (least significant sort key). That's diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index e876160a0e7..ccea030cd70 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -160,6 +160,12 @@ static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, boo static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); /* @@ -255,11 +261,45 @@ ExecReadyInterpretedExpr(ExprState *state) return; } } - else if (state->steps_len == 2 && - state->steps[0].opcode == EEOP_CONST) + else if (state->steps_len == 2) { - state->evalfunc_private = (void *) ExecJustConst; - return; + ExprEvalOp step0 = state->steps[0].opcode; + + if (step0 == EEOP_CONST) + { + state->evalfunc_private = (void *) ExecJustConst; + return; + } + else if (step0 == EEOP_INNER_VAR) + { + state->evalfunc_private = (void *) ExecJustInnerVarVirt; + return; + } + else if (step0 == EEOP_OUTER_VAR) + { + state->evalfunc_private = (void *) ExecJustOuterVarVirt; + return; + } + else if (step0 == EEOP_SCAN_VAR) + { + state->evalfunc_private = (void *) ExecJustScanVarVirt; + return; + } + else if (step0 == EEOP_ASSIGN_INNER_VAR) + { + state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt; + return; + } + else if (step0 == EEOP_ASSIGN_OUTER_VAR) + { + state->evalfunc_private = (void *) ExecJustAssignOuterVarVirt; + return; + } + else if (step0 == EEOP_ASSIGN_SCAN_VAR) + { + state->evalfunc_private = (void *) ExecJustAssignScanVarVirt; + return; + } } #if defined(EEO_USE_COMPUTED_GOTO) @@ -2096,6 +2136,91 @@ ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull) return op->d.constval.value; } +/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */ +static pg_attribute_always_inline Datum +ExecJustVarVirtImpl(ExprState *state, TupleTableSlot *slot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + int attnum = op->d.var.attnum; + + /* + * As it is guaranteed that a virtual slot is used, there never is a need + * to perform tuple deforming (nor would it be possible). Therefore + * execExpr.c has not emitted a EEOP_*_FETCHSOME step. Verify, as much as + * possible, that that determination was accurate. + */ + Assert(slot->tts_ops == &TTSOpsVirtual); + Assert(TTS_FIXED(slot)); + Assert(attnum >= 0 && attnum < slot->tts_nvalid); + + *isnull = slot->tts_isnull[attnum]; + + return slot->tts_values[attnum]; +} + +/* Like ExecJustInnerVar, optimized for virtual slots */ +static Datum +ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImpl(state, econtext->ecxt_innertuple, isnull); +} + +/* Like ExecJustOuterVar, optimized for virtual slots */ +static Datum +ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImpl(state, econtext->ecxt_outertuple, isnull); +} + +/* Like ExecJustScanVar, optimized for virtual slots */ +static Datum +ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull); +} + +/* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */ +static pg_attribute_always_inline Datum +ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + int attnum = op->d.assign_var.attnum; + int resultnum = op->d.assign_var.resultnum; + TupleTableSlot *outslot = state->resultslot; + + /* see ExecJustVarVirtImpl for comments */ + + Assert(inslot->tts_ops == &TTSOpsVirtual); + Assert(TTS_FIXED(inslot)); + Assert(attnum >= 0 && attnum < inslot->tts_nvalid); + + outslot->tts_values[resultnum] = inslot->tts_values[attnum]; + outslot->tts_isnull[resultnum] = inslot->tts_isnull[attnum]; + + return 0; +} + +/* Like ExecJustAssignInnerVar, optimized for virtual slots */ +static Datum +ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarVirtImpl(state, econtext->ecxt_innertuple, isnull); +} + +/* Like ExecJustAssignOuterVar, optimized for virtual slots */ +static Datum +ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarVirtImpl(state, econtext->ecxt_outertuple, isnull); +} + +/* Like ExecJustAssignScanVar, optimized for virtual slots */ +static Datum +ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarVirtImpl(state, econtext->ecxt_scantuple, isnull); +} + #if defined(EEO_USE_COMPUTED_GOTO) /* * Comparator used when building address->opcode lookup table for diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 30133634c70..d09324637b9 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -287,6 +287,9 @@ llvm_compile_expr(ExprState *state) if (op->d.fetch.fixed) tts_ops = op->d.fetch.kind; + /* step should not have been generated */ + Assert(tts_ops != &TTSOpsVirtual); + if (opcode == EEOP_INNER_FETCHSOME) v_slot = v_innerslot; else if (opcode == EEOP_OUTER_FETCHSOME) @@ -297,9 +300,6 @@ llvm_compile_expr(ExprState *state) /* * Check if all required attributes are available, or * whether deforming is required. - * - * TODO: skip nvalid check if slot is fixed and known to - * be a virtual slot. */ v_nvalid = l_load_struct_gep(b, v_slot, -- 2.23.0.162.gf1d4a28250