Hi,

On 2017-11-29 00:09:34 -0800, Andres Freund wrote:
> Similar to [1] (Expression based aggregate transition / combine function
> invocation), this patch provides small-medium performance benefits in
> order to later enable larger performance benefits with JIT compilation.

Here's an updated version of the patch.


> - there's now a bit of additional code at callsites to reset the
>   ExprContext - was tempted to put that that in a ExecQualAndReset()
>   inline wrapper, but that's not entirely trivial because executor.h
>   doesn't include memutils.h and ResetExprContext() is declared late.

I've now added ExecQualAndReset(), in patch 0001, and solved the above
problems by including memutils.h and "inlining" ResetExprContext() to
avoid the ordering issue. Not pretty, but seems better than the
alternatives.


I'm not yet quite happy with this, but I thought it'd be good to send an
updated version...

- Andres
>From 7b5ca94dfce005c11f09beba0ab2676312f18d99 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Mon, 8 Jan 2018 13:09:01 -0800
Subject: [PATCH 1/2] Introduce ExecQualAndReset() helper.

It's a bit ugly that it currently uses MemoryContextReset() instead of
ResetExprContext(), but that seems easier than reordering all of
executor.h.

Author: Andres Freund
Discussion: https://postgr.es/m/20171129080934.amqqkke2zjtek...@alap3.anarazel.de
---
 src/backend/executor/nodeBitmapHeapscan.c |  9 ++-------
 src/backend/executor/nodeHash.c           | 10 ++--------
 src/backend/executor/nodeIndexonlyscan.c  |  3 +--
 src/backend/executor/nodeIndexscan.c      | 11 +++--------
 src/include/executor/executor.h           | 17 +++++++++++++++++
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 7ba1db7d7ec..fa65d4efbe7 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -352,9 +352,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			if (tbmres->recheck)
 			{
 				econtext->ecxt_scantuple = slot;
-				ResetExprContext(econtext);
-
-				if (!ExecQual(node->bitmapqualorig, econtext))
+				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
 				{
 					/* Fails recheck, so drop it and loop back for another */
 					InstrCountFiltered2(node, 1);
@@ -717,10 +715,7 @@ BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
 
 	/* Does the tuple meet the original qual conditions? */
 	econtext->ecxt_scantuple = slot;
-
-	ResetExprContext(econtext);
-
-	return ExecQual(node->bitmapqualorig, econtext);
+	return ExecQualAndReset(node->bitmapqualorig, econtext);
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index a9149ef81ce..c26b8ea44e3 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -1942,10 +1942,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
 											 false);	/* do not pfree */
 			econtext->ecxt_innertuple = inntuple;
 
-			/* reset temp memory each time to avoid leaks from qual expr */
-			ResetExprContext(econtext);
-
-			if (ExecQual(hjclauses, econtext))
+			if (ExecQualAndReset(hjclauses, econtext))
 			{
 				hjstate->hj_CurTuple = hashTuple;
 				return true;
@@ -2002,10 +1999,7 @@ ExecParallelScanHashBucket(HashJoinState *hjstate,
 											 false);	/* do not pfree */
 			econtext->ecxt_innertuple = inntuple;
 
-			/* reset temp memory each time to avoid leaks from qual expr */
-			ResetExprContext(econtext);
-
-			if (ExecQual(hjclauses, econtext))
+			if (ExecQualAndReset(hjclauses, econtext))
 			{
 				hjstate->hj_CurTuple = hashTuple;
 				return true;
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 9b7f470ee28..d6fa5fe4e81 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -214,8 +214,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		if (scandesc->xs_recheck)
 		{
 			econtext->ecxt_scantuple = slot;
-			ResetExprContext(econtext);
-			if (!ExecQual(node->indexqual, econtext))
+			if (!ExecQualAndReset(node->indexqual, econtext))
 			{
 				/* Fails recheck, so drop it and loop back for another */
 				InstrCountFiltered2(node, 1);
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 54fafa5033f..0f07bf0c6d8 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -152,8 +152,7 @@ IndexNext(IndexScanState *node)
 		if (scandesc->xs_recheck)
 		{
 			econtext->ecxt_scantuple = slot;
-			ResetExprContext(econtext);
-			if (!ExecQual(node->indexqualorig, econtext))
+			if (!ExecQualAndReset(node->indexqualorig, econtext))
 			{
 				/* Fails recheck, so drop it and loop back for another */
 				InstrCountFiltered2(node, 1);
@@ -300,8 +299,7 @@ next_indextuple:
 		if (scandesc->xs_recheck)
 		{
 			econtext->ecxt_scantuple = slot;
-			ResetExprContext(econtext);
-			if (!ExecQual(node->indexqualorig, econtext))
+			if (!ExecQualAndReset(node->indexqualorig, econtext))
 			{
 				/* Fails recheck, so drop it and loop back for another */
 				InstrCountFiltered2(node, 1);
@@ -420,10 +418,7 @@ IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
 
 	/* Does the tuple meet the indexqual condition? */
 	econtext->ecxt_scantuple = slot;
-
-	ResetExprContext(econtext);
-
-	return ExecQual(node->indexqualorig, econtext);
+	return ExecQualAndReset(node->indexqualorig, econtext);
 }
 
 
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 6545a802223..1d824eff361 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -17,6 +17,7 @@
 #include "catalog/partition.h"
 #include "executor/execdesc.h"
 #include "nodes/parsenodes.h"
+#include "utils/memutils.h"
 
 
 /*
@@ -381,6 +382,22 @@ ExecQual(ExprState *state, ExprContext *econtext)
 }
 #endif
 
+/*
+ * ExecQualAndReset() - evaluate qual with ExecQual() and reset expression
+ * context.
+ */
+#ifndef FRONTEND
+static inline bool
+ExecQualAndReset(ExprState *state, ExprContext *econtext)
+{
+	bool		ret = ExecQual(state, econtext);
+
+	/* inline ResetExprContext, to avoid ordering issue in this file */
+	MemoryContextReset(econtext->ecxt_per_tuple_memory);
+	return ret;
+}
+#endif
+
 extern bool ExecCheck(ExprState *state, ExprContext *context);
 
 /*
-- 
2.15.1.354.g95ec6b1b33.dirty

>From b1b301b0159ca0303b0d785c6eec286b68fb9f38 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Mon, 8 Jan 2018 12:14:28 -0800
Subject: [PATCH 2/2] Do execGrouping.c via expression eval machinery.

This has a performance benefit on its own, although not hugely so. The
primary benefit is that it will allow for JITing the deforming and
comparisons.

Author: Andres Freund
Discussion: https://postgr.es/m/20171129080934.amqqkke2zjtek...@alap3.anarazel.de
---
 src/backend/executor/execExpr.c           | 117 +++++++++++++++
 src/backend/executor/execExprInterp.c     |  29 ++++
 src/backend/executor/execGrouping.c       | 236 +++++++-----------------------
 src/backend/executor/nodeAgg.c            | 123 +++++++++-------
 src/backend/executor/nodeGroup.c          |  25 ++--
 src/backend/executor/nodeRecursiveunion.c |   5 +-
 src/backend/executor/nodeSetOp.c          |  48 +++---
 src/backend/executor/nodeSubplan.c        |  81 +++++++++-
 src/backend/executor/nodeUnique.c         |  31 ++--
 src/backend/executor/nodeWindowAgg.c      |  38 +++--
 src/backend/utils/adt/orderedsetaggs.c    |  54 +++----
 src/include/executor/execExpr.h           |   1 +
 src/include/executor/executor.h           |  28 ++--
 src/include/executor/nodeAgg.h            |  12 +-
 src/include/nodes/execnodes.h             |  14 +-
 15 files changed, 478 insertions(+), 364 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 794573803da..05288a56d9c 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3190,3 +3190,120 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 		as->d.agg_strict_trans_check.jumpnull = state->steps_len;
 	}
 }
+
+/*
+ * Build equality expression that can be evaluated using ExecQual(), returning
+ * true if the expression context's inner/outer tuple are NOT DISTINCT. I.e.
+ * two nulls match, a null and a not-null don't match.
+ *
+ * desc: tuple descriptor of the to-be-compared tuples
+ * numCols: the number of attributes to be examined
+ * keyColIdx: array of attribute column numbers
+ * eqfunctions: array of function oids of the equality functions to use
+ * parent: parent executor node
+ */
+ExprState *
+ExecBuildGroupingEqual(TupleDesc desc,
+					   int numCols,
+					   AttrNumber *keyColIdx,
+					   Oid *eqfunctions,
+					   PlanState *parent)
+{
+	ExprState  *state = makeNode(ExprState);
+	ExprEvalStep scratch;
+	int			natt;
+	int			maxatt = -1;
+	List	   *adjust_jumps = NIL;
+	ListCell   *lc;
+
+	/*
+	 * When no columns are actually compared, the result's always true. See
+	 * special case in ExecQual().
+	 */
+	if (numCols == 0)
+		return NULL;
+
+	state->expr = NULL;
+	state->flags = EEO_FLAG_IS_QUAL;
+
+	scratch.resvalue = &state->resvalue;
+	scratch.resnull = &state->resnull;
+
+	/* compute max needed attribute */
+	for (natt = 0; natt < numCols; natt++)
+	{
+		int			attno = keyColIdx[natt];
+
+		if (attno > maxatt)
+			maxatt = attno;
+	}
+	Assert(maxatt >= 0);
+
+	/* push deform steps */
+	scratch.opcode = EEOP_INNER_FETCHSOME;
+	scratch.d.fetch.last_var = maxatt;
+	ExprEvalPushStep(state, &scratch);
+
+	scratch.opcode = EEOP_OUTER_FETCHSOME;
+	scratch.d.fetch.last_var = maxatt;
+	ExprEvalPushStep(state, &scratch);
+
+	/*
+	 * Start comparing at the last field (least significant sort key). That's
+	 * the most likely to be different if we are dealing with sorted input.
+	 */
+	for (natt = numCols; --natt >= 0;)
+	{
+		int			attno = keyColIdx[natt];
+		Form_pg_attribute att = TupleDescAttr(desc, attno - 1);
+		Var		   *larg,
+				   *rarg;
+		List	   *args;
+
+		/*
+		 * Reusing ExecInitFunc() requires creating Vars, but still seems
+		 * worth it from a code reuse perspective.
+		 */
+
+		/* left arg */
+		larg = makeVar(INNER_VAR, attno, att->atttypid,
+					   att->atttypmod, InvalidOid, 0);
+		/* right arg */
+		rarg = makeVar(OUTER_VAR, attno, att->atttypid,
+					   att->atttypmod, InvalidOid, 0);
+		args = list_make2(larg, rarg);
+
+		/* evaluate distinctness */
+		ExecInitFunc(&scratch, NULL,
+					 args, eqfunctions[natt], InvalidOid,
+					 state);
+		scratch.opcode = EEOP_NOT_DISTINCT;
+		ExprEvalPushStep(state, &scratch);
+
+		/* then emit EEOP_QUAL to detect if result is false (or null) */
+		scratch.opcode = EEOP_QUAL;
+		scratch.d.qualexpr.jumpdone = -1;
+		ExprEvalPushStep(state, &scratch);
+		adjust_jumps = lappend_int(adjust_jumps,
+								   state->steps_len - 1);
+	}
+
+	/* adjust jump targets */
+	foreach(lc, adjust_jumps)
+	{
+		ExprEvalStep *as = &state->steps[lfirst_int(lc)];
+
+		Assert(as->opcode == EEOP_QUAL);
+		Assert(as->d.qualexpr.jumpdone == -1);
+		as->d.qualexpr.jumpdone = state->steps_len;
+	}
+
+	scratch.resvalue = NULL;
+	scratch.resnull = NULL;
+	scratch.opcode = EEOP_DONE;
+	ExprEvalPushStep(state, &scratch);
+
+	ExecReadyExpr(state);
+
+	return state;
+}
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index f646fd9c51e..71bce37ca3a 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -354,6 +354,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_MAKE_READONLY,
 		&&CASE_EEOP_IOCOERCE,
 		&&CASE_EEOP_DISTINCT,
+		&&CASE_EEOP_NOT_DISTINCT,
 		&&CASE_EEOP_NULLIF,
 		&&CASE_EEOP_SQLVALUEFUNCTION,
 		&&CASE_EEOP_CURRENTOFEXPR,
@@ -1197,6 +1198,34 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		/* see EEOP_DISTINCT for comments, this is just inverted  */
+		EEO_CASE(EEOP_NOT_DISTINCT)
+		{
+			FunctionCallInfo fcinfo = op->d.func.fcinfo_data;
+
+			if (fcinfo->argnull[0] && fcinfo->argnull[1])
+			{
+				*op->resvalue = BoolGetDatum(true);
+				*op->resnull = false;
+			}
+			else if (fcinfo->argnull[0] || fcinfo->argnull[1])
+			{
+				*op->resvalue = BoolGetDatum(false);
+				*op->resnull = false;
+			}
+			else
+			{
+				Datum		eqresult;
+
+				fcinfo->isnull = false;
+				eqresult = op->d.func.fn_addr(fcinfo);
+				*op->resvalue = eqresult;
+				*op->resnull = fcinfo->isnull;
+			}
+
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_NULLIF)
 		{
 			/*
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 058ee688041..2a750b3a14d 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -50,173 +50,34 @@ static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tup
  *		Utility routines for grouping tuples together
  *****************************************************************************/
 
-/*
- * execTuplesMatch
- *		Return true if two tuples match in all the indicated fields.
- *
- * This actually implements SQL's notion of "not distinct".  Two nulls
- * match, a null and a not-null don't match.
- *
- * slot1, slot2: the tuples to compare (must have same columns!)
- * numCols: the number of attributes to be examined
- * matchColIdx: array of attribute column numbers
- * eqFunctions: array of fmgr lookup info for the equality functions to use
- * evalContext: short-term memory context for executing the functions
- *
- * NB: evalContext is reset each time!
- */
-bool
-execTuplesMatch(TupleTableSlot *slot1,
-				TupleTableSlot *slot2,
-				int numCols,
-				AttrNumber *matchColIdx,
-				FmgrInfo *eqfunctions,
-				MemoryContext evalContext)
-{
-	MemoryContext oldContext;
-	bool		result;
-	int			i;
-
-	/* Reset and switch into the temp context. */
-	MemoryContextReset(evalContext);
-	oldContext = MemoryContextSwitchTo(evalContext);
-
-	/*
-	 * We cannot report a match without checking all the fields, but we can
-	 * report a non-match as soon as we find unequal fields.  So, start
-	 * comparing at the last field (least significant sort key). That's the
-	 * most likely to be different if we are dealing with sorted input.
-	 */
-	result = true;
-
-	for (i = numCols; --i >= 0;)
-	{
-		AttrNumber	att = matchColIdx[i];
-		Datum		attr1,
-					attr2;
-		bool		isNull1,
-					isNull2;
-
-		attr1 = slot_getattr(slot1, att, &isNull1);
-
-		attr2 = slot_getattr(slot2, att, &isNull2);
-
-		if (isNull1 != isNull2)
-		{
-			result = false;		/* one null and one not; they aren't equal */
-			break;
-		}
-
-		if (isNull1)
-			continue;			/* both are null, treat as equal */
-
-		/* Apply the type-specific equality function */
-
-		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
-										attr1, attr2)))
-		{
-			result = false;		/* they aren't equal */
-			break;
-		}
-	}
-
-	MemoryContextSwitchTo(oldContext);
-
-	return result;
-}
-
-/*
- * execTuplesUnequal
- *		Return true if two tuples are definitely unequal in the indicated
- *		fields.
- *
- * Nulls are neither equal nor unequal to anything else.  A true result
- * is obtained only if there are non-null fields that compare not-equal.
- *
- * Parameters are identical to execTuplesMatch.
- */
-bool
-execTuplesUnequal(TupleTableSlot *slot1,
-				  TupleTableSlot *slot2,
-				  int numCols,
-				  AttrNumber *matchColIdx,
-				  FmgrInfo *eqfunctions,
-				  MemoryContext evalContext)
-{
-	MemoryContext oldContext;
-	bool		result;
-	int			i;
-
-	/* Reset and switch into the temp context. */
-	MemoryContextReset(evalContext);
-	oldContext = MemoryContextSwitchTo(evalContext);
-
-	/*
-	 * We cannot report a match without checking all the fields, but we can
-	 * report a non-match as soon as we find unequal fields.  So, start
-	 * comparing at the last field (least significant sort key). That's the
-	 * most likely to be different if we are dealing with sorted input.
-	 */
-	result = false;
-
-	for (i = numCols; --i >= 0;)
-	{
-		AttrNumber	att = matchColIdx[i];
-		Datum		attr1,
-					attr2;
-		bool		isNull1,
-					isNull2;
-
-		attr1 = slot_getattr(slot1, att, &isNull1);
-
-		if (isNull1)
-			continue;			/* can't prove anything here */
-
-		attr2 = slot_getattr(slot2, att, &isNull2);
-
-		if (isNull2)
-			continue;			/* can't prove anything here */
-
-		/* Apply the type-specific equality function */
-
-		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
-										attr1, attr2)))
-		{
-			result = true;		/* they are unequal */
-			break;
-		}
-	}
-
-	MemoryContextSwitchTo(oldContext);
-
-	return result;
-}
-
-
 /*
  * execTuplesMatchPrepare
- *		Look up the equality functions needed for execTuplesMatch or
- *		execTuplesUnequal, given an array of equality operator OIDs.
- *
- * The result is a palloc'd array.
+ *		Build expression that can be evaluated using ExecQual(), returning
+ *		whether an ExprContext's inner/outer tuples are NOT DISTINCT
  */
-FmgrInfo *
-execTuplesMatchPrepare(int numCols,
-					   Oid *eqOperators)
+ExprState *
+execTuplesMatchPrepare(TupleDesc desc,
+					   int numCols,
+					   AttrNumber *keyColIdx,
+					   Oid *eqOperators,
+					   PlanState *parent)
 {
-	FmgrInfo   *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+	Oid		   *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
 	int			i;
+	ExprState  *expr;
 
+	if (numCols == 0)
+		return NULL;
+
+	/* lookup equality functions */
 	for (i = 0; i < numCols; i++)
-	{
-		Oid			eq_opr = eqOperators[i];
-		Oid			eq_function;
+		eqFunctions[i] = get_opcode(eqOperators[i]);
 
-		eq_function = get_opcode(eq_opr);
-		fmgr_info(eq_function, &eqFunctions[i]);
-	}
+	/* build actual expression */
+	expr = ExecBuildGroupingEqual(desc, numCols, keyColIdx, eqFunctions,
+								  parent);
 
-	return eqFunctions;
+	return expr;
 }
 
 /*
@@ -287,7 +148,9 @@ execTuplesHashPrepare(int numCols,
  * storage that will live as long as the hashtable does.
  */
 TupleHashTable
-BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+BuildTupleHashTable(PlanState *parent,
+					TupleDesc inputDesc,
+					int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *eqfunctions,
 					FmgrInfo *hashfunctions,
 					long nbuckets, Size additionalsize,
@@ -296,6 +159,9 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 {
 	TupleHashTable hashtable;
 	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
+	MemoryContext oldcontext;
+	Oid		   *eqoids = (Oid *) palloc(numCols * sizeof(Oid));
+	int			i;
 
 	Assert(nbuckets > 0);
 
@@ -332,6 +198,26 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 
 	hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable);
 
+	oldcontext = MemoryContextSwitchTo(hashtable->tablecxt);
+
+	/*
+	 * We copy the input tuple descriptor just for safety --- we assume all
+	 * input tuples will have equivalent descriptors.
+	 */
+	hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc));
+
+	/* build comparator for all columns */
+	for (i = 0; i < numCols; i++)
+		eqoids[i] = eqfunctions[i].fn_oid;
+	hashtable->eq_func = ExecBuildGroupingEqual(inputDesc,
+												numCols,
+												keyColIdx, eqoids,
+												parent);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	hashtable->exprcontext = CreateExprContext(parent->state);
+
 	return hashtable;
 }
 
@@ -356,22 +242,6 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	bool		found;
 	MinimalTuple key;
 
-	/* If first time through, clone the input slot to make table slot */
-	if (hashtable->tableslot == NULL)
-	{
-		TupleDesc	tupdesc;
-
-		oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
-
-		/*
-		 * We copy the input tuple descriptor just for safety --- we assume
-		 * all input tuples will have equivalent descriptors.
-		 */
-		tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
-		hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
-		MemoryContextSwitchTo(oldContext);
-	}
-
 	/* Need to run the hash functions in short-lived context */
 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 
@@ -517,9 +387,6 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
  * See whether two tuples (presumably of the same hash value) match
  *
  * As above, the passed pointers are pointers to TupleHashEntryData.
- *
- * Also, the caller must select an appropriate memory context for running
- * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
  */
 static int
 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
@@ -527,6 +394,7 @@ TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const
 	TupleTableSlot *slot1;
 	TupleTableSlot *slot2;
 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
+	ExprContext *econtext = hashtable->exprcontext;
 
 	/*
 	 * We assume that simplehash.h will only ever call us with the first
@@ -541,13 +409,7 @@ TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const
 	slot2 = hashtable->inputslot;
 
 	/* For crosstype comparisons, the inputslot must be first */
-	if (execTuplesMatch(slot2,
-						slot1,
-						hashtable->numCols,
-						hashtable->keyColIdx,
-						hashtable->cur_eq_funcs,
-						hashtable->tempcxt))
-		return 0;
-	else
-		return 1;
+	econtext->ecxt_innertuple = slot2;	/* inputslot: left arg of eq funcs */
+	econtext->ecxt_outertuple = slot1;	/* table entry: right arg */
+	return !ExecQualAndReset(hashtable->eq_func, econtext);
 }
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 061acad80f1..b34b1e67af7 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -755,7 +755,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 			((oldIsNull && *isNull) ||
 			 (!oldIsNull && !*isNull &&
 			  oldAbbrevVal == newAbbrevVal &&
-			  DatumGetBool(FunctionCall2(&pertrans->equalfns[0],
+			  DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
 										 oldVal, *newVal)))))
 		{
 			/* equal to prior, so forget this one */
@@ -802,7 +802,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 								AggStatePerTrans pertrans,
 								AggStatePerGroup pergroupstate)
 {
-	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
+	ExprContext *tmpcontext = aggstate->tmpcontext;
 	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 	TupleTableSlot *slot1 = pertrans->sortslot;
 	TupleTableSlot *slot2 = pertrans->uniqslot;
@@ -811,6 +811,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 	Datum		newAbbrevVal = (Datum) 0;
 	Datum		oldAbbrevVal = (Datum) 0;
 	bool		haveOldValue = false;
+	TupleTableSlot *save = aggstate->tmpcontext->ecxt_outertuple;
 	int			i;
 
 	tuplesort_performsort(pertrans->sortstates[aggstate->current_set]);
@@ -824,22 +825,20 @@ process_ordered_aggregate_multi(AggState *aggstate,
 	{
 		CHECK_FOR_INTERRUPTS();
 
-		/*
-		 * Extract the first numTransInputs columns as datums to pass to the
-		 * transfn.  (This will help execTuplesMatch too, so we do it
-		 * immediately.)
-		 */
-		slot_getsomeattrs(slot1, numTransInputs);
+		tmpcontext->ecxt_outertuple = slot1;
+		tmpcontext->ecxt_innertuple = slot2;
 
 		if (numDistinctCols == 0 ||
 			!haveOldValue ||
 			newAbbrevVal != oldAbbrevVal ||
-			!execTuplesMatch(slot1, slot2,
-							 numDistinctCols,
-							 pertrans->sortColIdx,
-							 pertrans->equalfns,
-							 workcontext))
+			!ExecQual(pertrans->equalfnMulti, tmpcontext))
 		{
+			/*
+			 * Extract the first numTransInputs columns as datums to pass to
+			 * the transfn.
+			 */
+			slot_getsomeattrs(slot1, numTransInputs);
+
 			/* Load values into fcinfo */
 			/* Start from 1, since the 0th arg will be the transition value */
 			for (i = 0; i < numTransInputs; i++)
@@ -857,15 +856,14 @@ process_ordered_aggregate_multi(AggState *aggstate,
 
 				slot2 = slot1;
 				slot1 = tmpslot;
-				/* avoid execTuplesMatch() calls by reusing abbreviated keys */
+				/* avoid ExecQual() calls by reusing abbreviated keys */
 				oldAbbrevVal = newAbbrevVal;
 				haveOldValue = true;
 			}
 		}
 
-		/* Reset context each time, unless execTuplesMatch did it for us */
-		if (numDistinctCols == 0)
-			MemoryContextReset(workcontext);
+		/* Reset context each time */
+		ResetExprContext(tmpcontext);
 
 		ExecClearTuple(slot1);
 	}
@@ -875,6 +873,9 @@ process_ordered_aggregate_multi(AggState *aggstate,
 
 	tuplesort_end(pertrans->sortstates[aggstate->current_set]);
 	pertrans->sortstates[aggstate->current_set] = NULL;
+
+	/* restore previous slot, potentially in use for grouping sets */
+	tmpcontext->ecxt_outertuple = save;
 }
 
 /*
@@ -1276,7 +1277,9 @@ build_hash_table(AggState *aggstate)
 
 		Assert(perhash->aggnode->numGroups > 0);
 
-		perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+		perhash->hashtable = BuildTupleHashTable(&aggstate->ss.ps,
+												 perhash->hashslot->tts_tupleDescriptor,
+												 perhash->numCols,
 												 perhash->hashGrpColIdxHash,
 												 perhash->eqfunctions,
 												 perhash->hashfunctions,
@@ -1314,6 +1317,7 @@ find_hash_columns(AggState *aggstate)
 	Bitmapset  *base_colnos;
 	List	   *outerTlist = outerPlanState(aggstate)->plan->targetlist;
 	int			numHashes = aggstate->num_hashes;
+	EState	   *estate = aggstate->ss.ps.state;
 	int			j;
 
 	/* Find Vars that will be needed in tlist and qual */
@@ -1393,6 +1397,12 @@ find_hash_columns(AggState *aggstate)
 		}
 
 		hashDesc = ExecTypeFromTL(hashTlist, false);
+
+		execTuplesHashPrepare(perhash->numCols,
+							  perhash->aggnode->grpOperators,
+							  &perhash->eqfunctions,
+							  &perhash->hashfunctions);
+		perhash->hashslot = ExecAllocTableSlot(&estate->es_tupleTable);
 		ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
 
 		list_free(hashTlist);
@@ -1694,17 +1704,15 @@ agg_retrieve_direct(AggState *aggstate)
 		 *		of the next grouping set
 		 *----------
 		 */
+		ResetExprContext(tmpcontext);
+		tmpcontext->ecxt_innertuple = econtext->ecxt_outertuple;
+
 		if (aggstate->input_done ||
 			(node->aggstrategy != AGG_PLAIN &&
 			 aggstate->projected_set != -1 &&
 			 aggstate->projected_set < (numGroupingSets - 1) &&
 			 nextSetSize > 0 &&
-			 !execTuplesMatch(econtext->ecxt_outertuple,
-							  tmpcontext->ecxt_outertuple,
-							  nextSetSize,
-							  node->grpColIdx,
-							  aggstate->phase->eqfunctions,
-							  tmpcontext->ecxt_per_tuple_memory)))
+			 !ExecQual(aggstate->phase->eqfunctions[nextSetSize - 1], tmpcontext)))
 		{
 			aggstate->projected_set += 1;
 
@@ -1847,12 +1855,9 @@ agg_retrieve_direct(AggState *aggstate)
 					 */
 					if (node->aggstrategy != AGG_PLAIN)
 					{
-						if (!execTuplesMatch(firstSlot,
-											 outerslot,
-											 node->numCols,
-											 node->grpColIdx,
-											 aggstate->phase->eqfunctions,
-											 tmpcontext->ecxt_per_tuple_memory))
+						tmpcontext->ecxt_innertuple = firstSlot;
+						if (!ExecQual(aggstate->phase->eqfunctions[node->numCols - 1],
+									  tmpcontext))
 						{
 							aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
 							break;
@@ -2355,11 +2360,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			 */
 			if (aggnode->aggstrategy == AGG_SORTED)
 			{
+				int			i = 0;
+
 				Assert(aggnode->numCols > 0);
 
+				/*
+				 * Build a separate function for each subset of columns that
+				 * need to be compared.
+				 */
 				phasedata->eqfunctions =
-					execTuplesMatchPrepare(aggnode->numCols,
-										   aggnode->grpOperators);
+					(ExprState **) palloc0(aggnode->numCols * sizeof(ExprState *));
+				for (i = 0; i < aggnode->numCols; i++)
+				{
+					phasedata->eqfunctions[i] =
+						execTuplesMatchPrepare(aggstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor,
+											   i + 1,
+											   aggnode->grpColIdx,
+											   aggnode->grpOperators,
+											   (PlanState *) aggstate);
+				}
 			}
 
 			phasedata->aggnode = aggnode;
@@ -2412,16 +2431,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 */
 	if (use_hashing)
 	{
-		for (i = 0; i < numHashes; ++i)
-		{
-			aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
-
-			execTuplesHashPrepare(aggstate->perhash[i].numCols,
-								  aggstate->perhash[i].aggnode->grpOperators,
-								  &aggstate->perhash[i].eqfunctions,
-								  &aggstate->perhash[i].hashfunctions);
-		}
-
 		/* this is an array of pointers, not structures */
 		aggstate->hash_pergroup = pergroups;
 
@@ -3101,24 +3110,28 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 
 	if (aggref->aggdistinct)
 	{
-		Assert(numArguments > 0);
+		Oid		   *ops;
 
-		/*
-		 * We need the equal function for each DISTINCT comparison we will
-		 * make.
-		 */
-		pertrans->equalfns =
-			(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
+		Assert(numArguments > 0);
+		Assert(list_length(aggref->aggdistinct) == numDistinctCols);
+
+		ops = palloc(numDistinctCols * sizeof(Oid));
 
 		i = 0;
 		foreach(lc, aggref->aggdistinct)
-		{
-			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+			ops[i++] = ((SortGroupClause *) lfirst(lc))->eqop;
 
-			fmgr_info(get_opcode(sortcl->eqop), &pertrans->equalfns[i]);
-			i++;
-		}
-		Assert(i == numDistinctCols);
+		/* lookup / build the necessary comparators */
+		if (numDistinctCols == 1)
+			fmgr_info(get_opcode(ops[0]), &pertrans->equalfnOne);
+		else
+			pertrans->equalfnMulti =
+				execTuplesMatchPrepare(pertrans->sortdesc,
+									   numDistinctCols,
+									   pertrans->sortColIdx,
+									   ops,
+									   &aggstate->ss.ps);
+		pfree(ops);
 	}
 
 	pertrans->sortstates = (Tuplesortstate **)
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c
index f1cdbaa4e67..0f835671bf6 100644
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -25,6 +25,7 @@
 #include "executor/executor.h"
 #include "executor/nodeGroup.h"
 #include "miscadmin.h"
+#include "utils/memutils.h"
 
 
 /*
@@ -37,8 +38,6 @@ ExecGroup(PlanState *pstate)
 {
 	GroupState *node = castNode(GroupState, pstate);
 	ExprContext *econtext;
-	int			numCols;
-	AttrNumber *grpColIdx;
 	TupleTableSlot *firsttupleslot;
 	TupleTableSlot *outerslot;
 
@@ -50,16 +49,15 @@ ExecGroup(PlanState *pstate)
 	if (node->grp_done)
 		return NULL;
 	econtext = node->ss.ps.ps_ExprContext;
-	numCols = ((Group *) node->ss.ps.plan)->numCols;
-	grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx;
 
 	/*
 	 * The ScanTupleSlot holds the (copied) first tuple of each group.
 	 */
 	firsttupleslot = node->ss.ss_ScanTupleSlot;
 
+
 	/*
-	 * We need not call ResetExprContext here because execTuplesMatch will
+	 * We need not call ResetExprContext here because ExecQualAndReset() will
 	 * reset the per-tuple memory context once per input tuple.
 	 */
 
@@ -124,10 +122,9 @@ ExecGroup(PlanState *pstate)
 			 * Compare with first tuple and see if this tuple is of the same
 			 * group.  If so, ignore it and keep scanning.
 			 */
-			if (!execTuplesMatch(firsttupleslot, outerslot,
-								 numCols, grpColIdx,
-								 node->eqfunctions,
-								 econtext->ecxt_per_tuple_memory))
+			econtext->ecxt_innertuple = firsttupleslot;
+			econtext->ecxt_outertuple = outerslot;
+			if (!ExecQualAndReset(node->eqfunction, econtext))
 				break;
 		}
 
@@ -166,6 +163,7 @@ GroupState *
 ExecInitGroup(Group *node, EState *estate, int eflags)
 {
 	GroupState *grpstate;
+	AttrNumber *grpColIdx = node->grpColIdx;
 
 	/* check for unsupported flags */
 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -215,9 +213,12 @@ ExecInitGroup(Group *node, EState *estate, int eflags)
 	/*
 	 * Precompute fmgr lookup data for inner loop
 	 */
-	grpstate->eqfunctions =
-		execTuplesMatchPrepare(node->numCols,
-							   node->grpOperators);
+	grpstate->eqfunction =
+		execTuplesMatchPrepare(ExecGetResultType(outerPlanState(grpstate)),
+							   node->numCols,
+							   grpColIdx,
+							   node->grpOperators,
+							   &grpstate->ss.ps);
 
 	return grpstate;
 }
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 817749855fc..c070338fdb7 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -32,11 +32,14 @@ static void
 build_hash_table(RecursiveUnionState *rustate)
 {
 	RecursiveUnion *node = (RecursiveUnion *) rustate->ps.plan;
+	TupleDesc	desc = ExecGetResultType(outerPlanState(rustate));
 
 	Assert(node->numCols > 0);
 	Assert(node->numGroups > 0);
 
-	rustate->hashtable = BuildTupleHashTable(node->numCols,
+	rustate->hashtable = BuildTupleHashTable(&rustate->ps,
+											 desc,
+											 node->numCols,
 											 node->dupColIdx,
 											 rustate->eqfunctions,
 											 rustate->hashfunctions,
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index c91c3402d25..ba2d3159c0c 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -120,18 +120,22 @@ static void
 build_hash_table(SetOpState *setopstate)
 {
 	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));
 
 	Assert(node->strategy == SETOP_HASHED);
 	Assert(node->numGroups > 0);
 
-	setopstate->hashtable = BuildTupleHashTable(node->numCols,
+	setopstate->hashtable = BuildTupleHashTable(&setopstate->ps,
+												desc,
+												node->numCols,
 												node->dupColIdx,
 												setopstate->eqfunctions,
 												setopstate->hashfunctions,
 												node->numGroups,
 												0,
 												setopstate->tableContext,
-												setopstate->tempContext,
+												econtext->ecxt_per_tuple_memory,
 												false);
 }
 
@@ -220,11 +224,11 @@ ExecSetOp(PlanState *pstate)
 static TupleTableSlot *
 setop_retrieve_direct(SetOpState *setopstate)
 {
-	SetOp	   *node = (SetOp *) setopstate->ps.plan;
 	PlanState  *outerPlan;
 	SetOpStatePerGroup pergroup;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *resultTupleSlot;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
 
 	/*
 	 * get state info from node
@@ -292,11 +296,10 @@ setop_retrieve_direct(SetOpState *setopstate)
 			/*
 			 * Check whether we've crossed a group boundary.
 			 */
-			if (!execTuplesMatch(resultTupleSlot,
-								 outerslot,
-								 node->numCols, node->dupColIdx,
-								 setopstate->eqfunctions,
-								 setopstate->tempContext))
+			econtext->ecxt_outertuple = resultTupleSlot;
+			econtext->ecxt_innertuple = outerslot;
+
+			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
 			{
 				/*
 				 * Save the first input tuple of the next group.
@@ -338,6 +341,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 	PlanState  *outerPlan;
 	int			firstFlag;
 	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
 
 	/*
 	 * get state info from node
@@ -404,8 +408,8 @@ setop_fill_hash_table(SetOpState *setopstate)
 				advance_counts((SetOpStatePerGroup) entry->additional, flag);
 		}
 
-		/* Must reset temp context after each hashtable lookup */
-		MemoryContextReset(setopstate->tempContext);
+		/* Must reset expression context after each hashtable lookup */
+		ResetExprContext(econtext);
 	}
 
 	setopstate->table_filled = true;
@@ -476,6 +480,7 @@ SetOpState *
 ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 {
 	SetOpState *setopstate;
+	TupleDesc	outerDesc;
 
 	/* check for unsupported flags */
 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -498,16 +503,9 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 	setopstate->tableContext = NULL;
 
 	/*
-	 * Miscellaneous initialization
-	 *
-	 * SetOp nodes have no ExprContext initialization because they never call
-	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
-	 * anyway for calling execTuplesMatch.
+	 * create expression context
 	 */
-	setopstate->tempContext =
-		AllocSetContextCreate(CurrentMemoryContext,
-							  "SetOp",
-							  ALLOCSET_DEFAULT_SIZES);
+	ExecAssignExprContext(estate, &setopstate->ps);
 
 	/*
 	 * If hashing, we also need a longer-lived context to store the hash
@@ -534,6 +532,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 	if (node->strategy == SETOP_HASHED)
 		eflags &= ~EXEC_FLAG_REWIND;
 	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
+	outerDesc = ExecGetResultType(outerPlanState(setopstate));
 
 	/*
 	 * setop nodes do no projections, so initialize projection info for this
@@ -553,9 +552,12 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 							  &setopstate->eqfunctions,
 							  &setopstate->hashfunctions);
 	else
-		setopstate->eqfunctions =
-			execTuplesMatchPrepare(node->numCols,
-								   node->dupOperators);
+		setopstate->eqfunction =
+			execTuplesMatchPrepare(outerDesc,
+								   node->numCols,
+								   node->dupColIdx,
+								   node->dupOperators,
+								   &setopstate->ps);
 
 	if (node->strategy == SETOP_HASHED)
 	{
@@ -585,9 +587,9 @@ ExecEndSetOp(SetOpState *node)
 	ExecClearTuple(node->ps.ps_ResultTupleSlot);
 
 	/* free subsidiary stuff including hashtable */
-	MemoryContextDelete(node->tempContext);
 	if (node->tableContext)
 		MemoryContextDelete(node->tableContext);
+	ExecFreeExprContext(&node->ps);
 
 	ExecEndNode(outerPlanState(node));
 }
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index edf7d034bd3..fcf739b5e23 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -494,7 +494,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	if (nbuckets < 1)
 		nbuckets = 1;
 
-	node->hashtable = BuildTupleHashTable(ncols,
+	node->hashtable = BuildTupleHashTable(node->parent,
+										  node->descRight,
+										  ncols,
 										  node->keyColIdx,
 										  node->tab_eq_funcs,
 										  node->tab_hash_funcs,
@@ -514,7 +516,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 			if (nbuckets < 1)
 				nbuckets = 1;
 		}
-		node->hashnulls = BuildTupleHashTable(ncols,
+		node->hashnulls = BuildTupleHashTable(node->parent,
+											  node->descRight,
+											  ncols,
 											  node->keyColIdx,
 											  node->tab_eq_funcs,
 											  node->tab_hash_funcs,
@@ -598,6 +602,78 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	MemoryContextSwitchTo(oldcontext);
 }
 
+
+/*
+ * execTuplesUnequal
+ *		Return true if two tuples are definitely unequal in the indicated
+ *		fields.
+ *
+ * Nulls are neither equal nor unequal to anything else.  A true result
+ * is obtained only if there are non-null fields that compare not-equal.
+ *
+ * slot1, slot2: the tuples to compare (must have same columns!)
+ * numCols: the number of attributes to be examined
+ * matchColIdx: array of attribute column numbers
+ * eqfunctions: array of fmgr lookup info for the equality functions to use
+ * evalContext: short-term memory context for the functions; reset on entry
+ */
+static bool
+execTuplesUnequal(TupleTableSlot *slot1,
+				  TupleTableSlot *slot2,
+				  int numCols,
+				  AttrNumber *matchColIdx,
+				  FmgrInfo *eqfunctions,
+				  MemoryContext evalContext)
+{
+	MemoryContext oldContext;
+	bool		result;
+	int			i;
+
+	/* Reset and switch into the temp context. */
+	MemoryContextReset(evalContext);
+	oldContext = MemoryContextSwitchTo(evalContext);
+
+	/*
+	 * We can report the tuples as unequal as soon as one pair of non-null
+	 * fields compares not-equal; otherwise every field must be checked.  So,
+	 * start comparing at the last field (least significant sort key). That's
+	 * the most likely to be different if we are dealing with sorted input.
+	 */
+	result = false;
+
+	for (i = numCols; --i >= 0;)
+	{
+		AttrNumber	att = matchColIdx[i];
+		Datum		attr1,
+					attr2;
+		bool		isNull1,
+					isNull2;
+
+		attr1 = slot_getattr(slot1, att, &isNull1);
+
+		if (isNull1)
+			continue;			/* can't prove anything here */
+
+		attr2 = slot_getattr(slot2, att, &isNull2);
+
+		if (isNull2)
+			continue;			/* can't prove anything here */
+
+		/* Apply the type-specific equality function */
+
+		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+										attr1, attr2)))
+		{
+			result = true;		/* they are unequal */
+			break;
+		}
+	}
+
+	MemoryContextSwitchTo(oldContext);
+
+	return result;
+}
+
 /*
  * findPartialMatch: does the hashtable contain an entry that is not
  * provably distinct from the tuple?
@@ -887,6 +963,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
 												   NULL);
 
 		tupDesc = ExecTypeFromTL(righttlist, false);
+		sstate->descRight = tupDesc;
 		slot = ExecInitExtraTupleSlot(estate);
 		ExecSetSlotDescriptor(slot, tupDesc);
 		sstate->projRight = ExecBuildProjectionInfo(righttlist,
diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c
index e330650593a..9f823c58e1a 100644
--- a/src/backend/executor/nodeUnique.c
+++ b/src/backend/executor/nodeUnique.c
@@ -47,7 +47,7 @@ static TupleTableSlot *			/* return: a tuple or NULL */
 ExecUnique(PlanState *pstate)
 {
 	UniqueState *node = castNode(UniqueState, pstate);
-	Unique	   *plannode = (Unique *) node->ps.plan;
+	ExprContext *econtext = node->ps.ps_ExprContext;
 	TupleTableSlot *resultTupleSlot;
 	TupleTableSlot *slot;
 	PlanState  *outerPlan;
@@ -89,10 +89,9 @@ ExecUnique(PlanState *pstate)
 		 * If so then we loop back and fetch another new tuple from the
 		 * subplan.
 		 */
-		if (!execTuplesMatch(slot, resultTupleSlot,
-							 plannode->numCols, plannode->uniqColIdx,
-							 node->eqfunctions,
-							 node->tempContext))
+		econtext->ecxt_innertuple = slot;
+		econtext->ecxt_outertuple = resultTupleSlot;
+		if (!ExecQualAndReset(node->eqfunction, econtext))
 			break;
 	}
 
@@ -129,16 +128,9 @@ ExecInitUnique(Unique *node, EState *estate, int eflags)
 	uniquestate->ps.ExecProcNode = ExecUnique;
 
 	/*
-	 * Miscellaneous initialization
-	 *
-	 * Unique nodes have no ExprContext initialization because they never call
-	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
-	 * anyway for calling execTuplesMatch.
+	 * create expression context
 	 */
-	uniquestate->tempContext =
-		AllocSetContextCreate(CurrentMemoryContext,
-							  "Unique",
-							  ALLOCSET_DEFAULT_SIZES);
+	ExecAssignExprContext(estate, &uniquestate->ps);
 
 	/*
 	 * Tuple table initialization
@@ -160,9 +152,12 @@ ExecInitUnique(Unique *node, EState *estate, int eflags)
 	/*
 	 * Precompute fmgr lookup data for inner loop
 	 */
-	uniquestate->eqfunctions =
-		execTuplesMatchPrepare(node->numCols,
-							   node->uniqOperators);
+	uniquestate->eqfunction =
+		execTuplesMatchPrepare(ExecGetResultType(outerPlanState(uniquestate)),
+							   node->numCols,
+							   node->uniqColIdx,
+							   node->uniqOperators,
+							   &uniquestate->ps);
 
 	return uniquestate;
 }
@@ -180,7 +175,7 @@ ExecEndUnique(UniqueState *node)
 	/* clean up tuple table */
 	ExecClearTuple(node->ps.ps_ResultTupleSlot);
 
-	MemoryContextDelete(node->tempContext);
+	ExecFreeExprContext(&node->ps);
 
 	ExecEndNode(outerPlanState(node));
 }
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 5492fb33692..f9eb7787e16 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -1203,12 +1203,13 @@ spool_tuples(WindowAggState *winstate, int64 pos)
 
 		if (node->partNumCols > 0)
 		{
+			ExprContext *econtext = winstate->tmpcontext;
+
+			econtext->ecxt_innertuple = winstate->first_part_slot;
+			econtext->ecxt_outertuple = outerslot;
+
 			/* Check if this tuple still belongs to the current partition */
-			if (!execTuplesMatch(winstate->first_part_slot,
-								 outerslot,
-								 node->partNumCols, node->partColIdx,
-								 winstate->partEqfunctions,
-								 winstate->tmpcontext->ecxt_per_tuple_memory))
+			if (!ExecQualAndReset(winstate->partEqfunction, econtext))
 			{
 				/*
 				 * end of partition; copy the tuple for the next cycle.
@@ -1781,6 +1782,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
 				wfuncno,
 				numaggs,
 				aggno;
+	TupleDesc	scanDesc;
 	ListCell   *l;
 
 	/* check for unsupported flags */
@@ -1849,6 +1851,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
 	 * store in the tuplestore and use in all our working slots).
 	 */
 	ExecAssignScanTypeFromOuterPlan(&winstate->ss);
+	scanDesc = winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
 
 	ExecSetSlotDescriptor(winstate->first_part_slot,
 						  winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
@@ -1867,11 +1870,20 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
 
 	/* Set up data for comparing tuples */
 	if (node->partNumCols > 0)
-		winstate->partEqfunctions = execTuplesMatchPrepare(node->partNumCols,
-														   node->partOperators);
+		winstate->partEqfunction =
+			execTuplesMatchPrepare(scanDesc,
+								   node->partNumCols,
+								   node->partColIdx,
+								   node->partOperators,
+								   &winstate->ss.ps);
+
 	if (node->ordNumCols > 0)
-		winstate->ordEqfunctions = execTuplesMatchPrepare(node->ordNumCols,
-														  node->ordOperators);
+		winstate->ordEqfunction =
+			execTuplesMatchPrepare(scanDesc,
+								   node->ordNumCols,
+								   node->ordColIdx,
+								   node->ordOperators,
+								   &winstate->ss.ps);
 
 	/*
 	 * WindowAgg nodes use aggvalues and aggnulls as well as Agg nodes.
@@ -2378,15 +2390,15 @@ are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
 		  TupleTableSlot *slot2)
 {
 	WindowAgg  *node = (WindowAgg *) winstate->ss.ps.plan;
+	ExprContext *econtext = winstate->tmpcontext;
 
 	/* If no ORDER BY, all rows are peers with each other */
 	if (node->ordNumCols == 0)
 		return true;
 
-	return execTuplesMatch(slot1, slot2,
-						   node->ordNumCols, node->ordColIdx,
-						   winstate->ordEqfunctions,
-						   winstate->tmpcontext->ecxt_per_tuple_memory);
+	econtext->ecxt_outertuple = slot1;
+	econtext->ecxt_innertuple = slot2;
+	return ExecQualAndReset(winstate->ordEqfunction, econtext);
 }
 
 /*
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 79dbfd1a059..ff56a320e67 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -27,6 +27,7 @@
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
+#include "utils/memutils.h"
 #include "utils/timestamp.h"
 #include "utils/tuplesort.h"
 
@@ -54,6 +55,8 @@ typedef struct OSAPerQueryState
 	Aggref	   *aggref;
 	/* Memory context containing this struct and other per-query data: */
 	MemoryContext qcontext;
+	/* Context for expression evaluation */
+	ExprContext *econtext;
 	/* Do we expect multiple final-function calls within one group? */
 	bool		rescan_needed;
 
@@ -71,7 +74,7 @@ typedef struct OSAPerQueryState
 	Oid		   *sortCollations;
 	bool	   *sortNullsFirsts;
 	/* Equality operator call info, created only if needed: */
-	FmgrInfo   *equalfns;
+	ExprState  *compareTuple;
 
 	/* These fields are used only when accumulating datums: */
 
@@ -1285,6 +1288,8 @@ hypothetical_cume_dist_final(PG_FUNCTION_ARGS)
 Datum
 hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 {
+	ExprContext *econtext;
+	ExprState  *compareTuple;
 	int			nargs = PG_NARGS() - 1;
 	int64		rank = 1;
 	int64		duplicate_count = 0;
@@ -1292,12 +1297,9 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 	int			numDistinctCols;
 	Datum		abbrevVal = (Datum) 0;
 	Datum		abbrevOld = (Datum) 0;
-	AttrNumber *sortColIdx;
-	FmgrInfo   *equalfns;
 	TupleTableSlot *slot;
 	TupleTableSlot *extraslot;
 	TupleTableSlot *slot2;
-	MemoryContext tmpcontext;
 	int			i;
 
 	Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
@@ -1307,6 +1309,9 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 		PG_RETURN_INT64(rank);
 
 	osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+	econtext = osastate->qstate->econtext;
+	if (!econtext)
+		osastate->qstate->econtext = econtext = CreateStandaloneExprContext();
 
 	/* Adjust nargs to be the number of direct (or aggregated) args */
 	if (nargs % 2 != 0)
@@ -1321,26 +1326,22 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 	 */
 	numDistinctCols = osastate->qstate->numSortCols - 1;
 
-	/* Look up the equality function(s), if we didn't already */
-	equalfns = osastate->qstate->equalfns;
-	if (equalfns == NULL)
+	/* Build tuple comparator, if we didn't already */
+	compareTuple = osastate->qstate->compareTuple;
+	if (compareTuple == NULL)
 	{
-		MemoryContext qcontext = osastate->qstate->qcontext;
+		AttrNumber *sortColIdx = osastate->qstate->sortColIdx;
+		MemoryContext oldContext;
 
-		equalfns = (FmgrInfo *)
-			MemoryContextAlloc(qcontext, numDistinctCols * sizeof(FmgrInfo));
-		for (i = 0; i < numDistinctCols; i++)
-		{
-			fmgr_info_cxt(get_opcode(osastate->qstate->eqOperators[i]),
-						  &equalfns[i],
-						  qcontext);
-		}
-		osastate->qstate->equalfns = equalfns;
+		oldContext = MemoryContextSwitchTo(osastate->qstate->qcontext);
+		compareTuple = execTuplesMatchPrepare(osastate->qstate->tupdesc,
+											  numDistinctCols,
+											  sortColIdx,
+											  osastate->qstate->eqOperators,
+											  NULL);
+		MemoryContextSwitchTo(oldContext);
+		osastate->qstate->compareTuple = compareTuple;
 	}
-	sortColIdx = osastate->qstate->sortColIdx;
-
-	/* Get short-term context we can use for execTuplesMatch */
-	tmpcontext = AggGetTempMemoryContext(fcinfo);
 
 	/* because we need a hypothetical row, we can't share transition state */
 	Assert(!osastate->sort_done);
@@ -1383,19 +1384,18 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 			break;
 
 		/* count non-distinct tuples */
+		econtext->ecxt_outertuple = slot;
+		econtext->ecxt_innertuple = slot2;
+
 		if (!TupIsNull(slot2) &&
 			abbrevVal == abbrevOld &&
-			execTuplesMatch(slot, slot2,
-							numDistinctCols,
-							sortColIdx,
-							equalfns,
-							tmpcontext))
+			ExecQualAndReset(compareTuple, econtext))
 			duplicate_count++;
 
 		tmpslot = slot2;
 		slot2 = slot;
 		slot = tmpslot;
-		/* avoid execTuplesMatch() calls by reusing abbreviated keys */
+		/* avoid ExecQual() calls by reusing abbreviated keys */
 		abbrevOld = abbrevVal;
 
 		rank++;
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 117fc892f4b..0cab431f656 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -148,6 +148,7 @@ typedef enum ExprEvalOp
 	/* evaluate assorted special-purpose expression types */
 	EEOP_IOCOERCE,
 	EEOP_DISTINCT,
+	EEOP_NOT_DISTINCT,
 	EEOP_NULLIF,
 	EEOP_SQLVALUEFUNCTION,
 	EEOP_CURRENTOFEXPR,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 1d824eff361..f648af27898 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -113,25 +113,18 @@ extern bool execCurrentOf(CurrentOfExpr *cexpr,
 /*
  * prototypes from functions in execGrouping.c
  */
-extern bool execTuplesMatch(TupleTableSlot *slot1,
-				TupleTableSlot *slot2,
-				int numCols,
-				AttrNumber *matchColIdx,
-				FmgrInfo *eqfunctions,
-				MemoryContext evalContext);
-extern bool execTuplesUnequal(TupleTableSlot *slot1,
-				  TupleTableSlot *slot2,
-				  int numCols,
-				  AttrNumber *matchColIdx,
-				  FmgrInfo *eqfunctions,
-				  MemoryContext evalContext);
-extern FmgrInfo *execTuplesMatchPrepare(int numCols,
-					   Oid *eqOperators);
+extern ExprState *execTuplesMatchPrepare(TupleDesc desc,
+					   int numCols,
+					   AttrNumber *keyColIdx,
+					   Oid *eqOperators,
+					   PlanState *parent);
 extern void execTuplesHashPrepare(int numCols,
 					  Oid *eqOperators,
 					  FmgrInfo **eqFunctions,
 					  FmgrInfo **hashFunctions);
-extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+extern TupleHashTable BuildTupleHashTable(PlanState *parent,
+					TupleDesc inputDesc,
+					int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *eqfunctions,
 					FmgrInfo *hashfunctions,
 					long nbuckets, Size additionalsize,
@@ -257,6 +250,11 @@ extern ExprState *ExecInitCheck(List *qual, PlanState *parent);
 extern List *ExecInitExprList(List *nodes, PlanState *parent);
 extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase,
 				  bool doSort, bool doHash);
+extern ExprState *ExecBuildGroupingEqual(TupleDesc desc,
+					   int numCols,
+					   AttrNumber *keyColIdx,
+					   Oid *eqfunctions,
+					   PlanState *parent);
 extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
 						ExprContext *econtext,
 						TupleTableSlot *slot,
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 3b06db86fd8..24be7d2daa8 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -102,11 +102,12 @@ typedef struct AggStatePerTransData
 	bool	   *sortNullsFirst;
 
 	/*
-	 * fmgr lookup data for input columns' equality operators --- only
-	 * set/used when aggregate has DISTINCT flag.  Note that these are in
-	 * order of sort column index, not parameter index.
+	 * Comparators for input columns --- only set/used when aggregate has
+	 * DISTINCT flag. equalfnOne version is used for single-column
+	 * comparisons, equalfnMulti for the case of multiple columns.
 	 */
-	FmgrInfo   *equalfns;		/* array of length numDistinctCols */
+	FmgrInfo	equalfnOne;
+	ExprState  *equalfnMulti;
 
 	/*
 	 * initial value from pg_aggregate entry
@@ -270,7 +271,8 @@ typedef struct AggStatePerPhaseData
 	int			numsets;		/* number of grouping sets (or 0) */
 	int		   *gset_lengths;	/* lengths of grouping sets */
 	Bitmapset **grouped_cols;	/* column groupings for rollup */
-	FmgrInfo   *eqfunctions;	/* per-grouping-field equality fns */
+	ExprState **eqfunctions;	/* expression returning equality, indexed by
+								 * nr of cols to compare */
 	Agg		   *aggnode;		/* Agg node for phase data */
 	Sort	   *sortnode;		/* Sort node for input ordering for phase */
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 4bb5cb163d7..d63896f46e6 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -629,6 +629,8 @@ typedef struct TupleHashTableData
 	FmgrInfo   *in_hash_funcs;	/* hash functions for input datatype(s) */
 	FmgrInfo   *cur_eq_funcs;	/* equality functions for input vs. table */
 	uint32		hash_iv;		/* hash-function IV */
+	ExprState  *eq_func;		/* tuple equality comparator */
+	ExprContext *exprcontext;	/* expression context */
 }			TupleHashTableData;
 
 typedef tuplehash_iterator TupleHashIterator;
@@ -775,6 +777,7 @@ typedef struct SubPlanState
 	HeapTuple	curTuple;		/* copy of most recent tuple from subplan */
 	Datum		curArray;		/* most recent array from ARRAY() subplan */
 	/* these are used when hashing the subselect's output: */
+	TupleDesc	descRight;		/* subselect desc after projection */
 	ProjectionInfo *projLeft;	/* for projecting lefthand exprs */
 	ProjectionInfo *projRight;	/* for projecting subselect output */
 	TupleHashTable hashtable;	/* hash table for no-nulls subselect rows */
@@ -1789,7 +1792,7 @@ typedef struct SortState
 typedef struct GroupState
 {
 	ScanState	ss;				/* its first field is NodeTag */
-	FmgrInfo   *eqfunctions;	/* per-field lookup data for equality fns */
+	ExprState  *eqfunction;		/* equality function */
 	bool		grp_done;		/* indicates completion of Group scan */
 } GroupState;
 
@@ -1879,8 +1882,8 @@ typedef struct WindowAggState
 
 	WindowStatePerFunc perfunc; /* per-window-function information */
 	WindowStatePerAgg peragg;	/* per-plain-aggregate information */
-	FmgrInfo   *partEqfunctions;	/* equality funcs for partition columns */
-	FmgrInfo   *ordEqfunctions; /* equality funcs for ordering columns */
+	ExprState  *partEqfunction;	/* equality funcs for partition columns */
+	ExprState  *ordEqfunction; /* equality funcs for ordering columns */
 	Tuplestorestate *buffer;	/* stores rows of current partition */
 	int			current_ptr;	/* read pointer # for current */
 	int64		spooled_rows;	/* total # of rows in buffer */
@@ -1937,8 +1940,7 @@ typedef struct WindowAggState
 typedef struct UniqueState
 {
 	PlanState	ps;				/* its first field is NodeTag */
-	FmgrInfo   *eqfunctions;	/* per-field lookup data for equality fns */
-	MemoryContext tempContext;	/* short-term context for comparisons */
+	ExprState   *eqfunction;		/* tuple equality qual */
 } UniqueState;
 
 /* ----------------
@@ -2052,11 +2054,11 @@ typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
 typedef struct SetOpState
 {
 	PlanState	ps;				/* its first field is NodeTag */
+	ExprState  *eqfunction;		/* equality comparator */
 	FmgrInfo   *eqfunctions;	/* per-grouping-field equality fns */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	bool		setop_done;		/* indicates completion of output scan */
 	long		numOutput;		/* number of dups left to output */
-	MemoryContext tempContext;	/* short-term context for comparisons */
 	/* these fields are used in SETOP_SORTED mode: */
 	SetOpStatePerGroup pergroup;	/* per-group working state */
 	HeapTuple	grp_firstTuple; /* copy of first tuple of current group */
-- 
2.15.1.354.g95ec6b1b33.dirty

Reply via email to