On Tue, Apr 28, 2020 at 8:25 AM Tomas Vondra
<tomas.von...@2ndquadrant.com> wrote:
...
> Any particular reasons to pick dynahash over simplehash? ISTM we're
> using simplehash elsewhere in the executor (grouping, tidbitmap, ...),
> while there are not many places using dynahash for simple short-lived
> hash tables. Of course, that alone is a weak reason to insist on using
> simplehash here, but I suppose there were reasons for not using dynahash
> and we'll end up facing the same issues here.

I've attached a patch series that includes switching to simplehash.
Obviously we'd really just collapse all of these patches, but it's
perhaps interesting to see the changes required to use each style
(binary search, dynahash, simplehash).

As before, there are clearly comments and naming things to be
addressed, but the implementation should be reasonably clean.

James
From d455a57701df064f96e5f2a3be2b1c736bacc485 Mon Sep 17 00:00:00 2001
From: James Coleman <jtc...@gmail.com>
Date: Tue, 28 Apr 2020 21:33:12 -0400
Subject: [PATCH v3 3/3] Try simple hash

---
 src/backend/executor/execExpr.c       | 24 +++----
 src/backend/executor/execExprInterp.c | 99 +++++++++++++++++----------
 src/include/executor/execExpr.h       | 26 ++++++-
 3 files changed, 100 insertions(+), 49 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 6249db5426..37c1669153 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -950,13 +950,10 @@ ExecInitExprRec(Expr *node, ExprState *state,
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
 				Oid			func;
-				Oid			hash_func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
-				FmgrInfo   *hash_finfo;
-				FunctionCallInfo hash_fcinfo;
 				AclResult	aclresult;
 				bool		useBinarySearch = false;
 
@@ -996,18 +993,20 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
 					if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH)
 					{
+						Oid			hash_func;
+
 						/*
-						 * Find the hash op that matches the originally
-						 * planned equality op.
+						 * Find the hash op that matches the originally planned
+						 * equality op. If we don't have one, we'll just fall
+						 * back to the default linear scan implementation.
 						 */
 						useBinarySearch = get_op_hash_functions(opexpr->opno, NULL, &hash_func);
 
-						/*
-						 * But we may not have one, so fall back to the
-						 * default implementation if necessary.
-						 */
 						if (useBinarySearch)
 						{
+							FmgrInfo   *hash_finfo;
+							FunctionCallInfo hash_fcinfo;
+
 							hash_finfo = palloc0(sizeof(FmgrInfo));
 							hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
 							fmgr_info(hash_func, hash_finfo);
@@ -1015,6 +1014,10 @@ ExecInitExprRec(Expr *node, ExprState *state,
 							InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
 													 opexpr->inputcollid, NULL, NULL);
 							InvokeFunctionExecuteHook(hash_func);
+
+							scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo;
+							scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo;
+							scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr;
 						}
 					}
 				}
@@ -1044,9 +1047,6 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					scratch.d.scalararraybinsearchop.finfo = finfo;
 					scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo;
 					scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr;
-					scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo;
-					scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo;
-					scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr;
 				}
 				else
 				{
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index e54e807c6b..aa7f5ae0df 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -178,6 +178,27 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   AggStatePerGroup pergroup,
 					   ExprContext *aggcontext, int setno);
 
+static bool
+element_match(struct saophash_hash *tb, Datum key1, Datum key2);
+static uint32 element_hash(struct saophash_hash *tb, Datum key);
+
+/*
+ * Define parameters for ScalarArrayOpExpr hash table code generation. The interface is
+ * *also* declared in execnodes.h (to generate the types, which are externally
+ * visible).
+ */
+#define SH_PREFIX saophash
+#define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntryData
+#define SH_KEY_TYPE Datum
+#define SH_KEY key
+#define SH_HASH_KEY(tb, key) element_hash(tb, key)
+#define SH_EQUAL(tb, a, b) element_match(tb, a, b)
+#define SH_SCOPE extern
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) a->hash
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
 /*
  * Prepare ExprState for interpreted execution.
  */
@@ -3591,8 +3612,6 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	*op->resnull = resultnull;
 }
 
-static ExprEvalStep *current_saop_op;
-
 /*
  * Hash function for elements.
  *
@@ -3601,16 +3620,16 @@ static ExprEvalStep *current_saop_op;
  */
 /* XXX: Name function to be specific to saop binsearch? */
 static uint32
-element_hash(const void *key, Size keysize)
+element_hash(struct saophash_hash *tb, Datum key)
 {
+	ScalarArrayOpExprHashTable elements_tab = (ScalarArrayOpExprHashTable) tb->private_data;
+	FunctionCallInfo fcinfo = elements_tab->op->d.scalararraybinsearchop.hash_fcinfo_data;
 	Datum hash;
-	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.hash_fcinfo_data;
 
-	fcinfo->args[0].value = *((const Datum *) key);
+	fcinfo->args[0].value = key;
 	fcinfo->args[0].isnull = false;
 
-	/* The keysize parameter is superfluous here */
-	hash = current_saop_op->d.scalararraybinsearchop.hash_fn_addr(fcinfo);
+	hash = elements_tab->op->d.scalararraybinsearchop.hash_fn_addr(fcinfo);
 
 	return DatumGetUInt32(hash);
 }
@@ -3619,21 +3638,22 @@ element_hash(const void *key, Size keysize)
  * Matching function for elements, to be used in hashtable lookups.
  */
 /* XXX: Name function to be specific to saop binsearch? */
-static int
-element_match(const void *key1, const void *key2, Size keysize)
+static bool
+element_match(struct saophash_hash *tb, Datum key1, Datum key2)
 {
 	Datum result;
-	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.fcinfo_data;
 
-	fcinfo->args[0].value = *((const Datum *) key1);
+	ScalarArrayOpExprHashTable elements_tab = (ScalarArrayOpExprHashTable) tb->private_data;
+	FunctionCallInfo fcinfo = elements_tab->op->d.scalararraybinsearchop.fcinfo_data;
+
+	fcinfo->args[0].value = key1;
 	fcinfo->args[0].isnull = false;
-	fcinfo->args[1].value = *((const Datum *) key2);
+	fcinfo->args[1].value = key2;
 	fcinfo->args[1].isnull = false;
 
-	/* The keysize parameter is superfluous here */
-	result = current_saop_op->d.scalararraybinsearchop.fn_addr(fcinfo);
+	result = elements_tab->op->d.scalararraybinsearchop.fn_addr(fcinfo);
 
-	return DatumGetBool(result) ? 0 : 1;
+	return DatumGetBool(result);
 }
 
 /*
@@ -3653,22 +3673,21 @@ element_match(const void *key1, const void *key2, Size keysize)
 void
 ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 {
+	ScalarArrayOpExprHashTable elements_tab = op->d.scalararraybinsearchop.elements_tab;
 	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
 	bool		strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict;
 	ArrayType  *arr;
 	Datum		scalar = fcinfo->args[0].value;
+	bool		scalar_isnull = fcinfo->args[0].isnull;
 	Datum		result;
 	bool		resultnull;
 	bool		hashfound;
-	int			res;
 
 	/* If we're only executing once, do we need a way to fall back to the regular loop? */
 
 	/* We don't setup a binary search op if the array const is null. */
 	Assert(!*op->resnull);
 
-	current_saop_op = op;
-
 	/*
 	 * If the scalar is NULL, and the function is strict, return NULL; no
 	 * point in executing the search.
@@ -3680,17 +3699,17 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	}
 
 	/* Preprocess the array the first time we execute the op. */
-	if (op->d.scalararraybinsearchop.elements_tab == NULL)
+	if (elements_tab == NULL)
 	{
 		int16		typlen;
 		bool		typbyval;
 		char		typalign;
-		HASHCTL		elem_hash_ctl;
 		int			nitems;
 		int			num_nulls = 0;
 		char	   *s;
 		bits8	   *bitmap;
 		int			bitmask;
+		MemoryContext oldcontext;
 
 		arr = DatumGetArrayTypeP(*op->resvalue);
 		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
@@ -3700,16 +3719,14 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 							 &typbyval,
 							 &typalign);
 
-		MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl));
-		elem_hash_ctl.keysize = sizeof(Datum);
-		elem_hash_ctl.entrysize = sizeof(Datum);
-		elem_hash_ctl.hash = element_hash;
-		elem_hash_ctl.match = element_match;
-		elem_hash_ctl.hcxt = econtext->ecxt_per_query_memory;
-		op->d.scalararraybinsearchop.elements_tab = hash_create("Scalar array op expr elements",
-								   nitems,
-								   &elem_hash_ctl,
-								   HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory);
+
+		elements_tab = (ScalarArrayOpExprHashTable) palloc(sizeof(ScalarArrayOpExprHashTableData));
+		op->d.scalararraybinsearchop.elements_tab = elements_tab;
+		elements_tab->op = op;
+		elements_tab->hashtab = saophash_create(CurrentMemoryContext, nitems, elements_tab);
+
+		MemoryContextSwitchTo(oldcontext);
 
 		s = (char *) ARR_DATA_PTR(arr);
 		bitmap = ARR_NULLBITMAP(arr);
@@ -3729,7 +3746,7 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 				s = att_addlength_pointer(s, typlen, s);
 				s = (char *) att_align_nominal(s, typalign);
 
-				hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &element, HASH_ENTER, NULL);
+				saophash_insert(elements_tab->hashtab, element, &hashfound);
 			}
 
 			/* Advance bitmap pointer if any. */
@@ -3758,7 +3775,8 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	}
 
 	/* Check the hash to see if we have a match. */
-	hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &scalar, HASH_FIND, &hashfound);
+	hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar);
+
 	result = BoolGetDatum(hashfound);
 	resultnull = false;
 
@@ -3771,18 +3789,27 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	{
 		if (strictfunc)
 		{
-			/* Had nulls, so strict function implies null. */
+			/* Had nulls and is a strict function, so instead of executing the
+			 * function onces with a null rhs, we can assume null. */
 			result = (Datum) 0;
 			resultnull = true;
 		}
 		else
 		{
-			/* Execute function will null rhs just once. */
+			/* TODO: No regression test for this branch. */
+			/*
+			 * Execute function will null rhs just once.
+			 *
+			 * The hash lookup path will have scribbled on the lhs argument so
+			 * we need to set it up also (even though we entered this function
+			 * with it already set).
+			 */
+			fcinfo->args[0].value = scalar;
+			fcinfo->args[0].isnull = scalar_isnull;
 			fcinfo->args[1].value = (Datum) 0;
 			fcinfo->args[1].isnull = true;
 
-			res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
-			result = BoolGetDatum(res == 0);
+			result = op->d.scalararraybinsearchop.fn_addr(fcinfo);
 			resultnull = fcinfo->isnull;
 		}
 	}
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 2e93b1f990..847d344e8d 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -21,6 +21,30 @@
 struct ExprEvalStep;
 struct SubscriptingRefState;
 
+typedef struct ScalarArrayOpExprHashEntryData *ScalarArrayOpExprHashEntry;
+typedef struct ScalarArrayOpExprHashTableData *ScalarArrayOpExprHashTable;
+
+typedef struct ScalarArrayOpExprHashEntryData
+{
+	Datum key;
+	uint32		status;			/* hash status */
+	uint32		hash;			/* hash value (cached) */
+} ScalarArrayOpExprHashEntryData;
+
+/* define parameters necessary to generate the tuple hash table interface */
+#define SH_PREFIX saophash
+#define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntryData
+#define SH_KEY_TYPE Datum
+#define SH_SCOPE extern
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
+typedef struct ScalarArrayOpExprHashTableData
+{
+	saophash_hash *hashtab;	/* underlying hash table */
+	struct ExprEvalStep *op;
+}			ScalarArrayOpExprHashTableData;
+
 /* Bits in ExprState->flags (see also execnodes.h for public flag bits): */
 /* expression's interpreter has been initialized */
 #define EEO_FLAG_INTERPRETER_INITIALIZED	(1 << 1)
@@ -555,7 +579,7 @@ typedef struct ExprEvalStep
 		struct
 		{
 			bool		has_nulls;
-			HTAB	   *elements_tab;
+			ScalarArrayOpExprHashTable	   elements_tab;
 			FmgrInfo   *finfo;	/* function's lookup data */
 			FunctionCallInfo fcinfo_data;	/* arguments etc */
 			/* faster to access without additional indirection: */
-- 
2.17.1

From cd760f5d333a2f1c7b871f05d4a231c0381c660d Mon Sep 17 00:00:00 2001
From: James Coleman <jtc...@gmail.com>
Date: Mon, 27 Apr 2020 08:51:28 -0400
Subject: [PATCH v3 2/3] Try using dynahash

---
 src/backend/executor/execExpr.c           |  29 ++--
 src/backend/executor/execExprInterp.c     | 197 ++++++++++++----------
 src/include/executor/execExpr.h           |   7 +-
 src/test/regress/expected/expressions.out |   7 +
 src/test/regress/sql/expressions.sql      |   2 +
 5 files changed, 138 insertions(+), 104 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index c202cc7e89..6249db5426 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/nbtree.h"
+#include "access/hash.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_type.h"
 #include "executor/execExpr.h"
@@ -949,10 +950,13 @@ ExecInitExprRec(Expr *node, ExprState *state,
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
 				Oid			func;
+				Oid			hash_func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
+				FmgrInfo   *hash_finfo;
+				FunctionCallInfo hash_fcinfo;
 				AclResult	aclresult;
 				bool		useBinarySearch = false;
 
@@ -983,11 +987,6 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				{
 					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
 					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
-					Oid			orderingOp;
-					Oid			orderingFunc;
-					Oid			opfamily;
-					Oid			opcintype;
-					int16		strategy;
 					int			nitems;
 
 					/*
@@ -998,21 +997,24 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH)
 					{
 						/*
-						 * Find the ordering op that matches the originally
+						 * Find the hash op that matches the originally
 						 * planned equality op.
 						 */
-						orderingOp = get_ordering_op_for_equality_op(opexpr->opno, NULL);
-						get_ordering_op_properties(orderingOp, &opfamily, &opcintype, &strategy);
-						orderingFunc = get_opfamily_proc(opfamily, opcintype, opcintype, BTORDER_PROC);
+						useBinarySearch = get_op_hash_functions(opexpr->opno, NULL, &hash_func);
 
 						/*
 						 * But we may not have one, so fall back to the
 						 * default implementation if necessary.
 						 */
-						if (OidIsValid(orderingFunc))
+						if (useBinarySearch)
 						{
-							useBinarySearch = true;
-							func = orderingFunc;
+							hash_finfo = palloc0(sizeof(FmgrInfo));
+							hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
+							fmgr_info(hash_func, hash_finfo);
+							fmgr_info_set_expr((Node *) node, hash_finfo);
+							InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
+													 opexpr->inputcollid, NULL, NULL);
+							InvokeFunctionExecuteHook(hash_func);
 						}
 					}
 				}
@@ -1042,6 +1044,9 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					scratch.d.scalararraybinsearchop.finfo = finfo;
 					scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo;
 					scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr;
+					scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo;
+					scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo;
+					scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr;
 				}
 				else
 				{
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 5bebafbf0c..e54e807c6b 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -178,8 +178,6 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   AggStatePerGroup pergroup,
 					   ExprContext *aggcontext, int setno);
 
-static int	compare_array_elements(const void *a, const void *b, void *arg);
-
 /*
  * Prepare ExprState for interpreted execution.
  */
@@ -3593,6 +3591,51 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	*op->resnull = resultnull;
 }
 
+static ExprEvalStep *current_saop_op;
+
+/*
+ * Hash function for elements.
+ *
+ * We use the element type's default hash opclass, and the column collation
+ * if the type is collation-sensitive.
+ */
+/* XXX: Name function to be specific to saop binsearch? */
+static uint32
+element_hash(const void *key, Size keysize)
+{
+	Datum hash;
+	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.hash_fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) key);
+	fcinfo->args[0].isnull = false;
+
+	/* The keysize parameter is superfluous here */
+	hash = current_saop_op->d.scalararraybinsearchop.hash_fn_addr(fcinfo);
+
+	return DatumGetUInt32(hash);
+}
+
+/*
+ * Matching function for elements, to be used in hashtable lookups.
+ */
+/* XXX: Name function to be specific to saop binsearch? */
+static int
+element_match(const void *key1, const void *key2, Size keysize)
+{
+	Datum result;
+	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) key1);
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].value = *((const Datum *) key2);
+	fcinfo->args[1].isnull = false;
+
+	/* The keysize parameter is superfluous here */
+	result = current_saop_op->d.scalararraybinsearchop.fn_addr(fcinfo);
+
+	return DatumGetBool(result) ? 0 : 1;
+}
+
 /*
  * Evaluate "scalar op ANY (const array)".
  *
@@ -3613,16 +3656,19 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
 	bool		strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict;
 	ArrayType  *arr;
+	Datum		scalar = fcinfo->args[0].value;
 	Datum		result;
 	bool		resultnull;
-	bool	   *elem_nulls;
-	int			l = 0,
-				r,
-				res;
+	bool		hashfound;
+	int			res;
+
+	/* If we're only executing once, do we need a way to fall back to the regular loop? */
 
 	/* We don't setup a binary search op if the array const is null. */
 	Assert(!*op->resnull);
 
+	current_saop_op = op;
+
 	/*
 	 * If the scalar is NULL, and the function is strict, return NULL; no
 	 * point in executing the search.
@@ -3634,104 +3680,88 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	}
 
 	/* Preprocess the array the first time we execute the op. */
-	if (op->d.scalararraybinsearchop.elem_values == NULL)
+	if (op->d.scalararraybinsearchop.elements_tab == NULL)
 	{
-		/* Cache the original lhs so we can scribble on it. */
-		Datum		scalar = fcinfo->args[0].value;
-		bool		scalar_isnull = fcinfo->args[0].isnull;
-		int			num_nonnulls = 0;
-		MemoryContext old_cxt;
-		MemoryContext array_cxt;
 		int16		typlen;
 		bool		typbyval;
 		char		typalign;
+		HASHCTL		elem_hash_ctl;
+		int			nitems;
+		int			num_nulls = 0;
+		char	   *s;
+		bits8	   *bitmap;
+		int			bitmask;
 
 		arr = DatumGetArrayTypeP(*op->resvalue);
+		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
 
 		get_typlenbyvalalign(ARR_ELEMTYPE(arr),
 							 &typlen,
 							 &typbyval,
 							 &typalign);
 
-		array_cxt = AllocSetContextCreate(
-										  econtext->ecxt_per_query_memory,
-										  "scalararraybinsearchop context",
-										  ALLOCSET_SMALL_SIZES);
-		old_cxt = MemoryContextSwitchTo(array_cxt);
+		MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl));
+		elem_hash_ctl.keysize = sizeof(Datum);
+		elem_hash_ctl.entrysize = sizeof(Datum);
+		elem_hash_ctl.hash = element_hash;
+		elem_hash_ctl.match = element_match;
+		elem_hash_ctl.hcxt = econtext->ecxt_per_query_memory;
+		op->d.scalararraybinsearchop.elements_tab = hash_create("Scalar array op expr elements",
+								   nitems,
+								   &elem_hash_ctl,
+								   HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+		s = (char *) ARR_DATA_PTR(arr);
+		bitmap = ARR_NULLBITMAP(arr);
+		bitmask = 1;
+		for (int i = 0; i < nitems; i++)
+		{
+			Datum		element;
+
+			/* Get array element, checking for NULL. */
+			if (bitmap && (*bitmap & bitmask) == 0)
+			{
+				num_nulls++;
+			}
+			else
+			{
+				element = fetch_att(s, typbyval, typlen);
+				s = att_addlength_pointer(s, typlen, s);
+				s = (char *) att_align_nominal(s, typalign);
 
-		deconstruct_array(arr,
-						  ARR_ELEMTYPE(arr),
-						  typlen, typbyval, typalign,
-						  &op->d.scalararraybinsearchop.elem_values, &elem_nulls, &op->d.scalararraybinsearchop.num_elems);
+				hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &element, HASH_ENTER, NULL);
+			}
 
-		/* Remove null entries from the array. */
-		for (int j = 0; j < op->d.scalararraybinsearchop.num_elems; j++)
-		{
-			if (!elem_nulls[j])
-				op->d.scalararraybinsearchop.elem_values[num_nonnulls++] = op->d.scalararraybinsearchop.elem_values[j];
+			/* Advance bitmap pointer if any. */
+			if (bitmap)
+			{
+				bitmask <<= 1;
+				if (bitmask == 0x100)
+				{
+					bitmap++;
+					bitmask = 1;
+				}
+			}
 		}
 
 		/*
 		 * Remember if we had any nulls so that we know if we need to execute
 		 * non-strict functions with a null lhs value if no match is found.
 		 */
-		op->d.scalararraybinsearchop.has_nulls = num_nonnulls < op->d.scalararraybinsearchop.num_elems;
-		op->d.scalararraybinsearchop.num_elems = num_nonnulls;
+		op->d.scalararraybinsearchop.has_nulls = num_nulls > 0;
 
 		/*
-		 * Setup the fcinfo for sorting. We've removed nulls, so both lhs and
-		 * rhs values are guaranteed to be non-null.
+		 * We only setup a binary search op if we have > 8 elements, so we don't
+		 * need to worry about adding an optimization for the empty array case.
 		 */
-		fcinfo->args[0].isnull = false;
-		fcinfo->args[1].isnull = false;
-
-		/* Sort the array and remove duplicate elements. */
-		qsort_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
-				  compare_array_elements, op);
-		op->d.scalararraybinsearchop.num_elems = qunique_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
-															 compare_array_elements, op);
-
-		/* Restore the lhs value after we scribbed on it for sorting. */
-		fcinfo->args[0].value = scalar;
-		fcinfo->args[0].isnull = scalar_isnull;
-
-		MemoryContextSwitchTo(old_cxt);
+		Assert(nitems > 0);
 	}
 
-	/*
-	 * We only setup a binary search op if we have > 8 elements, so we don't
-	 * need to worry about adding an optimization for the empty array case.
-	 */
-	Assert(!(op->d.scalararraybinsearchop.num_elems <= 0 && !op->d.scalararraybinsearchop.has_nulls));
-
-	/* Assume no match will be found until proven otherwise. */
-	result = BoolGetDatum(false);
+	/* Check the hash to see if we have a match. */
+	hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &scalar, HASH_FIND, &hashfound);
+	result = BoolGetDatum(hashfound);
 	resultnull = false;
 
-	/* Binary search through the array. */
-	r = op->d.scalararraybinsearchop.num_elems - 1;
-	while (l <= r)
-	{
-		int			i = (l + r) / 2;
-
-		fcinfo->args[1].value = op->d.scalararraybinsearchop.elem_values[i];
-
-		/* Call comparison function */
-		fcinfo->isnull = false;
-		res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
-
-		if (res == 0)
-		{
-			result = BoolGetDatum(true);
-			resultnull = false;
-			break;
-		}
-		else if (res > 0)
-			l = i + 1;
-		else
-			r = i - 1;
-	}
-
 	/*
 	 * If we didn't find a match in the array, we still might need to handle
 	 * the possibility of null values (we've previously removed them from the
@@ -3761,19 +3791,6 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	*op->resnull = resultnull;
 }
 
-/* XXX: Name function to be specific to saop binsearch? */
-static int
-compare_array_elements(const void *a, const void *b, void *arg)
-{
-	ExprEvalStep *op = (ExprEvalStep *) arg;
-	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
-
-	fcinfo->args[0].value = *((const Datum *) a);
-	fcinfo->args[1].value = *((const Datum *) b);
-
-	return DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
-}
-
 /*
  * Evaluate a NOT NULL domain constraint.
  */
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index ac4478d060..2e93b1f990 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -554,13 +554,16 @@ typedef struct ExprEvalStep
 		/* for EEOP_SCALARARRAYOP_BINSEARCH */
 		struct
 		{
-			int			num_elems;
 			bool		has_nulls;
-			Datum	   *elem_values;
+			HTAB	   *elements_tab;
 			FmgrInfo   *finfo;	/* function's lookup data */
 			FunctionCallInfo fcinfo_data;	/* arguments etc */
 			/* faster to access without additional indirection: */
 			PGFunction	fn_addr;	/* actual call address */
+			FmgrInfo   *hash_finfo;	/* function's lookup data */
+			FunctionCallInfo hash_fcinfo_data;	/* arguments etc */
+			/* faster to access without additional indirection: */
+			PGFunction	hash_fn_addr;	/* actual call address */
 		}			scalararraybinsearchop;
 
 		/* for EEOP_XMLEXPR */
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 55b57b9c59..f37dfe1ce2 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -197,3 +197,10 @@ select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
  
 (1 row)
 
+select 'a' in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+ ?column? 
+----------
+ t
+(1 row)
+
+-- TODO: test non-strict op?
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index 3cb850d838..c30fe66c5e 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -76,3 +76,5 @@ select 1 in (null, null, null, null, null, null, null, null, null, null, null);
 select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
 select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
 select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select 'a' in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+-- TODO: test non-strict op?
-- 
2.17.1

From 08742543d7865d5f25c24c26bf1014924035c9eb Mon Sep 17 00:00:00 2001
From: jcoleman <jtc...@gmail.com>
Date: Fri, 10 Apr 2020 21:40:50 +0000
Subject: [PATCH v3 1/3] Binary search const arrays in OR'd ScalarArrayOps

Currently all scalar array op expressions execute as a linear search
through the array argument. However when OR semantics are desired it's
possible to instead use a binary search. Here we apply that optimization
to constant arrays (so we don't need to worry about teaching expression
execution when params change) of at least length 9 (since very short
arrays average to the same number of comparisons for linear searches and
thus avoid the preprocessing necessary for a binary search).
---
 src/backend/executor/execExpr.c           |  79 +++++++--
 src/backend/executor/execExprInterp.c     | 193 ++++++++++++++++++++++
 src/include/executor/execExpr.h           |  14 ++
 src/test/regress/expected/expressions.out |  39 +++++
 src/test/regress/sql/expressions.sql      |  11 ++
 5 files changed, 326 insertions(+), 10 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index c6a77bd66f..c202cc7e89 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -49,6 +49,7 @@
 #include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
+#define MIN_ARRAY_SIZE_FOR_BINARY_SEARCH 9
 
 typedef struct LastAttnumInfo
 {
@@ -947,11 +948,13 @@ ExecInitExprRec(Expr *node, ExprState *state,
 		case T_ScalarArrayOpExpr:
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
+				Oid			func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
 				AclResult	aclresult;
+				bool		useBinarySearch = false;
 
 				Assert(list_length(opexpr->args) == 2);
 				scalararg = (Expr *) linitial(opexpr->args);
@@ -964,12 +967,58 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				if (aclresult != ACLCHECK_OK)
 					aclcheck_error(aclresult, OBJECT_FUNCTION,
 								   get_func_name(opexpr->opfuncid));
-				InvokeFunctionExecuteHook(opexpr->opfuncid);
 
 				/* Set up the primary fmgr lookup information */
 				finfo = palloc0(sizeof(FmgrInfo));
 				fcinfo = palloc0(SizeForFunctionCallInfo(2));
-				fmgr_info(opexpr->opfuncid, finfo);
+				func = opexpr->opfuncid;
+
+				/*
+				 * If we have a constant array and want OR semantics, then we
+				 * can use a binary search to implement the op instead of
+				 * looping through the entire array for each execution.
+				 */
+				if (opexpr->useOr && arrayarg && IsA(arrayarg, Const) &&
+					!((Const *) arrayarg)->constisnull)
+				{
+					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
+					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
+					Oid			orderingOp;
+					Oid			orderingFunc;
+					Oid			opfamily;
+					Oid			opcintype;
+					int16		strategy;
+					int			nitems;
+
+					/*
+					 * Only do the optimization if we have a large enough
+					 * array to make it worth it.
+					 */
+					nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+					if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH)
+					{
+						/*
+						 * Find the ordering op that matches the originally
+						 * planned equality op.
+						 */
+						orderingOp = get_ordering_op_for_equality_op(opexpr->opno, NULL);
+						get_ordering_op_properties(orderingOp, &opfamily, &opcintype, &strategy);
+						orderingFunc = get_opfamily_proc(opfamily, opcintype, opcintype, BTORDER_PROC);
+
+						/*
+						 * But we may not have one, so fall back to the
+						 * default implementation if necessary.
+						 */
+						if (OidIsValid(orderingFunc))
+						{
+							useBinarySearch = true;
+							func = orderingFunc;
+						}
+					}
+				}
+
+				InvokeFunctionExecuteHook(func);
+				fmgr_info(func, finfo);
 				fmgr_info_set_expr((Node *) node, finfo);
 				InitFunctionCallInfoData(*fcinfo, finfo, 2,
 										 opexpr->inputcollid, NULL, NULL);
@@ -981,18 +1030,28 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				/*
 				 * Evaluate array argument into our return value.  There's no
 				 * danger in that, because the return value is guaranteed to
-				 * be overwritten by EEOP_SCALARARRAYOP, and will not be
-				 * passed to any other expression.
+				 * be overwritten by EEOP_SCALARARRAYOP[_BINSEARCH], and will
+				 * not be passed to any other expression.
 				 */
 				ExecInitExprRec(arrayarg, state, resv, resnull);
 
 				/* And perform the operation */
-				scratch.opcode = EEOP_SCALARARRAYOP;
-				scratch.d.scalararrayop.element_type = InvalidOid;
-				scratch.d.scalararrayop.useOr = opexpr->useOr;
-				scratch.d.scalararrayop.finfo = finfo;
-				scratch.d.scalararrayop.fcinfo_data = fcinfo;
-				scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
+				if (useBinarySearch)
+				{
+					scratch.opcode = EEOP_SCALARARRAYOP_BINSEARCH;
+					scratch.d.scalararraybinsearchop.finfo = finfo;
+					scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo;
+					scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr;
+				}
+				else
+				{
+					scratch.opcode = EEOP_SCALARARRAYOP;
+					scratch.d.scalararrayop.element_type = InvalidOid;
+					scratch.d.scalararrayop.useOr = opexpr->useOr;
+					scratch.d.scalararrayop.finfo = finfo;
+					scratch.d.scalararrayop.fcinfo_data = fcinfo;
+					scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
+				}
 				ExprEvalPushStep(state, &scratch);
 				break;
 			}
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 113ed1547c..5bebafbf0c 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -76,6 +76,7 @@
 #include "utils/timestamp.h"
 #include "utils/typcache.h"
 #include "utils/xml.h"
+#include "lib/qunique.h"
 
 /*
  * Use computed-goto-based opcode dispatch when computed gotos are available.
@@ -177,6 +178,8 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   AggStatePerGroup pergroup,
 					   ExprContext *aggcontext, int setno);
 
+static int	compare_array_elements(const void *a, const void *b, void *arg);
+
 /*
  * Prepare ExprState for interpreted execution.
  */
@@ -425,6 +428,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_DOMAIN_CHECK,
 		&&CASE_EEOP_CONVERT_ROWTYPE,
 		&&CASE_EEOP_SCALARARRAYOP,
+		&&CASE_EEOP_SCALARARRAYOP_BINSEARCH,
 		&&CASE_EEOP_XMLEXPR,
 		&&CASE_EEOP_AGGREF,
 		&&CASE_EEOP_GROUPING_FUNC,
@@ -1464,6 +1468,14 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_SCALARARRAYOP_BINSEARCH)
+		{
+			/* too complex for an inline implementation */
+			ExecEvalScalarArrayOpBinSearch(state, op, econtext);
+
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_DOMAIN_NOTNULL)
 		{
 			/* too complex for an inline implementation */
@@ -3581,6 +3593,187 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	*op->resnull = resultnull;
 }
 
+/*
+ * Evaluate "scalar op ANY (const array)".
+ *
+ * This is an optimized version of ExecEvalScalarArrayOp that only supports
+ * ANY (i.e., OR semantics) because it binary searches through the array for a
+ * match after sorting the array and removing null and duplicate entries.
+ *
+ * Source array is in our result area, scalar arg is already evaluated into
+ * fcinfo->args[0].
+ *
+ * The operator always yields boolean, and we combine the results across all
+ * array elements using OR.  Of course we short-circuit as soon as the result
+ * is known.
+ */
+void
+ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
+	bool		strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict;
+	ArrayType  *arr;
+	Datum		result;
+	bool		resultnull;
+	bool	   *elem_nulls;
+	int			l = 0,
+				r,
+				res;
+
+	/* We don't setup a binary search op if the array const is null. */
+	Assert(!*op->resnull);
+
+	/*
+	 * If the scalar is NULL, and the function is strict, return NULL; no
+	 * point in executing the search.
+	 */
+	if (fcinfo->args[0].isnull && strictfunc)
+	{
+		*op->resnull = true;
+		return;
+	}
+
+	/* Preprocess the array the first time we execute the op. */
+	if (op->d.scalararraybinsearchop.elem_values == NULL)
+	{
+		/* Cache the original lhs so we can scribble on it. */
+		Datum		scalar = fcinfo->args[0].value;
+		bool		scalar_isnull = fcinfo->args[0].isnull;
+		int			num_nonnulls = 0;
+		MemoryContext old_cxt;
+		MemoryContext array_cxt;
+		int16		typlen;
+		bool		typbyval;
+		char		typalign;
+
+		arr = DatumGetArrayTypeP(*op->resvalue);
+
+		get_typlenbyvalalign(ARR_ELEMTYPE(arr),
+							 &typlen,
+							 &typbyval,
+							 &typalign);
+
+		array_cxt = AllocSetContextCreate(
+										  econtext->ecxt_per_query_memory,
+										  "scalararraybinsearchop context",
+										  ALLOCSET_SMALL_SIZES);
+		old_cxt = MemoryContextSwitchTo(array_cxt);
+
+		deconstruct_array(arr,
+						  ARR_ELEMTYPE(arr),
+						  typlen, typbyval, typalign,
+						  &op->d.scalararraybinsearchop.elem_values, &elem_nulls, &op->d.scalararraybinsearchop.num_elems);
+
+		/* Remove null entries from the array. */
+		for (int j = 0; j < op->d.scalararraybinsearchop.num_elems; j++)
+		{
+			if (!elem_nulls[j])
+				op->d.scalararraybinsearchop.elem_values[num_nonnulls++] = op->d.scalararraybinsearchop.elem_values[j];
+		}
+
+		/*
+		 * Remember if we had any nulls so that we know if we need to execute
+		 * non-strict functions with a null lhs value if no match is found.
+		 */
+		op->d.scalararraybinsearchop.has_nulls = num_nonnulls < op->d.scalararraybinsearchop.num_elems;
+		op->d.scalararraybinsearchop.num_elems = num_nonnulls;
+
+		/*
+		 * Setup the fcinfo for sorting. We've removed nulls, so both lhs and
+		 * rhs values are guaranteed to be non-null.
+		 */
+		fcinfo->args[0].isnull = false;
+		fcinfo->args[1].isnull = false;
+
+		/* Sort the array and remove duplicate elements. */
+		qsort_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
+				  compare_array_elements, op);
+		op->d.scalararraybinsearchop.num_elems = qunique_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
+															 compare_array_elements, op);
+
+		/* Restore the lhs value after we scribbed on it for sorting. */
+		fcinfo->args[0].value = scalar;
+		fcinfo->args[0].isnull = scalar_isnull;
+
+		MemoryContextSwitchTo(old_cxt);
+	}
+
+	/*
+	 * We only setup a binary search op if we have > 8 elements, so we don't
+	 * need to worry about adding an optimization for the empty array case.
+	 */
+	Assert(!(op->d.scalararraybinsearchop.num_elems <= 0 && !op->d.scalararraybinsearchop.has_nulls));
+
+	/* Assume no match will be found until proven otherwise. */
+	result = BoolGetDatum(false);
+	resultnull = false;
+
+	/* Binary search through the array. */
+	r = op->d.scalararraybinsearchop.num_elems - 1;
+	while (l <= r)
+	{
+		int			i = (l + r) / 2;
+
+		fcinfo->args[1].value = op->d.scalararraybinsearchop.elem_values[i];
+
+		/* Call comparison function */
+		fcinfo->isnull = false;
+		res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
+
+		if (res == 0)
+		{
+			result = BoolGetDatum(true);
+			resultnull = false;
+			break;
+		}
+		else if (res > 0)
+			l = i + 1;
+		else
+			r = i - 1;
+	}
+
+	/*
+	 * If we didn't find a match in the array, we still might need to handle
+	 * the possibility of null values (we've previously removed them from the
+	 * array).
+	 */
+	if (!DatumGetBool(result) && op->d.scalararraybinsearchop.has_nulls)
+	{
+		if (strictfunc)
+		{
+			/* Had nulls, so strict function implies null. */
+			result = (Datum) 0;
+			resultnull = true;
+		}
+		else
+		{
+			/* Execute function will null rhs just once. */
+			fcinfo->args[1].value = (Datum) 0;
+			fcinfo->args[1].isnull = true;
+
+			res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
+			result = BoolGetDatum(res == 0);
+			resultnull = fcinfo->isnull;
+		}
+	}
+
+	*op->resvalue = result;
+	*op->resnull = resultnull;
+}
+
+/* XXX: Name function to be specific to saop binsearch? */
+static int
+compare_array_elements(const void *a, const void *b, void *arg)
+{
+	ExprEvalStep *op = (ExprEvalStep *) arg;
+	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) a);
+	fcinfo->args[1].value = *((const Datum *) b);
+
+	return DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
+}
+
 /*
  * Evaluate a NOT NULL domain constraint.
  */
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index dbe8649a57..ac4478d060 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -213,6 +213,7 @@ typedef enum ExprEvalOp
 	/* evaluate assorted special-purpose expression types */
 	EEOP_CONVERT_ROWTYPE,
 	EEOP_SCALARARRAYOP,
+	EEOP_SCALARARRAYOP_BINSEARCH,
 	EEOP_XMLEXPR,
 	EEOP_AGGREF,
 	EEOP_GROUPING_FUNC,
@@ -550,6 +551,18 @@ typedef struct ExprEvalStep
 			PGFunction	fn_addr;	/* actual call address */
 		}			scalararrayop;
 
+		/* for EEOP_SCALARARRAYOP_BINSEARCH */
+		struct
+		{
+			int			num_elems;
+			bool		has_nulls;
+			Datum	   *elem_values;
+			FmgrInfo   *finfo;	/* function's lookup data */
+			FunctionCallInfo fcinfo_data;	/* arguments etc */
+			/* faster to access without additional indirection: */
+			PGFunction	fn_addr;	/* actual call address */
+		}			scalararraybinsearchop;
+
 		/* for EEOP_XMLEXPR */
 		struct
 		{
@@ -728,6 +741,7 @@ extern void ExecEvalSubscriptingRefAssign(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op,
 								   ExprContext *econtext);
 extern void ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op);
+extern void ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
 extern void ExecEvalConstraintNotNull(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConstraintCheck(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op);
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 4f4deaec22..55b57b9c59 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -158,3 +158,42 @@ select count(*) from date_tbl
     12
 (1 row)
 
+--
+-- Tests for ScalarArrayOpExpr binary search optimization
+--
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column? 
+----------
+ t
+(1 row)
+
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select 1 in (null, null, null, null, null, null, null, null, null, null, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+ ?column? 
+----------
+ t
+(1 row)
+
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column? 
+----------
+ 
+(1 row)
+
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column? 
+----------
+ 
+(1 row)
+
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index 1ca8bb151c..3cb850d838 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -65,3 +65,14 @@ select count(*) from date_tbl
   where f1 not between symmetric '1997-01-01' and '1998-01-01';
 select count(*) from date_tbl
   where f1 not between symmetric '1997-01-01' and '1998-01-01';
+
+--
+-- Tests for ScalarArrayOpExpr binary search optimization
+--
+
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select 1 in (null, null, null, null, null, null, null, null, null, null, null);
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
-- 
2.17.1

Reply via email to