Over in "execExprInterp() questions / How to improve scalar array op
expr eval?" [1] I'd mused about how we might be able to optimized
scalar array ops with OR'd semantics.

This patch implements a binary search for such expressions when the
array argument is a constant so that we can avoid needing to teach
expression execution to cache stable values or know when a param has
changed.

The speed-up for the target case can pretty impressive: in my
admittedly contrived and relatively unscientific test with a query in
the form:

select count(*) from generate_series(1,100000) n(i) where i in (<1000
random integers in the series>)

shows ~30ms for the patch versus ~640ms on master.

James

[1]: 
https://www.postgresql.org/message-id/flat/CAAaqYe-UQBba7sScrucDOyHb7cDoNbWf_rcLrOWeD4ikP3_qTQ%40mail.gmail.com
From 08742543d7865d5f25c24c26bf1014924035c9eb Mon Sep 17 00:00:00 2001
From: jcoleman <jtc...@gmail.com>
Date: Fri, 10 Apr 2020 21:40:50 +0000
Subject: [PATCH v1] Binary search const arrays in OR'd ScalarArrayOps

Currently all scalar array op expressions execute as a linear search
through the array argument. However when OR semantics are desired it's
possible to instead use a binary search. Here we apply that optimization
to constant arrays (so we don't need to worry about teaching expression
execution when params change) of at least length 9 (since very short
arrays average to the same number of comparisons for linear searches and
thus avoid the preprocessing necessary for a binary search).
---
 src/backend/executor/execExpr.c           |  79 +++++++--
 src/backend/executor/execExprInterp.c     | 193 ++++++++++++++++++++++
 src/include/executor/execExpr.h           |  14 ++
 src/test/regress/expected/expressions.out |  39 +++++
 src/test/regress/sql/expressions.sql      |  11 ++
 5 files changed, 326 insertions(+), 10 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index c6a77bd66f..c202cc7e89 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -49,6 +49,7 @@
 #include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
+#define MIN_ARRAY_SIZE_FOR_BINARY_SEARCH 9
 
 typedef struct LastAttnumInfo
 {
@@ -947,11 +948,13 @@ ExecInitExprRec(Expr *node, ExprState *state,
 		case T_ScalarArrayOpExpr:
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
+				Oid			func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
 				AclResult	aclresult;
+				bool		useBinarySearch = false;
 
 				Assert(list_length(opexpr->args) == 2);
 				scalararg = (Expr *) linitial(opexpr->args);
@@ -964,12 +967,58 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				if (aclresult != ACLCHECK_OK)
 					aclcheck_error(aclresult, OBJECT_FUNCTION,
 								   get_func_name(opexpr->opfuncid));
-				InvokeFunctionExecuteHook(opexpr->opfuncid);
 
 				/* Set up the primary fmgr lookup information */
 				finfo = palloc0(sizeof(FmgrInfo));
 				fcinfo = palloc0(SizeForFunctionCallInfo(2));
-				fmgr_info(opexpr->opfuncid, finfo);
+				func = opexpr->opfuncid;
+
+				/*
+				 * If we have a constant array and want OR semantics, then we
+				 * can use a binary search to implement the op instead of
+				 * looping through the entire array for each execution.
+				 */
+				if (opexpr->useOr && arrayarg && IsA(arrayarg, Const) &&
+					!((Const *) arrayarg)->constisnull)
+				{
+					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
+					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
+					Oid			orderingOp;
+					Oid			orderingFunc;
+					Oid			opfamily;
+					Oid			opcintype;
+					int16		strategy;
+					int			nitems;
+
+					/*
+					 * Only do the optimization if we have a large enough
+					 * array to make it worth it.
+					 */
+					nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+					if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH)
+					{
+						/*
+						 * Find the ordering op that matches the originally
+						 * planned equality op.
+						 */
+						orderingOp = get_ordering_op_for_equality_op(opexpr->opno, NULL);
+						get_ordering_op_properties(orderingOp, &opfamily, &opcintype, &strategy);
+						orderingFunc = get_opfamily_proc(opfamily, opcintype, opcintype, BTORDER_PROC);
+
+						/*
+						 * But we may not have one, so fall back to the
+						 * default implementation if necessary.
+						 */
+						if (OidIsValid(orderingFunc))
+						{
+							useBinarySearch = true;
+							func = orderingFunc;
+						}
+					}
+				}
+
+				InvokeFunctionExecuteHook(func);
+				fmgr_info(func, finfo);
 				fmgr_info_set_expr((Node *) node, finfo);
 				InitFunctionCallInfoData(*fcinfo, finfo, 2,
 										 opexpr->inputcollid, NULL, NULL);
@@ -981,18 +1030,28 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				/*
 				 * Evaluate array argument into our return value.  There's no
 				 * danger in that, because the return value is guaranteed to
-				 * be overwritten by EEOP_SCALARARRAYOP, and will not be
-				 * passed to any other expression.
+				 * be overwritten by EEOP_SCALARARRAYOP[_BINSEARCH], and will
+				 * not be passed to any other expression.
 				 */
 				ExecInitExprRec(arrayarg, state, resv, resnull);
 
 				/* And perform the operation */
-				scratch.opcode = EEOP_SCALARARRAYOP;
-				scratch.d.scalararrayop.element_type = InvalidOid;
-				scratch.d.scalararrayop.useOr = opexpr->useOr;
-				scratch.d.scalararrayop.finfo = finfo;
-				scratch.d.scalararrayop.fcinfo_data = fcinfo;
-				scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
+				if (useBinarySearch)
+				{
+					scratch.opcode = EEOP_SCALARARRAYOP_BINSEARCH;
+					scratch.d.scalararraybinsearchop.finfo = finfo;
+					scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo;
+					scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr;
+				}
+				else
+				{
+					scratch.opcode = EEOP_SCALARARRAYOP;
+					scratch.d.scalararrayop.element_type = InvalidOid;
+					scratch.d.scalararrayop.useOr = opexpr->useOr;
+					scratch.d.scalararrayop.finfo = finfo;
+					scratch.d.scalararrayop.fcinfo_data = fcinfo;
+					scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
+				}
 				ExprEvalPushStep(state, &scratch);
 				break;
 			}
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 113ed1547c..5bebafbf0c 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -76,6 +76,7 @@
 #include "utils/timestamp.h"
 #include "utils/typcache.h"
 #include "utils/xml.h"
+#include "lib/qunique.h"
 
 /*
  * Use computed-goto-based opcode dispatch when computed gotos are available.
@@ -177,6 +178,8 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   AggStatePerGroup pergroup,
 					   ExprContext *aggcontext, int setno);
 
+static int	compare_array_elements(const void *a, const void *b, void *arg);
+
 /*
  * Prepare ExprState for interpreted execution.
  */
@@ -425,6 +428,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_DOMAIN_CHECK,
 		&&CASE_EEOP_CONVERT_ROWTYPE,
 		&&CASE_EEOP_SCALARARRAYOP,
+		&&CASE_EEOP_SCALARARRAYOP_BINSEARCH,
 		&&CASE_EEOP_XMLEXPR,
 		&&CASE_EEOP_AGGREF,
 		&&CASE_EEOP_GROUPING_FUNC,
@@ -1464,6 +1468,14 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_SCALARARRAYOP_BINSEARCH)
+		{
+			/* too complex for an inline implementation */
+			ExecEvalScalarArrayOpBinSearch(state, op, econtext);
+
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_DOMAIN_NOTNULL)
 		{
 			/* too complex for an inline implementation */
@@ -3581,6 +3593,187 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	*op->resnull = resultnull;
 }
 
+/*
+ * Evaluate "scalar op ANY (const array)".
+ *
+ * This is an optimized version of ExecEvalScalarArrayOp that only supports
+ * ANY (i.e., OR semantics) because it binary searches through the array for a
+ * match after sorting the array and removing null and duplicate entries.
+ *
+ * Source array is in our result area, scalar arg is already evaluated into
+ * fcinfo->args[0].
+ *
+ * The operator always yields boolean, and we combine the results across all
+ * array elements using OR.  Of course we short-circuit as soon as the result
+ * is known.
+ */
+void
+ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
+	bool		strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict;
+	ArrayType  *arr;
+	Datum		result;
+	bool		resultnull;
+	bool	   *elem_nulls;
+	int			l = 0,
+				r,
+				res;
+
+	/* We don't setup a binary search op if the array const is null. */
+	Assert(!*op->resnull);
+
+	/*
+	 * If the scalar is NULL, and the function is strict, return NULL; no
+	 * point in executing the search.
+	 */
+	if (fcinfo->args[0].isnull && strictfunc)
+	{
+		*op->resnull = true;
+		return;
+	}
+
+	/* Preprocess the array the first time we execute the op. */
+	if (op->d.scalararraybinsearchop.elem_values == NULL)
+	{
+		/* Cache the original lhs so we can scribble on it. */
+		Datum		scalar = fcinfo->args[0].value;
+		bool		scalar_isnull = fcinfo->args[0].isnull;
+		int			num_nonnulls = 0;
+		MemoryContext old_cxt;
+		MemoryContext array_cxt;
+		int16		typlen;
+		bool		typbyval;
+		char		typalign;
+
+		arr = DatumGetArrayTypeP(*op->resvalue);
+
+		get_typlenbyvalalign(ARR_ELEMTYPE(arr),
+							 &typlen,
+							 &typbyval,
+							 &typalign);
+
+		array_cxt = AllocSetContextCreate(
+										  econtext->ecxt_per_query_memory,
+										  "scalararraybinsearchop context",
+										  ALLOCSET_SMALL_SIZES);
+		old_cxt = MemoryContextSwitchTo(array_cxt);
+
+		deconstruct_array(arr,
+						  ARR_ELEMTYPE(arr),
+						  typlen, typbyval, typalign,
+						  &op->d.scalararraybinsearchop.elem_values, &elem_nulls, &op->d.scalararraybinsearchop.num_elems);
+
+		/* Remove null entries from the array. */
+		for (int j = 0; j < op->d.scalararraybinsearchop.num_elems; j++)
+		{
+			if (!elem_nulls[j])
+				op->d.scalararraybinsearchop.elem_values[num_nonnulls++] = op->d.scalararraybinsearchop.elem_values[j];
+		}
+
+		/*
+		 * Remember if we had any nulls so that we know if we need to execute
+		 * non-strict functions with a null lhs value if no match is found.
+		 */
+		op->d.scalararraybinsearchop.has_nulls = num_nonnulls < op->d.scalararraybinsearchop.num_elems;
+		op->d.scalararraybinsearchop.num_elems = num_nonnulls;
+
+		/*
+		 * Setup the fcinfo for sorting. We've removed nulls, so both lhs and
+		 * rhs values are guaranteed to be non-null.
+		 */
+		fcinfo->args[0].isnull = false;
+		fcinfo->args[1].isnull = false;
+
+		/* Sort the array and remove duplicate elements. */
+		qsort_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
+				  compare_array_elements, op);
+		op->d.scalararraybinsearchop.num_elems = qunique_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
+															 compare_array_elements, op);
+
+		/* Restore the lhs value after we scribbed on it for sorting. */
+		fcinfo->args[0].value = scalar;
+		fcinfo->args[0].isnull = scalar_isnull;
+
+		MemoryContextSwitchTo(old_cxt);
+	}
+
+	/*
+	 * We only setup a binary search op if we have > 8 elements, so we don't
+	 * need to worry about adding an optimization for the empty array case.
+	 */
+	Assert(!(op->d.scalararraybinsearchop.num_elems <= 0 && !op->d.scalararraybinsearchop.has_nulls));
+
+	/* Assume no match will be found until proven otherwise. */
+	result = BoolGetDatum(false);
+	resultnull = false;
+
+	/* Binary search through the array. */
+	r = op->d.scalararraybinsearchop.num_elems - 1;
+	while (l <= r)
+	{
+		int			i = (l + r) / 2;
+
+		fcinfo->args[1].value = op->d.scalararraybinsearchop.elem_values[i];
+
+		/* Call comparison function */
+		fcinfo->isnull = false;
+		res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
+
+		if (res == 0)
+		{
+			result = BoolGetDatum(true);
+			resultnull = false;
+			break;
+		}
+		else if (res > 0)
+			l = i + 1;
+		else
+			r = i - 1;
+	}
+
+	/*
+	 * If we didn't find a match in the array, we still might need to handle
+	 * the possibility of null values (we've previously removed them from the
+	 * array).
+	 */
+	if (!DatumGetBool(result) && op->d.scalararraybinsearchop.has_nulls)
+	{
+		if (strictfunc)
+		{
+			/* Had nulls, so strict function implies null. */
+			result = (Datum) 0;
+			resultnull = true;
+		}
+		else
+		{
+			/* Execute function will null rhs just once. */
+			fcinfo->args[1].value = (Datum) 0;
+			fcinfo->args[1].isnull = true;
+
+			res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
+			result = BoolGetDatum(res == 0);
+			resultnull = fcinfo->isnull;
+		}
+	}
+
+	*op->resvalue = result;
+	*op->resnull = resultnull;
+}
+
+/* XXX: Name function to be specific to saop binsearch? */
+static int
+compare_array_elements(const void *a, const void *b, void *arg)
+{
+	ExprEvalStep *op = (ExprEvalStep *) arg;
+	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) a);
+	fcinfo->args[1].value = *((const Datum *) b);
+
+	return DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
+}
+
 /*
  * Evaluate a NOT NULL domain constraint.
  */
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index dbe8649a57..ac4478d060 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -213,6 +213,7 @@ typedef enum ExprEvalOp
 	/* evaluate assorted special-purpose expression types */
 	EEOP_CONVERT_ROWTYPE,
 	EEOP_SCALARARRAYOP,
+	EEOP_SCALARARRAYOP_BINSEARCH,
 	EEOP_XMLEXPR,
 	EEOP_AGGREF,
 	EEOP_GROUPING_FUNC,
@@ -550,6 +551,18 @@ typedef struct ExprEvalStep
 			PGFunction	fn_addr;	/* actual call address */
 		}			scalararrayop;
 
+		/* for EEOP_SCALARARRAYOP_BINSEARCH */
+		struct
+		{
+			int			num_elems;
+			bool		has_nulls;
+			Datum	   *elem_values;
+			FmgrInfo   *finfo;	/* function's lookup data */
+			FunctionCallInfo fcinfo_data;	/* arguments etc */
+			/* faster to access without additional indirection: */
+			PGFunction	fn_addr;	/* actual call address */
+		}			scalararraybinsearchop;
+
 		/* for EEOP_XMLEXPR */
 		struct
 		{
@@ -728,6 +741,7 @@ extern void ExecEvalSubscriptingRefAssign(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op,
 								   ExprContext *econtext);
 extern void ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op);
+extern void ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
 extern void ExecEvalConstraintNotNull(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConstraintCheck(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op);
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 4f4deaec22..55b57b9c59 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -158,3 +158,42 @@ select count(*) from date_tbl
     12
 (1 row)
 
+--
+-- Tests for ScalarArrayOpExpr binary search optimization
+--
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column? 
+----------
+ t
+(1 row)
+
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select 1 in (null, null, null, null, null, null, null, null, null, null, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+ ?column? 
+----------
+ t
+(1 row)
+
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column? 
+----------
+ 
+(1 row)
+
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column? 
+----------
+ 
+(1 row)
+
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index 1ca8bb151c..3cb850d838 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -65,3 +65,14 @@ select count(*) from date_tbl
   where f1 not between symmetric '1997-01-01' and '1998-01-01';
 select count(*) from date_tbl
   where f1 not between symmetric '1997-01-01' and '1998-01-01';
+
+--
+-- Tests for ScalarArrayOpExpr binary search optimization
+--
+
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select 1 in (null, null, null, null, null, null, null, null, null, null, null);
+select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
-- 
2.17.1

Reply via email to