On 2018-Nov-15, Amit Langote wrote:

> Maybe name it PARTITION_INIT_ALLOCSIZE (dropping the ROUTING from it), or
> PROUTE_INIT_ALLOCSIZE, to make it clear that it's only allocation size?

Here's a proposed delta on v17 0001.  Most importantly, I noticed that
the hashed subplans stuff didn't actually work, because the hash API was
not being used correctly.  So the search in the hash would never return
a hit, and we'd create RRIs for those partitions again.  To fix this, I
added a new struct to hold hash entries.

I think this merits that the performance tests be redone.  (Unless I
misunderstand, this shouldn't change the performance of INSERT, only
that of UPDATE.)

On the subject of the ArraySpace routines, I decided to drop them and
instead do the re-allocations on the places where they were needed.
In the original code there were two places for the partitions array, but
both did the same thing so it made sense to create a separate routine to
do it instead (ExecRememberPartitionRel), and do the allocation there.
Just out of custom I moved the palloc to appear at the same place as the
repalloc, and after doing so it became obvious that we were
over-allocating memory for the PartitionDispatchData pointer --
allocating the size for the whole struct instead of just the pointer.

(I renamed the "allocsize" struct members to "max", as is customary.)

I added CHECK_FOR_INTERRUPTS to the ExecFindPartition loop.  It
shouldn't be useful if the code is correct, but if there are bugs it's
better to be able to interrupt infinite loops :-)

I reindented the comment atop PartitionTupleRouting.  The other way was
just too unwieldy.

Let me know what you think.  Regression tests still pass for me.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 32d2461528..22a814bcbe 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1343,7 +1343,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 
 	resultRelInfo->ri_PartitionCheck = partition_check;
 	resultRelInfo->ri_PartitionRoot = partition_root;
-	resultRelInfo->ri_PartitionInfo = NULL; /* May be set later */
+	resultRelInfo->ri_PartitionInfo = NULL; /* may be set later */
 }
 
 /*
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index b2d394676f..592daab1be 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -38,48 +38,46 @@
  * route a tuple inserted into a partitioned table to one of its leaf
  * partitions.
  *
- * partition_root			The partitioned table that's the target of the
- *							command.
+ * partition_root
+ *		The partitioned table that's the target of the command.
  *
- * partition_dispatch_info	Array of 'dispatch_allocsize' elements containing
- *							a pointer to a PartitionDispatch object for every
- *							partitioned table touched by tuple routing.  The
- *							entry for the target partitioned table is *always*
- *							present in the 0th element of this array.  See
- *							comment for PartitionDispatchData->indexes for
- *							details on how this array is indexed.
+ * partition_dispatch_info
+ *		Array of 'max_dispatch' elements containing a pointer to a
+ *		PartitionDispatch object for every partitioned table touched by tuple
+ *		routing.  The entry for the target partitioned table is *always*
+ *		present in the 0th element of this array.  See comment for
+ *		PartitionDispatchData->indexes for details on how this array is
+ *		indexed.
  *
- * num_dispatch				The current number of items stored in the
- *							'partition_dispatch_info' array.  Also serves as
- *							the index of the next free array element for new
- *							PartitionDispatch objects that need to be stored.
+ * num_dispatch
+ *		The current number of items stored in the 'partition_dispatch_info'
+ *		array.  Also serves as the index of the next free array element for
+ *		new PartitionDispatch objects that need to be stored.
  *
- * dispatch_allocsize		The current allocated size of the
- *							'partition_dispatch_info' array.
+ * max_dispatch
+ *		The current allocated size of the 'partition_dispatch_info' array.
  *
- * partitions				Array of 'partitions_allocsize' elements
- *							containing a pointer to a ResultRelInfo for every
- *							leaf partitions touched by tuple routing.  Some of
- *							these are pointers to ResultRelInfos which are
- *							borrowed out of 'subplan_resultrel_hash'.  The
- *							remainder have been built especially for tuple
- *							routing.  See comment for
- *							PartitionDispatchData->indexes for details on how
- *							this array is indexed.
+ * partitions
+ *		Array of 'max_partitions' elements containing a pointer to a
+ *		ResultRelInfo for every leaf partitions touched by tuple routing.
+ *		Some of these are pointers to ResultRelInfos which are borrowed out of
+ *		'subplan_resultrel_hash'.  The remainder have been built especially
+ *		for tuple routing.  See comment for PartitionDispatchData->indexes for
+ *		details on how this array is indexed.
  *
- * num_partitions			The current number of items stored in the
- *							'partitions' array.  Also serves as the index of
- *							the next free array element for new ResultRelInfo
- *							objects that need to be stored.
+ * num_partitions
+ *		The current number of items stored in the 'partitions' array.  Also
+ *		serves as the index of the next free array element for new
+ *		ResultRelInfo objects that need to be stored.
  *
- * partitions_allocsize		The current allocated size of the 'partitions'
- *							array.
+ * max_partitions
+ *		The current allocated size of the 'partitions' array.
  *
- * subplan_resultrel_hash	Hash table to store subplan ResultRelInfos by Oid.
- *							This is used to cache ResultRelInfos from subplans
- *							of an UPDATE ModifyTable node.  Some of these may
- *							be useful for tuple routing to save having to build
- *							duplicates.
+ * subplan_resultrel_hash
+ *		Hash table to store subplan ResultRelInfos by Oid.  This is used to
+ *		cache ResultRelInfos from subplans of an UPDATE ModifyTable node;
+ *		NULL in other cases.  Some of these may be useful for tuple routing
+ *		to save having to build duplicates.
  *-----------------------
  */
 typedef struct PartitionTupleRouting
@@ -87,10 +85,10 @@ typedef struct PartitionTupleRouting
 	Relation	partition_root;
 	PartitionDispatch *partition_dispatch_info;
 	int			num_dispatch;
-	int			dispatch_allocsize;
+	int			max_dispatch;
 	ResultRelInfo **partitions;
 	int			num_partitions;
-	int			partitions_allocsize;
+	int			max_partitions;
 	HTAB	   *subplan_resultrel_hash;
 } PartitionTupleRouting;
 
@@ -132,11 +130,16 @@ typedef struct PartitionDispatchData
 	int			indexes[FLEXIBLE_ARRAY_MEMBER];
 } PartitionDispatchData;
 
+/* struct to hold result relations coming from UPDATE subplans */
+typedef struct SubplanResultRelHashElem
+{
+	Oid		relid;		/* hash key -- must be first */
+	ResultRelInfo *rri;
+} SubplanResultRelHashElem;
+
 
 static void ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
 							   PartitionTupleRouting *proute);
-static void ExecCheckPartitionArraySpace(PartitionTupleRouting *proute);
-static void ExecCheckDispatchArraySpace(PartitionTupleRouting *proute);
 static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
 					  ResultRelInfo *rootResultRelInfo,
 					  PartitionTupleRouting *proute,
@@ -147,6 +150,9 @@ static void ExecInitRoutingInfo(ModifyTableState *mtstate,
 					ResultRelInfo *partRelInfo);
 static PartitionDispatch ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute,
 							  Oid partoid, PartitionDispatch parent_pd, int partidx);
+static void ExecRememberPartitionRel(EState *estate, PartitionTupleRouting *proute,
+						 int partidx, ResultRelInfo *rri,
+						 PartitionDispatch dispatch);
 static void FormPartitionKeyDatum(PartitionDispatch pd,
 					  TupleTableSlot *slot,
 					  EState *estate,
@@ -192,39 +198,17 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
 	 * demand, only when we actually need to route a tuple to that partition.
 	 * The reason for this is that a common case is for INSERT to insert a
 	 * single tuple into a partitioned table and this must be fast.
-	 *
-	 * We initially size the 'partition_dispatch_info' and 'partitions' arrays
-	 * to allow storage of PARTITION_ROUTING_INITSIZE pointers.  If we route
-	 * tuples to more than this many partitions or through more than that many
-	 * sub-partitioned tables then we'll need to increase the size of these
-	 * arrays.
-	 *
-	 * Initially we must only set up 1 PartitionDispatch object; the one for
-	 * the partitioned table that's the target of the command.  If we must
-	 * route a tuple via some sub-partitioned table, then its
-	 * PartitionDispatch is only built the first time it's required.
 	 */
-	proute = (PartitionTupleRouting *) palloc(sizeof(PartitionTupleRouting));
+	proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
 	proute->partition_root = rel;
-	proute->partition_dispatch_info = (PartitionDispatchData **)
-		palloc(sizeof(PartitionDispatchData) * PARTITION_ROUTING_INITSIZE);
-	proute->num_dispatch = 0;
-	proute->dispatch_allocsize = PARTITION_ROUTING_INITSIZE;
-
-	proute->partitions = (ResultRelInfo **)
-		palloc(sizeof(ResultRelInfo *) * PARTITION_ROUTING_INITSIZE);
-
-	/* Mark that no items are yet stored in the 'partitions' array. */
-	proute->num_partitions = 0;
-	proute->partitions_allocsize = PARTITION_ROUTING_INITSIZE;
+	/* Rest of members initialized by zeroing */
 
 	/*
 	 * Initialize this table's PartitionDispatch object.  Here we pass in the
 	 * parent as NULL as we don't need to care about any parent of the target
 	 * partitioned table.
 	 */
-	(void) ExecInitPartitionDispatchInfo(proute, RelationGetRelid(rel), NULL,
-										 0);
+	ExecInitPartitionDispatchInfo(proute, RelationGetRelid(rel), NULL, 0);
 
 	/*
 	 * If performing an UPDATE with tuple routing, we can reuse partition
@@ -236,8 +220,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
 	 */
 	if (node && node->operation == CMD_UPDATE)
 		ExecHashSubPlanResultRelsByOid(mtstate, proute);
-	else
-		proute->subplan_resultrel_hash = NULL;
 
 	return proute;
 }
@@ -292,6 +274,8 @@ ExecFindPartition(ModifyTableState *mtstate,
 		AttrNumber *map = dispatch->tupmap;
 		int			partidx = -1;
 
+		CHECK_FOR_INTERRUPTS();
+
 		rel = dispatch->reldesc;
 		partdesc = dispatch->partdesc;
 
@@ -319,7 +303,7 @@ ExecFindPartition(ModifyTableState *mtstate,
 
 		/*
 		 * If this partitioned table has no partitions or no partition for
-		 * these values, then error out.
+		 * these values, error out.
 		 */
 		if (partdesc->nparts == 0 ||
 			(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
@@ -333,7 +317,9 @@ ExecFindPartition(ModifyTableState *mtstate,
 					(errcode(ERRCODE_CHECK_VIOLATION),
 					 errmsg("no partition of relation \"%s\" found for row",
 							RelationGetRelationName(rel)),
-					 val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
+					 val_desc ?
+					 errdetail("Partition key of the failing row contains %s.",
+							   val_desc) : 0));
 		}
 
 		if (partdesc->is_leaf[partidx])
@@ -352,55 +338,39 @@ ExecFindPartition(ModifyTableState *mtstate,
 			}
 			else
 			{
-				int			rri_index = -1;
+				bool		found = false;
 
 				/*
-				 * A ResultRelInfo has not been set up for this partition yet,
-				 * so either use one of the sub-plan result rels or build a
-				 * new one.
+				 * We have not yet set up a ResultRelInfo for this partition,
+				 * but if we have a subplan hash table, we might have one
+				 * there.  If not, we'll have to create one.
 				 */
 				if (proute->subplan_resultrel_hash)
 				{
 					Oid			partoid = partdesc->oids[partidx];
+					SubplanResultRelHashElem   *elem;
 
-					rri = hash_search(proute->subplan_resultrel_hash,
-									  &partoid, HASH_FIND, NULL);
-
-					if (rri)
+					elem = hash_search(proute->subplan_resultrel_hash,
+									   &partoid, HASH_FIND, NULL);
+					if (elem)
 					{
-						/* Found one! */
+						found = true;
+						rri = elem->rri;
 
 						/* Verify this ResultRelInfo allows INSERTs */
 						CheckValidResultRel(rri, CMD_INSERT);
 
-						/* This shouldn't have be set up yet */
-						Assert(rri->ri_PartitionInfo == NULL);
-
 						/* Set up the PartitionRoutingInfo for it */
 						ExecInitRoutingInfo(mtstate, estate, rri);
-
-						rri_index = proute->num_partitions++;
-						dispatch->indexes[partidx] = rri_index;
-
-						ExecCheckPartitionArraySpace(proute);
-
-						/*
-						 * Store it in the partitions array so we don't have
-						 * to look it up again.
-						 */
-						proute->partitions[rri_index] = rri;
+						ExecRememberPartitionRel(estate, proute, partidx, rri, dispatch);
 					}
 				}
 
 				/* We need to create a new one. */
-				if (rri_index < 0)
-				{
-					MemoryContextSwitchTo(oldcxt);
+				if (!found)
 					rri = ExecInitPartitionInfo(mtstate, rootResultRelInfo,
 												proute, estate,
 												dispatch, partidx);
-					MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-				}
 			}
 
 			/* Release the tuple in the lowest parent's dedicated slot. */
@@ -460,38 +430,31 @@ static void
 ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
 							   PartitionTupleRouting *proute)
 {
-	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
-	ResultRelInfo *subplan_result_rels;
 	HASHCTL		ctl;
 	HTAB	   *htab;
-	int			nsubplans;
 	int			i;
 
-	subplan_result_rels = mtstate->resultRelInfo;
-	nsubplans = list_length(node->plans);
-
 	memset(&ctl, 0, sizeof(ctl));
 	ctl.keysize = sizeof(Oid);
-	ctl.entrysize = sizeof(ResultRelInfo **);
+	ctl.entrysize = sizeof(SubplanResultRelHashElem);
 	ctl.hcxt = CurrentMemoryContext;
 
-	htab = hash_create("PartitionTupleRouting table", nsubplans, &ctl,
-					   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+	htab = hash_create("PartitionTupleRouting table", mtstate->mt_nplans,
+					   &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
 	proute->subplan_resultrel_hash = htab;
 
 	/* Hash all subplans by their Oid */
-	for (i = 0; i < nsubplans; i++)
+	for (i = 0; i < mtstate->mt_nplans; i++)
 	{
-		ResultRelInfo *rri = &subplan_result_rels[i];
+		ResultRelInfo *rri = &mtstate->resultRelInfo[i];
 		bool		found;
 		Oid			partoid = RelationGetRelid(rri->ri_RelationDesc);
-		ResultRelInfo **subplanrri;
+		SubplanResultRelHashElem   *elem;
 
-		subplanrri = (ResultRelInfo **) hash_search(htab, &partoid, HASH_ENTER,
-													&found);
-
-		if (!found)
-			*subplanrri = rri;
+		elem = (SubplanResultRelHashElem *)
+			hash_search(htab, &partoid, HASH_ENTER, &found);
+		Assert(!found);
+		elem->rri = rri;
 
 		/*
 		 * This is required in order to convert the partition's tuple to be
@@ -503,41 +466,6 @@ ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
 }
 
 /*
- * ExecCheckPartitionArraySpace
- *		Ensure there's enough space in the proute->partitions array
- */
-static void
-ExecCheckPartitionArraySpace(PartitionTupleRouting *proute)
-{
-	if (proute->num_partitions >= proute->partitions_allocsize)
-	{
-		proute->partitions_allocsize *= 2;
-		proute->partitions = (ResultRelInfo **)
-			repalloc(proute->partitions, sizeof(ResultRelInfo *) *
-					 proute->partitions_allocsize);
-	}
-}
-
-/*
- * ExecCheckDispatchArraySpace
- *		Ensure there's enough space in the proute->partition_dispatch_info
- *		array.
- */
-static void
-ExecCheckDispatchArraySpace(PartitionTupleRouting *proute)
-{
-	if (proute->num_dispatch >= proute->dispatch_allocsize)
-	{
-		/* Expand allocated space. */
-		proute->dispatch_allocsize *= 2;
-		proute->partition_dispatch_info = (PartitionDispatchData **)
-			repalloc(proute->partition_dispatch_info,
-				sizeof(PartitionDispatchData *) *
-				proute->dispatch_allocsize);
-	}
-}
-
-/*
  * ExecInitPartitionInfo
  *		Initialize ResultRelInfo and other information for a partition
  *		and store it in the next empty slot in the proute->partitions array.
@@ -559,7 +487,6 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 	MemoryContext oldContext;
 	AttrNumber *part_attnos = NULL;
 	bool		found_whole_row;
-	int			part_result_rel_index;
 
 	/*
 	 * We locked all the partitions in ExecSetupPartitionTupleRouting
@@ -903,13 +830,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 		}
 	}
 
-	part_result_rel_index = proute->num_partitions++;
-	dispatch->indexes[partidx] = part_result_rel_index;
-
-	ExecCheckPartitionArraySpace(proute);
-
-	/* Save here for later use. */
-	proute->partitions[part_result_rel_index] = leaf_part_rri;
+	ExecRememberPartitionRel(estate, proute, partidx, leaf_part_rri, dispatch);
 
 	MemoryContextSwitchTo(oldContext);
 
@@ -1018,8 +939,8 @@ ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid,
 		rel = proute->partition_root;
 	partdesc = RelationGetPartitionDesc(rel);
 
-	pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes)
-									+ (partdesc->nparts * sizeof(int)));
+	pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
+									partdesc->nparts * sizeof(int));
 	pd->reldesc = rel;
 	pd->key = RelationGetPartitionKey(rel);
 	pd->keystate = NIL;
@@ -1045,8 +966,8 @@ ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid,
 	else
 	{
 		/* Not required for the root partitioned table */
-		pd->tupslot = NULL;
 		pd->tupmap = NULL;
+		pd->tupslot = NULL;
 	}
 
 	/*
@@ -1055,25 +976,79 @@ ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid,
 	 */
 	memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
 
+	/* Track in PartitionTupleRouting for later use */
 	dispatchidx = proute->num_dispatch++;
 
-	ExecCheckDispatchArraySpace(proute);
-
-	/* Save here for later use. */
+	/* Allocate or enlarge the array, as needed */
+	if (proute->num_dispatch >= proute->max_dispatch)
+	{
+		if (proute->max_dispatch == 0)
+		{
+			proute->max_dispatch = PARTITION_ROUTING_INITSIZE;
+			proute->partition_dispatch_info = (PartitionDispatch *)
+				palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
+		}
+		else
+		{
+			proute->max_dispatch *= 2;
+			proute->partition_dispatch_info = (PartitionDispatch *)
+				repalloc(proute->partition_dispatch_info,
+						 sizeof(PartitionDispatch) * proute->max_dispatch);
+		}
+	}
 	proute->partition_dispatch_info[dispatchidx] = pd;
 
 	/*
 	 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
-	 * install the link to allow us to descend the partition hierarchy for
-	 * future searches
+	 * install a downlink in the parent to allow quick descent.
 	 */
 	if (parent_pd)
+	{
+		Assert(parent_pd->indexes[partidx] == -1);
 		parent_pd->indexes[partidx] = dispatchidx;
+	}
 
 	return pd;
 }
 
 /*
+ * Store the given ResultRelInfo as corresponding to partition partidx in
+ * proute, tracking which array item was used in dispatch->indexes.
+ */
+static void
+ExecRememberPartitionRel(EState *estate, PartitionTupleRouting *proute, int partidx,
+						 ResultRelInfo *rri, PartitionDispatch dispatch)
+{
+	int		rri_index;
+
+	Assert(dispatch->indexes[partidx] == -1);
+
+	rri_index = proute->num_partitions++;
+
+	/* Allocate or enlarge the array, as needed */
+	if (proute->num_partitions >= proute->max_partitions)
+	{
+		if (proute->max_partitions == 0)
+		{
+			proute->max_partitions = PARTITION_ROUTING_INITSIZE;
+			proute->partitions = (ResultRelInfo **)
+				MemoryContextAlloc(estate->es_query_cxt,
+								   sizeof(ResultRelInfo *) * proute->max_partitions);
+		}
+		else
+		{
+			proute->max_partitions *= 2;
+			proute->partitions = (ResultRelInfo **)
+				repalloc(proute->partitions, sizeof(ResultRelInfo *) *
+						 proute->max_partitions);
+		}
+	}
+
+	proute->partitions[rri_index] = rri;
+	dispatch->indexes[partidx] = rri_index;
+}
+
+/*
  * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
  * routing.
  *
@@ -1107,7 +1082,6 @@ ExecCleanupTupleRouting(ModifyTableState *mtstate,
 	{
 		ResultRelInfo *resultRelInfo = proute->partitions[i];
 
-
 		/*
 		 * Check if this result rel is one belonging to the node's subplans,
 		 * if so, let ExecEndPlan() clean it up.
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 423118cbbc..de27d88e63 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -34,7 +34,6 @@
 
 struct PlanState;				/* forward references in this file */
 struct PartitionRoutingInfo;
-struct PartitionTupleRouting;
 struct ParallelHashJoinState;
 struct ExecRowMark;
 struct ExprState;
@@ -471,7 +470,7 @@ typedef struct ResultRelInfo
 	/* relation descriptor for root partitioned table */
 	Relation	ri_PartitionRoot;
 
-	/* Additional information that's specific to partition tuple routing */
+	/* Additional information specific to partition tuple routing */
 	struct PartitionRoutingInfo *ri_PartitionInfo;
 } ResultRelInfo;
 

Reply via email to