On Mon, Jan 18, 2016 at 12:34 PM, Robert Haas <robertmh...@gmail.com> wrote:
> On Mon, Jan 18, 2016 at 12:09 PM, Tom Lane <t...@sss.pgh.pa.us> wrote:
>> Robert Haas <robertmh...@gmail.com> writes:
>>> Yeah.  The API contract for an expanded object took me quite a while
>>> to puzzle out, but it seems to be this: if somebody hands you an R/W
>>> pointer to an expanded object, you're entitled to assume that you can
>>> "take over" that object and mutate it however you like.  But the
>>> object might be in some other memory context, so you have to move it
>>> into your own memory context.
>>
>> Only if you intend to keep it --- for example, a function that is mutating
>> and returning an object isn't required to move it somewhere else, if the
>> input is R/W, and I think it generally shouldn't.
>>
>> In the context here, I'd think it is the responsibility of nodeAgg.c
>> not individual datatype functions to make sure that expanded objects
>> live where it wants them to.
>
> That's how I did it in my prototype, but the problem with that is that
> spinning up a memory context for every group sucks when there are many
> groups with only a small number of elements each - hence the 50%
> regression that David Rowley observed.  If we're going to use expanded
> objects here, which seems like a good idea in principle, that's going
> to have to be fixed somehow.  We're flogging the heck out of malloc by
> repeatedly creating a context, doing one or two allocations in it, and
> then destroying the context.
>
> I think that, in general, the memory context machinery wasn't really
> designed to manage lots of small contexts.  The overhead of spinning
> up a new context for just a few allocations is substantial.  That
> matters in some other situations too, I think - I've commonly seen
> AllocSetContextCreate taking several percent  of runtime in profiles.
> But here it's much exacerbated.  I'm not sure whether it's better to
> attack that problem at the root and try to make AllocSetContextCreate
> cheaper, or whether we should try to figure out some change to the
> expanded-object machinery to address the issue.

Here is a patch that helps a good deal.  I changed things so that when
we create a context, we always allocate at least 1kB.  If that's more
than we need for the node itself and the name, then we use the rest of
the space as a sort of keeper block.  I think there's more that can be
done to improve this, but I'm wimping out for now because it's late
here.

I suspect something like this is a good idea even if we don't end up
using it for aggregate transition functions.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index d26991e..3730a21 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -164,6 +164,14 @@ typedef void *AllocPointer;
 /*
  * AllocSetContext is our standard implementation of MemoryContext.
  *
+ * Note: An AllocSetContext can include one or more "keeper" blocks which
+ * are not freed when the context is reset.  "keeper" should point to the
+ * first of these blocks.  Sometimes, there may be a block which shouldn't
+ * be freed even when the context is deleted (because that space isn't a
+ * separate allocation); if so, "stopper" can point to this block.  "keeper"
+ * must be set whenever "stopper" is set, and must point to the same block
+ * as "stopper" or an earlier one.
+ *
  * Note: header.isReset means there is nothing for AllocSetReset to do.
  * This is different from the aset being physically empty (empty blocks list)
  * because we may still have a keeper block.  It's also different from the set
@@ -181,7 +189,8 @@ typedef struct AllocSetContext
 	Size		maxBlockSize;	/* maximum block size */
 	Size		nextBlockSize;	/* next block size to allocate */
 	Size		allocChunkLimit;	/* effective chunk size limit */
-	AllocBlock	keeper;			/* if not NULL, keep this block over resets */
+	AllocBlock	keeper;		/* on reset, stop freeing when we reach this */
+	AllocBlock	stopper;	/* on delete, stop freeing when we reach this */
 } AllocSetContext;
 
 typedef AllocSetContext *AllocSet;
@@ -248,7 +257,6 @@ typedef struct AllocChunkData
 static void *AllocSetAlloc(MemoryContext context, Size size);
 static void AllocSetFree(MemoryContext context, void *pointer);
 static void *AllocSetRealloc(MemoryContext context, void *pointer, Size size);
-static void AllocSetInit(MemoryContext context);
 static void AllocSetReset(MemoryContext context);
 static void AllocSetDelete(MemoryContext context);
 static Size AllocSetGetChunkSpace(MemoryContext context, void *pointer);
@@ -267,7 +275,6 @@ static MemoryContextMethods AllocSetMethods = {
 	AllocSetAlloc,
 	AllocSetFree,
 	AllocSetRealloc,
-	AllocSetInit,
 	AllocSetReset,
 	AllocSetDelete,
 	AllocSetGetChunkSpace,
@@ -440,13 +447,45 @@ AllocSetContextCreate(MemoryContext parent,
 					  Size maxBlockSize)
 {
 	AllocSet	set;
+	Size		needed;
+	Size		request_size;
+
+	/*
+	 * We always allocate at least 1kB so that we can service a few small
+	 * allocations without needing to request more memory from the operating
+	 * system.  If the name is extremely long or a minimum context size is
+	 * specified, we might need to allocate more space.
+	 */
+	needed = MAXALIGN(sizeof(AllocSetContext) + strlen(name) + 1);
+	request_size = Max(MAXALIGN(needed) + minContextSize, 1024);
+
+	/* Get initial space */
+	if (TopMemoryContext != NULL)
+	{
+		/* Normal case: allocate from TopMemoryContext */
+		set = MemoryContextAlloc(TopMemoryContext, request_size);
+	}
+	else
+	{
+		/* Special case for startup: use good ol' malloc. */
+		set = malloc(request_size);
+		if (set == NULL)
+			elog(FATAL, "out of memory");
+	}
 
-	/* Do the type-independent part of context creation */
-	set = (AllocSet) MemoryContextCreate(T_AllocSetContext,
-										 sizeof(AllocSetContext),
-										 &AllocSetMethods,
-										 parent,
-										 name);
+	/*
+	 * Initialize the node.  Note that the name and the containing context
+	 * are both stored in the same allocation, so there are no steps that
+	 * can fail after we've allocated memory and before MemoryContextCreate
+	 * gets called.  That's good, because any such failure would leak
+	 * memory.
+	 */
+	MemSetAligned(set, 0, sizeof(AllocSetContext));
+	set->header.type = T_AllocSetContext;
+	set->header.methods = &AllocSetMethods;
+	set->header.name = ((char *) set) + sizeof(AllocSetContext);
+	strcpy(set->header.name, name);
+	MemoryContextCreate(&set->header, parent);
 
 	/*
 	 * Make sure alloc parameters are reasonable, and save them.
@@ -489,30 +528,20 @@ AllocSetContextCreate(MemoryContext parent,
 		set->allocChunkLimit >>= 1;
 
 	/*
-	 * Grab always-allocated space, if requested
+	 * Turn any remaining space into an allocation block that is never freed.
 	 */
-	if (minContextSize > ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ)
+	if (request_size > needed + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ)
 	{
-		Size		blksize = MAXALIGN(minContextSize);
-		AllocBlock	block;
+		AllocBlock	block = (AllocBlock) (((char *) set) + needed);
 
-		block = (AllocBlock) malloc(blksize);
-		if (block == NULL)
-		{
-			MemoryContextStats(TopMemoryContext);
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of memory"),
-					 errdetail("Failed while creating memory context \"%s\".",
-							   name)));
-		}
 		block->aset = set;
 		block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
-		block->endptr = ((char *) block) + blksize;
+		block->endptr = ((char *) set) + request_size;
 		block->next = set->blocks;
 		set->blocks = block;
-		/* Mark block as not to be released at reset time */
+		/* Mark block as not to be released - ever */
 		set->keeper = block;
+		set->stopper = block;
 
 		/* Mark unallocated space NOACCESS; leave the block header alone. */
 		VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
@@ -523,27 +552,6 @@ AllocSetContextCreate(MemoryContext parent,
 }
 
 /*
- * AllocSetInit
- *		Context-type-specific initialization routine.
- *
- * This is called by MemoryContextCreate() after setting up the
- * generic MemoryContext fields and before linking the new context
- * into the context tree.  We must do whatever is needed to make the
- * new context minimally valid for deletion.  We must *not* risk
- * failure --- thus, for example, allocating more memory is not cool.
- * (AllocSetContextCreate can allocate memory when it gets control
- * back, however.)
- */
-static void
-AllocSetInit(MemoryContext context)
-{
-	/*
-	 * Since MemoryContextCreate already zeroed the context node, we don't
-	 * have to do anything here: it's already OK.
-	 */
-}
-
-/*
  * AllocSetReset
  *		Frees all memory which is allocated in the given set.
  *
@@ -634,7 +642,7 @@ AllocSetDelete(MemoryContext context)
 	set->blocks = NULL;
 	set->keeper = NULL;
 
-	while (block != NULL)
+	while (block != set->stopper)
 	{
 		AllocBlock	next = block->next;
 
@@ -644,6 +652,8 @@ AllocSetDelete(MemoryContext context)
 		free(block);
 		block = next;
 	}
+
+	pfree(context);
 }
 
 /*
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 2bfd364..87cc0f2 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -194,10 +194,9 @@ MemoryContextResetChildren(MemoryContext context)
  *		Delete a context and its descendants, and release all space
  *		allocated therein.
  *
- * The type-specific delete routine removes all subsidiary storage
- * for the context, but we have to delete the context node itself,
- * as well as recurse to get the children.  We must also delink the
- * node from its parent, if it has one.
+ * The type-specific delete routine removes the context itself and all
+ * subsidiary storage, but we have to recurse to get the children.  We must
+ * also delink the node from its parent, if it has one.
  */
 void
 MemoryContextDelete(MemoryContext context)
@@ -227,7 +226,6 @@ MemoryContextDelete(MemoryContext context)
 
 	(*context->methods->delete_context) (context);
 	VALGRIND_DESTROY_MEMPOOL(context);
-	pfree(context);
 }
 
 /*
@@ -640,89 +638,25 @@ MemoryContextContains(MemoryContext context, void *pointer)
  * This is only intended to be called by context-type-specific
  * context creation routines, not by the unwashed masses.
  *
- * The context creation procedure is a little bit tricky because
- * we want to be sure that we don't leave the context tree invalid
- * in case of failure (such as insufficient memory to allocate the
- * context node itself).  The procedure goes like this:
- *	1.  Context-type-specific routine first calls MemoryContextCreate(),
- *		passing the appropriate tag/size/methods values (the methods
- *		pointer will ordinarily point to statically allocated data).
- *		The parent and name parameters usually come from the caller.
- *	2.  MemoryContextCreate() attempts to allocate the context node,
- *		plus space for the name.  If this fails we can ereport() with no
- *		damage done.
- *	3.  We fill in all of the type-independent MemoryContext fields.
- *	4.  We call the type-specific init routine (using the methods pointer).
- *		The init routine is required to make the node minimally valid
- *		with zero chance of failure --- it can't allocate more memory,
- *		for example.
- *	5.  Now we have a minimally valid node that can behave correctly
- *		when told to reset or delete itself.  We link the node to its
- *		parent (if any), making the node part of the context tree.
- *	6.  We return to the context-type-specific routine, which finishes
- *		up type-specific initialization.  This routine can now do things
- *		that might fail (like allocate more memory), so long as it's
- *		sure the node is left in a state that delete will handle.
- *
- * This protocol doesn't prevent us from leaking memory if step 6 fails
- * during creation of a top-level context, since there's no parent link
- * in that case.  However, if you run out of memory while you're building
- * a top-level context, you might as well go home anyway...
- *
- * Normally, the context node and the name are allocated from
- * TopMemoryContext (NOT from the parent context, since the node must
- * survive resets of its parent context!).  However, this routine is itself
- * used to create TopMemoryContext!  If we see that TopMemoryContext is NULL,
- * we assume we are creating TopMemoryContext and use malloc() to allocate
- * the node.
- *
- * Note that the name field of a MemoryContext does not point to
- * separately-allocated storage, so it should not be freed at context
- * deletion.
+ * The caller must initialize a minimally-valid MemoryContext - the name
+ * and methods pointers must be valid, in particular - before calling this
+ * function, being careful to do so in a way that does not risk leaking
+ * memory used for the context or name.  Then, it should call this function
+ * to link the new context into the context tree.  Finally, it can finish
+ * any initialization steps that might fail (like allocate more memory),
+ * so long as it's sure the node will be left in a state that delete will
+ * handle.
  *--------------------
  */
-MemoryContext
-MemoryContextCreate(NodeTag tag, Size size,
-					MemoryContextMethods *methods,
-					MemoryContext parent,
-					const char *name)
+void
+MemoryContextCreate(MemoryContext node, MemoryContext parent)
 {
-	MemoryContext node;
-	Size		needed = size + strlen(name) + 1;
-
 	/* creating new memory contexts is not allowed in a critical section */
 	Assert(CritSectionCount == 0);
 
-	/* Get space for node and name */
-	if (TopMemoryContext != NULL)
-	{
-		/* Normal case: allocate the node in TopMemoryContext */
-		node = (MemoryContext) MemoryContextAlloc(TopMemoryContext,
-												  needed);
-	}
-	else
-	{
-		/* Special case for startup: use good ol' malloc */
-		node = (MemoryContext) malloc(needed);
-		Assert(node != NULL);
-	}
-
-	/* Initialize the node as best we can */
-	MemSet(node, 0, size);
-	node->type = tag;
-	node->methods = methods;
-	node->parent = NULL;		/* for the moment */
-	node->firstchild = NULL;
-	node->prevchild = NULL;
-	node->nextchild = NULL;
 	node->isReset = true;
-	node->name = ((char *) node) + size;
-	strcpy(node->name, name);
-
-	/* Type-specific routine finishes any other essential initialization */
-	(*node->methods->init) (node);
 
-	/* OK to link node to parent (if any) */
+	/* link node to parent (if any) */
 	/* Could use MemoryContextSetParent here, but doesn't seem worthwhile */
 	if (parent)
 	{
@@ -736,9 +670,6 @@ MemoryContextCreate(NodeTag tag, Size size,
 	}
 
 	VALGRIND_CREATE_MEMPOOL(node, 0, false);
-
-	/* Return to type-specific creation routine to finish up */
-	return node;
 }
 
 /*
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index ba069cc..56ea445 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -57,7 +57,6 @@ typedef struct MemoryContextMethods
 	/* call this free_p in case someone #define's free() */
 	void		(*free_p) (MemoryContext context, void *pointer);
 	void	   *(*realloc) (MemoryContext context, void *pointer, Size size);
-	void		(*init) (MemoryContext context);
 	void		(*reset) (MemoryContext context);
 	void		(*delete_context) (MemoryContext context);
 	Size		(*get_chunk_space) (MemoryContext context, void *pointer);
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index ae07705..8e1f645 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -118,10 +118,7 @@ extern bool MemoryContextContains(MemoryContext context, void *pointer);
  * context creation.  It's intended to be called from context-type-
  * specific creation routines, and noplace else.
  */
-extern MemoryContext MemoryContextCreate(NodeTag tag, Size size,
-					MemoryContextMethods *methods,
-					MemoryContext parent,
-					const char *name);
+extern void MemoryContextCreate(MemoryContext node, MemoryContext parent);
 
 
 /*
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to