>From 64a0b078127d371bc64520c508b253058dc70b09 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Wed, 20 Jul 2016 23:46:36 +0200
Subject: [PATCH 2/2] generational slab (auto-tuning allocator)

---
 src/backend/replication/logical/reorderbuffer.c |  71 +----
 src/backend/utils/mmgr/Makefile                 |   2 +-
 src/backend/utils/mmgr/genslab.c                | 347 ++++++++++++++++++++++++
 src/include/nodes/memnodes.h                    |   4 +-
 src/include/nodes/nodes.h                       |   1 +
 src/include/replication/reorderbuffer.h         |   8 +-
 src/include/utils/memutils.h                    |   7 +
 7 files changed, 369 insertions(+), 71 deletions(-)
 create mode 100644 src/backend/utils/mmgr/genslab.c

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 00e2b7b..42a3792 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -251,17 +251,12 @@ ReorderBufferAllocate(void)
 									SLAB_DEFAULT_BLOCK_SIZE,
 									sizeof(ReorderBufferTXN));
 
-	buffer->tup_context_slab = SlabContextCreate(new_ctx,
+	buffer->tup_context = GenSlabContextCreate(new_ctx,
 									"TuplesSlab",
 									SLAB_LARGE_BLOCK_SIZE,
 									sizeof(ReorderBufferTupleBuf) +
-									MAXIMUM_ALIGNOF + MaxHeapTupleSize);
-
-	buffer->tup_context_oversized = AllocSetContextCreate(new_ctx,
-									"TuplesOversized",
-									ALLOCSET_DEFAULT_MINSIZE,
-									ALLOCSET_DEFAULT_INITSIZE,
-									ALLOCSET_DEFAULT_MAXSIZE);
+									MAXIMUM_ALIGNOF + MaxHeapTupleSize,
+									TUPLES_PER_GENERATION);
 
 	hash_ctl.keysize = sizeof(TransactionId);
 	hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
@@ -282,17 +277,11 @@ ReorderBufferAllocate(void)
 
 	buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
-	buffer->tuples_count = 0;
-	buffer->tuples_size = 0;
-
 	dlist_init(&buffer->toplevel_by_lsn);
 	dlist_init(&buffer->cached_transactions);
 	dlist_init(&buffer->cached_changes);
 	slist_init(&buffer->cached_tuplebufs);
 
-	buffer->current_size = sizeof(ReorderBufferTupleBuf) +
-						   MAXIMUM_ALIGNOF + MaxHeapTupleSize;
-
 	return buffer;
 }
 
@@ -444,54 +433,12 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 
 	alloc_len = tuple_len + SizeofHeapTupleHeader;
 
-	/* see if we need to allocate a new context generation */
-	if (rb->tuples_count == TUPLES_PER_GENERATION)
-	{
-		Size	new_size;
-		Size	avg_length = (rb->tuples_size / rb->tuples_count);
-
-		/* mark the current SLAB context for automatic destruction */
-		SlabAutodestruct(rb->tup_context_slab);
-
-		/* assume +50% is enough slack to fit most tuples into the slab context */
-		new_size = MAXALIGN(avg_length * 1.5);
-
-		rb->current_size = new_size;
-		rb->tup_context_slab = SlabContextCreate(rb->context,
-									"TuplesSlab",
-									SLAB_LARGE_BLOCK_SIZE,
-									sizeof(ReorderBufferTupleBuf) +
-									MAXIMUM_ALIGNOF + rb->current_size);
-
-		/* we could also recreate the aset context, with block sizes set so
-		 * that the palloc always does malloc(), but not sure about that */
-
-		rb->tuples_count = 0;
-		rb->tuples_size = 0;
-	}
-
-	rb->tuples_count += 1;
-	rb->tuples_size  += alloc_len;
-
-	/* if small enough, check the slab cache */
-	if (alloc_len <= rb->current_size)
-	{
-		tuple = (ReorderBufferTupleBuf *)
-			MemoryContextAlloc(rb->tup_context_slab,
-							   sizeof(ReorderBufferTupleBuf) +
-							   MAXIMUM_ALIGNOF + rb->current_size);
-		tuple->alloc_tuple_size = rb->current_size;
-		tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-	}
-	else
-	{
-		tuple = (ReorderBufferTupleBuf *)
-			MemoryContextAlloc(rb->tup_context_oversized,
-							   sizeof(ReorderBufferTupleBuf) +
-							   MAXIMUM_ALIGNOF + alloc_len);
-		tuple->alloc_tuple_size = alloc_len;
-		tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-	}
+	tuple = (ReorderBufferTupleBuf *)
+		MemoryContextAlloc(rb->tup_context,
+						   sizeof(ReorderBufferTupleBuf) +
+						   MAXIMUM_ALIGNOF + alloc_len);
+	tuple->alloc_tuple_size = alloc_len;
+	tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
 
 	return tuple;
 }
diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile
index 321289f..08b5e3a 100644
--- a/src/backend/utils/mmgr/Makefile
+++ b/src/backend/utils/mmgr/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o mcxt.o portalmem.o slab.o
+OBJS = aset.o genslab.o mcxt.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mmgr/genslab.c b/src/backend/utils/mmgr/genslab.c
new file mode 100644
index 0000000..1f300aa
--- /dev/null
+++ b/src/backend/utils/mmgr/genslab.c
@@ -0,0 +1,347 @@
+/*-------------------------------------------------------------------------
+ *
+ * genslab.c
+ *	  Generational SLAB allocator definitions.
+ *
+ * An extension of the SLAB allocator relaxing the fixed-size limitation by
+ * using a generational design.
+ *
+ *
+ * Portions Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mmgr/genslab.c
+ *
+ *
+ *	The simple SLAB allocator only allows allocating chunks with exactly the
+ *	same size. That only works for some special cases, e.g. when the context
+ *	is only used for instances of a single structure with fixed size.
+ *
+ *	This implementation tries to relax this restriction by treating the chunk
+ *	size as an upper boundary, and using a regular AllocSet context to serve
+ *	requests for larger pieces of memory.
+ *
+ *	Furthermore, instead of using a single SLAB context (fixing the maximum
+ *	chunk size) it's possible to automatically tune the chunk size based on
+ *	past allocations. This is done by replacing the single SLAB context with
+ *	a sequence of contexts (with only the last one used for allocations).
+ *
+ *	This works particularly well when we can't predict the size of the
+ *	objects easily, but we know that the size is unlikely to vary too much.
+ *	It also works quite nicely when the memory is freed in about the same
+ *	sequence as it was allocated, because the old SLAB contexts will get
+ *	empty and freed automatically (one of the benefits of SLAB contexts).
+ *
+ *	A good example is ReorderBuffer - the tuples tend to be of about the
+ *	same size, and freed in roughly the same sequence as allocated.
+ *
+ *	In a sense, this delegates the allocation to actual implementations,
+ *	which also handle CLOBBER_FREED_MEMORY and MEMORY_CONTEXT_CHECKING.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GenSlabContext is a self-tuning version of SlabContext.
+ */
+typedef struct GenSlabContext
+{
+	MemoryContextData header;	/* Standard memory-context fields */
+
+	MemoryContext	slab;
+	MemoryContext	aset;
+
+	/* SLAB parameters */
+	Size		blockSize;		/* block size */
+	Size		chunkSize;		/* chunk size */
+
+	/* counters used for tuning chunk size */
+
+	Size		nbytes;			/* bytes allocated (as requested) */
+	int			nallocations;	/* number of allocations */
+	int			maxallocations;	/* self-tune after number of allocations */
+
+} GenSlabContext;
+
+typedef GenSlabContext *GenSlab;
+
+/*
+ * These functions implement the MemoryContext API for GenSlab contexts.
+ */
+static void *GenSlabAlloc(MemoryContext context, Size size);
+static void GenSlabFree(MemoryContext context, void *pointer);
+static void *GenSlabRealloc(MemoryContext context, void *pointer, Size size);
+static void GenSlabInit(MemoryContext context);
+static void GenSlabReset(MemoryContext context);
+static void GenSlabDelete(MemoryContext context);
+static Size GenSlabGetChunkSpace(MemoryContext context, void *pointer);
+static bool GenSlabIsEmpty(MemoryContext context);
+static void GenSlabStats(MemoryContext context, int level, bool print,
+			  MemoryContextCounters *totals);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+static void GenSlabCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Slab contexts.
+ */
+static MemoryContextMethods GenSlabMethods = {
+	GenSlabAlloc,
+	GenSlabFree,
+	GenSlabRealloc,
+	GenSlabInit,
+	GenSlabReset,
+	GenSlabDelete,
+	GenSlabGetChunkSpace,
+	GenSlabIsEmpty,
+	GenSlabStats
+#ifdef MEMORY_CONTEXT_CHECKING
+	,GenSlabCheck
+#endif
+};
+
+
+/*
+ * Public routines
+ */
+
+
+/*
+ * GenSlabContextCreate
+ *		Create a new GenSlab context.
+ */
+MemoryContext
+GenSlabContextCreate(MemoryContext parent,
+					  const char *name,
+					  Size blockSize,
+					  Size chunkSize,
+					  int maxAllocations)
+{
+	GenSlab	set;
+
+	/* Do the type-independent part of context creation */
+	set = (GenSlab) MemoryContextCreate(T_GenSlabContext,
+										sizeof(GenSlabContext),
+										&GenSlabMethods,
+										parent,
+										name);
+
+	/* the default (first-generation) slab context */
+	set->slab = SlabContextCreate((MemoryContext)set,
+								  "slab",
+								  blockSize,
+								  chunkSize);
+
+	/*
+	 * TODO Maybe set the parameters so that requests exceeding the SLAB
+	 * chunk size (falling through to the AllocSet) also exceed
+	 * allocChunkLimit and get allocated using malloc(). More expensive,
+	 * but most requests should hit the SLAB context anyway, and chunks
+	 * over allocChunkLimit are freed immediately, which is also nice.
+	 */
+	set->aset = AllocSetContextCreate((MemoryContext)set,
+									 "oversized",
+									 ALLOCSET_DEFAULT_MINSIZE,
+									 ALLOCSET_DEFAULT_INITSIZE,
+									 ALLOCSET_DEFAULT_MAXSIZE);
+
+	set->blockSize = blockSize;
+	set->chunkSize = chunkSize;
+	set->nbytes = 0;
+	set->nallocations = 0;
+	set->maxallocations = maxAllocations;
+
+	return (MemoryContext) set;
+}
+
+/*
+ * GenSlabInit
+ *		Context-type-specific initialization routine. Simply delegate the
+ *		child contexts.
+ */
+static void
+GenSlabInit(MemoryContext context)
+{
+	GenSlab set = (GenSlab)context;
+
+	set->nallocations = 0;
+	set->nbytes = 0;
+}
+
+/*
+ * GenSlabReset
+ *		Reset the GenSlab bookkeeping counters.
+ *
+ * The memory itself lives in the child slab/aset contexts; the generic
+ * MemoryContext machinery resets those children itself (which also frees
+ * the old SLAB generations), so only the counters are cleared here.
+ */
+static void
+GenSlabReset(MemoryContext context)
+{
+	GenSlab	set = (GenSlab) context;
+
+	set->nallocations = 0;
+	set->nbytes = 0;
+}
+
+/*
+ * GenSlabDelete
+ *		Frees all memory which is allocated in the given set, in preparation
+ *		for deletion of the set. We don't really need to do anything special
+ *		as MemoryContextDelete deletes child contexts automatically.
+ */
+static void
+GenSlabDelete(MemoryContext context)
+{
+	/* just reset (although not really necessary) */
+	GenSlabReset(context);
+}
+
+/*
+ * GenSlabAlloc
+ *		Returns pointer to allocated memory of given size or NULL if
+ *		request could not be completed; memory is added to the set.
+ *
+ * No request may exceed:
+ *		MAXALIGN_DOWN(SIZE_MAX) - SLAB_BLOCKHDRSZ - SLAB_CHUNKHDRSZ
+ * All callers use a much-lower limit.
+ */
+static void *
+GenSlabAlloc(MemoryContext context, Size size)
+{
+	GenSlab	set = (GenSlab) context;
+
+	/* do we need to auto-tune the SLAB chunk size? */
+	if (set->nallocations > set->maxallocations)
+	{
+		/*
+		 * Compute the new chunk size as 1.5x the average request, which
+		 * gives us some slack. TODO a chosen percentile (e.g. 0.95,
+		 * assuming normally distributed requests) might work better.
+		 */
+		Size chunkSize = MAXALIGN((1.5 * set->nbytes) / set->nallocations);
+
+		/* mark the current slab generation for autodestruction */
+		SlabAutodestruct(set->slab);
+		set->slab = SlabContextCreate((MemoryContext)set,
+									  "slab",
+									  set->blockSize,
+									  chunkSize);
+		set->chunkSize = chunkSize;
+		set->nallocations = 0;
+		set->nbytes = 0;
+	}
+
+	/* update the counters driving the auto-tuning above */
+	set->nallocations += 1;
+	set->nbytes += size;
+
+	if (size <= set->chunkSize)
+		return MemoryContextAlloc(set->slab, set->chunkSize);
+	else
+		return MemoryContextAlloc(set->aset, size);
+}
+
+/*
+ * GenSlabFree
+ *		As the memory is actually allocated in other contexts, we should
+ *		never really get here.
+ *
+ * FIXME Although someone could call MemoryContextFree directly.
+ */
+static void
+GenSlabFree(MemoryContext context, void *pointer)
+{
+	return pfree(pointer);
+}
+
+/*
+ * GenSlabRealloc
+ *		As the memory is actually allocated in other contexts, we should
+ *		never really get here.
+ *
+ * FIXME Although someone could call MemoryContextRealloc directly.
+ */
+static void *
+GenSlabRealloc(MemoryContext context, void *pointer, Size size)
+{
+	return repalloc(pointer, size);
+}
+
+/*
+ * GenSlabGetChunkSpace
+ *		As the memory is actually allocated in other contexts, we should
+ *		never really get here.
+ *
+ * FIXME Although someone could call MemoryContextGetChunkSpace directly.
+ */
+static Size
+GenSlabGetChunkSpace(MemoryContext context, void *pointer)
+{
+	return GetMemoryChunkSpace(pointer);
+}
+
+/*
+ * GenSlabIsEmpty
+ *		Is an GenSlab empty of any allocated space?
+ *
+ * TODO This does not really work, as MemoryContextIsEmpty returns false if
+ * 		there are any children, and GenSlab always has at least two.
+ */
+static bool
+GenSlabIsEmpty(MemoryContext context)
+{
+	/* */
+	return true;
+}
+
+/*
+ * GenSlabStats
+ *		Compute stats about memory consumption of an GenSlab.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Slab into *totals.
+ */
+static void
+GenSlabStats(MemoryContext context, int level, bool print,
+			  MemoryContextCounters *totals)
+{
+	GenSlab		set = (GenSlab) context;
+
+	if (print)
+	{
+		int			i;
+
+		for (i = 0; i < level; i++)
+			fprintf(stderr, "  ");
+		fprintf(stderr, "%s\n", set->header.name);
+	}
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * GenSlabCheck
+ *		Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+static void
+GenSlabCheck(MemoryContext context)
+{
+
+}
+
+#endif   /* MEMORY_CONTEXT_CHECKING */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index 92a7478..aae2349 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -96,6 +96,8 @@ typedef struct MemoryContextData
  */
 #define MemoryContextIsValid(context) \
 	((context) != NULL && \
-	 (IsA((context), AllocSetContext) || IsA((context), SlabContext)))
+	 (IsA((context), AllocSetContext) || \
+	  IsA((context), SlabContext) || \
+	  IsA((context), GenSlabContext)))
 
 #endif   /* MEMNODES_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 2cbbfec..2992b6e 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -276,6 +276,7 @@ typedef enum NodeTag
 	T_MemoryContext = 600,
 	T_AllocSetContext,
 	T_SlabContext,
+	T_GenSlabContext,
 
 	/*
 	 * TAGS FOR VALUE NODES (value.h)
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index e8a8d77..2dfab26 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -335,13 +335,7 @@ struct ReorderBuffer
 	 */
 	MemoryContext change_context;
 	MemoryContext txn_context;
-	MemoryContext tup_context_slab;
-	MemoryContext tup_context_oversized;
-
-	/* counters for current generation of tuples */
-	int		tuples_count;
-	Size	tuples_size;
-	Size	current_size;
+	MemoryContext tup_context;
 
 	/*
 	 * Data structure slab cache.
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index fd2c9c2..f4417d5 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -143,6 +143,13 @@ extern MemoryContext SlabContextCreate(MemoryContext parent,
 
 extern void SlabAutodestruct(MemoryContext context);
 
+/* genslab.c */
+extern MemoryContext GenSlabContextCreate(MemoryContext parent,
+					  const char *name,
+					  Size blockSize,
+					  Size chunkSize,
+					  int maxAllocations);
+
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
  * that might hold quite a lot of data.
-- 
2.5.5

