Hi all,
A while back, Robert Haas noticed that the space taken up by very
small tables is dominated by the FSM [1]. Tom suggested that we could
prevent creation of the FSM until the heap has reached a certain
threshold size [2]. Attached is a WIP patch to implement that. I've
also attached a SQL script to demonstrate the change in behavior for
various scenarios.

The behavior that allows the simplest implementation I thought of is as follows:

-The FSM isn't created if the heap has fewer than 10 blocks (or
whatever). If the last known good block has insufficient space, try
every block before extending the heap.

-If a heap with a FSM is truncated back to below the threshold, the
FSM stays around and can be used as usual.

-If the heap tuples are all deleted, the FSM stays but has no leaf
blocks (same as on master). Although it exists, it won't be
re-extended until the heap re-passes the threshold.

--
Some notes:

-For normal mode, I taught fsm_set_and_search() to switch to a
non-extending buffer call, but the biggest missing piece is WAL
replay. I couldn't find a non-extending equivalent of
XLogReadBufferExtended(), so I might have to create one.

-There'll need to be some performance testing to make sure there's no
regression, and to choose a good value for the threshold. I'll look
into that, but if anyone has any ideas for tests, that'll help this
effort along.

-A possible TODO item is to teach pg_upgrade not to link FSMs for
small heaps. I haven't look into the feasibility of that, however.

-RelationGetBufferForTuple() now has two boolean variables that mean
"don't use the FSM", but with different behaviors. To avoid confusion,
I've renamed use_fsm to always_extend and revised the commentary
accordingly.

-I've only implemented this for heaps, because indexes (at least
B-tree) don't seem to be as eager to create a FSM. I haven't looked at
the code, however.

--
[1] 
https://www.postgresql.org/message-id/CA%2BTgmoac%2B6qTNp2U%2BwedY8-PU6kK_b6hbdhR5xYGBG3GtdFcww%40mail.gmail.com
[2] https://www.postgresql.org/message-id/11360.1345502641%40sss.pgh.pa.us

--
I'll add this to the November commitfest.

-John Naylor

Attachment: fsmtest.sql
Description: application/sql

From 77c85f633f915bd247c554b691a134fac1f32316 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnay...@gmail.com>
Date: Sat, 6 Oct 2018 00:35:33 +0700
Subject: [PATCH v1] Avoid creation of the free space map for small tables.

The FSM isn't created if the heap has fewer than 10 blocks. If the last
known good block has insufficient space, try every block before extending
the heap.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.

If the heap tuples are all deleted, the FSM stays but has no leaf blocks
(same as before). Although it exists, it won't be re-extended until
the heap re-passes the threshold.
---
 src/backend/access/heap/hio.c             | 60 +++++++++++++++++------
 src/backend/storage/freespace/freespace.c | 32 +++++++++++-
 src/include/storage/freespace.h           |  4 ++
 3 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index b8b5871559..aadb75ee8c 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -315,7 +315,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
 						  BulkInsertState bistate,
 						  Buffer *vmbuffer, Buffer *vmbuffer_other)
 {
-	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
+	bool		always_extend = (options & HEAP_INSERT_SKIP_FSM),
+				visit_every_page;
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
 	Size		pageFreeSpace = 0,
@@ -357,21 +358,24 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 * each page that proves not to be suitable.)  If the FSM has no record of
 	 * a page with enough free space, we give up and extend the relation.
 	 *
-	 * When use_fsm is false, we either put the tuple onto the existing target
-	 * page or extend the relation.
+	 * Special cases for when the existing target page fails:
+	 * 1. When always_extend is true, we don't bother with consulting the FSM
+	 *    and just extend the relation.
+	 * 2. When visit_every_page is true and the FSM doesn't provide any
+	 *    information, we try every page before extending the relation.
 	 */
 	if (len + saveFreeSpace > MaxHeapTupleSize)
 	{
 		/* can't fit, don't bother asking FSM */
 		targetBlock = InvalidBlockNumber;
-		use_fsm = false;
+		always_extend = true;
 	}
 	else if (bistate && bistate->current_buf != InvalidBuffer)
 		targetBlock = BufferGetBlockNumber(bistate->current_buf);
 	else
 		targetBlock = RelationGetTargetBlock(relation);
 
-	if (targetBlock == InvalidBlockNumber && use_fsm)
+	if (targetBlock == InvalidBlockNumber && !always_extend)
 	{
 		/*
 		 * We have no cached target page, so ask the FSM for an initial
@@ -389,7 +393,20 @@ RelationGetBufferForTuple(Relation relation, Size len,
 			BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
 
 			if (nblocks > 0)
+			{
 				targetBlock = nblocks - 1;
+
+				/*
+				 * If the heap is small enough, it likely has no FSM.
+				 * However, if at some point the heap passed the threshold,
+				 * acquired a FSM, and was subsequently truncated to below
+				 * the threshold, the FSM remains.  Since there are so few
+				 * pages in either case, just try every page, starting with
+				 * the last page.
+				 */
+				if (nblocks <= HEAP_FSM_EXTENSION_THRESHOLD)
+					visit_every_page = true;
+			}
 		}
 	}
 
@@ -502,18 +519,29 @@ loop:
 			ReleaseBuffer(buffer);
 		}
 
-		/* Without FSM, always fall out of the loop and extend */
-		if (!use_fsm)
+		if (always_extend)
 			break;
 
-		/*
-		 * Update FSM as to condition of this page, and ask for another page
-		 * to try.
-		 */
-		targetBlock = RecordAndGetPageWithFreeSpace(relation,
-													targetBlock,
-													pageFreeSpace,
-													len + saveFreeSpace);
+		if (visit_every_page)
+		{
+			/* No pages have enough free space; extend. */
+			if (targetBlock == 0)
+				break;
+
+			/* Try the next lower page. */
+			targetBlock--;
+		}
+		else
+		{
+			/*
+			 * Update FSM as to condition of this page, and ask for another
+			 * page to try.
+			 */
+			targetBlock = RecordAndGetPageWithFreeSpace(relation,
+														targetBlock,
+														pageFreeSpace,
+														len + saveFreeSpace);
+		}
 	}
 
 	/*
@@ -534,7 +562,7 @@ loop:
 	 */
 	if (needLock)
 	{
-		if (!use_fsm)
+		if (always_extend || visit_every_page)
 			LockRelationForExtension(relation, ExclusiveLock);
 		else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
 		{
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 7c4ad1c449..abb03874b9 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -193,6 +193,7 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 /*
  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
  *		WAL replay
+ * TODO: Avoid creating a FSM here, also.
  */
 void
 XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
@@ -675,7 +676,36 @@ fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 	Page		page;
 	int			newslot = -1;
 
-	buf = fsm_readbuf(rel, addr, true);
+	/*
+	 * For heaps we prevent extension of the FSM unless the number of pages
+	 * exceeds HEAP_FSM_EXTENSION_THRESHOLD. For tables that don't already
+	 * have a FSM, this will save an inode and a few kB of space.
+	 * For sane threshold values, the FSM address will be zero, so we
+	 * don't bother dealing with anything else.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_RELATION
+		&& addr.logpageno == 0)
+	{
+		FSMAddress	threshold_addr;
+		uint16		threshold_slot;
+
+		/* Get the location in the FSM of the threshold. */
+		threshold_addr = fsm_get_location(HEAP_FSM_EXTENSION_THRESHOLD,
+										  &threshold_slot);
+		Assert(threshold_addr.logpageno == 0);
+
+		if (slot <= threshold_slot)
+		{
+			buf = fsm_readbuf(rel, addr, false);
+			if (buf == InvalidBuffer)
+				return newslot;
+		}
+		else
+			buf = fsm_readbuf(rel, addr, true);
+	}
+	else
+		buf = fsm_readbuf(rel, addr, true);
+
 	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 	page = BufferGetPage(buf);
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 726eb30fb8..68e4d27818 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -18,6 +18,10 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
+/* Only extend a heap's FSM if the heap has greater than this many blocks */
+/* TODO: Performance-test different values. */
+#define HEAP_FSM_EXTENSION_THRESHOLD 10
+
 /* prototypes for public functions in freespace.c */
 extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
 extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
-- 
2.17.1

Reply via email to