On 10/15/18, Amit Kapila <amit.kapil...@gmail.com> wrote:
> I think you can avoid calling RelationGetNumberOfBlocks, if you call
> smgrexists before

This is done in the attached v5, 0001.

> and for the purpose of vacuum, we can get that as an
> input parameter.  I think one can argue for not changing the interface
> functions like RecordPageWithFreeSpace to avoid calling
> RelationGetNumberOfBlocks, but to me, it appears worth to save the
> additional system call.

This is done in 0002. I also added a check for the cached value of
pg_class.relpages, since it's cheap and may help non-VACUUM callers.

> [proposal for a cache of blocks to try]

That's interesting. I'll have to do some reading elsewhere in the
codebase, and then I'll follow up.

Thanks,
-John Naylor
From 0dafd220e437ed1cf9ed77e41cf3090365a63798 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnay...@gmail.com>
Date: Tue, 16 Oct 2018 17:10:14 +0700
Subject: [PATCH v5 1/2] Avoid creation of the free space map for small tables.

The FSM isn't created unless the heap has more than 10 blocks
(HEAP_FSM_EXTENSION_THRESHOLD). For a heap at or below that size, if the
last known good block has insufficient space, try every block before
extending the heap.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.
---
 contrib/pageinspect/expected/page.out     |  77 ++++++------
 contrib/pageinspect/sql/page.sql          |  33 +++--
 src/backend/access/heap/hio.c             | 141 +++++++++++++++++-----
 src/backend/storage/freespace/freespace.c |  64 +++++++++-
 src/include/storage/freespace.h           |   4 +
 5 files changed, 236 insertions(+), 83 deletions(-)

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 --------
    8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0 
 -------
   8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
- fsm_1 
--------
-  8192
-(1 row)
-
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
+ERROR:  block number 10 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
  vm_0 
 ------
  8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test1"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+ERROR:  block number 1 is out of range for relation "test_rel_forks"
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
 ERROR:  relation "xxx" does not exist
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
 ERROR:  invalid fork name
 HINT:  Valid fork names are "main", "fsm", "vm", and "init".
-SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
+SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
+ fsm_page_contents 
+-------------------
+ 0: 192           +
+ 1: 192           +
+ 3: 192           +
+ 7: 192           +
+ 15: 192          +
+ 31: 192          +
+ 63: 192          +
+ 127: 192         +
+ 255: 192         +
+ 511: 192         +
+ 1023: 192        +
+ 2047: 192        +
+ 4095: 192        +
+ fp_next_slot: 0  +
+ 
+(1 row)
+
+SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
  ?column? 
 ----------
  t
 (1 row)
 
+DROP TABLE test_rel_forks;
+CREATE TABLE test1 (a int, b int);
+INSERT INTO test1 VALUES (16777217, 131584);
 SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
  pagesize | version 
 ----------+---------
@@ -62,26 +83,6 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
  {"\\x01000001","\\x00020200"}
 (1 row)
 
-SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
- fsm_page_contents 
--------------------
- 0: 254           +
- 1: 254           +
- 3: 254           +
- 7: 254           +
- 15: 254          +
- 31: 254          +
- 63: 254          +
- 127: 254         +
- 255: 254         +
- 511: 254         +
- 1023: 254        +
- 2047: 254        +
- 4095: 254        +
- fp_next_slot: 0  +
- 
-(1 row)
-
 DROP TABLE test1;
 -- check that using any of these functions with a partitioned table or index
 -- would fail
diff --git a/contrib/pageinspect/sql/page.sql b/contrib/pageinspect/sql/page.sql
index 8ac9991837..ee811759d5 100644
--- a/contrib/pageinspect/sql/page.sql
+++ b/contrib/pageinspect/sql/page.sql
@@ -1,26 +1,35 @@
 CREATE EXTENSION pageinspect;
 
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
 
-VACUUM test1;  -- set up FSM
+-- set up FSM and VM
+VACUUM test_rel_forks;
 
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
 
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
 
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
 
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
+
+SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
+
+SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
 
-SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
+DROP TABLE test_rel_forks;
+
+CREATE TABLE test1 (a int, b int);
+INSERT INTO test1 VALUES (16777217, 131584);
 
 SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
 
@@ -29,8 +38,6 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_
 SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits)
     FROM heap_page_items(get_raw_page('test1', 0));
 
-SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
-
 DROP TABLE test1;
 
 -- check that using any of these functions with a partitioned table or index
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index b8b5871559..dd38c45845 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -24,6 +24,12 @@
 #include "storage/lmgr.h"
 #include "storage/smgr.h"
 
+/*#define TRACE_TARGETBLOCK */
+
+static BlockNumber get_page_no_fsm(Relation relation,
+				BlockNumber prevBlockAttempted,
+				bool *try_every_page);
+
 
 /*
  * RelationPutHeapTuple - place tuple at specified page
@@ -315,13 +321,15 @@ RelationGetBufferForTuple(Relation relation, Size len,
 						  BulkInsertState bistate,
 						  Buffer *vmbuffer, Buffer *vmbuffer_other)
 {
-	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
+	bool		always_extend = (options & HEAP_INSERT_SKIP_FSM),
+				try_every_page = false;
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
 	Size		pageFreeSpace = 0,
 				saveFreeSpace = 0;
 	BlockNumber targetBlock,
-				otherBlock;
+				otherBlock,
+				prevBlockAttempted;
 	bool		needLock;
 
 	len = MAXALIGN(len);		/* be conservative */
@@ -355,47 +363,42 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 * loop around and retry multiple times. (To insure this isn't an infinite
 	 * loop, we must update the FSM with the correct amount of free space on
 	 * each page that proves not to be suitable.)  If the FSM has no record of
-	 * a page with enough free space, we give up and extend the relation.
+	 * a page with enough free space, we try every page if the heap is small,
+	 * or give up and extend the relation.
 	 *
-	 * When use_fsm is false, we either put the tuple onto the existing target
-	 * page or extend the relation.
+	 * When always_extend is true, we either put the tuple onto the existing
+	 * target page or extend the relation.
 	 */
 	if (len + saveFreeSpace > MaxHeapTupleSize)
 	{
 		/* can't fit, don't bother asking FSM */
 		targetBlock = InvalidBlockNumber;
-		use_fsm = false;
+		always_extend = true;
 	}
 	else if (bistate && bistate->current_buf != InvalidBuffer)
 		targetBlock = BufferGetBlockNumber(bistate->current_buf);
 	else
 		targetBlock = RelationGetTargetBlock(relation);
 
-	if (targetBlock == InvalidBlockNumber && use_fsm)
+	if (targetBlock == InvalidBlockNumber && !always_extend)
 	{
 		/*
 		 * We have no cached target page, so ask the FSM for an initial
 		 * target.
 		 */
 		targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
-
-		/*
-		 * If the FSM knows nothing of the rel, try the last page before we
-		 * give up and extend.  This avoids one-tuple-per-page syndrome during
-		 * bootstrapping or in a recently-started system.
-		 */
 		if (targetBlock == InvalidBlockNumber)
-		{
-			BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
-
-			if (nblocks > 0)
-				targetBlock = nblocks - 1;
-		}
+			targetBlock = get_page_no_fsm(relation, InvalidBlockNumber,
+										  &try_every_page);
 	}
 
 loop:
 	while (targetBlock != InvalidBlockNumber)
 	{
+
+#ifdef TRACE_TARGETBLOCK
+		elog(DEBUG1, "Attempting block %u", targetBlock);
+#endif
 		/*
 		 * Read and exclusive-lock the target block, as well as the other
 		 * block if one was given, taking suitable care with lock ordering and
@@ -482,6 +485,10 @@ loop:
 		pageFreeSpace = PageGetHeapFreeSpace(page);
 		if (len + saveFreeSpace <= pageFreeSpace)
 		{
+
+#ifdef TRACE_TARGETBLOCK
+			elog(DEBUG1, "Returning buffer for block %u", targetBlock);
+#endif
 			/* use this page as future insert target, too */
 			RelationSetTargetBlock(relation, targetBlock);
 			return buffer;
@@ -502,18 +509,37 @@ loop:
 			ReleaseBuffer(buffer);
 		}
 
-		/* Without FSM, always fall out of the loop and extend */
-		if (!use_fsm)
+		if (always_extend)
 			break;
 
-		/*
-		 * Update FSM as to condition of this page, and ask for another page
-		 * to try.
-		 */
-		targetBlock = RecordAndGetPageWithFreeSpace(relation,
-													targetBlock,
-													pageFreeSpace,
-													len + saveFreeSpace);
+		if (try_every_page)
+		{
+			/* We've tried every page; extend. */
+			if (targetBlock == 0)
+				break;
+
+			/* Try the next lower block number. */
+			targetBlock--;
+
+#ifdef TRACE_TARGETBLOCK
+			elog(DEBUG1, "Trying next lower block number");
+#endif
+		}
+		else
+		{
+			/*
+			 * Update FSM as to condition of this page, and ask for another
+			 * page to try.
+			 */
+			prevBlockAttempted = targetBlock;
+			targetBlock = RecordAndGetPageWithFreeSpace(relation,
+														targetBlock,
+														pageFreeSpace,
+														len + saveFreeSpace);
+			if (targetBlock == InvalidBlockNumber)
+				targetBlock = get_page_no_fsm(relation, prevBlockAttempted,
+											  &try_every_page);
+		}
 	}
 
 	/*
@@ -534,7 +560,7 @@ loop:
 	 */
 	if (needLock)
 	{
-		if (!use_fsm)
+		if (always_extend || try_every_page)
 			LockRelationForExtension(relation, ExclusiveLock);
 		else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
 		{
@@ -554,6 +580,10 @@ loop:
 			if (targetBlock != InvalidBlockNumber)
 			{
 				UnlockRelationForExtension(relation, ExclusiveLock);
+
+				/* This shouldn't be true, but let's make sure it isn't. */
+				try_every_page = false;
+
 				goto loop;
 			}
 
@@ -627,3 +657,54 @@ loop:
 
 	return buffer;
 }
+
+/*
+ * If the FSM has no information, first try the last page in the relation
+ * if we haven't already.  This avoids one-tuple-per-page syndrome during
+ * bootstrapping or in a recently-started system.
+ *
+ * If the heap is small enough, it likely has no FSM (or a truncated one),
+ * but even if it does, just try every page.
+ *
+ * If InvalidBlockNumber is returned, extend the relation.
+ */
+static BlockNumber
+get_page_no_fsm(Relation relation,
+				BlockNumber prevBlockAttempted,
+				bool *try_every_page)
+{
+	BlockNumber nblocks = RelationGetNumberOfBlocks(relation),
+				targetBlock = InvalidBlockNumber;
+
+	if (nblocks > 0)
+	{
+		targetBlock = nblocks - 1;
+
+		if (nblocks <= HEAP_FSM_EXTENSION_THRESHOLD)
+		{
+			*try_every_page = true;
+
+#ifdef TRACE_TARGETBLOCK
+			elog(DEBUG1, "Setting try_every_page");
+#endif
+			/* If we already tried the last page, skip it or extend. */
+			if (targetBlock == prevBlockAttempted)
+			{
+				if (nblocks > 1)
+				{
+					Assert(targetBlock != InvalidBlockNumber);
+					targetBlock--;
+				}
+				else
+					targetBlock = InvalidBlockNumber;
+			}
+		}
+		else
+		{
+			/* If we already tried the last page, extend. */
+			if (targetBlock == prevBlockAttempted)
+				targetBlock = InvalidBlockNumber;
+		}
+	}
+	return targetBlock;
+}
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 7c4ad1c449..8414e33aed 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -111,6 +111,7 @@ static BlockNumber fsm_search(Relation rel, uint8 min_cat);
 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
 				BlockNumber start, BlockNumber end,
 				bool *eof);
+static bool allow_write_to_fsm(Relation rel, BlockNumber heapblk);
 
 
 /******** Public API ********/
@@ -125,8 +126,7 @@ static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
  * will turn out to have too little space available by the time the caller
  * gets a lock on it.  In that case, the caller should report the actual
  * amount of free space available on that page and then try again (see
- * RecordAndGetPageWithFreeSpace).  If InvalidBlockNumber is returned,
- * extend the relation.
+ * RecordAndGetPageWithFreeSpace).
  */
 BlockNumber
 GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
@@ -155,6 +155,9 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
 	uint16		slot;
 	int			search_slot;
 
+	if (!allow_write_to_fsm(rel, oldPage))
+		return InvalidBlockNumber;
+
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(oldPage, &slot);
 
@@ -184,6 +187,9 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 	FSMAddress	addr;
 	uint16		slot;
 
+	if (!allow_write_to_fsm(rel, heapBlk))
+		return;
+
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(heapBlk, &slot);
 
@@ -204,11 +210,35 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 	BlockNumber blkno;
 	Buffer		buf;
 	Page		page;
+	bool		write_to_fsm;
 
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(heapBlk, &slot);
 	blkno = fsm_logical_to_physical(addr);
 
+	/* This is meant to mirror the logic in allow_write_to_fsm() */
+	if (heapBlk > HEAP_FSM_EXTENSION_THRESHOLD)
+		write_to_fsm = true;
+	else
+	{
+		/* Open the relation at smgr level */
+		SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+		if (smgrexists(smgr, FSM_FORKNUM))
+			write_to_fsm = true;
+		else
+		{
+			BlockNumber heap_nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
+			if (heap_nblocks > HEAP_FSM_EXTENSION_THRESHOLD)
+				write_to_fsm = true;
+			else
+				write_to_fsm = false;
+		}
+	}
+
+	if (!write_to_fsm)
+		return;
+
 	/* If the page doesn't exist already, extend */
 	buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
 	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
@@ -904,3 +934,33 @@ fsm_vacuum_page(Relation rel, FSMAddress addr,
 
 	return max_avail;
 }
+
+/*
+ * For heaps we prevent extension of the FSM unless the number
+ * of pages exceeds HEAP_FSM_EXTENSION_THRESHOLD. For tables
+ * that don't already have a FSM, this will save an inode
+ * and a few kB of space.
+ */
+static bool
+allow_write_to_fsm(Relation rel, BlockNumber heapblk)
+{
+	BlockNumber		heap_nblocks;
+
+	if (heapblk > HEAP_FSM_EXTENSION_THRESHOLD)
+		return true;
+
+	/* Index rels can always create an FSM. */
+	if (rel->rd_rel->relkind != RELKIND_RELATION)
+		return true;
+
+	RelationOpenSmgr(rel);
+	if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
+		return true;
+
+	/* last resort */
+	heap_nblocks = RelationGetNumberOfBlocks(rel);
+	if (heap_nblocks > HEAP_FSM_EXTENSION_THRESHOLD)
+		return true;
+	else
+		return false;
+}
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 726eb30fb8..68e4d27818 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -18,6 +18,10 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
+/* Only extend a heap's FSM if the heap has more than this many blocks */
+/* TODO: Performance-test different values. */
+#define HEAP_FSM_EXTENSION_THRESHOLD 10
+
 /* prototypes for public functions in freespace.c */
 extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
 extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
-- 
2.17.1

From b928d4fd814aefaa90bde5388ad81a1a68e363a5 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnay...@gmail.com>
Date: Tue, 16 Oct 2018 17:12:19 +0700
Subject: [PATCH v5 2/2] Add parameter nblocks to RecordPageWithFreeSpace().

VACUUM already knows the number of blocks in the relation, so let it pass
that value in. This way it is no longer penalized by an extra system call
to get the number of blocks each time it calls RecordPageWithFreeSpace().
---
 src/backend/access/brin/brin.c            |  2 +-
 src/backend/access/brin/brin_pageops.c    |  8 +++---
 src/backend/access/heap/hio.c             |  2 +-
 src/backend/commands/vacuumlazy.c         | 17 +++++++------
 src/backend/storage/freespace/freespace.c | 31 +++++++++++++++++++----
 src/backend/storage/freespace/indexfsm.c  |  4 +--
 src/include/storage/freespace.h           |  2 +-
 7 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index e95fbbcea7..7c5b1af764 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -1148,7 +1148,7 @@ terminate_brin_buildstate(BrinBuildState *state)
 		freespace = PageGetFreeSpace(page);
 		blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
 		ReleaseBuffer(state->bs_currentInsertBuf);
-		RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
+		RecordPageWithFreeSpace(state->bs_irel, blk, freespace, InvalidBlockNumber);
 		FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
 	}
 
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 040cb62e55..70d93878ba 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -310,7 +310,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
 
 		if (extended)
 		{
-			RecordPageWithFreeSpace(idxrel, newblk, freespace);
+			RecordPageWithFreeSpace(idxrel, newblk, freespace, InvalidBlockNumber);
 			FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
 		}
 
@@ -461,7 +461,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
 
 	if (extended)
 	{
-		RecordPageWithFreeSpace(idxrel, blk, freespace);
+		RecordPageWithFreeSpace(idxrel, blk, freespace, InvalidBlockNumber);
 		FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
 	}
 
@@ -654,7 +654,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf)
 
 	/* Measure free space and record it */
 	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
-							br_page_get_freespace(page));
+							br_page_get_freespace(page), InvalidBlockNumber);
 }
 
 /*
@@ -895,7 +895,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
 	 * pages whose FSM records were forgotten in a crash.
 	 */
 	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
-							br_page_get_freespace(page));
+							br_page_get_freespace(page), InvalidBlockNumber);
 }
 
 
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index dd38c45845..47ca168465 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -246,7 +246,7 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
 		 * chance of making this page visible to other concurrently inserting
 		 * backends, and we want that to happen without delay.
 		 */
-		RecordPageWithFreeSpace(relation, blockNum, freespace);
+		RecordPageWithFreeSpace(relation, blockNum, freespace, InvalidBlockNumber);
 	}
 	while (--extraBlocks > 0);
 
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 8996d366e9..9f4d8adcdb 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -154,7 +154,7 @@ static BufferAccessStrategy vac_strategy;
 static void lazy_scan_heap(Relation onerel, int options,
 			   LVRelStats *vacrelstats, Relation *Irel, int nindexes,
 			   bool aggressive);
-static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
+static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks);
 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
 static void lazy_vacuum_index(Relation indrel,
 				  IndexBulkDeleteResult **stats,
@@ -759,7 +759,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 			pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
 
 			/* Remove tuples from heap */
-			lazy_vacuum_heap(onerel, vacrelstats);
+			lazy_vacuum_heap(onerel, vacrelstats, nblocks);
 
 			/*
 			 * Forget the now-vacuumed tuples, and press on, but be careful
@@ -897,7 +897,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 			MarkBufferDirty(buf);
 			UnlockReleaseBuffer(buf);
 
-			RecordPageWithFreeSpace(onerel, blkno, freespace);
+			RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
 			continue;
 		}
 
@@ -936,7 +936,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 			}
 
 			UnlockReleaseBuffer(buf);
-			RecordPageWithFreeSpace(onerel, blkno, freespace);
+			RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
 			continue;
 		}
 
@@ -1339,7 +1339,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 		 * taken if there are no indexes.)
 		 */
 		if (vacrelstats->num_dead_tuples == prev_dead_count)
-			RecordPageWithFreeSpace(onerel, blkno, freespace);
+			RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
 	}
 
 	/* report that everything is scanned and vacuumed */
@@ -1401,7 +1401,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 		/* Remove tuples from heap */
 		pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 									 PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
-		lazy_vacuum_heap(onerel, vacrelstats);
+		lazy_vacuum_heap(onerel, vacrelstats, nblocks);
 		vacrelstats->num_index_scans++;
 	}
 
@@ -1472,9 +1472,10 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
  * Note: the reason for doing this as a second pass is we cannot remove
  * the tuples until we've removed their index entries, and we want to
  * process index entry removal in batches as large as possible.
+ * Note: nblocks is passed as an optimization for RecordPageWithFreeSpace().
  */
 static void
-lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
+lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
 {
 	int			tupindex;
 	int			npages;
@@ -1511,7 +1512,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 		freespace = PageGetHeapFreeSpace(page);
 
 		UnlockReleaseBuffer(buf);
-		RecordPageWithFreeSpace(onerel, tblk, freespace);
+		RecordPageWithFreeSpace(onerel, tblk, freespace, nblocks);
 		npages++;
 	}
 
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 8414e33aed..7592e17279 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -111,7 +111,7 @@ static BlockNumber fsm_search(Relation rel, uint8 min_cat);
 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
 				BlockNumber start, BlockNumber end,
 				bool *eof);
-static bool allow_write_to_fsm(Relation rel, BlockNumber heapblk);
+static bool allow_write_to_fsm(Relation rel, BlockNumber heapblk, BlockNumber cached_nblocks);
 
 
 /******** Public API ********/
@@ -155,7 +155,7 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
 	uint16		slot;
 	int			search_slot;
 
-	if (!allow_write_to_fsm(rel, oldPage))
+	if (!allow_write_to_fsm(rel, oldPage, InvalidBlockNumber))
 		return InvalidBlockNumber;
 
 	/* Get the location of the FSM byte representing the heap block */
@@ -181,13 +181,13 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
  * FreeSpaceMapVacuum call, which updates the upper level pages.
  */
 void
-RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
+RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail, BlockNumber nblocks)
 {
 	int			new_cat = fsm_space_avail_to_cat(spaceAvail);
 	FSMAddress	addr;
 	uint16		slot;
 
-	if (!allow_write_to_fsm(rel, heapBlk))
+	if (!allow_write_to_fsm(rel, heapBlk, nblocks))
 		return;
 
 	/* Get the location of the FSM byte representing the heap block */
@@ -942,9 +942,10 @@ fsm_vacuum_page(Relation rel, FSMAddress addr,
  * and a few kB of space.
  */
 static bool
-allow_write_to_fsm(Relation rel, BlockNumber heapblk)
+allow_write_to_fsm(Relation rel, BlockNumber heapblk, BlockNumber cached_nblocks)
 {
 	BlockNumber		heap_nblocks;
+	bool			skip_get_nblocks = false;
 
 	if (heapblk > HEAP_FSM_EXTENSION_THRESHOLD)
 		return true;
@@ -953,10 +954,30 @@ allow_write_to_fsm(Relation rel, BlockNumber heapblk)
 	if (rel->rd_rel->relkind != RELKIND_RELATION)
 		return true;
 
+	/*
+	 * If the caller knows nblocks, we can avoid a system call later.
+	 * If it doesn't, maybe we have relpages from a previous VACUUM.
+	 * Since the table may have extended since then, we still have to
+	 * count the pages later if we can't return now.
+	 */
+	if (cached_nblocks != InvalidBlockNumber)
+	{
+		if (cached_nblocks > HEAP_FSM_EXTENSION_THRESHOLD)
+			return true;
+		else
+			skip_get_nblocks = true;
+	}
+	else if (rel->rd_rel->relpages != InvalidBlockNumber &&
+			 rel->rd_rel->relpages > HEAP_FSM_EXTENSION_THRESHOLD)
+		return true;
+
 	RelationOpenSmgr(rel);
 	if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
 		return true;
 
+	if (skip_get_nblocks)
+		return false;
+
 	/* last resort */
 	heap_nblocks = RelationGetNumberOfBlocks(rel);
 	if (heap_nblocks > HEAP_FSM_EXTENSION_THRESHOLD)
diff --git a/src/backend/storage/freespace/indexfsm.c b/src/backend/storage/freespace/indexfsm.c
index e21047b96f..d8fd29a7eb 100644
--- a/src/backend/storage/freespace/indexfsm.c
+++ b/src/backend/storage/freespace/indexfsm.c
@@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel)
 void
 RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
 {
-	RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1);
+	RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1, InvalidBlockNumber);
 }
 
 
@@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
 void
 RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
 {
-	RecordPageWithFreeSpace(rel, usedBlock, 0);
+	RecordPageWithFreeSpace(rel, usedBlock, 0, InvalidBlockNumber);
 }
 
 /*
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 68e4d27818..e86ad9d77e 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -30,7 +30,7 @@ extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
 							  Size oldSpaceAvail,
 							  Size spaceNeeded);
 extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
-						Size spaceAvail);
+						Size spaceAvail, BlockNumber nblocks);
 extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 							Size spaceAvail);
 
-- 
2.17.1

Reply via email to