commit c07e8f64131cc0cbb933a3881596a74a4782e351
Author: mithun <mithun@localhost.localdomain>
Date:   Sun Jun 4 12:12:57 2017 +0530

    autoprewarm_11.patch

diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
index 7ad941e..88580d1 100644
--- a/contrib/pg_prewarm/Makefile
+++ b/contrib/pg_prewarm/Makefile
@@ -1,10 +1,10 @@
 # contrib/pg_prewarm/Makefile
 
 MODULE_big = pg_prewarm
-OBJS = pg_prewarm.o $(WIN32RES)
+OBJS = pg_prewarm.o autoprewarm.o $(WIN32RES)
 
 EXTENSION = pg_prewarm
-DATA = pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
+DATA = pg_prewarm--1.1--1.2.sql pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
 PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache"
 
 ifdef USE_PGXS
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
new file mode 100644
index 0000000..51acc09
--- /dev/null
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -0,0 +1,1055 @@
+/*-------------------------------------------------------------------------
+ *
+ * autoprewarm.c
+ *		Automatically prewarm the shared buffer pool when server restarts.
+ *
+ * DESCRIPTION
+ *
+ *		It is a bgworker which automatically records information about blocks
+ *		which were present in the buffer pool before server shutdown and then
+ *		prewarms the buffer pool upon server restart with those blocks.
+ *
+ *		How does it work? When the shared library "pg_prewarm" is preloaded, a
+ *		bgworker "autoprewarm" is launched immediately after the server has
+ *		reached consistent state. The bgworker will start loading blocks
+ *		recorded in the format BlockInfoRecord
+ *		database,tablespace,filenode,forknum,blocknum in
+ *		$PGDATA/AUTOPREWARM_FILE, until there is no free buffer left in the
+ *		buffer pool. This way we do not replace any new blocks which were
+ *		loaded either by the recovery process or the querying clients.
+ *
+ *		Once the "autoprewarm" bgworker has completed its prewarm task, it will
+ *		start a new task to periodically dump the BlockInfoRecords related to
+ *		blocks which are currently in shared buffer pool. Upon next server
+ *		restart, the bgworker will prewarm the buffer pool by loading those
+ *		blocks. The GUC pg_prewarm.dump_interval will control the dumping
+ *		activity of the bgworker.
+ *
+ *	Copyright (c) 2016-2017, PostgreSQL Global Development Group
+ *
+ *	IDENTIFICATION
+ *		contrib/pg_prewarm/autoprewarm.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+/* These are always necessary for a bgworker. */
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/shmem.h"
+
+/* These are necessary for prewarm utilities. */
+#include "access/heapam.h"
+#include "access/xact.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+#include "pgstat.h"
+#include "storage/buf_internals.h"
+#include "storage/dsm.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/relfilenodemap.h"
+#include "utils/resowner.h"
+
+PG_FUNCTION_INFO_V1(launch_autoprewarm_dump);
+PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
+
+#define AT_PWARM_OFF -1
+#define AT_PWARM_DUMP_AT_SHUTDOWN_ONLY 0
+#define AT_PWARM_DEFAULT_DUMP_INTERVAL 300
+
+#define AUTOPREWARM_FILE "autoprewarm.blocks"
+
+/* Primary functions */
+void		_PG_init(void);
+void		autoprewarm_main(Datum main_arg);
+static void dump_block_info_periodically(void);
+static pid_t autoprewarm_dump_launcher(void);
+static void setup_autoprewarm(BackgroundWorker *autoprewarm,
+				  const char *worker_name,
+				  const char *worker_function,
+				  Datum main_arg, int restart_time,
+				  int extra_flags);
+void		load_one_database(Datum main_arg);
+
+/*
+ * Signal Handlers.
+ */
+
+static void apw_sigterm_handler(SIGNAL_ARGS);
+static void apw_sighup_handler(SIGNAL_ARGS);
+static void apw_sigusr1_handler(SIGNAL_ARGS);
+
+/* flags set by signal handlers */
+static volatile sig_atomic_t got_sigterm = false;
+static volatile sig_atomic_t got_sighup = false;
+
+/*
+ *	Signal handler for SIGTERM.
+ *	Sets got_sigterm so that the main loop terminates, and sets our latch to
+ *	wake it up.
+ */
+static void
+apw_sigterm_handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sigterm = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;			/* restore errno for the interrupted code */
+}
+
+/*
+ *	Signal handler for SIGHUP.
+ *	Sets got_sighup so that the process rereads the config file, and sets our
+ *	latch to wake it up.
+ */
+static void
+apw_sighup_handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sighup = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;			/* restore errno for the interrupted code */
+}
+
+/*
+ *	Signal handler for SIGUSR1.
+ *	The per-database prewarm workers notify us with SIGUSR1 on their
+ *	startup/shutdown.  No flag is recorded; we only set our latch.
+ */
+static void
+apw_sigusr1_handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;			/* restore errno for the interrupted code */
+}
+
+/* ============================================================================
+ * ==============	types and variables used by autoprewarm   =============
+ * ============================================================================
+ */
+
+/*
+ * Identity of one buffered block, as written to and read from the dump file.
+ */
+typedef struct BlockInfoRecord
+{
+	Oid			database;		/* 0 for blocks of global objects */
+	Oid			tablespace;
+	Oid			filenode;
+	ForkNumber	forknum;
+	BlockNumber blocknum;
+} BlockInfoRecord;
+
+/*
+ * Tasks performed by autoprewarm workers (passed as the worker's main_arg).
+ */
+typedef enum
+{
+	TASK_PREWARM_BUFFERPOOL,	/* prewarm the buffer pool, then dump. */
+	TASK_DUMP_BUFFERPOOL_INFO	/* only dump the buffer pool block info. */
+} AutoPrewarmTask;
+
+/*
+ * Shared state information about the running autoprewarm bgworker.
+ */
+typedef struct AutoPrewarmSharedState
+{
+	LWLock		lock;			/* mutual exclusion */
+	pid_t		bgworker_pid;	/* PID of main bgworker, or InvalidPid */
+	pid_t		pid_using_dumpfile;		/* PID loading or dumping the file */
+	bool		skip_prewarm_on_restart;		/* if set true, prewarm task
+												 * will not be done */
+
+	/* following items are for communication with per-database worker */
+	dsm_handle	block_info_handle;	/* DSM holding the BlockInfoRecords */
+	Oid			database;		/* database the worker must connect to */
+	int			prewarm_start_idx;	/* first record index to load */
+	int			prewarm_stop_idx;	/* index just past the last record */
+} AutoPrewarmSharedState;
+
+static AutoPrewarmSharedState *state = NULL;
+
+/* GUC variable which control the dump activity of autoprewarm. */
+static int	dump_interval = 0;
+
+/*
+ * GUC variable which say whether autoprewarm worker has to be started when
+ * preloaded.
+ */
+static bool autoprewarm = true;
+
+/* Return -1 or 1 from the caller if a->fld and b->fld differ; else fall out. */
+#define cmp_member_elem(fld)	\
+do { \
+	if (a->fld < b->fld)		\
+		return -1;				\
+	else if (a->fld > b->fld)	\
+		return 1;				\
+} while(0)
+
+/*
+ * blockinfo_cmp
+ *		qsort() comparator: database, tablespace, filenode, forknum, blocknum.
+ */
+static int
+blockinfo_cmp(const void *p, const void *q)
+{
+	const BlockInfoRecord *a = (const BlockInfoRecord *) p;
+	const BlockInfoRecord *b = (const BlockInfoRecord *) q;
+
+	cmp_member_elem(database);
+	cmp_member_elem(tablespace);
+	cmp_member_elem(filenode);
+	cmp_member_elem(forknum);
+	cmp_member_elem(blocknum);
+	return 0;					/* all members equal */
+}
+
+/* ============================================================================
+ * =====================	prewarm part of autoprewarm =======================
+ * ============================================================================
+ */
+
+/*
+ * reset_shm_state
+ *		on_shmem_exit callback: clear our PID from the shared state.
+ */
+
+static void
+reset_shm_state(int code, Datum arg)
+{
+	if (state->pid_using_dumpfile == MyProcPid)
+		state->pid_using_dumpfile = InvalidPid;
+	if (state->bgworker_pid == MyProcPid)
+		state->bgworker_pid = InvalidPid;
+}
+
+/*
+ * init_autoprewarm_state
+ *		Attach to, and if needed initialize, the autoprewarm shared memory
+ */
+static void
+init_autoprewarm_state(void)
+{
+	bool		found = false;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	state = ShmemInitStruct("autoprewarm",
+							sizeof(AutoPrewarmSharedState),
+							&found);
+	if (!found)
+	{
+		/* First time through: nobody owns the worker slot or the file. */
+		LWLockInitialize(&state->lock, LWLockNewTrancheId());
+		state->bgworker_pid = InvalidPid;
+		state->pid_using_dumpfile = InvalidPid;
+		state->skip_prewarm_on_restart = false;
+	}
+
+	LWLockRelease(AddinShmemInitLock);
+}
+
+/*
+ * load_one_database
+ *		Entry point of the per-database prewarm worker.
+ *
+ * Connects to state->database and tries to load the blocks described by the
+ * shared BlockInfoRecord array, from index state->prewarm_start_idx up to
+ * (but not including) state->prewarm_stop_idx.
+ */
+void
+load_one_database(Datum main_arg)
+{
+	uint32		pos;
+	BlockInfoRecord *block_info;
+	Relation	rel = NULL;
+	BlockNumber nblocks = 0;
+	BlockInfoRecord *old_blk;
+	dsm_segment *seg;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, apw_sigterm_handler);
+	pqsignal(SIGHUP, apw_sighup_handler);
+
+	/*
+	 * We're now ready to receive signals
+	 */
+	BackgroundWorkerUnblockSignals();
+
+	init_autoprewarm_state();
+	seg = dsm_attach(state->block_info_handle);
+	if (seg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("could not map dynamic shared memory segment")));
+
+	block_info = (BlockInfoRecord *) dsm_segment_address(seg);
+
+	BackgroundWorkerInitializeConnectionByOid(state->database, InvalidOid);
+	old_blk = NULL;
+	pos = state->prewarm_start_idx;
+
+	while (!got_sigterm && pos < state->prewarm_stop_idx && have_free_buffer())
+	{
+		BlockInfoRecord *blk = &block_info[pos];
+		Buffer		buf;
+
+		/*
+		 * Quit if we've reached records for another database, unless the
+		 * previous blocks were of global objects (database 0), which are
+		 * combined with the next database's block infos.
+		 */
+		if (old_blk != NULL && old_blk->database != blk->database &&
+			old_blk->database != 0)
+			break;
+
+		/*
+		 * When we reach a new relation, close the old one.  Note, however,
+		 * that the previous try_relation_open may have failed, in which case
+		 * rel will be NULL.
+		 */
+		if (old_blk != NULL && old_blk->filenode != blk->filenode &&
+			rel != NULL)
+		{
+			relation_close(rel, AccessShareLock);
+			rel = NULL;
+			CommitTransactionCommand();
+		}
+
+		/*
+		 * Try to open each new relation, but only once, when we first
+		 * encounter it.  If it's been dropped, skip the associated blocks.
+		 */
+		if (old_blk == NULL || old_blk->filenode != blk->filenode)
+		{
+			Oid			reloid;
+
+			Assert(rel == NULL);
+			StartTransactionCommand();
+			reloid = RelidByRelfilenode(blk->tablespace, blk->filenode);
+			if (OidIsValid(reloid))
+				rel = try_relation_open(reloid, AccessShareLock);
+
+			if (!rel)
+				CommitTransactionCommand();
+		}
+		if (!rel)
+		{
+			++pos;
+			old_blk = blk;
+			continue;
+		}
+
+		/* Once per fork, check for fork existence and size. */
+		if (old_blk == NULL ||
+			old_blk->filenode != blk->filenode ||
+			old_blk->forknum != blk->forknum)
+		{
+			RelationOpenSmgr(rel);
+
+			/*
+			 * smgrexists is not safe for illegal forknum, so test before
+			 * calling same.
+			 */
+			if (blk->forknum > InvalidForkNumber &&
+				blk->forknum <= MAX_FORKNUM &&
+				smgrexists(rel->rd_smgr, blk->forknum))
+				nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
+			else
+				nblocks = 0;
+		}
+
+		/* check if blocknum is valid and within the fork file size. */
+		if (blk->blocknum >= nblocks)
+		{
+			/* skip this block and go on with the next record. */
+			++pos;
+			old_blk = blk;
+			continue;
+		}
+
+		/* Prewarm buffer. */
+		buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
+								 NULL);
+		if (BufferIsValid(buf))
+			ReleaseBuffer(buf);
+
+		old_blk = blk;
+		++pos;
+	}
+
+	dsm_detach(seg);
+
+	/* close the last relation, if still open, and end its transaction. */
+	if (rel)
+	{
+		relation_close(rel, AccessShareLock);
+		CommitTransactionCommand();
+	}
+
+	return;
+}
+
+/*
+ * launch_and_wait_for_per_database_worker
+ *		Register a per-database dynamic worker and wait until it has exited.
+ */
+static void
+launch_and_wait_for_per_database_worker(void)
+{
+	BackgroundWorker worker;
+	BackgroundWorkerHandle *handle = NULL;
+
+	setup_autoprewarm(&worker, "autoprewarm", "load_one_database",
+					  (Datum) 0, BGW_NEVER_RESTART,
+					  BGWORKER_BACKEND_DATABASE_CONNECTION);
+
+	/* set bgw_notify_pid so that we can use WaitForBackgroundWorkerShutdown */
+	worker.bgw_notify_pid = MyProcPid;
+
+	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+				 errmsg("registering dynamic bgworker autoprewarm failed"),
+				 errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
+	}
+
+	/* Anything but BGWH_STOPPED means the postmaster died: exit as well. */
+	if (WaitForBackgroundWorkerShutdown(handle) != BGWH_STOPPED)
+		proc_exit(1);
+}
+
+/*
+ * prewarm_buffer_pool
+ *		The main routine which prewarms the buffer pool
+ *
+ * The prewarm bgworker first loads all of the BlockInfoRecords found in
+ * $PGDATA/AUTOPREWARM_FILE into a dsm segment and sorts them. The records are
+ * then grouped by database, and for each group a per-database worker is
+ * launched to load the corresponding blocks. The workers are launched
+ * sequentially: the next one starts only after the previous one has finished
+ * its job.
+ */
+static void
+prewarm_buffer_pool(void)
+{
+	FILE	   *file = NULL;
+	uint32		num_elements,
+				i;
+	BlockInfoRecord *blkinfo;
+	dsm_segment *seg;
+
+	/*
+	 * since there could be at max one worker who could do a prewarm no need
+	 * to take lock before setting skip_prewarm_on_restart.
+	 */
+	state->skip_prewarm_on_restart = true;
+
+	LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+	if (state->pid_using_dumpfile == InvalidPid)
+		state->pid_using_dumpfile = MyProcPid;
+	else
+	{
+		LWLockRelease(&state->lock);
+		ereport(LOG,
+				(errmsg("skipping prewarm because block dump file is being written by PID %d",
+						state->pid_using_dumpfile)));
+		return;
+	}
+
+	LWLockRelease(&state->lock);
+
+	file = AllocateFile(AUTOPREWARM_FILE, PG_BINARY_R);
+	if (!file)
+	{
+		if (errno != ENOENT)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m",
+							AUTOPREWARM_FILE)));
+
+		state->pid_using_dumpfile = InvalidPid;
+		return;					/* No file to load. */
+	}
+
+	/* The header must match what dump_now() writes: "<<count>>\n". */
+	if (fscanf(file, "<<%u>>\n", &num_elements) != 1)
+	{
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read from file \"%s\": %m",
+						AUTOPREWARM_FILE)));
+	}
+
+	seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
+
+	blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
+
+	for (i = 0; i < num_elements; i++)
+	{
+		/* get next block. */
+		if (5 != fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
+						&blkinfo[i].tablespace, &blkinfo[i].filenode,
+						(uint32 *) &blkinfo[i].forknum, &blkinfo[i].blocknum))
+			break;
+	}
+
+	FreeFile(file);
+
+	if (num_elements != i)
+		elog(ERROR, "autoprewarm block dump has %u entries but expected %u",
+			 i, num_elements);
+
+	/*
+	 * sort the block number to increase the chance of sequential reads during
+	 * load.
+	 */
+	pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord), blockinfo_cmp);
+
+	state->block_info_handle = dsm_segment_handle(seg);
+	state->prewarm_start_idx = state->prewarm_stop_idx = 0;
+
+	/* get next database's first block info's position. */
+	while (state->prewarm_start_idx < num_elements)
+	{
+		uint32		j = state->prewarm_start_idx;
+		Oid			current_db = blkinfo[j].database;
+
+		/*
+		 * advance j to the end of the block infos of the current database,
+		 * never reading past the last record.
+		 */
+		do
+		{
+			j++;
+			if (j < num_elements && current_db != blkinfo[j].database)
+			{
+				/*
+				 * For block info of a global object whose database will be 0
+				 * try to combine them with next non-zero database's block
+				 * infos to load.
+				 */
+				if (current_db != InvalidOid)
+					break;
+				current_db = blkinfo[j].database;
+			}
+		} while (j < num_elements);
+
+		/*
+		 * If we are here with database as InvalidOid it means we only have
+		 * block_infos belonging to global objects. As we do not have a valid
+		 * database to connect we shall simply ignore them.
+		 */
+		if (current_db == InvalidOid)
+			break;
+
+		state->prewarm_stop_idx = j;
+		state->database = current_db;
+
+		Assert(state->prewarm_start_idx < state->prewarm_stop_idx);
+
+		/*
+		 * Register a per-database worker to load new database's block. And
+		 * wait until they finish their job to launch next one.
+		 */
+		launch_and_wait_for_per_database_worker();
+		state->prewarm_start_idx = state->prewarm_stop_idx;
+	}
+
+	dsm_detach(seg);
+	state->block_info_handle = DSM_HANDLE_INVALID;
+
+	state->pid_using_dumpfile = InvalidPid;
+	ereport(LOG,
+			(errmsg("autoprewarm load task ended")));
+	return;
+}
+
+/* ============================================================================
+ * =============	buffer pool info dump part of autoprewarm	===============
+ * ============================================================================
+ */
+
+/* This sub-module is for periodically dumping buffer pool's block info into
+ * a dump file AUTOPREWARM_FILE.
+ * Each entry of block info looks like this:
+ * database,tablespace,filenode,forknum,blocknum and we shall call it as
+ * BlockInfoRecord. Note we write in the text form so that the dump information
+ * is readable and if necessary can be carefully edited.
+ */
+
+/*
+ * dump_now
+ *		Dumps block infos in buffer pool to AUTOPREWARM_FILE.
+ *
+ * Returns the number of block infos written, or 0 if the dump was skipped.
+ */
+static uint32
+dump_now(bool is_bgworker)
+{
+	static char transient_dump_file_path[MAXPGPATH];
+	uint32		i;
+	int			ret,
+				buflen;
+	uint32		num_blocks;
+	BlockInfoRecord *block_info_array;
+	BufferDesc *bufHdr;
+	int			fd;
+	char		buf[1024];
+
+	LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+	if (state->pid_using_dumpfile == InvalidPid)
+		state->pid_using_dumpfile = MyProcPid;
+	else
+	{
+		LWLockRelease(&state->lock);
+
+		if (!is_bgworker)
+			ereport(ERROR,
+					(errmsg("could not perform block dump because dump file is being used by PID %d",
+							state->pid_using_dumpfile)));
+		ereport(LOG,
+				(errmsg("skipping block dump because it is already being performed by PID %d",
+						state->pid_using_dumpfile)));
+		return 0;
+	}
+
+	LWLockRelease(&state->lock);
+
+	block_info_array =
+		(BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
+
+	for (num_blocks = 0, i = 0; i < NBuffers; i++)
+	{
+		uint32		buf_state;
+
+		/* In case of a SIGHUP, just reload the configuration. */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Have we been asked to stop dump?  If so, give up the dump file. */
+		if (dump_interval == AT_PWARM_OFF)
+		{
+			pfree(block_info_array);
+			state->pid_using_dumpfile = InvalidPid;
+			return 0;
+		}
+
+		bufHdr = GetBufferDescriptor(i);
+
+		/* lock each buffer header before inspecting. */
+		buf_state = LockBufHdr(bufHdr);
+
+		if (buf_state & BM_TAG_VALID)
+		{
+			block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
+			block_info_array[num_blocks].tablespace = bufHdr->tag.rnode.spcNode;
+			block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
+			block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
+			block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
+			++num_blocks;
+		}
+
+		UnlockBufHdr(bufHdr, buf_state);
+	}
+
+	snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
+
+	fd = OpenTransientFile(transient_dump_file_path,
+						   O_CREAT | O_WRONLY | O_TRUNC, 0666);
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open \"%s\": %m",
+						transient_dump_file_path)));
+
+	buflen = sprintf(buf, "<<%u>>\n", num_blocks);
+	if (write(fd, buf, buflen) < buflen)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\" : %m",
+						transient_dump_file_path)));
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		/* In case of a SIGHUP, just reload the configuration. */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Have we been asked to stop dump?  If so, discard the partial file. */
+		if (dump_interval == AT_PWARM_OFF)
+		{
+			pfree(block_info_array);
+			CloseTransientFile(fd);
+			unlink(transient_dump_file_path);
+			state->pid_using_dumpfile = InvalidPid;
+			return 0;
+		}
+
+		buflen = sprintf(buf, "%u,%u,%u,%u,%u\n",
+						 block_info_array[i].database,
+						 block_info_array[i].tablespace,
+						 block_info_array[i].filenode,
+						 (uint32) block_info_array[i].forknum,
+						 block_info_array[i].blocknum);
+
+		if (write(fd, buf, buflen) < buflen)
+		{
+			int			save_errno = errno;
+
+			CloseTransientFile(fd);
+			unlink(transient_dump_file_path);
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not write to file \"%s\": %m",
+							transient_dump_file_path)));
+		}
+	}
+
+	pfree(block_info_array);
+
+	/*
+	 * rename transient_dump_file_path to AUTOPREWARM_FILE to make things
+	 * permanent.
+	 */
+	ret = CloseTransientFile(fd);
+	if (ret != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\" : %m",
+						transient_dump_file_path)));
+	(void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
+
+	state->pid_using_dumpfile = InvalidPid;
+
+	ereport(LOG,
+			(errmsg("saved metadata info of %u blocks", num_blocks)));
+	return num_blocks;
+}
+
+/*
+ * dump_block_info_periodically
+ *		Loop which periodically calls dump_now()
+ *
+ * dump_now() is called every dump_interval seconds (a GUC) until SIGTERM is
+ * received or dumping is turned off; one last dump is done at shutdown.
+ */
+static void
+dump_block_info_periodically(void)
+{
+	TimestampTz last_dump_time = GetCurrentTimestamp();
+
+	while (!got_sigterm)
+	{
+		int			rc;
+		struct timeval nap;
+
+		nap.tv_sec = AT_PWARM_DEFAULT_DUMP_INTERVAL;
+		nap.tv_usec = 0;
+
+		/* Has been set not to dump. Nothing more to do. */
+		if (dump_interval == AT_PWARM_OFF)
+			return;
+
+		if (dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY)
+		{
+			TimestampTz current_time = GetCurrentTimestamp();
+
+			if (TimestampDifferenceExceeds(last_dump_time,
+										   current_time,
+										   (dump_interval * 1000)))
+			{
+				dump_now(true);
+				if (got_sigterm)
+					return;		/* got shutdown signal during or right after a
+								 * dump. And, I think better to return now. */
+				last_dump_time = GetCurrentTimestamp();
+				nap.tv_sec = dump_interval;
+				nap.tv_usec = 0;
+			}
+			else
+			{
+				long		secs;
+				int			usecs;
+
+				TimestampDifference(last_dump_time, current_time,
+									&secs, &usecs);
+				nap.tv_sec = dump_interval - secs;
+				nap.tv_usec = 0;
+			}
+		}
+
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
+					   PG_WAIT_EXTENSION);
+
+		/* Reset after waking so a signal arriving later is not lost. */
+		ResetLatch(&MyProc->procLatch);
+
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		/* In case of a SIGHUP, just reload the configuration. */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+	}
+
+	/* One last block info dump while postmaster shutdown. */
+	if (dump_interval != AT_PWARM_OFF)
+		dump_now(true);
+}
+
+/*
+ * autoprewarm_main
+ *		Main entry point of the autoprewarm bgworker; main_arg is the task
+ */
+void
+autoprewarm_main(Datum main_arg)
+{
+	AutoPrewarmTask todo_task;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, apw_sigterm_handler);
+	pqsignal(SIGHUP, apw_sighup_handler);
+	pqsignal(SIGUSR1, apw_sigusr1_handler);
+
+	/* We're now ready to receive signals */
+	BackgroundWorkerUnblockSignals();
+
+	todo_task = DatumGetInt32(main_arg);
+	Assert(todo_task == TASK_PREWARM_BUFFERPOOL ||
+		   todo_task == TASK_DUMP_BUFFERPOOL_INFO);
+	init_autoprewarm_state();
+
+	LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+	if (state->bgworker_pid != InvalidPid)
+	{
+		LWLockRelease(&state->lock);
+		ereport(LOG,
+				(errmsg("could not continue autoprewarm worker is already running under PID %d",
+						state->bgworker_pid)));
+		return;
+	}
+
+	state->bgworker_pid = MyProcPid;	/* claim the single worker slot */
+	LWLockRelease(&state->lock);
+
+	on_shmem_exit(reset_shm_state, 0);
+
+	ereport(LOG,
+			(errmsg("autoprewarm has started")));
+
+	/*
+	 * Prewarm first if asked to and not already attempted, then keep dumping.
+	 */
+	if (todo_task == TASK_PREWARM_BUFFERPOOL &&
+		!state->skip_prewarm_on_restart)
+		prewarm_buffer_pool();
+
+	dump_block_info_periodically();
+
+	ereport(LOG,
+			(errmsg("autoprewarm shutting down")));
+}
+
+/* ============================================================================
+ * =============	extension's entry functions/utilities	===================
+ * ============================================================================
+ */
+
+/*
+ * setup_autoprewarm
+ *		Common function to fill in a BackgroundWorker struct (no registration)
+ */
+static void
+setup_autoprewarm(BackgroundWorker *autoprewarm, const char *worker_name,
+			   const char *worker_function, Datum main_arg, int restart_time,
+				  int extra_flags)
+{
+	MemSet(autoprewarm, 0, sizeof(BackgroundWorker));
+	autoprewarm->bgw_flags = BGWORKER_SHMEM_ACCESS | extra_flags;
+
+	/* Bounded copies: strlcpy always NUL-terminates within BGW_MAXLEN. */
+	autoprewarm->bgw_start_time = BgWorkerStart_ConsistentState;
+	autoprewarm->bgw_restart_time = restart_time;
+	strlcpy(autoprewarm->bgw_library_name, "pg_prewarm", BGW_MAXLEN);
+	strlcpy(autoprewarm->bgw_function_name, worker_function, BGW_MAXLEN);
+	strlcpy(autoprewarm->bgw_name, worker_name, BGW_MAXLEN);
+	autoprewarm->bgw_main_arg = main_arg;
+}
+
+/*
+ * _PG_init
+ *		Module load hook: define GUCs and, if preloaded, register the worker
+ */
+void
+_PG_init(void)
+{
+	BackgroundWorker prewarm_worker;
+
+	/* This GUC is defined only while shared_preload_libraries is processed. */
+	if (process_shared_preload_libraries_in_progress)
+		DefineCustomBoolVariable("pg_prewarm.autoprewarm",
+								 "Enable/Disable auto-prewarm feature.",
+								 NULL,
+								 &autoprewarm,
+								 true,
+								 PGC_POSTMASTER,
+								 0,
+								 NULL,
+								 NULL,
+								 NULL);
+
+	DefineCustomIntVariable("pg_prewarm.dump_interval",
+					   "Sets the maximum time between two buffer pool dumps",
+							"If set to Zero, timer based dumping is disabled."
+							" If set to -1, stops the running autoprewarm.",
+							&dump_interval,
+							AT_PWARM_DEFAULT_DUMP_INTERVAL,
+							AT_PWARM_OFF, INT_MAX / 1000,
+							PGC_SIGHUP,
+							GUC_UNIT_S,
+							NULL,
+							NULL,
+							NULL);
+
+	EmitWarningsOnPlaceholders("pg_prewarm");
+
+	/* if not run as a preloaded library, nothing more to do here! */
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+
+	/* Request additional shared resources */
+	RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
+
+	/* Has been set not to start autoprewarm bgworker. Nothing more to do. */
+	if (!autoprewarm)
+		return;
+
+	/* Register the static worker that prewarms and then dumps periodically. */
+	setup_autoprewarm(&prewarm_worker, "autoprewarm", "autoprewarm_main",
+					  Int32GetDatum(TASK_PREWARM_BUFFERPOOL), 0, 0);
+	RegisterBackgroundWorker(&prewarm_worker);
+}
+
+/*
+ * autoprewarm_dump_launcher
+ *		Dynamically launch a dump-only autoprewarm worker; returns its PID
+ */
+static pid_t
+autoprewarm_dump_launcher(void)
+{
+	BackgroundWorker worker;
+	BackgroundWorkerHandle *handle;
+	BgwHandleStatus status;
+	pid_t		pid;
+
+	setup_autoprewarm(&worker, "autoprewarm", "autoprewarm_main",
+					  Int32GetDatum(TASK_DUMP_BUFFERPOOL_INFO), 0, 0);
+
+	/* set bgw_notify_pid so that we can use WaitForBackgroundWorkerStartup */
+	worker.bgw_notify_pid = MyProcPid;
+
+	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			   errmsg("registering dynamic bgworker \"autoprewarm\" failed"),
+				 errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
+	}
+
+	/* Wait for the worker to start; on success pid is filled in. */
+	status = WaitForBackgroundWorkerStartup(handle, &pid);
+	if (status == BGWH_STOPPED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+				 errmsg("could not start autoprewarm dump bgworker"),
+			   errhint("More details may be available in the server log.")));
+	}
+
+	if (status == BGWH_POSTMASTER_DIED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			  errmsg("cannot start bgworker autoprewarm without postmaster"),
+				 errhint("Kill all remaining database processes and restart the database.")));
+	}
+
+	Assert(status == BGWH_STARTED);
+	return pid;
+}
+
+/*
+ * launch_autoprewarm_dump
+ *		SQL-callable: launch the dump bgworker; returns its PID, or NULL
+ */
+Datum
+launch_autoprewarm_dump(PG_FUNCTION_ARGS)
+{
+	pid_t		pid;
+
+	/* Has been set not to dump. Return NULL without launching anything. */
+	if (dump_interval == AT_PWARM_OFF)
+		PG_RETURN_NULL();
+
+	pid = autoprewarm_dump_launcher();
+	PG_RETURN_INT32(pid);
+}
+
+/*
+ * autoprewarm_dump_now
+ *		SQL-callable: dump immediately in this backend; returns block count
+ */
+Datum
+autoprewarm_dump_now(PG_FUNCTION_ARGS)
+{
+	uint32		num_blocks = 0;
+
+	init_autoprewarm_state();
+
+	PG_TRY();
+	{
+		num_blocks = dump_now(false);
+	}
+	PG_CATCH();
+	{
+		if (state->pid_using_dumpfile == MyProcPid)
+			state->pid_using_dumpfile = InvalidPid;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+	PG_RETURN_INT64(num_blocks);
+}
diff --git a/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
new file mode 100644
index 0000000..6c35fb7
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
@@ -0,0 +1,14 @@
+/* contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_prewarm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION launch_autoprewarm_dump()
+RETURNS pg_catalog.int4 STRICT
+AS 'MODULE_PATHNAME', 'launch_autoprewarm_dump'
+LANGUAGE C;
+
+CREATE FUNCTION autoprewarm_dump_now()
+RETURNS pg_catalog.int8 STRICT
+AS 'MODULE_PATHNAME', 'autoprewarm_dump_now'
+LANGUAGE C;
diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control
index cf2fb92..40e3add 100644
--- a/contrib/pg_prewarm/pg_prewarm.control
+++ b/contrib/pg_prewarm/pg_prewarm.control
@@ -1,5 +1,5 @@
 # pg_prewarm extension
 comment = 'prewarm relation data'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pg_prewarm'
 relocatable = true
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
index c090401..e8d0c2e 100644
--- a/doc/src/sgml/pgprewarm.sgml
+++ b/doc/src/sgml/pgprewarm.sgml
@@ -10,7 +10,9 @@
  <para>
   The <filename>pg_prewarm</filename> module provides a convenient way
   to load relation data into either the operating system buffer cache
-  or the <productname>PostgreSQL</productname> buffer cache.
+  or the <productname>PostgreSQL</productname> buffer cache. Additionally, an
+  automatic prewarming of the server buffers is supported whenever the server
+  restarts.
  </para>
 
  <sect2>
@@ -55,6 +57,100 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
    cache. For these reasons, prewarming is typically most useful at startup,
    when caches are largely empty.
   </para>
+
+<synopsis>
+launch_autoprewarm_dump() RETURNS int4
+</synopsis>
+
+  <para>
+   This is a SQL-callable function that launches the <literal>autoprewarm</literal>
+   worker to dump the buffer pool information at regular intervals. Only one
+   <literal>autoprewarm</literal> worker can run in a server, so if a new worker
+   sees an existing worker it will exit immediately. The return value is the pid
+   of the worker which has been launched.
+  </para>
+
+<synopsis>
+autoprewarm_dump_now() RETURNS int8
+</synopsis>
+
+  <para>
+   This is a SQL-callable function that makes a backend dump the buffer pool
+   information once, immediately. It returns the number of block infos dumped.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>autoprewarm</title>
+
+  <para>
+  A bgworker which automatically records information about blocks which were
+  present in the buffer pool before server shutdown and then prewarms the
+  buffer pool with those blocks upon server restart.
+  </para>
+
+  <para>
+  When the shared library <literal>pg_prewarm</literal> is preloaded via
+  <xref linkend="guc-shared-preload-libraries"> in <filename>postgresql.conf</>,
+  a bgworker <literal>autoprewarm</literal> is launched immediately after the
+  server has reached a consistent state. The bgworker will start loading blocks
+  recorded in <literal>$PGDATA/autoprewarm.blocks</literal> until there is no
+  free buffer left in the buffer pool. This way we do not replace any new
+  blocks which were loaded either by the recovery process or the querying
+  clients.
+  </para>
+
+  <para>
+  Once the <literal>autoprewarm</literal> bgworker has completed its prewarm
+  task, it will start a new task to periodically dump the information about
+  blocks which are currently in shared buffer pool. Upon next server restart,
+  the bgworker will prewarm the buffer pool by loading those blocks. The GUC
+  <literal>pg_prewarm.dump_interval</literal> will control the dumping activity
+  of the bgworker.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Configuration Parameters</title>
+
+ <variablelist>
+   <varlistentry>
+    <term>
+     <varname>pg_prewarm.autoprewarm</varname> (<type>boolean</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.autoprewarm</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      This is valid only for <literal>autoprewarm</literal>. An autoprewarm
+      worker will only be started if this variable is set to <literal>on</literal>.
+      The default value is <literal>on</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
+  <variablelist>
+   <varlistentry>
+   <term>
+     <varname>pg_prewarm.dump_interval</varname> (<type>int</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.dump_interval</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      This is valid only for <literal>autoprewarm</literal>. The minimum number
+      of seconds between two dumps of the buffer pool's block information. The
+      default is 300 seconds. Two values are special: if set to 0, timer-based
+      dumping is disabled and a dump happens only at server shutdown; if set
+      to -1, the running <literal>autoprewarm</literal> will be stopped.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
  </sect2>
 
  <sect2>
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 5d0a636..06a34a7 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -169,6 +169,23 @@ ClockSweepTick(void)
 }
 
 /*
+ * have_free_buffer -- a lockless check to see if there is a free buffer in
+ *					   buffer pool.
+ *
+ * A true result can become stale as soon as other operations take free
+ * buffers off the list, so a caller that strictly needs a free buffer should
+ * not rely on this.
+ */
+bool
+have_free_buffer(void)
+{
+	/*
+	 * Read without taking any lock; the answer may already be stale.
+	 */
+	return StrategyControl->firstFreeBuffer >= 0;
+}
+
+/*
  * StrategyGetBuffer
  *
  *	Called by the bufmgr to get the next candidate buffer to use in
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index ff99f6b..ab04bd9 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
 
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern bool have_free_buffer(void);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index eaa6d32..c6fa86a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -138,6 +138,8 @@ AttrDefault
 AttrNumber
 AttributeOpts
 AuthRequest
+AutoPrewarmSharedState
+AutoPrewarmTask
 AutoVacOpts
 AutoVacuumShmemStruct
 AutoVacuumWorkItem
@@ -214,10 +216,12 @@ BitmapOr
 BitmapOrPath
 BitmapOrState
 Bitmapset
+BlkType
 BlobInfo
 Block
 BlockId
 BlockIdData
+BlockInfoRecord
 BlockNumber
 BlockSampler
 BlockSamplerData
@@ -2869,6 +2873,7 @@ pos_trgm
 post_parse_analyze_hook_type
 pqbool
 pqsigfunc
+prewarm_elem
 printQueryOpt
 printTableContent
 printTableFooter
