commit a94095576809948e61b1778d4fb99010b28c51bb
Author: mithun <mithun@localhost.localdomain>
Date:   Mon Mar 13 18:32:15 2017 +0530

    Work :: AutoPrewarm
    Author :: Mithun

diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
index 7ad941e..88580d1 100644
--- a/contrib/pg_prewarm/Makefile
+++ b/contrib/pg_prewarm/Makefile
@@ -1,10 +1,10 @@
 # contrib/pg_prewarm/Makefile
 
 MODULE_big = pg_prewarm
-OBJS = pg_prewarm.o $(WIN32RES)
+OBJS = pg_prewarm.o autoprewarm.o $(WIN32RES)
 
 EXTENSION = pg_prewarm
-DATA = pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
+DATA = pg_prewarm--1.1--1.2.sql pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
 PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache"
 
 ifdef USE_PGXS
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
new file mode 100644
index 0000000..f4b34ca
--- /dev/null
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -0,0 +1,1137 @@
+/*-------------------------------------------------------------------------
+ *
+ * autoprewarm.c
+ *
+ * -- Automatically prewarm the shared buffer pool when server restarts.
+ *
+ *	Copyright (c) 2013-2017, PostgreSQL Global Development Group
+ *
+ *	IDENTIFICATION
+ *		contrib/pg_prewarm/autoprewarm.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+/* These are always necessary for a bgworker. */
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/shmem.h"
+
+/* These are necessary for prewarm utilities. */
+#include "pgstat.h"
+#include "storage/buf_internals.h"
+#include "storage/smgr.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "utils/guc.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "access/xact.h"
+#include "utils/rel.h"
+#include "port/atomics.h"
+
+/*
+ * autoprewarm :
+ *
+ * What is it?
+ * ===========
+ * A bgworker which automatically records information about blocks which were
+ * present in buffer pool before server shutdown and then prewarm the buffer
+ * pool upon server restart with those blocks.
+ *
+ * How does it work?
+ * =================
+ * When the shared library "pg_prewarm" is preloaded, a
+ * bgworker "autoprewarm" is launched immediately after the server has reached
+ * consistent state. The bgworker will start loading blocks recorded in the
+ * format BlockInfoRecord
+ * <<DatabaseId,TableSpaceId,RelationId,Forknum,BlockNum>> in
+ * $PGDATA/AUTOPREWARM_FILE, until there is a free buffer left in the buffer
+ * pool. This way we do not replace any new blocks which were loaded either by
+ * the recovery process or the querying clients.
+ *
+ * Once the "autoprewarm" bgworker has completed its prewarm task, it will
+ * start a new task to periodically dump the BlockInfoRecords related to blocks
+ * which are currently in shared buffer pool. Upon next server restart, the
+ * bgworker will prewarm the buffer pool by loading those blocks. The GUC
+ * pg_prewarm.dump_interval will control the dumping activity of the bgworker.
+ */
+
+PG_FUNCTION_INFO_V1(launch_autoprewarm_dump);
+PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
+
+#define AT_PWARM_OFF -1
+#define AT_PWARM_DUMP_AT_SHUTDOWN_ONLY 0
+#define AT_PWARM_DEFAULT_DUMP_INTERVAL 300
+
+#define AUTOPREWARM_FILE "autoprewarm.blocks"
+
+/* Primary functions */
+void		_PG_init(void);
+void		autoprewarm_main(Datum main_arg);
+static void dump_block_info_periodically(void);
+static pid_t autoprewarm_dump_launcher(void);
+static void setup_autoprewarm(BackgroundWorker *autoprewarm,
+				  const char *worker_name,
+				  const char *worker_function,
+				  Datum main_arg, int restart_time,
+				  int extra_flags);
+void		load_one_database(Datum main_arg);
+
+/*
+ * ============================================================================
+ * ===========================	 SIGNAL HANDLERS	===========================
+ * ============================================================================
+ */
+
+static void sigtermHandler(SIGNAL_ARGS);
+static void sighupHandler(SIGNAL_ARGS);
+
+/* flags set by signal handlers */
+static volatile sig_atomic_t got_sigterm = false;
+static volatile sig_atomic_t got_sighup = false;
+
+/*
+ *	Signal handler for SIGTERM
+ *	Set a flag to let the main loop to terminate, and set our latch to wake it
+ *	up.
+ */
+static void
+sigtermHandler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sigterm = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/*
+ *	Signal handler for SIGHUP
+ *	Set a flag to tell the process to reread the config file, and set our
+ *	latch to wake it up.
+ */
+static void
+sighupHandler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sighup = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/*
+ *	Signal handler for SIGUSR1.
+ */
+static void
+sigusr1Handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/* ============================================================================
+ * ==============	types and variables used by autoprewarm	  =============
+ * ============================================================================
+ */
+
+/*
+ * Meta-data of each persistent block which is dumped and used to load.
+ */
+typedef struct BlockInfoRecord
+{
+	Oid			database;		/* database */
+	Oid			spcNode;		/* tablespace */
+	Oid			filenode;		/* relation's filenode. */
+	ForkNumber	forknum;		/* fork number */
+	BlockNumber blocknum;		/* block number */
+} BlockInfoRecord;
+
+/*
+ * Tasks performed by autoprewarm workers.
+ */
+typedef enum
+{
+	TASK_PREWARM_BUFFERPOOL,	/* prewarm the buffer pool. */
+	TASK_DUMP_BUFFERPOOL_INFO,	/* dump the buffer pool block info. */
+	TASK_DUMP_IMMEDIATE_ONCE,	/* dump the buffer pool block info immediately
+								 * once. */
+	TASK_END					/* no more tasks to do. */
+} AutoPrewarmTask;
+
+/*
+ * Shared state information about the running autoprewarm bgworker.
+ */
+typedef struct AutoPrewarmSharedState
+{
+	pg_atomic_uint32 current_task;		/* current tasks performed by
+										 * autoprewarm workers. */
+} AutoPrewarmSharedState;
+
+static AutoPrewarmSharedState *state = NULL;
+
+/*
+ * Kind of BlockInfoRecord in AUTOPREWARM_FILE file.
+ */
+typedef enum
+{
+	BLKTYPE_NEW_DATABASE,		/* first BlockInfoRecord of new database. */
+	BLKTYPE_NEW_RELATION,		/* first BlockInfoRecord of new relation. */
+	BLKTYPE_NEW_FORK,			/* first BlockInfoRecord of new fork file. */
+	BLKTYPE_NEW_BLOCK,			/* any next BlockInfoRecord. */
+	BLKTYPE_END					/* No More BlockInfoRecords available in dump
+								 * file. */
+} BlkType;
+
+/* GUC variable which control the dump activity of autoprewarm. */
+static int	dump_interval = 0;
+
+/*
+ * GUC variable which says to which database we have to connect when
+ * BlockInfoRecord belongs to global objects.
+ */
+static char *default_database;
+
+/* Return from the comparator as soon as field "fld" of a and b differs. */
+#define cmp_member_elem(fld)	\
+do { \
+	if (a->fld < b->fld)		\
+		return -1;				\
+	else if (a->fld > b->fld)	\
+		return 1;				\
+} while(0)
+
+/*
+ * sort_cmp_func - qsort() comparator; orders by the fields compared below.
+ */
+static int
+sort_cmp_func(const void *p, const void *q)
+{
+	const BlockInfoRecord *a = (const BlockInfoRecord *) p;
+	const BlockInfoRecord *b = (const BlockInfoRecord *) q;
+
+	cmp_member_elem(database);
+	cmp_member_elem(spcNode);
+	cmp_member_elem(filenode);
+	cmp_member_elem(forknum);
+	cmp_member_elem(blocknum);
+	return 0;
+}
+
+/* ============================================================================
+ * =====================	prewarm part of autoprewarm =======================
+ * ============================================================================
+ */
+
+/*
+ * get_autoprewarm_task - get the next task the autoprewarm worker is allowed
+ * to perform.
+ *
+ * It works like this: if we are the first to allocate the shared memory, we
+ * can do whatever task we wanted to do. If TASK_PREWARM_BUFFERPOOL is running,
+ * nothing else can run in parallel. If TASK_DUMP_BUFFERPOOL_INFO is running,
+ * only TASK_DUMP_IMMEDIATE_ONCE can go further ahead.
+ */
+static AutoPrewarmTask
+get_autoprewarm_task(AutoPrewarmTask todo_task)
+{
+	bool		found;
+
+	state = NULL;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	state = ShmemInitStruct("autoprewarm",
+							sizeof(AutoPrewarmSharedState),
+							&found);
+	if (!found)
+		pg_atomic_write_u32(&(state->current_task), todo_task);
+
+	LWLockRelease(AddinShmemInitLock);
+
+	/* If found check if we can go ahead. */
+	if (found)
+	{
+		if (pg_atomic_read_u32(&(state->current_task)) ==
+			TASK_PREWARM_BUFFERPOOL)
+		{
+			if (todo_task == TASK_PREWARM_BUFFERPOOL)
+			{
+				/*
+				 * we were prewarming and we are back to do same, time to
+				 * abort prewarming and move to dumping.
+				 */
+				pg_atomic_write_u32(&(state->current_task),
+									TASK_DUMP_BUFFERPOOL_INFO);
+				return TASK_DUMP_BUFFERPOOL_INFO;
+			}
+			else
+				return TASK_END;	/* rest all cannot proceed further. */
+		}
+		else if (pg_atomic_read_u32(&(state->current_task)) ==
+				 TASK_DUMP_BUFFERPOOL_INFO)
+		{
+			/*
+			 * only thing that can be done now is TASK_DUMP_IMMEDIATE_ONCE.
+			 */
+			if (todo_task == TASK_DUMP_IMMEDIATE_ONCE)
+				return TASK_DUMP_IMMEDIATE_ONCE;
+			else
+				return TASK_END;
+		}
+		else if (pg_atomic_read_u32(&(state->current_task)) ==
+				 TASK_DUMP_IMMEDIATE_ONCE)
+		{
+			uint32		current_state = TASK_DUMP_IMMEDIATE_ONCE;
+
+			/* We cannot do a TASK_PREWARM_BUFFERPOOL but rest can go ahead */
+			if (todo_task == TASK_DUMP_IMMEDIATE_ONCE)
+				return TASK_DUMP_IMMEDIATE_ONCE;
+
+			if (todo_task == TASK_PREWARM_BUFFERPOOL)
+				todo_task = TASK_DUMP_BUFFERPOOL_INFO;	/* skip to do dump only */
+
+			/*
+			 * first guy who can atomically set the current_task get the
+			 * opportunity to proceed further
+			 */
+			if (pg_atomic_compare_exchange_u32(&(state->current_task),
+											   &current_state,
+											   TASK_DUMP_BUFFERPOOL_INFO))
+			{
+				/* Wow! We won the race proceed with the task. */
+				return TASK_DUMP_BUFFERPOOL_INFO;
+			}
+			else
+				return TASK_END;
+		}
+
+		return TASK_END;
+	}
+
+	return todo_task;			/* we were first we can do what we wanted. */
+}
+
+/*
+ * getnextblockinfo -- given a BlkType get its next BlockInfoRecord from the
+ *					   dump file.
+ */
+static BlkType
+getnextblockinfo(FILE *file, BlockInfoRecord *currblkinfo, BlkType reqblock,
+				 BlockInfoRecord *newblkinfo)
+{
+	BlkType		nextblk;
+
+	while (true)
+	{
+		/* get next block. */
+		if (5 != fscanf(file, "%u,%u,%u,%u,%u\n", &(newblkinfo->database),
+						&(newblkinfo->spcNode), &(newblkinfo->filenode),
+						(uint32 *) &(newblkinfo->forknum),
+						&(newblkinfo->blocknum)))
+			return BLKTYPE_END; /* No more valid entry hence stop processing. */
+
+		if (!currblkinfo || newblkinfo->database != currblkinfo->database)
+			nextblk = BLKTYPE_NEW_DATABASE;
+		else if (newblkinfo->filenode != currblkinfo->filenode)
+			nextblk = BLKTYPE_NEW_RELATION;
+		else if (newblkinfo->forknum != currblkinfo->forknum)
+			nextblk = BLKTYPE_NEW_FORK;
+		else
+			nextblk = BLKTYPE_NEW_BLOCK;
+
+		if (nextblk <= reqblock)
+			return nextblk;
+	}
+}
+
+/*
+ * get_reloid -- given a filenode get its relation oid.
+ *
+ * Runs a query through SPI, so the caller must already be connected to SPI.
+ * Returns InvalidOid if no row is found or the selected oid is NULL.
+ */
+static Oid
+get_reloid(Oid filenode)
+{
+	static const char *const query =
+	"select oid from pg_class where pg_relation_filenode(oid) = $1";
+	int			ret;
+	Oid			relationid;
+	bool		isnull;
+	Datum		value[1] = {ObjectIdGetDatum(filenode)};
+	Oid			ptype[1] = {OIDOID};
+
+	/* The query text is constant, so no StringInfo needs to be allocated. */
+	ret = SPI_execute_with_args(query, 1, ptype, value, NULL, true, 1);
+
+	if (ret != SPI_OK_SELECT)
+		ereport(FATAL, (errmsg("SPI_execute failed: error code %d", ret)));
+
+	if (SPI_processed < 1)
+		return InvalidOid;
+
+	relationid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0],
+												SPI_tuptable->tupdesc,
+												1, &isnull));
+	if (isnull)
+		return InvalidOid;
+
+	return relationid;
+}
+
+/*
+ * connect_to_db -- connect to the given dbid.
+ *
+ * For global objects the dbid will be InvalidOid, connect to user given
+ * default_database and try to load those blocks.
+ */
+static void
+connect_to_db(Oid dbid)
+{
+	if (!OidIsValid(dbid))
+		BackgroundWorkerInitializeConnection(default_database, NULL);
+	else
+		BackgroundWorkerInitializeConnectionByOid(dbid, InvalidOid);
+	SetCurrentStatementStartTimestamp();
+	StartTransactionCommand();
+	SPI_connect();
+	PushActiveSnapshot(GetTransactionSnapshot());
+}
+
+/*
+ * load_one_database -- start of prewarm sub-worker, this will try to load
+ * blocks of one database starting from block info position passed by main
+ * prewarm worker.
+ */
+void
+load_one_database(Datum main_arg)
+{
+	off_t		blockinfo_pos;
+	char		dump_file_path[MAXPGPATH];
+	FILE	   *file = NULL;
+	BlockInfoRecord prevblock,
+				toload_block;
+	Relation	rel = NULL;
+	bool		have_dbconnection = false;
+	BlkType		loadblocktype;
+	BlockNumber nblocks = 0;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, sigtermHandler);
+	pqsignal(SIGHUP, sighupHandler);
+	pqsignal(SIGUSR1, sigusr1Handler);
+
+	/*
+	 * We're now ready to receive signals
+	 */
+	BackgroundWorkerUnblockSignals();
+
+	blockinfo_pos = DatumGetInt64(main_arg);
+
+	/*
+	 * Seek to the blockinfo_pos and get the database ID to which following
+	 * block info's belong to. Connect to the that database and start loading
+	 * the blocks which follows until we reach end of block infos that belongs
+	 * to connected database.
+	 */
+
+	/* check if file exists and open file in read mode. */
+	snprintf(dump_file_path, sizeof(dump_file_path), "%s", AUTOPREWARM_FILE);
+	file = fopen(dump_file_path, PG_BINARY_R);
+	if (!file)
+		return;					/* No file to load. */
+
+	if (fseeko(file, blockinfo_pos, SEEK_SET))
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm: error reading from \"%s\" : %m",
+						dump_file_path)));
+	}
+
+	loadblocktype = BLKTYPE_NEW_BLOCK;
+	loadblocktype = getnextblockinfo(file, NULL, loadblocktype, &toload_block);
+
+	/*
+	 * It should be a block info belonging to a new database. Otherwise the
+	 * dump file is corrupted, and it is better to stop loading blocks now.
+	 */
+	if (loadblocktype != BLKTYPE_NEW_DATABASE)
+		goto end_load;			/* should we raise a voice here? */
+
+	while (loadblocktype != BLKTYPE_END)
+	{
+		Buffer		buf;
+		Oid			reloid;
+
+		/*
+		 * Load the block only if there exist a free buffer. We do not want to
+		 * replace a block already in buffer pool.
+		 */
+		if (!have_free_buffer())
+			goto end_load;
+
+		if (got_sigterm)
+			goto end_load;
+
+		switch (loadblocktype)
+		{
+			case BLKTYPE_NEW_DATABASE:
+
+				if (have_dbconnection)
+					goto end_load;		/* blocks belong to a new database,
+										 * lets end the loading process. */
+				loadblocktype = BLKTYPE_NEW_DATABASE;
+
+				/*
+				 * connect to the database.
+				 */
+				connect_to_db(toload_block.database);
+				have_dbconnection = true;
+
+			case BLKTYPE_NEW_RELATION:
+
+				/*
+				 * release lock on previous relation.
+				 */
+				if (rel)
+				{
+					relation_close(rel, AccessShareLock);
+					rel = NULL;
+				}
+
+				loadblocktype = BLKTYPE_NEW_RELATION;
+
+				/*
+				 * lock new relation.
+				 */
+				reloid = get_reloid(toload_block.filenode);
+
+				if (!OidIsValid(reloid))
+					break;
+
+				rel = try_relation_open(reloid, AccessShareLock);
+				if (!rel)
+					break;
+				RelationOpenSmgr(rel);
+
+			case BLKTYPE_NEW_FORK:
+
+				/*
+				 * check if fork exists and if block is within the range
+				 */
+				loadblocktype = BLKTYPE_NEW_FORK;
+				if (			/* toload_block.forknum > InvalidForkNumber &&
+								 * toload_block.forknum <= MAX_FORKNUM && */
+					!smgrexists(rel->rd_smgr, toload_block.forknum))
+					break;
+				nblocks = RelationGetNumberOfBlocksInFork(rel,
+													   toload_block.forknum);
+			case BLKTYPE_NEW_BLOCK:
+
+				/* check if blocknum is valid and with in fork file size. */
+				if (toload_block.blocknum >= nblocks)
+				{
+					/* move to next forknum. */
+					loadblocktype = BLKTYPE_NEW_FORK;
+					break;
+				}
+
+				buf = ReadBufferExtended(rel, toload_block.forknum,
+										 toload_block.blocknum, RBM_NORMAL,
+										 NULL);
+				if (BufferIsValid(buf))
+				{
+					ReleaseBuffer(buf);
+				}
+
+				loadblocktype = BLKTYPE_NEW_BLOCK;
+				break;
+
+			case BLKTYPE_END:
+				Assert(0);		/* Should not be here! */
+		}
+
+		memcpy(&prevblock, &toload_block, sizeof(BlockInfoRecord));
+		memset(&toload_block, 0, sizeof(BlockInfoRecord));
+		loadblocktype = getnextblockinfo(file, &prevblock, loadblocktype,
+										 &toload_block);
+	}
+
+end_load:
+
+	fclose(file);
+	/* release lock on previous relation. */
+	if (rel)
+	{
+		relation_close(rel, AccessShareLock);
+		rel = NULL;
+	}
+
+	if (have_dbconnection)
+	{
+		SPI_finish();
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+	}
+	return;
+}
+
+/*
+ * launch_prewarm_subworker -- register a dynamic worker to load the blocks
+ * starting from next_db_pos. We wait until the worker has stopped.
+ */
+static void
+launch_prewarm_subworker(off_t next_db_pos)
+{
+	BackgroundWorker worker;
+	BackgroundWorkerHandle *handle = NULL;
+	BgwHandleStatus status;
+
+	setup_autoprewarm(&worker, "autoprewarm", "load_one_database",
+					  Int64GetDatum(next_db_pos), BGW_NEVER_RESTART,
+					  BGWORKER_BACKEND_DATABASE_CONNECTION);
+	/* set bgw_notify_pid so that we can use WaitForBackgroundWorkerShutdown */
+	worker.bgw_notify_pid = MyProcPid;
+
+	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			   errmsg("registering dynamic bgworker autoprewarm failed"),
+				 errhint("Consider increasing configuration parameter "
+						 "\"max_worker_processes\".")));
+	}
+
+	status = WaitForBackgroundWorkerShutdown(handle);
+	if (status == BGWH_STOPPED)
+		return;
+
+	if (status == BGWH_POSTMASTER_DIED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			  errmsg("cannot start bgworker autoprewarm without postmaster"),
+				 errhint("Kill all remaining database processes and restart"
+						 " the database.")));
+	}
+
+	Assert(0);
+}
+
+/*
+ *	prewarm_buffer_pool - the main routine which prewarm the buffer pool.
+ *
+ *	The prewarm bgworker will first seek to database map table in
+ *	$PGDATA/AUTOPREWARM_FILE. For each offset in the map table it launches a
+ *	sub-worker to load the block info from that offset position until the end
+ *	of that database's block info. All sub-workers will be launched in
+ *	sequential order only after the previous sub-worker has finished its job.
+ *	We try to load each blocknum read from $PGDATA/AUTOPREWARM_FILE until we
+ *	have any free buffer left or SIGTERM is received.
+ */
+static void
+prewarm_buffer_pool(void)
+{
+	char		dump_file_path[MAXPGPATH];
+	FILE	   *file = NULL;
+	intmax_t	pos;			/* "%jd" target; off_t may be narrower */
+
+	snprintf(dump_file_path, sizeof(dump_file_path), "%s",
+			 AUTOPREWARM_FILE);
+
+	file = fopen(dump_file_path, PG_BINARY_R);
+	if (!file)
+		return;					/* No file to load. */
+
+	/* seek to start of database_map_table. */
+	if (1 != fscanf(file, "%020jd\n", &pos))
+	{
+		fclose(file);			/* unreadable header: nothing to prewarm. */
+		return;
+	}
+	if (fseeko(file, (off_t) pos, SEEK_SET))
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm : error reading from \"%s\" : %m",
+						dump_file_path)));
+	}
+
+	/* get next database's first block info's position. */
+	while (!got_sigterm &&
+		   1 == fscanf(file, "|%jd", &pos))
+	{
+		/*
+		 * Register a sub-worker to load new database's block. Wait until the
+		 * sub-worker finish its job before launching next subworker.
+		 */
+		launch_prewarm_subworker((off_t) pos);
+	}
+
+	ereport(LOG, (errmsg("autoprewarm load task ended")));
+
+	fclose(file);
+}
+
+/* ============================================================================
+ * =============	buffer pool info dump part of autoprewarm	===============
+ * ============================================================================
+ */
+
+/* This sub-module is for periodically dumping buffer pool's block info into
+ * a dump file AUTOPREWARM_FILE.
+ * Each entry of block info looks like this:
+ * <DatabaseId,TableSpaceId,RelationId,Forknum,BlockNum> and we shall call it
+ * as BlockInfoRecord.
+ *
+ * The contents of AUTOPREWARM_FILE are formatted in such a way that the
+ * BlockInfoRecords of each database can be given to different prewarm workers.
+ *
+ *	format of AUTOPREWARM_FILE
+ *	=======================================
+ *	[offset position of database map table]
+ *	[sorted BlockInfoRecords..............]
+ *	[database map table]
+ *	=======================================
+ *
+ *	The [database map table] is sequence of offset in file which will point to
+ *	first BlockInfoRecords of each database in the dump. The prewarm worker
+ *	will read this offset one by one in sequence and ask its subworker to seek
+ *	to this position and then start loading the BlockInfoRecords one by one
+ *	until it see a BlockInfoRecords of a different database than it is actually
+ *	connected to.
+ *	NOTE : We store off_t inside file so the dump file will not be portable to
+ *	be used across systems where sizeof off_t is different from each other.
+ */
+
+/*
+ *	dump_now - the main routine which goes through each buffer header of buffer
+ *	pool and dumps their meta data. We Sort these data and then dump them.
+ *	Sorting is necessary as it facilitates sequential read during load.
+ *
+ *	Returns the number of BlockInfoRecords written to the dump file.
+ */
+static uint32
+dump_now(void)
+{
+	static char dump_file_path[MAXPGPATH],
+				transient_dump_file_path[MAXPGPATH];
+	uint32		i;
+	int			ret;
+	uint32		num_blocks;
+	BlockInfoRecord *block_info_array;
+	BufferDesc *bufHdr;
+	FILE	   *file = NULL;
+	off_t	   *database_map_table,
+				database_map_table_pos;
+	size_t		database_map_table_size;
+	uint32		num_db = 0;
+	Oid			prev_database = InvalidOid;
+
+	block_info_array =
+		(BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
+	database_map_table_size = 64;
+	database_map_table =
+		(off_t *) palloc(sizeof(off_t) * database_map_table_size);
+
+	for (num_blocks = 0, i = 0; i < NBuffers; i++)
+	{
+		uint32		buf_state;
+
+		bufHdr = GetBufferDescriptor(i);
+
+		/* lock each buffer header before inspecting. */
+		buf_state = LockBufHdr(bufHdr);
+
+		if (buf_state & BM_TAG_VALID)
+		{
+			block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
+			block_info_array[num_blocks].spcNode = bufHdr->tag.rnode.spcNode;
+			block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
+			block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
+			block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
+			++num_blocks;
+		}
+
+		UnlockBufHdr(bufHdr, buf_state);
+	}
+
+	/* sorting now only to avoid sorting while loading. */
+	pg_qsort(block_info_array, num_blocks, sizeof(BlockInfoRecord),
+			 sort_cmp_func);
+
+	snprintf(transient_dump_file_path, sizeof(transient_dump_file_path),
+			 "%s.%d", AUTOPREWARM_FILE, MyProcPid);
+	file = fopen(transient_dump_file_path, "w");
+	if (file == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm: could not open \"%s\": %m",
+						transient_dump_file_path)));
+
+	snprintf(dump_file_path, sizeof(dump_file_path),
+			 "%s", AUTOPREWARM_FILE);
+	ret = fprintf(file, "%020jd\n", (intmax_t) 0);
+	if (ret < 0)
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm: error writing to \"%s\" : %m",
+						transient_dump_file_path)));
+	}
+
+	database_map_table[num_db++] = ftello(file);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		if (i > 0 && block_info_array[i].database != prev_database)
+		{
+			if (num_db == database_map_table_size)
+			{
+				database_map_table_size *= 2;	/* double and repalloc. */
+				database_map_table =
+					(off_t *) repalloc(database_map_table,
+									sizeof(off_t) * database_map_table_size);
+			}
+			fflush(file);
+			database_map_table[num_db++] = ftello(file);
+		}
+
+		ret = fprintf(file, "%u,%u,%u,%u,%u\n",
+					  block_info_array[i].database,
+					  block_info_array[i].spcNode,
+					  block_info_array[i].filenode,
+					  (uint32) block_info_array[i].forknum,
+					  block_info_array[i].blocknum);
+		if (ret < 0)
+		{
+			fclose(file);
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("autoprewarm: error writing to \"%s\" : %m",
+							transient_dump_file_path)));
+		}
+
+		prev_database = block_info_array[i].database;
+	}
+
+	pfree(block_info_array);
+	database_map_table_pos = ftello(file);
+
+	for (i = 0; i < num_db; i++)
+	{
+		ret = fprintf(file, "|%jd", (intmax_t) database_map_table[i]);
+		if (ret < 0)
+		{
+			fclose(file);
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("autoprewarm: error writing to \"%s\" : %m",
+							transient_dump_file_path)));
+		}
+	}
+	pfree(database_map_table);	/* don't leak the map on every periodic dump */
+
+	rewind(file);
+	ret = fprintf(file, "%020jd\n", (intmax_t) database_map_table_pos);
+	if (ret < 0)
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm: error writing to \"%s\" : %m",
+						transient_dump_file_path)));
+	}
+
+	/* Close the transient file, then rename it over the permanent dump. */
+	ret = fclose(file);
+	if (ret != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm: error closing \"%s\" : %m",
+						transient_dump_file_path)));
+	(void) durable_rename(transient_dump_file_path, dump_file_path, LOG);
+
+	ereport(LOG, (errmsg("autoprewarm: saved metadata info of %u blocks",
+						 num_blocks)));
+	return num_blocks;
+}
+
+/*
+ * dump_block_info_periodically - at regular intervals, which is defined by GUC
+ * dump_interval, dump the info of blocks which are present in buffer pool.
+ */
+static void
+dump_block_info_periodically(void)
+{
+	pg_time_t	last_dump_time = (pg_time_t) time(NULL);
+
+	while (!got_sigterm)
+	{
+		int			rc;
+		pg_time_t	now;
+		int			elapsed_secs = 0,
+					timeout = AT_PWARM_DEFAULT_DUMP_INTERVAL;
+
+		if (dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY)
+		{
+			now = (pg_time_t) time(NULL);
+			elapsed_secs = now - last_dump_time;
+			/* ">=": at the exact deadline dump, don't spin on a 0 timeout. */
+			if (elapsed_secs >= dump_interval)
+			{
+				dump_now();
+				if (got_sigterm)
+					return;		/* got shutdown signal just after a dump. And,
+								 * I think better to return now. */
+				last_dump_time = (pg_time_t) time(NULL);
+				elapsed_secs = 0;
+			}
+
+			timeout = dump_interval - elapsed_secs;
+		}
+
+		/* Has been set not to dump. Nothing more to do. */
+		if (dump_interval == AT_PWARM_OFF)
+			return;
+
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   timeout * 1000, PG_WAIT_EXTENSION);
+		/* Reset only after waking so a set made before the wait isn't lost. */
+		ResetLatch(&MyProc->procLatch);
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		/*
+		 * In case of a SIGHUP, just reload the configuration.
+		 */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+	}
+
+	/* One last block meta info dump while postmaster shutdown. */
+	if (dump_interval != AT_PWARM_OFF)
+		dump_now();
+}
+
+/*
+ * autoprewarm_main -- the main entry point of autoprewarm bgworker process.
+ */
+void
+autoprewarm_main(Datum main_arg)
+{
+	AutoPrewarmTask next_task;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, sigtermHandler);
+	pqsignal(SIGHUP, sighupHandler);
+	pqsignal(SIGUSR1, sigusr1Handler);
+
+	/* We're now ready to receive signals */
+	BackgroundWorkerUnblockSignals();
+
+	next_task = get_autoprewarm_task(DatumGetInt32(main_arg));
+
+	ereport(LOG, (errmsg("autoprewarm has started")));
+
+	/*
+	 * **** perform autoprewarm's next task	****
+	 */
+	if (next_task == TASK_PREWARM_BUFFERPOOL)
+	{
+		prewarm_buffer_pool();
+		/* prewarm is done lets move to TASK_DUMP_BUFFERPOOL_INFO. */
+		pg_atomic_write_u32(&(state->current_task),
+							TASK_DUMP_BUFFERPOOL_INFO);
+		next_task = TASK_DUMP_BUFFERPOOL_INFO;
+	}
+
+	if (next_task == TASK_DUMP_BUFFERPOOL_INFO)
+	{
+		dump_block_info_periodically();
+
+		/*
+		 * down grade to TASK_DUMP_IMMEDIATE_ONCE so others can start
+		 * TASK_DUMP_BUFFERPOOL_INFO
+		 */
+		pg_atomic_write_u32(&(state->current_task), TASK_DUMP_IMMEDIATE_ONCE);
+	}
+
+	ereport(LOG, (errmsg("autoprewarm shutting down")));
+}
+
+/* ============================================================================
+ * =============	extension's entry functions/utilities	===================
+ * ============================================================================
+ */
+
+/* Fill in *autoprewarm for an autoprewarm worker; the caller registers it. */
+static void
+setup_autoprewarm(BackgroundWorker *autoprewarm, const char *worker_name,
+			   const char *worker_function, Datum main_arg, int restart_time,
+				  int extra_flags)
+{
+	MemSet(autoprewarm, 0, sizeof(BackgroundWorker));
+	autoprewarm->bgw_flags = BGWORKER_SHMEM_ACCESS | extra_flags;
+	autoprewarm->bgw_start_time = BgWorkerStart_ConsistentState;
+	autoprewarm->bgw_restart_time = restart_time;
+	autoprewarm->bgw_main = NULL;
+	autoprewarm->bgw_main_arg = main_arg;
+
+	/* Bounded copies: names are truncated, never overrun or unterminated. */
+	snprintf(autoprewarm->bgw_library_name, BGW_MAXLEN, "pg_prewarm");
+	snprintf(autoprewarm->bgw_function_name, BGW_MAXLEN, "%s", worker_function);
+	snprintf(autoprewarm->bgw_name, BGW_MAXLEN, "%s", worker_name);
+}
+
+/* Extension's entry point. */
+void
+_PG_init(void)
+{
+	BackgroundWorker autoprewarm;
+
+	/* Define custom GUC variables. */
+	DefineCustomIntVariable("pg_prewarm.dump_interval",
+					   "Sets the maximum time between two buffer pool dumps",
+							"If set to Zero, timer based dumping is disabled."
+							" If set to -1, stops the running autoprewarm.",
+							&dump_interval,
+							AT_PWARM_DEFAULT_DUMP_INTERVAL,
+							AT_PWARM_OFF, INT_MAX / 1000,
+							PGC_SIGHUP,
+							GUC_UNIT_S,
+							NULL,
+							NULL,
+							NULL);
+
+	/* if not run as a preloaded library, nothing more to do here! */
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+
+	DefineCustomStringVariable("pg_prewarm.default_database",
+				"default database to connect if dump has not recorded same.",
+							   NULL,
+							   &default_database,
+							   "postgres",
+							   PGC_POSTMASTER,
+							   0,
+							   NULL,
+							   NULL,
+							   NULL);
+	/* Request additional shared resources */
+	RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
+
+	/* Has been set not to prewarm/dump. Nothing more to do. */
+	if (dump_interval == AT_PWARM_OFF)
+		return;
+
+	/* Register autoprewarm load. */
+	setup_autoprewarm(&autoprewarm, "autoprewarm", "autoprewarm_main",
+					  Int32GetDatum(TASK_PREWARM_BUFFERPOOL), 0, 0);
+	RegisterBackgroundWorker(&autoprewarm);
+}
+
+/*
+ * Dynamically launch an autoprewarm dump worker.
+ */
+static pid_t
+autoprewarm_dump_launcher(void)
+{
+	BackgroundWorker worker;
+	BackgroundWorkerHandle *handle;
+	BgwHandleStatus status;
+	pid_t		pid;
+
+	setup_autoprewarm(&worker, "autoprewarm", "autoprewarm_main",
+					  Int32GetDatum(TASK_DUMP_BUFFERPOOL_INFO), 0, 0);
+
+	/* set bgw_notify_pid so that we can use WaitForBackgroundWorkerStartup */
+	worker.bgw_notify_pid = MyProcPid;
+
+	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			   errmsg("registering dynamic bgworker \"autoprewarm\" failed"),
+				 errhint("Consider increasing configuration parameter "
+						 "\"max_worker_processes\".")));
+	}
+
+	status = WaitForBackgroundWorkerStartup(handle, &pid);
+	if (status == BGWH_STOPPED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+				 errmsg("could not start autoprewarm dump bgworker"),
+			   errhint("More details may be available in the server log.")));
+	}
+
+	if (status == BGWH_POSTMASTER_DIED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			  errmsg("cannot start bgworker autoprewarm without postmaster"),
+				 errhint("Kill all remaining database processes and restart"
+						 " the database.")));
+	}
+
+	Assert(status == BGWH_STARTED);
+	return pid;
+}
+
+/*
+ * The C-Language entry function to launch autoprewarm dump bgworker.
+ */
+Datum
+launch_autoprewarm_dump(PG_FUNCTION_ARGS)
+{
+	pid_t		pid;
+
+	/* Has been set not to prewarm/dump. Nothing more to do. */
+	if (dump_interval == AT_PWARM_OFF)
+		PG_RETURN_NULL();
+
+	pid = autoprewarm_dump_launcher();
+	PG_RETURN_INT32(pid);
+}
+
+/*
+ * The C-Language entry function to dump immediately.
+ */
+Datum
+autoprewarm_dump_now(PG_FUNCTION_ARGS)
+{
+	AutoPrewarmTask next_task;
+
+	/* dump only if prewarm is not in progress. */
+	next_task = get_autoprewarm_task(TASK_DUMP_IMMEDIATE_ONCE);
+	if (next_task == TASK_DUMP_IMMEDIATE_ONCE)
+		PG_RETURN_INT64(dump_now());
+	PG_RETURN_NULL();
+}
diff --git a/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
new file mode 100644
index 0000000..6c35fb7
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
@@ -0,0 +1,14 @@
+/* contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_prewarm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION launch_autoprewarm_dump()
+RETURNS pg_catalog.int4 STRICT
+AS 'MODULE_PATHNAME', 'launch_autoprewarm_dump'
+LANGUAGE C;
+
+CREATE FUNCTION autoprewarm_dump_now()
+RETURNS pg_catalog.int8 STRICT
+AS 'MODULE_PATHNAME', 'autoprewarm_dump_now'
+LANGUAGE C;
diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control
index cf2fb92..40e3add 100644
--- a/contrib/pg_prewarm/pg_prewarm.control
+++ b/contrib/pg_prewarm/pg_prewarm.control
@@ -1,5 +1,5 @@
 # pg_prewarm extension
 comment = 'prewarm relation data'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pg_prewarm'
 relocatable = true
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
index c090401..1538446 100644
--- a/doc/src/sgml/pgprewarm.sgml
+++ b/doc/src/sgml/pgprewarm.sgml
@@ -10,7 +10,9 @@
  <para>
   The <filename>pg_prewarm</filename> module provides a convenient way
   to load relation data into either the operating system buffer cache
-  or the <productname>PostgreSQL</productname> buffer cache.
+  or the <productname>PostgreSQL</productname> buffer cache. Additionally, an
+  automatic prewarming of the server buffers is supported whenever the server
+  restarts.
  </para>
 
  <sect2>
@@ -55,6 +57,102 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
    cache. For these reasons, prewarming is typically most useful at startup,
    when caches are largely empty.
   </para>
+
+<synopsis>
+launch_autoprewarm_dump() RETURNS int4
+</synopsis>
+
+  <para>
+   This is a SQL callable function to launch the <literal>autoprewarm</literal>
+   worker to dump the buffer pool information at regular intervals. A server
+   can run only one <literal>autoprewarm</literal> worker, so if a worker sees
+   another existing worker it will exit immediately. The return value is the
+   pid of the worker which has been launched.
+  </para>
+
+<synopsis>
+autoprewarm_dump_now() RETURNS int8
+</synopsis>
+
+  <para>
+   This is a SQL callable function with which a backend dumps the buffer pool
+   information once, immediately. This can work in parallel with the
+   <literal>autoprewarm</literal> worker while it is dumping. The return
+   value is the number of blocks whose information was dumped.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>autoprewarm</title>
+
+  <para>
+  A bgworker which automatically records information about blocks which were
+  present in the buffer pool before server shutdown and then prewarms the
+  buffer pool upon server restart with those blocks.
+  </para>
+
+  <para>
+  When the shared library <literal>pg_prewarm</literal> is preloaded via
+  <xref linkend="guc-shared-preload-libraries"> in <filename>postgresql.conf</>,
+  a bgworker <literal>autoprewarm</literal> is launched immediately after the
+  server has reached a consistent state. The bgworker will keep loading blocks
+  recorded in <literal>$PGDATA/autoprewarm.blocks</literal> until there is no
+  free buffer left in the buffer pool. This way we do not replace any new
+  blocks which were loaded either by the recovery process or the querying
+  clients.
+  </para>
+
+  <para>
+  Once the <literal>autoprewarm</literal> bgworker has completed its prewarm
+  task, it will start a new task to periodically dump the information about
+  blocks which are currently in the shared buffer pool. Upon the next server
+  restart, the bgworker will prewarm the buffer pool by loading those blocks.
+  The GUC <literal>pg_prewarm.dump_interval</literal> controls the dumping
+  activity of the bgworker.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Configuration Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+   <term>
+     <varname>pg_prewarm.dump_interval</varname> (<type>int</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.dump_interval</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      This is valid only for <literal>autoprewarm</literal>. The minimum number
+      of seconds between two dumps of the buffer pool's block information. The
+      default is 300 seconds. It also takes special values. If set to 0,
+      timer-based dumping is disabled; it dumps only during server shutdown.
+      If set to -1, the running <literal>autoprewarm</literal> will be stopped.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
+ <variablelist>
+   <varlistentry>
+    <term>
+     <varname>pg_prewarm.default_database</varname> (<type>string</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.default_database</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      This is valid only for <literal>autoprewarm</literal>. The blocks of
+      global objects will not have a database associated with them. The
+      <literal>default_database</literal> will be used to connect and preload
+      such blocks.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
  </sect2>
 
  <sect2>
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 5d0a636..06a34a7 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -169,6 +169,23 @@ ClockSweepTick(void)
 }
 
 /*
+ * have_free_buffer -- a lockless check to see if there is a free buffer in
+ *					   buffer pool.
+ *
+ * Reads StrategyControl->firstFreeBuffer without taking any lock, so a true
+ * result is only a hint: it becomes stale once free buffers are moved out by
+ * other operations.  A caller that strictly wants to use a free buffer
+ * should not rely on this.
+ *
+ * Returns true if firstFreeBuffer is non-negative, false otherwise.
+ */
+bool
+have_free_buffer(void)
+{
+	return StrategyControl->firstFreeBuffer >= 0;
+}
+
+/*
  * StrategyGetBuffer
  *
  *	Called by the bufmgr to get the next candidate buffer to use in
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index d117b66..58d4871 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
 
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern bool have_free_buffer(void);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9f876ae..4e7ea86 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -133,6 +133,8 @@ AttrDefault
 AttrNumber
 AttributeOpts
 AuthRequest
+AutoPrewarmSharedState
+AutoPrewarmTask
 AutoVacOpts
 AutoVacuumShmemStruct
 AuxProcType
@@ -206,10 +208,12 @@ BitmapOr
 BitmapOrPath
 BitmapOrState
 Bitmapset
+BlkType
 BlobInfo
 Block
 BlockId
 BlockIdData
+BlockInfoRecord
 BlockNumber
 BlockSampler
 BlockSamplerData
