Here's a ninth version of this patch.  (version 8 went unpublished).  I
have simplified a lot of things and improved some comments; I think I
understand much of it now.  I think this patch is fairly close to
committable, but one issue remains, which is this bit in
pgstat_write_statsfiles():

        /* In any case, we can just throw away all the db requests, but we need 
to
         * write dummy files for databases without a stat entry (it would cause
         * issues in pgstat_read_db_statsfile_timestamp and pgstat wait 
timeouts).
         * This may happen e.g. for shared DB (oid = 0) right after initdb.
         */
        if (!slist_is_empty(&last_statrequests))
        {
                slist_mutable_iter      iter;

                slist_foreach_modify(iter, &last_statrequests)
                {
                        DBWriteRequest *req = slist_container(DBWriteRequest, 
next,
                                                                                
                  iter.cur);

                        /*
                         * Create dummy files for requested databases without a 
proper
                         * dbentry. It's much easier this way than dealing with 
multiple
                         * timestamps, possibly existing but not yet written 
DBs etc.
                         * */
                        if (!pgstat_get_db_entry(req->databaseid, false))
                                pgstat_write_db_dummyfile(req->databaseid);

                        pfree(req);
                }

                slist_init(&last_statrequests);
        }

The problem here is that creating these dummy entries will cause a
difference in autovacuum behavior.  Autovacuum will skip processing
databases with no pgstat entry, and the intended reason is that if
there's no pgstat entry it's because the database doesn't have enough
activity.  Now perhaps we want to change that, but it should be an
explicit decision taken after discussion and thought, not side effect
from an unrelated patch.

Hm, and I now also realize another bug in this patch: the global stats
only include database entries for requested databases; but perhaps the
existing files can serve later requestors just fine for databases that
already had files; so the global stats file should continue to carry
entries for them, with the old timestamps.

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 38,43 ****
--- 38,44 ----
  #include "access/xact.h"
  #include "catalog/pg_database.h"
  #include "catalog/pg_proc.h"
+ #include "lib/ilist.h"
  #include "libpq/ip.h"
  #include "libpq/libpq.h"
  #include "libpq/pqsignal.h"
***************
*** 66,73 ****
   * Paths for the statistics files (relative to installation's $PGDATA).
   * ----------
   */
! #define PGSTAT_STAT_PERMANENT_FILENAME		"global/pgstat.stat"
! #define PGSTAT_STAT_PERMANENT_TMPFILE		"global/pgstat.tmp"
  
  /* ----------
   * Timer definitions.
--- 67,75 ----
   * Paths for the statistics files (relative to installation's $PGDATA).
   * ----------
   */
! #define PGSTAT_STAT_PERMANENT_DIRECTORY		"pg_stat"
! #define PGSTAT_STAT_PERMANENT_FILENAME		"pg_stat/global.stat"
! #define PGSTAT_STAT_PERMANENT_TMPFILE		"pg_stat/global.tmp"
  
  /* ----------
   * Timer definitions.
***************
*** 115,120 **** int			pgstat_track_activity_query_size = 1024;
--- 117,124 ----
   * Built from GUC parameter
   * ----------
   */
+ char	   *pgstat_stat_directory = NULL;
+ int			pgstat_stat_dbfile_maxlen = 0;
  char	   *pgstat_stat_filename = NULL;
  char	   *pgstat_stat_tmpname = NULL;
  
***************
*** 219,229 **** static int	localNumBackends = 0;
   */
  static PgStat_GlobalStats globalStats;
  
! /* Last time the collector successfully wrote the stats file */
! static TimestampTz last_statwrite;
  
! /* Latest statistics request time from backends */
! static TimestampTz last_statrequest;
  
  static volatile bool need_exit = false;
  static volatile bool got_SIGHUP = false;
--- 223,238 ----
   */
  static PgStat_GlobalStats globalStats;
  
! /* Write request info for each database */
! typedef struct DBWriteRequest
! {
! 	Oid			databaseid;		/* OID of the database to write */
! 	TimestampTz request_time;	/* timestamp of the last write request */
! 	slist_node	next;
! } DBWriteRequest;
  
! /* Latest statistics request times from backends */
! static slist_head	last_statrequests = SLIST_STATIC_INIT(last_statrequests);
  
  static volatile bool need_exit = false;
  static volatile bool got_SIGHUP = false;
***************
*** 252,262 **** static void pgstat_sighup_handler(SIGNAL_ARGS);
  static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
  static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
  					 Oid tableoid, bool create);
! static void pgstat_write_statsfile(bool permanent);
! static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent);
  static void backend_read_statsfile(void);
  static void pgstat_read_current_status(void);
  
  static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
  static void pgstat_send_funcstats(void);
  static HTAB *pgstat_collect_oids(Oid catalogid);
--- 261,277 ----
  static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
  static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
  					 Oid tableoid, bool create);
! static void pgstat_write_statsfiles(bool permanent, bool allDbs);
! static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent);
! static void pgstat_write_db_dummyfile(Oid databaseid);
! static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool deep);
! static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
  static void backend_read_statsfile(void);
  static void pgstat_read_current_status(void);
  
+ static bool pgstat_write_statsfile_needed(void);
+ static bool pgstat_db_requested(Oid databaseid);
+ 
  static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
  static void pgstat_send_funcstats(void);
  static HTAB *pgstat_collect_oids(Oid catalogid);
***************
*** 285,291 **** static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le
  static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
  static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
  
- 
  /* ------------------------------------------------------------
   * Public functions called from postmaster follow
   * ------------------------------------------------------------
--- 300,305 ----
***************
*** 541,556 **** startup_failed:
  }
  
  /*
   * pgstat_reset_all() -
   *
!  * Remove the stats file.  This is currently used only if WAL
   * recovery is needed after a crash.
   */
  void
  pgstat_reset_all(void)
  {
! 	unlink(pgstat_stat_filename);
! 	unlink(PGSTAT_STAT_PERMANENT_FILENAME);
  }
  
  #ifdef EXEC_BACKEND
--- 555,594 ----
  }
  
  /*
+  * subroutine for pgstat_reset_all
+  */
+ static void
+ pgstat_reset_remove_files(const char *directory)
+ {
+ 	DIR * dir;
+ 	struct dirent * entry;
+ 	char	fname[MAXPGPATH];
+ 
+ 	dir = AllocateDir(pgstat_stat_directory);
+ 	while ((entry = ReadDir(dir, pgstat_stat_directory)) != NULL)
+ 	{
+ 		if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ 			continue;
+ 
+ 		snprintf(fname, MAXPGPATH, "%s/%s", pgstat_stat_directory,
+ 				 entry->d_name);
+ 		unlink(fname);
+ 	}
+ 	FreeDir(dir);
+ }
+ 
+ /*
   * pgstat_reset_all() -
   *
!  * Remove the stats files.  This is currently used only if WAL
   * recovery is needed after a crash.
   */
  void
  pgstat_reset_all(void)
  {
! 
! 	pgstat_reset_remove_files(pgstat_stat_directory);
! 	pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY);
  }
  
  #ifdef EXEC_BACKEND
***************
*** 1408,1420 **** pgstat_ping(void)
   * ----------
   */
  static void
! pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time)
  {
  	PgStat_MsgInquiry msg;
  
  	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
  	msg.clock_time = clock_time;
  	msg.cutoff_time = cutoff_time;
  	pgstat_send(&msg, sizeof(msg));
  }
  
--- 1446,1459 ----
   * ----------
   */
  static void
! pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
  {
  	PgStat_MsgInquiry msg;
  
  	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
  	msg.clock_time = clock_time;
  	msg.cutoff_time = cutoff_time;
+ 	msg.databaseid = databaseid;
  	pgstat_send(&msg, sizeof(msg));
  }
  
***************
*** 3004,3009 **** PgstatCollectorMain(int argc, char *argv[])
--- 3043,3049 ----
  	int			len;
  	PgStat_Msg	msg;
  	int			wr;
+ 	bool		first_write = true;
  
  	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */
  
***************
*** 3053,3069 **** PgstatCollectorMain(int argc, char *argv[])
  	init_ps_display("stats collector process", "", "", "");
  
  	/*
- 	 * Arrange to write the initial status file right away
- 	 */
- 	last_statrequest = GetCurrentTimestamp();
- 	last_statwrite = last_statrequest - 1;
- 
- 	/*
  	 * Read in an existing statistics stats file or initialize the stats to
  	 * zero.
  	 */
  	pgStatRunningInCollector = true;
! 	pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
  
  	/*
  	 * Loop to process messages until we get SIGQUIT or detect ungraceful
--- 3093,3103 ----
  	init_ps_display("stats collector process", "", "", "");
  
  	/*
  	 * Read in an existing statistics stats file or initialize the stats to
  	 * zero.
  	 */
  	pgStatRunningInCollector = true;
! 	pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, true);
  
  	/*
  	 * Loop to process messages until we get SIGQUIT or detect ungraceful
***************
*** 3107,3116 **** PgstatCollectorMain(int argc, char *argv[])
  
  			/*
  			 * Write the stats file if a new request has arrived that is not
! 			 * satisfied by existing file.
  			 */
! 			if (last_statwrite < last_statrequest)
! 				pgstat_write_statsfile(false);
  
  			/*
  			 * Try to receive and process a message.  This will not block,
--- 3141,3154 ----
  
  			/*
  			 * Write the stats file if a new request has arrived that is not
! 			 * satisfied by existing file (force writing all files if it's
! 			 * the first write after startup).
  			 */
! 			if (first_write || pgstat_write_statsfile_needed())
! 			{
! 				pgstat_write_statsfiles(false, first_write);
! 				first_write = false;
! 			}
  
  			/*
  			 * Try to receive and process a message.  This will not block,
***************
*** 3269,3275 **** PgstatCollectorMain(int argc, char *argv[])
  	/*
  	 * Save the final stats to reuse at next startup.
  	 */
! 	pgstat_write_statsfile(true);
  
  	exit(0);
  }
--- 3307,3313 ----
  	/*
  	 * Save the final stats to reuse at next startup.
  	 */
! 	pgstat_write_statsfiles(true, true);
  
  	exit(0);
  }
***************
*** 3349,3354 **** pgstat_get_db_entry(Oid databaseid, bool create)
--- 3387,3393 ----
  		result->n_block_write_time = 0;
  
  		result->stat_reset_timestamp = GetCurrentTimestamp();
+ 		result->stats_timestamp = 0;
  
  		memset(&hash_ctl, 0, sizeof(hash_ctl));
  		hash_ctl.keysize = sizeof(Oid);
***************
*** 3422,3451 **** pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
  
  
  /* ----------
!  * pgstat_write_statsfile() -
   *
   *	Tell the news.
!  *	If writing to the permanent file (happens when the collector is
!  *	shutting down only), remove the temporary file so that backends
   *	starting up under a new postmaster can't read the old data before
   *	the new collector is ready.
   * ----------
   */
  static void
! pgstat_write_statsfile(bool permanent)
  {
  	HASH_SEQ_STATUS hstat;
- 	HASH_SEQ_STATUS tstat;
- 	HASH_SEQ_STATUS fstat;
  	PgStat_StatDBEntry *dbentry;
- 	PgStat_StatTabEntry *tabentry;
- 	PgStat_StatFuncEntry *funcentry;
  	FILE	   *fpout;
  	int32		format_id;
  	const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
  	const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
  	int			rc;
  
  	/*
  	 * Open the statistics temp file to write out the current values.
  	 */
--- 3461,3492 ----
  
  
  /* ----------
!  * pgstat_write_statsfiles() -
   *
   *	Tell the news.
!  *	If writing to the permanent files (happens when the collector is
!  *	shutting down only), remove the temporary files so that backends
   *	starting up under a new postmaster can't read the old data before
   *	the new collector is ready.
+  *
+  *	When 'allDbs' is false, only the requested databases (listed in
+  * 	last_statrequests) will be written. If 'allDbs' is true, all databases
+  * 	will be written.
   * ----------
   */
  static void
! pgstat_write_statsfiles(bool permanent, bool allDbs)
  {
  	HASH_SEQ_STATUS hstat;
  	PgStat_StatDBEntry *dbentry;
  	FILE	   *fpout;
  	int32		format_id;
  	const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
  	const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
  	int			rc;
  
+ 	elog(DEBUG1, "writing statsfile '%s'", statfile);
+ 
  	/*
  	 * Open the statistics temp file to write out the current values.
  	 */
***************
*** 3484,3523 **** pgstat_write_statsfile(bool permanent)
  	while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
  	{
  		/*
! 		 * Write out the DB entry including the number of live backends. We
! 		 * don't write the tables or functions pointers, since they're of no
! 		 * use to any other process.
  		 */
  		fputc('D', fpout);
  		rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
  		(void) rc;				/* we'll check for error with ferror */
- 
- 		/*
- 		 * Walk through the database's access stats per table.
- 		 */
- 		hash_seq_init(&tstat, dbentry->tables);
- 		while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
- 		{
- 			fputc('T', fpout);
- 			rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
- 			(void) rc;			/* we'll check for error with ferror */
- 		}
- 
- 		/*
- 		 * Walk through the database's function stats table.
- 		 */
- 		hash_seq_init(&fstat, dbentry->functions);
- 		while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
- 		{
- 			fputc('F', fpout);
- 			rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
- 			(void) rc;			/* we'll check for error with ferror */
- 		}
- 
- 		/*
- 		 * Mark the end of this DB
- 		 */
- 		fputc('d', fpout);
  	}
  
  	/*
--- 3525,3550 ----
  	while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
  	{
  		/*
! 		 * Write out the tables and functions into a separate file, if
! 		 * required.
! 		 *
! 		 * We need to do this before the dbentry write, to ensure the
! 		 * timestamps written to both are consistent.
! 		 */
! 		if (allDbs || pgstat_db_requested(dbentry->databaseid))
! 		{
! 			elog(DEBUG1, "writing statsfile for DB %d", dbentry->databaseid);
! 			dbentry->stats_timestamp = globalStats.stats_timestamp;
! 			pgstat_write_db_statsfile(dbentry, permanent);
! 		}
! 
! 		/*
! 		 * Write out the DB entry. We don't write the tables or functions
! 		 * pointers, since they're of no use to any other process.
  		 */
  		fputc('D', fpout);
  		rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
  		(void) rc;				/* we'll check for error with ferror */
  	}
  
  	/*
***************
*** 3527,3532 **** pgstat_write_statsfile(bool permanent)
--- 3554,3587 ----
  	 */
  	fputc('E', fpout);
  
+ 	/* In any case, we can just throw away all the db requests, but we need to
+ 	 * write dummy files for databases without a stat entry (it would cause
+ 	 * issues in pgstat_read_db_statsfile_timestamp and pgstat wait timeouts).
+ 	 * This may happen e.g. for shared DB (oid = 0) right after initdb.
+ 	 */
+ 	if (!slist_is_empty(&last_statrequests))
+ 	{
+ 		slist_mutable_iter	iter;
+ 
+ 		slist_foreach_modify(iter, &last_statrequests)
+ 		{
+ 			DBWriteRequest *req = slist_container(DBWriteRequest, next,
+ 												  iter.cur);
+ 
+ 			/*
+ 			 * Create dummy files for requested databases without a proper
+ 			 * dbentry. It's much easier this way than dealing with multiple
+ 			 * timestamps, possibly existing but not yet written DBs etc.
+ 			 * */
+ 			if (!pgstat_get_db_entry(req->databaseid, false))
+ 				pgstat_write_db_dummyfile(req->databaseid);
+ 
+ 			pfree(req);
+ 		}
+ 
+ 		slist_init(&last_statrequests);
+ 	}
+ 
  	if (ferror(fpout))
  	{
  		ereport(LOG,
***************
*** 3552,3612 **** pgstat_write_statsfile(bool permanent)
  						tmpfile, statfile)));
  		unlink(tmpfile);
  	}
- 	else
- 	{
- 		/*
- 		 * Successful write, so update last_statwrite.
- 		 */
- 		last_statwrite = globalStats.stats_timestamp;
- 
- 		/*
- 		 * If there is clock skew between backends and the collector, we could
- 		 * receive a stats request time that's in the future.  If so, complain
- 		 * and reset last_statrequest.	Resetting ensures that no inquiry
- 		 * message can cause more than one stats file write to occur.
- 		 */
- 		if (last_statrequest > last_statwrite)
- 		{
- 			char	   *reqtime;
- 			char	   *mytime;
- 
- 			/* Copy because timestamptz_to_str returns a static buffer */
- 			reqtime = pstrdup(timestamptz_to_str(last_statrequest));
- 			mytime = pstrdup(timestamptz_to_str(last_statwrite));
- 			elog(LOG, "last_statrequest %s is later than collector's time %s",
- 				 reqtime, mytime);
- 			pfree(reqtime);
- 			pfree(mytime);
- 
- 			last_statrequest = last_statwrite;
- 		}
- 	}
  
  	if (permanent)
  		unlink(pgstat_stat_filename);
  }
  
  
  /* ----------
   * pgstat_read_statsfile() -
   *
   *	Reads in an existing statistics collector file and initializes the
!  *	databases' hash table (whose entries point to the tables' hash tables).
   * ----------
   */
  static HTAB *
! pgstat_read_statsfile(Oid onlydb, bool permanent)
  {
  	PgStat_StatDBEntry *dbentry;
  	PgStat_StatDBEntry dbbuf;
- 	PgStat_StatTabEntry *tabentry;
- 	PgStat_StatTabEntry tabbuf;
- 	PgStat_StatFuncEntry funcbuf;
- 	PgStat_StatFuncEntry *funcentry;
  	HASHCTL		hash_ctl;
  	HTAB	   *dbhash;
- 	HTAB	   *tabhash = NULL;
- 	HTAB	   *funchash = NULL;
  	FILE	   *fpin;
  	int32		format_id;
  	bool		found;
--- 3607,3855 ----
  						tmpfile, statfile)));
  		unlink(tmpfile);
  	}
  
  	if (permanent)
  		unlink(pgstat_stat_filename);
  }
  
+ /*
+  * return the filename for a DB stat file; filename is the output buffer,
+  * of length len.
+  */
+ static void
+ get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
+ 					char *filename, int len)
+ {
+ 	int		printed;
+ 
+ 	printed = snprintf(filename, len, "%s/db_%u.%s",
+ 					   permanent ? "pg_stat" : pgstat_stat_directory,
+ 					   databaseid,
+ 					   tempname ? "tmp" : "stat");
+ 	if (printed > len)
+ 		elog(ERROR, "overlength pgstat path");
+ }
+ 
+ /* ----------
+  * pgstat_write_db_statsfile() -
+  *
+  *	Tell the news. This writes stats file for a single database.
+  *
+  *	If writing to the permanent file (happens when the collector is
+  *	shutting down only), remove the temporary file so that backends
+  *	starting up under a new postmaster can't read the old data before
+  *	the new collector is ready.
+  * ----------
+  */
+ static void
+ pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent)
+ {
+ 	HASH_SEQ_STATUS tstat;
+ 	HASH_SEQ_STATUS fstat;
+ 	PgStat_StatTabEntry *tabentry;
+ 	PgStat_StatFuncEntry *funcentry;
+ 	FILE	   *fpout;
+ 	int32		format_id;
+ 	Oid			dbid = dbentry->databaseid;
+ 	int			rc;
+ 	char		tmpfile[MAXPGPATH];
+ 	char		statfile[MAXPGPATH];
+ 
+ 	get_dbstat_filename(permanent, true, dbid, tmpfile, MAXPGPATH);
+ 	get_dbstat_filename(permanent, false, dbid, statfile, MAXPGPATH);
+ 
+ 	elog(DEBUG1, "writing statsfile '%s'", statfile);
+ 
+ 	/*
+ 	 * Open the statistics temp file to write out the current values.
+ 	 */
+ 	fpout = AllocateFile(tmpfile, PG_BINARY_W);
+ 	if (fpout == NULL)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not open temporary statistics file \"%s\": %m",
+ 						tmpfile)));
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * Write the file header --- currently just a format ID.
+ 	 */
+ 	format_id = PGSTAT_FILE_FORMAT_ID;
+ 	rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ 	(void) rc;					/* we'll check for error with ferror */
+ 
+ 	/*
+ 	 * Write the timestamp.
+ 	 */
+ 	rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout);
+ 	(void) rc;					/* we'll check for error with ferror */
+ 
+ 	/*
+ 	 * Walk through the database's access stats per table.
+ 	 */
+ 	hash_seq_init(&tstat, dbentry->tables);
+ 	while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
+ 	{
+ 		fputc('T', fpout);
+ 		rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
+ 		(void) rc;			/* we'll check for error with ferror */
+ 	}
+ 
+ 	/*
+ 	 * Walk through the database's function stats table.
+ 	 */
+ 	hash_seq_init(&fstat, dbentry->functions);
+ 	while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
+ 	{
+ 		fputc('F', fpout);
+ 		rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
+ 		(void) rc;			/* we'll check for error with ferror */
+ 	}
+ 
+ 	/*
+ 	 * No more output to be done. Close the temp file and replace the old
+ 	 * pgstat.stat with it.  The ferror() check replaces testing for error
+ 	 * after each individual fputc or fwrite above.
+ 	 */
+ 	fputc('E', fpout);
+ 
+ 	if (ferror(fpout))
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 			   errmsg("could not write temporary statistics file \"%s\": %m",
+ 					  tmpfile)));
+ 		FreeFile(fpout);
+ 		unlink(tmpfile);
+ 	}
+ 	else if (FreeFile(fpout) < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 			   errmsg("could not close temporary statistics file \"%s\": %m",
+ 					  tmpfile)));
+ 		unlink(tmpfile);
+ 	}
+ 	else if (rename(tmpfile, statfile) < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
+ 						tmpfile, statfile)));
+ 		unlink(tmpfile);
+ 	}
+ 
+ 	if (permanent)
+ 	{
+ 		get_dbstat_filename(false, false, dbid, tmpfile, MAXPGPATH);
+ 
+ 		elog(DEBUG1, "removing temporary stat file '%s'", tmpfile);
+ 		unlink(tmpfile);
+ 	}
+ }
+ 
+ 
+ /* ----------
+  * pgstat_write_db_dummyfile() -
+  *
+  *	All this does is writing a dummy stat file for databases without dbentry
+  *	yet. It basically writes just a file header - format ID and a timestamp.
+  * ----------
+  */
+ static void
+ pgstat_write_db_dummyfile(Oid databaseid)
+ {
+ 	FILE	   *fpout;
+ 	int32		format_id;
+ 	int			rc;
+ 	char		tmpfile[MAXPGPATH];
+ 	char		statfile[MAXPGPATH];
+ 
+ 	get_dbstat_filename(false, true, databaseid, tmpfile, MAXPGPATH);
+ 	get_dbstat_filename(false, false, databaseid, statfile, MAXPGPATH);
+ 
+ 	elog(DEBUG1, "writing statsfile '%s'", statfile);
+ 
+ 	/*
+ 	 * Open the statistics temp file to write out the current values.
+ 	 */
+ 	fpout = AllocateFile(tmpfile, PG_BINARY_W);
+ 	if (fpout == NULL)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not open temporary statistics file \"%s\": %m",
+ 						tmpfile)));
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * Write the file header --- currently just a format ID.
+ 	 */
+ 	format_id = PGSTAT_FILE_FORMAT_ID;
+ 	rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ 	(void) rc;					/* we'll check for error with ferror */
+ 
+ 	/*
+ 	 * Write the timestamp.
+ 	 */
+ 	rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout);
+ 	(void) rc;					/* we'll check for error with ferror */
+ 
+ 	/*
+ 	 * No more output to be done. Close the temp file and replace the old
+ 	 * pgstat.stat with it.  The ferror() check replaces testing for error
+ 	 * after each individual fputc or fwrite above.
+ 	 */
+ 	fputc('E', fpout);
+ 
+ 	if (ferror(fpout))
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 			   errmsg("could not write temporary dummy statistics file \"%s\": %m",
+ 					  tmpfile)));
+ 		FreeFile(fpout);
+ 		unlink(tmpfile);
+ 	}
+ 	else if (FreeFile(fpout) < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 			   errmsg("could not close temporary dummy statistics file \"%s\": %m",
+ 					  tmpfile)));
+ 		unlink(tmpfile);
+ 	}
+ 	else if (rename(tmpfile, statfile) < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not rename temporary dummy statistics file \"%s\" to \"%s\": %m",
+ 						tmpfile, statfile)));
+ 		unlink(tmpfile);
+ 	}
+ }
  
  /* ----------
   * pgstat_read_statsfile() -
   *
   *	Reads in an existing statistics collector file and initializes the
!  *	databases' hash table.  If the permanent file name is requested, also
!  *	remove it after reading.
!  *
!  *  If a deep read is requested, table/function stats are read also, otherwise
!  *  the table/function hash tables remain empty.
   * ----------
   */
  static HTAB *
! pgstat_read_statsfile(Oid onlydb, bool permanent, bool deep)
  {
  	PgStat_StatDBEntry *dbentry;
  	PgStat_StatDBEntry dbbuf;
  	HASHCTL		hash_ctl;
  	HTAB	   *dbhash;
  	FILE	   *fpin;
  	int32		format_id;
  	bool		found;
***************
*** 3690,3697 **** pgstat_read_statsfile(Oid onlydb, bool permanent)
  		{
  				/*
  				 * 'D'	A PgStat_StatDBEntry struct describing a database
! 				 * follows. Subsequently, zero to many 'T' and 'F' entries
! 				 * will follow until a 'd' is encountered.
  				 */
  			case 'D':
  				if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
--- 3933,3939 ----
  		{
  				/*
  				 * 'D'	A PgStat_StatDBEntry struct describing a database
! 				 * follows.
  				 */
  			case 'D':
  				if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
***************
*** 3753,3773 **** pgstat_read_statsfile(Oid onlydb, bool permanent)
  								   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
  
  				/*
! 				 * Arrange that following records add entries to this
! 				 * database's hash tables.
  				 */
! 				tabhash = dbentry->tables;
! 				funchash = dbentry->functions;
! 				break;
  
- 				/*
- 				 * 'd'	End of this database.
- 				 */
- 			case 'd':
- 				tabhash = NULL;
- 				funchash = NULL;
  				break;
  
  				/*
  				 * 'T'	A PgStat_StatTabEntry follows.
  				 */
--- 3995,4111 ----
  								   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
  
  				/*
! 				 * If requested, read the data from the database-specific file.
! 				 * If there was onlydb specified (!= InvalidOid), we would not
! 				 * get here because of a break above. So we don't need to
! 				 * recheck.
  				 */
! 				if (deep)
! 					pgstat_read_db_statsfile(dbentry->databaseid,
! 											 dbentry->tables,
! 											 dbentry->functions,
! 											 permanent);
  
  				break;
  
+ 			case 'E':
+ 				goto done;
+ 
+ 			default:
+ 				ereport(pgStatRunningInCollector ? LOG : WARNING,
+ 						(errmsg("corrupted statistics file \"%s\"",
+ 								statfile)));
+ 				goto done;
+ 		}
+ 	}
+ 
+ done:
+ 	FreeFile(fpin);
+ 
+ 	if (permanent)
+ 	{
+ 		/*
+ 		 * If requested to read the permanent file, also get rid of it; the
+ 		 * in-memory status is now authoritative, and the permanent file would
+ 		 * be out of date in case somebody else reads it.
+ 		 */
+ 		unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+ 	}
+ 
+ 	return dbhash;
+ }
+ 
+ 
+ /* ----------
+  * pgstat_read_db_statsfile() -
+  *
+  *	Reads in an existing statistics collector db file and initializes the
+  *	tables and functions hash tables (for the database identified by Oid).
+  * ----------
+  */
+ static void
+ pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent)
+ {
+ 	PgStat_StatTabEntry *tabentry;
+ 	PgStat_StatTabEntry tabbuf;
+ 	PgStat_StatFuncEntry funcbuf;
+ 	PgStat_StatFuncEntry *funcentry;
+ 	FILE	   *fpin;
+ 	int32		format_id;
+ 	TimestampTz timestamp;
+ 	bool		found;
+ 	char		statfile[MAXPGPATH];
+ 
+ 	get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
+ 
+ 	/*
+ 	 * Try to open the status file. If it doesn't exist, the backends simply
+ 	 * return zero for anything and the collector simply starts from scratch
+ 	 * with empty counters.
+ 	 *
+ 	 * ENOENT is a possibility if the stats collector is not running or has
+ 	 * not yet written the stats file the first time.  Any other failure
+ 	 * condition is suspicious.
+ 	 */
+ 	if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
+ 	{
+ 		if (errno != ENOENT)
+ 			ereport(pgStatRunningInCollector ? LOG : WARNING,
+ 					(errcode_for_file_access(),
+ 					 errmsg("could not open statistics file \"%s\": %m",
+ 							statfile)));
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * Verify it's of the expected format.
+ 	 */
+ 	if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
+ 		|| format_id != PGSTAT_FILE_FORMAT_ID)
+ 	{
+ 		ereport(pgStatRunningInCollector ? LOG : WARNING,
+ 				(errmsg("corrupted statistics file \"%s\"", statfile)));
+ 		goto done;
+ 	}
+ 
+ 	/*
+ 	 * Read global stats struct
+ 	 */
+ 	if (fread(&timestamp, 1, sizeof(timestamp), fpin) != sizeof(timestamp))
+ 	{
+ 		ereport(pgStatRunningInCollector ? LOG : WARNING,
+ 				(errmsg("corrupted statistics file \"%s\"", statfile)));
+ 		goto done;
+ 	}
+ 
+ 	/*
+ 	 * We found an existing collector stats file. Read it and put all the
+ 	 * hashtable entries into place.
+ 	 */
+ 	for (;;)
+ 	{
+ 		switch (fgetc(fpin))
+ 		{
  				/*
  				 * 'T'	A PgStat_StatTabEntry follows.
  				 */
***************
*** 3854,3878 **** done:
  	FreeFile(fpin);
  
  	if (permanent)
! 		unlink(PGSTAT_STAT_PERMANENT_FILENAME);
  
! 	return dbhash;
  }
  
  /* ----------
!  * pgstat_read_statsfile_timestamp() -
   *
!  *	Attempt to fetch the timestamp of an existing stats file.
   *	Returns TRUE if successful (timestamp is stored at *ts).
   * ----------
   */
  static bool
! pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
  {
! 	PgStat_GlobalStats myGlobalStats;
  	FILE	   *fpin;
  	int32		format_id;
! 	const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
  
  	/*
  	 * Try to open the status file.  As above, anything but ENOENT is worthy
--- 4192,4224 ----
  	FreeFile(fpin);
  
  	if (permanent)
! 	{
! 		get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
  
! 		elog(DEBUG1, "removing permanent stats file '%s'", statfile);
! 		unlink(statfile);
! 	}
! 
! 	return;
  }
  
+ 
  /* ----------
!  * pgstat_read_db_statsfile_timestamp() -
   *
!  *	Attempt to fetch the timestamp of an existing stats file (for a DB).
   *	Returns TRUE if successful (timestamp is stored at *ts).
   * ----------
   */
  static bool
! pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, TimestampTz *ts)
  {
! 	TimestampTz timestamp;
  	FILE	   *fpin;
  	int32		format_id;
! 	char		statfile[MAXPGPATH];
! 
! 	get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
  
  	/*
  	 * Try to open the status file.  As above, anything but ENOENT is worthy
***************
*** 3891,3898 **** pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
  	/*
  	 * Verify it's of the expected format.
  	 */
! 	if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
! 		|| format_id != PGSTAT_FILE_FORMAT_ID)
  	{
  		ereport(pgStatRunningInCollector ? LOG : WARNING,
  				(errmsg("corrupted statistics file \"%s\"", statfile)));
--- 4237,4244 ----
  	/*
  	 * Verify it's of the expected format.
  	 */
! 	if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
! 		format_id != PGSTAT_FILE_FORMAT_ID)
  	{
  		ereport(pgStatRunningInCollector ? LOG : WARNING,
  				(errmsg("corrupted statistics file \"%s\"", statfile)));
***************
*** 3903,3909 **** pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
  	/*
  	 * Read global stats struct
  	 */
! 	if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats))
  	{
  		ereport(pgStatRunningInCollector ? LOG : WARNING,
  				(errmsg("corrupted statistics file \"%s\"", statfile)));
--- 4249,4255 ----
  	/*
  	 * Read global stats struct
  	 */
! 	if (fread(&timestamp, 1, sizeof(TimestampTz), fpin) != sizeof(TimestampTz))
  	{
  		ereport(pgStatRunningInCollector ? LOG : WARNING,
  				(errmsg("corrupted statistics file \"%s\"", statfile)));
***************
*** 3911,3917 **** pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
  		return false;
  	}
  
! 	*ts = myGlobalStats.stats_timestamp;
  
  	FreeFile(fpin);
  	return true;
--- 4257,4263 ----
  		return false;
  	}
  
! 	*ts = timestamp;
  
  	FreeFile(fpin);
  	return true;
***************
*** 3947,3953 **** backend_read_statsfile(void)
  
  		CHECK_FOR_INTERRUPTS();
  
! 		ok = pgstat_read_statsfile_timestamp(false, &file_ts);
  
  		cur_ts = GetCurrentTimestamp();
  		/* Calculate min acceptable timestamp, if we didn't already */
--- 4293,4299 ----
  
  		CHECK_FOR_INTERRUPTS();
  
! 		ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false, &file_ts);
  
  		cur_ts = GetCurrentTimestamp();
  		/* Calculate min acceptable timestamp, if we didn't already */
***************
*** 4006,4012 **** backend_read_statsfile(void)
  				pfree(mytime);
  			}
  
! 			pgstat_send_inquiry(cur_ts, min_ts);
  			break;
  		}
  
--- 4352,4358 ----
  				pfree(mytime);
  			}
  
! 			pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
  			break;
  		}
  
***************
*** 4016,4022 **** backend_read_statsfile(void)
  
  		/* Not there or too old, so kick the collector and wait a bit */
  		if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
! 			pgstat_send_inquiry(cur_ts, min_ts);
  
  		pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
  	}
--- 4362,4368 ----
  
  		/* Not there or too old, so kick the collector and wait a bit */
  		if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
! 			pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
  
  		pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
  	}
***************
*** 4024,4034 **** backend_read_statsfile(void)
  	if (count >= PGSTAT_POLL_LOOP_COUNT)
  		elog(WARNING, "pgstat wait timeout");
  
! 	/* Autovacuum launcher wants stats about all databases */
  	if (IsAutoVacuumLauncherProcess())
! 		pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
  	else
! 		pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false);
  }
  
  
--- 4370,4383 ----
  	if (count >= PGSTAT_POLL_LOOP_COUNT)
  		elog(WARNING, "pgstat wait timeout");
  
! 	/*
! 	 * Autovacuum launcher wants stats about all databases, but a shallow
! 	 * read is sufficient.
! 	 */
  	if (IsAutoVacuumLauncherProcess())
! 		pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, false);
  	else
! 		pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false, true);
  }
  
  
***************
*** 4084,4109 **** pgstat_clear_snapshot(void)
  static void
  pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
  {
  	/*
! 	 * Advance last_statrequest if this requestor has a newer cutoff time
! 	 * than any previous request.
  	 */
! 	if (msg->cutoff_time > last_statrequest)
! 		last_statrequest = msg->cutoff_time;
  
  	/*
! 	 * If the requestor's local clock time is older than last_statwrite, we
  	 * should suspect a clock glitch, ie system time going backwards; though
  	 * the more likely explanation is just delayed message receipt.  It is
  	 * worth expending a GetCurrentTimestamp call to be sure, since a large
  	 * retreat in the system clock reading could otherwise cause us to neglect
  	 * to update the stats file for a long time.
  	 */
! 	if (msg->clock_time < last_statwrite)
  	{
  		TimestampTz cur_ts = GetCurrentTimestamp();
  
! 		if (cur_ts < last_statwrite)
  		{
  			/*
  			 * Sure enough, time went backwards.  Force a new stats file write
--- 4433,4485 ----
  static void
  pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
  {
+ 	slist_iter	iter;
+ 	bool		found = false;
+ 	DBWriteRequest *newreq;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	elog(DEBUG1, "received inquiry for %d", msg->databaseid);
+ 
+ 	/*
+ 	 * Find the last write request for this DB (found=true in that case). Plain
+ 	 * linear search, not really worth doing any magic here (probably).
+ 	 */
+ 	slist_foreach(iter, &last_statrequests)
+ 	{
+ 		DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
+ 
+ 		if (req->databaseid != msg->databaseid)
+ 			continue;
+ 
+ 		if (msg->cutoff_time > req->request_time)
+ 			req->request_time = msg->cutoff_time;
+ 		found = true;
+ 		return;
+ 	}
+ 
  	/*
! 	 * There's no request for this DB yet, so create one.
  	 */
! 	newreq = palloc(sizeof(DBWriteRequest));
! 
! 	newreq->databaseid = msg->databaseid;
! 	newreq->request_time = msg->clock_time;
! 	slist_push_head(&last_statrequests, &newreq->next);
  
  	/*
! 	 * If the requestor's local clock time is older than stats_timestamp, we
  	 * should suspect a clock glitch, ie system time going backwards; though
  	 * the more likely explanation is just delayed message receipt.  It is
  	 * worth expending a GetCurrentTimestamp call to be sure, since a large
  	 * retreat in the system clock reading could otherwise cause us to neglect
  	 * to update the stats file for a long time.
  	 */
! 	dbentry = pgstat_get_db_entry(msg->databaseid, false);
! 	if ((dbentry != NULL) && (msg->clock_time < dbentry->stats_timestamp))
  	{
  		TimestampTz cur_ts = GetCurrentTimestamp();
  
! 		if (cur_ts < dbentry->stats_timestamp)
  		{
  			/*
  			 * Sure enough, time went backwards.  Force a new stats file write
***************
*** 4113,4127 **** pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
  			char	   *mytime;
  
  			/* Copy because timestamptz_to_str returns a static buffer */
! 			writetime = pstrdup(timestamptz_to_str(last_statwrite));
  			mytime = pstrdup(timestamptz_to_str(cur_ts));
! 			elog(LOG, "last_statwrite %s is later than collector's time %s",
! 				 writetime, mytime);
  			pfree(writetime);
  			pfree(mytime);
  
! 			last_statrequest = cur_ts;
! 			last_statwrite = last_statrequest - 1;
  		}
  	}
  }
--- 4489,4504 ----
  			char	   *mytime;
  
  			/* Copy because timestamptz_to_str returns a static buffer */
! 			writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
  			mytime = pstrdup(timestamptz_to_str(cur_ts));
! 			elog(LOG,
! 				 "stats_timestamp %s is later than collector's time %s for db %d",
! 				 writetime, mytime, dbentry->databaseid);
  			pfree(writetime);
  			pfree(mytime);
  
! 			newreq->request_time = cur_ts;
! 			dbentry->stats_timestamp = cur_ts - 1;
  		}
  	}
  }
***************
*** 4270,4298 **** pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
  static void
  pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
  {
  	PgStat_StatDBEntry *dbentry;
  
  	/*
  	 * Lookup the database in the hashtable.
  	 */
! 	dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
  
  	/*
! 	 * If found, remove it.
  	 */
  	if (dbentry)
  	{
  		if (dbentry->tables != NULL)
  			hash_destroy(dbentry->tables);
  		if (dbentry->functions != NULL)
  			hash_destroy(dbentry->functions);
  
  		if (hash_search(pgStatDBHash,
! 						(void *) &(dbentry->databaseid),
  						HASH_REMOVE, NULL) == NULL)
  			ereport(ERROR,
! 					(errmsg("database hash table corrupted "
! 							"during cleanup --- abort")));
  	}
  }
  
--- 4647,4682 ----
  static void
  pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
  {
+ 	Oid			dbid = msg->m_databaseid;
  	PgStat_StatDBEntry *dbentry;
  
  	/*
  	 * Lookup the database in the hashtable.
  	 */
! 	dbentry = pgstat_get_db_entry(dbid, false);
  
  	/*
! 	 * If found, remove it (along with the db statfile).
  	 */
  	if (dbentry)
  	{
+ 		char		statfile[MAXPGPATH];
+ 
+ 		get_dbstat_filename(true, false, dbid, statfile, MAXPGPATH);
+ 
+ 		elog(DEBUG1, "removing %s", statfile);
+ 		unlink(statfile);
+ 
  		if (dbentry->tables != NULL)
  			hash_destroy(dbentry->tables);
  		if (dbentry->functions != NULL)
  			hash_destroy(dbentry->functions);
  
  		if (hash_search(pgStatDBHash,
! 						(void *) &dbid,
  						HASH_REMOVE, NULL) == NULL)
  			ereport(ERROR,
! 					(errmsg("database hash table corrupted during cleanup --- abort")));
  	}
  }
  
***************
*** 4687,4689 **** pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
--- 5071,5131 ----
  						   HASH_REMOVE, NULL);
  	}
  }
+ 
+ /* ----------
+  * pgstat_write_statsfile_needed() -
+  *
+  *	Checks whether there's a db stats request, requiring a file write.
+  *
+  *	TODO Seems that thanks the way we handle last_statrequests (erase after
+  *	a write), this is unnecessary. Just check that there's at least one
+  *	request and you're done. Although there might be delayed requests ...
+  * ----------
+  */
+ static bool
+ pgstat_write_statsfile_needed(void)
+ {
+ 	PgStat_StatDBEntry *dbentry;
+ 	slist_iter	iter;
+ 
+ 	/* Check the databases if they need to refresh the stats. */
+ 	slist_foreach(iter, &last_statrequests)
+ 	{
+ 		DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
+ 
+ 		dbentry = pgstat_get_db_entry(req->databaseid, false);
+ 
+ 		/* No dbentry yet or too old. */
+ 		if (!dbentry || (dbentry->stats_timestamp < req->request_time))
+ 		{
+ 			return true;
+ 		}
+ 	}
+ 
+ 	/* Well, everything was written recently ... */
+ 	return false;
+ }
+ 
+ /* ----------
+  * pgstat_db_requested() -
+  *
+  *	Checks whether stats for a particular DB need to be written to a file.
+  * ----------
+  */
+ 
+ static bool
+ pgstat_db_requested(Oid databaseid)
+ {
+ 	slist_iter	iter;
+ 
+ 	/* Check the databases if they need to refresh the stats. */
+ 	slist_foreach(iter, &last_statrequests)
+ 	{
+ 		DBWriteRequest	*req = slist_container(DBWriteRequest, next, iter.cur);
+ 
+ 		if (req->databaseid == databaseid)
+ 			return true;
+ 	}
+ 
+ 	return false;
+ }
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 8704,8717 **** static void
  assign_pgstat_temp_directory(const char *newval, void *extra)
  {
  	/* check_canonical_path already canonicalized newval for us */
  	char	   *tname;
  	char	   *fname;
  
! 	tname = guc_malloc(ERROR, strlen(newval) + 12);		/* /pgstat.tmp */
! 	sprintf(tname, "%s/pgstat.tmp", newval);
! 	fname = guc_malloc(ERROR, strlen(newval) + 13);		/* /pgstat.stat */
! 	sprintf(fname, "%s/pgstat.stat", newval);
  
  	if (pgstat_stat_tmpname)
  		free(pgstat_stat_tmpname);
  	pgstat_stat_tmpname = tname;
--- 8704,8728 ----
  assign_pgstat_temp_directory(const char *newval, void *extra)
  {
  	/* check_canonical_path already canonicalized newval for us */
+ 	char	   *dname;
  	char	   *tname;
  	char	   *fname;
  
! 	/* directory */
! 	dname = guc_malloc(ERROR, strlen(newval) + 1);		/* runtime dir */
! 	sprintf(dname, "%s", newval);
  
+ 	/* global stats */
+ 	tname = guc_malloc(ERROR, strlen(newval) + 12);		/* /global.tmp */
+ 	sprintf(tname, "%s/global.tmp", newval);
+ 	fname = guc_malloc(ERROR, strlen(newval) + 13);		/* /global.stat */
+ 	sprintf(fname, "%s/global.stat", newval);
+ 
+ 	if (pgstat_stat_directory)
+ 		free(pgstat_stat_directory);
+ 	pgstat_stat_directory = dname;
+ 	/* invalidate cached length in pgstat.c */
+ 	pgstat_stat_dbfile_maxlen = 0;
  	if (pgstat_stat_tmpname)
  		free(pgstat_stat_tmpname);
  	pgstat_stat_tmpname = tname;
*** a/src/bin/initdb/initdb.c
--- b/src/bin/initdb/initdb.c
***************
*** 192,197 **** const char *subdirs[] = {
--- 192,198 ----
  	"base",
  	"base/1",
  	"pg_tblspc",
+ 	"pg_stat",
  	"pg_stat_tmp"
  };
  
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
***************
*** 205,210 **** typedef struct PgStat_MsgInquiry
--- 205,211 ----
  	PgStat_MsgHdr m_hdr;
  	TimestampTz clock_time;		/* observed local clock time */
  	TimestampTz cutoff_time;	/* minimum acceptable file timestamp */
+ 	Oid			databaseid;		/* requested DB (InvalidOid => all DBs) */
  } PgStat_MsgInquiry;
  
  
***************
*** 514,520 **** typedef union PgStat_Msg
   * ------------------------------------------------------------
   */
  
! #define PGSTAT_FILE_FORMAT_ID	0x01A5BC9A
  
  /* ----------
   * PgStat_StatDBEntry			The collector's data per database
--- 515,521 ----
   * ------------------------------------------------------------
   */
  
! #define PGSTAT_FILE_FORMAT_ID	0xA240CA47
  
  /* ----------
   * PgStat_StatDBEntry			The collector's data per database
***************
*** 545,550 **** typedef struct PgStat_StatDBEntry
--- 546,552 ----
  	PgStat_Counter n_block_write_time;
  
  	TimestampTz stat_reset_timestamp;
+ 	TimestampTz stats_timestamp;		/* time of db stats file update */
  
  	/*
  	 * tables and functions must be last in the struct, because we don't write
***************
*** 722,727 **** extern bool pgstat_track_activities;
--- 724,731 ----
  extern bool pgstat_track_counts;
  extern int	pgstat_track_functions;
  extern PGDLLIMPORT int pgstat_track_activity_query_size;
+ extern char *pgstat_stat_directory;
+ extern int	pgstat_stat_dbfile_maxlen;
  extern char *pgstat_stat_tmpname;
  extern char *pgstat_stat_filename;
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to