Hi,

At Tue, 19 Sep 2017 16:55:38 -0700, Peter Geoghegan <p...@bowt.ie> wrote in 
<cah2-wzn0-3zxgrp_qp1oaexy7h1w0-w_vcfo0ndv0k_+kab...@mail.gmail.com>
> On Tue, Sep 19, 2017 at 4:47 PM, Claudio Freire <klaussfre...@gmail.com> 
> wrote:
> > Maybe this is looking at the problem from the wrong direction.
> >
> > Why can't the page be added to the FSM immediately and the check be
> > done at runtime when looking for a reusable page?
> >
> > Index FSMs currently store only 0 or 255, couldn't they store 128 for
> > half-recyclable pages and make the caller re-check reusability before
> > using it?
> 
> No, because it's impossible for them to know whether or not the page
> that their index scan just landed on recycled just a second ago, or
> was like this since before their xact began/snapshot was acquired.
> 
> For your reference, this RecentGlobalXmin interlock stuff is what
> Lanin & Shasha call "The Drain Technique" within "2.5 Freeing Empty
> Nodes". Seems pretty hard to do it any other way.

Anyway(:p) the attached first patch is a PoC for the
cleanup-state-in-stats method, which works only for btree. Some
LOG-level debugging messages are put in the patch to show how it
works.

The following steps make a not-recyclable page, but I'm not sure
they are general enough, and I couldn't generate half-dead pages.
The pg_sleep() calls in the following steps are inserted in order
to see the updated values in stats.


DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a int);
CREATE INDEX ON t1 (a);
INSERT INTO t1 (SELECT a FROM generate_series(0, 800000) a);
DELETE FROM t1 WHERE a > 416700 AND a < 417250;
VACUUM t1;
DELETE FROM t1;
VACUUM t1;  -- 1 (or wait for autovacuum)
select pg_sleep(1);
VACUUM t1;  -- 2 (autovacuum doesn't work)
select pg_sleep(1);
VACUUM t1;  -- 3 (ditto)


The following logs are emitted while the three VACUUMs are issued.

# VACUUM t1;  -- 1 (or wait for autovacuum)
 LOG:  btvacuumscan(t1_a_idx) result: deleted = 2185, notrecyclable = 1, 
hafldead = 0, no_cleanup_needed = false
 LOG:  Vacuum cleanup of index t1_a_idx is NOT skipped
 LOG:  btvacuumcleanup on index t1_a_idx is skipped since bulkdelete has run 
just before.
# VACUUM t1;  -- 2
 LOG:  Vacuum cleanup of index t1_a_idx is NOT skipped
 LOG:  btvacuumscan(t1_a_idx) result: deleted = 2192, notrecyclable = 0, 
hafldead = 0, no_cleanup_needed = true
# VACUUM t1;  -- 3
 LOG:  Vacuum cleanup of index t1_a_idx is skipped


VACUUM #1 leaves an unrecyclable page and requests the next cleanup.
VACUUM #2 leaves no unrecyclable page and inhibits the next cleanup.
VACUUM #3 (and every one after it) executes no vacuum cleanup.

# I suppose it is a known issue that the cleanup cycles are not
# executed automatically unless new dead tuples are generated.

- Getting stats takes a very long time to fail during
  initdb. Since I couldn't find the right way to cope with this,
  I added a tentative function pgstat_live(), which checks that
  the backend has a valid stats socket.

- The patch calls pg_stat_get_vac_cleanup_needed using
  DirectFunctionCall. It might be better to wrap it.


As a byproduct, this enables us to run extra autovacuum rounds for
index cleanup. With the second attached patch, autovacuum works as
follows.

DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a int);
CREATE INDEX ON t1 (a);
INSERT INTO t1 (SELECT a FROM generate_series(0, 800000) a);
DELETE FROM t1 WHERE a > 416700 AND a < 417250;
(autovacuum on t1 runs)
> LOG:  btvacuumscan(t1_a_idx) result: deleted = 0, notrecyclable = 0, hafldead 
> = 0, no_cleanup_needed = true
> LOG:  Vacuum cleanup of index t1_a_idx is skipped
> LOG:  automatic vacuum of table "postgres.public.t1": index scans: 1
DELETE FROM t1;
(autovacuum on t1 runs)
> LOG:  btvacuumscan(t1_a_idx) result: deleted = 2185, notrecyclable = 1, 
> hafldead = 0, no_cleanup_needed = false
> LOG:  Vacuum cleanup of index t1_a_idx is NOT skipped
> LOG:  btvacuumcleanup on index t1_a_idx is skipped since bulkdelete has run 
> just before.
> LOG:  automatic vacuum of table "postgres.public.t1": index scans: 1
(cleanup vacuum runs for t1 in the next autovac timing)
> LOG:  Vacuum cleanup of index t1_a_idx is NOT skipped
> LOG:  btvacuumscan(t1_a_idx) result: deleted = 2192, notrecyclable = 0, 
> hafldead = 0, no_cleanup_needed = true
> LOG:  automatic vacuum of table "postgres.public.t1": index scans: 0


Any suggestions are welcome.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
*** a/src/backend/access/nbtree/nbtpage.c
--- b/src/backend/access/nbtree/nbtpage.c
***************
*** 1110,1116 **** _bt_pagedel(Relation rel, Buffer buf)
  {
  	int			ndeleted = 0;
  	BlockNumber rightsib;
! 	bool		rightsib_empty;
  	Page		page;
  	BTPageOpaque opaque;
  
--- 1110,1116 ----
  {
  	int			ndeleted = 0;
  	BlockNumber rightsib;
! 	bool		rightsib_empty = false;
  	Page		page;
  	BTPageOpaque opaque;
  
*** a/src/backend/access/nbtree/nbtree.c
--- b/src/backend/access/nbtree/nbtree.c
***************
*** 63,68 **** typedef struct
--- 63,70 ----
  	BlockNumber lastBlockLocked;	/* highest blkno we've cleanup-locked */
  	BlockNumber totFreePages;	/* true total # of free pages */
  	MemoryContext pagedelcontext;
+ 	uint32		pages_notrecyclable;	/* # of not-yet-recyclable pages */
+ 	uint32		pages_halfdead;		/* # of half-dead pages */
  } BTVacState;
  
  /*
***************
*** 945,950 **** btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 947,954 ----
  IndexBulkDeleteResult *
  btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
  {
+ 	extern char *get_rel_name(Oid);
+ 
  	/* No-op in ANALYZE ONLY mode */
  	if (info->analyze_only)
  		return stats;
***************
*** 963,968 **** btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
--- 967,977 ----
  		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
  		btvacuumscan(info, stats, NULL, NULL, 0);
  	}
+ 	else
+ 		ereport(LOG,
+ 				(errmsg ("btvacuumcleanup on index %s is skipped since bulkdelete has run just before.",
+ 						 get_rel_name(info->index->rd_id)),
+ 				 errhidestmt (true)));
  
  	/* Finally, vacuum the FSM */
  	IndexFreeSpaceMapVacuum(info->index);
***************
*** 1004,1009 **** btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 1013,1019 ----
  	BlockNumber num_pages;
  	BlockNumber blkno;
  	bool		needLock;
+ 	extern char *get_rel_name(Oid);
  
  	/*
  	 * Reset counts that will be incremented during the scan; needed in case
***************
*** 1022,1027 **** btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 1032,1039 ----
  	vstate.lastBlockVacuumed = BTREE_METAPAGE;	/* Initialise at first block */
  	vstate.lastBlockLocked = BTREE_METAPAGE;
  	vstate.totFreePages = 0;
+ 	vstate.pages_notrecyclable = 0;
+ 	vstate.pages_halfdead = 0;
  
  	/* Create a temporary memory context to run _bt_pagedel in */
  	vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
***************
*** 1111,1116 **** btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 1123,1139 ----
  	/* update statistics */
  	stats->num_pages = num_pages;
  	stats->pages_free = vstate.totFreePages;
+ 
+ 	/* check if we need no further clenaup */
+ 	if (vstate.pages_notrecyclable == 0 && vstate.pages_halfdead == 0)
+ 		stats->no_cleanup_needed = true;
+ 
+ 	ereport(LOG,
+ 			(errmsg ("btvacuumscan(%s) result: deleted = %d, notrecyclable = %d, hafldead = %d, no_cleanup_needed = %s",
+ 					 get_rel_name(rel->rd_id), stats->pages_deleted,
+ 					 vstate.pages_notrecyclable, vstate.pages_halfdead,
+ 					 stats->no_cleanup_needed ? "true":"false"),
+ 			 errhidestmt(true)));
  }
  
  /*
***************
*** 1190,1195 **** restart:
--- 1213,1219 ----
  	{
  		/* Already deleted, but can't recycle yet */
  		stats->pages_deleted++;
+ 		vstate->pages_notrecyclable++;
  	}
  	else if (P_ISHALFDEAD(opaque))
  	{
***************
*** 1359,1364 **** restart:
--- 1383,1390 ----
  		/* count only this page, else may double-count parent */
  		if (ndel)
  			stats->pages_deleted++;
+ 		else if (P_ISHALFDEAD(opaque))
+ 			vstate->pages_halfdead++;	/* Still half-dead */
  
  		MemoryContextSwitchTo(oldcontext);
  		/* pagedel released buffer, so we shouldn't */
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 56,61 ****
--- 56,62 ----
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
  #include "storage/lmgr.h"
+ #include "utils/fmgrprotos.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/pg_rusage.h"
***************
*** 129,134 **** typedef struct LVRelStats
--- 130,137 ----
  	int			num_index_scans;
  	TransactionId latestRemovedXid;
  	bool		lock_waiter_detected;
+ 	int			num_index_stats;
+ 	PgStat_MsgVacuum_indstate *indstats;
  } LVRelStats;
  
  
***************
*** 152,158 **** static void lazy_vacuum_index(Relation indrel,
  				  IndexBulkDeleteResult **stats,
  				  LVRelStats *vacrelstats);
  static void lazy_cleanup_index(Relation indrel,
! 				   IndexBulkDeleteResult *stats,
  				   LVRelStats *vacrelstats);
  static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
  				 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
--- 155,161 ----
  				  IndexBulkDeleteResult **stats,
  				  LVRelStats *vacrelstats);
  static void lazy_cleanup_index(Relation indrel,
! 				   IndexBulkDeleteResult **stats,
  				   LVRelStats *vacrelstats);
  static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
  				 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
***************
*** 342,348 **** lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
  	pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
  						 new_live_tuples,
! 						 vacrelstats->new_dead_tuples);
  	pgstat_progress_end_command();
  
  	/* and log the action if appropriate */
--- 345,352 ----
  	pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
  						 new_live_tuples,
! 						 vacrelstats->new_dead_tuples,
! 						 vacrelstats->num_index_stats, vacrelstats->indstats);
  	pgstat_progress_end_command();
  
  	/* and log the action if appropriate */
***************
*** 496,501 **** lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
--- 500,508 ----
  
  	indstats = (IndexBulkDeleteResult **)
  		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
+ 	vacrelstats->num_index_stats = nindexes;
+ 	vacrelstats->indstats = (PgStat_MsgVacuum_indstate *)
+ 		palloc0(nindexes * MAXALIGN(sizeof(PgStat_MsgVacuum_indstate)));
  
  	nblocks = RelationGetNumberOfBlocks(onerel);
  	vacrelstats->rel_pages = nblocks;
***************
*** 1320,1326 **** lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
  
  	/* Do post-vacuum cleanup and statistics update for each index */
  	for (i = 0; i < nindexes; i++)
! 		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
  
  	/* If no indexes, make log report that lazy_vacuum_heap would've made */
  	if (vacuumed_pages)
--- 1327,1344 ----
  
  	/* Do post-vacuum cleanup and statistics update for each index */
  	for (i = 0; i < nindexes; i++)
! 	{
! 		lazy_cleanup_index(Irel[i], &indstats[i], vacrelstats);
! 
! 		/* update stats if indstats exists */
! 		if (indstats[i])
! 		{
! 			/* prepare to record the result to stats */
! 			vacrelstats->indstats[i].indexoid = Irel[i]->rd_id;
! 			vacrelstats->indstats[i].vac_cleanup_needed =
! 				!(indstats[i] && indstats[i]->no_cleanup_needed);
! 		}
! 	}
  
  	/* If no indexes, make log report that lazy_vacuum_heap would've made */
  	if (vacuumed_pages)
***************
*** 1622,1632 **** lazy_vacuum_index(Relation indrel,
   */
  static void
  lazy_cleanup_index(Relation indrel,
! 				   IndexBulkDeleteResult *stats,
  				   LVRelStats *vacrelstats)
  {
  	IndexVacuumInfo ivinfo;
  	PGRUsage	ru0;
  
  	pg_rusage_init(&ru0);
  
--- 1640,1652 ----
   */
  static void
  lazy_cleanup_index(Relation indrel,
! 				   IndexBulkDeleteResult **stats,
  				   LVRelStats *vacrelstats)
  {
  	IndexVacuumInfo ivinfo;
  	PGRUsage	ru0;
+ 	bool		run_cleanup = true;
+ 	extern char *get_rel_name(Oid);
  
  	pg_rusage_init(&ru0);
  
***************
*** 1637,1655 **** lazy_cleanup_index(Relation indrel,
  	ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
  	ivinfo.strategy = vac_strategy;
  
! 	stats = index_vacuum_cleanup(&ivinfo, stats);
  
! 	if (!stats)
  		return;
  
  	/*
  	 * Now update statistics in pg_class, but only if the index says the count
  	 * is accurate.
  	 */
! 	if (!stats->estimated_count)
  		vac_update_relstats(indrel,
! 							stats->num_pages,
! 							stats->num_index_tuples,
  							0,
  							false,
  							InvalidTransactionId,
--- 1657,1696 ----
  	ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
  	ivinfo.strategy = vac_strategy;
  
! 	/*
! 	 * If lazy_vacuum_index tells me that no cleanup is required, or stats
! 	 * tells so, skip cleanup.
! 	 */
! 	if (*stats)
! 	{
! 		if ((*stats)->no_cleanup_needed)
! 			run_cleanup =false;
! 	}
! 	else
! 		run_cleanup = DatumGetBool(
! 			DirectFunctionCall1(pg_stat_get_vac_cleanup_needed,
! 								ObjectIdGetDatum(indrel->rd_id)));
  
! 	ereport(LOG,
! 			(errmsg ("Vacuum cleanup of index %s is %sskipped",
! 					 get_rel_name(indrel->rd_id),
! 					 run_cleanup ? "NOT ": ""),
! 			 errhidestmt (true)));
! 
! 	if (run_cleanup)
! 		*stats = index_vacuum_cleanup(&ivinfo, *stats);
! 
! 	if (!*stats)
  		return;
  
  	/*
  	 * Now update statistics in pg_class, but only if the index says the count
  	 * is accurate.
  	 */
! 	if (!(*stats)->estimated_count)
  		vac_update_relstats(indrel,
! 							(*stats)->num_pages,
! 							(*stats)->num_index_tuples,
  							0,
  							false,
  							InvalidTransactionId,
***************
*** 1659,1674 **** lazy_cleanup_index(Relation indrel,
  	ereport(elevel,
  			(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
  					RelationGetRelationName(indrel),
! 					stats->num_index_tuples,
! 					stats->num_pages),
  			 errdetail("%.0f index row versions were removed.\n"
  					   "%u index pages have been deleted, %u are currently reusable.\n"
  					   "%s.",
! 					   stats->tuples_removed,
! 					   stats->pages_deleted, stats->pages_free,
  					   pg_rusage_show(&ru0))));
- 
- 	pfree(stats);
  }
  
  /*
--- 1700,1713 ----
  	ereport(elevel,
  			(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
  					RelationGetRelationName(indrel),
! 					(*stats)->num_index_tuples,
! 					(*stats)->num_pages),
  			 errdetail("%.0f index row versions were removed.\n"
  					   "%u index pages have been deleted, %u are currently reusable.\n"
  					   "%s.",
! 					   (*stats)->tuples_removed,
! 					   (*stats)->pages_deleted, (*stats)->pages_free,
  					   pg_rusage_show(&ru0))));
  }
  
  /*
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 1403,1423 **** pgstat_report_autovac(Oid dboid)
   */
  void
  pgstat_report_vacuum(Oid tableoid, bool shared,
! 					 PgStat_Counter livetuples, PgStat_Counter deadtuples)
  {
! 	PgStat_MsgVacuum msg;
  
  	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
  		return;
  
! 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
! 	msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
! 	msg.m_tableoid = tableoid;
! 	msg.m_autovacuum = IsAutoVacuumWorkerProcess();
! 	msg.m_vacuumtime = GetCurrentTimestamp();
! 	msg.m_live_tuples = livetuples;
! 	msg.m_dead_tuples = deadtuples;
! 	pgstat_send(&msg, sizeof(msg));
  }
  
  /* --------
--- 1403,1437 ----
   */
  void
  pgstat_report_vacuum(Oid tableoid, bool shared,
! 					 PgStat_Counter livetuples, PgStat_Counter deadtuples,
! 					 int nindstats, PgStat_MsgVacuum_indstate *stats)
  {
! 	PgStat_MsgVacuum *msg;
! 	int i;
! 	int msgsize;
  
  	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
  		return;
+ 	msgsize = offsetof(PgStat_MsgVacuum, m_indvacstates) +
+ 		MAXALIGN(sizeof(PgStat_MsgVacuum_indstate)) * nindstats;
  
! 	msg = (PgStat_MsgVacuum *) palloc(msgsize);
! 	pgstat_setheader(&msg->m_hdr, PGSTAT_MTYPE_VACUUM);
! 	msg->m_databaseid = shared ? InvalidOid : MyDatabaseId;
! 	msg->m_tableoid = tableoid;
! 	msg->m_autovacuum = IsAutoVacuumWorkerProcess();
! 	msg->m_vacuumtime = GetCurrentTimestamp();
! 	msg->m_live_tuples = livetuples;
! 	msg->m_dead_tuples = deadtuples;
! 	msg->m_n_indvac_states = nindstats;
! 
! 	for (i = 0 ; i < nindstats ; i++)
! 	{
! 		msg->m_indvacstates[i].indexoid = stats[i].indexoid;
! 		msg->m_indvacstates[i].vac_cleanup_needed = stats[i].vac_cleanup_needed;
! 	}
! 
! 	pgstat_send(msg, msgsize);
  }
  
  /* --------
***************
*** 1535,1541 **** pgstat_report_tempfile(size_t filesize)
  	pgstat_send(&msg, sizeof(msg));
  }
  
! 
  /* ----------
   * pgstat_ping() -
   *
--- 1549,1561 ----
  	pgstat_send(&msg, sizeof(msg));
  }
  
! bool
! pgstat_live(void)
! {
! 	if (pgStatSock == PGINVALID_SOCKET)
! 		return false;
! 	return true;
! }
  /* ----------
   * pgstat_ping() -
   *
***************
*** 4587,4592 **** pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
--- 4607,4613 ----
  		result->analyze_count = 0;
  		result->autovac_analyze_timestamp = 0;
  		result->autovac_analyze_count = 0;
+ 		result->needs_vacuum_cleanup = true;
  	}
  
  	return result;
***************
*** 5718,5723 **** pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
--- 5739,5745 ----
  			tabentry->analyze_count = 0;
  			tabentry->autovac_analyze_timestamp = 0;
  			tabentry->autovac_analyze_count = 0;
+ 			tabentry->needs_vacuum_cleanup = true;
  		}
  		else
  		{
***************
*** 5963,5968 **** pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
--- 5985,5991 ----
  {
  	PgStat_StatDBEntry *dbentry;
  	PgStat_StatTabEntry *tabentry;
+ 	int i;
  
  	/*
  	 * Store the data in the table's hashtable entry.
***************
*** 5984,5989 **** pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
--- 6007,6023 ----
  		tabentry->vacuum_timestamp = msg->m_vacuumtime;
  		tabentry->vacuum_count++;
  	}
+ 
+ 	/* store index vacuum stats */
+ 	for (i = 0 ; i < msg->m_n_indvac_states ; i++)
+ 	{
+ 		PgStat_StatTabEntry *indtabentry;
+ 		Oid indoid = msg->m_indvacstates[i].indexoid;
+ 		bool vac_cleanup_needed = msg->m_indvacstates[i].vac_cleanup_needed;
+ 
+ 		indtabentry = pgstat_get_tab_entry(dbentry, indoid, true);
+ 		indtabentry->needs_vacuum_cleanup = vac_cleanup_needed;
+ 	}
  }
  
  /* ----------
*** a/src/backend/utils/adt/pgstatfuncs.c
--- b/src/backend/utils/adt/pgstatfuncs.c
***************
*** 27,32 ****
--- 27,33 ----
  #include "utils/acl.h"
  #include "utils/builtins.h"
  #include "utils/inet.h"
+ #include "utils/syscache.h"
  #include "utils/timestamp.h"
  
  #define UINT32_ACCESS_ONCE(var)		 ((uint32)(*((volatile uint32 *)&(var))))
***************
*** 328,333 **** pg_stat_get_autovacuum_count(PG_FUNCTION_ARGS)
--- 329,366 ----
  }
  
  Datum
+ pg_stat_get_vac_cleanup_needed(PG_FUNCTION_ARGS)
+ {
+ 	Oid			relid = PG_GETARG_OID(0);
+ 	bool result;
+ 	PgStat_StatTabEntry *tabentry;
+ 	HeapTuple	reltup;
+ 	bool is_index = false;
+ 
+ 	if (!pgstat_live())
+ 		return true;
+ 
+ 	reltup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ 	if (HeapTupleIsValid(reltup))
+ 	{
+ 		if (((Form_pg_class) GETSTRUCT(reltup))->relkind == RELKIND_INDEX)
+ 			is_index = true;
+ 
+ 		ReleaseSysCache(reltup);
+ 	}
+ 
+ 	if (!is_index)
+ 		PG_RETURN_NULL();
+ 
+ 	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ 		result = true;
+ 	else
+ 		result = tabentry->needs_vacuum_cleanup;
+ 
+ 	PG_RETURN_BOOL(result);
+ }
+ 
+ Datum
  pg_stat_get_analyze_count(PG_FUNCTION_ARGS)
  {
  	Oid			relid = PG_GETARG_OID(0);
*** a/src/include/access/genam.h
--- b/src/include/access/genam.h
***************
*** 77,82 **** typedef struct IndexBulkDeleteResult
--- 77,83 ----
  	double		tuples_removed; /* # removed during vacuum operation */
  	BlockNumber pages_deleted;	/* # unused pages in index */
  	BlockNumber pages_free;		/* # pages available for reuse */
+ 	bool		no_cleanup_needed;  /* true if no cleanup needed  */
  } IndexBulkDeleteResult;
  
  /* Typedef for callback function to determine if a tuple is bulk-deletable */
*** a/src/include/access/nbtree.h
--- b/src/include/access/nbtree.h
***************
*** 416,421 **** typedef struct BTScanOpaqueData
--- 416,422 ----
  
  typedef BTScanOpaqueData *BTScanOpaque;
  
+ 
  /*
   * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
   * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 2873,2878 **** DATA(insert OID = 3054 ( pg_stat_get_vacuum_count PGNSP PGUID 12 1 0 0 0 f f f f
--- 2873,2880 ----
  DESCR("statistics: number of manual vacuums for a table");
  DATA(insert OID = 3055 ( pg_stat_get_autovacuum_count PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_autovacuum_count _null_ _null_ _null_ ));
  DESCR("statistics: number of auto vacuums for a table");
+ DATA(insert OID = 3419 ( pg_stat_get_vac_cleanup_needed PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 16 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_vac_cleanup_needed _null_ _null_ _null_ ));
+ DESCR("statistics: whether vacuum on a relation requires cleanup");
  DATA(insert OID = 3056 ( pg_stat_get_analyze_count PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_analyze_count _null_ _null_ _null_ ));
  DESCR("statistics: number of manual analyzes for a table");
  DATA(insert OID = 3057 ( pg_stat_get_autoanalyze_count PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_autoanalyze_count _null_ _null_ _null_ ));
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
***************
*** 360,365 **** typedef struct PgStat_MsgAutovacStart
--- 360,372 ----
   *								after VACUUM
   * ----------
   */
+ typedef struct PgStat_MsgVacuum_indstate
+ {
+ 	Oid		indexoid;
+ 	bool	vac_cleanup_needed;
+ } PgStat_MsgVacuum_indstate;
+ 
+ 
  typedef struct PgStat_MsgVacuum
  {
  	PgStat_MsgHdr m_hdr;
***************
*** 369,377 **** typedef struct PgStat_MsgVacuum
  	TimestampTz m_vacuumtime;
  	PgStat_Counter m_live_tuples;
  	PgStat_Counter m_dead_tuples;
  } PgStat_MsgVacuum;
  
- 
  /* ----------
   * PgStat_MsgAnalyze			Sent by the backend or autovacuum daemon
   *								after ANALYZE
--- 376,385 ----
  	TimestampTz m_vacuumtime;
  	PgStat_Counter m_live_tuples;
  	PgStat_Counter m_dead_tuples;
+ 	int			m_n_indvac_states;
+ 	PgStat_MsgVacuum_indstate m_indvacstates[FLEXIBLE_ARRAY_MEMBER];
  } PgStat_MsgVacuum;
  
  /* ----------
   * PgStat_MsgAnalyze			Sent by the backend or autovacuum daemon
   *								after ANALYZE
***************
*** 641,646 **** typedef struct PgStat_StatTabEntry
--- 649,656 ----
  	PgStat_Counter analyze_count;
  	TimestampTz autovac_analyze_timestamp;	/* autovacuum initiated */
  	PgStat_Counter autovac_analyze_count;
+ 
+ 	bool		 needs_vacuum_cleanup;	/* This index needs vac cleanup */
  } PgStat_StatTabEntry;
  
  
***************
*** 1159,1166 **** extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type t
  
  extern void pgstat_report_autovac(Oid dboid);
  extern void pgstat_report_vacuum(Oid tableoid, bool shared,
! 					 PgStat_Counter livetuples, PgStat_Counter deadtuples);
! extern void pgstat_report_analyze(Relation rel,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples,
  					  bool resetcounter);
  
--- 1169,1177 ----
  
  extern void pgstat_report_autovac(Oid dboid);
  extern void pgstat_report_vacuum(Oid tableoid, bool shared,
! 					 PgStat_Counter livetuples, PgStat_Counter deadtuples,
! 					 int nindstats, PgStat_MsgVacuum_indstate *states);
! 	extern void pgstat_report_analyze(Relation rel,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples,
  					  bool resetcounter);
  
***************
*** 1172,1177 **** extern void pgstat_bestart(void);
--- 1183,1189 ----
  
  extern void pgstat_report_activity(BackendState state, const char *cmd_str);
  extern void pgstat_report_tempfile(size_t filesize);
+ extern bool pgstat_live(void);
  extern void pgstat_report_appname(const char *appname);
  extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
  extern const char *pgstat_get_wait_event(uint32 wait_event_info);
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
***************
*** 2791,2796 **** table_recheck_autovac(Oid relid, HTAB *table_toast_map,
--- 2791,2803 ----
  							  effective_multixact_freeze_max_age,
  							  &dovacuum, &doanalyze, &wraparound);
  
+ 	/* force vacuum if any index on the rel is requesting cleanup scan */
+ 	if (!dovacuum)
+ 		dovacuum =
+ 			DatumGetBool(
+ 				DirectFunctionCall1(pg_stat_get_vac_cleanup_needed,
+ 									ObjectIdGetDatum(relid)));
+ 
  	/* ignore ANALYZE for toast tables */
  	if (classForm->relkind == RELKIND_TOASTVALUE)
  		doanalyze = false;
***************
*** 3045,3050 **** relation_needs_vacanalyze(Oid relid,
--- 3052,3064 ----
  		/* Determine if this table needs vacuum or analyze. */
  		*dovacuum = force_vacuum || (vactuples > vacthresh);
  		*doanalyze = (anltuples > anlthresh);
+ 
+ 		/* still force vacuum if index cleanup is requested */
+ 		if (!*dovacuum)
+ 			*dovacuum =
+ 				DatumGetBool(
+ 					DirectFunctionCall1(pg_stat_get_vac_cleanup_needed,
+ 										ObjectIdGetDatum(relid)));
  	}
  	else
  	{
*** a/src/backend/utils/adt/pgstatfuncs.c
--- b/src/backend/utils/adt/pgstatfuncs.c
***************
*** 349,361 **** pg_stat_get_vac_cleanup_needed(PG_FUNCTION_ARGS)
  		ReleaseSysCache(reltup);
  	}
  
  	if (!is_index)
! 		PG_RETURN_NULL();
  
! 	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
! 		result = true;
  	else
! 		result = tabentry->needs_vacuum_cleanup;
  
  	PG_RETURN_BOOL(result);
  }
--- 349,393 ----
  		ReleaseSysCache(reltup);
  	}
  
+ 	/*
+ 	 * If normal relaion is specified, return true if any index of the
+ 	 * relation is explicitly requesting cleanup.
+ 	 */
  	if (!is_index)
! 	{
! 		Relation indrel;
! 		SysScanDesc indscan;
! 		HeapTuple indtup;
  
! 		result = false;
! 		indrel = heap_open(IndexRelationId, AccessShareLock);
! 		indscan = systable_beginscan(indrel, InvalidOid, false, NULL, 0, NULL);
! 		while (HeapTupleIsValid(indtup = systable_getnext(indscan)) &&
! 			   !result)
! 		{
! 			Form_pg_index ind = (Form_pg_index) GETSTRUCT(indtup);
! 
! 			if (ind->indrelid != relid)
! 				continue;
! 
! 			if ((tabentry = pgstat_fetch_stat_tabentry(ind->indexrelid)))
! 				result |= tabentry->needs_vacuum_cleanup;
! 		}
! 		systable_endscan(indscan);
! 		heap_close(indrel, AccessShareLock);
! 	}
  	else
! 	{
! 		/*
! 		 * Elsewise reutrn the status of the index. As somewhat inconsistent
! 		 * behavior with the normal relation case above, *true* is returned
! 		 * for indexes with no stats here.
! 		 */
! 		if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
! 			result = true;
! 		else
! 			result = tabentry->needs_vacuum_cleanup;
! 	}
  
  	PG_RETURN_BOOL(result);
  }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to