Attached is a patch to implement the idea discussed here:

http://archives.postgresql.org/pgsql-hackers/2009-08/msg01137.php

If VACUUM freezes one tuple on a page, it's likely that there are other
tuples on the same page that are close to vacuum_freeze_min_age
transactions old, but not quite there yet. Because the page is already
dirty from freezing one tuple, it makes sense to be more aggressive about
freezing the rest, in the hope that all the tuples will be frozen and we
will not have to dirty the page again later.

This patch introduces a GUC, vacuum_freeze_opportunistic_ratio. Once
VACUUM has frozen one tuple on a page, it effectively multiplies
vacuum_freeze_min_age by vacuum_freeze_opportunistic_ratio and uses that
lower (more aggressive) age as the freeze cutoff for that page only.

The reason we don't just freeze all the tuples we can (effectively
setting the vacuum_freeze_opportunistic_ratio to zero) is to preserve
transaction ID information for diagnosing problems.

Regards,
        Jeff Davis
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2034fdc..1d71abf 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4060,6 +4060,27 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-vacuum-freeze-opportunistic-ratio" xreflabel="vacuum_freeze_opportunistic_ratio">
+      <term><varname>vacuum_freeze_opportunistic_ratio</varname> (<type>floating point</type>)</term>
+      <indexterm>
+       <primary><varname>vacuum_freeze_opportunistic_ratio</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+	While <command>VACUUM</> is scanning a table, if it replaces
+	some transaction IDs with <literal>FrozenXID</> on a page, it
+	is cheaper to do so for other transaction IDs on the same page
+	at the same time. This value, which must be between 0 and 1,
+	is multiplied by <xref linkend="guc-vacuum-freeze-min-age"> to
+	determine a lower (more aggressive) cutoff for use during this
+	opportunity. A lower setting may reduce writes for
+	rarely-updated data, while a higher setting will preserve
+	transaction ID information longer (which is important when
+	diagnosing problems). The default setting is 0.5.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-bytea-output" xreflabel="bytea_output">
       <term><varname>bytea_output</varname> (<type>enum</type>)</term>
       <indexterm>
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index a6ba2ec..4e86e97 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -771,6 +771,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
 	bool		use_wal;
 	TransactionId OldestXmin;
 	TransactionId FreezeXid;
+	TransactionId OpportunisticFreezeXid;
 	RewriteState rwstate;
 
 	/*
@@ -808,7 +809,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
 	 * plain VACUUM would.
 	 */
 	vacuum_set_xid_limits(-1, -1, OldHeap->rd_rel->relisshared,
-						  &OldestXmin, &FreezeXid, NULL);
+						  &OldestXmin, &FreezeXid, &OpportunisticFreezeXid,
+						  NULL);
 
 	/*
 	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 03c5edc..854d184 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -62,6 +62,7 @@
  * GUC parameters
  */
 int			vacuum_freeze_min_age;
+double		vacuum_freeze_opportunistic_ratio;
 int			vacuum_freeze_table_age;
 
 /*
@@ -208,6 +209,7 @@ static int	elevel = -1;
 
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;
+static TransactionId OpportunisticFreezeLimit;
 
 static BufferAccessStrategy vac_strategy;
 
@@ -596,10 +598,12 @@ vacuum_set_xid_limits(int freeze_min_age,
 					  bool sharedRel,
 					  TransactionId *oldestXmin,
 					  TransactionId *freezeLimit,
+					  TransactionId *opportunisticFreezeLimit,
 					  TransactionId *freezeTableLimit)
 {
 	int			freezemin;
 	TransactionId limit;
+	TransactionId opportunistic_limit;
 	TransactionId safeLimit;
 
 	/*
@@ -634,6 +638,11 @@ vacuum_set_xid_limits(int freeze_min_age,
 	if (!TransactionIdIsNormal(limit))
 		limit = FirstNormalTransactionId;
 
+	opportunistic_limit = *oldestXmin - (freezemin *
+										 vacuum_freeze_opportunistic_ratio);
+	if (!TransactionIdIsNormal(opportunistic_limit))
+		opportunistic_limit = FirstNormalTransactionId;
+
 	/*
 	 * If oldestXmin is very far back (in practice, more than
 	 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
@@ -648,10 +657,11 @@ vacuum_set_xid_limits(int freeze_min_age,
 		ereport(WARNING,
 				(errmsg("oldest xmin is far in the past"),
 				 errhint("Close open transactions soon to avoid wraparound problems.")));
-		limit = *oldestXmin;
+		limit = opportunistic_limit = *oldestXmin;
 	}
 
 	*freezeLimit = limit;
+	*opportunisticFreezeLimit = opportunistic_limit;
 
 	if (freezeTableLimit != NULL)
 	{
@@ -1253,7 +1263,8 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
 
 	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
 						  onerel->rd_rel->relisshared,
-						  &OldestXmin, &FreezeLimit, NULL);
+						  &OldestXmin, &FreezeLimit, &OpportunisticFreezeLimit,
+						  NULL);
 
 	/*
 	 * Flush any previous async-commit transactions.  This does not guarantee
@@ -1396,6 +1407,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 		bool		notup;
 		OffsetNumber frozen[MaxOffsetNumber];
 		int			nfrozen;
+		TransactionId current_freeze_limit = FreezeLimit;
+		OffsetNumber first_frozen_offset = InvalidOffsetNumber;
 
 		vacuum_delay_point();
 
@@ -1713,9 +1726,14 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 				 * Each non-removable tuple must be checked to see if it needs
 				 * freezing.
 				 */
-				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
+				if (heap_freeze_tuple(tuple.t_data, current_freeze_limit,
 									  InvalidBuffer))
+				{
 					frozen[nfrozen++] = offnum;
+					current_freeze_limit = OpportunisticFreezeLimit;
+					if (!OffsetNumberIsValid(first_frozen_offset))
+						first_frozen_offset = offnum;
+				}
 			}
 		}						/* scan along page */
 
@@ -1781,7 +1799,12 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 		 */
 		if (nfrozen > 0)
 		{
+			vacuum_opportunistic_freeze_page(blkno, page, first_frozen_offset,
+											 OpportunisticFreezeLimit,
+											 frozen, &nfrozen);
+
 			MarkBufferDirty(buf);
+
 			/* no XLOG for temp tables, though */
 			if (!onerel->rd_istemp)
 			{
@@ -3889,3 +3912,44 @@ vacuum_delay_point(void)
 		CHECK_FOR_INTERRUPTS();
 	}
 }
+
+/*
+ * Second, more aggressive freeze pass over offsets FirstOffsetNumber..maxoff
+ * (inclusive) of a page, using freeze_limit as the cutoff.  scan_heap() and
+ * lazy_scan_heap() call this once at least one tuple on the page has been
+ * frozen: the page is already dirty, and other tuples on it are likely to
+ * be almost eligible for freezing, so freezing them now might save a later
+ * write to disk.  The offset of each tuple frozen here is appended to
+ * frozen[] and *nfrozen is incremented; the caller must ensure frozen[]
+ * has room for the additional entries.
+ */
+void
+vacuum_opportunistic_freeze_page(BlockNumber blkno, Page page,
+								 OffsetNumber maxoff,
+								 TransactionId freeze_limit,
+								 OffsetNumber *frozen, int *nfrozen)
+{
+	OffsetNumber offnum;
+
+	Assert(maxoff <= PageGetMaxOffsetNumber(page));
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId			itemid = PageGetItemId(page, offnum);
+		HeapTupleData	tuple;
+
+		/* Skip unused, dead and redirected line pointers. */
+		if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) ||
+			ItemIdIsRedirected(itemid))
+			continue;
+		Assert(ItemIdIsNormal(itemid));
+
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+		tuple.t_len = ItemIdGetLength(itemid);
+		ItemPointerSet(&(tuple.t_self), blkno, offnum);
+		/* Parenthesize: *nfrozen++ would advance the pointer, not the count. */
+		if (heap_freeze_tuple(tuple.t_data, freeze_limit, InvalidBuffer))
+			frozen[(*nfrozen)++] = offnum;
+	}
+}
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 66ef463..7e2e7d7 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -106,6 +106,7 @@ static int	elevel = -1;
 
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;
+static TransactionId OpportunisticFreezeLimit;
 
 static BufferAccessStrategy vac_strategy;
 
@@ -169,7 +170,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
 
 	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
 						  onerel->rd_rel->relisshared,
-						  &OldestXmin, &FreezeLimit, &freezeTableLimit);
+						  &OldestXmin, &FreezeLimit, &OpportunisticFreezeLimit,
+						  &freezeTableLimit);
 	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
 											 freezeTableLimit);
 
@@ -316,6 +318,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		Size		freespace;
 		bool		all_visible_according_to_vm = false;
 		bool		all_visible;
+		TransactionId current_freeze_limit = FreezeLimit;
+		OffsetNumber first_frozen_offset = InvalidOffsetNumber;
 
 		/*
 		 * Skip pages that don't require vacuuming according to the visibility
@@ -607,9 +611,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 				 * Each non-removable tuple must be checked to see if it needs
 				 * freezing.  Note we already have exclusive buffer lock.
 				 */
-				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
+				if (heap_freeze_tuple(tuple.t_data, current_freeze_limit,
 									  InvalidBuffer))
+				{
 					frozen[nfrozen++] = offnum;
+					current_freeze_limit = OpportunisticFreezeLimit;
+					if (!OffsetNumberIsValid(first_frozen_offset))
+						first_frozen_offset = offnum;
+				}
 			}
 		}						/* scan along page */
 
@@ -620,7 +629,12 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		 */
 		if (nfrozen > 0)
 		{
+			vacuum_opportunistic_freeze_page(blkno, page, first_frozen_offset,
+											 OpportunisticFreezeLimit,
+											 frozen, &nfrozen);
+
 			MarkBufferDirty(buf);
+
 			/* no XLOG for temp tables, though */
 			if (!onerel->rd_istemp)
 			{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b7bf6e2..b8e869b 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2087,6 +2087,15 @@ static struct config_real ConfigureNamesReal[] =
 		0.5, 0.0, 1.0, NULL, NULL
 	},
 
+	{
+		{"vacuum_freeze_opportunistic_ratio", PGC_USERSET, CLIENT_CONN_STATEMENT,
+			gettext_noop("VACUUM will freeze tuples opportunistically if they are this fraction of vacuum_freeze_min_age transactions old."),
+			NULL
+		},
+		&vacuum_freeze_opportunistic_ratio,
+		0.5, 0.0, 1.0, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 8e719f7..07a712c 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -122,6 +122,7 @@ typedef struct VacAttrStats
 extern PGDLLIMPORT int default_statistics_target;		/* PGDLLIMPORT for
 														 * PostGIS */
 extern int	vacuum_freeze_min_age;
+extern double	vacuum_freeze_opportunistic_ratio;
 extern int	vacuum_freeze_table_age;
 
 
@@ -140,10 +141,15 @@ extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
 					  bool sharedRel,
 					  TransactionId *oldestXmin,
 					  TransactionId *freezeLimit,
+					  TransactionId *opportunisticFreezeLimit,
 					  TransactionId *freezeTableLimit);
 extern void vac_update_datfrozenxid(void);
 extern bool vac_is_partial_index(Relation indrel);
 extern void vacuum_delay_point(void);
+extern void vacuum_opportunistic_freeze_page(
+	BlockNumber blkno, Page page, OffsetNumber first_frozen_offset,
+	TransactionId OpportunisticFreezeLimit, OffsetNumber *frozen,
+	int *nfrozen);
 
 /* in commands/vacuumlazy.c */
 extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to