On 2019-11-29 05:32, Michael Paquier wrote:
> These comments are unanswered for more than 2 months, so I am marking
> this entry as returned with feedback.
I'd like to revive this patch. To make further work easier, I'm attaching
a rebased version of Alexander's latest patch.
As for having yet another copy of the visibility-determination logic: the
visibility check was largely taken from heap_page_is_all_visible(), so I
refactored the code into a shared helper (heap_page_update_vm_flags) so
that the logic is not duplicated. The updated patch is attached as well.
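To illustrate the refactoring, here is a sketch (not a literal hunk from
either patch) of the call pattern for the shared helper. The variable
names are illustrative; in the rewrite path, is_live comes from the
HeapTupleSatisfiesVacuum() result for the old tuple, and the two flags
live in RewriteStateData for the duration of the current heap page:

    bool    all_visible = true; /* start optimistic for each heap page */
    bool    all_frozen = true;

    /* ... for every tuple placed on (or found on) the page ... */
    heap_page_update_vm_flags(tuple, oldest_xmin,
                              is_live, /* per HeapTupleSatisfiesVacuum() */
                              true,    /* also inspect xmax infomask bits */
                              &all_visible, &all_frozen);

    /*
     * Afterwards, all_visible/all_frozen say which VM bits the caller may
     * set for this page: VISIBILITYMAP_ALL_VISIBLE and/or
     * VISIBILITYMAP_ALL_FROZEN.
     */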
--
Anna Akenteva
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
commit b7482718231f0abf584cc65c5dcd19a91fefa512
Author: Akenteva Anna <a.akent...@postgrespro.ru>
Date: Mon Jun 28 08:22:53 2021 +0300
0001-write-vm-during-cluster-3-rebase.patch
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 1aff62cd42..ecb73aa608 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -110,6 +110,7 @@
#include "access/heaptoast.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
+#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xloginsert.h"
#include "catalog/catalog.h"
@@ -152,6 +153,11 @@ typedef struct RewriteStateData
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
HTAB *rs_logical_mappings; /* logical remapping files */
uint32 rs_num_rewrite_mappings; /* # in memory mappings */
+ Page rs_vm_buffer; /* visibility map page */
+ BlockNumber rs_vm_blockno; /* block number of visibility page */
+ bool rs_vm_buffer_valid; /* T if rs_vm_buffer contains a valid page */
+ bool rs_all_visible; /* all visible flag for rs_buffer */
+ bool rs_all_frozen; /* all frozen flag for rs_buffer */
} RewriteStateData;
/*
@@ -213,6 +219,9 @@ typedef struct RewriteMappingDataEntry
/* prototypes for internal functions */
+static void rewrite_flush_vm_page(RewriteState state);
+static void rewrite_set_vm_flags(RewriteState state);
+static void rewrite_update_vm_flags(RewriteState state, HeapTuple tuple);
static void raw_heap_insert(RewriteState state, HeapTuple tup);
/* internal logical remapping prototypes */
@@ -264,6 +273,11 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
state->rs_freeze_xid = freeze_xid;
state->rs_cutoff_multi = cutoff_multi;
state->rs_cxt = rw_cxt;
+ state->rs_vm_buffer = (Page) palloc(BLCKSZ);
+ state->rs_vm_blockno = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+ state->rs_vm_buffer_valid = false;
+ state->rs_all_visible = true;
+ state->rs_all_frozen = true;
/* Initialize hash tables used to track update chains */
hash_ctl.keysize = sizeof(TidHashKey);
@@ -284,6 +298,10 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
&hash_ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+ RelationOpenSmgr(state->rs_new_rel);
+ if (!smgrexists(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM))
+ smgrcreate(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM, false);
+
MemoryContextSwitchTo(old_cxt);
logical_begin_heap_rewrite(state);
@@ -317,6 +335,9 @@ end_heap_rewrite(RewriteState state)
/* Write the last page, if any */
if (state->rs_buffer_valid)
{
+ if (state->rs_all_visible)
+ PageSetAllVisible(state->rs_buffer);
+
if (RelationNeedsWAL(state->rs_new_rel))
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
@@ -329,8 +350,14 @@ end_heap_rewrite(RewriteState state)
RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
(char *) state->rs_buffer, true);
+
+ rewrite_set_vm_flags(state);
}
+ /* Write the last VM page too */
+ if (state->rs_vm_buffer_valid)
+ rewrite_flush_vm_page(state);
+
/*
* When we WAL-logged rel pages, we must nonetheless fsync them. The
* reason is the same as in storage.c's RelationCopyStorage(): we're
@@ -343,6 +370,7 @@ end_heap_rewrite(RewriteState state)
/* for an empty table, this could be first smgr access */
RelationOpenSmgr(state->rs_new_rel);
smgrimmedsync(state->rs_new_rel->rd_smgr, MAIN_FORKNUM);
+ smgrimmedsync(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM);
}
logical_end_heap_rewrite(state);
@@ -351,6 +379,98 @@ end_heap_rewrite(RewriteState state)
MemoryContextDelete(state->rs_cxt);
}
+/* Write contents of VM page */
+static void
+rewrite_flush_vm_page(RewriteState state)
+{
+ Assert(state->rs_vm_buffer_valid);
+
+ if (RelationNeedsWAL(state->rs_new_rel))
+ log_newpage(&state->rs_new_rel->rd_node,
+ VISIBILITYMAP_FORKNUM,
+ state->rs_vm_blockno,
+ state->rs_vm_buffer,
+ true);
+ RelationOpenSmgr(state->rs_new_rel);
+
+ PageSetChecksumInplace(state->rs_vm_buffer, state->rs_vm_blockno);
+
+ smgrextend(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM,
+ state->rs_vm_blockno, (char *) state->rs_vm_buffer, true);
+
+ state->rs_vm_buffer_valid = false;
+}
+
+/* Set the VM bits for the heap page just written (rs_blockno) */
+static void
+rewrite_set_vm_flags(RewriteState state)
+{
+ BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+ uint32 mapByte = HEAPBLK_TO_MAPBYTE(state->rs_blockno);
+ uint8 mapOffset = HEAPBLK_TO_OFFSET(state->rs_blockno);
+ char *map;
+ uint8 flags;
+
+ if (mapBlock != state->rs_vm_blockno && state->rs_vm_buffer_valid)
+ rewrite_flush_vm_page(state);
+
+ if (!state->rs_vm_buffer_valid)
+ {
+ PageInit(state->rs_vm_buffer, BLCKSZ, 0);
+ state->rs_vm_blockno = mapBlock;
+ state->rs_vm_buffer_valid = true;
+ }
+
+ flags = (state->rs_all_visible ? VISIBILITYMAP_ALL_VISIBLE : 0) |
+ (state->rs_all_frozen ? VISIBILITYMAP_ALL_FROZEN : 0);
+
+ map = PageGetContents(state->rs_vm_buffer);
+ map[mapByte] |= (flags << mapOffset);
+}
+
+/*
+ * Update the rs_all_visible and rs_all_frozen flags according to the tuple.
+ * We use a simplified check, relying on HeapTupleSatisfiesVacuum() having
+ * already set the tuple's hint bits.
+ */
+static void
+rewrite_update_vm_flags(RewriteState state, HeapTuple tuple)
+{
+ TransactionId xmin;
+
+ if (!state->rs_all_visible)
+ return;
+
+ if (!HeapTupleHeaderXminCommitted(tuple->t_data))
+ {
+ state->rs_all_visible = false;
+ state->rs_all_frozen = false;
+ return;
+ }
+
+ xmin = HeapTupleHeaderGetXmin(tuple->t_data);
+ if (!TransactionIdPrecedes(xmin, state->rs_oldest_xmin))
+ {
+ state->rs_all_visible = false;
+ state->rs_all_frozen = false;
+ return;
+ }
+
+ if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) &&
+ !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask))
+ {
+ state->rs_all_visible = false;
+ state->rs_all_frozen = false;
+ return;
+ }
+
+ if (!state->rs_all_frozen)
+ return;
+
+ if (heap_tuple_needs_eventual_freeze(tuple->t_data))
+ state->rs_all_frozen = false;
+}
+
/*
* Add a tuple to the new heap.
*
@@ -477,6 +597,7 @@ rewrite_heap_tuple(RewriteState state,
/* Insert the tuple and find out where it's put in new_heap */
raw_heap_insert(state, new_tuple);
+ rewrite_update_vm_flags(state, new_tuple);
new_tid = new_tuple->t_self;
logical_rewrite_heap_tuple(state, old_tid, new_tuple);
@@ -682,6 +803,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
* enforce saveFreeSpace unconditionally.
*/
+ if (state->rs_all_visible)
+ PageSetAllVisible(page);
+
/* XLOG stuff */
if (RelationNeedsWAL(state->rs_new_rel))
log_newpage(&state->rs_new_rel->rd_node,
@@ -702,6 +826,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
state->rs_blockno, (char *) page, true);
+ rewrite_set_vm_flags(state);
+
state->rs_blockno++;
state->rs_buffer_valid = false;
}
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index e198df65d8..98e249b0c2 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -99,24 +99,6 @@
/*#define TRACE_VISIBILITYMAP */
-/*
- * Size of the bitmap on each visibility map page, in bytes. There's no
- * extra headers, so the whole page minus the standard page header is
- * used for the bitmap.
- */
-#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-
-/* Number of heap blocks we can represent in one byte */
-#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
-
-/* Number of heap blocks we can represent in one visibility map page. */
-#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
-
-/* Mapping from heap block number to the right bit in the visibility map */
-#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
-#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
-#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-
/* Masks for counting subsets of bits in the visibility map. */
#define VISIBLE_MASK64 UINT64CONST(0x5555555555555555) /* The lower bit of each
* bit pair */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 57362c3687..39f3d59156 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -34,6 +34,24 @@
#define VM_ALL_FROZEN(r, b, v) \
((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
+/*
+ * Size of the bitmap on each visibility map page, in bytes. There's no
+ * extra headers, so the whole page minus the standard page header is
+ * used for the bitmap.
+ */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of heap blocks we can represent in one byte */
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
+
+/* Number of heap blocks we can represent in one visibility map page. */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* Mapping from heap block number to the right bit in the visibility map */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
+
extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk,
Buffer vmbuf, uint8 flags);
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
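The updated patch follows. It factors the visibility check out into a
shared helper, heap_page_update_vm_flags(), used by both the CLUSTER
rewrite path and vacuumlazy.c: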
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 1b8b640012..f9cfc5d911 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -47,7 +47,8 @@
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
- Datum *values, bool *isnull, RewriteState rwstate);
+ Datum *values, bool *isnull, bool islive,
+ RewriteState rwstate);
static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
HeapTuple tuple,
@@ -779,6 +780,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
HeapTuple tuple;
Buffer buf;
bool isdead;
+ bool islive = false;
CHECK_FOR_INTERRUPTS();
@@ -845,6 +847,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
case HEAPTUPLE_LIVE:
/* Live or recently dead, must copy it */
isdead = false;
+ islive = true;
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
@@ -900,7 +903,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
*num_tuples += 1;
if (tuplesort != NULL)
{
- tuplesort_putheaptuple(tuplesort, tuple);
+ tuplesort_putheaptuple(tuplesort, tuple, islive);
/*
* In scan-and-sort mode, report increase in number of tuples
@@ -918,7 +921,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
int64 ct_val[2];
reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
- values, isnull, rwstate);
+ values, isnull, islive, rwstate);
/*
* In indexscan mode and also VACUUM FULL, report increase in
@@ -958,17 +961,18 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
for (;;)
{
HeapTuple tuple;
+ bool islive;
CHECK_FOR_INTERRUPTS();
- tuple = tuplesort_getheaptuple(tuplesort, true);
+ tuple = tuplesort_getheaptuple(tuplesort, true, &islive);
if (tuple == NULL)
break;
n_tuples += 1;
reform_and_rewrite_tuple(tuple,
OldHeap, NewHeap,
- values, isnull,
+ values, isnull, islive,
rwstate);
/* Report n_tuples */
pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
@@ -2455,7 +2459,8 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
static void
reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
- Datum *values, bool *isnull, RewriteState rwstate)
+ Datum *values, bool *isnull, bool islive,
+ RewriteState rwstate)
{
TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
TupleDesc newTupDesc = RelationGetDescr(NewHeap);
@@ -2474,7 +2479,7 @@ reform_and_rewrite_tuple(HeapTuple tuple,
copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
/* The heap rewrite module does the rest */
- rewrite_heap_tuple(rwstate, tuple, copiedTuple);
+ rewrite_heap_tuple(rwstate, islive, tuple, copiedTuple);
heap_freetuple(copiedTuple);
}
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 1aff62cd42..dc136d2a51 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -110,6 +110,7 @@
#include "access/heaptoast.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
+#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xloginsert.h"
#include "catalog/catalog.h"
@@ -152,6 +153,11 @@ typedef struct RewriteStateData
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
HTAB *rs_logical_mappings; /* logical remapping files */
uint32 rs_num_rewrite_mappings; /* # in memory mappings */
+ Page rs_vm_buffer; /* visibility map page */
+ BlockNumber rs_vm_blockno; /* block number of visibility page */
+ bool rs_vm_buffer_valid; /* T if rs_vm_buffer contains a valid page */
+ bool rs_all_visible; /* all visible flag for rs_buffer */
+ bool rs_all_frozen; /* all frozen flag for rs_buffer */
} RewriteStateData;
/*
@@ -213,6 +219,8 @@ typedef struct RewriteMappingDataEntry
/* prototypes for internal functions */
+static void rewrite_flush_vm_page(RewriteState state);
+static void rewrite_set_vm_flags(RewriteState state);
static void raw_heap_insert(RewriteState state, HeapTuple tup);
/* internal logical remapping prototypes */
@@ -264,6 +272,11 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
state->rs_freeze_xid = freeze_xid;
state->rs_cutoff_multi = cutoff_multi;
state->rs_cxt = rw_cxt;
+ state->rs_vm_buffer = (Page) palloc(BLCKSZ);
+ state->rs_vm_blockno = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+ state->rs_vm_buffer_valid = false;
+ state->rs_all_visible = true;
+ state->rs_all_frozen = true;
/* Initialize hash tables used to track update chains */
hash_ctl.keysize = sizeof(TidHashKey);
@@ -284,6 +297,10 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
&hash_ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+ RelationOpenSmgr(state->rs_new_rel);
+ if (!smgrexists(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM))
+ smgrcreate(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM, false);
+
MemoryContextSwitchTo(old_cxt);
logical_begin_heap_rewrite(state);
@@ -317,6 +334,9 @@ end_heap_rewrite(RewriteState state)
/* Write the last page, if any */
if (state->rs_buffer_valid)
{
+ if (state->rs_all_visible)
+ PageSetAllVisible(state->rs_buffer);
+
if (RelationNeedsWAL(state->rs_new_rel))
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
@@ -329,8 +349,14 @@ end_heap_rewrite(RewriteState state)
RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
(char *) state->rs_buffer, true);
+
+ rewrite_set_vm_flags(state);
}
+ /* Write the last VM page too */
+ if (state->rs_vm_buffer_valid)
+ rewrite_flush_vm_page(state);
+
/*
* When we WAL-logged rel pages, we must nonetheless fsync them. The
* reason is the same as in storage.c's RelationCopyStorage(): we're
@@ -343,6 +369,7 @@ end_heap_rewrite(RewriteState state)
/* for an empty table, this could be first smgr access */
RelationOpenSmgr(state->rs_new_rel);
smgrimmedsync(state->rs_new_rel->rd_smgr, MAIN_FORKNUM);
+ smgrimmedsync(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM);
}
logical_end_heap_rewrite(state);
@@ -351,6 +378,55 @@ end_heap_rewrite(RewriteState state)
MemoryContextDelete(state->rs_cxt);
}
+/* Write contents of VM page */
+static void
+rewrite_flush_vm_page(RewriteState state)
+{
+ Assert(state->rs_vm_buffer_valid);
+
+ if (RelationNeedsWAL(state->rs_new_rel))
+ log_newpage(&state->rs_new_rel->rd_node,
+ VISIBILITYMAP_FORKNUM,
+ state->rs_vm_blockno,
+ state->rs_vm_buffer,
+ true);
+ RelationOpenSmgr(state->rs_new_rel);
+
+ PageSetChecksumInplace(state->rs_vm_buffer, state->rs_vm_blockno);
+
+ smgrextend(state->rs_new_rel->rd_smgr, VISIBILITYMAP_FORKNUM,
+ state->rs_vm_blockno, (char *) state->rs_vm_buffer, true);
+
+ state->rs_vm_buffer_valid = false;
+}
+
+/* Set the VM bits for the heap page just written (rs_blockno) */
+static void
+rewrite_set_vm_flags(RewriteState state)
+{
+ BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+ uint32 mapByte = HEAPBLK_TO_MAPBYTE(state->rs_blockno);
+ uint8 mapOffset = HEAPBLK_TO_OFFSET(state->rs_blockno);
+ char *map;
+ uint8 flags;
+
+ if (mapBlock != state->rs_vm_blockno && state->rs_vm_buffer_valid)
+ rewrite_flush_vm_page(state);
+
+ if (!state->rs_vm_buffer_valid)
+ {
+ PageInit(state->rs_vm_buffer, BLCKSZ, 0);
+ state->rs_vm_blockno = mapBlock;
+ state->rs_vm_buffer_valid = true;
+ }
+
+ flags = (state->rs_all_visible ? VISIBILITYMAP_ALL_VISIBLE : 0) |
+ (state->rs_all_frozen ? VISIBILITYMAP_ALL_FROZEN : 0);
+
+ map = PageGetContents(state->rs_vm_buffer);
+ map[mapByte] |= (flags << mapOffset);
+}
+
/*
* Add a tuple to the new heap.
*
@@ -359,11 +435,12 @@ end_heap_rewrite(RewriteState state)
* it had better be temp storage not a pointer to the original tuple.
*
* state opaque state as returned by begin_heap_rewrite
+ * is_live whether the currently processed tuple is live
* old_tuple original tuple in the old heap
* new_tuple new, rewritten tuple to be inserted to new heap
*/
void
-rewrite_heap_tuple(RewriteState state,
+rewrite_heap_tuple(RewriteState state, bool is_live,
HeapTuple old_tuple, HeapTuple new_tuple)
{
MemoryContext old_cxt;
@@ -477,6 +554,10 @@ rewrite_heap_tuple(RewriteState state,
/* Insert the tuple and find out where it's put in new_heap */
raw_heap_insert(state, new_tuple);
+ heap_page_update_vm_flags(new_tuple, state->rs_oldest_xmin,
+ is_live, true,
+ &(state->rs_all_visible),
+ &(state->rs_all_frozen));
new_tid = new_tuple->t_self;
logical_rewrite_heap_tuple(state, old_tid, new_tuple);
@@ -682,6 +763,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
* enforce saveFreeSpace unconditionally.
*/
+ if (state->rs_all_visible)
+ PageSetAllVisible(page);
+
/* XLOG stuff */
if (RelationNeedsWAL(state->rs_new_rel))
log_newpage(&state->rs_new_rel->rd_node,
@@ -702,6 +786,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
state->rs_blockno, (char *) page, true);
+ rewrite_set_vm_flags(state);
+
state->rs_blockno++;
state->rs_buffer_valid = false;
}
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 44f198398c..9feef917cd 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -3610,6 +3610,53 @@ vac_cmp_itemptr(const void *left, const void *right)
return 0;
}
+/*
+ * Update the all_visible and all_frozen flags according to the tuple.
+ * We use a simplified check, relying on HeapTupleSatisfiesVacuum() having
+ * already set the tuple's hint bits.
+ */
+void
+heap_page_update_vm_flags(HeapTuple tuple, TransactionId OldestXmin,
+ bool is_live, bool check_infomask,
+ bool *all_visible, bool *all_frozen)
+{
+ TransactionId xmin;
+
+ /* Check comments in lazy_scan_heap. */
+ if (!HeapTupleHeaderXminCommitted(tuple->t_data) || !is_live)
+ {
+ *all_visible = false;
+ *all_frozen = false;
+ return;
+ }
+
+ /*
+ * The inserter definitely committed. But is it old enough
+ * that everyone sees it as committed?
+ */
+ xmin = HeapTupleHeaderGetXmin(tuple->t_data);
+ if (!TransactionIdPrecedes(xmin, OldestXmin))
+ {
+ *all_visible = false;
+ *all_frozen = false;
+ return;
+ }
+
+ if (check_infomask &&
+ !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) &&
+ !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask))
+ {
+ *all_visible = false;
+ *all_frozen = false;
+ return;
+ }
+
+ /* Check whether this tuple is already frozen or not */
+ if (*all_visible && *all_frozen &&
+ heap_tuple_needs_eventual_freeze(tuple->t_data))
+ *all_frozen = false;
+}
+
/*
* Check if every tuple in the given page is visible to all current and future
* transactions. Also return the visibility_cutoff_xid which is the highest
@@ -3678,36 +3725,16 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
{
TransactionId xmin;
- /* Check comments in lazy_scan_heap. */
- if (!HeapTupleHeaderXminCommitted(tuple.t_data))
- {
- all_visible = false;
- *all_frozen = false;
- break;
- }
-
- /*
- * The inserter definitely committed. But is it old enough
- * that everyone sees it as committed?
- */
- xmin = HeapTupleHeaderGetXmin(tuple.t_data);
- if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
- {
- all_visible = false;
- *all_frozen = false;
- break;
- }
+ heap_page_update_vm_flags(&tuple, vacrel->OldestXmin,
+ true, false,
+ &all_visible, all_frozen);
/* Track newest xmin on page. */
+ xmin = HeapTupleHeaderGetXmin(tuple.t_data);
if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
*visibility_cutoff_xid = xmin;
-
- /* Check whether this tuple is already frozen or not */
- if (all_visible && *all_frozen &&
- heap_tuple_needs_eventual_freeze(tuple.t_data))
- *all_frozen = false;
+ break;
}
- break;
case HEAPTUPLE_DEAD:
case HEAPTUPLE_RECENTLY_DEAD:
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index e198df65d8..98e249b0c2 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -99,24 +99,6 @@
/*#define TRACE_VISIBILITYMAP */
-/*
- * Size of the bitmap on each visibility map page, in bytes. There's no
- * extra headers, so the whole page minus the standard page header is
- * used for the bitmap.
- */
-#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-
-/* Number of heap blocks we can represent in one byte */
-#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
-
-/* Number of heap blocks we can represent in one visibility map page. */
-#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
-
-/* Mapping from heap block number to the right bit in the visibility map */
-#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
-#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
-#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-
/* Masks for counting subsets of bits in the visibility map. */
#define VISIBLE_MASK64 UINT64CONST(0x5555555555555555) /* The lower bit of each
* bit pair */
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 22972071ff..9ec922a047 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -180,6 +180,7 @@ typedef struct
Datum datum1; /* value of first key column */
bool isnull1; /* is first key column NULL? */
int srctape; /* source tape number */
+ bool is_live; /* is the tuple live? */
} SortTuple;
/*
@@ -1703,7 +1704,7 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
* Note that the input data is always copied; the caller need not save it.
*/
void
-tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
+tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup, bool is_live)
{
MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
SortTuple stup;
@@ -1715,6 +1716,7 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
COPYTUP(state, &stup, (void *) tup);
+ stup.is_live = is_live; /* must be set before puttuple_common() copies stup */
puttuple_common(state, &stup);
MemoryContextSwitchTo(oldcontext);
}
@@ -2442,7 +2444,7 @@ tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
* remaining valid after any further manipulation of tuplesort.
*/
HeapTuple
-tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
+tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *is_live)
{
MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
SortTuple stup;
@@ -2452,6 +2454,9 @@ tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
MemoryContextSwitchTo(oldcontext);
+ if (is_live)
+ *is_live = stup.is_live;
+
return stup.tuple;
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index e63b49fc38..71bda855f2 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -199,6 +199,9 @@ struct VacuumParams;
extern void heap_vacuum_rel(Relation rel,
struct VacuumParams *params, BufferAccessStrategy bstrategy);
extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
+extern void heap_page_update_vm_flags(HeapTuple tuple, TransactionId OldestXmin,
+ bool is_live, bool check_infomask,
+ bool *all_visible, bool *all_frozen);
/* in heap/heapam_visibility.c */
extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot,
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 121f552405..2cda7320d3 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -25,8 +25,8 @@ extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
TransactionId OldestXmin, TransactionId FreezeXid,
MultiXactId MultiXactCutoff);
extern void end_heap_rewrite(RewriteState state);
-extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
- HeapTuple newTuple);
+extern void rewrite_heap_tuple(RewriteState state, bool is_live,
+ HeapTuple oldTuple, HeapTuple newTuple);
extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
/*
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 57362c3687..39f3d59156 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -34,6 +34,24 @@
#define VM_ALL_FROZEN(r, b, v) \
((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
+/*
+ * Size of the bitmap on each visibility map page, in bytes. There's no
+ * extra headers, so the whole page minus the standard page header is
+ * used for the bitmap.
+ */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of heap blocks we can represent in one byte */
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
+
+/* Number of heap blocks we can represent in one visibility map page. */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* Mapping from heap block number to the right bit in the visibility map */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
+
extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk,
Buffer vmbuf, uint8 flags);
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index f94949370b..45442c4351 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -232,7 +232,7 @@ extern bool tuplesort_used_bound(Tuplesortstate *state);
extern void tuplesort_puttupleslot(Tuplesortstate *state,
TupleTableSlot *slot);
-extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
+extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup, bool is_live);
extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
Relation rel, ItemPointer self,
Datum *values, bool *isnull);
@@ -243,7 +243,7 @@ extern void tuplesort_performsort(Tuplesortstate *state);
extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
bool copy, TupleTableSlot *slot, Datum *abbrev);
-extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
+extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *is_live);
extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
Datum *val, bool *isNull, Datum *abbrev);