I wrote a little toy implementation that just returns constant data to
play with this a little. Looks good overall.
There were a bunch of typos in the comments in tableam.h, see attached.
Some of the comments could use more copy-editing and clarification, I
think, but I stuck to fixing just typos and such for now.
index_update_stats() calls RelationGetNumberOfBlocks(<table>). If the AM
doesn't use normal data files, that won't work. I bumped into that with
my toy implementation, which wouldn't need to create any data files if
it weren't for this.
The comments for the relation_set_new_filenode() callback say that the AM
can set *freezeXid and *minmulti to invalid. But when I did that, VACUUM
hit this assertion:
TRAP: FailedAssertion("!(((classForm->relfrozenxid) >= ((TransactionId)
3)))", File: "vacuum.c", Line: 1323)
There's a little bug in the index-only scan executor node, where it mixes up
the slot that holds a tuple from the index with the one that holds a tuple
from the table. That doesn't cause any ill effects if the AM uses
TTSOpsHeapTuple, but with my toy AM, which uses a virtual slot, it caused
warnings like this from index-only scans:
WARNING: problem in alloc set ExecutorState: detected write past chunk
end in block 0x56419b0f88e8, chunk 0x56419b0f8f90
Attached is a patch with the toy implementation I used to test this.
I'm not suggesting we should commit that - although feel free to do that
if you think it's useful - but it shows how I bumped into these issues.
The second patch fixes the index-only-scan slot confusion (untested,
except with my toy AM).
- Heikki
>From 97e0eea6a3fb123845ac5650f1aaa1802bf56694 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Mon, 8 Apr 2019 15:16:53 +0300
Subject: [PATCH 1/3] Add a toy table AM implementation to play with.
It returns a constant data set. No insert/update/delete. But you can
create indexes.
src/test/modules/toytable/Makefile | 25 +
.../modules/toytable/expected/toytable.out | 41 ++
src/test/modules/toytable/sql/toytable.sql | 17 +
src/test/modules/toytable/toytable--1.0.sql | 12 +
src/test/modules/toytable/toytable.control | 4 +
src/test/modules/toytable/toytableam.c | 612 ++++++++++++++++++
6 files changed, 711 insertions(+)
create mode 100644 src/test/modules/toytable/Makefile
create mode 100644 src/test/modules/toytable/expected/toytable.out
create mode 100644 src/test/modules/toytable/sql/toytable.sql
create mode 100644 src/test/modules/toytable/toytable--1.0.sql
create mode 100644 src/test/modules/toytable/toytable.control
create mode 100644 src/test/modules/toytable/toytableam.c
diff --git a/src/test/modules/toytable/Makefile b/src/test/modules/toytable/Makefile
new file mode 100644
index 00000000000..142ef2d23e6
--- /dev/null
+++ b/src/test/modules/toytable/Makefile
@@ -0,0 +1,25 @@
+# src/test/modules/toytable/Makefile
+MODULE_big = toytable
+OBJS = toytableam.o $(WIN32RES)
+PGFILEDESC = "A dummy implementation of the table AM API"
+EXTENSION = toytable
+DATA = toytable--1.0.sql
+REGRESS = toytable
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+subdir = src/test/modules/toytable
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+OBJS = toytableam.o
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/test/modules/toytable/expected/toytable.out b/src/test/modules/toytable/expected/toytable.out
new file mode 100644
index 00000000000..3e8598e284c
--- /dev/null
+++ b/src/test/modules/toytable/expected/toytable.out
@@ -0,0 +1,41 @@
+create table toytab (i int4, j int4, k int4) using toytable;
+select * from toytab;
+ i | j | k
+ 1 | 1 | 1
+ 2 | 2 | 2
+ 3 | 3 | 3
+ 4 | 4 | 4
+ 5 | 5 | 5
+ 6 | 6 | 6
+ 7 | 7 | 7
+ 8 | 8 | 8
+ 9 | 9 | 9
+ 10 | 10 | 10
+(10 rows)
+create index toyidx on toytab(i);
+-- test index scan
+set enable_seqscan=off;
+set enable_indexscan=on;
+select i, j from toytab where i = 4;
+ i | j
+ 4 | 4
+(1 row)
+-- index only scan
+explain (costs off) select i from toytab where i = 4;
+ Index Only Scan using toyidx on toytab
+ Index Cond: (i = 4)
+(2 rows)
+select i from toytab where i = 4 ;
+ i
+ 4
+(1 row)
diff --git a/src/test/modules/toytable/sql/toytable.sql b/src/test/modules/toytable/sql/toytable.sql
new file mode 100644
index 00000000000..8d9bac41bbf
--- /dev/null
+++ b/src/test/modules/toytable/sql/toytable.sql
@@ -0,0 +1,17 @@
+create table toytab (i int4, j int4, k int4) using toytable;
+select * from toytab;
+create index toyidx on toytab(i);
+-- test index scan
+set enable_seqscan=off;
+set enable_indexscan=on;
+select i, j from toytab where i = 4;
+-- index only scan
+explain (costs off) select i from toytab where i = 4;
+select i from toytab where i = 4 ;
diff --git a/src/test/modules/toytable/toytable--1.0.sql b/src/test/modules/toytable/toytable--1.0.sql
new file mode 100644
index 00000000000..52085d27f4a
--- /dev/null
+++ b/src/test/modules/toytable/toytable--1.0.sql
@@ -0,0 +1,12 @@
+/* src/test/modules/toytable/toytable--1.0.sql */
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION toytable" to load this file. \quit
+CREATE FUNCTION toytableam_handler(internal)
+RETURNS pg_catalog.table_am_handler STRICT
+CREATE ACCESS METHOD toytable TYPE TABLE HANDLER toytableam_handler
diff --git a/src/test/modules/toytable/toytable.control b/src/test/modules/toytable/toytable.control
new file mode 100644
index 00000000000..8f613e58d6e
--- /dev/null
+++ b/src/test/modules/toytable/toytable.control
@@ -0,0 +1,4 @@
+comment = 'Dummy implementation of table AM api'
+default_version = '1.0'
+module_pathname = '$libdir/toytable'
+relocatable = true
diff --git a/src/test/modules/toytable/toytableam.c b/src/test/modules/toytable/toytableam.c
new file mode 100644
index 00000000000..30b0e74e7f6
--- /dev/null
+++ b/src/test/modules/toytable/toytableam.c
@@ -0,0 +1,612 @@
+ *
+ * toytableam.c
+ * a toy table access method code
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * src/test/modules/toytable/toytableam.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include <math.h>
+#include "miscadmin.h"
+#include "access/multixact.h"
+#include "access/relscan.h"
+#include "access/tableam.h"
+#include "catalog/catalog.h"
+#include "catalog/storage.h"
+#include "catalog/index.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "storage/bufmgr.h"
+typedef struct
+ TableScanDescData scan;
+ int tupidx;
+} ToyScanDescData;
+typedef ToyScanDescData *ToyScanDesc;
+static const TupleTableSlotOps *
+toyam_slot_callbacks(Relation relation)
+ return &TTSOpsVirtual;
+static TableScanDesc toyam_scan_begin(Relation rel,
+ Snapshot snapshot,
+ int nkeys, struct ScanKeyData *key,
+ ParallelTableScanDesc pscan,
+ bool allow_strat,
+ bool allow_sync,
+ bool allow_pagemode,
+ bool is_bitmapscan,
+ bool is_samplescan,
+ bool temp_snap)
+ ToyScanDesc tscan;
+ tscan = palloc0(sizeof(ToyScanDescData));
+ tscan->scan.rs_rd = rel;
+ tscan->scan.rs_snapshot = snapshot;
+ tscan->scan.rs_nkeys = nkeys;
+ tscan->scan.rs_bitmapscan = is_bitmapscan;
+ tscan->scan.rs_samplescan = is_samplescan;
+ tscan->scan.rs_allow_strat = allow_strat;
+ tscan->scan.rs_allow_sync = allow_sync;
+ tscan->scan.rs_temp_snap = temp_snap;
+ tscan->scan.rs_parallel = pscan;
+ tscan->tupidx = 0;
+ return &tscan->scan;
+static void
+toyam_scan_end(TableScanDesc scan)
+ pfree(scan);
+static void
+toyam_scan_rescan(TableScanDesc scan, struct ScanKeyData *key,
+ bool set_params, bool allow_strat,
+ bool allow_sync, bool allow_pagemode)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static bool
+toyam_scan_getnextslot(TableScanDesc scan,
+ ScanDirection direction,
+ TupleTableSlot *slot)
+ ToyScanDesc tscan = (ToyScanDesc) scan;
+ slot->tts_nvalid = 0;
+ slot->tts_flags |= TTS_FLAG_EMPTY;
+ /*
+ * Return a constant set of 10 rows. Every int4 attribute gets
+ * a running count, everything else is NULL.
+ */
+ if (tscan->tupidx < 10)
+ {
+ TupleDesc desc = RelationGetDescr(tscan->scan.rs_rd);
+ tscan->tupidx++;
+ for (AttrNumber attno = 1; attno <= desc->natts; attno++)
+ {
+ Form_pg_attribute att = &desc->attrs[attno - 1];
+ Datum d;
+ bool isnull;
+ if (att->atttypid == INT4OID)
+ {
+ d = tscan->tupidx;
+ isnull = false;
+ }
+ else
+ {
+ d = (Datum) 0;
+ isnull = true;
+ }
+ slot->tts_values[attno - 1] = d;
+ slot->tts_isnull[attno - 1] = isnull;
+ }
+ ItemPointerSet(&slot->tts_tid, 1, tscan->tupidx);
+ slot->tts_nvalid = slot->tts_tupleDescriptor->natts;
+ slot->tts_flags &= ~TTS_FLAG_EMPTY;
+ return true;
+ }
+ else
+ return false;
+static Size
+toyam_parallelscan_estimate(Relation rel)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static Size
+toyam_parallelscan_initialize(Relation rel,
+ ParallelTableScanDesc pscan)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_parallelscan_reinitialize(Relation rel,
+ ParallelTableScanDesc pscan)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static struct IndexFetchTableData *
+toyam_index_fetch_begin(Relation rel)
+ IndexFetchTableData *tfetch = palloc0(sizeof(IndexFetchTableData));
+ tfetch->rel = rel;
+ return tfetch;
+static void
+toyam_index_fetch_reset(struct IndexFetchTableData *data)
+static void
+toyam_index_fetch_end(struct IndexFetchTableData *data)
+ pfree(data);
+static bool
+toyam_index_fetch_tuple(struct IndexFetchTableData *scan,
+ ItemPointer tid,
+ Snapshot snapshot,
+ TupleTableSlot *slot,
+ bool *call_again, bool *all_dead)
+ TupleDesc desc = RelationGetDescr(scan->rel);
+ int tupidx;
+ if (ItemPointerGetBlockNumber(tid) != 1)
+ return false;
+ tupidx = ItemPointerGetOffsetNumber(tid);
+ if (tupidx < 1 || tupidx > 10)
+ return false;
+ slot->tts_nvalid = 0;
+ slot->tts_flags |= TTS_FLAG_EMPTY;
+ /* Return same data as toyam_scan_getnextslot does */
+ for (AttrNumber attno = 1; attno <= desc->natts; attno++)
+ {
+ Form_pg_attribute att = &desc->attrs[attno - 1];
+ Datum d;
+ bool isnull;
+ if (att->atttypid == INT4OID)
+ {
+ d = tupidx;
+ isnull = false;
+ }
+ else
+ {
+ d = (Datum) 0;
+ isnull = true;
+ }
+ slot->tts_values[attno - 1] = d;
+ slot->tts_isnull[attno - 1] = isnull;
+ }
+ ItemPointerSet(&slot->tts_tid, 1, tupidx);
+ slot->tts_nvalid = slot->tts_tupleDescriptor->natts;
+ slot->tts_flags &= ~TTS_FLAG_EMPTY;
+ return true;
+static bool
+toyam_tuple_fetch_row_version(Relation rel,
+ ItemPointer tid,
+ Snapshot snapshot,
+ TupleTableSlot *slot)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_tuple_get_latest_tid(Relation rel,
+ Snapshot snapshot,
+ ItemPointer tid)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static bool
+toyam_tuple_satisfies_snapshot(Relation rel,
+ TupleTableSlot *slot,
+ Snapshot snapshot)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static TransactionId
+toyam_compute_xid_horizon_for_tuples(Relation rel,
+ ItemPointerData *items,
+ int nitems)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_tuple_insert(Relation rel, TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_tuple_insert_speculative(Relation rel,
+ TupleTableSlot *slot,
+ CommandId cid,
+ int options,
+ struct BulkInsertStateData *bistate,
+ uint32 specToken)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_tuple_complete_speculative(Relation rel,
+ TupleTableSlot *slot,
+ uint32 specToken,
+ bool succeeded)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static TM_Result
+toyam_tuple_delete(Relation rel,
+ ItemPointer tid,
+ CommandId cid,
+ Snapshot snapshot,
+ Snapshot crosscheck,
+ bool wait,
+ TM_FailureData *tmfd,
+ bool changingPart)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
+ CommandId cid, int options, struct BulkInsertStateData *bistate)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static TM_Result
+toyam_tuple_update(Relation rel,
+ ItemPointer otid,
+ TupleTableSlot *slot,
+ CommandId cid,
+ Snapshot snapshot,
+ Snapshot crosscheck,
+ bool wait,
+ TM_FailureData *tmfd,
+ LockTupleMode *lockmode,
+ bool *update_indexes)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static TM_Result
+toyam_tuple_lock(Relation rel,
+ ItemPointer tid,
+ Snapshot snapshot,
+ TupleTableSlot *slot,
+ CommandId cid,
+ LockTupleMode mode,
+ LockWaitPolicy wait_policy,
+ uint8 flags,
+ TM_FailureData *tmfd)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_finish_bulk_insert(Relation rel, int options)
+ return;
+static void
+toyam_relation_set_new_filenode(Relation rel,
+ char persistence,
+ TransactionId *freezeXid,
+ MultiXactId *minmulti)
+ *freezeXid = InvalidTransactionId;
+ *minmulti = InvalidMultiXactId;
+ /*
+ * FIXME: We don't need this for anything. But index build calls
+ * RelationGetNumberOfBlocks, from index_update_stats(), and that
+ * fails if the underlying file doesn't exist.
+ */
+ RelationCreateStorage(rel->rd_node, persistence);
+static void
+toyam_relation_nontransactional_truncate(Relation rel)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_relation_copy_data(Relation rel, RelFileNode newrnode)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_relation_copy_for_cluster(Relation NewHeap,
+ Relation OldHeap,
+ Relation OldIndex,
+ bool use_sort,
+ TransactionId OldestXmin,
+ TransactionId FreezeXid,
+ MultiXactId MultiXactCutoff,
+ double *num_tuples,
+ double *tups_vacuumed,
+ double *tups_recently_dead)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_relation_vacuum(Relation onerel,
+ struct VacuumParams *params,
+ BufferAccessStrategy bstrategy)
+ /* we've got nothing to do */
+static bool
+toyam_scan_analyze_next_block(TableScanDesc scan,
+ BlockNumber blockno,
+ BufferAccessStrategy bstrategy)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static bool
+toyam_scan_analyze_next_tuple(TableScanDesc scan,
+ TransactionId OldestXmin,
+ double *liverows,
+ double *deadrows,
+ TupleTableSlot *slot)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static double
+toyam_index_build_range_scan(Relation heap_rel,
+ Relation index_rel,
+ struct IndexInfo *index_nfo,
+ bool allow_sync,
+ bool anyvisible,
+ bool progress,
+ BlockNumber start_blockno,
+ BlockNumber end_blockno,
+ IndexBuildCallback callback,
+ void *callback_state,
+ TableScanDesc scan)
+ TupleTableSlot *slot;
+ EState *estate;
+ estate = CreateExecutorState();
+ slot = table_slot_create(heap_rel, NULL);
+ if (!scan)
+ scan = toyam_scan_begin(heap_rel,
+ SnapshotAny,
+ 0, NULL,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false);
+ while (toyam_scan_getnextslot(scan, ForwardScanDirection, slot))
+ {
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ HeapTuple heapTuple;
+ FormIndexDatum(index_nfo, slot, estate, values, isnull);
+ /* Call the AM's callback routine to process the tuple */
+ heapTuple = ExecCopySlotHeapTuple(slot);
+ heapTuple->t_self = slot->tts_tid;
+ callback(heap_rel, heapTuple, values, isnull, true,
+ callback_state);
+ pfree(heapTuple);
+ }
+ toyam_scan_end(scan);
+ ExecDropSingleTupleTableSlot(slot);
+ FreeExecutorState(estate);
+ return 10;
+static void
+toyam_index_validate_scan(Relation heap_rel,
+ Relation index_rel,
+ struct IndexInfo *index_info,
+ Snapshot snapshot,
+ struct ValidateIndexState *state)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static void
+toyam_relation_estimate_size(Relation rel, int32 *attr_widths,
+ BlockNumber *pages, double *tuples,
+ double *allvisfrac)
+ *pages = 1;
+ *tuples = 1;
+ *allvisfrac = 1.0;
+static bool
+toyam_scan_sample_next_block(TableScanDesc scan,
+ struct SampleScanState *scanstate)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static bool
+toyam_scan_sample_next_tuple(TableScanDesc scan,
+ struct SampleScanState *scanstate,
+ TupleTableSlot *slot)
+ ereport(ERROR,
+ errmsg("function %s not implemented yet", __func__)));
+static const TableAmRoutine toyam_methods = {
+ .type = T_TableAmRoutine,
+ .slot_callbacks = toyam_slot_callbacks,
+ .scan_begin = toyam_scan_begin,
+ .scan_end = toyam_scan_end,
+ .scan_rescan = toyam_scan_rescan,
+ .scan_getnextslot = toyam_scan_getnextslot,
+ .parallelscan_estimate = toyam_parallelscan_estimate,
+ .parallelscan_initialize = toyam_parallelscan_initialize,
+ .parallelscan_reinitialize = toyam_parallelscan_reinitialize,
+ .index_fetch_begin = toyam_index_fetch_begin,
+ .index_fetch_reset = toyam_index_fetch_reset,
+ .index_fetch_end = toyam_index_fetch_end,
+ .index_fetch_tuple = toyam_index_fetch_tuple,
+ .tuple_fetch_row_version = toyam_tuple_fetch_row_version,
+ .tuple_get_latest_tid = toyam_tuple_get_latest_tid,
+ .tuple_satisfies_snapshot = toyam_tuple_satisfies_snapshot,
+ .compute_xid_horizon_for_tuples = toyam_compute_xid_horizon_for_tuples,
+ .tuple_insert = toyam_tuple_insert,
+ .tuple_insert_speculative = toyam_tuple_insert_speculative,
+ .tuple_complete_speculative = toyam_tuple_complete_speculative,
+ .multi_insert = toyam_multi_insert,
+ .tuple_delete = toyam_tuple_delete,
+ .tuple_update = toyam_tuple_update,
+ .tuple_lock = toyam_tuple_lock,
+ .finish_bulk_insert = toyam_finish_bulk_insert,
+ .relation_set_new_filenode = toyam_relation_set_new_filenode,
+ .relation_nontransactional_truncate = toyam_relation_nontransactional_truncate,
+ .relation_copy_data = toyam_relation_copy_data,
+ .relation_copy_for_cluster = toyam_relation_copy_for_cluster,
+ .relation_vacuum = toyam_relation_vacuum,
+ .scan_analyze_next_block = toyam_scan_analyze_next_block,
+ .scan_analyze_next_tuple = toyam_scan_analyze_next_tuple,
+ .index_build_range_scan = toyam_index_build_range_scan,
+ .index_validate_scan = toyam_index_validate_scan,
+ .relation_estimate_size = toyam_relation_estimate_size,
+ .scan_bitmap_next_block = NULL,
+ .scan_bitmap_next_tuple = NULL,
+ .scan_sample_next_block = toyam_scan_sample_next_block,
+ .scan_sample_next_tuple = toyam_scan_sample_next_tuple,
+ PG_RETURN_POINTER(&toyam_methods);
>From b329e4345731cd84708e5efcc51e3d5298c27bb2 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Mon, 8 Apr 2019 15:18:19 +0300
Subject: [PATCH 2/3] Fix confusion on different kinds of slots in
 IndexOnlyScans.
We used the same slot both to store a tuple from the index and to store a
tuple from the table. That's not OK. It worked with the heap, because
heapam_getnextslot() stores a HeapTuple to the slot, and doesn't care how
large the tts_values/tts_isnull arrays are. But when I played with a toy
table AM implementation that used a virtual tuple, it caused memory overruns.
src/backend/executor/nodeIndexonlyscan.c | 16 +++++++++++++---
src/include/nodes/execnodes.h | 1 +
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 7711728495c..5833d683b38 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -166,10 +166,10 @@ IndexOnlyNext(IndexOnlyScanState *node)
* Rats, we have to visit the heap to check visibility.
InstrCountTuples2(node, 1);
- if (!index_fetch_heap(scandesc, slot))
+ if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
continue; /* no visible tuple, try next index entry */
- ExecClearTuple(slot);
+ ExecClearTuple(node->ioss_TableSlot);
* Only MVCC snapshots are supported here, so there should be no
@@ -528,7 +528,17 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
tupDesc = ExecTypeFromTL(node->indextlist);
ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
- table_slot_callbacks(currentRelation));
+ &TTSOpsVirtual);
+ /*
+ * We need another slot, in a format that's suitable for the table AM,
+ * for when we need to fetch a tuple from the table for rechecking
+ * visibility.
+ */
+ indexstate->ioss_TableSlot =
+ ExecAllocTableSlot(&estate->es_tupleTable,
+ RelationGetDescr(currentRelation),
+ table_slot_callbacks(currentRelation));
* Initialize result type and projection info. The node's targetlist will
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index a5e4b7ef2e0..108dee61e24 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1424,6 +1424,7 @@ typedef struct IndexOnlyScanState
struct IndexScanDescData *ioss_ScanDesc;
Buffer ioss_VMBuffer;
Size ioss_PscanLen;
+ TupleTableSlot *ioss_TableSlot;
} IndexOnlyScanState;
/* ----------------
>From 213e33f92532201d0d278394cac7ffcaf0dccafa Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Mon, 8 Apr 2019 15:28:00 +0300
Subject: [PATCH 3/3] Fix typos and grammar in tableam.h comments.
src/include/access/tableam.h | 119 +++++++++++++++++------------------
1 file changed, 59 insertions(+), 60 deletions(-)
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 51398f35c01..ab80919f8d0 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -26,6 +26,7 @@
+/* GUCs */
extern char *default_table_access_method;
extern bool synchronize_seqscans;
@@ -40,7 +41,7 @@ struct ValidateIndexState;
- * Result codes for table_{update,delete,lock}_tuple, and for visibility
+ * Result codes for table_{update,delete,lock_tuple}, and for visibility
* routines inside table AMs.
typedef enum TM_Result
@@ -68,8 +69,8 @@ typedef enum TM_Result
* The affected tuple is currently being modified by another session. This
- * will only be returned if (update/delete/lock)_tuple are instructed not
- * to wait.
+ * will only be returned if table_(update/delete/lock_tuple) are instructed
+ * not to wait.
@@ -82,12 +83,15 @@ typedef enum TM_Result
* When table_update, table_delete, or table_lock_tuple fail because the target
* tuple is already outdated, they fill in this struct to provide information
* to the caller about what happened.
+ *
* ctid is the target's ctid link: it is the same as the target's TID if the
* target was deleted, or the location of the replacement tuple if the target
* was updated.
+ *
* xmax is the outdating transaction's XID. If the caller wants to visit the
* replacement tuple, it must check that this matches before believing the
* replacement is really a match.
+ *
* cmax is the outdating command's CID, but only when the failure code is
* TM_SelfModified (i.e., something in the current transaction outdated the
* tuple); otherwise cmax is zero. (We make this restriction because
@@ -108,10 +112,10 @@ typedef struct TM_FailureData
#define TABLE_INSERT_FROZEN 0x0004
-/* flag bits fortable_lock_tuple */
+/* flag bits for table_lock_tuple */
/* Follow tuples whose update is in progress if lock modes don't conflict */
-/* Follow update chain and lock lastest version of tuple */
+/* Follow update chain and lock latest version of tuple */
@@ -128,8 +132,8 @@ typedef void (*IndexBuildCallback) (Relation index,
* server-lifetime manner, typically as a static const struct, which then gets
* returned by FormData_pg_am.amhandler.
- * I most cases it's not appropriate to directly call the callbacks directly,
- * instead use the table_* wrapper functions.
+ * In most cases it's not appropriate to call the callbacks directly, use the
+ * table_* wrapper functions instead.
* GetTableAmRoutine() asserts that required callbacks are filled in, remember
* to update when adding a callback.
@@ -194,7 +198,7 @@ typedef struct TableAmRoutine
void (*scan_end) (TableScanDesc scan);
- * Restart relation scan. If set_params is set to true, allow{strat,
+ * Restart relation scan. If set_params is set to true, allow_{strat,
* sync, pagemode} (see scan_begin) changes should be taken into account.
void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
@@ -222,7 +226,7 @@ typedef struct TableAmRoutine
* Initialize ParallelTableScanDesc for a parallel scan of this relation.
- * pscan will be sized according to parallelscan_estimate() for the same
+ * `pscan` will be sized according to parallelscan_estimate() for the same
* relation.
Size (*parallelscan_initialize) (Relation rel,
@@ -243,7 +247,7 @@ typedef struct TableAmRoutine
* Prepare to fetch tuples from the relation, as needed when fetching
- * tuples for an index scan. The callback has to return a
+ * tuples for an index scan. The callback has to return an
* IndexFetchTableData, which the AM will typically embed in a larger
* structure with additional information.
@@ -268,16 +272,16 @@ typedef struct TableAmRoutine
* test, return true, false otherwise.
* Note that AMs that do not necessarily update indexes when indexed
- * columns do not change, need to return the current/correct version of a
- * tuple as appropriate, even if the tid points to an older version of the
- * tuple.
+ * columns do not change, need to return the current/correct version of
+ * the tuple that is visible to the snapshot, even if the tid points to an
+ * older version of the tuple.
* *call_again is false on the first call to index_fetch_tuple for a tid.
* If there potentially is another tuple matching the tid, *call_again
* needs be set to true by index_fetch_tuple, signalling to the caller
* that index_fetch_tuple should be called again for the same tid.
- * *all_dead, if all_dead is not NULL, should be set to true if by
+ * *all_dead, if all_dead is not NULL, should be set to true by
* index_fetch_tuple iff it is guaranteed that no backend needs to see
* that tuple. Index AMs can use that do avoid returning that tid in
* future searches.
@@ -288,14 +292,14 @@ typedef struct TableAmRoutine
TupleTableSlot *slot,
bool *call_again, bool *all_dead);
/* ------------------------------------------------------------------------
* Callbacks for non-modifying operations on individual tuples
* ------------------------------------------------------------------------
- * Fetch tuple at `tid` into `slot, after doing a visibility test
+ * Fetch tuple at `tid` into `slot`, after doing a visibility test
* according to `snapshot`. If a tuple was found and passed the visibility
* test, returns true, false otherwise.
@@ -390,13 +394,13 @@ typedef struct TableAmRoutine
* Perform operations necessary to complete insertions made via
* tuple_insert and multi_insert with a BulkInsertState specified. This
- * e.g. may e.g. used to flush the relation when inserting with
- * TABLE_INSERT_SKIP_WAL specified.
+ * may for example be used to flush the relation, when the
+ * TABLE_INSERT_SKIP_WAL option was used.
* Typically callers of tuple_insert and multi_insert will just pass all
- * the flags the apply to them, and each AM has to decide which of them
+ * the flags that apply to them, and each AM has to decide which of them
* make sense for it, and then only take actions in finish_bulk_insert
- * that make sense for a specific AM.
+ * for those flags, and ignore others.
* Optional callback.
@@ -412,10 +416,10 @@ typedef struct TableAmRoutine
* This callback needs to create a new relation filenode for `rel`, with
* appropriate durability behaviour for `persistence`.
- * On output *freezeXid, *minmulti should be set to the values appropriate
- * for pg_class.{relfrozenxid, relminmxid} have to be set to. For AMs that
- * don't need those fields to be filled they can be set to
- * InvalidTransactionId, InvalidMultiXactId respectively.
+ * On output *freezeXid, *minmulti must be set to the values appropriate
+ * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
+ * fields to be filled they can be set to InvalidTransactionId and
+ * InvalidMultiXactId, respectively.
* See also table_relation_set_new_filenode().
@@ -463,8 +467,8 @@ typedef struct TableAmRoutine
* locked with a ShareUpdateExclusive lock.
* Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
- * this routine, even if (in the latter case), part of the same VACUUM
- * command.
+ * this routine, even if (for ANALYZE) it is part of the same
+ * VACUUM command.
* There probably, in the future, needs to be a separate callback to
* integrate with autovacuum's scheduling.
@@ -487,8 +491,8 @@ typedef struct TableAmRoutine
* sampling, e.g. because it's a metapage that could never contain tuples.
* XXX: This obviously is primarily suited for block-based AMs. It's not
- * clear what a good interface for non block based AMs would be, so don't
- * try to invent one yet.
+ * clear what a good interface for non block based AMs would be, so there
+ * isn't one yet.
bool (*scan_analyze_next_block) (TableScanDesc scan,
BlockNumber blockno,
@@ -537,7 +541,7 @@ typedef struct TableAmRoutine
* See table_relation_estimate_size().
- * While block oriented, it shouldn't be too hard to for an AM that
+ * While block oriented, it shouldn't be too hard for an AM that
* doesn't internally use blocks to convert into a usable representation.
void (*relation_estimate_size) (Relation rel, int32 *attr_widths,
@@ -553,7 +557,7 @@ typedef struct TableAmRoutine
* Prepare to fetch / check / return tuples from `tbmres->blockno` as part
* of a bitmap table scan. `scan` was started via table_beginscan_bm().
- * Return false if there's no tuples to be found on the page, true
+ * Return false if there are no tuples to be found on the page, true
* otherwise.
* This will typically read and pin the target block, and do the necessary
@@ -617,8 +621,8 @@ typedef struct TableAmRoutine
* Note that it's not acceptable to hold deadlock prone resources such as
* lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
* block - the tuple is likely to be returned to an upper query node, and
- * the next call could be off a long while. Holding buffer pins etc is
- * obviously OK.
+ * the next call could be off a long while. Holding buffer pins and such
+ * is obviously OK.
* Currently it is required to implement this interface, as there's no
* alternative way (contrary e.g. to bitmap scans) to implement sample
@@ -707,7 +711,6 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
false, false, false);
* table_beginscan_bm is an alternative entry point for setting up a
* TableScanDesc for a bitmap heap scan. Although that scan technology is
@@ -762,7 +765,6 @@ table_endscan(TableScanDesc scan)
* Restart a relation scan.
@@ -795,7 +797,6 @@ table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
* Return next tuple from `scan`, store in slot.
@@ -833,7 +834,7 @@ extern void table_parallelscan_initialize(Relation rel,
* table_parallelscan_initialize(), for the same relation. The initialization
* does not need to have happened in this backend.
- * Caller must hold a suitable lock on the correct relation.
+ * Caller must hold a suitable lock on the relation.
extern TableScanDesc table_beginscan_parallel(Relation rel,
ParallelTableScanDesc pscan);
@@ -904,7 +905,7 @@ table_index_fetch_end(struct IndexFetchTableData *scan)
* The difference between this function and table_fetch_row_version is that
* this function returns the currently visible version of a row if the AM
* supports storing multiple row versions reachable via a single index entry
- * (like heap's HOT). Whereas table_fetch_row_version only evaluates the the
+ * (like heap's HOT). Whereas table_fetch_row_version only evaluates the
* tuple exactly at `tid`. Outside of index entry ->table tuple lookups,
* table_fetch_row_version is what's usually needed.
@@ -940,7 +941,7 @@ extern bool table_index_fetch_tuple_check(Relation rel,
- * Fetch tuple at `tid` into `slot, after doing a visibility test according to
+ * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
* `snapshot`. If a tuple was found and passed the visibility test, returns
* true, false otherwise.
@@ -1009,8 +1010,8 @@ table_compute_xid_horizon_for_tuples(Relation rel,
* behaviour of the AM. Several options might be ignored by AMs not supporting
* them.
- * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple will not
- * necessarily logged to WAL, even for a non-temp relation. It is the AMs
+ * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple doesn't
+ * need to be logged to WAL, even for a non-temp relation. It is the AM's
* choice whether this optimization is supported.
* If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
@@ -1030,7 +1031,7 @@ table_compute_xid_horizon_for_tuples(Relation rel,
* relation.
* Note that most of these options will be applied when inserting into the
- * heap's TOAST table, too, if the tuple requires any out-of-line data
+ * heap's TOAST table, too, if the tuple requires any out-of-line data.
* The BulkInsertState object (if any; bistate can be NULL for default
@@ -1082,7 +1083,7 @@ table_complete_speculative(Relation rel, TupleTableSlot *slot,
- * Insert multiple tuple into a table.
+ * Insert multiple tuples into a table.
* This is like table_insert(), but inserts multiple tuples in one
* operation. That's often faster than calling table_insert() in a loop,
@@ -1121,10 +1122,9 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
- * Normal, successful return value is TM_Ok, which
- * actually means we did delete it. Failure return codes are
- * TM_SelfModified, TM_Updated, or TM_BeingModified
- * (the last only possible if wait == false).
+ * Normal, successful return value is TM_Ok, which means we did actually
+ * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
+ * TM_BeingModified (the last only possible if wait == false).
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax, and, if possible, t_cmax. See comments for
@@ -1160,10 +1160,9 @@ table_delete(Relation rel, ItemPointer tid, CommandId cid,
* update_indexes - in success cases this is set to true if new index entries
* are required for this tuple
- * Normal, successful return value is TM_Ok, which
- * actually means we *did* update it. Failure return codes are
- * TM_SelfModified, TM_Updated, or TM_BeingModified
- * (the last only possible if wait == false).
+ * Normal, successful return value is TM_Ok, which means we did actually
+ * update it. Failure return codes are TM_SelfModified, TM_Updated, and
+ * TM_BeingModified (the last only possible if wait == false).
* On success, the slot's tts_tid and tts_tableOid are updated to match the new
* stored tuple; in particular, slot->tts_tid is set to the TID where the
@@ -1201,8 +1200,8 @@ table_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
* flags:
* If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
* also lock descendant tuples if lock modes don't conflict.
- * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, update chain and lock latest
- * version.
+ * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
+ * the latest version.
* Output parameters:
* *slot: contains the target tuple
@@ -1303,7 +1302,7 @@ table_relation_copy_data(Relation rel, RelFileNode newrnode)
* is copied in that index's order; if use_sort is false and OidIndex is
* InvalidOid, no sorting is performed.
- * OldestXmin, FreezeXid, MultiXactCutoff need to currently valid values for
+ * OldestXmin, FreezeXid, MultiXactCutoff must be currently valid values for
* the table.
* *num_tuples, *tups_vacuumed, *tups_recently_dead will contain statistics
@@ -1329,15 +1328,15 @@ table_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
- * Perform VACUUM on the relation. The VACUUM can be user triggered or by
+ * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
* autovacuum. The specific actions performed by the AM will depend heavily on
* the individual AM.
* On entry a transaction needs to have already been established, and the
- * transaction is locked with a ShareUpdateExclusive lock.
+ * table is locked with a ShareUpdateExclusive lock.
* Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through this
- * routine, even if (in the latter case), part of the same VACUUM command.
+ * routine, even if (for ANALYZE) it is part of the same VACUUM command.
static inline void
table_relation_vacuum(Relation rel, struct VacuumParams *params,
@@ -1363,7 +1362,7 @@ table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
- * Iterate over tuples tuples in the block selected with
+ * Iterate over tuples in the block selected with
* table_scan_analyze_next_block() (which needs to have returned true, and
* this routine may not have returned false for the same block before). If a
* tuple that's suitable for sampling is found, true is returned and a tuple
@@ -1383,7 +1382,7 @@ table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
- * table_index_build_range_scan - scan the table to find tuples to be indexed
+ * table_index_build_scan - scan the table to find tuples to be indexed
* This is called back from an access-method-specific index build procedure
* after the AM has done whatever setup it needs. The parent heap relation
@@ -1515,8 +1514,8 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
* Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
* a bitmap table scan. `scan` needs to have been started via
- * table_beginscan_bm(). Returns false if there's no tuples to be found on the
- * page, true otherwise.
+ * table_beginscan_bm(). Returns false if there are no tuples to be found on
+ * the page, true otherwise.
* Note, this is an optionally implemented function, therefore should only be
* used after verifying the presence (at plan time or such).