On Mon, Jun 14, 2021 at 2:29 PM David Rowley <dgrowle...@gmail.com> wrote:
> Have you also thought about deferrable unique / primary key constraints?
>
> It's possible for the uniqueness to be temporarily violated during a
> transaction when the unique constraint is deferred,

Oh, yeah, very good question.  I think many scenarios involving
duplicates work correctly, but there is a danger like this:

create table t (i int primary key deferrable initially deferred, j int);
create unique index on t(j);
insert into t values (999, 999); -- avoid empty index
set enable_seqscan = off;

begin transaction isolation level serializable;
insert into t values (1, 2); -- create phantom row
select * from t where i = 1;
delete from t where j = 2; -- remove phantom row
SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE
mode = 'SIReadLock';
commit;

master:

 locktype | relation | page | tuple
----------+----------+------+-------
 page     | t_pkey   |    1 |
 page     | t_j_idx  |    1 |
(2 rows)

v3 patch:

 locktype | relation | page | tuple
----------+----------+------+-------
(0 rows)

In fact the lock on t_pkey's page 1 was important: it represents the
search for a tuple with i = 1, other than our temporary phantom (only
allowed because the constraint is deferred).  If someone else inserts a
row matching i = 1, the SSI system will not know that we tried to look
for it, because our own temporary phantom confused us.

> I think you'd just need to add a check to ensure that indimmediate is
> true around where you're checking the indisunique flag.

Yeah, that fixes the problem in this case at least.  With v4:

 locktype | relation | page | tuple
----------+----------+------+-------
 page     | t_pkey   |    1 |
(1 row)

(It's expected that t_j_idx is not locked: the delete query benefits
from the optimisation when accessing the index on t(j)).

That test case is a little confusing, because at no point does it ever
actually create a duplicate, but I suspect the problem is avoided
without the deferred constraint because you'd either get a unique
constraint violation on insertion, or block anyone else from inserting
until after you commit (even after you delete the phantom), and I think
that may avoid the hazard.  I could be confused about that.  If I am
wrong, then a
possible general solution may be to apply the optimisation only if we
find a match that wasn't written by this transaction, though I'm not
sure how given the current layering.
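For comparison, here is roughly the immediate-constraint behaviour I'm
relying on above (a hypothetical two-session sketch, not taken from the
patch's test suite; table name t2 is made up for illustration).  With a
non-deferrable unique constraint, a concurrent inserter of the same key
either blocks on us or fails outright, so our phantom can't silently
hide a conflicting search:

```sql
-- Hypothetical illustration only: with an immediate (non-deferrable)
-- unique constraint, the dangerous window above doesn't seem to arise.
create table t2 (i int primary key, j int);

-- session 1
begin transaction isolation level serializable;
insert into t2 values (1, 2);   -- claims the unique-index slot for i = 1

-- session 2, concurrently
insert into t2 values (1, 99);  -- blocks waiting for session 1; once
                                -- session 1 commits, this raises a
                                -- unique violation (or proceeds if
                                -- session 1 aborted)
```

If that reasoning holds, the indimmediate check below is enough; if not,
we'd need the "match not written by this transaction" condition instead.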
From e52860f760ade79813aae7beb7e9d81ab0d03f34 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 10 Jun 2021 23:35:16 +1200
Subject: [PATCH v4 1/2] Use tuple-level SIREAD locks for index-only scans.

When index-only scans manage to avoid fetching heap tuples,
previously we'd predicate-lock the whole heap page (since commit
cdf91edb).  Commits c01262a8 and 6f38d4da made it possible to lock the
tuple instead with only a TID, to match the behavior of regular index
scans and avoid some false conflicts.

Discussion: https://postgr.es/m/CAEepm%3D2GK3FVdnt5V3d%2Bh9njWipCv_fNL%3DwjxyUhzsF%3D0PcbNg%40mail.gmail.com
---
 src/backend/executor/nodeIndexonlyscan.c      | 12 ++++--
 .../isolation/expected/index-only-scan-2.out  | 15 +++++++
 .../isolation/expected/index-only-scan-3.out  | 16 ++++++++
 src/test/isolation/isolation_schedule         |  2 +
 .../isolation/specs/index-only-scan-2.spec    | 39 +++++++++++++++++++
 .../isolation/specs/index-only-scan-3.spec    | 33 ++++++++++++++++
 6 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 src/test/isolation/expected/index-only-scan-2.out
 create mode 100644 src/test/isolation/expected/index-only-scan-3.out
 create mode 100644 src/test/isolation/specs/index-only-scan-2.spec
 create mode 100644 src/test/isolation/specs/index-only-scan-3.spec

diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 0754e28a9a..f7185b4519 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -243,12 +243,16 @@ IndexOnlyNext(IndexOnlyScanState *node)
 
 		/*
 		 * If we didn't access the heap, then we'll need to take a predicate
-		 * lock explicitly, as if we had.  For now we do that at page level.
+		 * lock explicitly, as if we had.  We don't have the inserter's xid,
+		 * but that's only used by PredicateLockTID to check if it matches the
+		 * caller's top level xid, and it can't possibly have been inserted by
+		 * us or the page wouldn't be all visible.
 		 */
 		if (!tuple_from_heap)
-			PredicateLockPage(scandesc->heapRelation,
-							  ItemPointerGetBlockNumber(tid),
-							  estate->es_snapshot);
+			PredicateLockTID(scandesc->heapRelation,
+							 tid,
+							 estate->es_snapshot,
+							 InvalidTransactionId);
 
 		return slot;
 	}
diff --git a/src/test/isolation/expected/index-only-scan-2.out b/src/test/isolation/expected/index-only-scan-2.out
new file mode 100644
index 0000000000..fef5b8d398
--- /dev/null
+++ b/src/test/isolation/expected/index-only-scan-2.out
@@ -0,0 +1,15 @@
+Parsed test spec with 2 sessions
+
+starting permutation: r1 r2 w1 w2 c1 c2
+step r1: SELECT id1 FROM account WHERE id1 = 1;
+id1            
+
+1              
+step r2: SELECT id2 FROM account WHERE id2 = 2;
+id2            
+
+2              
+step w1: UPDATE account SET balance = 200 WHERE id1 = 1;
+step w2: UPDATE account SET balance = 200 WHERE id2 = 2;
+step c1: COMMIT;
+step c2: COMMIT;
diff --git a/src/test/isolation/expected/index-only-scan-3.out b/src/test/isolation/expected/index-only-scan-3.out
new file mode 100644
index 0000000000..efef162779
--- /dev/null
+++ b/src/test/isolation/expected/index-only-scan-3.out
@@ -0,0 +1,16 @@
+Parsed test spec with 2 sessions
+
+starting permutation: r1 r2 w1 w2 c1 c2
+step r1: SELECT id1 FROM account WHERE id1 = 2;
+id1            
+
+2              
+step r2: SELECT id2 FROM account WHERE id2 = 1;
+id2            
+
+1              
+step w1: UPDATE account SET balance = 200 WHERE id1 = 1;
+step w2: UPDATE account SET balance = 200 WHERE id2 = 2;
+step c1: COMMIT;
+step c2: COMMIT;
+ERROR:  could not serialize access due to read/write dependencies among transactions
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index f4c01006fc..570aedb900 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -17,6 +17,8 @@ test: partial-index
 test: two-ids
 test: multiple-row-versions
 test: index-only-scan
+test: index-only-scan-2
+test: index-only-scan-3
 test: predicate-lock-hot-tuple
 test: update-conflict-out
 test: deadlock-simple
diff --git a/src/test/isolation/specs/index-only-scan-2.spec b/src/test/isolation/specs/index-only-scan-2.spec
new file mode 100644
index 0000000000..cd3c599af8
--- /dev/null
+++ b/src/test/isolation/specs/index-only-scan-2.spec
@@ -0,0 +1,39 @@
+# Test the granularity of predicate locks on heap tuples, for index-only scans.
+
+# Access the accounts through two different indexes, so that index predicate
+# locks don't confuse matters; here we only want to test heap locking.  Since
+# s1 and s2 access different heap tuples there is no cycle, so both
+# transactions should be able to commit.  Before PostgreSQL 15, a serialization
+# failure was reported because of a page-level SIREAD lock on the heap that
+# produced a false conflict.
+
+setup
+{
+  CREATE TABLE account (id1 int, id2 int, balance int);
+  CREATE UNIQUE INDEX ON account(id1);
+  CREATE UNIQUE INDEX ON account(id2);
+  INSERT INTO account VALUES (1, 1, 100), (2, 2, 100);
+}
+setup
+{
+  VACUUM account;
+}
+
+teardown
+{
+  DROP TABLE account;
+}
+
+session "s1"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan = off; }
+step "r1" { SELECT id1 FROM account WHERE id1 = 1; }
+step "w1" { UPDATE account SET balance = 200 WHERE id1 = 1; }
+step "c1" { COMMIT; }
+
+session "s2"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan = off; }
+step "r2" { SELECT id2 FROM account WHERE id2 = 2; }
+step "w2" { UPDATE account SET balance = 200 WHERE id2 = 2; }
+step "c2" { COMMIT; }
+
+permutation "r1" "r2" "w1" "w2" "c1" "c2"
diff --git a/src/test/isolation/specs/index-only-scan-3.spec b/src/test/isolation/specs/index-only-scan-3.spec
new file mode 100644
index 0000000000..5f04923002
--- /dev/null
+++ b/src/test/isolation/specs/index-only-scan-3.spec
@@ -0,0 +1,33 @@
+# A variant of index-only-scan-2.spec that is a true conflict, detected via
+# heap tuple locks only.
+
+setup
+{
+  CREATE TABLE account (id1 int, id2 int, balance int);
+  CREATE UNIQUE INDEX ON account(id1);
+  CREATE UNIQUE INDEX ON account(id2);
+  INSERT INTO account VALUES (1, 1, 100), (2, 2, 100);
+}
+setup
+{
+  VACUUM account;
+}
+
+teardown
+{
+  DROP TABLE account;
+}
+
+session "s1"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan = off; }
+step "r1" { SELECT id1 FROM account WHERE id1 = 2; }
+step "w1" { UPDATE account SET balance = 200 WHERE id1 = 1; }
+step "c1" { COMMIT; }
+
+session "s2"
+setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan = off; }
+step "r2" { SELECT id2 FROM account WHERE id2 = 1; }
+step "w2" { UPDATE account SET balance = 200 WHERE id2 = 2; }
+step "c2" { COMMIT; }
+
+permutation "r1" "r2" "w1" "w2" "c1" "c2"
-- 
2.30.2

From 839fb88e9753b39dd283f93fd363a0df7f29982b Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 11 Jun 2021 15:33:59 +1200
Subject: [PATCH v4 2/2] WIP: Skip SIREAD locks on btree pages when possible.

Previously, we'd always predicate-lock the index pages we searched, and
any heap tuples we found.  As described by Kevin and Dan in README-SSI,
it's not strictly necessary to lock both: if we found a tuple and can
prove that there can't be another visible match, it's enough to lock
just the tuple.

XXX The work of detecting suitable quals is done at execution time by
ExecIndexBuildScanKeys(), but it could probably be done by the planner.

XXX The logic for "scheduling" a predicate lock and then clearing or
flushing is a bit weird; it seems like it's necessary though, because
you don't yet have enough information to judge the visibility while
you're examining the index page.

XXX The mechanism for detecting page splits is a bit weird, and is not
battle-tested yet.

Reviewed-by: David Rowley <dgrowle...@gmail.com>
Discussion: https://postgr.es/m/CAEepm%3D2GK3FVdnt5V3d%2Bh9njWipCv_fNL%3DwjxyUhzsF%3D0PcbNg%40mail.gmail.com
---
 src/backend/access/index/genam.c           |   2 +
 src/backend/access/nbtree/nbtsearch.c      |  24 ++-
 src/backend/executor/nodeBitmapIndexscan.c |   3 +-
 src/backend/executor/nodeIndexonlyscan.c   |  25 ++-
 src/backend/executor/nodeIndexscan.c       |  81 ++++++++-
 src/backend/storage/lmgr/README-SSI        |  19 +-
 src/backend/storage/lmgr/predicate.c       |  84 +++++++++
 src/include/access/relscan.h               |  10 ++
 src/include/executor/nodeIndexscan.h       |   3 +-
 src/include/nodes/execnodes.h              |   2 +
 src/include/storage/predicate.h            |  23 +++
 src/include/storage/predicate_internals.h  |   1 +
 src/test/regress/expected/btree_index.out  | 193 +++++++++++++++++++++
 src/test/regress/sql/btree_index.sql       |  79 +++++++++
 14 files changed, 525 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index b93288a6fe..40ae726081 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -126,6 +126,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 	scan->xs_hitup = NULL;
 	scan->xs_hitupdesc = NULL;
 
+	scan->xs_defer_predlocks = false;
+
 	return scan;
 }
 
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index d1177d8772..914aff9fc4 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -824,6 +824,21 @@ _bt_compare(Relation rel,
 	return 0;
 }
 
+/*
+ * _bt_predlock_page() -- Predicate-lock a page, immediate or deferred.
+ */
+static inline void
+_bt_predlock_page(IndexScanDesc scan, Relation rel, BlockNumber blocknum)
+{
+	if (scan->xs_defer_predlocks)
+		SchedulePredicateLockPage(&scan->xs_deferred_predlocks,
+								  rel,
+								  blocknum,
+								  scan->xs_snapshot);
+	else
+		PredicateLockPage(rel, blocknum, scan->xs_snapshot);
+}
+
 /*
  *	_bt_first() -- Find the first item in a scan.
  *
@@ -1374,8 +1389,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 		return false;
 	}
 	else
-		PredicateLockPage(rel, BufferGetBlockNumber(buf),
-						  scan->xs_snapshot);
+		_bt_predlock_page(scan, rel, BufferGetBlockNumber(buf));
 
 	_bt_initialize_more_data(so, dir);
 
@@ -1999,7 +2013,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
 			/* check for deleted page */
 			if (!P_IGNORE(opaque))
 			{
-				PredicateLockPage(rel, blkno, scan->xs_snapshot);
+				_bt_predlock_page(scan, rel, blkno);
 				/* see if there are any matches on this page */
 				/* note that this will clear moreRight if we can stop */
 				if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
@@ -2101,7 +2115,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
 			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 			if (!P_IGNORE(opaque))
 			{
-				PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf), scan->xs_snapshot);
+				_bt_predlock_page(scan, rel, BufferGetBlockNumber(so->currPos.buf));
 				/* see if there are any matches on this page */
 				/* note that this will clear moreLeft if we can stop */
 				if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
@@ -2404,7 +2418,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
 		return false;
 	}
 
-	PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
+	_bt_predlock_page(scan, rel, BufferGetBlockNumber(buf));
 	page = BufferGetPage(buf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 	Assert(P_ISLEAF(opaque));
diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c
index 48c2036297..ea9298fed4 100644
--- a/src/backend/executor/nodeBitmapIndexscan.c
+++ b/src/backend/executor/nodeBitmapIndexscan.c
@@ -283,7 +283,8 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
 						   &indexstate->biss_RuntimeKeys,
 						   &indexstate->biss_NumRuntimeKeys,
 						   &indexstate->biss_ArrayKeys,
-						   &indexstate->biss_NumArrayKeys);
+						   &indexstate->biss_NumArrayKeys,
+						   NULL);
 
 	/*
 	 * If we have runtime keys or array keys, we need an ExprContext to
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index f7185b4519..4583375b17 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -103,6 +103,13 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		node->ioss_ScanDesc->xs_want_itup = true;
 		node->ioss_VMBuffer = InvalidBuffer;
 
+		/* Set up the single row SSI optimization, if possible. */
+		if (node->ioss_SingleRow)
+		{
+			InitPredicateLockBuffer(&scandesc->xs_deferred_predlocks);
+			scandesc->xs_defer_predlocks = true;
+		}
+
 		/*
 		 * If no run-time keys to calculate or they are ready, go ahead and
 		 * pass the scankeys to the index AM.
@@ -254,9 +261,23 @@ IndexOnlyNext(IndexOnlyScanState *node)
 							 estate->es_snapshot,
 							 InvalidTransactionId);
 
+		/*
+		 * Single row SSI optimization: found a match, so we can throw away
+		 * predicate locks on the index.
+		 */
+		if (node->ioss_SingleRow)
+			ClearPredicateLockBuffer(&scandesc->xs_deferred_predlocks);
+
 		return slot;
 	}
 
+	/*
+	 * Single row SSI optimization: no match found, so we need the index locks
+	 * to "lock the gap".
+	 */
+	if (node->ioss_SingleRow)
+		FlushPredicateLockBuffer(&scandesc->xs_deferred_predlocks);
+
 	/*
 	 * if we get here it means the index scan failed so we are at the end of
 	 * the scan..
@@ -593,7 +614,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 						   &indexstate->ioss_RuntimeKeys,
 						   &indexstate->ioss_NumRuntimeKeys,
 						   NULL,	/* no ArrayKeys */
-						   NULL);
+						   NULL,
+						   &indexstate->ioss_SingleRow);
 
 	/*
 	 * any ORDER BY exprs have to be turned into scankeys in the same way
@@ -607,6 +629,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 						   &indexstate->ioss_RuntimeKeys,
 						   &indexstate->ioss_NumRuntimeKeys,
 						   NULL,	/* no ArrayKeys */
+						   NULL,
 						   NULL);
 
 	/*
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 2fffb1b437..ef32ccc87f 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -38,6 +38,7 @@
 #include "lib/pairingheap.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
+#include "storage/predicate.h"
 #include "utils/array.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
@@ -117,6 +118,18 @@ IndexNext(IndexScanState *node)
 
 		node->iss_ScanDesc = scandesc;
 
+		/*
+		 * Single row SSI optimization: if this scan can produce only one row,
+		 * tell the index to defer acquisition of predicate locks on the index
+		 * itself (but not the heap); we'll decide whether to acquire or forget
+		 * them later.
+		 */
+		if (node->iss_SingleRow)
+		{
+			InitPredicateLockBuffer(&scandesc->xs_deferred_predlocks);
+			scandesc->xs_defer_predlocks = true;
+		}
+
 		/*
 		 * If no run-time keys to calculate or they are ready, go ahead and
 		 * pass the scankeys to the index AM.
@@ -149,9 +162,24 @@ IndexNext(IndexScanState *node)
 			}
 		}
 
+		/*
+		 * Single row SSI optimization: If we found a visible match, then there
+		 * is no need for the index to acquire predicate locks.  The heap tuple
+		 * lock is enough, because there can be only one match.
+		 */
+		if (node->iss_SingleRow)
+			ClearPredicateLockBuffer(&scandesc->xs_deferred_predlocks);
+
 		return slot;
 	}
 
+	/*
+	 * Single row SSI optimization: If we didn't find a row, then the index
+	 * needs to lock the gap.
+	 */
+	if (node->iss_SingleRow)
+		FlushPredicateLockBuffer(&scandesc->xs_deferred_predlocks);
+
 	/*
 	 * if we get here it means the index scan failed so we are at the end of
 	 * the scan..
@@ -987,7 +1015,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 						   &indexstate->iss_RuntimeKeys,
 						   &indexstate->iss_NumRuntimeKeys,
 						   NULL,	/* no ArrayKeys */
-						   NULL);
+						   NULL,
+						   &indexstate->iss_SingleRow);
 
 	/*
 	 * any ORDER BY exprs have to be turned into scankeys in the same way
@@ -1001,6 +1030,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 						   &indexstate->iss_RuntimeKeys,
 						   &indexstate->iss_NumRuntimeKeys,
 						   NULL,	/* no ArrayKeys */
+						   NULL,
 						   NULL);
 
 	/* Initialize sort support, if we need to re-check ORDER BY exprs */
@@ -1141,16 +1171,18 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
  * *numRuntimeKeys: receives number of runtime keys
  * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
  * *numArrayKeys: receives number of array keys
+ * *singleRow: receives flag meaning at most one visible row can be produced
  *
  * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
- * IndexArrayKeyInfos are not supported.
+ * IndexArrayKeyInfos are not supported.  Likewise for singleRow.
  */
 void
 ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
 					   List *quals, bool isorderby,
 					   ScanKey *scanKeys, int *numScanKeys,
 					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
-					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
+					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys,
+					   bool *singleRow)
 {
 	ListCell   *qual_cell;
 	ScanKey		scan_keys;
@@ -1161,6 +1193,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
 	int			max_runtime_keys;
 	int			n_array_keys;
 	int			j;
+	bool		single_row;
 
 	/* Allocate array for ScanKey structs: one per qual */
 	n_scan_keys = list_length(quals);
@@ -1623,6 +1656,34 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
 		array_keys = NULL;
 	}
 
+	/*
+	 * Can at most one visible row be produced by the scan?
+	 *
+	 * XXX btree only for now due to need to recognise equality
+	 * XXX this is probably not the right place for this work!
+	 * XXX do it in the planner? 🤯
+	 */
+	single_row = false;
+	if (singleRow &&
+		IsolationIsSerializable() &&
+		index->rd_rel->relam == BTREE_AM_OID &&
+		index->rd_index->indisunique &&
+		index->rd_index->indimmediate)
+	{
+		Bitmapset *attrs = NULL;
+
+		/* Find all equality quals. */
+		for (int i = 0; i < n_scan_keys; ++i)
+			if (scan_keys[i].sk_strategy == BTEqualStrategyNumber)
+				attrs = bms_add_member(attrs, scan_keys[i].sk_attno);
+
+		/* Are all attributes covered? */
+		if (bms_num_members(attrs) == index->rd_index->indnkeyatts)
+			single_row = true;
+
+		bms_free(attrs);
+	}
+
 	/*
 	 * Return info to our caller.
 	 */
@@ -1637,6 +1698,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
 	}
 	else if (n_array_keys != 0)
 		elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
+	if (singleRow)
+		*singleRow = single_row;
 }
 
 /* ----------------------------------------------------------------
@@ -1689,6 +1752,12 @@ ExecIndexScanInitializeDSM(IndexScanState *node,
 								 node->iss_NumOrderByKeys,
 								 piscan);
 
+	if (node->iss_SingleRow)
+	{
+		InitPredicateLockBuffer(&node->iss_ScanDesc->xs_deferred_predlocks);
+		node->iss_ScanDesc->xs_defer_predlocks = true;
+	}
+
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
 	 * the scankeys to the index AM.
@@ -1732,6 +1801,12 @@ ExecIndexScanInitializeWorker(IndexScanState *node,
 								 node->iss_NumOrderByKeys,
 								 piscan);
 
+	if (node->iss_SingleRow)
+	{
+		InitPredicateLockBuffer(&node->iss_ScanDesc->xs_deferred_predlocks);
+		node->iss_ScanDesc->xs_defer_predlocks = true;
+	}
+
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
 	 * the scankeys to the index AM.
diff --git a/src/backend/storage/lmgr/README-SSI b/src/backend/storage/lmgr/README-SSI
index 50d2ecca9d..f302015a36 100644
--- a/src/backend/storage/lmgr/README-SSI
+++ b/src/backend/storage/lmgr/README-SSI
@@ -344,17 +344,17 @@ need not generate a conflict, although an update which "moves" a row
 into the scan must generate a conflict.  While correctness allows
 false positives, they should be minimized for performance reasons.
 
+An index scan which is comparing for equality on the entire key
+for a unique index need not acquire a predicate lock as long as a key
+is found corresponding to a visible tuple which has not been modified by
+another transaction -- there are no "between or around" gaps to
+cover.
+
 Several optimizations are possible, though not all are implemented yet:
 
     * An index scan which is just finding the right position for an
 index insertion or deletion need not acquire a predicate lock.
 
-    * An index scan which is comparing for equality on the entire key
-for a unique index need not acquire a predicate lock as long as a key
-is found corresponding to a visible tuple which has not been modified
-by another transaction -- there are no "between or around" gaps to
-cover.
-
     * As long as built-in foreign key enforcement continues to use
 its current "special tricks" to deal with MVCC issues, predicate
 locks should not be needed for scans done by enforcement code.
@@ -610,13 +610,6 @@ address it?
 replication solutions, like Postgres-R, Slony, pgpool, HS/SR, etc.
 This is related to the "WAL file replay" issue.
 
-    * UNIQUE btree search for equality on all columns. Since a search
-of a UNIQUE index using equality tests on all columns will lock the
-heap tuple if an entry is found, it appears that there is no need to
-get a predicate lock on the index in that case. A predicate lock is
-still needed for such a search if a matching index entry which points
-to a visible tuple is not found.
-
     * Minimize touching of shared memory. Should lists in shared
 memory push entries which have just been returned to the front of the
 available list, so they will be popped back off soon and some memory
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d493aeef0f..db997d2bc8 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1277,6 +1277,8 @@ InitPredicateLocks(void)
 		PredXact->OldCommittedSxact->xmin = InvalidTransactionId;
 		PredXact->OldCommittedSxact->flags = SXACT_FLAG_COMMITTED;
 		PredXact->OldCommittedSxact->pid = 0;
+		for (int i = 0; i < lengthof(PredXact->split_counters); ++i)
+			pg_atomic_init_u64(&PredXact->split_counters[i], 0);
 	}
 	/* This never changes, so let's keep a local copy. */
 	OldCommittedSxact = PredXact->OldCommittedSxact;
@@ -3170,6 +3172,11 @@ PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
 	PREDICATELOCKTARGETTAG oldtargettag;
 	PREDICATELOCKTARGETTAG newtargettag;
 	bool		success;
+	uint32		split_counter;
+
+	/* Make sure that deferred predicate locks are promoted to relation. */
+	split_counter = oldblkno % lengthof(PredXact->split_counters);
+	pg_atomic_fetch_add_u64(&PredXact->split_counters[split_counter], 1);
 
 	/*
 	 * Bail out quickly if there are no serializable transactions running.
@@ -5201,3 +5208,80 @@ AttachSerializableXact(SerializableXactHandle handle)
 	if (MySerializableXact != InvalidSerializableXact)
 		CreateLocalPredicateLockHash();
 }
+
+/*
+ * Initialize a buffer of predicate lock requests.
+ */
+void
+InitPredicateLockBuffer(PredicateLockBuffer *buffer)
+{
+	buffer->size = 0;
+}
+
+/*
+ * Acquire all buffered predicate locks.
+ */
+void
+FlushPredicateLockBuffer(PredicateLockBuffer *buffer)
+{
+	for (int i = 0; i < buffer->size; ++i)
+	{
+		PredicateLockRequest *request = &buffer->requests[i];
+		uint32 split_counter;
+
+		PredicateLockPage(request->relation,
+						  request->blocknum,
+						  request->snapshot);
+
+		/*
+		 * Check if there's been a split.  If so, we don't have enough
+		 * information to follow the split, so we'll promote the SIREAD lock to
+		 * whole-relation level.  This should be unlikely.
+		 */
+		split_counter = request->blocknum % lengthof(PredXact->split_counters);
+		if (request->page_split_count !=
+			pg_atomic_read_u64(&PredXact->split_counters[split_counter]))
+			PredicateLockRelation(request->relation,
+								  request->snapshot);
+	}
+	buffer->size = 0;
+}
+
+/*
+ * Forget buffered predicate locks, we don't need them after all.
+ */
+void
+ClearPredicateLockBuffer(PredicateLockBuffer *buffer)
+{
+	buffer->size = 0;
+}
+
+/*
+ * Remember to acquire a page-level predicate lock.
+ */
+void
+SchedulePredicateLockPage(PredicateLockBuffer *buffer,
+						  Relation relation,
+						  BlockNumber blkno,
+						  Snapshot snapshot)
+{
+	PredicateLockRequest *request;
+	uint32			split_counter;
+
+	if (buffer->size == lengthof(buffer->requests))
+		FlushPredicateLockBuffer(buffer);
+
+	request = &buffer->requests[buffer->size++];
+	request->relation = relation;
+	request->blocknum = blkno;
+	request->snapshot = snapshot;
+
+	/*
+	 * Record the page split counter while the caller holds the page content
+	 * lock.  This way we can detect page splits that occur between now and the
+	 * time that we acquire the SIREAD lock, if we eventually flush.
+	 */
+	split_counter = blkno % lengthof(PredXact->split_counters);
+	request->page_split_count =
+		pg_atomic_read_u64(&PredXact->split_counters[split_counter]);
+}
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 74a07ef152..b0747c5e1e 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -18,6 +18,7 @@
 #include "access/itup.h"
 #include "port/atomics.h"
 #include "storage/buf.h"
+#include "storage/predicate.h"
 #include "storage/spin.h"
 #include "utils/relcache.h"
 
@@ -162,6 +163,15 @@ typedef struct IndexScanDescData
 	bool	   *xs_orderbynulls;
 	bool		xs_recheckorderby;
 
+	/*
+	 * In certain conditions, we can avoid acquiring predicate locks.  We don't
+	 * know if that's possible until later, so we provide a way for index AMs
+	 * to defer predicate lock acquisition so that the caller can decide
+	 * whether they're needed.
+	 */
+	bool		xs_defer_predlocks;
+	PredicateLockBuffer xs_deferred_predlocks;
+
 	/* parallel index scan information, in shared memory */
 	struct ParallelIndexScanDescData *parallel_scan;
 }			IndexScanDescData;
diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h
index 70a2cff199..c0ccc5bd53 100644
--- a/src/include/executor/nodeIndexscan.h
+++ b/src/include/executor/nodeIndexscan.h
@@ -37,7 +37,8 @@ extern void ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
 								   List *quals, bool isorderby,
 								   ScanKey *scanKeys, int *numScanKeys,
 								   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
-								   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys);
+								   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys,
+								   bool *single_row);
 extern void ExecIndexEvalRuntimeKeys(ExprContext *econtext,
 									 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys);
 extern bool ExecIndexEvalArrayKeys(ExprContext *econtext,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 9a5ca7b3db..d5ec8ef37c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1472,6 +1472,7 @@ typedef struct IndexScanState
 	int			iss_NumRuntimeKeys;
 	bool		iss_RuntimeKeysReady;
 	ExprContext *iss_RuntimeContext;
+	bool		iss_SingleRow;
 	Relation	iss_RelationDesc;
 	struct IndexScanDescData *iss_ScanDesc;
 
@@ -1517,6 +1518,7 @@ typedef struct IndexOnlyScanState
 	int			ioss_NumRuntimeKeys;
 	bool		ioss_RuntimeKeysReady;
 	ExprContext *ioss_RuntimeContext;
+	bool		ioss_SingleRow;
 	Relation	ioss_RelationDesc;
 	struct IndexScanDescData *ioss_ScanDesc;
 	TupleTableSlot *ioss_TableSlot;
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
index 152b698611..8ed0063f2e 100644
--- a/src/include/storage/predicate.h
+++ b/src/include/storage/predicate.h
@@ -36,6 +36,20 @@ extern int	max_predicate_locks_per_page;
  */
 typedef void *SerializableXactHandle;
 
+typedef struct PredicateLockRequest
+{
+	Relation 		relation;
+	BlockNumber		blocknum;
+	Snapshot		snapshot;
+	uint64			page_split_count;
+} PredicateLockRequest;
+
+/* A small buffer for predicate lock requests that might later be elided. */
+typedef struct PredicateLockBuffer {
+	int				size;
+	PredicateLockRequest requests[4];
+} PredicateLockBuffer;
+
 /*
  * function prototypes
  */
@@ -64,6 +78,15 @@ extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, Bl
 extern void TransferPredicateLocksToHeapRelation(Relation relation);
 extern void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe);
 
+/* deferred predicate locks */
+extern void InitPredicateLockBuffer(PredicateLockBuffer *buffer);
+extern void FlushPredicateLockBuffer(PredicateLockBuffer *buffer);
+extern void ClearPredicateLockBuffer(PredicateLockBuffer *buffer);
+extern void SchedulePredicateLockPage(PredicateLockBuffer *buffer,
+									  Relation relation,
+									  BlockNumber blkno,
+									  Snapshot snapshot);
+
 /* conflict detection (may also trigger rollback) */
 extern bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot);
 extern void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot);
diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h
index 104f560d38..6a3dce6659 100644
--- a/src/include/storage/predicate_internals.h
+++ b/src/include/storage/predicate_internals.h
@@ -187,6 +187,7 @@ typedef struct PredXactListData
 	SERIALIZABLEXACT *OldCommittedSxact;	/* shared copy of dummy sxact */
 
 	PredXactListElement element;
+	pg_atomic_uint64 split_counters[64]; /* page split counters */
 }			PredXactListData;
 
 typedef struct PredXactListData *PredXactList;
diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out
index bc113a70b4..545fff8b37 100644
--- a/src/test/regress/expected/btree_index.out
+++ b/src/test/regress/expected/btree_index.out
@@ -329,3 +329,196 @@ INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i;
 -- Test unsupported btree opclass parameters
 create index on btree_tall_tbl (id int4_ops(foo=1));
 ERROR:  operator class int4_ops has no options
+--
+-- Test SIREAD locking
+--
+CREATE TABLE test_table (i int, j int, PRIMARY KEY (i, j));
+INSERT INTO test_table VALUES (1, 1), (2, 2);
+VACUUM test_table;
+SET enable_seqscan = off;
+SET enable_indexonlyscan = off;
+-- Unique quals, found -> heap tuple lock only
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+                   QUERY PLAN                   
+------------------------------------------------
+ Index Scan using test_table_pkey on test_table
+   Index Cond: ((i = 1) AND (j = 1))
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+ i | j 
+---+---
+ 1 | 1
+(1 row)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |  relation  | page | tuple 
+----------+------------+------+-------
+ tuple    | test_table |    0 |     1
+(1 row)
+
+ROLLBACK;
+-- Unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+                   QUERY PLAN                   
+------------------------------------------------
+ Index Scan using test_table_pkey on test_table
+   Index Cond: ((i = 1) AND (j = 2))
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+ i | j 
+---+---
+(0 rows)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |    relation     | page | tuple 
+----------+-----------------+------+-------
+ page     | test_table_pkey |    1 |      
+(1 row)
+
+ROLLBACK;
+-- Non-unique quals, found -> heap tuple and btree page locks
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1;
+                   QUERY PLAN                   
+------------------------------------------------
+ Index Scan using test_table_pkey on test_table
+   Index Cond: (i = 1)
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 1;
+ i | j 
+---+---
+ 1 | 1
+(1 row)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |    relation     | page | tuple 
+----------+-----------------+------+-------
+ page     | test_table_pkey |    1 |      
+ tuple    | test_table      |    0 |     1
+(2 rows)
+
+ROLLBACK;
+-- Non-unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 3;
+                   QUERY PLAN                   
+------------------------------------------------
+ Index Scan using test_table_pkey on test_table
+   Index Cond: (i = 3)
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 3;
+ i | j 
+---+---
+(0 rows)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |    relation     | page | tuple 
+----------+-----------------+------+-------
+ page     | test_table_pkey |    1 |      
+(1 row)
+
+ROLLBACK;
+-- The same 4 cases, this time with an index-only scan
+RESET enable_indexonlyscan;
+-- Unique quals, found -> heap tuple lock only
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Index Only Scan using test_table_pkey on test_table
+   Index Cond: ((i = 1) AND (j = 1))
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+ i | j 
+---+---
+ 1 | 1
+(1 row)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |  relation  | page | tuple 
+----------+------------+------+-------
+ tuple    | test_table |    0 |     1
+(1 row)
+
+ROLLBACK;
+-- Unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Index Only Scan using test_table_pkey on test_table
+   Index Cond: ((i = 1) AND (j = 2))
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+ i | j 
+---+---
+(0 rows)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |    relation     | page | tuple 
+----------+-----------------+------+-------
+ page     | test_table_pkey |    1 |      
+(1 row)
+
+ROLLBACK;
+-- Non-unique quals, found -> heap tuple and btree page locks
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Index Only Scan using test_table_pkey on test_table
+   Index Cond: (i = 1)
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 1;
+ i | j 
+---+---
+ 1 | 1
+(1 row)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |    relation     | page | tuple 
+----------+-----------------+------+-------
+ page     | test_table_pkey |    1 |      
+ tuple    | test_table      |    0 |     1
+(2 rows)
+
+ROLLBACK;
+-- Non-unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 3;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Index Only Scan using test_table_pkey on test_table
+   Index Cond: (i = 3)
+(2 rows)
+
+SELECT * FROM test_table WHERE i = 3;
+ i | j 
+---+---
+(0 rows)
+
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ locktype |    relation     | page | tuple 
+----------+-----------------+------+-------
+ page     | test_table_pkey |    1 |      
+(1 row)
+
+ROLLBACK;
+DROP TABLE test_table;
diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql
index c60312db2d..739b243f5d 100644
--- a/src/test/regress/sql/btree_index.sql
+++ b/src/test/regress/sql/btree_index.sql
@@ -172,3 +172,82 @@ INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i;
 
 -- Test unsupported btree opclass parameters
 create index on btree_tall_tbl (id int4_ops(foo=1));
+
+--
+-- Test SIREAD locking
+--
+
+CREATE TABLE test_table (i int, j int, PRIMARY KEY (i, j));
+INSERT INTO test_table VALUES (1, 1), (2, 2);
+VACUUM test_table;
+SET enable_seqscan = off;
+SET enable_indexonlyscan = off;
+
+-- Unique quals, found -> heap tuple lock only
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- Unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- Non-unique quals, found -> heap tuple and btree page locks
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1;
+SELECT * FROM test_table WHERE i = 1;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- Non-unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 3;
+SELECT * FROM test_table WHERE i = 3;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- The same 4 cases, this time with an index-only scan
+RESET enable_indexonlyscan;
+
+-- Unique quals, found -> heap tuple lock only
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+SELECT * FROM test_table WHERE i = 1 AND j = 1;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- Unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+SELECT * FROM test_table WHERE i = 1 AND j = 2;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- Non-unique quals, found -> heap tuple and btree page locks
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 1;
+SELECT * FROM test_table WHERE i = 1;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+-- Non-unique quals, not found -> btree page lock
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+EXPLAIN (COSTS OFF)
+SELECT * FROM test_table WHERE i = 3;
+SELECT * FROM test_table WHERE i = 3;
+SELECT locktype, relation::regclass, page, tuple FROM pg_locks WHERE mode = 'SIReadLock' AND pid = pg_backend_pid() ORDER BY 1, 2, 3, 4;
+ROLLBACK;
+
+DROP TABLE test_table;
-- 
2.30.2
