While hacking on the GiST insertion algorithm, I noticed a bug with temporary GiST indexes. The scan algorithm uses LSNs to detect a concurrent page split, but for a temporary index, we just use a constant LSN on the assumption that there can't be anyone else modifying the index concurrently. But that assumption is not true. While a temporary index can't be modified by other backends, it's entirely possible to insert new tuples to the index within the same backend while a scan is in progress.

Here's a test case, using btree_gist. If you modify it to use a non-temporary table, it works fine, but with a temporary table the 2nd SELECT misses some rows, when new rows are inserted in the middle of the scan.

Operations on temporary tables are not WAL-logged, so we don't have LSNs to use, but we just need a monotonically increasing sequence of numbers for this purpose. Attached patch provides a function to generate such fake LSNs.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
-- Initialize a temporary table with 1000 rows and a gist index.
CREATE TEMPORARY TABLE tt (a integer);
CREATE INDEX i_tt ON tt USING gist (a);
INSERT INTO tt SELECT generate_series(1,1000);

-- Fetch n tuples from "tt", and write them to table called "resultset"
CREATE OR REPLACE FUNCTION fetchfunc(n int) RETURNS bool AS $$
declare
  r record;
  c refcursor;
begin
  c:= 'foocur';
  FOR i IN 1..n LOOP
    FETCH FROM c INTO r;
    IF r IS NULL THEN RETURN true; END IF;
    INSERT INTO resultset VALUES (r.a);
  END LOOP;
  RETURN false;
end;
$$ LANGUAGE plpgsql;

-- ensure that we use a plain index scan.
set enable_bitmapscan=off;
set enable_seqscan=off;

CREATE TEMPORARY TABLE resultset (a integer);

-- Fetch all rows from tt into resultset, with no inserts in the middle
BEGIN;
DECLARE  foocur CURSOR FOR SELECT a FROM tt WHERE a > 100;
SELECT fetchfunc(100);
SELECT fetchfunc(1000);
COMMIT;
SELECT COUNT(*) FROM resultset; -- Display result

truncate resultset;

-- Do the same, but insert new tuples in the middle of the scan. The result
-- should be the same.
BEGIN;
DECLARE  foocur CURSOR FOR SELECT a FROM tt WHERE a > 100;
SELECT fetchfunc(100);
INSERT INTO tt SELECT a% 10 FROM generate_series(1,1000) a;
SELECT fetchfunc(1000);
COMMIT;
SELECT COUNT(*) FROM resultset; -- Display result
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 3054f98..8c2dbc9 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -22,8 +22,6 @@
 #include "storage/indexfsm.h"
 #include "utils/memutils.h"
 
-const XLogRecPtr XLogRecPtrForTemp = {1, 1};
-
 /* Working state for gistbuild and its callback */
 typedef struct
 {
@@ -132,7 +130,7 @@ gistbuild(PG_FUNCTION_ARGS)
 		PageSetTLI(page, ThisTimeLineID);
 	}
 	else
-		PageSetLSN(page, XLogRecPtrForTemp);
+		PageSetLSN(page, GetXLogRecPtrForTemp());
 
 	UnlockReleaseBuffer(buffer);
 
@@ -423,7 +421,7 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
 		{
 			for (ptr = dist; ptr; ptr = ptr->next)
 			{
-				PageSetLSN(ptr->page, XLogRecPtrForTemp);
+				PageSetLSN(ptr->page, GetXLogRecPtrForTemp());
 			}
 		}
 
@@ -491,7 +489,7 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
 			PageSetTLI(state->stack->page, ThisTimeLineID);
 		}
 		else
-			PageSetLSN(state->stack->page, XLogRecPtrForTemp);
+			PageSetLSN(state->stack->page, GetXLogRecPtrForTemp());
 
 		if (state->stack->blkno == GIST_ROOT_BLKNO)
 			state->needInsertComplete = false;
@@ -1027,7 +1025,7 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
 		PageSetTLI(page, ThisTimeLineID);
 	}
 	else
-		PageSetLSN(page, XLogRecPtrForTemp);
+		PageSetLSN(page, GetXLogRecPtrForTemp());
 
 	END_CRIT_SECTION();
 }
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 28ab575..9de08f9 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -677,3 +677,22 @@ gistoptions(PG_FUNCTION_ARGS)
 		PG_RETURN_BYTEA_P(result);
 	PG_RETURN_NULL();
 }
+
+/*
+ * Temporary GiST indexes are not WAL-logged, but we need LSNs to detect
+ * concurrent page splits anyway. GetXLogRecPtrForTemp() provides a fake
+ * monotonically increasing sequence of LSNs for that purpose.
+ */
+XLogRecPtr
+GetXLogRecPtrForTemp(void)
+{
+	static XLogRecPtr counter = {0, 1};
+
+	counter.xrecoff++;
+	if (counter.xrecoff == 0)
+	{
+		counter.xlogid++;
+		counter.xrecoff++;
+	}
+	return counter;
+}
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 0ff5ba8..dbe9406 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -268,7 +268,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
 					pfree(rdata);
 				}
 				else
-					PageSetLSN(page, XLogRecPtrForTemp);
+					PageSetLSN(page, GetXLogRecPtrForTemp());
 
 				END_CRIT_SECTION();
 			}
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 34cc5d5..1f5fb7c 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -326,6 +326,8 @@ extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
 				 GISTENTRY *entry2, bool isnull2,
 				 Datum *dst, bool *dstisnull);
 
+extern XLogRecPtr GetXLogRecPtrForTemp(void);
+
 /* gistvacuum.c */
 extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
 extern Datum gistvacuumcleanup(PG_FUNCTION_ARGS);
-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Reply via email to