On Sun, Feb 26, 2012 at 7:16 PM, Heikki Linnakangas
<heikki.linnakan...@enterprisedb.com> wrote:
> On 24.02.2012 22:55, Simon Riggs wrote:
>>
>> A long time ago, in a galaxy far away, we discussed ways to speed up
>> data loads/COPY.
>> http://archives.postgresql.org/pgsql-hackers/2007-01/msg00470.php
>>
>> In particular, the idea that we could mark tuples as committed while
>> we are still loading them, to avoid negative behaviour for the first
>> reader.
>>
>> Simple patch to implement this is attached, together with test case.
>>
>>  ...
>>
>>
>> What exactly does it do? Previously, we optimised COPY when it was
>> loading data into a newly created table or a freshly truncated table.
>> This patch extends that and actually sets the tuple header flag as
>> HEAP_XMIN_COMMITTED during the load. Doing so is simple 2 lines of
>> code. The patch also adds some tests for corner cases that would make
>> that action break MVCC - though those cases are minor and typical data
>> loads will benefit fully from this.
>
>
> This doesn't work with subtransactions:
...
> The query should return the row copied in the same subtransaction.

Thanks for pointing that out.

New patch with corrected logic and test case attached.

>> In the link above, Tom suggested reworking HeapTupleSatisfiesMVCC()
>> and adding current xid to snapshots. That is an invasive change that I
>> would wish to avoid at any time and explains the long delay in
>> tackling this. The way I've implemented it, is just as a short test
>> during XidInMVCCSnapshot() so that we trap the case when the xid ==
>> xmax and so would appear to be running. This is much less invasive and
>> just as performant as Tom's original suggestion.
>
>
> TransactionIdIsCurrentTransactionId() can be fairly expensive if you have a
> lot of subtransactions open...

I've put in something to avoid that cost for the common case - just a boolean.

This seems like the best plan rather than the explicit FREEZE option.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c910863..3899282 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2056,6 +2056,17 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 	tup->t_tableOid = RelationGetRelid(relation);
 
 	/*
+	 * If we are inserting into a new relation invisible as yet to other
+	 * backends and our session has no prior snapshots and no ready portals
+	 * then we can also set the hint bit for the rows we are inserting. The
+	 * last two restrictions ensure that HeapTupleSatisfiesMVCC() gives
+	 * the right answer if the current transaction inspects the data after
+	 * we load it.
+	 */
+	if (options & HEAP_INSERT_HINT_XMIN)
+		tup->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
+
+	/*
 	 * If the new tuple is too big for storage or contains already toasted
 	 * out-of-line attributes from some other relation, invoke the toaster.
 	 */
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index e22bdac..de7504c 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -146,6 +146,7 @@ typedef struct TransactionStateData
 	int			prevSecContext; /* previous SecurityRestrictionContext */
 	bool		prevXactReadOnly;		/* entry-time xact r/o state */
 	bool		startedInRecovery;		/* did we start in recovery? */
+	bool		maySeePreHintedTuples;	/* did subtrans write pre-hinted rows? */
 	struct TransactionStateData *parent;		/* back link to parent */
 } TransactionStateData;
 
@@ -175,6 +176,7 @@ static TransactionStateData TopTransactionStateData = {
 	0,							/* previous SecurityRestrictionContext */
 	false,						/* entry-time xact r/o state */
 	false,						/* startedInRecovery */
+	false,						/* maySeePreHintedTuples */
 	NULL						/* link to parent state block */
 };
 
@@ -704,6 +706,18 @@ TransactionStartedDuringRecovery(void)
 	return CurrentTransactionState->startedInRecovery;
 }
 
+bool
+TransactionMaySeePreHintedTuples(void)
+{
+	return CurrentTransactionState->maySeePreHintedTuples;
+}
+
+void
+TransactionMayWritePreHintedTuples(void)
+{
+	CurrentTransactionState->maySeePreHintedTuples = true;
+}
+
 /*
  *	CommandCounterIncrement
  */
@@ -1689,6 +1703,7 @@ StartTransaction(void)
 		s->startedInRecovery = false;
 		XactReadOnly = DefaultXactReadOnly;
 	}
+	s->maySeePreHintedTuples = false;
 	XactDeferrable = DefaultXactDeferrable;
 	XactIsoLevel = DefaultXactIsoLevel;
 	forceSyncCommit = false;
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 110480f..1419e33 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -43,6 +43,7 @@
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/portal.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
 
@@ -1922,6 +1923,13 @@ CopyFrom(CopyState cstate)
 		hi_options |= HEAP_INSERT_SKIP_FSM;
 		if (!XLogIsNeeded())
 			hi_options |= HEAP_INSERT_SKIP_WAL;
+
+		if (ThereAreNoPriorRegisteredSnapshots() &&
+			ThereAreNoReadyPortals())
+		{
+			hi_options |= HEAP_INSERT_HINT_XMIN;
+			TransactionMayWritePreHintedTuples();
+		}
 	}
 
 	/*
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index cfb73c1..24075db 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -1055,3 +1055,22 @@ pg_cursor(PG_FUNCTION_ARGS)
 
 	return (Datum) 0;
 }
+
+bool
+ThereAreNoReadyPortals(void)
+{
+	HASH_SEQ_STATUS status;
+	PortalHashEnt *hentry;
+
+	hash_seq_init(&status, PortalHashTable);
+
+	while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL)
+	{
+		Portal		portal = hentry->portal;
+
+		if (portal->status == PORTAL_READY)
+			return false;
+	}
+
+	return true;
+}
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 5aebbd1..5d9e3bf 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1183,3 +1183,12 @@ DeleteAllExportedSnapshotFiles(void)
 
 	FreeDir(s_dir);
 }
+
+bool
+ThereAreNoPriorRegisteredSnapshots(void)
+{
+	if (RegisteredSnapshots <= 1)
+		return true;
+
+	return false;
+}
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 31791a7..6cb4152 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -1255,6 +1255,28 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 	/* Any xid < xmin is not in-progress */
 	if (TransactionIdPrecedes(xid, snapshot->xmin))
 		return false;
+
+	/*
+	 * TransactionIdIsCurrentTransactionId() could be expensive so avoid
+	 * running it unless we know that the current subtransaction has
+	 * decided to write tuples that already have HEAP_XMIN_COMMITTED set.
+	 * In that case we end up here when we run HeapTupleSatisfiesMVCC.
+	 */
+	if (TransactionMaySeePreHintedTuples())
+	{
+		/*
+		 * Make sure current xid is never thought to be in progress on tuples
+		 * that are already marked as committed - for use in bulk COPY etc..
+		 * We never included the current xid in snapshots, but its possible
+		 * that the current xid is equal to or greater than xmax, so we
+		 * check for the current xid before we check for xmax.
+		 * We could optimise this further by stopping the search as soon
+		 * as the xid is less than xmax, but seems like a step too far.
+		 */
+		if (TransactionIdIsCurrentTransactionId(xid))
+			return false;
+	}
+
 	/* Any xid >= xmax is in-progress */
 	if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
 		return true;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index fa38803..0381785 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -26,6 +26,7 @@
 /* "options" flag bits for heap_insert */
 #define HEAP_INSERT_SKIP_WAL	0x0001
 #define HEAP_INSERT_SKIP_FSM	0x0002
+#define HEAP_INSERT_HINT_XMIN	0x0004
 
 typedef struct BulkInsertStateData *BulkInsertState;
 
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 20e344e..7fb0123 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -217,6 +217,8 @@ extern TimestampTz GetCurrentTransactionStopTimestamp(void);
 extern void SetCurrentStatementStartTimestamp(void);
 extern int	GetCurrentTransactionNestLevel(void);
 extern bool TransactionIdIsCurrentTransactionId(TransactionId xid);
+extern bool TransactionMaySeePreHintedTuples(void);
+extern void TransactionMayWritePreHintedTuples(void);
 extern void CommandCounterIncrement(void);
 extern void ForceSyncCommit(void);
 extern void StartTransactionCommand(void);
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index 4833942..bd2b133 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -219,5 +219,6 @@ extern void PortalDefineQuery(Portal portal,
 extern Node *PortalListGetPrimaryStmt(List *stmts);
 extern void PortalCreateHoldStore(Portal portal);
 extern void PortalHashTableDeleteAll(void);
+extern bool ThereAreNoReadyPortals(void);
 
 #endif   /* PORTAL_H */
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index f195981..789b72e 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -46,5 +46,6 @@ extern Datum pg_export_snapshot(PG_FUNCTION_ARGS);
 extern void ImportSnapshot(const char *idstr);
 extern bool XactHasExportedSnapshots(void);
 extern void DeleteAllExportedSnapshotFiles(void);
+extern bool ThereAreNoPriorRegisteredSnapshots(void);
 
 #endif   /* SNAPMGR_H */
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 8e2bc0c..7461529 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -239,6 +239,49 @@ a\.
 \.b
 c\.d
 "\."
+CREATE TABLE vistest (LIKE testeoc);
+BEGIN;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ a
+ b
+ c
+(3 rows)
+
+SAVEPOINT s1;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ d
+ e
+ f
+(3 rows)
+
+SAVEPOINT s2;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ x
+ y
+ z
+(3 rows)
+
+COMMIT;
+SELECT * FROM vistest;
+ a 
+---
+ x
+ y
+ z
+(3 rows)
+
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 6322c8f..a1a8d97 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -164,6 +164,34 @@ c\.d
 
 COPY testeoc TO stdout CSV;
 
+CREATE TABLE vistest (LIKE testeoc);
+BEGIN;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+a
+b
+c
+\.
+SELECT * FROM vistest;
+SAVEPOINT s1;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+d
+e
+f
+\.
+SELECT * FROM vistest;
+SAVEPOINT s2;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+x
+y
+z
+\.
+SELECT * FROM vistest;
+COMMIT;
+SELECT * FROM vistest;
+
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to