On Sun, Feb 26, 2012 at 7:16 PM, Heikki Linnakangas <heikki.linnakan...@enterprisedb.com> wrote: > On 24.02.2012 22:55, Simon Riggs wrote: >> >> A long time ago, in a galaxy far away, we discussed ways to speed up >> data loads/COPY. >> http://archives.postgresql.org/pgsql-hackers/2007-01/msg00470.php >> >> In particular, the idea that we could mark tuples as committed while >> we are still loading them, to avoid negative behaviour for the first >> reader. >> >> Simple patch to implement this is attached, together with test case. >> >> ... >> >> >> What exactly does it do? Previously, we optimised COPY when it was >> loading data into a newly created table or a freshly truncated table. >> This patch extends that and actually sets the tuple header flag as >> HEAP_XMIN_COMMITTED during the load. Doing so is simple 2 lines of >> code. The patch also adds some tests for corner cases that would make >> that action break MVCC - though those cases are minor and typical data >> loads will benefit fully from this. > > > This doesn't work with subtransactions: ... > The query should return the row copied in the same subtransaction.
Thanks for pointing that out. New patch with corrected logic and test case attached. >> In the link above, Tom suggested reworking HeapTupleSatisfiesMVCC() >> and adding current xid to snapshots. That is an invasive change that I >> would wish to avoid at any time and explains the long delay in >> tackling this. The way I've implemented it, is just as a short test >> during XidInMVCCSnapshot() so that we trap the case when the xid == >> xmax and so would appear to be running. This is much less invasive and >> just as performant as Tom's original suggestion. > > > TransactionIdIsCurrentTransactionId() can be fairly expensive if you have a > lot of subtransactions open... I've put in something to avoid that cost for the common case - just a boolean. This seems like the best plan rather than the explicit FREEZE option. -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index c910863..3899282 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2056,6 +2056,17 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, tup->t_tableOid = RelationGetRelid(relation); /* + * If we are inserting into a new relation invisible as yet to other + * backends and our session has no prior snapshots and no ready portals + * then we can also set the hint bit for the rows we are inserting. The + * last two restrictions ensure that HeapTupleSatisfiesMVCC() gives + * the right answer if the current transaction inspects the data after + * we load it. + */ + if (options & HEAP_INSERT_HINT_XMIN) + tup->t_data->t_infomask |= HEAP_XMIN_COMMITTED; + + /* * If the new tuple is too big for storage or contains already toasted * out-of-line attributes from some other relation, invoke the toaster. */ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index e22bdac..de7504c 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -146,6 +146,7 @@ typedef struct TransactionStateData int prevSecContext; /* previous SecurityRestrictionContext */ bool prevXactReadOnly; /* entry-time xact r/o state */ bool startedInRecovery; /* did we start in recovery? */ + bool maySeePreHintedTuples; /* did subtrans write pre-hinted rows? */ struct TransactionStateData *parent; /* back link to parent */ } TransactionStateData; @@ -175,6 +176,7 @@ static TransactionStateData TopTransactionStateData = { 0, /* previous SecurityRestrictionContext */ false, /* entry-time xact r/o state */ false, /* startedInRecovery */ + false, /* maySeePreHintedTuples */ NULL /* link to parent state block */ }; @@ -704,6 +706,18 @@ TransactionStartedDuringRecovery(void) return CurrentTransactionState->startedInRecovery; } +bool +TransactionMaySeePreHintedTuples(void) +{ + return CurrentTransactionState->maySeePreHintedTuples; +} + +void +TransactionMayWritePreHintedTuples(void) +{ + CurrentTransactionState->maySeePreHintedTuples = true; +} + /* * CommandCounterIncrement */ @@ -1689,6 +1703,7 @@ StartTransaction(void) s->startedInRecovery = false; XactReadOnly = DefaultXactReadOnly; } + s->maySeePreHintedTuples = false; XactDeferrable = DefaultXactDeferrable; XactIsoLevel = DefaultXactIsoLevel; forceSyncCommit = false; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 110480f..1419e33 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -43,6 +43,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/portal.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -1922,6 +1923,13 @@ CopyFrom(CopyState cstate) hi_options |= HEAP_INSERT_SKIP_FSM; if (!XLogIsNeeded()) hi_options |= HEAP_INSERT_SKIP_WAL; + + if (ThereAreNoPriorRegisteredSnapshots() && + ThereAreNoReadyPortals()) + { + hi_options |= HEAP_INSERT_HINT_XMIN; + TransactionMayWritePreHintedTuples(); + } } /* diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index cfb73c1..24075db 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -1055,3 +1055,22 @@ pg_cursor(PG_FUNCTION_ARGS) return (Datum) 0; } + +bool +ThereAreNoReadyPortals(void) +{ + HASH_SEQ_STATUS status; + PortalHashEnt *hentry; + + hash_seq_init(&status, PortalHashTable); + + while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL) + { + Portal portal = hentry->portal; + + if (portal->status == PORTAL_READY) + return false; + } + + return true; +} diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 5aebbd1..5d9e3bf 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -1183,3 +1183,12 @@ DeleteAllExportedSnapshotFiles(void) FreeDir(s_dir); } + +bool +ThereAreNoPriorRegisteredSnapshots(void) +{ + if (RegisteredSnapshots <= 1) + return true; + + return false; +} diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 31791a7..6cb4152 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -1255,6 +1255,28 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) /* Any xid < xmin is not in-progress */ if (TransactionIdPrecedes(xid, snapshot->xmin)) return false; + + /* + * TransactionIdIsCurrentTransactionId() could be expensive so avoid + * running it unless we know that the current subtransaction has + * decided to write tuples that already have HEAP_XMIN_COMMITTED set. + * In that case we end up here when we run HeapTupleSatisfiesMVCC. + */ + if (TransactionMaySeePreHintedTuples()) + { + /* + * Make sure current xid is never thought to be in progress on tuples + * that are already marked as committed - for use in bulk COPY etc.. + * We never included the current xid in snapshots, but its possible + * that the current xid is equal to or greater than xmax, so we + * check for the current xid before we check for xmax. + * We could optimise this further by stopping the search as soon + * as the xid is less than xmax, but seems like a step too far. + */ + if (TransactionIdIsCurrentTransactionId(xid)) + return false; + } + /* Any xid >= xmax is in-progress */ if (TransactionIdFollowsOrEquals(xid, snapshot->xmax)) return true; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index fa38803..0381785 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -26,6 +26,7 @@ /* "options" flag bits for heap_insert */ #define HEAP_INSERT_SKIP_WAL 0x0001 #define HEAP_INSERT_SKIP_FSM 0x0002 +#define HEAP_INSERT_HINT_XMIN 0x0004 typedef struct BulkInsertStateData *BulkInsertState; diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 20e344e..7fb0123 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -217,6 +217,8 @@ extern TimestampTz GetCurrentTransactionStopTimestamp(void); extern void SetCurrentStatementStartTimestamp(void); extern int GetCurrentTransactionNestLevel(void); extern bool TransactionIdIsCurrentTransactionId(TransactionId xid); +extern bool TransactionMaySeePreHintedTuples(void); +extern void TransactionMayWritePreHintedTuples(void); extern void CommandCounterIncrement(void); extern void ForceSyncCommit(void); extern void StartTransactionCommand(void); diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index 4833942..bd2b133 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -219,5 +219,6 @@ extern void PortalDefineQuery(Portal portal, extern Node *PortalListGetPrimaryStmt(List *stmts); extern void PortalCreateHoldStore(Portal portal); extern void PortalHashTableDeleteAll(void); +extern bool ThereAreNoReadyPortals(void); #endif /* PORTAL_H */ diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index f195981..789b72e 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -46,5 +46,6 @@ extern Datum pg_export_snapshot(PG_FUNCTION_ARGS); extern void ImportSnapshot(const char *idstr); extern bool XactHasExportedSnapshots(void); extern void DeleteAllExportedSnapshotFiles(void); +extern bool ThereAreNoPriorRegisteredSnapshots(void); #endif /* SNAPMGR_H */ diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 8e2bc0c..7461529 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -239,6 +239,49 @@ a\. \.b c\.d "\." +CREATE TABLE vistest (LIKE testeoc); +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +SELECT * FROM vistest; + a +--- + a + b + c +(3 rows) + +SAVEPOINT s1; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +SELECT * FROM vistest; + a +--- + d + e + f +(3 rows) + +SAVEPOINT s2; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +SELECT * FROM vistest; + a +--- + x + y + z +(3 rows) + +COMMIT; +SELECT * FROM vistest; + a +--- + x + y + z +(3 rows) + DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after(); diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 6322c8f..a1a8d97 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -164,6 +164,34 @@ c\.d COPY testeoc TO stdout CSV; +CREATE TABLE vistest (LIKE testeoc); +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +a +b +c +\. +SELECT * FROM vistest; +SAVEPOINT s1; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +d +e +f +\. +SELECT * FROM vistest; +SAVEPOINT s2; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +x +y +z +\. +SELECT * FROM vistest; +COMMIT; +SELECT * FROM vistest; + DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after();
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers