On Sat, Feb 11, 2012 at 4:46 AM, Noah Misch <n...@leadboat.com> wrote:
>> But I have to admit I'm intrigued by the idea of extending this to >> other cases, if there's a reasonable way to do that. For example, if >> we could fix things up so that we don't see a table at all if it was >> created after we took our snapshot, that would remove one of the >> obstacles to marking pages bulk-loaded into that table with FrozenXID >> and PD_ALL_VISIBLE from the get-go. I think a lot of people would be >> mighty happy about that. > > Exactly. > >> But the necessary semantics seem somewhat different. For TRUNCATE, >> you want to throw a serialization error; but is that also what you >> want for CREATE TABLE? Or do you just want it to appear empty? > > I think an error helps just as much there. If you create a table and populate > it with data in the same transaction, letting some snapshot see an empty table > is an atomicity failure. > > Your comment illustrates a helpful point: this is just another kind of > ordinary serialization failure, one that can happen at any isolation level. > No serial transaction sequence can yield one of these errors. Thanks Noah for drawing attention to this thread. I hadn't been watching. As you say, this work would allow me to freeze rows at load time and avoid the overhead of hint bit setting, which avoids performance issues from hint bit setting in checksum patch. I've reviewed this patch and it seems OK to me. Good work Marti. v2 patch attached, updated to latest HEAD. Patch adds * a GUC called strict_mvcc to isolate the new behaviour if required * relvalidxid is reset by VACUUM/ANALYZE to avoid wraparound failure At present this lacks docs for strict_mvcc and doesn't attempt to handle CREATE TABLE case yet, but should be straightforward to do so. Hint bit setting is in separate patch on other thread. -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index c910863..4387f49 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -73,7 +73,7 @@ /* GUC variable */ bool synchronize_seqscans = true; - +bool StrictMVCC = true; static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, @@ -1175,6 +1175,24 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, HeapScanDesc scan; /* + * If the snapshot is older than relvalidxmin then that either a table + * has only recently been created or that a TRUNCATE has removed data + * that we should still be able to see. Either way, we aren't allowed + * to view the rows in StrictMVCC mode. + */ + if (StrictMVCC && + TransactionIdIsValid(relation->rd_rel->relvalidxmin) && + TransactionIdIsValid(snapshot->xmax) && + NormalTransactionIdPrecedes(snapshot->xmax, relation->rd_rel->relvalidxmin)) + { + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("canceling statement due to conflict with TRUNCATE TABLE on %s", + NameStr(relation->rd_rel->relname)), + errdetail("User query attempting to see row versions that have been removed."))); + } + + /* * increment relation ref count while scanning relation * * This is just to make really sure the relcache entry won't go away while diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index aef410a..3f9bd5d 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -787,6 +787,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relhastriggers - 1] = BoolGetDatum(rd_rel->relhastriggers); values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass); values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid); + values[Anum_pg_class_relvalidxmin - 1] = TransactionIdGetDatum(rd_rel->relvalidxmin); if (relacl != (Datum) 0) values[Anum_pg_class_relacl - 1] = relacl; else @@ -884,6 +885,7 @@ AddNewRelationTuple(Relation pg_class_desc, new_rel_reltup->relfrozenxid = InvalidTransactionId; } + new_rel_reltup->relvalidxmin = InvalidTransactionId; new_rel_reltup->relowner = relowner; new_rel_reltup->reltype = new_type_oid; new_rel_reltup->reloftype = reloftype; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index bfbe642..0d96a6a 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2854,7 +2854,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks) } /* We'll build a new physical relation for the index */ - RelationSetNewRelfilenode(iRel, InvalidTransactionId); + RelationSetNewRelfilenode(iRel, InvalidTransactionId, InvalidTransactionId); /* Initialize the index and rebuild */ /* Note: we do not need to re-establish pkey setting */ diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index b40e57b..0578241 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -82,6 +82,8 @@ static int elevel = -1; static MemoryContext anl_context = NULL; +static TransactionId OldestXmin; + static BufferAccessStrategy vac_strategy; @@ -538,7 +540,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) totalrows, visibilitymap_count(onerel), hasindex, - InvalidTransactionId); + InvalidTransactionId, + OldestXmin); /* * Same for indexes. Vacuum always scans all indexes, so if we're part of @@ -558,6 +561,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) totalindexrows, 0, false, + InvalidTransactionId, InvalidTransactionId); } } @@ -1025,7 +1029,6 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, double deadrows = 0; /* # dead rows seen */ double rowstoskip = -1; /* -1 means not set yet */ BlockNumber totalblocks; - TransactionId OldestXmin; BlockSamplerData bs; double rstate; diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 1f95abc..ab4aec2 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -287,7 +287,7 @@ ResetSequence(Oid seq_relid) * Create a new storage file for the sequence. We want to keep the * sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs. */ - RelationSetNewRelfilenode(seq_rel, InvalidTransactionId); + RelationSetNewRelfilenode(seq_rel, InvalidTransactionId, InvalidTransactionId); /* * Insert the modified tuple into the new storage file. diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index cd4490a..aa9d2e9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -19,6 +19,7 @@ #include "access/reloptions.h" #include "access/relscan.h" #include "access/sysattr.h" +#include "access/transam.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -1121,7 +1122,7 @@ ExecuteTruncate(TruncateStmt *stmt) * as the relfilenode value. The old storage file is scheduled for * deletion at commit. */ - RelationSetNewRelfilenode(rel, RecentXmin); + RelationSetNewRelfilenode(rel, RecentXmin, GetCurrentTransactionId()); heap_relid = RelationGetRelid(rel); toast_relid = rel->rd_rel->reltoastrelid; @@ -1132,7 +1133,12 @@ ExecuteTruncate(TruncateStmt *stmt) if (OidIsValid(toast_relid)) { rel = relation_open(toast_relid, AccessExclusiveLock); - RelationSetNewRelfilenode(rel, RecentXmin); + RelationSetNewRelfilenode(rel, RecentXmin, InvalidTransactionId); + + /* + * We don't need a cmin here because there can't be any open + * cursors in the same session as the TRUNCATE command. + */ heap_close(rel, NoLock); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 353af50..7609e59 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -573,7 +573,8 @@ void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, - bool hasindex, TransactionId frozenxid) + bool hasindex, TransactionId frozenxid, + TransactionId oldestxmin) { Oid relid = RelationGetRelid(relation); Relation rd; @@ -647,6 +648,17 @@ vac_update_relstats(Relation relation, dirty = true; } + /* + * Reset relvalidxmin if its old enough + */ + if (TransactionIdIsValid(oldestxmin) && + TransactionIdIsNormal(pgcform->relvalidxmin) && + TransactionIdPrecedes(pgcform->relvalidxmin, oldestxmin)) + { + pgcform->relvalidxmin = InvalidTransactionId; + dirty = true; + } + /* If anything changed, write out the tuple. */ if (dirty) heap_inplace_update(rd, ctup); diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 2fc749e..46d6ab8 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -255,7 +255,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, new_rel_tuples, new_rel_allvisible, vacrelstats->hasindex, - new_frozen_xid); + new_frozen_xid, + OldestXmin); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), @@ -1185,6 +1186,7 @@ lazy_cleanup_index(Relation indrel, stats->num_index_tuples, 0, false, + InvalidTransactionId, InvalidTransactionId); ereport(elevel, diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a59950e..17303e8 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2605,7 +2605,8 @@ RelationBuildLocalRelation(const char *relname, * the XIDs that will be put into the new relation contents. */ void -RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) +RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid, + TransactionId validxmin) { Oid newrelfilenode; RelFileNodeBackend newrnode; @@ -2674,6 +2675,10 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) } classform->relfrozenxid = freezeXid; + if (validxmin != InvalidTransactionId && + TransactionIdPrecedes(classform->relvalidxmin, validxmin)) + classform->relvalidxmin = validxmin; + simple_heap_update(pg_class, &tuple->t_self, tuple); CatalogUpdateIndexes(pg_class, tuple); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 486bdcd..1561889 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1430,6 +1430,19 @@ static struct config_bool ConfigureNamesBool[] = }, { + {"strict_mvcc", PGC_POSTMASTER, COMPAT_OPTIONS_PREVIOUS, + gettext_noop("Enables backward compatibility mode of strict MVCC " + "for TRUNCATE and CREATE TABLE."), + gettext_noop("When enabled viewing a truncated or newly created table " + "will throw a serialization error to prevent MVCC " + "discrepancies that were possible priot to 9.2.") + }, + &StrictMVCC, + true, + NULL, NULL, NULL + }, + + { {"quote_all_identifiers", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, gettext_noop("When generating SQL fragments, quote all identifiers."), NULL, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 96da086..f56e374 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -548,6 +548,7 @@ #quote_all_identifiers = off #sql_inheritance = on #standard_conforming_strings = on +#strict_mvcc = on #synchronize_seqscans = on # - Other Platforms and Clients - diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 0335347..223f157 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201203021 +#define CATALOG_VERSION_NO 201203031 #endif diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 1567206..d6ee70c 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -67,6 +67,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO bool relhastriggers; /* has (or has had) any TRIGGERs */ bool relhassubclass; /* has (or has had) derived classes */ TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */ + TransactionId relvalidxmin; /* data is only valid for snapshots > this Xid */ #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* NOTE: These fields are not present in a relcache entry's rd_rel field. */ @@ -77,7 +78,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO /* Size of fixed part of pg_class tuples, not counting var-length fields */ #define CLASS_TUPLE_SIZE \ - (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId)) + (offsetof(FormData_pg_class,relvalidxmin) + sizeof(TransactionId)) /* ---------------- * Form_pg_class corresponds to a pointer to a tuple with @@ -91,7 +92,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 27 +#define Natts_pg_class 28 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -117,8 +118,9 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_relhastriggers 23 #define Anum_pg_class_relhassubclass 24 #define Anum_pg_class_relfrozenxid 25 -#define Anum_pg_class_relacl 26 -#define Anum_pg_class_reloptions 27 +#define Anum_pg_class_relvalidxmin 26 +#define Anum_pg_class_relacl 27 +#define Anum_pg_class_reloptions 28 /* ---------------- * initial contents of pg_class @@ -130,13 +132,13 @@ typedef FormData_pg_class *Form_pg_class; */ /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 0 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 0 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 0 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 0 _null_ _null_ )); DESCR(""); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 4526648..fa79231 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -151,7 +151,8 @@ extern void vac_update_relstats(Relation relation, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, - TransactionId frozenxid); + TransactionId frozenxid, + TransactionId oldestxmin); extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age, bool sharedRel, TransactionId *oldestXmin, diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 610cb59..9d2be31 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -342,6 +342,8 @@ extern ProcessingMode Mode; #define GetProcessingMode() Mode +/* in access/heap/heapam.c */ +extern bool StrictMVCC; /***************************************************************************** * pinit.h -- * diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 2f39486..60c2eb6 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -76,7 +76,8 @@ extern Relation RelationBuildLocalRelation(const char *relname, * Routine to manage assignment of new relfilenode to a relation */ extern void RelationSetNewRelfilenode(Relation relation, - TransactionId freezeXid); + TransactionId freezeXid, + TransactionId validxmin); /* * Routines for flushing/rebuilding relcache entries in various scenarios diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 669c0f2..bba722a 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -14,3 +14,4 @@ test: fk-contention test: fk-deadlock test: fk-deadlock2 test: eval-plan-qual +test: truncate diff --git a/src/test/isolation/expected/truncate.out b/src/test/isolation/expected/truncate.out new file mode 100644 index ...2b9f161 *** a/src/test/isolation/expected/truncate.out --- b/src/test/isolation/expected/truncate.out *************** *** 0 **** --- 1,63 ---- + Parsed test spec with 2 sessions + + starting permutation: begin select roll trunc + step begin: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + -- Force snapshot open + SELECT 1 AS foo FROM txid_current(); + + foo + + 1 + step select: SELECT * FROM foo; + i + + 1 + step roll: ROLLBACK; + step trunc: TRUNCATE TABLE foo; + + starting permutation: begin select trunc roll + step begin: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + -- Force snapshot open + SELECT 1 AS foo FROM txid_current(); + + foo + + 1 + step select: SELECT * FROM foo; + i + + 1 + step trunc: TRUNCATE TABLE foo; <waiting ...> + step roll: ROLLBACK; + step trunc: <... completed> + + starting permutation: begin trunc select roll + step begin: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + -- Force snapshot open + SELECT 1 AS foo FROM txid_current(); + + foo + + 1 + step trunc: TRUNCATE TABLE foo; + step select: SELECT * FROM foo; + ERROR: canceling statement due to conflict with TRUNCATE TABLE on foo + step roll: ROLLBACK; + + starting permutation: trunc begin select roll + step trunc: TRUNCATE TABLE foo; + step begin: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + -- Force snapshot open + SELECT 1 AS foo FROM txid_current(); + + foo + + 1 + step select: SELECT * FROM foo; + i + + step roll: ROLLBACK; diff --git a/src/test/isolation/specs/truncate.spec b/src/test/isolation/specs/truncate.spec new file mode 100644 index ...2c7b707 *** a/src/test/isolation/specs/truncate.spec --- b/src/test/isolation/specs/truncate.spec *************** *** 0 **** --- 1,23 ---- + setup + { + CREATE TABLE foo (i int); + INSERT INTO foo VALUES (1); + } + + teardown + { + DROP TABLE foo; + } + + session "s1" + step "begin" + { + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + -- Force snapshot open + SELECT 1 AS foo FROM txid_current(); + } + step "select" { SELECT * FROM foo; } + step "roll" { ROLLBACK; } + + session "s2" + step "trunc" { TRUNCATE TABLE foo; }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers