On 2021-Apr-23, Alvaro Herrera wrote: > I think the patch I posted was too simple. I think a real fix requires > us to keep track of exactly in what way the partdesc is outdated, so > that we can compare to the current situation in deciding to use that > partdesc or build a new one. For example, we could keep track of a list > of OIDs of detached partitions (and it simplifies things a lot if allow > only a single partition in this situation, because it's either zero OIDs > or one OID); or we can save the Xmin of the pg_inherits tuple for the > detached partition (and we can compare that Xmin to our current active > snapshot and decide whether to use that partdesc or not). > > I'll experiment with this a little more and propose a patch later today.
This (POC-quality) seems to do the trick. (I restored the API of find_inheritance_children, because it was getting a little obnoxious. I haven't thought this through but I think we should do something like it.) > I don't think it's too much of a problem to state that you need to > finalize one detach before you can do the next one. After all, with > regular detach, you'd have the tables exclusively locked so you can't do > them in parallel anyway. (It also increases the chances that people > will finalize detach operations that went unnoticed.) I haven't added a mechanism to verify this; but with asserts on, this patch will crash if you have more than one. I think the behavior is not necessarily sane with asserts off, since you'll get an arbitrary detach-Xmin assigned to the partdesc, depending on catalog scan order. -- Álvaro Herrera Valdivia, Chile "El hombre nunca sabe de lo que es capaz hasta que lo intenta" (C. Dickens)
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index 6447b52854..e3ee72c0b7 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -63,8 +63,16 @@ typedef struct SeenRelsEntry * committed to the active snapshot. */ List * -find_inheritance_children(Oid parentrelId, bool omit_detached, - LOCKMODE lockmode, bool *detached_exist) +find_inheritance_children(Oid parentrelId, LOCKMODE lockmode) +{ + return find_inheritance_children_extended(parentrelId, true, lockmode, + NULL, NULL); +} + +List * +find_inheritance_children_extended(Oid parentrelId, bool omit_detached, + LOCKMODE lockmode, bool *detached_exist, + TransactionId *detached_xmin) { List *list = NIL; Relation relation; @@ -132,7 +140,15 @@ find_inheritance_children(Oid parentrelId, bool omit_detached, snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) + { + /* XXX only one detached partition allowed */ + if (detached_xmin) + { + Assert(*detached_xmin == InvalidTransactionId); + *detached_xmin = xmin; + } continue; + } } } @@ -247,8 +263,7 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents) ListCell *lc; /* Get the direct children of this rel */ - currentchildren = find_inheritance_children(currentrel, true, - lockmode, NULL); + currentchildren = find_inheritance_children(currentrel, lockmode); /* * Add to the queue only those children not already seen. This avoids diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 7d00f4eb25..dd931d09af 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3507,7 +3507,7 @@ renameatt_internal(Oid myrelid, * expected_parents will only be 0 if we are not already recursing. */ if (expected_parents == 0 && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("inherited column \"%s\" must be renamed in child tables too", @@ -3706,7 +3706,7 @@ rename_constraint_internal(Oid myrelid, else { if (expected_parents == 0 && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("inherited constraint \"%s\" must be renamed in child tables too", @@ -6580,7 +6580,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, */ if (colDef->identity && recurse && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("cannot recursively add identity column to table that has child tables"))); @@ -6826,7 +6826,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); /* * If we are told not to recurse, there had better not be any child @@ -7689,7 +7689,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs * resulting state can be properly dumped and restored. */ if (!recurse && - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL)) + find_inheritance_children(RelationGetRelid(rel), lockmode)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too"))); @@ -8297,7 +8297,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); if (children) { @@ -8785,7 +8785,7 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); /* * Check if ONLY was specified with ALTER TABLE. If so, allow the @@ -11318,8 +11318,7 @@ ATExecDropConstraint(Relation rel, const char *constrName, * use find_all_inheritors to do it in one pass. */ if (!is_no_inherit_constraint) - children = find_inheritance_children(RelationGetRelid(rel), true, - lockmode, NULL); + children = find_inheritance_children(RelationGetRelid(rel), lockmode); else children = NIL; @@ -11703,8 +11702,7 @@ ATPrepAlterColumnType(List **wqueue, } } else if (!recursing && - find_inheritance_children(RelationGetRelid(rel), true, - NoLock, NULL) != NIL) + find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("type of inherited column \"%s\" must be changed in child tables too", diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index d8393aa4de..f305f8bc0f 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -1141,8 +1141,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, ListCell *l; List *idxs = NIL; - idxs = find_inheritance_children(indexOid, true, - ShareRowExclusiveLock, NULL); + idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock); foreach(l, idxs) childTbls = lappend_oid(childTbls, IndexGetRelation(lfirst_oid(l), diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index 2305dff407..73fa0459a1 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -78,11 +78,33 @@ RelationGetPartitionDesc(Relation rel, bool omit_detached) * If relcache has a partition descriptor, use that. However, we can only * do so when we are asked to include all partitions including detached; * and also when we know that there are no detached partitions. + * + * If there is no active snapshot, detached partitions aren't omitted + * either, so we can use the cached descriptor too in that case. */ if (likely(rel->rd_partdesc && - (!rel->rd_partdesc->detached_exist || !omit_detached))) + (!rel->rd_partdesc->detached_exist || !omit_detached || + !ActiveSnapshotSet()))) return rel->rd_partdesc; + /* + * If we're asked to omit detached partitions, we may be able to use a + * cached version of it, if it was built with the same conditions that + * we see now. + */ + if (omit_detached && + rel->rd_partdesc_nodetached && + TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin) && + ActiveSnapshotSet()) + { + Snapshot activesnap; + + activesnap = GetActiveSnapshot(); + + if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap)) + return rel->rd_partdesc_nodetached; + } + return RelationBuildPartitionDesc(rel, omit_detached); } @@ -117,6 +139,7 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) Oid *oids = NULL; bool *is_leaf = NULL; bool detached_exist; + TransactionId detached_xmin; ListCell *cell; int i, nparts; @@ -132,8 +155,11 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) * some well-defined point in time. */ detached_exist = false; - inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached, - NoLock, &detached_exist); + detached_xmin = InvalidTransactionId; + inhoids = find_inheritance_children_extended(RelationGetRelid(rel), + omit_detached, NoLock, + &detached_exist, + &detached_xmin); nparts = list_length(inhoids); /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */ @@ -282,36 +308,29 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) /* * We have a fully valid partdesc. Reparent it so that it has the right - * lifespan, and if appropriate put it into the relation's relcache entry. + * lifespan. */ - if (omit_detached && detached_exist) + MemoryContextSetParent(new_pdcxt, CacheMemoryContext); + + /* + * But first, a kluge: if there's an old rd_pdcxt, it contains an old + * partition descriptor that may still be referenced somewhere. + * Preserve it, while not leaking it, by reattaching it as a child + * context of the new rd_pdcxt. Eventually it will get dropped by + * either RelationClose or RelationClearRelation. + */ + if (rel->rd_pdcxt != NULL) + MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt); + rel->rd_pdcxt = new_pdcxt; + + /* Store it into relcache */ + if (omit_detached && detached_exist && ActiveSnapshotSet()) { - /* - * A transient partition descriptor is only good for the current - * statement, so make it a child of the current portal's context. - */ - MemoryContextSetParent(new_pdcxt, PortalContext); + rel->rd_partdesc_nodetached = partdesc; + rel->rd_partdesc_nodetached_xmin = detached_xmin; } else { - /* - * This partdesc goes into relcache. - */ - - MemoryContextSetParent(new_pdcxt, CacheMemoryContext); - - /* - * But first, a kluge: if there's an old rd_pdcxt, it contains an old - * partition descriptor that may still be referenced somewhere. - * Preserve it, while not leaking it, by reattaching it as a child - * context of the new rd_pdcxt. Eventually it will get dropped by - * either RelationClose or RelationClearRelation. - */ - if (rel->rd_pdcxt != NULL) - MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt); - rel->rd_pdcxt = new_pdcxt; - - /* Store it into relcache */ rel->rd_partdesc = partdesc; } diff --git a/src/include/catalog/pg_inherits.h b/src/include/catalog/pg_inherits.h index 4d28ede5a6..f47925588a 100644 --- a/src/include/catalog/pg_inherits.h +++ b/src/include/catalog/pg_inherits.h @@ -50,8 +50,10 @@ DECLARE_INDEX(pg_inherits_parent_index, 2187, on pg_inherits using btree(inhpare #define InheritsParentIndexId 2187 -extern List *find_inheritance_children(Oid parentrelId, bool omit_detached, - LOCKMODE lockmode, bool *detached_exist); +extern List *find_inheritance_children(Oid parentrelId, LOCKMODE lockmode); +extern List *find_inheritance_children_extended(Oid parentrelId, bool omit_detached, + LOCKMODE lockmode, bool *detached_exist, TransactionId *detached_xmin); + extern List *find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **parents); extern bool has_subclass(Oid relationId); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 34e25eb597..6e5a5bf4a7 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -127,6 +127,9 @@ typedef struct RelationData /* data managed by RelationGetPartitionDesc: */ PartitionDesc rd_partdesc; /* partition descriptor, or NULL */ + PartitionDesc rd_partdesc_nodetached; /* partition descriptor */ + TransactionId rd_partdesc_nodetached_xmin; /* xmin of partdesc */ + MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */ /* data managed by RelationGetPartitionQual: */