Robert Haas <robertmh...@gmail.com> writes: > As a parenthetical note, I observe that relcache.c seems to know > almost nothing about rd_partcheck. rd_partkey and rd_partdesc both > have handling in RelationClearRelation(), but rd_partcheck does not, > and I suspect that's wrong. So the problems are probably not confined > to the relcache-drop-time problem that you observed.
I concluded that that's not parenthetical but pretty relevant... Attached is a revised version of Amit's patch at [1] that makes these data structures be treated more similarly. I also added some Asserts and comment improvements to address the complaints I made upthread about under-documentation of all this logic. I also cleaned up the problem the code had with failing to distinguish "partcheck list is NIL" from "partcheck list hasn't been computed yet". For a relation with no such constraints, generate_partition_qual would do the full pushups every time. I'm not sure if the case actually occurs commonly enough that that's a performance problem, but failure to account for it made my added assertions fall over :-( and I thought fixing it was better than weakening the assertions. I haven't made back-patch versions yet. I'd expect they could be substantially the same, except the two new fields have to go at the end of struct RelationData to avoid ABI breaks. Oh: we might also need some change in RelationCacheInitializePhase3, depending on the decision about [2]. regards, tom lane [1] https://www.postgresql.org/message-id/036852f2-ba7f-7a1f-21c6-00bc3515e...@lab.ntt.co.jp [2] https://www.postgresql.org/message-id/5706.1555093...@sss.pgh.pa.us
diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index e436d1e..4d6595b 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -49,10 +49,13 @@ typedef struct PartitionDirectoryEntry /* * RelationBuildPartitionDesc - * Form rel's partition descriptor + * Form rel's partition descriptor, and store in relcache entry * - * Not flushed from the cache by RelationClearRelation() unless changed because - * of addition or removal of partition. + * Note: the descriptor won't be flushed from the cache by + * RelationClearRelation() unless it's changed because of + * addition or removal of a partition. Hence, code holding a lock + * that's sufficient to prevent that can assume that rd_partdesc + * won't change underneath it. */ void RelationBuildPartitionDesc(Relation rel) @@ -173,7 +176,19 @@ RelationBuildPartitionDesc(Relation rel) ++i; } - /* Now build the actual relcache partition descriptor */ + /* Assert we aren't about to leak any old data structure */ + Assert(rel->rd_pdcxt == NULL); + Assert(rel->rd_partdesc == NULL); + + /* + * Now build the actual relcache partition descriptor. Note that the + * order of operations here is fairly critical. If we fail partway + * through this code, we won't have leaked memory because the rd_pdcxt is + * attached to the relcache entry immediately, so it'll be freed whenever + * the entry is rebuilt or destroyed. However, we don't assign to + * rd_partdesc until the cached data structure is fully complete and + * valid, so that no other code might try to use it. + */ rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, "partition descriptor", ALLOCSET_SMALL_SIZES); diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c index 8f43d68..29b4a76 100644 --- a/src/backend/utils/cache/partcache.c +++ b/src/backend/utils/cache/partcache.c @@ -41,11 +41,11 @@ static List *generate_partition_qual(Relation rel); /* * RelationBuildPartitionKey - * Build and attach to relcache partition key data of relation + * Build partition key data of relation, and attach to relcache * * Partitioning key data is a complex structure; to avoid complicated logic to * free individual elements whenever the relcache entry is flushed, we give it - * its own memory context, child of CacheMemoryContext, which can easily be + * its own memory context, a child of CacheMemoryContext, which can easily be * deleted on its own. To avoid leaking memory in that context in case of an * error partway through this function, the context is initially created as a * child of CurTransactionContext and only re-parented to CacheMemoryContext @@ -144,6 +144,7 @@ RelationBuildPartitionKey(Relation relation) MemoryContextSwitchTo(oldcxt); } + /* Allocate assorted arrays in the partkeycxt, which we'll fill below */ oldcxt = MemoryContextSwitchTo(partkeycxt); key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); @@ -151,8 +152,6 @@ RelationBuildPartitionKey(Relation relation) key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - - /* Gather type and collation info as well */ key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); @@ -235,6 +234,10 @@ RelationBuildPartitionKey(Relation relation) ReleaseSysCache(tuple); + /* Assert that we're not leaking any old data during assignments below */ + Assert(relation->rd_partkeycxt == NULL); + Assert(relation->rd_partkey == NULL); + /* * Success --- reparent our context and make the relcache point to the * newly constructed key @@ -305,11 +308,9 @@ get_partition_qual_relid(Oid relid) * Generate partition predicate from rel's partition bound expression. The * function returns a NIL list if there is no predicate. * - * Result expression tree is stored CacheMemoryContext to ensure it survives - * as long as the relcache entry. But we should be running in a less long-lived - * working context. To avoid leaking cache memory if this routine fails partway - * through, we build in working memory and then copy the completed structure - * into cache memory. + * We cache a copy of the result in the relcache entry, after constructing + * it using the caller's context. This approach avoids leaking any data + * into long-lived cache contexts, especially if we fail partway through. */ static List * generate_partition_qual(Relation rel) @@ -326,8 +327,8 @@ generate_partition_qual(Relation rel) /* Guard against stack overflow due to overly deep partition tree */ check_stack_depth(); - /* Quick copy */ - if (rel->rd_partcheck != NIL) + /* If we already cached the result, just return a copy */ + if (rel->rd_partcheckvalid) return copyObject(rel->rd_partcheck); /* Grab at least an AccessShareLock on the parent table */ @@ -373,13 +374,36 @@ generate_partition_qual(Relation rel) if (found_whole_row) elog(ERROR, "unexpected whole-row reference found in partition key"); - /* Save a copy in the relcache */ - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - rel->rd_partcheck = copyObject(result); - MemoryContextSwitchTo(oldcxt); + /* Assert that we're not leaking any old data during assignments below */ + Assert(rel->rd_partcheckcxt == NULL); + Assert(rel->rd_partcheck == NIL); + + /* + * Save a copy in the relcache. The order of these operations is fairly + * critical to avoid memory leaks and ensure that we don't leave a corrupt + * relcache entry if we fail partway through copyObject. + * + * If, as is definitely possible, the partcheck list is NIL, then we do + * not need to make a context to hold it. + */ + if (result != NIL) + { + rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext, + "partition constraint", + ALLOCSET_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt, + RelationGetRelationName(rel)); + oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt); + rel->rd_partcheck = copyObject(result); + MemoryContextSwitchTo(oldcxt); + } + else + rel->rd_partcheck = NIL; + rel->rd_partcheckvalid = true; /* Keep the parent locked until commit */ relation_close(parent, NoLock); + /* Return the working copy to the caller */ return result; } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 64f3c2e..4b884d5 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1175,11 +1175,15 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) } else { - relation->rd_partkeycxt = NULL; relation->rd_partkey = NULL; + relation->rd_partkeycxt = NULL; relation->rd_partdesc = NULL; relation->rd_pdcxt = NULL; } + /* ... but partcheck is not loaded till asked for */ + relation->rd_partcheck = NIL; + relation->rd_partcheckvalid = false; + relation->rd_partcheckcxt = NULL; /* * initialize access method information @@ -2364,8 +2368,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_partkeycxt); if (relation->rd_pdcxt) MemoryContextDelete(relation->rd_pdcxt); - if (relation->rd_partcheck) - pfree(relation->rd_partcheck); + if (relation->rd_partcheckcxt) + MemoryContextDelete(relation->rd_partcheckcxt); if (relation->rd_fdwroutine) pfree(relation->rd_fdwroutine); pfree(relation); @@ -5600,18 +5604,20 @@ load_relcache_init_file(bool shared) * format is complex and subject to change). They must be rebuilt if * needed by RelationCacheInitializePhase3. This is not expected to * be a big performance hit since few system catalogs have such. Ditto - * for RLS policy data, index expressions, predicates, exclusion info, - * and FDW info. + * for RLS policy data, partition info, index expressions, predicates, + * exclusion info, and FDW info. */ rel->rd_rules = NULL; rel->rd_rulescxt = NULL; rel->trigdesc = NULL; rel->rd_rsdesc = NULL; - rel->rd_partkeycxt = NULL; rel->rd_partkey = NULL; - rel->rd_pdcxt = NULL; + rel->rd_partkeycxt = NULL; rel->rd_partdesc = NULL; + rel->rd_pdcxt = NULL; rel->rd_partcheck = NIL; + rel->rd_partcheckvalid = false; + rel->rd_partcheckcxt = NULL; rel->rd_indexprs = NIL; rel->rd_indpred = NIL; rel->rd_exclops = NULL; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 764e6fa..51ad250 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -95,11 +95,13 @@ typedef struct RelationData List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */ bool rd_fkeyvalid; /* true if list has been computed */ - MemoryContext rd_partkeycxt; /* private memory cxt for the below */ struct PartitionKeyData *rd_partkey; /* partition key, or NULL */ - MemoryContext rd_pdcxt; /* private context for partdesc */ + MemoryContext rd_partkeycxt; /* private context for rd_partkey, if any */ struct PartitionDescData *rd_partdesc; /* partitions, or NULL */ + MemoryContext rd_pdcxt; /* private context for partdesc, if any */ List *rd_partcheck; /* partition CHECK quals */ + bool rd_partcheckvalid; /* true if list has been computed */ + MemoryContext rd_partcheckcxt; /* private cxt for rd_partcheck, if any */ /* data managed by RelationGetIndexList: */ List *rd_indexlist; /* list of OIDs of indexes on relation */