Hi! I'd like to suggest two independent patches to improve the performance of type cache cleanup. I found a case where type cache cleanup was a cause of low performance. In short, a customer creates 100 thousand temporary tables in one transaction.
1) mapRelType.patch — It just adds a local map between a relation and its type, as was suggested in the comment above TypeCacheRelCallback(). Unfortunately, using the syscache here was impossible because this callback could be called outside a transaction, which makes catalog lookups impossible.
2) hash_seq_init_with_hash_value.patch — TypeCacheTypCallback() loops over the type hash to find the entry with a given hash value. There are two problems here: 1) there is no dynahash interface to search for an entry by a given hash value, and 2) the hash value calculation algorithm differs from the system cache's, even though the incoming hash value comes from the system cache. The patch addresses both issues. It introduces a hash_seq_init_with_hash_value() call, which initializes a hash sequential scan over the single bucket that could contain an entry with the given hash value, so hash_seq_search() will iterate only over such entries. The patch also changes the hash algorithm to match the syscache. In addition, the patch does a small refactoring of dynahash, extracting a common function hash_do_lookup() which performs the initial lookup in the hash.
An artificial performance test is attached; the command to run it is 'time psql < custom_types_and_array.sql'. Here I show only the last rollback time and the total execution time:
1) master 92d2ab7554f92b841ea71bcc72eaa8ab11aae662 Time: 33353,288 ms (00:33,353) psql < custom_types_and_array.sql 0,82s user 0,71s system 1% cpu 1:28,36 total 2) mapRelType.patch Time: 7455,581 ms (00:07,456) psql < custom_types_and_array.sql 1,39s user 1,19s system 6% cpu 41,220 total 3) hash_seq_init_with_hash_value.patch Time: 24975,886 ms (00:24,976) psql < custom_types_and_array.sql 1,33s user 1,25s system 3% cpu 1:19,77 total 4) both Time: 89,446 ms psql < custom_types_and_array.sql 0,72s user 0,52s system 10% cpu 12,137 total -- Teodor Sigaev E-mail: teo...@sigaev.ru WWW: http://www.sigaev.ru/
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index f411e33b8e7..49e27ca8ba4 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -331,6 +331,14 @@ static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc); static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc, uint32 typmod); +/* + * Hash function compatible with one-arg system cache hash function. + */ +static uint32 +type_cache_syshash(const void *key, Size keysize) +{ + return GetSysCacheHashValue1(TYPEOID, ObjectIdGetDatum(*(const Oid*)key)); +} /* * lookup_type_cache @@ -356,8 +364,16 @@ lookup_type_cache(Oid type_id, int flags) ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(TypeCacheEntry); + /* + * Hash function must be compatible to make possible work + * hash_seq_init_with_hash_value(). Hash value in TypeEntry is taken + * from system cache and we use the same (compatible) to use it's hash + * value to speedup search by hash value instead of scanning whole hash + */ + ctl.hash = type_cache_syshash; + TypeCacheHash = hash_create("Type information cache", 64, - &ctl, HASH_ELEM | HASH_BLOBS); + &ctl, HASH_ELEM | HASH_FUNCTION); /* Also set up callbacks for SI invalidations */ CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0); @@ -408,8 +424,7 @@ lookup_type_cache(Oid type_id, int flags) /* These fields can never change, by definition */ typentry->type_id = type_id; - typentry->type_id_hash = GetSysCacheHashValue1(TYPEOID, - ObjectIdGetDatum(type_id)); + typentry->type_id_hash = get_hash_value(TypeCacheHash, &type_id); /* Keep this part in sync with the code below */ typentry->typlen = typtup->typlen; @@ -2359,20 +2374,21 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue) TypeCacheEntry *typentry; /* TypeCacheHash must exist, else this callback wouldn't be registered */ - hash_seq_init(&status, TypeCacheHash); + if (hashvalue == 0) + hash_seq_init(&status, 
TypeCacheHash); + else + hash_seq_init_with_hash_value(&status, TypeCacheHash, hashvalue); + while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL) { - /* Is this the targeted type row (or it's a total cache flush)? */ - if (hashvalue == 0 || typentry->type_id_hash == hashvalue) - { - /* - * Mark the data obtained directly from pg_type as invalid. Also, - * if it's a domain, typnotnull might've changed, so we'll need to - * recalculate its constraints. - */ - typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA | - TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS); - } + Assert(hashvalue == 0 || typentry->type_id_hash == hashvalue); + /* + * Mark the data obtained directly from pg_type as invalid. Also, + * if it's a domain, typnotnull might've changed, so we'll need to + * recalculate its constraints. + */ + typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA | + TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS); } } diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index a4152080b5d..0180e096f2d 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -962,6 +962,33 @@ hash_search(HTAB *hashp, foundPtr); } +/* + * Helper function executed initial lookup of bucket by given hashvalue + */ +static HASHBUCKET* +hash_do_lookup(HTAB *hashp, uint32 hashvalue, uint32 *bucket) +{ + HASHHDR *hctl = hashp->hctl; + HASHSEGMENT segp; + long segment_num; + long segment_ndx; + + /* + * Do the initial lookup + */ + *bucket = calc_bucket(hctl, hashvalue); + + segment_num = *bucket >> hashp->sshift; + segment_ndx = MOD(*bucket, hashp->ssize); + + segp = hashp->dir[segment_num]; + + if (segp == NULL) + hash_corrupted(hashp); + + return &segp[segment_ndx]; +} + void * hash_search_with_hash_value(HTAB *hashp, const void *keyPtr, @@ -973,9 +1000,6 @@ hash_search_with_hash_value(HTAB *hashp, int freelist_idx = FREELIST_IDX(hctl, hashvalue); Size keysize; uint32 bucket; - long segment_num; - long segment_ndx; - HASHSEGMENT segp; HASHBUCKET 
currBucket; HASHBUCKET *prevBucketPtr; HashCompareFunc match; @@ -1008,17 +1032,7 @@ hash_search_with_hash_value(HTAB *hashp, /* * Do the initial lookup */ - bucket = calc_bucket(hctl, hashvalue); - - segment_num = bucket >> hashp->sshift; - segment_ndx = MOD(bucket, hashp->ssize); - - segp = hashp->dir[segment_num]; - - if (segp == NULL) - hash_corrupted(hashp); - - prevBucketPtr = &segp[segment_ndx]; + prevBucketPtr = hash_do_lookup(hashp, hashvalue, &bucket); currBucket = *prevBucketPtr; /* @@ -1159,14 +1173,10 @@ hash_update_hash_key(HTAB *hashp, const void *newKeyPtr) { HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry); - HASHHDR *hctl = hashp->hctl; uint32 newhashvalue; Size keysize; uint32 bucket; uint32 newbucket; - long segment_num; - long segment_ndx; - HASHSEGMENT segp; HASHBUCKET currBucket; HASHBUCKET *prevBucketPtr; HASHBUCKET *oldPrevPtr; @@ -1187,17 +1197,7 @@ hash_update_hash_key(HTAB *hashp, * this to be able to unlink it from its hash chain, but as a side benefit * we can verify the validity of the passed existingEntry pointer. */ - bucket = calc_bucket(hctl, existingElement->hashvalue); - - segment_num = bucket >> hashp->sshift; - segment_ndx = MOD(bucket, hashp->ssize); - - segp = hashp->dir[segment_num]; - - if (segp == NULL) - hash_corrupted(hashp); - - prevBucketPtr = &segp[segment_ndx]; + prevBucketPtr = hash_do_lookup(hashp, existingElement->hashvalue, &bucket); currBucket = *prevBucketPtr; while (currBucket != NULL) @@ -1219,18 +1219,7 @@ hash_update_hash_key(HTAB *hashp, * chain we want to put the entry into. 
*/ newhashvalue = hashp->hash(newKeyPtr, hashp->keysize); - - newbucket = calc_bucket(hctl, newhashvalue); - - segment_num = newbucket >> hashp->sshift; - segment_ndx = MOD(newbucket, hashp->ssize); - - segp = hashp->dir[segment_num]; - - if (segp == NULL) - hash_corrupted(hashp); - - prevBucketPtr = &segp[segment_ndx]; + prevBucketPtr = hash_do_lookup(hashp, newhashvalue, &newbucket); currBucket = *prevBucketPtr; /* @@ -1423,10 +1412,27 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp) status->hashp = hashp; status->curBucket = 0; status->curEntry = NULL; + status->hasHashvalue = false; if (!hashp->frozen) register_seq_scan(hashp); } +/* + * The same as previous but init sequentially search through hash table and + * return all the elements one by one with given hashvalue. + */ +void +hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp, + uint32 hashvalue) +{ + hash_seq_init(status, hashp); + + status->hasHashvalue = true; + status->hashvalue = hashvalue; + + status->curEntry = *hash_do_lookup(hashp, hashvalue, &status->curBucket); +} + void * hash_seq_search(HASH_SEQ_STATUS *status) { @@ -1440,6 +1446,24 @@ hash_seq_search(HASH_SEQ_STATUS *status) uint32 curBucket; HASHELEMENT *curElem; + if (status->hasHashvalue) + { + /* + * scan all entries in only one bucket because only current bucket could + * contain entries with given hashvalue + */ + while ((curElem = status->curEntry) != NULL) + { + status->curEntry = curElem->link; + if (status->hashvalue != curElem->hashvalue) + continue; + return (void *) ELEMENTKEY(curElem); + } + + hash_seq_term(status); + return NULL; + } + if ((curElem = status->curEntry) != NULL) { /* Continuing scan of curBucket... 
*/ diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index da26941f6db..c99d74625f7 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -122,6 +122,8 @@ typedef struct HTAB *hashp; uint32 curBucket; /* index of current bucket */ HASHELEMENT *curEntry; /* current entry in bucket */ + bool hasHashvalue; /* true if hashvalue was provided */ + uint32 hashvalue; /* hashvalue to start seqscan over hash */ } HASH_SEQ_STATUS; /* @@ -141,6 +143,8 @@ extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry, const void *newKeyPtr); extern long hash_get_num_entries(HTAB *hashp); extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp); +extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp, + uint32 hashvalue); extern void *hash_seq_search(HASH_SEQ_STATUS *status); extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp);
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index f411e33b8e7..72c309b84c6 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -78,6 +78,15 @@ /* The main type cache hashtable searched by lookup_type_cache */ static HTAB *TypeCacheHash = NULL; +/* The map from relation's oid to its type oid */ +typedef struct mapRelTypeEntry +{ + Oid typrelid; + Oid type_id; +} mapRelTypeEntry; + +static HTAB *mapRelType = NULL; + /* List of type cache entries for domain types */ static TypeCacheEntry *firstDomainTypeEntry = NULL; @@ -359,6 +368,11 @@ lookup_type_cache(Oid type_id, int flags) TypeCacheHash = hash_create("Type information cache", 64, &ctl, HASH_ELEM | HASH_BLOBS); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(mapRelTypeEntry); + mapRelType = hash_create("Map reloid to typeoid", 64, + &ctl, HASH_ELEM | HASH_BLOBS); + /* Also set up callbacks for SI invalidations */ CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0); CacheRegisterSyscacheCallback(TYPEOID, TypeCacheTypCallback, (Datum) 0); @@ -471,6 +485,24 @@ lookup_type_cache(Oid type_id, int flags) ReleaseSysCache(tp); } + /* + * Add record to map relation->type. We don't bother even of type became + * disconnected from relation, it seems to be impossible, but anyway, + * storing old data is safe, in a worstest case we will just do an extra + * cleanup cache entry. + */ + if (OidIsValid(typentry->typrelid) && typentry->typtype == TYPTYPE_COMPOSITE) + { + mapRelTypeEntry *relentry; + + relentry = (mapRelTypeEntry*) hash_search(mapRelType, + &typentry->typrelid, + HASH_ENTER, NULL); + + relentry->typrelid = typentry->typrelid; + relentry->type_id = typentry->type_id; + } + /* * Look up opclasses if we haven't already and any dependent info is * requested. 
@@ -2265,6 +2297,37 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry) CurrentSession->shared_typmod_table = typmod_table; } +static void +invalidateCompositeTypeCacheEntry(TypeCacheEntry *typentry) +{ + /* Delete tupdesc if we have it */ + if (typentry->tupDesc != NULL) + { + /* + * Release our refcount, and free the tupdesc if none remain. + * (Can't use DecrTupleDescRefCount because this reference is + * not logged in current resource owner.) + */ + Assert(typentry->tupDesc->tdrefcount > 0); + if (--typentry->tupDesc->tdrefcount == 0) + FreeTupleDesc(typentry->tupDesc); + typentry->tupDesc = NULL; + + /* + * Also clear tupDesc_identifier, so that anything watching + * that will realize that the tupdesc has possibly changed. + * (Alternatively, we could specify that to detect possible + * tupdesc change, one must check for tupDesc != NULL as well + * as tupDesc_identifier being the same as what was previously + * seen. That seems error-prone.) + */ + typentry->tupDesc_identifier = 0; + } + + /* Reset equality/comparison/hashing validity information */ + typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; +} + /* * TypeCacheRelCallback * Relcache inval callback function @@ -2274,63 +2337,42 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry) * whatever info we have cached about the composite type's comparability. * * This is called when a relcache invalidation event occurs for the given - * relid. We must scan the whole typcache hash since we don't know the - * type OID corresponding to the relid. We could do a direct search if this - * were a syscache-flush callback on pg_type, but then we would need all - * ALTER-TABLE-like commands that could modify a rowtype to issue syscache - * invals against the rel's pg_type OID. The extra SI signaling could very - * well cost more than we'd save, since in most usages there are not very - * many entries in a backend's typcache. The risk of bugs-of-omission seems - * high, too. 
- * - * Another possibility, with only localized impact, is to maintain a second - * hashtable that indexes composite-type typcache entries by their typrelid. - * But it's still not clear it's worth the trouble. + * relid. We don't able to use syscache to find correspoding type because of + * we could be called outside of transaction. So, we track a separate map. */ static void TypeCacheRelCallback(Datum arg, Oid relid) { - HASH_SEQ_STATUS status; TypeCacheEntry *typentry; - /* TypeCacheHash must exist, else this callback wouldn't be registered */ - hash_seq_init(&status, TypeCacheHash); - while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL) + /* mapRelType/TypeCacheHash must exist, else this callback wouldn't be registered */ + + if (OidIsValid(relid)) { - if (typentry->typtype == TYPTYPE_COMPOSITE) + mapRelTypeEntry *relentry; + + relentry = (mapRelTypeEntry *) hash_search(mapRelType, + &relid, + HASH_FIND, NULL); + + if (relentry != NULL) { - /* Skip if no match, unless we're zapping all composite types */ - if (relid != typentry->typrelid && relid != InvalidOid) - continue; + typentry = (TypeCacheEntry *) hash_search(TypeCacheHash, + &relentry->type_id, + HASH_FIND, NULL); - /* Delete tupdesc if we have it */ - if (typentry->tupDesc != NULL) + if (typentry != NULL) { - /* - * Release our refcount, and free the tupdesc if none remain. - * (Can't use DecrTupleDescRefCount because this reference is - * not logged in current resource owner.) - */ - Assert(typentry->tupDesc->tdrefcount > 0); - if (--typentry->tupDesc->tdrefcount == 0) - FreeTupleDesc(typentry->tupDesc); - typentry->tupDesc = NULL; + Assert(typentry->typtype == TYPTYPE_COMPOSITE); + Assert(relid == typentry->typrelid); - /* - * Also clear tupDesc_identifier, so that anything watching - * that will realize that the tupdesc has possibly changed. 
- * (Alternatively, we could specify that to detect possible - * tupdesc change, one must check for tupDesc != NULL as well - * as tupDesc_identifier being the same as what was previously - * seen. That seems error-prone.) - */ - typentry->tupDesc_identifier = 0; + invalidateCompositeTypeCacheEntry(typentry); } - - /* Reset equality/comparison/hashing validity information */ - typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; } - else if (typentry->typtype == TYPTYPE_DOMAIN) + + for (typentry = firstDomainTypeEntry; + typentry != NULL; + typentry = typentry->nextDomain) { /* * If it's domain over composite, reset flags. (We don't bother @@ -2342,6 +2384,35 @@ TypeCacheRelCallback(Datum arg, Oid relid) typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; } } + else + { + HASH_SEQ_STATUS status; + + /* + * Relid = 0, so we need to reset all composite types in cache. Also, we + * should reset flags for domain types, and we loop over all entries + * in hash, so, do it in single scan + */ + hash_seq_init(&status, TypeCacheHash); + while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL) + { + if (typentry->typtype == TYPTYPE_COMPOSITE) + { + invalidateCompositeTypeCacheEntry(typentry); + } + else if (typentry->typtype == TYPTYPE_DOMAIN) + { + /* + * If it's domain over composite, reset flags. (We don't bother + * trying to determine whether the specific base type needs a + * reset.) Note that if we haven't determined whether the base + * type is composite, we don't need to reset anything. + */ + if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE) + typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; + } + } + } } /*
custom_types_and_array.sql
Description: application/sql