Hi!
Thank you for your interest in it!
These changes look very promising. Unfortunately, the proposed patches
conflict with each other regardless of the order in which they are applied:
```
error: patch failed: src/backend/utils/cache/typcache.c:356
error: src/backend/utils/cache/typcache.c: patch does not apply
```
Try increasing patch's -F (maximum fuzz) option.
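For example (file names here are hypothetical):
```
patch -p1 -F3 < 0001-typcache.patch
patch -p1 -F3 < 0002-typcache.patch
```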
Anyway, a union of both patches is attached.
--
Teodor Sigaev E-mail: teo...@sigaev.ru
WWW: http://www.sigaev.ru/
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 0d4d0b0a154..ccf798c440c 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -77,6 +77,15 @@
/* The main type cache hashtable searched by lookup_type_cache */
static HTAB *TypeCacheHash = NULL;
+/* Map from a relation's OID to its composite type's OID */
+typedef struct mapRelTypeEntry
+{
+ Oid typrelid;
+ Oid type_id;
+} mapRelTypeEntry;
+
+static HTAB *mapRelType = NULL;
+
/* List of type cache entries for domain types */
static TypeCacheEntry *firstDomainTypeEntry = NULL;
@@ -330,6 +339,14 @@ static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc);
static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc,
uint32 typmod);
+/*
+ * Hash function compatible with the one-argument system cache hash function.
+ */
+static uint32
+type_cache_syshash(const void *key, Size keysize)
+{
+ return GetSysCacheHashValue1(TYPEOID, ObjectIdGetDatum(*(const Oid*)key));
+}
/*
* lookup_type_cache
@@ -355,8 +372,21 @@ lookup_type_cache(Oid type_id, int flags)
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(TypeCacheEntry);
+ /*
+ * The hash function must be compatible with
+ * hash_seq_init_with_hash_value(). A TypeCacheEntry's hash value is
+ * taken from the system cache, and we use the same (compatible) function
+ * so that lookups by hash value can replace scans of the whole table.
+ */
+ ctl.hash = type_cache_syshash;
+
TypeCacheHash = hash_create("Type information cache", 64,
- &ctl, HASH_ELEM | HASH_BLOBS);
+ &ctl, HASH_ELEM | HASH_FUNCTION);
+
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(mapRelTypeEntry);
+ mapRelType = hash_create("Map reloid to typeoid", 64,
+ &ctl, HASH_ELEM | HASH_BLOBS);
/* Also set up callbacks for SI invalidations */
CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0);
@@ -407,8 +437,7 @@ lookup_type_cache(Oid type_id, int flags)
/* These fields can never change, by definition */
typentry->type_id = type_id;
- typentry->type_id_hash = GetSysCacheHashValue1(TYPEOID,
- ObjectIdGetDatum(type_id));
+ typentry->type_id_hash = get_hash_value(TypeCacheHash, &type_id);
/* Keep this part in sync with the code below */
typentry->typlen = typtup->typlen;
@@ -470,6 +499,24 @@ lookup_type_cache(Oid type_id, int flags)
ReleaseSysCache(tp);
}
+ /*
+ * Add a record to the relation->type map. We don't bother removing it if
+ * the type becomes disconnected from the relation; that seems impossible,
+ * but in any case keeping stale data is safe: in the worst case we will
+ * just do an extra cleanup of a cache entry.
+ */
+ if (OidIsValid(typentry->typrelid) && typentry->typtype == TYPTYPE_COMPOSITE)
+ {
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry*) hash_search(mapRelType,
+ &typentry->typrelid,
+ HASH_ENTER, NULL);
+
+ relentry->typrelid = typentry->typrelid;
+ relentry->type_id = typentry->type_id;
+ }
+
/*
* Look up opclasses if we haven't already and any dependent info is
* requested.
@@ -2264,6 +2311,37 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
CurrentSession->shared_typmod_table = typmod_table;
}
+static void
+invalidateCompositeTypeCacheEntry(TypeCacheEntry *typentry)
+{
+ /* Delete tupdesc if we have it */
+ if (typentry->tupDesc != NULL)
+ {
+ /*
+ * Release our refcount, and free the tupdesc if none remain.
+ * (Can't use DecrTupleDescRefCount because this reference is
+ * not logged in current resource owner.)
+ */
+ Assert(typentry->tupDesc->tdrefcount > 0);
+ if (--typentry->tupDesc->tdrefcount == 0)
+ FreeTupleDesc(typentry->tupDesc);
+ typentry->tupDesc = NULL;
+
+ /*
+ * Also clear tupDesc_identifier, so that anything watching
+ * that will realize that the tupdesc has possibly changed.
+ * (Alternatively, we could specify that to detect possible
+ * tupdesc change, one must check for tupDesc != NULL as well
+ * as tupDesc_identifier being the same as what was previously
+ * seen. That seems error-prone.)
+ */
+ typentry->tupDesc_identifier = 0;
+ }
+
+ /* Reset equality/comparison/hashing validity information */
+ typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+}
+
/*
* TypeCacheRelCallback
* Relcache inval callback function
@@ -2273,63 +2351,42 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
* whatever info we have cached about the composite type's comparability.
*
* This is called when a relcache invalidation event occurs for the given
- * relid. We must scan the whole typcache hash since we don't know the
- * type OID corresponding to the relid. We could do a direct search if this
- * were a syscache-flush callback on pg_type, but then we would need all
- * ALTER-TABLE-like commands that could modify a rowtype to issue syscache
- * invals against the rel's pg_type OID. The extra SI signaling could very
- * well cost more than we'd save, since in most usages there are not very
- * many entries in a backend's typcache. The risk of bugs-of-omission seems
- * high, too.
- *
- * Another possibility, with only localized impact, is to maintain a second
- * hashtable that indexes composite-type typcache entries by their typrelid.
- * But it's still not clear it's worth the trouble.
+ * relid. We cannot use the syscache to find the corresponding type because
+ * we may be called outside of a transaction. So, we maintain a separate map.
*/
static void
TypeCacheRelCallback(Datum arg, Oid relid)
{
- HASH_SEQ_STATUS status;
TypeCacheEntry *typentry;
- /* TypeCacheHash must exist, else this callback wouldn't be registered */
- hash_seq_init(&status, TypeCacheHash);
- while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+ /* mapRelType/TypeCacheHash must exist, else this callback wouldn't be registered */
+
+ if (OidIsValid(relid))
{
- if (typentry->typtype == TYPTYPE_COMPOSITE)
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry *) hash_search(mapRelType,
+ &relid,
+ HASH_FIND, NULL);
+
+ if (relentry != NULL)
{
- /* Skip if no match, unless we're zapping all composite types */
- if (relid != typentry->typrelid && relid != InvalidOid)
- continue;
+ typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
+ &relentry->type_id,
+ HASH_FIND, NULL);
- /* Delete tupdesc if we have it */
- if (typentry->tupDesc != NULL)
+ if (typentry != NULL)
{
- /*
- * Release our refcount, and free the tupdesc if none remain.
- * (Can't use DecrTupleDescRefCount because this reference is
- * not logged in current resource owner.)
- */
- Assert(typentry->tupDesc->tdrefcount > 0);
- if (--typentry->tupDesc->tdrefcount == 0)
- FreeTupleDesc(typentry->tupDesc);
- typentry->tupDesc = NULL;
+ Assert(typentry->typtype == TYPTYPE_COMPOSITE);
+ Assert(relid == typentry->typrelid);
- /*
- * Also clear tupDesc_identifier, so that anything watching
- * that will realize that the tupdesc has possibly changed.
- * (Alternatively, we could specify that to detect possible
- * tupdesc change, one must check for tupDesc != NULL as well
- * as tupDesc_identifier being the same as what was previously
- * seen. That seems error-prone.)
- */
- typentry->tupDesc_identifier = 0;
+ invalidateCompositeTypeCacheEntry(typentry);
}
-
- /* Reset equality/comparison/hashing validity information */
- typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
- else if (typentry->typtype == TYPTYPE_DOMAIN)
+
+ for (typentry = firstDomainTypeEntry;
+ typentry != NULL;
+ typentry = typentry->nextDomain)
{
/*
* If it's domain over composite, reset flags. (We don't bother
@@ -2341,6 +2398,35 @@ TypeCacheRelCallback(Datum arg, Oid relid)
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
}
+ else
+ {
+ HASH_SEQ_STATUS status;
+
+ /*
+ * Relid = 0, so we need to reset all composite types in the cache. We
+ * should also reset flags for domain types; since we loop over all hash
+ * entries anyway, do both in a single scan.
+ */
+ hash_seq_init(&status, TypeCacheHash);
+ while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+ {
+ if (typentry->typtype == TYPTYPE_COMPOSITE)
+ {
+ invalidateCompositeTypeCacheEntry(typentry);
+ }
+ else if (typentry->typtype == TYPTYPE_DOMAIN)
+ {
+ /*
+ * If it's domain over composite, reset flags. (We don't bother
+ * trying to determine whether the specific base type needs a
+ * reset.) Note that if we haven't determined whether the base
+ * type is composite, we don't need to reset anything.
+ */
+ if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)
+ typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+ }
+ }
+ }
}
/*
@@ -2358,20 +2444,21 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
TypeCacheEntry *typentry;
/* TypeCacheHash must exist, else this callback wouldn't be registered */
- hash_seq_init(&status, TypeCacheHash);
+ if (hashvalue == 0)
+ hash_seq_init(&status, TypeCacheHash);
+ else
+ hash_seq_init_with_hash_value(&status, TypeCacheHash, hashvalue);
+
while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
{
- /* Is this the targeted type row (or it's a total cache flush)? */
- if (hashvalue == 0 || typentry->type_id_hash == hashvalue)
- {
- /*
- * Mark the data obtained directly from pg_type as invalid. Also,
- * if it's a domain, typnotnull might've changed, so we'll need to
- * recalculate its constraints.
- */
- typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
- TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
- }
+ Assert(hashvalue == 0 || typentry->type_id_hash == hashvalue);
+ /*
+ * Mark the data obtained directly from pg_type as invalid. Also,
+ * if it's a domain, typnotnull might've changed, so we'll need to
+ * recalculate its constraints.
+ */
+ typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
+ TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
}
}
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a4152080b5d..0180e096f2d 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -962,6 +962,33 @@ hash_search(HTAB *hashp,
foundPtr);
}
+/*
+ * Helper function that does the initial bucket lookup for a given hashvalue.
+ */
+static HASHBUCKET*
+hash_do_lookup(HTAB *hashp, uint32 hashvalue, uint32 *bucket)
+{
+ HASHHDR *hctl = hashp->hctl;
+ HASHSEGMENT segp;
+ long segment_num;
+ long segment_ndx;
+
+ /*
+ * Do the initial lookup
+ */
+ *bucket = calc_bucket(hctl, hashvalue);
+
+ segment_num = *bucket >> hashp->sshift;
+ segment_ndx = MOD(*bucket, hashp->ssize);
+
+ segp = hashp->dir[segment_num];
+
+ if (segp == NULL)
+ hash_corrupted(hashp);
+
+ return &segp[segment_ndx];
+}
+
void *
hash_search_with_hash_value(HTAB *hashp,
const void *keyPtr,
@@ -973,9 +1000,6 @@ hash_search_with_hash_value(HTAB *hashp,
int freelist_idx = FREELIST_IDX(hctl, hashvalue);
Size keysize;
uint32 bucket;
- long segment_num;
- long segment_ndx;
- HASHSEGMENT segp;
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HashCompareFunc match;
@@ -1008,17 +1032,7 @@ hash_search_with_hash_value(HTAB *hashp,
/*
* Do the initial lookup
*/
- bucket = calc_bucket(hctl, hashvalue);
-
- segment_num = bucket >> hashp->sshift;
- segment_ndx = MOD(bucket, hashp->ssize);
-
- segp = hashp->dir[segment_num];
-
- if (segp == NULL)
- hash_corrupted(hashp);
-
- prevBucketPtr = &segp[segment_ndx];
+ prevBucketPtr = hash_do_lookup(hashp, hashvalue, &bucket);
currBucket = *prevBucketPtr;
/*
@@ -1159,14 +1173,10 @@ hash_update_hash_key(HTAB *hashp,
const void *newKeyPtr)
{
HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
- HASHHDR *hctl = hashp->hctl;
uint32 newhashvalue;
Size keysize;
uint32 bucket;
uint32 newbucket;
- long segment_num;
- long segment_ndx;
- HASHSEGMENT segp;
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HASHBUCKET *oldPrevPtr;
@@ -1187,17 +1197,7 @@ hash_update_hash_key(HTAB *hashp,
* this to be able to unlink it from its hash chain, but as a side benefit
* we can verify the validity of the passed existingEntry pointer.
*/
- bucket = calc_bucket(hctl, existingElement->hashvalue);
-
- segment_num = bucket >> hashp->sshift;
- segment_ndx = MOD(bucket, hashp->ssize);
-
- segp = hashp->dir[segment_num];
-
- if (segp == NULL)
- hash_corrupted(hashp);
-
- prevBucketPtr = &segp[segment_ndx];
+ prevBucketPtr = hash_do_lookup(hashp, existingElement->hashvalue, &bucket);
currBucket = *prevBucketPtr;
while (currBucket != NULL)
@@ -1219,18 +1219,7 @@ hash_update_hash_key(HTAB *hashp,
* chain we want to put the entry into.
*/
newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
-
- newbucket = calc_bucket(hctl, newhashvalue);
-
- segment_num = newbucket >> hashp->sshift;
- segment_ndx = MOD(newbucket, hashp->ssize);
-
- segp = hashp->dir[segment_num];
-
- if (segp == NULL)
- hash_corrupted(hashp);
-
- prevBucketPtr = &segp[segment_ndx];
+ prevBucketPtr = hash_do_lookup(hashp, newhashvalue, &newbucket);
currBucket = *prevBucketPtr;
/*
@@ -1423,10 +1412,27 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
status->hashp = hashp;
status->curBucket = 0;
status->curEntry = NULL;
+ status->hasHashvalue = false;
if (!hashp->frozen)
register_seq_scan(hashp);
}
+/*
+ * Same as above, but initializes a sequential scan that returns, one by
+ * one, only the elements with the given hashvalue.
+ */
+void
+hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+ uint32 hashvalue)
+{
+ hash_seq_init(status, hashp);
+
+ status->hasHashvalue = true;
+ status->hashvalue = hashvalue;
+
+ status->curEntry = *hash_do_lookup(hashp, hashvalue, &status->curBucket);
+}
+
void *
hash_seq_search(HASH_SEQ_STATUS *status)
{
@@ -1440,6 +1446,24 @@ hash_seq_search(HASH_SEQ_STATUS *status)
uint32 curBucket;
HASHELEMENT *curElem;
+ if (status->hasHashvalue)
+ {
+ /*
+ * Scan entries within only the current bucket, because only this bucket
+ * can contain entries with the given hashvalue.
+ */
+ while ((curElem = status->curEntry) != NULL)
+ {
+ status->curEntry = curElem->link;
+ if (status->hashvalue != curElem->hashvalue)
+ continue;
+ return (void *) ELEMENTKEY(curElem);
+ }
+
+ hash_seq_term(status);
+ return NULL;
+ }
+
if ((curElem = status->curEntry) != NULL)
{
/* Continuing scan of curBucket... */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index da26941f6db..c99d74625f7 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -122,6 +122,8 @@ typedef struct
HTAB *hashp;
uint32 curBucket; /* index of current bucket */
HASHELEMENT *curEntry; /* current entry in bucket */
+ bool hasHashvalue; /* true if hashvalue was provided */
+ uint32 hashvalue; /* hashvalue to start seqscan over hash */
} HASH_SEQ_STATUS;
/*
@@ -141,6 +143,8 @@ extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
const void *newKeyPtr);
extern long hash_get_num_entries(HTAB *hashp);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
+extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+ uint32 hashvalue);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern void hash_seq_term(HASH_SEQ_STATUS *status);
extern void hash_freeze(HTAB *hashp);
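For reference, a minimal sketch of how the new hash_seq_init_with_hash_value()
API is meant to be used from a syscache-invalidation callback, mirroring the
TypeCacheTypCallback change above. MyHash, MyEntry and invalidate_entry are
hypothetical names; the table's hash function must be syscache-compatible, as
TypeCacheHash is made by this patch:
```
/*
 * Syscache callback over a dynahash table whose hash function matches the
 * syscache's. With hashvalue != 0, only entries in the one bucket that can
 * hold that hash value are visited, instead of the whole table.
 */
static void
MyCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
{
	HASH_SEQ_STATUS status;
	MyEntry    *entry;

	if (hashvalue == 0)
		hash_seq_init(&status, MyHash);	/* total cache flush */
	else
		hash_seq_init_with_hash_value(&status, MyHash, hashvalue);

	while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
		invalidate_entry(entry);	/* only matching entries are returned */
}
```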