Hi!
Thank you for your interest in it!
These changes look very promising. Unfortunately, the proposed patches
conflict with each other regardless of the order in which they are applied:
```
error: patch failed: src/backend/utils/cache/typcache.c:356
error: src/backend/utils/cache/typcache.c: patch does not apply
```
Try increasing patch's -F (maximum fuzz) option.
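For example (file names here are hypothetical):
```
patch -p1 -F3 < 0001-typcache.patch
patch -p1 -F3 < 0002-typcache.patch
```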
Anyway, a union of both patches is attached.
--
Teodor Sigaev E-mail: teo...@sigaev.ru
WWW: http://www.sigaev.ru/
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 0d4d0b0a154..ccf798c440c 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -77,6 +77,15 @@
/* The main type cache hashtable searched by lookup_type_cache */
static HTAB *TypeCacheHash = NULL;
+/* Map from a relation's OID to its composite type's OID */
+typedef struct mapRelTypeEntry
+{
+ Oid typrelid;
+ Oid type_id;
+} mapRelTypeEntry;
+
+static HTAB *mapRelType = NULL;
+
/* List of type cache entries for domain types */
static TypeCacheEntry *firstDomainTypeEntry = NULL;
@@ -330,6 +339,14 @@ static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc);
static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc,
uint32 typmod);
+/*
+ * Hash function compatible with the one-argument system cache hash function.
+ */
+static uint32
+type_cache_syshash(const void *key, Size keysize)
+{
+ return GetSysCacheHashValue1(TYPEOID, ObjectIdGetDatum(*(const Oid*)key));
+}
/*
* lookup_type_cache
@@ -355,8 +372,21 @@ lookup_type_cache(Oid type_id, int flags)
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(TypeCacheEntry);
+ /*
+ * The hash function must be compatible with
+ * hash_seq_init_with_hash_value(). A TypeCacheEntry's hash value is
+ * taken from the system cache, and we use the same (compatible) function
+ * so that lookups by hash value can replace scans of the whole table.
+ */
+ ctl.hash = type_cache_syshash;
+
TypeCacheHash = hash_create("Type information cache", 64,
- &ctl, HASH_ELEM | HASH_BLOBS);
+ &ctl, HASH_ELEM | HASH_FUNCTION);
+
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(mapRelTypeEntry);
+ mapRelType = hash_create("Map reloid to typeoid", 64,
+ &ctl, HASH_ELEM | HASH_BLOBS);
/* Also set up callbacks for SI invalidations */
CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0);
@@ -407,8 +437,7 @@ lookup_type_cache(Oid type_id, int flags)
/* These fields can never change, by definition */
typentry->type_id = type_id;
- typentry->type_id_hash = GetSysCacheHashValue1(TYPEOID,
- ObjectIdGetDatum(type_id));
+ typentry->type_id_hash = get_hash_value(TypeCacheHash, &type_id);
/* Keep this part in sync with the code below */
typentry->typlen = typtup->typlen;
@@ -470,6 +499,24 @@ lookup_type_cache(Oid type_id, int flags)
ReleaseSysCache(tp);
}
+ /*
+ * Add a record to the relation->type map. We don't bother removing it if
+ * the type becomes disconnected from the relation; that seems impossible,
+ * but in any case keeping stale data is safe: in the worst case we will
+ * just do an extra cleanup of a cache entry.
+ */
+ if (OidIsValid(typentry->typrelid) && typentry->typtype == TYPTYPE_COMPOSITE)
+ {
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry*) hash_search(mapRelType,
+ &typentry->typrelid,
+ HASH_ENTER, NULL);
+
+ relentry->typrelid = typentry->typrelid;
+ relentry->type_id = typentry->type_id;
+ }
+
/*
* Look up opclasses if we haven't already and any dependent info is
* requested.
@@ -2264,6 +2311,37 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
CurrentSession->shared_typmod_table = typmod_table;
}
+static void
+invalidateCompositeTypeCacheEntry(TypeCacheEntry *typentry)
+{
+ /* Delete tupdesc if we have it */
+ if (typentry->tupDesc != NULL)
+ {
+ /*
+ * Release our refcount, and free the tupdesc if none remain.
+ * (Can't use DecrTupleDescRefCount because this reference is
+ * not logged in current resource owner.)
+ */
+ Assert(typentry->tupDesc->tdrefcount > 0);
+ if (--typentry->tupDesc->tdrefcount == 0)
+ FreeTupleDesc(typentry->tupDesc);
+ typentry->tupDesc = NULL;
+
+ /*
+ * Also clear tupDesc_identifier, so that anything watching
+ * that will realize that the tupdesc has possibly changed.
+ * (Alternatively, we could specify that to detect possible
+ * tupdesc change, one must check for tupDesc != NULL as well
+ * as tupDesc_identifier being the same as what was previously
+ * seen. That seems error-prone.)
+ */
+ typentry->tupDesc_identifier = 0;
+ }
+
+ /* Reset equality/comparison/hashing validity information */
+ typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+}
+
/*
* TypeCacheRelCallback
* Relcache inval callback function
@@ -2273,63 +2351,42 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
* whatever info we have cached about the composite type's comparability.
*
* This is called when a relcache invalidation event occurs for the given
- * relid. We must scan the whole typcache hash since we don't know the
- * type OID corresponding to the relid. We could do a direct search if this
- * were a syscache-flush callback on pg_type, but then we would need all
- * ALTER-TABLE-like commands that could modify a rowtype to issue syscache
- * invals against the rel's pg_type OID. The extra SI signaling could very
- * well cost more than we'd save, since in most usages there are not very
- * many entries in a backend's typcache. The risk of bugs-of-omission seems
- * high, too.
- *
- * Another possibility, with only localized impact, is to maintain a second
- * hashtable that indexes composite-type typcache entries by their typrelid.
- * But it's still not clear it's worth the trouble.
+ * relid. We cannot use the syscache to find the corresponding type because
+ * we may be called outside of a transaction. So, we maintain a separate map.
*/
static void
TypeCacheRelCallback(Datum arg, Oid relid)
{
- HASH_SEQ_STATUS status;
TypeCacheEntry *typentry;
- /* TypeCacheHash must exist, else this callback wouldn't be registered */
- hash_seq_init(&status, TypeCacheHash);
- while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+ /* mapRelType/TypeCacheHash must exist, else this callback wouldn't be registered */
+
+ if (OidIsValid(relid))
{
- if (typentry->typtype == TYPTYPE_COMPOSITE)
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry *) hash_search(mapRelType,
+ &relid,
+ HASH_FIND, NULL);
+
+ if (relentry != NULL)
{
- /* Skip if no match, unless we're zapping all composite types */
- if (relid != typentry->typrelid && relid != InvalidOid)
- continue;
+ typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
+ &relentry->type_id,
+ HASH_FIND, NULL);
- /* Delete tupdesc if we have it */
- if (typentry->tupDesc != NULL)
+ if (typentry != NULL)
{
- /*
- * Release our refcount, and free the tupdesc if none remain.
- * (Can't use DecrTupleDescRefCount because this reference is
- * not logged in current resource owner.)
- */
- Assert(typentry->tupDesc->tdrefcount > 0);
- if (--typentry->tupDesc->tdrefcount == 0)
- FreeTupleDesc(typentry->tupDesc);
- typentry->tupDesc = NULL;
+ Assert(typentry->typtype == TYPTYPE_COMPOSITE);
+ Assert(relid == typentry->typrelid);
- /*
- * Also clear tupDesc_identifier, so that anything watching
- * that will realize that the tupdesc has possibly changed.
- * (Alternatively, we could specify that to detect possible
- * tupdesc change, one must check for tupDesc != NULL as well
- * as tupDesc_identifier being the same as what was previously
- * seen. That seems error-prone.)
- */
- typentry->tupDesc_identifier = 0;
+ invalidateCompositeTypeCacheEntry(typentry);
}
-
- /* Reset equality/comparison/hashing validity information */
- typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
- else if (typentry->typtype == TYPTYPE_DOMAIN)
+
+ for (typentry = firstDomainTypeEntry;
+ typentry != NULL;
+ typentry = typentry->nextDomain)
{
/*
* If it's domain over composite, reset flags. (We don't bother
@@ -2341,6 +2398,35 @@ TypeCacheRelCallback(Datum arg, Oid relid)
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
}
+ else
+ {
+ HASH_SEQ_STATUS status;
+
+ /*
+ * Relid = 0, so we need to reset all composite types in the cache. We
+ * should also reset flags for domain types; since we loop over all hash
+ * entries anyway, do both in a single scan.
+ */
+ hash_seq_init(&status, TypeCacheHash);
+ while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+ {
+ if (typentry->typtype == TYPTYPE_COMPOSITE)
+ {
+ invalidateCompositeTypeCacheEntry(typentry);
+ }
+ else if (typentry->typtype == TYPTYPE_DOMAIN)
+ {
+ /*
+ * If it's domain over composite, reset flags. (We don't bother
+ * trying to determine whether the specific base type needs a
+ * reset.) Note that if we haven't determined whether the base
+ * type is composite, we don't need to reset anything.
+ */
+ if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)
+ typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+ }
+ }
+ }
}
/*
@@ -2358,20 +2444,21 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
TypeCacheEntry *typentry;
/* TypeCacheHash must exist, else this callback wouldn't be registered */
- hash_seq_init(&status, TypeCacheHash);
+ if (hashvalue == 0)
+ hash_seq_init(&status, TypeCacheHash);
+ else
+ hash_seq_init_with_hash_value(&status, TypeCacheHash, hashvalue);
+
while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
{
- /* Is this the targeted type row (or it's a total cache flush)? */
- if (hashvalue == 0 || typentry->type_id_hash == hashvalue)
- {
- /*
- * Mark the data obtained directly from pg_type as invalid. Also,
- * if it's a domain, typnotnull might've changed, so we'll need to
- * recalculate its constraints.
- */
- typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
- TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
- }
+ Assert(hashvalue == 0 || typentry->type_id_hash == hashvalue);
+ /*
+ * Mark the data obtained directly from pg_type as invalid. Also,
+ * if it's a domain, typnotnull might've changed, so we'll need to
+ * recalculate its constraints.
+ */
+ typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
+ TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
}
}
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a4152080b5d..0180e096f2d 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -962,6 +962,33 @@ hash_search(HTAB *hashp,
foundPtr);
}
+/*
+ * Helper function that does the initial bucket lookup for a given hashvalue.
+ */
+static HASHBUCKET*
+hash_do_lookup(HTAB *hashp, uint32 hashvalue, uint32 *bucket)
+{
+ HASHHDR *hctl = hashp->hctl;
+ HASHSEGMENT segp;
+ long segment_num;
+ long segment_ndx;
+
+ /*
+ * Do the initial lookup
+ */
+ *bucket = calc_bucket(hctl, hashvalue);
+
+ segment_num = *bucket >> hashp->sshift;
+ segment_ndx = MOD(*bucket, hashp->ssize);
+
+ segp = hashp->dir[segment_num];
+
+ if (segp == NULL)
+ hash_corrupted(hashp);
+
+ return &segp[segment_ndx];
+}
+
void *
hash_search_with_hash_value(HTAB *hashp,
const void *keyPtr,
@@ -973,9 +1000,6 @@ hash_search_with_hash_value(HTAB *hashp,
int freelist_idx = FREELIST_IDX(hctl, hashvalue);
Size keysize;
uint32 bucket;
- long segment_num;
- long segment_ndx;
- HASHSEGMENT segp;
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HashCompareFunc match;
@@ -1008,17 +1032,7 @@ hash_search_with_hash_value(HTAB *hashp,
/*
* Do the initial lookup
*/
- bucket = calc_bucket(hctl, hashvalue);
-
- segment_num = bucket >> hashp->sshift;
- segment_ndx = MOD(bucket, hashp->ssize);
-
- segp = hashp->dir[segment_num];
-
- if (segp == NULL)
- hash_corrupted(hashp);
-
- prevBucketPtr = &segp[segment_ndx];
+ prevBucketPtr = hash_do_lookup(hashp, hashvalue, &bucket);
currBucket = *prevBucketPtr;
/*
@@ -1159,14 +1173,10 @@ hash_update_hash_key(HTAB *hashp,
const void *newKeyPtr)
{
HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
- HASHHDR *hctl = hashp->hctl;
uint32 newhashvalue;
Size keysize;
uint32 bucket;
uint32 newbucket;
- long segment_num;
- long segment_ndx;
- HASHSEGMENT segp;
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HASHBUCKET *oldPrevPtr;
@@ -1187,17 +1197,7 @@ hash_update_hash_key(HTAB *hashp,
* this to be able to unlink it from its hash chain, but as a side benefit
* we can verify the validity of the passed existingEntry pointer.
*/
- bucket = calc_bucket(hctl, existingElement->hashvalue);
-
- segment_num = bucket >> hashp->sshift;
- segment_ndx = MOD(bucket, hashp->ssize);
-
- segp = hashp->dir[segment_num];
-
- if (segp == NULL)
- hash_corrupted(hashp);
-
- prevBucketPtr = &segp[segment_ndx];
+ prevBucketPtr = hash_do_lookup(hashp, existingElement->hashvalue, &bucket);
currBucket = *prevBucketPtr;
while (currBucket != NULL)
@@ -1219,18 +1219,7 @@ hash_update_hash_key(HTAB *hashp,
* chain we want to put the entry into.
*/
newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
-
- newbucket = calc_bucket(hctl, newhashvalue);
-
- segment_num = newbucket >> hashp->sshift;
- segment_ndx = MOD(newbucket, hashp->ssize);
-
- segp = hashp->dir[segment_num];
-
- if (segp == NULL)
- hash_corrupted(hashp);
-
- prevBucketPtr = &segp[segment_ndx];
+ prevBucketPtr = hash_do_lookup(hashp, newhashvalue, &newbucket);
currBucket = *prevBucketPtr;
/*
@@ -1423,10 +1412,27 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
status->hashp = hashp;
status->curBucket = 0;
status->curEntry = NULL;
+ status->hasHashvalue = false;
if (!hashp->frozen)
register_seq_scan(hashp);
}
+/*
+ * Same as above, but initializes a sequential scan that returns, one by
+ * one, only the elements with the given hashvalue.
+ */
+void
+hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+ uint32 hashvalue)
+{
+ hash_seq_init(status, hashp);
+
+ status->hasHashvalue = true;
+ status->hashvalue = hashvalue;
+
+ status->curEntry = *hash_do_lookup(hashp, hashvalue, &status->curBucket);
+}
+
void *
hash_seq_search(HASH_SEQ_STATUS *status)
{
@@ -1440,6 +1446,24 @@ hash_seq_search(HASH_SEQ_STATUS *status)
uint32 curBucket;
HASHELEMENT *curElem;
+ if (status->hasHashvalue)
+ {
+ /*
+ * Scan entries within only the current bucket, because only this bucket
+ * can contain entries with the given hashvalue.
+ */
+ while ((curElem = status->curEntry) != NULL)
+ {
+ status->curEntry = curElem->link;
+ if (status->hashvalue != curElem->hashvalue)
+ continue;
+ return (void *) ELEMENTKEY(curElem);
+ }
+
+ hash_seq_term(status);
+ return NULL;
+ }
+
if ((curElem = status->curEntry) != NULL)
{
/* Continuing scan of curBucket... */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index da26941f6db..c99d74625f7 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -122,6 +122,8 @@ typedef struct
HTAB *hashp;
uint32 curBucket; /* index of current bucket */
HASHELEMENT *curEntry; /* current entry in bucket */
+ bool hasHashvalue; /* true if hashvalue was provided */
+ uint32 hashvalue; /* hashvalue to start seqscan over hash */
} HASH_SEQ_STATUS;
/*
@@ -141,6 +143,8 @@ extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
const void *newKeyPtr);
extern long hash_get_num_entries(HTAB *hashp);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
+extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+ uint32 hashvalue);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern void hash_seq_term(HASH_SEQ_STATUS *status);
extern void hash_freeze(HTAB *hashp);
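For reference, a minimal sketch of how the new hash_seq_init_with_hash_value()
API is meant to be used from a syscache-invalidation callback, mirroring the
TypeCacheTypCallback change above. MyHash, MyEntry and invalidate_entry are
hypothetical names; the table's hash function must be syscache-compatible, as
TypeCacheHash is made by this patch:
```
/*
 * Syscache callback over a dynahash table whose hash function matches the
 * syscache's. With hashvalue != 0, only entries in the one bucket that can
 * hold that hash value are visited, instead of the whole table.
 */
static void
MyCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
{
	HASH_SEQ_STATUS status;
	MyEntry    *entry;

	if (hashvalue == 0)
		hash_seq_init(&status, MyHash);	/* total cache flush */
	else
		hash_seq_init_with_hash_value(&status, MyHash, hashvalue);

	while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
		invalidate_entry(entry);	/* only matching entries are returned */
}
```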