On Mon, Sep 14, 2020 at 11:35 PM David Rowley <dgrowle...@gmail.com> wrote: > I just did some benchmarking with this patch using the same recovery > benchmark that I used in [1] and also the two patches that I posted in > [2]. Additionally, I added a PANIC at the end of recovery so that I > could repeat the recovery over and over again with the same WAL. > > [data]
N Min Max Median Avg Stddev x 10 62.15 67.06 64.86 64.132 1.6188528 + 10 59.6 63.81 63.13 62.233 1.4983031 Difference at 95.0% confidence -1.899 +/- 1.46553 -2.96108% +/- 2.28517% (Student's t, pooled s = 1.55974) Thanks! Hmm, small but apparently significant and in line with Jakub's report, and I suppose the effect will be greater with other nearby recovery performance patches applied that halve the times. Annoyingly, I can't reproduce this speedup on my local i9-9900; maybe it requires a different CPU... > I looked over the patch and the only thing I saw was that we might > also want to remove the following line: > > #define DEF_FFACTOR 1 /* default fill factor */ Right, thanks. Fixed in the attached. > The 2nd most costly call to hash_search_with_hash_value() came in via > hash_search() via smgropen(). That does use HASH_ENTER, which could > have triggered the divide code. The main caller of smgropen() was > XLogReadBufferExtended(). > > So, it looks unlikely that any gains we are seeing are from improved > buffer lookups. It's more likely they're coming from more optimal > XLogReadBufferExtended() I think we call smgropen() twice for every buffer referenced in the WAL: XLogReadBufferExtended() and again in ReadBufferWithoutRelcache(). We could reduce it to once with some refactoring, but I am looking into whether I can reduce it to zero as a side-effect of another change, more soon...
From efecf68b159a3c65517e91076009cb4e5cc6f157 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Thu, 10 Sep 2020 12:27:25 +1200 Subject: [PATCH v2] Remove custom fill factor support from dynahash.c. Since ancient times we have had support for a fill factor (maximum load factor) to be set for a dynahash hash table, but: 1. It had to be an integer value >= 1, whereas for in memory hash tables interesting load factor targets are probably somewhere near the 0.75-1.0 range. 2. It was implemented in a way that performed an expensive division operation that regularly showed up in profiles. 3. We are not aware of anyone ever having used a non-default value. Therefore, remove support, making fill factor 1 the implicit value. Author: Jakub Wartak <jakub.war...@tomtom.com> Reviewed-by: Alvaro Herrera <alvhe...@2ndquadrant.com> Reviewed-by: Tomas Vondra <tomas.von...@2ndquadrant.com> Reviewed-by: Thomas Munro <thomas.mu...@gmail.com> Reviewed-by: David Rowley <dgrowle...@gmail.com> Discussion: https://postgr.es/m/VI1PR0701MB696044FC35013A96FECC7AC8F62D0%40VI1PR0701MB6960.eurprd07.prod.outlook.com --- src/backend/utils/hash/dynahash.c | 20 ++++++-------------- src/include/utils/hsearch.h | 2 -- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index f4fbccdd7e..1122e2e5e5 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -122,7 +122,6 @@ #define DEF_SEGSIZE 256 #define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */ #define DEF_DIRSIZE 256 -#define DEF_FFACTOR 1 /* default fill factor */ /* Number of freelists to be used for a partitioned hash table. 
*/ #define NUM_FREELISTS 32 @@ -191,7 +190,6 @@ struct HASHHDR Size keysize; /* hash key length in bytes */ Size entrysize; /* total user element size in bytes */ long num_partitions; /* # partitions (must be power of 2), or 0 */ - long ffactor; /* target fill factor */ long max_dsize; /* 'dsize' limit if directory is fixed size */ long ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ @@ -497,8 +495,6 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags) /* ssize had better be a power of 2 */ Assert(hctl->ssize == (1L << hctl->sshift)); } - if (flags & HASH_FFACTOR) - hctl->ffactor = info->ffactor; /* * SHM hash tables have fixed directory size passed by the caller. @@ -603,8 +599,6 @@ hdefault(HTAB *hashp) hctl->num_partitions = 0; /* not partitioned */ - hctl->ffactor = DEF_FFACTOR; - /* table has no fixed maximum size */ hctl->max_dsize = NO_MAX_DSIZE; @@ -670,11 +664,10 @@ init_htab(HTAB *hashp, long nelem) SpinLockInit(&(hctl->freeList[i].mutex)); /* - * Divide number of elements by the fill factor to determine a desired - * number of buckets. Allocate space for the next greater power of two - * number of buckets + * Allocate space for the next greater power of two number of buckets, + * assuming a desired maximum load factor of 1. 
*/ - nbuckets = next_pow2_int((nelem - 1) / hctl->ffactor + 1); + nbuckets = next_pow2_int(nelem); /* * In a partitioned table, nbuckets must be at least equal to @@ -733,7 +726,6 @@ init_htab(HTAB *hashp, long nelem) "DIRECTORY SIZE ", hctl->dsize, "SEGMENT SIZE ", hctl->ssize, "SEGMENT SHIFT ", hctl->sshift, - "FILL FACTOR ", hctl->ffactor, "MAX BUCKET ", hctl->max_bucket, "HIGH MASK ", hctl->high_mask, "LOW MASK ", hctl->low_mask, @@ -761,7 +753,7 @@ hash_estimate_size(long num_entries, Size entrysize) elementAllocCnt; /* estimate number of buckets wanted */ - nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1); + nBuckets = next_pow2_long(num_entries); /* # of segments needed for nBuckets */ nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1); /* directory entries */ @@ -804,7 +796,7 @@ hash_select_dirsize(long num_entries) nDirEntries; /* estimate number of buckets wanted */ - nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1); + nBuckets = next_pow2_long(num_entries); /* # of segments needed for nBuckets */ nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1); /* directory entries */ @@ -975,7 +967,7 @@ hash_search_with_hash_value(HTAB *hashp, * order of these tests is to try to check cheaper conditions first. 
*/ if (!IS_PARTITIONED(hctl) && !hashp->frozen && - hctl->freeList[0].nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor && + hctl->freeList[0].nentries > (long) (hctl->max_bucket + 1) && !has_seq_scans(hashp)) (void) expand_table(hashp); } diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index f1deb9beab..bebf89b3c4 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -68,7 +68,6 @@ typedef struct HASHCTL long ssize; /* segment size */ long dsize; /* (initial) directory size */ long max_dsize; /* limit to dsize if dir size is limited */ - long ffactor; /* fill factor */ Size keysize; /* hash key length in bytes */ Size entrysize; /* total user element size in bytes */ HashValueFunc hash; /* hash function */ @@ -83,7 +82,6 @@ typedef struct HASHCTL #define HASH_PARTITION 0x0001 /* Hashtable is used w/partitioned locking */ #define HASH_SEGMENT 0x0002 /* Set segment size */ #define HASH_DIRSIZE 0x0004 /* Set directory size (initial and max) */ -#define HASH_FFACTOR 0x0008 /* Set fill factor */ #define HASH_ELEM 0x0010 /* Set keysize and entrysize */ #define HASH_BLOBS 0x0020 /* Select support functions for binary keys */ #define HASH_FUNCTION 0x0040 /* Set user defined hash function */ -- 2.20.1