On Thu, Aug 21, 2025 at 12:53:09AM -0400, Tom Lane wrote: > If you prefer to regard this as an independent issue, that's okay with > me ... but it's touching most of the same lines of code, so it seems > to me that it'd be about as easy to deal with both items at once.
I'd rather do that after a second look at the whole picture as this led to an accumulation of bullet points. The long->int64 switch was looking OK on its own and I have applied it. Attached is the second piece of refactoring, where I have introduced a couple more APIs in pg_bitutils.h (cross-checked the resulting computations of the old and new routines with some quick hacks, in case, and they matched): pg_nextpower2_32_bound, replacing next_pow2_int pg_nextpower2_64_bound, replacing next_pow2_int64 pg_ceil_log2_64_bound, replacing my_log2() pg_ceil_log2_32_bound, not used, present for symmetry. An extra thing is a suggested change for pg_nextpower2_32(), to use a uint64 instead of a uint32 as argument, which is caused by next_pow2_int64() and next_pow2_int(), that both used int64 previously. There's likely some opinion differences according to one's taste; that's my idea of the refactoring to remove the duplication. -- Michael
From e4a6e9da211c7d81b226c3b714449d90bf309470 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Fri, 22 Aug 2025 12:36:04 +0900 Subject: [PATCH] Clean up next-pow2 routines of dynahash.c, moving to bitutils.h --- src/include/port/pg_bitutils.h | 66 +++++++++++++++++++++++- src/include/utils/dynahash.h | 20 ------- src/backend/access/heap/heapam.c | 6 +-- src/backend/executor/nodeAgg.c | 3 +- src/backend/executor/nodeHash.c | 7 ++- src/backend/replication/logical/worker.c | 3 +- src/backend/utils/hash/dynahash.c | 50 +++--------------- 7 files changed, 80 insertions(+), 75 deletions(-) delete mode 100644 src/include/utils/dynahash.h diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index c7901bf8ddc0..10478e702c74 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -186,7 +186,7 @@ pg_rightmost_one_pos64(uint64 word) * 'num' mustn't be 0 or be above PG_UINT32_MAX / 2 + 1. */ static inline uint32 -pg_nextpower2_32(uint32 num) +pg_nextpower2_32(uint64 num) { Assert(num > 0 && num <= PG_UINT32_MAX / 2 + 1); @@ -198,7 +198,7 @@ pg_nextpower2_32(uint32 num) if ((num & (num - 1)) == 0) return num; /* already power 2 */ - return ((uint32) 1) << (pg_leftmost_one_pos32(num) + 1); + return ((uint32) 1) << (pg_leftmost_one_pos32((uint32) num) + 1); } /* @@ -224,6 +224,34 @@ pg_nextpower2_64(uint64 num) return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1); } +/* + * pg_nextpower2_32_bound + * Returns the next higher power of 2 above 'num', or 'num' if it's + * already a power of 2, with upper bound safeguard. + */ +static inline uint32 +pg_nextpower2_32_bound(uint64 num) +{ + if (num > PG_INT32_MAX / 2) + num = PG_INT32_MAX / 2; + + return pg_nextpower2_32(num); +} + +/* + * pg_nextpower2_64_bound + * Returns the next higher power of 2 above 'num', or 'num' if it's + * already a power of 2, with upper bound safeguard. + */ +static inline uint64 +pg_nextpower2_64_bound(uint64 num) +{ + if (num > PG_INT64_MAX / 2) + num = PG_INT64_MAX / 2; + + return pg_nextpower2_64(num); +} + /* * pg_prevpower2_32 * Returns the next lower power of 2 below 'num', or 'num' if it's @@ -276,6 +304,40 @@ pg_ceil_log2_64(uint64 num) return pg_leftmost_one_pos64(num - 1) + 1; } +/* + * pg_ceil_log2_64_bound + * Returns equivalent of ceil(log2(num)), with overflow safeguard + * for pg_leftmost_one_pos32. + */ +static inline uint32 +pg_ceil_log2_32_bound(uint64 num) +{ + if (num > PG_INT32_MAX / 2) + num = PG_INT32_MAX / 2; + + if (num < 2) + return 0; + else + return pg_leftmost_one_pos32(num - 1) + 1; +} + +/* + * pg_ceil_log2_64_bound + * Returns equivalent of ceil(log2(num)), with overflow safeguard + * for pg_leftmost_one_pos64. + */ +static inline uint64 +pg_ceil_log2_64_bound(uint64 num) +{ + if (num > PG_INT64_MAX / 2) + num = PG_INT64_MAX / 2; + + if (num < 2) + return 0; + else + return pg_leftmost_one_pos64(num - 1) + 1; +} + /* * With MSVC on x86_64 builds, try using native popcnt instructions via the * __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h deleted file mode 100644 index a4362d3f65e5..000000000000 --- a/src/include/utils/dynahash.h +++ /dev/null @@ -1,20 +0,0 @@ -/*------------------------------------------------------------------------- - * - * dynahash.h - * POSTGRES dynahash.h file definitions - * - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * src/include/utils/dynahash.h - * - *------------------------------------------------------------------------- - */ -#ifndef DYNAHASH_H -#define DYNAHASH_H - -extern int my_log2(int64 num); - -#endif /* DYNAHASH_H */ diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0dcd6ee817e0..9bfe55f9e214 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8617,8 +8617,8 @@ bottomup_sort_and_shrink_cmp(const void *arg1, const void *arg2) */ if (group1->ntids != group2->ntids) { - uint32 ntids1 = pg_nextpower2_32((uint32) group1->ntids); - uint32 ntids2 = pg_nextpower2_32((uint32) group2->ntids); + uint32 ntids1 = pg_nextpower2_32((uint64) group1->ntids); + uint32 ntids2 = pg_nextpower2_32((uint64) group2->ntids); if (ntids1 > ntids2) return -1; @@ -8745,7 +8745,7 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate) group->npromisingtids = 4; else group->npromisingtids = - pg_nextpower2_32((uint32) group->npromisingtids); + pg_nextpower2_32((uint64) group->npromisingtids); } /* Sort groups and rearrange caller's deltids array */ diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 377e016d7322..b5fa9af9d992 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -267,7 +267,6 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/datum.h" -#include "utils/dynahash.h" #include "utils/expandeddatum.h" #include "utils/injection_point.h" #include "utils/logtape.h" @@ -2115,7 +2114,7 @@ hash_choose_num_partitions(double input_groups, double hashentrysize, npartitions = (int) dpartitions; /* ceil(log2(npartitions)) */ - partition_bits = my_log2(npartitions); + partition_bits = pg_ceil_log2_64_bound(npartitions); /* make sure that we don't exhaust the hash bits */ if (partition_bits + used_bits >= 32) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8d2201ab67fa..14d934ab42b2 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -36,7 +36,6 @@ #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "port/pg_bitutils.h" -#include "utils/dynahash.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" @@ -340,7 +339,7 @@ MultiExecParallelHash(HashState *node) */ hashtable->curbatch = -1; hashtable->nbuckets = pstate->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_64_bound(hashtable->nbuckets); hashtable->totalTuples = pstate->total_tuples; /* @@ -480,7 +479,7 @@ ExecHashTableCreate(HashState *state) &nbuckets, &nbatch, &num_skew_mcvs); /* nbuckets must be a power of 2 */ - log2_nbuckets = my_log2(nbuckets); + log2_nbuckets = pg_ceil_log2_64_bound(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* @@ -3499,7 +3498,7 @@ ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno) dsa_get_address(hashtable->area, hashtable->batches[batchno].shared->buckets); hashtable->nbuckets = hashtable->parallel_state->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_64_bound(hashtable->nbuckets); hashtable->current_chunk = NULL; hashtable->current_chunk_shared = InvalidDsaPointer; hashtable->batches[batchno].at_least_one_chunk = false; diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 22ad9051db3f..664db8096b26 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -268,7 +268,6 @@ #include "storage/procarray.h" #include "tcop/tcopprot.h" #include "utils/acl.h" -#include "utils/dynahash.h" #include "utils/guc.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -4911,7 +4910,7 @@ subxact_info_read(Oid subid, TransactionId xid) len = sizeof(SubXactInfo) * subxact_data.nsubxacts; /* we keep the maximum as a power of 2 */ - subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts); + subxact_data.nsubxacts_max = 1 << pg_ceil_log2_64_bound(subxact_data.nsubxacts); /* * Allocate subxact information in the logical streaming context. We need diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1aeee5be42ac..81b0239eb69f 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -102,7 +102,6 @@ #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" -#include "utils/dynahash.h" #include "utils/memutils.h" @@ -281,8 +280,6 @@ static bool init_htab(HTAB *hashp, int64 nelem); pg_noreturn static void hash_corrupted(HTAB *hashp); static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr); -static int64 next_pow2_int64(int64 num); -static int next_pow2_int(int64 num); static void register_seq_scan(HTAB *hashp); static void deregister_seq_scan(HTAB *hashp); static bool has_seq_scans(HTAB *hashp); @@ -539,7 +536,7 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) * be less than INT_MAX (see init_htab()), so call the int version of * next_pow2. */ - Assert(info->num_partitions == next_pow2_int(info->num_partitions)); + Assert(info->num_partitions == pg_nextpower2_32_bound(info->num_partitions)); hctl->num_partitions = info->num_partitions; } @@ -547,7 +544,7 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) if (flags & HASH_SEGMENT) { hctl->ssize = info->ssize; - hctl->sshift = my_log2(info->ssize); + hctl->sshift = pg_ceil_log2_64_bound(info->ssize); /* ssize had better be a power of 2 */ Assert(hctl->ssize == (1L << hctl->sshift)); } @@ -716,7 +713,7 @@ init_htab(HTAB *hashp, int64 nelem) * Allocate space for the next greater power of two number of buckets, * assuming a desired maximum load factor of 1. */ - nbuckets = next_pow2_int(nelem); + nbuckets = pg_nextpower2_32_bound(nelem); /* * In a partitioned table, nbuckets must be at least equal to @@ -734,7 +731,7 @@ init_htab(HTAB *hashp, int64 nelem) * Figure number of directory segments needed, round up to a power of 2 */ nsegs = (nbuckets - 1) / hctl->ssize + 1; - nsegs = next_pow2_int(nsegs); + nsegs = pg_nextpower2_32_bound(nsegs); /* * Make sure directory is big enough. If pre-allocated directory is too @@ -791,9 +788,9 @@ hash_estimate_size(int64 num_entries, Size entrysize) elementAllocCnt; /* estimate number of buckets wanted */ - nBuckets = next_pow2_int64(num_entries); + nBuckets = pg_nextpower2_64_bound(num_entries); /* # of segments needed for nBuckets */ - nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1); + nSegments = pg_nextpower2_64_bound((nBuckets - 1) / DEF_SEGSIZE + 1); /* directory entries */ nDirEntries = DEF_DIRSIZE; while (nDirEntries < nSegments) @@ -834,9 +831,9 @@ hash_select_dirsize(int64 num_entries) nDirEntries; /* estimate number of buckets wanted */ - nBuckets = next_pow2_int64(num_entries); + nBuckets = pg_nextpower2_64_bound(num_entries); /* # of segments needed for nBuckets */ - nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1); + nSegments = pg_nextpower2_64_bound((nBuckets - 1) / DEF_SEGSIZE + 1); /* directory entries */ nDirEntries = DEF_DIRSIZE; while (nDirEntries < nSegments) @@ -1812,37 +1809,6 @@ hash_corrupted(HTAB *hashp) elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname); } -/* calculate ceil(log base 2) of num */ -int -my_log2(int64 num) -{ - /* - * guard against too-large input, which would be invalid for - * pg_ceil_log2_*() - */ - if (num > PG_INT64_MAX / 2) - num = PG_INT64_MAX / 2; - - return pg_ceil_log2_64(num); -} - -/* calculate first power of 2 >= num, bounded to what will fit in a int64 */ -static int64 -next_pow2_int64(int64 num) -{ - /* my_log2's internal range check is sufficient */ - return 1L << my_log2(num); -} - -/* calculate first power of 2 >= num, bounded to what will fit in an int */ -static int -next_pow2_int(int64 num) -{ - if (num > INT_MAX / 2) - num = INT_MAX / 2; - return 1 << my_log2(num); -} - /************************* SEQ SCAN TRACKING ************************/ -- 2.50.1
signature.asc
Description: PGP signature