On Thu, Mar 04, 2021 at 08:43:51AM +0000, Georgios wrote:
>
> ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
> On Thursday, March 4, 2021 9:21 AM, Michael Paquier <mich...@paquier.xyz> wrote:
>
> > On Thu, Mar 04, 2021 at 03:23:33PM +0800, Julien Rouhaud wrote:
> > > I was also considering adding new (add|mul)_*_size functions to avoid
> > > having too messy code.  I'm not terribly happy with xxx_shm_size(), as
> > > not all calls to those functions would require an alignment.  Maybe
> > > (add|mul)_shmem_align_size?  But before modifying dozens of calls,
> > > should we really fix those, only increase the "slop factor" a bit, or
> > > some mix of the two?  For instance, I can see that
> > > BackendStatusShmemSize() never had any padding consideration, while
> > > others do.  Maybe fixing only the contribs, the few macros like
> > > PredXactListDataSize that already do a MAXALIGN, SimpleLruShmemSize()
> > > and hash_estimate_size() would make for a short patch and should
> > > significantly improve the estimation.
> >
> > The lack of complaints in this area looks to me like a sign that we
> > may not really need to backpatch anything, so I would not be against
> > precise surgery, with a separate set of {add,mul}_size() routines
> > used where appropriate, so that it is easy to track down which size
> > estimations expect extra padding.  I would be curious to hear more
> > thoughts from others here.
> >
> > That said, calling a new routine something like add_shmem_align_size
> > makes its purpose clear.
>
> My limited opinion on the matter, after spending some time yesterday going
> through the related code, is that with the current API it is easy to miss
> something and underestimate, or simply to be sloppy.  If only from a
> readability point of view, adding the proposed add_shmem_align_size would
> be beneficial.
>
> I hold no opinion on backpatching.
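To make the naming discussion concrete, here's a minimal sketch of the helper
being proposed (not the final patch: it assumes the existing add_size() and
CACHELINEALIGN() from shmem.h and c.h, and the v2 patch below ends up using a
macro with the slightly different name add_shmem_aligned_size):

    #include "postgres.h"
    #include "storage/shmem.h"

    /*
     * Add s2 to a running shared-memory size estimate, padding s2 to a
     * cache-line boundary the same way ShmemAlloc() will pad the actual
     * allocation, so the estimate can't undershoot the allocation.
     */
    static inline Size
    add_shmem_align_size(Size s1, Size s2)
    {
        return add_size(s1, (Size) CACHELINEALIGN(s2));
    }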
So here's a v2 patch which hopefully accounts for all previously unaccounted
alignment padding.

There's also a significant change in the shared hash table size estimation.
AFAICT, allocation happens this way:

- the first init_size entries are allocated in num_freelists chunks of
  init_size / num_freelists entries each (on average), num_freelists being 1
  for a non-partitioned htab and NUM_FREELISTS (32) for a partitioned one.
- the rest of the entries, if any, are then allocated in groups of
  choose_nelem_alloc() entries.

With this patch applied, I see an extra 16KB of shared memory being
requested.
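To illustrate that arithmetic outside of dynahash.c, here's a standalone
sketch (NUM_FREELISTS matches dynahash's value, but NELEM_ALLOC is a made-up
stand-in for the result of choose_nelem_alloc(), and the overflow checks and
per-chunk alignment padding are omitted):

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define NUM_FREELISTS 32    /* as in dynahash.c */
    #define NELEM_ALLOC   48    /* stand-in for choose_nelem_alloc() */

    /*
     * Element space for a shared hash table: the first init_entries are
     * preallocated in one chunk per freelist, the rest in groups of
     * NELEM_ALLOC entries.
     */
    static size_t
    element_space(long num_entries, long init_entries, size_t entry_size,
                  int partitioned)
    {
        int     nfreelists = partitioned ? NUM_FREELISTS : 1;
        size_t  size;

        assert(init_entries <= num_entries);

        /* one allocation per freelist of init_entries / nfreelists entries */
        size = (size_t) nfreelists
            * ((size_t) (init_entries / nfreelists) * entry_size);

        /* remaining entries are allocated in groups of NELEM_ALLOC */
        if (num_entries > init_entries)
        {
            long    nallocs = (num_entries - init_entries - 1) / NELEM_ALLOC + 1;

            size += (size_t) nallocs * ((size_t) NELEM_ALLOC * entry_size);
        }

        return size;
    }

    int
    main(void)
    {
        /* e.g. 16384 entries, half preallocated, 64-byte entries, partitioned */
        printf("%zu bytes\n", element_space(16384, 8192, 64, 1));
        return 0;
    }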
From ddc03fbbebb8486d10b0fb0718bf6e4ed1291759 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouh...@free.fr>
Date: Sat, 27 Feb 2021 15:45:29 +0800
Subject: [PATCH v2] Fix various shared memory estimates.

---
 contrib/pg_prewarm/autoprewarm.c              |  2 +-
 .../pg_stat_statements/pg_stat_statements.c   |  2 +-
 src/backend/access/common/syncscan.c          |  2 +-
 src/backend/access/nbtree/nbtutils.c          |  3 +-
 src/backend/access/transam/multixact.c        |  2 +-
 src/backend/access/transam/slru.c             |  2 +-
 src/backend/access/transam/twophase.c         |  2 +-
 src/backend/access/transam/xlog.c             |  2 +-
 src/backend/commands/async.c                  |  1 +
 src/backend/postmaster/autovacuum.c           |  3 +-
 src/backend/postmaster/bgworker.c             |  2 +-
 src/backend/postmaster/checkpointer.c         |  2 +-
 src/backend/postmaster/pgstat.c               | 17 ++---
 src/backend/postmaster/postmaster.c           |  2 +-
 src/backend/replication/logical/launcher.c    |  3 +-
 src/backend/replication/logical/origin.c      |  3 +-
 src/backend/replication/slot.c                |  2 +-
 src/backend/replication/walreceiverfuncs.c    |  2 +-
 src/backend/replication/walsender.c           |  2 +-
 src/backend/storage/buffer/buf_init.c         | 14 ++--
 src/backend/storage/buffer/buf_table.c        |  3 +-
 src/backend/storage/buffer/freelist.c         |  2 +-
 src/backend/storage/ipc/dsm.c                 |  2 +-
 src/backend/storage/ipc/pmsignal.c            |  2 +-
 src/backend/storage/ipc/procarray.c           |  8 +--
 src/backend/storage/ipc/procsignal.c          |  2 +-
 src/backend/storage/ipc/sinvaladt.c           |  2 +-
 src/backend/storage/lmgr/lock.c               | 16 ++++-
 src/backend/storage/lmgr/lwlock.c             |  2 +-
 src/backend/storage/lmgr/predicate.c          | 22 +++---
 src/backend/storage/lmgr/proc.c               | 12 ++--
 src/backend/utils/hash/dynahash.c             | 69 ++++++++++++++-----
 src/backend/utils/time/snapmgr.c              |  2 +-
 src/include/storage/predicate_internals.h     |  4 +-
 src/include/storage/shmem.h                   | 13 ++++
 src/include/utils/hsearch.h                   |  2 +
 36 files changed, 146 insertions(+), 87 deletions(-)

diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index b3f73ea92d..887e68b288 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -138,7 +138,7 @@ _PG_init(void)
 
     EmitWarningsOnPlaceholders("pg_prewarm");
 
-    RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
+    RequestAddinShmemSpace(CACHELINEALIGN(sizeof(AutoPrewarmSharedState)));
 
     /* Register autoprewarm worker, if enabled. */
     if (autoprewarm)
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 62cccbfa44..4e045ffba7 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -1933,7 +1933,7 @@ pgss_memsize(void)
 {
     Size        size;
 
-    size = MAXALIGN(sizeof(pgssSharedState));
+    size = CACHELINEALIGN(sizeof(pgssSharedState));
     size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
 
     return size;
diff --git a/src/backend/access/common/syncscan.c b/src/backend/access/common/syncscan.c
index b7a28af4ad..c19f9cab59 100644
--- a/src/backend/access/common/syncscan.c
+++ b/src/backend/access/common/syncscan.c
@@ -125,7 +125,7 @@ static BlockNumber ss_search(RelFileNode relfilenode,
 Size
 SyncScanShmemSize(void)
 {
-    return SizeOfScanLocations(SYNC_SCAN_NELEM);
+    return (Size) CACHELINEALIGN(SizeOfScanLocations(SYNC_SCAN_NELEM));
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index d524310723..310ea904df 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -2066,7 +2066,8 @@ BTreeShmemSize(void)
 
     size = offsetof(BTVacInfo, vacuums);
     size = add_size(size, mul_size(MaxBackends, sizeof(BTOneVacInfo)));
-    return size;
+
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 1f9f1a1fa1..c789623c34 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1830,7 +1830,7 @@ MultiXactShmemSize(void)
     add_size(offsetof(MultiXactStateData, perBackendXactIds) + sizeof(MultiXactId), \
              mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
 
-    size = SHARED_MULTIXACT_STATE_SIZE;
+    size = CACHELINEALIGN(SHARED_MULTIXACT_STATE_SIZE);
     size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0));
     size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0));
 
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 82149ad782..2271e092e9 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -168,7 +168,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
     if (nlsns > 0)
         sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));    /* group_lsn[] */
 
-    return BUFFERALIGN(sz) + BLCKSZ * nslots;
+    return (Size) CACHELINEALIGN(BUFFERALIGN(sz) + BLCKSZ * nslots);
 }
 
 /*
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 80d2d20d6c..fca70fabac 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -245,7 +245,7 @@ TwoPhaseShmemSize(void)
     size = add_size(size, mul_size(max_prepared_xacts,
                                    sizeof(GlobalTransactionData)));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 void
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 377afb8732..64de453340 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5096,7 +5096,7 @@ XLOGShmemSize(void)
      * routine again below to compute the actual allocation size.
      */
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 void
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 4b16fb5682..6a0fd6ca10 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -513,6 +513,7 @@ AsyncShmemSize(void)
     /* This had better match AsyncShmemInit */
     size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
     size = add_size(size, offsetof(AsyncQueueControl, backend));
+    size = (Size) CACHELINEALIGN(size);
 
     size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
 
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 23ef23c13e..7ddf86af74 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -3414,7 +3414,8 @@ AutoVacuumShmemSize(void)
     size = MAXALIGN(size);
     size = add_size(size, mul_size(autovacuum_max_workers,
                                    sizeof(WorkerInfoData)));
-    return size;
+
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index bbbc09b0b5..d2f838eaed 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -148,7 +148,7 @@ BackgroundWorkerShmemSize(void)
     size = add_size(size, mul_size(max_worker_processes,
                                    sizeof(BackgroundWorkerSlot)));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 76f9f98ebb..6acd5e4c2b 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -862,7 +862,7 @@ CheckpointerShmemSize(void)
     size = offsetof(CheckpointerShmemStruct, requests);
     size = add_size(size, mul_size(NBuffers, sizeof(CheckpointerRequest)));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index f75b52719d..b08f7f1d56 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -2930,25 +2930,20 @@ BackendStatusShmemSize(void)
     Size        size;
 
     /* BackendStatusArray: */
-    size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
+    size = mul_size_and_shmem_align(sizeof(PgBackendStatus), NumBackendStatSlots);
     /* BackendAppnameBuffer: */
-    size = add_size(size,
-                    mul_size(NAMEDATALEN, NumBackendStatSlots));
+    size = add_shmem_aligned_size(size, mul_size(NAMEDATALEN, NumBackendStatSlots));
     /* BackendClientHostnameBuffer: */
-    size = add_size(size,
-                    mul_size(NAMEDATALEN, NumBackendStatSlots));
+    size = add_shmem_aligned_size(size, mul_size(NAMEDATALEN, NumBackendStatSlots));
     /* BackendActivityBuffer: */
-    size = add_size(size,
-                    mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
+    size = add_shmem_aligned_size(size, mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
 #ifdef USE_SSL
     /* BackendSslStatusBuffer: */
-    size = add_size(size,
-                    mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
+    size = add_shmem_aligned_size(size, mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
 #endif
 #ifdef ENABLE_GSS
     /* BackendGssStatusBuffer: */
-    size = add_size(size,
-                    mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots));
+    size = add_shmem_aligned_size(size, mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots));
 #endif
     return size;
 }
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index edab95a19e..716f7434e6 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -6425,7 +6425,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
 Size
 ShmemBackendArraySize(void)
 {
-    return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
+    return mul_size_and_shmem_align(MaxLivePostmasterChildren(), sizeof(Backend));
 }
 
 void
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index cb462a052a..7d20877920 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -697,7 +697,8 @@ ApplyLauncherShmemSize(void)
     size = MAXALIGN(size);
     size = add_size(size, mul_size(max_logical_replication_workers,
                                    sizeof(LogicalRepWorker)));
-    return size;
+
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 39471fddad..b9e9fa121a 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -503,7 +503,8 @@ ReplicationOriginShmemSize(void)
     size = add_size(size, mul_size(max_replication_slots,
                                    sizeof(ReplicationState)));
-    return size;
+
+    return (Size) CACHELINEALIGN(size);
 }
 
 void
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 75a087c2f9..c399c491e9 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -124,7 +124,7 @@ ReplicationSlotsShmemSize(void)
     size = add_size(size, mul_size(max_replication_slots,
                                    sizeof(ReplicationSlot)));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 63e60478ea..a8cab3fa28 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -45,7 +45,7 @@ WalRcvShmemSize(void)
 
     size = add_size(size, sizeof(WalRcvData));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /* Allocate and initialize walreceiver-related shared memory */
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 23baa4498a..d1c91cf354 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3055,7 +3055,7 @@ WalSndShmemSize(void)
     size = offsetof(WalSndCtlData, walsnds);
     size = add_size(size, mul_size(max_wal_senders, sizeof(WalSnd)));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /* Allocate and initialize walsender-related shared memory */
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index e9e4f35bb5..b92fdeef14 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -159,15 +159,13 @@ BufferShmemSize(void)
     Size        size = 0;
 
     /* size of buffer descriptors */
-    size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
-    /* to allow aligning buffer descriptors */
-    size = add_size(size, PG_CACHE_LINE_SIZE);
+    size = add_shmem_aligned_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
 
     /* size of data pages */
-    size = add_size(size, mul_size(NBuffers, BLCKSZ));
+    size = add_shmem_aligned_size(size, mul_size(NBuffers, BLCKSZ));
 
     /* size of stuff controlled by freelist.c */
-    size = add_size(size, StrategyShmemSize());
+    size = add_shmem_aligned_size(size, StrategyShmemSize());
 
     /*
      * It would be nice to include the I/O locks in the BufferDesc, but that
@@ -178,12 +176,10 @@ BufferShmemSize(void)
      * locks are not highly contended, we lay out the array with minimal
      * padding.
      */
-    size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
-    /* to allow aligning the above */
-    size = add_size(size, PG_CACHE_LINE_SIZE);
+    size = add_shmem_aligned_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
 
     /* size of checkpoint sort array in bufmgr.c */
-    size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
+    size = add_shmem_aligned_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
 
     return size;
 }
diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c
index caa03ae123..4200f99340 100644
--- a/src/backend/storage/buffer/buf_table.c
+++ b/src/backend/storage/buffer/buf_table.c
@@ -41,7 +41,8 @@ static HTAB *SharedBufHash;
 Size
 BufTableShmemSize(int size)
 {
-    return hash_estimate_size(size, sizeof(BufferLookupEnt));
+    return hash_estimate_size_ext(size, sizeof(BufferLookupEnt),
+                                  size, true);
 }
 
 /*
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 6be80476db..33e9e74f40 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -459,7 +459,7 @@ StrategyShmemSize(void)
     size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
 
     /* size of the shared replacement strategy control block */
-    size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
+    size = add_shmem_aligned_size(size, sizeof(BufferStrategyControl));
 
     return size;
 }
diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c
index b461a5f7e9..6ecf195c55 100644
--- a/src/backend/storage/ipc/dsm.c
+++ b/src/backend/storage/ipc/dsm.c
@@ -440,7 +440,7 @@ dsm_set_control_handle(dsm_handle h)
 size_t
 dsm_estimate_size(void)
 {
-    return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
+    return (size_t) CACHELINEALIGN(1024 * 1024 * min_dynamic_shared_memory);
 }
 
 /*
diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c
index 280c2395c9..d25080224a 100644
--- a/src/backend/storage/ipc/pmsignal.c
+++ b/src/backend/storage/ipc/pmsignal.c
@@ -124,7 +124,7 @@ PMSignalShmemSize(void)
     size = add_size(size, mul_size(MaxLivePostmasterChildren(),
                                    sizeof(sig_atomic_t)));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 4fc6ffb917..9b804f1152 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -358,6 +358,7 @@ ProcArrayShmemSize(void)
 
     size = offsetof(ProcArrayStruct, pgprocnos);
     size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
+    size = CACHELINEALIGN(size);
 
     /*
      * During Hot Standby processing we have a data structure called
@@ -377,11 +378,8 @@ ProcArrayShmemSize(void)
 
     if (EnableHotStandby)
     {
-        size = add_size(size,
-                        mul_size(sizeof(TransactionId),
-                                 TOTAL_MAX_CACHED_SUBXIDS));
-        size = add_size(size,
-                        mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
+        size = add_shmem_aligned_size(size, mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS));
+        size = add_shmem_aligned_size(size, mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
     }
 
     return size;
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index c6a8d4611e..2af7f3e343 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -113,7 +113,7 @@ ProcSignalShmemSize(void)
     size = mul_size(NumProcSignalSlots, sizeof(ProcSignalSlot));
     size = add_size(size, offsetof(ProcSignalHeader, psh_slot));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c
index 946bd8e3cb..8379bf12e7 100644
--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -207,7 +207,7 @@ SInvalShmemSize(void)
     size = offsetof(SISeg, procState);
     size = add_size(size, mul_size(sizeof(ProcState), MaxBackends));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 108b4d9023..f2fc8af5c5 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -54,7 +54,7 @@
 int         max_locks_per_xact; /* set by guc.c */
 
 #define NLOCKENTS() \
-    mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
+    mul_size_and_shmem_align(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
 
 
@@ -3527,17 +3527,27 @@ LockShmemSize(void)
 
     /* lock hash table */
     max_table_size = NLOCKENTS();
-    size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK)));
+    size = add_size(size, hash_estimate_size_ext(max_table_size, sizeof(LOCK),
+                                                 max_table_size / 2,
+                                                 true));
 
     /* proclock hash table */
     max_table_size *= 2;
-    size = add_size(size, hash_estimate_size(max_table_size, sizeof(PROCLOCK)));
+    size = add_size(size, hash_estimate_size_ext(max_table_size, sizeof(PROCLOCK),
+                                                 max_table_size / 2,
+                                                 true));
 
     /*
     * Since NLOCKENTS is only an estimate, add 10% safety margin.
     */
     size = add_size(size, size / 10);
 
+    /*
+     * Note: we don't count the fast-path structures, they come out of the
+     * "slop factor" added by CreateSharedMemoryAndSemaphores.
+     */
+
     return size;
 }
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 8cb6a6f042..6a47918616 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -456,7 +456,7 @@ LWLockShmemSize(void)
     /* Disallow adding any more named tranches. */
     lock_named_request_allowed = false;
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d493aeef0f..1dcadc00ae 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1359,13 +1359,17 @@ PredicateLockShmemSize(void)
 
     /* predicate lock target hash table */
     max_table_size = NPREDICATELOCKTARGETENTS();
-    size = add_size(size, hash_estimate_size(max_table_size,
-                                             sizeof(PREDICATELOCKTARGET)));
+    size = add_size(size, hash_estimate_size_ext(max_table_size,
+                                                 sizeof(PREDICATELOCKTARGET),
+                                                 max_table_size,
+                                                 true));
 
     /* predicate lock hash table */
     max_table_size *= 2;
-    size = add_size(size, hash_estimate_size(max_table_size,
-                                             sizeof(PREDICATELOCK)));
+    size = add_size(size, hash_estimate_size_ext(max_table_size,
+                                                 sizeof(PREDICATELOCK),
+                                                 max_table_size,
+                                                 true));
 
     /*
      * Since NPREDICATELOCKTARGETENTS is only an estimate, add 10% safety
@@ -1377,8 +1381,7 @@ PredicateLockShmemSize(void)
     max_table_size = MaxBackends + max_prepared_xacts;
     max_table_size *= 10;
     size = add_size(size, PredXactListDataSize);
-    size = add_size(size, mul_size((Size) max_table_size,
-                                   PredXactListElementDataSize));
+    size = add_shmem_aligned_size(size, mul_size((Size) max_table_size, PredXactListElementDataSize));
 
     /* transaction xid table */
     size = add_size(size, hash_estimate_size(max_table_size,
@@ -1387,14 +1390,13 @@ PredicateLockShmemSize(void)
     /* rw-conflict pool */
     max_table_size *= 5;
     size = add_size(size, RWConflictPoolHeaderDataSize);
-    size = add_size(size, mul_size((Size) max_table_size,
-                                   RWConflictDataSize));
+    size = add_shmem_aligned_size(size, mul_size((Size) max_table_size, RWConflictDataSize));
 
     /* Head for list of finished serializable transactions. */
-    size = add_size(size, sizeof(SHM_QUEUE));
+    size = add_shmem_aligned_size(size, sizeof(SHM_QUEUE));
 
     /* Shared memory structures for SLRU tracking of old committed xids. */
-    size = add_size(size, sizeof(SerialControlData));
+    size = add_shmem_aligned_size(size, sizeof(SerialControlData));
     size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0));
 
     return size;
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 897045ee27..28031c7d22 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -106,13 +106,13 @@ ProcGlobalShmemSize(void)
     add_size(MaxBackends, add_size(NUM_AUXILIARY_PROCS, max_prepared_xacts));
 
     /* ProcGlobal */
-    size = add_size(size, sizeof(PROC_HDR));
-    size = add_size(size, mul_size(TotalProcs, sizeof(PGPROC)));
-    size = add_size(size, sizeof(slock_t));
+    size = add_shmem_aligned_size(size, sizeof(PROC_HDR));
+    size = add_shmem_aligned_size(size, mul_size(TotalProcs, sizeof(PGPROC)));
+    size = add_shmem_aligned_size(size, sizeof(slock_t));
 
-    size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->xids)));
-    size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->subxidStates)));
-    size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->statusFlags)));
+    size = add_shmem_aligned_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->xids)));
+    size = add_shmem_aligned_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->subxidStates)));
+    size = add_shmem_aligned_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->statusFlags)));
 
     return size;
 }
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 6546e3c7c7..030ca3917c 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -769,23 +769,43 @@ init_htab(HTAB *hashp, long nelem)
     return true;
 }
 
+/*
+ * Wrapper around hash_estimate_size_ext() for a non-partitioned hash table
+ * with all entries preallocated.
+ */
+Size
+hash_estimate_size(long num_entries, Size entrysize)
+{
+    return hash_estimate_size_ext(num_entries, entrysize, num_entries, false);
+}
+
 /*
  * Estimate the space needed for a hashtable containing the given number
- * of entries of given size.
+ * of entries of given size, and whether or not it is partitioned.
  * NOTE: this is used to estimate the footprint of hashtables in shared
  * memory; therefore it does not count HTAB which is in local memory.
  * NB: assumes that all hash structure parameters have default values!
  */
 Size
-hash_estimate_size(long num_entries, Size entrysize)
+hash_estimate_size_ext(long num_entries, Size entrysize, long init_entries,
+                       bool partitioned)
 {
-    Size        size;
+    Size        size,
+                segmentSize,
+                freelistAllocSize;
     long        nBuckets,
                 nSegments,
                 nDirEntries,
-                nElementAllocs,
                 elementSize,
-                elementAllocCnt;
+                nelemFreelist;
+    int         nFreelists;
+
+    if (partitioned)
+        nFreelists = NUM_FREELISTS;
+    else
+        nFreelists = 1;
+
+    Assert(init_entries <= num_entries);
 
     /* estimate number of buckets wanted */
     nBuckets = next_pow2_long(num_entries);
@@ -797,19 +817,36 @@ hash_estimate_size(long num_entries, Size entrysize)
         nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */
 
     /* fixed control info */
-    size = MAXALIGN(sizeof(HASHHDR));   /* but not HTAB, per above */
+    size = CACHELINEALIGN(sizeof(HASHHDR));     /* but not HTAB, per above */
     /* directory */
-    size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
+    size = add_shmem_aligned_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
     /* segments */
-    size = add_size(size, mul_size(nSegments,
-                                   MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
-    /* elements --- allocated in groups of choose_nelem_alloc() entries */
-    elementAllocCnt = choose_nelem_alloc(entrysize);
-    nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
+    segmentSize = CACHELINEALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET));
+    size = add_size(size, mul_size(nSegments, segmentSize));
+
     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
-    size = add_size(size,
-                    mul_size(nElementAllocs,
-                             mul_size(elementAllocCnt, elementSize)));
+
+    /*
+     * elements --- allocated in groups of choose_nelem_alloc() entries,
+     * except for the first init_entries, allocated in nFreelists groups.
+     */
+    nelemFreelist = init_entries / nFreelists;
+    freelistAllocSize = mul_size_and_shmem_align(nelemFreelist, elementSize);
+
+    size = add_size(size, mul_size(nFreelists, freelistAllocSize));
+
+    if (num_entries > init_entries)
+    {
+        Size        nelem,
+                    nAlloc,
+                    allocSize;
+
+        nelem = choose_nelem_alloc(entrysize);
+        nAlloc = (num_entries - init_entries - 1) / nelem + 1;
+        allocSize = mul_size_and_shmem_align(nelem, elementSize);
+
+        size = add_size(size, mul_size(nAlloc, allocSize));
+    }
 
     return size;
 }
@@ -852,7 +889,7 @@
 hash_get_shared_size(HASHCTL *info, int flags)
 {
     Assert(flags & HASH_DIRSIZE);
     Assert(info->dsize == info->max_dsize);
-    return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
+    return (Size) CACHELINEALIGN(sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT));
 }
 
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 95704265b6..b66278e466 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -203,7 +203,7 @@ SnapMgrShmemSize(void)
     size = add_size(size, mul_size(sizeof(TransactionId),
                                    OLD_SNAPSHOT_TIME_MAP_ENTRIES));
 
-    return size;
+    return (Size) CACHELINEALIGN(size);
 }
 
 /*
diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h
index 104f560d38..bbc6ca4cbf 100644
--- a/src/include/storage/predicate_internals.h
+++ b/src/include/storage/predicate_internals.h
@@ -192,7 +192,7 @@ typedef struct PredXactListData
 typedef struct PredXactListData *PredXactList;
 
 #define PredXactListDataSize \
-    ((Size)MAXALIGN(sizeof(PredXactListData)))
+    ((Size)CACHELINEALIGN(sizeof(PredXactListData)))
 
 
 /*
@@ -227,7 +227,7 @@ typedef struct RWConflictPoolHeaderData
 typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
 
 #define RWConflictPoolHeaderDataSize \
-    ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
+    ((Size)CACHELINEALIGN(sizeof(RWConflictPoolHeaderData)))
 
 
 /*
diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h
index 31024e5a50..897672bc8e 100644
--- a/src/include/storage/shmem.h
+++ b/src/include/storage/shmem.h
@@ -45,6 +45,19 @@ extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr);
 extern Size add_size(Size s1, Size s2);
 extern Size mul_size(Size s1, Size s2);
 
+/*
+ * Macros around add_size and mul_size to account for the extra padding added
+ * in ShmemAlloc().
+ *
+ * add_shmem_aligned_size() adds the padding bytes to the 2nd argument;
+ * mul_size_and_shmem_align() returns the aligned product.
+ */
+#define add_shmem_aligned_size(s1, s2) add_size((s1), \
+    (Size) CACHELINEALIGN(s2))
+
+#define mul_size_and_shmem_align(s1, s2) add_shmem_aligned_size(0, \
+    mul_size((s1), (s2)))
+
 /* ipci.c */
 extern void RequestAddinShmemSpace(Size size);
 
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index d7af0239c8..20c0ff3036 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -145,6 +145,8 @@ extern void *hash_seq_search(HASH_SEQ_STATUS *status);
 extern void hash_seq_term(HASH_SEQ_STATUS *status);
 extern void hash_freeze(HTAB *hashp);
 extern Size hash_estimate_size(long num_entries, Size entrysize);
+extern Size hash_estimate_size_ext(long num_entries, Size entrysize,
+                                   long init_entries, bool partitioned);
 extern long hash_select_dirsize(long num_entries);
 extern Size hash_get_shared_size(HASHCTL *info, int flags);
 extern void AtEOXact_HashTables(bool isCommit);
-- 
2.30.1
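For extension authors, the visible change is mostly that a fixed shared
struct should be padded with CACHELINEALIGN() rather than MAXALIGN() when
requesting space, since that's what ShmemInitStruct() will actually consume.
A hypothetical example, mirroring the autoprewarm and pg_stat_statements
hunks above (pg_myext, MyExtSharedState, MyExtEntry, myext_max and
myext_memsize are all made up for illustration):

    #include "postgres.h"

    #include "storage/shmem.h"
    #include "utils/hsearch.h"

    /* hypothetical shared state for an imaginary pg_myext extension */
    typedef struct MyExtSharedState
    {
        int         num_entries;
        int64       total_count;
    } MyExtSharedState;

    /* hypothetical shared hash table entry */
    typedef struct MyExtEntry
    {
        Oid         key;
        int64       counter;
    } MyExtEntry;

    static int  myext_max = 1000;   /* made-up sizing GUC */

    /* Estimate shared memory needed, the way pgss_memsize() does above. */
    static Size
    myext_memsize(void)
    {
        Size        size;

        /* fixed struct: pad to a cache line, as ShmemInitStruct() will */
        size = CACHELINEALIGN(sizeof(MyExtSharedState));
        /* shared hash table, via the unchanged public wrapper */
        size = add_size(size, hash_estimate_size(myext_max, sizeof(MyExtEntry)));

        return size;
    }

The result would then be passed to RequestAddinShmemSpace() from _PG_init(),
exactly as the autoprewarm hunk does.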