On 2016-04-10 09:03:37 +0300, Alexander Korotkov wrote:
> On Sun, Apr 10, 2016 at 8:36 AM, Alexander Korotkov <
> a.korot...@postgrespro.ru> wrote:
> 
> > On Sat, Apr 9, 2016 at 10:49 PM, Andres Freund <and...@anarazel.de> wrote:
> >
> >>
> >>
> >> On April 9, 2016 12:43:03 PM PDT, Andres Freund <and...@anarazel.de>
> >> wrote:
> >> >On 2016-04-09 22:38:31 +0300, Alexander Korotkov wrote:
> >> >> Here are the results with 5364b357 reverted.
> >> >
> >> >Crazy that this has such a negative impact. Amit, can you reproduce
> >> >that? Alexander, I guess for the r/w workload 5364b357 is a benefit on
> >> >that machine as well?
> >>
> >> How sure are you about these measurements?
> >
> >
> > I'm pretty sure.  I've retried it multiple times by hand before re-running
> > the script.
> >
> >
> >> Because there really shouldn't be clog lookups once a steady state is
> >> reached...
> >>
> >
> > Hm... I'm also surprised. There shouldn't be clog lookups once hint bits
> > are set.
> >
> 
> I also tried running perf top during pgbench and got some interesting
> results.
> 
> Without 5364b357:
>    5,69%  postgres                 [.] GetSnapshotData
>    4,47%  postgres                 [.] LWLockAttemptLock
>    3,81%  postgres                 [.] _bt_compare
>    3,42%  postgres                 [.] hash_search_with_hash_value
>    3,08%  postgres                 [.] LWLockRelease
>    2,49%  postgres                 [.] PinBuffer.isra.3
>    1,58%  postgres                 [.] AllocSetAlloc
>    1,17%  [kernel]                 [k] __schedule
>    1,15%  postgres                 [.] PostgresMain
>    1,13%  libc-2.17.so             [.] vfprintf
>    1,01%  libc-2.17.so             [.] __memcpy_ssse3_back
> 
> With 5364b357:
>   18,54%  postgres                 [.] GetSnapshotData
>    3,45%  postgres                 [.] LWLockRelease
>    3,27%  postgres                 [.] LWLockAttemptLock
>    3,21%  postgres                 [.] _bt_compare
>    2,93%  postgres                 [.] hash_search_with_hash_value
>    2,00%  postgres                 [.] PinBuffer.isra.3
>    1,32%  postgres                 [.] AllocSetAlloc
>    1,10%  libc-2.17.so             [.] vfprintf
> 
> Very surprising.  It appears that after 5364b357, GetSnapshotData consumes
> more time.  But I can't see anything in the GetSnapshotData code that
> depends on the clog buffers...
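
On the clog point: right, once the hint bits are set a visibility check
never reaches the clog at all.  Schematically the relevant tqual.c logic
is (a simplified sketch, not the literal code):

    if (tuple->t_infomask & HEAP_XMIN_COMMITTED)
    {
        /* fast path: xmin already known committed, clog never consulted */
    }
    else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
    {
        /*
         * slow path: TransactionIdDidCommit() reads pg_clog via slru.c.
         * Cache the verdict in the tuple so later checks take the fast
         * path.
         */
        SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                    HeapTupleHeaderGetRawXmin(tuple));
    }

So on a read-only run the clog should drop out of the profile within
moments of reaching a steady state, which makes me doubt clog accesses
themselves are what you're measuring.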

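What I suspect instead is a shared memory layout effect: 5364b357 changes
the size of the clog SLRU allocation, which shifts the address of
everything allocated after it, including the dense PGXACT array that
GetSnapshotData iterates over.  With PGXACT's 12-byte stride, entries
inevitably straddle cacheline boundaries, and which entries do so shifts
with the starting address.  That is what the attached patches attack:
patch 1 cacheline-aligns shared memory allocations in general, patch 2
aligns the individual parts of an slru, and patch 3 pads PGXACT to a
16-byte stride.

To see the stride effect in isolation, here is a small standalone program
(my sketch, not PostgreSQL code; 64-byte hardware cachelines assumed):

    #include <stdio.h>

    #define CACHE_LINE_SIZE 64

    static int
    count_straddlers(size_t stride, int nelems)
    {
        int     n = 0;
        int     i;

        for (i = 0; i < nelems; i++)
        {
            size_t  first = (size_t) i * stride;
            size_t  last = first + stride - 1;

            /*
             * An element is split if its first and last bytes land on
             * different cachelines.
             */
            if (first / CACHE_LINE_SIZE != last / CACHE_LINE_SIZE)
                n++;
        }
        return n;
    }

    int
    main(void)
    {
        /* e.g. 128 backends; the array itself starts cacheline-aligned */
        printf("stride 12: %d of 128 PGXACTs split\n",
               count_straddlers(12, 128));
        printf("stride 16: %d of 128 PGXACTs split\n",
               count_straddlers(16, 128));
        return 0;
    }

That reports 16 of 128 entries split at a 12-byte stride and none at 16
bytes.  A 12-byte stride produces splits no matter where the array
starts, while 16-byte entries never split as long as the array itself
starts 16-byte aligned.
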
Could you retry after applying the attached series of patches?

- Andres
From e8ad791c97004c64a1f27a500ba100b69fdc8d87 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 10 Apr 2016 21:47:04 -0700
Subject: [PATCH 1/3] Finish 09adc9a8c09c9640de05c7023b27fb83c761e91c.

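That is: get rid of BUFFERALIGN()/ALIGNOF_BUFFER entirely and
cacheline-align shared memory allocations in general.  freeoffset now
starts out cacheline-aligned and every allocation size is rounded up with
CACHELINEALIGN(), so the alignment of an allocation can no longer depend
on the sizes of the allocations made before it.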
---
 src/backend/access/transam/parallel.c |  2 +-
 src/backend/port/sysv_shmem.c         |  2 +-
 src/backend/port/win32_shmem.c        |  2 +-
 src/backend/storage/buffer/buf_init.c | 19 ++++++-------------
 src/backend/storage/ipc/shm_toc.c     |  6 +++---
 src/backend/storage/ipc/shmem.c       | 17 ++++++++++-------
 src/include/c.h                       |  2 --
 src/include/pg_config_manual.h        |  8 --------
 src/include/storage/shm_toc.h         |  2 +-
 9 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 0bba9a7..f8ea89c 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -237,7 +237,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
 		shm_toc_estimate_keys(&pcxt->estimator, 6);
 
 		/* Estimate space need for error queues. */
-		StaticAssertStmt(BUFFERALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
+		StaticAssertStmt(CACHELINEALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
 						 PARALLEL_ERROR_QUEUE_SIZE,
 						 "parallel error queue size not buffer-aligned");
 		shm_toc_estimate_chunk(&pcxt->estimator,
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 6c442b9..084bc31 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -559,7 +559,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
 	 * Initialize space allocation status for segment.
 	 */
 	hdr->totalsize = size;
-	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+	hdr->freeoffset = CACHELINEALIGN(sizeof(PGShmemHeader));
 	*shim = hdr;
 
 	/* Save info for possible future use */
diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c
index 0ff2c7e..81705fc 100644
--- a/src/backend/port/win32_shmem.c
+++ b/src/backend/port/win32_shmem.c
@@ -241,7 +241,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
 	 * Initialize space allocation status for segment.
 	 */
 	hdr->totalsize = size;
-	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+	hdr->freeoffset = CACHELINEALIGN(sizeof(PGShmemHeader));
 	hdr->dsm_control = 0;
 
 	/* Save info for possible future use */
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index a5cffc7..61f9c34 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -76,11 +76,9 @@ InitBufferPool(void)
 
 	/* Align descriptors to a cacheline boundary. */
 	BufferDescriptors = (BufferDescPadded *)
-		CACHELINEALIGN(
-					   ShmemInitStruct("Buffer Descriptors",
-									   NBuffers * sizeof(BufferDescPadded)
-									   + PG_CACHE_LINE_SIZE,
-									   &foundDescs));
+		ShmemInitStruct("Buffer Descriptors",
+						NBuffers * sizeof(BufferDescPadded),
+						&foundDescs);
 
 	BufferBlocks = (char *)
 		ShmemInitStruct("Buffer Blocks",
@@ -88,10 +86,9 @@ InitBufferPool(void)
 
 	/* Align lwlocks to cacheline boundary */
 	BufferIOLWLockArray = (LWLockMinimallyPadded *)
-		CACHELINEALIGN(ShmemInitStruct("Buffer IO Locks",
-							  NBuffers * (Size) sizeof(LWLockMinimallyPadded)
-									   + PG_CACHE_LINE_SIZE,
-									   &foundIOLocks));
+		ShmemInitStruct("Buffer IO Locks",
+						NBuffers * (Size) sizeof(LWLockMinimallyPadded),
+						&foundIOLocks);
 
 	BufferIOLWLockTranche.name = "buffer_io";
 	BufferIOLWLockTranche.array_base = BufferIOLWLockArray;
@@ -179,8 +176,6 @@ BufferShmemSize(void)
 
 	/* size of buffer descriptors */
 	size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
-	/* to allow aligning buffer descriptors */
-	size = add_size(size, PG_CACHE_LINE_SIZE);
 
 	/* size of data pages */
 	size = add_size(size, mul_size(NBuffers, BLCKSZ));
@@ -197,8 +192,6 @@ BufferShmemSize(void)
 	 * not highly contentended, we lay out the array with minimal padding.
 	 */
 	size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
-	/* to allow aligning the above */
-	size = add_size(size, PG_CACHE_LINE_SIZE);
 
 	/* size of checkpoint sort array in bufmgr.c */
 	size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
diff --git a/src/backend/storage/ipc/shm_toc.c b/src/backend/storage/ipc/shm_toc.c
index 55248c2..683abc4 100644
--- a/src/backend/storage/ipc/shm_toc.c
+++ b/src/backend/storage/ipc/shm_toc.c
@@ -89,7 +89,7 @@ shm_toc_allocate(shm_toc *toc, Size nbytes)
 	Size		toc_bytes;
 
 	/* Make sure request is well-aligned. */
-	nbytes = BUFFERALIGN(nbytes);
+	nbytes = CACHELINEALIGN(nbytes);
 
 	SpinLockAcquire(&toc->toc_mutex);
 
@@ -133,8 +133,8 @@ shm_toc_freespace(shm_toc *toc)
 	SpinLockRelease(&toc->toc_mutex);
 
 	toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry);
-	Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
-	return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
+	Assert(allocated_bytes + CACHELINEALIGN(toc_bytes) <= total_bytes);
+	return total_bytes - (allocated_bytes + CACHELINEALIGN(toc_bytes));
 }
 
 /*
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 1ad68cd..ae26d4b 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -125,7 +125,7 @@ InitShmemAllocation(void)
 		PGSemaphore spinsemas;
 
 		spinsemas = (PGSemaphore) (((char *) shmhdr) + shmhdr->freeoffset);
-		shmhdr->freeoffset += MAXALIGN(SpinlockSemaSize());
+		shmhdr->freeoffset += CACHELINEALIGN(SpinlockSemaSize());
 		SpinlockSemaInit(spinsemas);
 		Assert(shmhdr->freeoffset <= shmhdr->totalsize);
 	}
@@ -136,7 +136,7 @@ InitShmemAllocation(void)
 	 * way, too, for the same reasons as above.
 	 */
 	ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
-	shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
+	shmhdr->freeoffset += CACHELINEALIGN(sizeof(slock_t));
 	Assert(shmhdr->freeoffset <= shmhdr->totalsize);
 
 	SpinLockInit(ShmemLock);
@@ -179,7 +179,9 @@ ShmemAlloc(Size size)
 	 * boundary.  The calling code will still need to be careful about how it
 	 * uses the allocated space - e.g. by padding each element in an array of
 	 * structures out to a power-of-two size - but without this, even that
-	 * won't be sufficient.
+	 * won't be sufficient.  We take care that freeoffset initially is
+	 * adequately aligned, so aligning the size for all allocations guarantees
+	 * allocations are aligned.
 	 */
 	size = CACHELINEALIGN(size);
 
@@ -189,10 +191,6 @@ ShmemAlloc(Size size)
 
 	newStart = ShmemSegHdr->freeoffset;
 
-	/* extra alignment for large requests, since they are probably buffers */
-	if (size >= BLCKSZ)
-		newStart = BUFFERALIGN(newStart);
-
 	newFree = newStart + size;
 	if (newFree <= ShmemSegHdr->totalsize)
 	{
@@ -209,6 +207,8 @@ ShmemAlloc(Size size)
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of shared memory")));
 
+	Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
+
 	return newSpace;
 }
 
@@ -425,6 +425,9 @@ ShmemInitStruct(const char *name, Size size, bool *foundPtr)
 	LWLockRelease(ShmemIndexLock);
 
 	Assert(ShmemAddrIsValid(structPtr));
+
+	Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
+
 	return structPtr;
 }
 
diff --git a/src/include/c.h b/src/include/c.h
index 7c57430..fb526b7 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -567,8 +567,6 @@ typedef NameData *Name;
 #define LONGALIGN(LEN)			TYPEALIGN(ALIGNOF_LONG, (LEN))
 #define DOUBLEALIGN(LEN)		TYPEALIGN(ALIGNOF_DOUBLE, (LEN))
 #define MAXALIGN(LEN)			TYPEALIGN(MAXIMUM_ALIGNOF, (LEN))
-/* MAXALIGN covers only built-in types, not buffers */
-#define BUFFERALIGN(LEN)		TYPEALIGN(ALIGNOF_BUFFER, (LEN))
 #define CACHELINEALIGN(LEN)		TYPEALIGN(PG_CACHE_LINE_SIZE, (LEN))
 
 #define TYPEALIGN_DOWN(ALIGNVAL,LEN)  \
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index ef89521..8f59b53 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -107,14 +107,6 @@
 #define BITS_PER_BYTE		8
 
 /*
- * Preferred alignment for disk I/O buffers.  On some CPUs, copies between
- * user space and kernel space are significantly faster if the user buffer
- * is aligned on a larger-than-MAXALIGN boundary.  Ideally this should be
- * a platform-dependent value, but for now we just hard-wire it.
- */
-#define ALIGNOF_BUFFER	32
-
-/*
  * Disable UNIX sockets for certain operating systems.
  */
 #if defined(WIN32)
diff --git a/src/include/storage/shm_toc.h b/src/include/storage/shm_toc.h
index 6822f91..2365b02 100644
--- a/src/include/storage/shm_toc.h
+++ b/src/include/storage/shm_toc.h
@@ -48,7 +48,7 @@ typedef struct
 	((e)->space_for_chunks = 0, (e)->number_of_keys = 0)
 #define shm_toc_estimate_chunk(e, sz) \
 	((e)->space_for_chunks = add_size((e)->space_for_chunks, \
-		BUFFERALIGN((sz))))
+		CACHELINEALIGN((sz))))
 #define shm_toc_estimate_keys(e, cnt) \
 	((e)->number_of_keys = add_size((e)->number_of_keys, (cnt)))
 
-- 
2.7.0.229.g701fa7f

From 5a19816c3b4268d4e2279119e1c7c4272bad73c9 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 10 Apr 2016 21:47:14 -0700
Subject: [PATCH 2/3] Align individual parts of an slru.

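Pad each of the constituent slru arrays (page_buffer, page_status,
page_dirty, ...) out to a cacheline boundary instead of just
MAXALIGN'ing them, so adjacent arrays no longer share a cacheline and
the page area keeps its alignment regardless of nslots and nlsns.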
---
 src/backend/access/transam/slru.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 36a011c..71b6d16 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -146,18 +146,18 @@ SimpleLruShmemSize(int nslots, int nlsns)
 	Size		sz;
 
 	/* we assume nslots isn't so large as to risk overflow */
-	sz = MAXALIGN(sizeof(SlruSharedData));
-	sz += MAXALIGN(nslots * sizeof(char *));	/* page_buffer[] */
-	sz += MAXALIGN(nslots * sizeof(SlruPageStatus));	/* page_status[] */
-	sz += MAXALIGN(nslots * sizeof(bool));		/* page_dirty[] */
-	sz += MAXALIGN(nslots * sizeof(int));		/* page_number[] */
-	sz += MAXALIGN(nslots * sizeof(int));		/* page_lru_count[] */
-	sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
+	sz = CACHELINEALIGN(sizeof(SlruSharedData));
+	sz += CACHELINEALIGN(nslots * sizeof(char *));	/* page_buffer[] */
+	sz += CACHELINEALIGN(nslots * sizeof(SlruPageStatus));	/* page_status[] */
+	sz += CACHELINEALIGN(nslots * sizeof(bool));		/* page_dirty[] */
+	sz += CACHELINEALIGN(nslots * sizeof(int));		/* page_number[] */
+	sz += CACHELINEALIGN(nslots * sizeof(int));		/* page_lru_count[] */
+	sz += CACHELINEALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
 
 	if (nlsns > 0)
-		sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
+		sz += CACHELINEALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
 
-	return BUFFERALIGN(sz) + BLCKSZ * nslots;
+	return CACHELINEALIGN(sz) + BLCKSZ * nslots;
 }
 
 void
@@ -192,22 +192,22 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 		/* shared->latest_page_number will be set later */
 
 		ptr = (char *) shared;
-		offset = MAXALIGN(sizeof(SlruSharedData));
+		offset = CACHELINEALIGN(sizeof(SlruSharedData));
 		shared->page_buffer = (char **) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(char *));
+		offset += CACHELINEALIGN(nslots * sizeof(char *));
 		shared->page_status = (SlruPageStatus *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
+		offset += CACHELINEALIGN(nslots * sizeof(SlruPageStatus));
 		shared->page_dirty = (bool *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(bool));
+		offset += CACHELINEALIGN(nslots * sizeof(bool));
 		shared->page_number = (int *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(int));
+		offset += CACHELINEALIGN(nslots * sizeof(int));
 		shared->page_lru_count = (int *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(int));
+		offset += CACHELINEALIGN(nslots * sizeof(int));
 
 		if (nlsns > 0)
 		{
 			shared->group_lsn = (XLogRecPtr *) (ptr + offset);
-			offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
+			offset += CACHELINEALIGN(nslots * nlsns * sizeof(XLogRecPtr));
 		}
 
 		/* Initialize LWLocks */
@@ -220,7 +220,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 		shared->lwlock_tranche.array_base = shared->buffer_locks;
 		shared->lwlock_tranche.array_stride = sizeof(LWLockPadded);
 
-		ptr += BUFFERALIGN(offset);
+		ptr += CACHELINEALIGN(offset);
 		for (slotno = 0; slotno < nslots; slotno++)
 		{
 			LWLockInitialize(&shared->buffer_locks[slotno].lock,
-- 
2.7.0.229.g701fa7f

From 956044e7af1ee543c2e76d05b3367109db562477 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 10 Apr 2016 21:59:49 -0700
Subject: [PATCH 3/3] WIP: Force PGXACT stride to be 16 instead of 12 bytes.

The previous solution led to PGXACTs split across cachelines.
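At a 12-byte stride with 64-byte hardware cachelines, two out of every
sixteen PGXACTs straddle a line boundary, no matter where the array
starts; at a 16-byte stride exactly four entries fit per line and none
are split.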
---
 src/include/storage/proc.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index c3b462c..d2cd71d 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -204,6 +204,8 @@ typedef struct PGXACT
 								 * previously called InCommit */
 
 	uint8		nxids;
+
+	uint32		pad;			/* FIXME: proper padding solution */
 } PGXACT;
 
 /*
-- 
2.7.0.229.g701fa7f
