Sorry for the noise.  I fixed x86-64 builds in v2.

-- 
nathan
>From 50debef2733eeaae85031a005a48b1d645da072d Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Thu, 2 Oct 2025 21:22:17 -0500
Subject: [PATCH v2 1/2] prepare bufmgr for simd

---
 src/backend/storage/buffer/bufmgr.c | 35 +++++++++++++++--------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c 
b/src/backend/storage/buffer/bufmgr.c
index fe470de63f2..14222516237 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -213,10 +213,12 @@ static BufferDesc *PinCountWaitBuf = NULL;
  * because in some scenarios it's called with a spinlock held...
  */
 static struct PrivateRefCountEntry 
PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES];
+static Buffer PrivateRefCountArrayBuffers[REFCOUNT_ARRAY_ENTRIES];
 static HTAB *PrivateRefCountHash = NULL;
 static int32 PrivateRefCountOverflowed = 0;
 static uint32 PrivateRefCountClock = 0;
 static PrivateRefCountEntry *ReservedRefCountEntry = NULL;
+static int     ReservedRefCountEntryIdx;
 
 static uint32 MaxProportionalPins;
 
@@ -267,17 +269,12 @@ ReservePrivateRefCountEntry(void)
         * majority of cases.
         */
        {
-               int                     i;
-
-               for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
+               for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
                {
-                       PrivateRefCountEntry *res;
-
-                       res = &PrivateRefCountArray[i];
-
-                       if (res->buffer == InvalidBuffer)
+                       if (PrivateRefCountArrayBuffers[i] == InvalidBuffer)
                        {
-                               ReservedRefCountEntry = res;
+                               ReservedRefCountEntry = 
&PrivateRefCountArray[i];
+                               ReservedRefCountEntryIdx = i;
                                return;
                        }
                }
@@ -296,8 +293,8 @@ ReservePrivateRefCountEntry(void)
                bool            found;
 
                /* select victim slot */
-               ReservedRefCountEntry =
-                       &PrivateRefCountArray[PrivateRefCountClock++ % 
REFCOUNT_ARRAY_ENTRIES];
+               ReservedRefCountEntryIdx = PrivateRefCountClock++ % 
REFCOUNT_ARRAY_ENTRIES;
+               ReservedRefCountEntry = 
&PrivateRefCountArray[ReservedRefCountEntryIdx];
 
                /* Better be used, otherwise we shouldn't get here. */
                Assert(ReservedRefCountEntry->buffer != InvalidBuffer);
@@ -312,6 +309,7 @@ ReservePrivateRefCountEntry(void)
 
                /* clear the now free array slot */
                ReservedRefCountEntry->buffer = InvalidBuffer;
+               PrivateRefCountArrayBuffers[ReservedRefCountEntryIdx] = 
InvalidBuffer;
                ReservedRefCountEntry->refcount = 0;
 
                PrivateRefCountOverflowed++;
@@ -335,6 +333,7 @@ NewPrivateRefCountEntry(Buffer buffer)
 
        /* and fill it */
        res->buffer = buffer;
+       PrivateRefCountArrayBuffers[ReservedRefCountEntryIdx] = buffer;
        res->refcount = 0;
 
        return res;
@@ -351,7 +350,6 @@ static PrivateRefCountEntry *
 GetPrivateRefCountEntry(Buffer buffer, bool do_move)
 {
        PrivateRefCountEntry *res;
-       int                     i;
 
        Assert(BufferIsValid(buffer));
        Assert(!BufferIsLocal(buffer));
@@ -360,12 +358,10 @@ GetPrivateRefCountEntry(Buffer buffer, bool do_move)
         * First search for references in the array, that'll be sufficient in 
the
         * majority of cases.
         */
-       for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
+       for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
        {
-               res = &PrivateRefCountArray[i];
-
-               if (res->buffer == buffer)
-                       return res;
+               if (PrivateRefCountArrayBuffers[i] == buffer)
+                       return &PrivateRefCountArray[i];
        }
 
        /*
@@ -404,6 +400,7 @@ GetPrivateRefCountEntry(Buffer buffer, bool do_move)
 
                /* and fill it */
                free->buffer = buffer;
+               PrivateRefCountArrayBuffers[ReservedRefCountEntryIdx] = buffer;
                free->refcount = res->refcount;
 
                /* delete from hashtable */
@@ -460,6 +457,9 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
                 * entries.
                 */
                ReservedRefCountEntry = ref;
+
+               ReservedRefCountEntryIdx = ref - &PrivateRefCountArray[0];
+               PrivateRefCountArrayBuffers[ReservedRefCountEntryIdx] = 
InvalidBuffer;
        }
        else
        {
@@ -3993,6 +3993,7 @@ InitBufferManagerAccess(void)
        MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS);
 
        memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
+       memset(&PrivateRefCountArrayBuffers, InvalidBuffer, 
sizeof(PrivateRefCountArrayBuffers));
 
        hash_ctl.keysize = sizeof(int32);
        hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
-- 
2.39.5 (Apple Git-154)

>From 8643d8ee4d77026dd21aeb3d669c211691bee951 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Thu, 2 Oct 2025 22:27:43 -0500
Subject: [PATCH v2 2/2] simd-ify GetPrivateRefCountEntry()

---
 src/backend/storage/buffer/bufmgr.c | 45 +++++++++++++++++++++++++++++
 src/include/port/simd.h             | 25 ++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/src/backend/storage/buffer/bufmgr.c 
b/src/backend/storage/buffer/bufmgr.c
index 14222516237..8257fde22fd 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -50,6 +50,8 @@
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
+#include "port/pg_bitutils.h"
+#include "port/simd.h"
 #include "postmaster/bgwriter.h"
 #include "storage/aio.h"
 #include "storage/buf_internals.h"
@@ -358,11 +360,54 @@ GetPrivateRefCountEntry(Buffer buffer, bool do_move)
         * First search for references in the array, that'll be sufficient in 
the
         * majority of cases.
         */
+#ifndef USE_NO_SIMD
+
+       {
+               const Vector32 keys = vector32_broadcast(buffer);
+               Vector32        bufs1;
+               Vector32        bufs2;
+               Vector32        res1;
+               Vector32        res2;
+               Vector32        res;
+               uint32          msk;
+
+#ifdef USE_ASSERT_CHECKING
+               PrivateRefCountEntry *ret = NULL;
+
+               for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
+               {
+                       if (PrivateRefCountArrayBuffers[i] == buffer)
+                       {
+                               ret = &PrivateRefCountArray[i];
+                               break;
+                       }
+               }
+#endif
+
+               vector32_load(&bufs1, (uint32 *) 
&PrivateRefCountArrayBuffers[0]);
+               vector32_load(&bufs2, (uint32 *) 
&PrivateRefCountArrayBuffers[sizeof(Vector32) / sizeof(int)]);
+
+               res1 = vector32_eq(bufs1, keys);
+               res2 = vector32_eq(bufs2, keys);
+               res = vector32_pack_32(res1, res2);
+
+               msk = vector32_highbit_mask(res);
+               if (likely(msk))
+               {
+                       Assert(ret == 
&PrivateRefCountArray[pg_rightmost_one_pos32(msk) / 2]);
+                       return 
&PrivateRefCountArray[pg_rightmost_one_pos32(msk) / 2];
+               }
+               else
+                       Assert(ret == NULL);
+       }
+
+#else
        for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
        {
                if (PrivateRefCountArrayBuffers[i] == buffer)
                        return &PrivateRefCountArray[i];
        }
+#endif
 
        /*
         * By here we know that the buffer, if already pinned, isn't residing in
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index 5f5737707a8..e1cb1d85fbc 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -332,6 +332,18 @@ vector8_highbit_mask(const Vector8 v)
 }
 #endif                                                 /* ! USE_NO_SIMD */
 
+#ifndef USE_NO_SIMD
+static inline uint32
+vector32_highbit_mask(const Vector32 v)
+{
+#ifdef USE_NEON
+       return vector8_highbit_mask((Vector8) v);
+#else
+       return vector8_highbit_mask(v);
+#endif
+}
+#endif                                                 /* ! USE_NO_SIMD */
+
 /*
  * Return the bitwise OR of the inputs
  */
@@ -402,4 +414,17 @@ vector8_min(const Vector8 v1, const Vector8 v2)
 }
 #endif                                                 /* ! USE_NO_SIMD */
 
+
+#ifndef USE_NO_SIMD
+static inline Vector32
+vector32_pack_32(const Vector32 v1, const Vector32 v2)
+{
+#ifdef USE_SSE2
+       return _mm_packs_epi32(v1, v2);
+#elif defined(USE_NEON)
+       return (Vector32) vuzp1q_u16((uint16x8_t) v1, (uint16x8_t) v2);
+#endif
+}
+#endif                                                 /* ! USE_NO_SIMD */
+
 #endif                                                 /* SIMD_H */
-- 
2.39.5 (Apple Git-154)

Reply via email to