On Tue, Feb 18, 2025 at 1:40 PM John Naylor <johncnaylo...@gmail.com> wrote:
>
> On Tue, Feb 18, 2025 at 12:41 AM Nathan Bossart
> <nathandboss...@gmail.com> wrote:

> > While this needn't block this patch set, I do find the dispatch code to be
> > pretty complicated.  Maybe we can improve that in the future by using
> > macros to auto-generate much of it.  My concern here is less about this
> > particular patch set and more about the long term maintainability as we add
> > more and more stuff like it, each with its own tangled web of build and
> > dispatch rules.

I had a further thought on this: CRC and non-vector popcount are kind
of special in that recent OSes assume they exist, and it's worth a bit
of effort to take advantage of that. Other things we may add should be
kept as simple as possible.

> - Rename the CRC choose*.c files to pg_cpucap_{x86,arm}.c and build
> them unconditionally for each platform
> - Initialize the runtime info by CPU platform and not other symbols
> where possible (I guess anything needing AVX-512 will still be a mess)

I've made a start of this for v8:

0001 is mostly the same as before
0002 (Meson-only for now) changes 0001 per the above, to see how it
looks, but I've not tried to add popcount or anything else. I like it
overall, but some details may need tweaking.
0004 generates the pclmul loop slightly differently to simplify
integrating with our code, but shouldn't make a big difference

Another thing I found in Agner's manuals: AMD Zen CPUs, even as recent as
Zen 4, don't have as good a microarchitecture for PCLMUL, so if anyone
with such a machine would like to help test the cutoff, the script is
at

https://www.postgresql.org/message-id/CANWCAZahvhE-%2BhtZiUyzPiS5e45ukx5877mD-dHr-KSX6LcdjQ%40mail.gmail.com

(needs "CREATE EXTENSION test_crc32c;" to run it)

--
John Naylor
Amazon Web Services
From d704f3f76ba555e0c0ad8c3cfc2d953ea4baa162 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Sat, 15 Feb 2025 19:18:16 +0700
Subject: [PATCH v8 1/4] Dispatch CRC computation by branching rather than
 indirect calls

---
 src/backend/postmaster/postmaster.c |  4 ++
 src/include/port/pg_cpucap.h        | 25 +++++++++
 src/include/port/pg_crc32c.h        | 78 +++++++++++++++++++++--------
 src/port/Makefile                   |  1 +
 src/port/meson.build                |  4 ++
 src/port/pg_cpucap.c                | 51 +++++++++++++++++++
 src/port/pg_crc32c_armv8_choose.c   | 26 +---------
 src/port/pg_crc32c_sse42_choose.c   | 26 +---------
 8 files changed, 145 insertions(+), 70 deletions(-)
 create mode 100644 src/include/port/pg_cpucap.h
 create mode 100644 src/port/pg_cpucap.c

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 5dd3b6a4fd4..43e35f8041f 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -99,6 +99,7 @@
 #include "pg_getopt.h"
 #include "pgstat.h"
 #include "port/pg_bswap.h"
+#include "port/pg_cpucap.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/bgworker_internals.h"
 #include "postmaster/pgarch.h"
@@ -1951,6 +1952,9 @@ InitProcessGlobals(void)
 #ifndef WIN32
 	srandom(pg_prng_uint32(&pg_global_prng_state));
 #endif
+
+	/* detect CPU capabilities */
+	pg_cpucap_initialize();
 }
 
 /*
diff --git a/src/include/port/pg_cpucap.h b/src/include/port/pg_cpucap.h
new file mode 100644
index 00000000000..81edfedce5d
--- /dev/null
+++ b/src/include/port/pg_cpucap.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_cpucap.h
+ *	  Runtime detection of CPU capabilities.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ *	  src/include/port/pg_cpucap.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_CPUCAP_H
+#define PG_CPUCAP_H
+
+#define PGCPUCAP_INIT           (1 << 0)
+#define PGCPUCAP_POPCNT         (1 << 1)
+#define PGCPUCAP_VPOPCNT        (1 << 2)
+#define PGCPUCAP_CRC32C         (1 << 3)
+
+extern PGDLLIMPORT uint32 pg_cpucap;
+extern void pg_cpucap_initialize(void);
+
+#endif							/* PG_CPUCAP_H */
diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index 65ebeacf4b1..b565a0f2949 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -34,6 +34,7 @@
 #define PG_CRC32C_H
 
 #include "port/pg_bswap.h"
+#include "port/pg_cpucap.h"
 
 typedef uint32 pg_crc32c;
 
@@ -41,52 +42,55 @@ typedef uint32 pg_crc32c;
 #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF)
 #define EQ_CRC32C(c1, c2) ((c1) == (c2))
 
-#if defined(USE_SSE42_CRC32C)
+#if defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
 /* Use Intel SSE4.2 instructions. */
 #define COMP_CRC32C(crc, data, len) \
+	((crc) = pg_comp_crc32c_dispatch((crc), (data), (len)))
+#define COMP_CRC32C_HW(crc, data, len) \
 	((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
+#if defined(USE_SSE42_CRC32C)
+#define HAVE_CRC_COMPTIME
+#else
+#define HAVE_CRC_RUNTIME
+extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
+#endif
+
+extern bool pg_crc32c_sse42_available(void);
 extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 
-#elif defined(USE_ARMV8_CRC32C)
+#elif defined(USE_ARMV8_CRC32C) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
 /* Use ARMv8 CRC Extension instructions. */
 
 #define COMP_CRC32C(crc, data, len)							\
+	((crc) = pg_comp_crc32c_dispatch((crc), (data), (len)))
+#define COMP_CRC32C_HW(crc, data, len)						\
 	((crc) = pg_comp_crc32c_armv8((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
+#if defined(USE_ARMV8_CRC32C)
+#define HAVE_CRC_COMPTIME
+#else
+#define HAVE_CRC_RUNTIME
+extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
+#endif
+
+extern bool pg_crc32c_armv8_available(void);
 extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
 
 #elif defined(USE_LOONGARCH_CRC32C)
 /* Use LoongArch CRCC instructions. */
 
 #define COMP_CRC32C(crc, data, len)							\
+	((crc) = pg_comp_crc32c_dispatch((crc), (data), (len)))
+#define COMP_CRC32C_HW(crc, data, len)						\
 	((crc) = pg_comp_crc32c_loongarch((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
+#define HAVE_CRC_COMPTIME
 extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len);
 
-#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
-
-/*
- * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first
- * to check that they are available.
- */
-#define COMP_CRC32C(crc, data, len) \
-	((crc) = pg_comp_crc32c((crc), (data), (len)))
-#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
-
-extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
-extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
-
-#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
-extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
-#endif
-#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK
-extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
-#endif
-
 #else
 /*
  * Use slicing-by-8 algorithm.
@@ -105,6 +109,36 @@ extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t le
 
 extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
 
+#endif							/* end of CPU-specific symbols */
+
+#if defined(HAVE_CRC_COMPTIME) || defined(HAVE_CRC_RUNTIME)
+/*
+ * Check if the CPU we're running on supports special
+ * instructions for CRC-32C computation. Otherwise, fall
+ * back to the pure software implementation (slicing-by-8).
+ */
+static inline pg_crc32c
+pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len)
+{
+	/*
+	 * If this is firing in a frontend program, first look if you forgot a
+	 * call to pg_cpucap_initialize() in main(). See for example
+	 * src/bin/pg_controldata/pg_controldata.c.
+	 */
+	// WIP: how to best initialize in frontend?
+#ifndef FRONTEND
+	Assert(pg_cpucap & PGCPUCAP_INIT);
+#endif
+
+#if defined(HAVE_CRC_COMPTIME)
+	return COMP_CRC32C_HW(crc, data, len);
+#else
+	if (pg_cpucap & PGCPUCAP_CRC32C)
+		return COMP_CRC32C_HW(crc, data, len);
+	else
+		return pg_comp_crc32c_sb8(crc, data, len);
 #endif
+}
+#endif							/* HAVE_CRC_COMPTIME || HAVE_CRC_RUNTIME */
 
 #endif							/* PG_CRC32C_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index 4c224319512..5a05179e926 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -44,6 +44,7 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_cpucap.o \
 	pg_popcount_avx512.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index 7fcfa728d43..e1e7ce8fb87 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,7 @@ pgport_sources = [
   'noblock.c',
   'path.c',
   'pg_bitutils.c',
+  'pg_cpucap.c',
   'pg_popcount_avx512.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
@@ -83,12 +84,15 @@ replace_funcs_pos = [
   # x86/x64
   ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'],
   ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
+  # WIP sometime we'll need to build these based on host_cpu
+  ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C'],
   ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
 
   # arm / aarch64
   ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'],
   ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
+  ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C'],
   ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
 
diff --git a/src/port/pg_cpucap.c b/src/port/pg_cpucap.c
new file mode 100644
index 00000000000..eba6e31c63f
--- /dev/null
+++ b/src/port/pg_cpucap.c
@@ -0,0 +1,51 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_cpucap.c
+ *	  Runtime detection of CPU capabilities.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ *	  src/port/pg_cpucap.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "port/pg_cpucap.h"
+#include "port/pg_crc32c.h"
+
+
+/* starts uninitialized so we can detect errors of omission */
+uint32		pg_cpucap = 0;
+
+/*
+ * Check if hardware instructions for CRC computation are available.
+ */
+static void
+pg_cpucap_crc32c(void)
+{
+	/* WIP: It seems like we should use CPU arch symbols instead */
+#if defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
+	if (pg_crc32c_sse42_available())
+		pg_cpucap |= PGCPUCAP_CRC32C;
+
+#elif defined(USE_ARMV8_CRC32C) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
+	if (pg_crc32c_armv8_available())
+		pg_cpucap |= PGCPUCAP_CRC32C;
+#endif
+}
+
+/*
+ * This needs to be called in main() for every
+ * program that calls a function that dispatches
+ * according to CPU features.
+ */
+void
+pg_cpucap_initialize(void)
+{
+	pg_cpucap = PGCPUCAP_INIT;
+
+	pg_cpucap_crc32c();
+}
diff --git a/src/port/pg_crc32c_armv8_choose.c b/src/port/pg_crc32c_armv8_choose.c
index ec12be1bbc3..e3654427c3f 100644
--- a/src/port/pg_crc32c_armv8_choose.c
+++ b/src/port/pg_crc32c_armv8_choose.c
@@ -1,12 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * pg_crc32c_armv8_choose.c
- *	  Choose between ARMv8 and software CRC-32C implementation.
- *
- * On first call, checks if the CPU we're running on supports the ARMv8
- * CRC Extension. If it does, use the special instructions for CRC-32C
- * computation. Otherwise, fall back to the pure software implementation
- * (slicing-by-8).
+ *	  Check if the CPU we're running on supports the ARMv8 CRC Extension.
  *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -40,7 +35,7 @@
 
 #include "port/pg_crc32c.h"
 
-static bool
+bool
 pg_crc32c_armv8_available(void)
 {
 #if defined(HAVE_ELF_AUX_INFO)
@@ -106,20 +101,3 @@ pg_crc32c_armv8_available(void)
 	return false;
 #endif
 }
-
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static pg_crc32c
-pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
-{
-	if (pg_crc32c_armv8_available())
-		pg_comp_crc32c = pg_comp_crc32c_armv8;
-	else
-		pg_comp_crc32c = pg_comp_crc32c_sb8;
-
-	return pg_comp_crc32c(crc, data, len);
-}
-
-pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c
index 65dbc4d4249..f4d3215bc55 100644
--- a/src/port/pg_crc32c_sse42_choose.c
+++ b/src/port/pg_crc32c_sse42_choose.c
@@ -1,12 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * pg_crc32c_sse42_choose.c
- *	  Choose between Intel SSE 4.2 and software CRC-32C implementation.
- *
- * On first call, checks if the CPU we're running on supports Intel SSE
- * 4.2. If it does, use the special SSE instructions for CRC-32C
- * computation. Otherwise, fall back to the pure software implementation
- * (slicing-by-8).
+ *	  Check if the CPU we're running on supports SSE4.2.
  *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -30,7 +25,7 @@
 
 #include "port/pg_crc32c.h"
 
-static bool
+bool
 pg_crc32c_sse42_available(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
@@ -45,20 +40,3 @@ pg_crc32c_sse42_available(void)
 
 	return (exx[2] & (1 << 20)) != 0;	/* SSE 4.2 */
 }
-
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static pg_crc32c
-pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
-{
-	if (pg_crc32c_sse42_available())
-		pg_comp_crc32c = pg_comp_crc32c_sse42;
-	else
-		pg_comp_crc32c = pg_comp_crc32c_sb8;
-
-	return pg_comp_crc32c(crc, data, len);
-}
-
-pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
-- 
2.48.1

From 49d1cec52cf2f167e20d916330bb7d37b2f8af34 Mon Sep 17 00:00:00 2001
From: Paul Amonson <paul.d.amon...@intel.com>
Date: Mon, 6 May 2024 08:34:17 -0700
Subject: [PATCH v8 3/4] Add a Postgres SQL function for crc32c benchmarking

Add a drive_crc32c() function to use for benchmarking crc32c
computation. The function takes 2 arguments:

(1) count: num of times CRC32C is computed in a loop.
(2) num: #bytes in the buffer to calculate crc over.

XXX not for commit

Extracted from a patch by  Raghuveer Devulapalli
---
 contrib/meson.build                          |  1 +
 contrib/test_crc32c/Makefile                 | 20 +++++++
 contrib/test_crc32c/expected/test_crc32c.out | 57 ++++++++++++++++++++
 contrib/test_crc32c/meson.build              | 34 ++++++++++++
 contrib/test_crc32c/sql/test_crc32c.sql      |  3 ++
 contrib/test_crc32c/test_crc32c--1.0.sql     |  1 +
 contrib/test_crc32c/test_crc32c.c            | 47 ++++++++++++++++
 contrib/test_crc32c/test_crc32c.control      |  4 ++
 8 files changed, 167 insertions(+)
 create mode 100644 contrib/test_crc32c/Makefile
 create mode 100644 contrib/test_crc32c/expected/test_crc32c.out
 create mode 100644 contrib/test_crc32c/meson.build
 create mode 100644 contrib/test_crc32c/sql/test_crc32c.sql
 create mode 100644 contrib/test_crc32c/test_crc32c--1.0.sql
 create mode 100644 contrib/test_crc32c/test_crc32c.c
 create mode 100644 contrib/test_crc32c/test_crc32c.control

diff --git a/contrib/meson.build b/contrib/meson.build
index 1ba73ebd67a..06673db0625 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
   'install_dir': contrib_doc_dir,
 }
 
+subdir('test_crc32c')
 subdir('amcheck')
 subdir('auth_delay')
 subdir('auto_explain')
diff --git a/contrib/test_crc32c/Makefile b/contrib/test_crc32c/Makefile
new file mode 100644
index 00000000000..5b747c6184a
--- /dev/null
+++ b/contrib/test_crc32c/Makefile
@@ -0,0 +1,20 @@
+MODULE_big = test_crc32c
+OBJS = test_crc32c.o
+PGFILEDESC = "test"
+EXTENSION = test_crc32c
+DATA = test_crc32c--1.0.sql
+
+first: all
+
+# test_crc32c.o:	CFLAGS+=-g
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/test_crc32c
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/test_crc32c/expected/test_crc32c.out b/contrib/test_crc32c/expected/test_crc32c.out
new file mode 100644
index 00000000000..dff6bb3133b
--- /dev/null
+++ b/contrib/test_crc32c/expected/test_crc32c.out
@@ -0,0 +1,57 @@
+CREATE EXTENSION test_crc32c;
+select drive_crc32c(1, i) from generate_series(100, 300, 4) i;
+ drive_crc32c 
+--------------
+    532139994
+   2103623867
+    785984197
+   2686825890
+   3213049059
+   3819630168
+   1389234603
+    534072900
+   2930108140
+   2496889855
+   1475239611
+    136366931
+   3067402116
+   2012717871
+   3682416023
+   2054270645
+   1817339875
+   4100939569
+   1192727539
+   3636976218
+    369764421
+   3161609879
+   1067984880
+   1235066769
+   3138425899
+    648132037
+   4203750233
+   1330187888
+   2683521348
+   1951644495
+   2574090107
+   3904902018
+   3772697795
+   1644686344
+   2868962106
+   3369218491
+   3902689890
+   3456411865
+    141004025
+   1504497996
+   3782655204
+   3544797610
+   3429174879
+   2524728016
+   3935861181
+     25498897
+    692684159
+    345705535
+   2761600287
+   2654632420
+   3945991399
+(51 rows)
+
diff --git a/contrib/test_crc32c/meson.build b/contrib/test_crc32c/meson.build
new file mode 100644
index 00000000000..d7bec4ba1cb
--- /dev/null
+++ b/contrib/test_crc32c/meson.build
@@ -0,0 +1,34 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+test_crc32c_sources = files(
+  'test_crc32c.c',
+)
+
+if host_system == 'windows'
+  test_crc32c_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'test_crc32c',
+    '--FILEDESC', 'test_crc32c - test code for crc32c library',])
+endif
+
+test_crc32c = shared_module('test_crc32c',
+  test_crc32c_sources,
+  kwargs: contrib_mod_args,
+)
+contrib_targets += test_crc32c
+
+install_data(
+  'test_crc32c.control',
+  'test_crc32c--1.0.sql',
+  kwargs: contrib_data_args,
+)
+
+tests += {
+  'name': 'test_crc32c',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'regress': {
+    'sql': [
+      'test_crc32c',
+    ],
+  },
+}
diff --git a/contrib/test_crc32c/sql/test_crc32c.sql b/contrib/test_crc32c/sql/test_crc32c.sql
new file mode 100644
index 00000000000..95c6dfe4488
--- /dev/null
+++ b/contrib/test_crc32c/sql/test_crc32c.sql
@@ -0,0 +1,3 @@
+CREATE EXTENSION test_crc32c;
+
+select drive_crc32c(1, i) from generate_series(100, 300, 4) i;
diff --git a/contrib/test_crc32c/test_crc32c--1.0.sql b/contrib/test_crc32c/test_crc32c--1.0.sql
new file mode 100644
index 00000000000..52b9772f908
--- /dev/null
+++ b/contrib/test_crc32c/test_crc32c--1.0.sql
@@ -0,0 +1 @@
+CREATE FUNCTION drive_crc32c  (count int, num int) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/test_crc32c/test_crc32c.c b/contrib/test_crc32c/test_crc32c.c
new file mode 100644
index 00000000000..28bc42de314
--- /dev/null
+++ b/contrib/test_crc32c/test_crc32c.c
@@ -0,0 +1,47 @@
+/* select drive_crc32c(1000000, 1024); */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "port/pg_crc32c.h"
+#include "common/pg_prng.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * drive_crc32c(count int, num int) returns bigint
+ *
+ * Benchmark driver: computes the CRC-32C of "num" bytes of reproducible
+ * pseudo-random data "count" times over. Negative arguments are rejected.
+ */
+PG_FUNCTION_INFO_V1(drive_crc32c);
+Datum
+drive_crc32c(PG_FUNCTION_ARGS)
+{
+	int64		count = PG_GETARG_INT32(0);
+	int64		num = PG_GETARG_INT32(1);
+	char	   *data;
+	pg_crc32c	crc;
+	pg_prng_state state;
+
+	if (count < 0 || num < 0)
+		elog(ERROR, "count and num must not be negative");
+
+	/* unlike malloc(), this raises an error when out of memory */
+	data = palloc_extended(num, MCXT_ALLOC_HUGE);
+
+	/* fixed seed, so that every call checksums the same bytes */
+	pg_prng_seed(&state, 42);
+	for (int64 i = 0; i < num; i++)
+		data[i] = pg_prng_uint32(&state) % 255;
+
+	INIT_CRC32C(crc);			/* returned as-is when count is zero */
+	while (count--)
+	{
+		INIT_CRC32C(crc);
+		COMP_CRC32C(crc, data, num);
+		FIN_CRC32C(crc);
+	}
+
+	pfree(data);
+	PG_RETURN_INT64((int64) crc);
+}
diff --git a/contrib/test_crc32c/test_crc32c.control b/contrib/test_crc32c/test_crc32c.control
new file mode 100644
index 00000000000..878a077ee18
--- /dev/null
+++ b/contrib/test_crc32c/test_crc32c.control
@@ -0,0 +1,4 @@
+comment = 'test'
+default_version = '1.0'
+module_pathname = '$libdir/test_crc32c'
+relocatable = true
-- 
2.48.1

From d5ff7ff575cb9b005ff559903a0e1ffe0e023cf4 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 12 Feb 2025 15:27:16 +0700
Subject: [PATCH v8 4/4] Improve CRC32C performance on x86_64

The current SSE4.2 implementation of CRC32C relies on the native
CRC32 instruction, which operates on 8 bytes at a time. We can get a
substantial speedup on longer inputs by using carryless multiplication
on SIMD registers, processing 64 bytes per loop iteration.

The PCLMULQDQ instruction has been widely available since 2011 (almost
as old as SSE 4.2), so this commit now requires that, as well as SSE
4.2, to build pg_crc32c_sse42.c.

The MIT-licensed implementation was generated with the "generate"
program from

https://github.com/corsix/fast-crc32/

Based on: "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ
Instruction" V. Gopal, E. Ozturk, et al., 2009

Author: Raghuveer Devulapalli <raghuveer.devulapa...@intel.com>
Author: John Naylor <johncnaylo...@gmail.com>
Discussion: https://postgr.es/m/ph8pr11mb82869ff741dfa4e9a029ff13fb...@ph8pr11mb8286.namprd11.prod.outlook.com
---
 src/include/port/pg_cpucap.h          |   2 +
 src/port/pg_cpucap.c                  |   1 +
 src/port/pg_cpucap_arm.c              |   6 ++
 src/port/pg_cpucap_x86.c              |  23 +++++
 src/port/pg_crc32c_sse42.c            | 123 ++++++++++++++++++++++++++
 src/test/regress/expected/strings.out |  24 +++++
 src/test/regress/sql/strings.sql      |   4 +
 7 files changed, 183 insertions(+)

diff --git a/src/include/port/pg_cpucap.h b/src/include/port/pg_cpucap.h
index 5e04213b211..af3fabfcffb 100644
--- a/src/include/port/pg_cpucap.h
+++ b/src/include/port/pg_cpucap.h
@@ -18,11 +18,13 @@
 #define PGCPUCAP_POPCNT         (1 << 1)
 #define PGCPUCAP_VPOPCNT        (1 << 2)
 #define PGCPUCAP_CRC32C         (1 << 3)
+#define PGCPUCAP_CLMUL          (1 << 4)
 
 extern PGDLLIMPORT uint32 pg_cpucap;
 extern void pg_cpucap_initialize(void);
 
 /* arch-specific functions private to src/port */
 extern void pg_cpucap_crc32c(void);
+extern void pg_cpucap_clmul(void);
 
 #endif							/* PG_CPUCAP_H */
diff --git a/src/port/pg_cpucap.c b/src/port/pg_cpucap.c
index 88d75827022..301bd9fc2c7 100644
--- a/src/port/pg_cpucap.c
+++ b/src/port/pg_cpucap.c
@@ -30,4 +30,5 @@ pg_cpucap_initialize(void)
 	pg_cpucap = PGCPUCAP_INIT;
 
 	pg_cpucap_crc32c();
+	pg_cpucap_clmul();
 }
diff --git a/src/port/pg_cpucap_arm.c b/src/port/pg_cpucap_arm.c
index 19e052fecf6..e080a5a931f 100644
--- a/src/port/pg_cpucap_arm.c
+++ b/src/port/pg_cpucap_arm.c
@@ -111,3 +111,9 @@ pg_cpucap_crc32c(void)
 	if (pg_crc32c_armv8_available())
 		pg_cpucap |= PGCPUCAP_CRC32C;
 }
+
+void
+pg_cpucap_clmul(void)
+{
+	// WIP: does this even make sense?
+}
diff --git a/src/port/pg_cpucap_x86.c b/src/port/pg_cpucap_x86.c
index 07462bd1d2a..3a62a3a582f 100644
--- a/src/port/pg_cpucap_x86.c
+++ b/src/port/pg_cpucap_x86.c
@@ -41,6 +41,22 @@ pg_sse42_available(void)
 	return (exx[2] & (1 << 20)) != 0;	/* SSE 4.2 */
 }
 
+static bool
+pg_pclmul_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+
+	return (exx[2] & (1 << 1)) != 0;	/* PCLMUL */
+}
+
 /*
  * Check if hardware instructions for CRC computation are available.
  */
@@ -50,3 +66,10 @@ pg_cpucap_crc32c(void)
 	if (pg_sse42_available())
 		pg_cpucap |= PGCPUCAP_CRC32C;
 }
+
+void
+pg_cpucap_clmul(void)
+{
+	if (pg_pclmul_available())
+		pg_cpucap |= PGCPUCAP_CLMUL;
+}
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 22c2137df31..fc3cf0d0882 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -15,9 +15,118 @@
 #include "c.h"
 
 #include <nmmintrin.h>
+#include <wmmintrin.h>
 
 #include "port/pg_crc32c.h"
 
+/* WIP: configure checks */
+#ifdef __x86_64__
+#define HAVE_PCLMUL_RUNTIME
+#endif
+
+ /*
+  * WIP: Testing has shown that on Kaby Lake (2016) this algorithm needs two
+  * iterations of the main loop to be faster than using regular CRC
+  * instructions, but Tiger Lake (2020) is fine with a single iteration. Could
+  * use more testing between those years (on AMD as well).
+  */
+#define PCLMUL_THRESHOLD 128
+
+#ifdef HAVE_PCLMUL_RUNTIME
+
+/* Generated by https://github.com/corsix/fast-crc32/ using: */
+/* ./generate -i sse -p crc32c -a v4e */
+/* MIT licensed */
+
+#define clmul_lo(a, b) (_mm_clmulepi64_si128((a), (b), 0))
+#define clmul_hi(a, b) (_mm_clmulepi64_si128((a), (b), 17))
+
+/*
+ * CRC-32C via PCLMULQDQ folding of 64-byte chunks; buf may be unaligned.
+ */
+pg_attribute_no_sanitize_alignment()
+pg_attribute_target("sse4.2,pclmul")
+static pg_crc32c
+pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t length)
+{
+	/* adjust names to match generated code */
+	pg_crc32c	crc0 = crc;
+	size_t		len = length;
+	const char *buf = data;
+
+	// This prolog is trying to avoid loads straddling
+	// cache lines, but it doesn't seem worth it if
+	// we're trying to be fast on small inputs as well
+#if 0
+	for (; len && ((uintptr_t) buf & 7); --len)
+	{
+		crc0 = _mm_crc32_u8(crc0, *buf++);
+	}
+	if (((uintptr_t) buf & 8) && len >= 8)
+	{
+		crc0 = _mm_crc32_u64(crc0, *(const uint64_t *) buf);
+		buf += 8;
+		len -= 8;
+	}
+#endif
+	if (len >= 64)
+	{
+		const char *end = buf + len;
+		const char *limit = buf + len - 64;
+
+		/* First vector chunk. */
+		__m128i		x0 = _mm_loadu_si128((const __m128i *) buf),
+					y0;
+		__m128i		x1 = _mm_loadu_si128((const __m128i *) (buf + 16)),
+					y1;
+		__m128i		x2 = _mm_loadu_si128((const __m128i *) (buf + 32)),
+					y2;
+		__m128i		x3 = _mm_loadu_si128((const __m128i *) (buf + 48)),
+					y3;
+		__m128i		k;
+
+		k = _mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0);
+		x0 = _mm_xor_si128(_mm_cvtsi32_si128(crc0), x0);
+		buf += 64;
+		/* Main loop. */
+		while (buf <= limit)
+		{
+			y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k);
+			y1 = clmul_lo(x1, k), x1 = clmul_hi(x1, k);
+			y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k);
+			y3 = clmul_lo(x3, k), x3 = clmul_hi(x3, k);
+			y0 = _mm_xor_si128(y0, _mm_loadu_si128((const __m128i *) buf)), x0 = _mm_xor_si128(x0, y0);
+			y1 = _mm_xor_si128(y1, _mm_loadu_si128((const __m128i *) (buf + 16))), x1 = _mm_xor_si128(x1, y1);
+			y2 = _mm_xor_si128(y2, _mm_loadu_si128((const __m128i *) (buf + 32))), x2 = _mm_xor_si128(x2, y2);
+			y3 = _mm_xor_si128(y3, _mm_loadu_si128((const __m128i *) (buf + 48))), x3 = _mm_xor_si128(x3, y3);
+			buf += 64;
+		}
+
+		/* Reduce x0 ... x3 to just x0. */
+		k = _mm_setr_epi32(0xf20c0dfe, 0, 0x493c7d27, 0);
+		y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k);
+		y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k);
+		y0 = _mm_xor_si128(y0, x1), x0 = _mm_xor_si128(x0, y0);
+		y2 = _mm_xor_si128(y2, x3), x2 = _mm_xor_si128(x2, y2);
+		k = _mm_setr_epi32(0x3da6d0cb, 0, 0xba4fc28e, 0);
+		y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k);
+		y0 = _mm_xor_si128(y0, x2), x0 = _mm_xor_si128(x0, y0);
+
+		/* Reduce 128 bits to 32 bits, and multiply by x^32. */
+		crc0 = _mm_crc32_u64(0, _mm_extract_epi64(x0, 0));
+		crc0 = _mm_crc32_u64(crc0, _mm_extract_epi64(x0, 1));
+		len = end - buf;
+	}
+	for (; len >= 8; buf += 8, len -= 8)
+		crc0 = _mm_crc32_u64(crc0, *(const uint64_t *) buf);
+	for (; len; --len)
+		crc0 = _mm_crc32_u8(crc0, *buf++);
+
+	return crc0;
+}
+
+#endif
+
 pg_attribute_no_sanitize_alignment()
 pg_attribute_target("sse4.2")
 pg_crc32c
@@ -26,6 +135,17 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 	const unsigned char *p = data;
 	const unsigned char *pend = p + len;
 
+	/* XXX not for commit */
+	const pg_crc32c orig_crc PG_USED_FOR_ASSERTS_ONLY = crc;
+	const size_t orig_len PG_USED_FOR_ASSERTS_ONLY = len;
+
+#ifdef HAVE_PCLMUL_RUNTIME
+	if (len >= PCLMUL_THRESHOLD && (pg_cpucap & PGCPUCAP_CLMUL))
+	{
+		return pg_comp_crc32c_pclmul(crc, data, len);
+	}
+#endif
+
 	/*
 	 * Process eight bytes of data at a time.
 	 *
@@ -66,5 +186,8 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 		p++;
 	}
 
+	/* XXX not for commit */
+	Assert(crc == pg_comp_crc32c_sb8(orig_crc, data, orig_len));
+
 	return crc;
 }
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index b65bb2d5368..662bd37ace6 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2282,6 +2282,30 @@ SELECT crc32c('The quick brown fox jumps over the lazy dog.');
  419469235
 (1 row)
 
+SELECT crc32c(repeat('A', 80)::bytea);
+   crc32c   
+------------
+ 3799127650
+(1 row)
+
+SELECT crc32c(repeat('A', 127)::bytea);
+  crc32c   
+-----------
+ 291820082
+(1 row)
+
+SELECT crc32c(repeat('A', 128)::bytea);
+  crc32c   
+-----------
+ 816091258
+(1 row)
+
+SELECT crc32c(repeat('A', 129)::bytea);
+   crc32c   
+------------
+ 4213642571
+(1 row)
+
 --
 -- encode/decode
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 8e0f3a0e75f..26f86dc92e0 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -727,6 +727,10 @@ SELECT crc32('The quick brown fox jumps over the lazy dog.');
 
 SELECT crc32c('');
 SELECT crc32c('The quick brown fox jumps over the lazy dog.');
+SELECT crc32c(repeat('A', 80)::bytea);
+SELECT crc32c(repeat('A', 127)::bytea);
+SELECT crc32c(repeat('A', 128)::bytea);
+SELECT crc32c(repeat('A', 129)::bytea);
 
 --
 -- encode/decode
-- 
2.48.1

From 6ab9d5854cdd0247507d2f20cb8a21783547a798 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Tue, 25 Feb 2025 13:59:21 +0700
Subject: [PATCH v8 2/4] Rename CRC *choose files to cpucap* files

On Meson, build them unconditionally on the relevant arch.

FIXME autoconf builds are broken
---
 configure                                     |  4 ++--
 configure.ac                                  |  4 ++--
 src/include/port/pg_cpucap.h                  |  3 +++
 src/include/port/pg_crc32c.h                  |  2 --
 src/port/Makefile                             |  2 ++
 src/port/meson.build                          | 17 +++++++++++-----
 src/port/pg_cpucap.c                          | 18 -----------------
 ..._crc32c_armv8_choose.c => pg_cpucap_arm.c} | 16 ++++++++++++---
 ..._crc32c_sse42_choose.c => pg_cpucap_x86.c} | 20 ++++++++++++++-----
 9 files changed, 49 insertions(+), 37 deletions(-)
 rename src/port/{pg_crc32c_armv8_choose.c => pg_cpucap_arm.c} (92%)
 rename src/port/{pg_crc32c_sse42_choose.c => pg_cpucap_x86.c} (73%)

diff --git a/configure b/configure
index 93fddd69981..5e686793c16 100755
--- a/configure
+++ b/configure
@@ -17692,7 +17692,7 @@ else
 
 $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
 
-    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o"
+    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o"
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
 $as_echo "SSE 4.2 with runtime check" >&6; }
   else
@@ -17708,7 +17708,7 @@ $as_echo "ARMv8 CRC instructions" >&6; }
 
 $as_echo "#define USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
 
-        PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o"
+        PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o"
         { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions with runtime check" >&5
 $as_echo "ARMv8 CRC instructions with runtime check" >&6; }
       else
diff --git a/configure.ac b/configure.ac
index b6d02f5ecc7..056b406f117 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2156,7 +2156,7 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then
 else
   if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
     AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
-    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o"
+    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o"
     AC_MSG_RESULT(SSE 4.2 with runtime check)
   else
     if test x"$USE_ARMV8_CRC32C" = x"1"; then
@@ -2166,7 +2166,7 @@ else
     else
       if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
         AC_DEFINE(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARMv8 CRC Extension with a runtime check.])
-        PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o"
+        PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o"
         AC_MSG_RESULT(ARMv8 CRC instructions with runtime check)
       else
         if test x"$USE_LOONGARCH_CRC32C" = x"1"; then
diff --git a/src/include/port/pg_cpucap.h b/src/include/port/pg_cpucap.h
index 81edfedce5d..5e04213b211 100644
--- a/src/include/port/pg_cpucap.h
+++ b/src/include/port/pg_cpucap.h
@@ -22,4 +22,7 @@
 extern PGDLLIMPORT uint32 pg_cpucap;
 extern void pg_cpucap_initialize(void);
 
+/* arch-specific functions private to src/port */
+extern void pg_cpucap_crc32c(void);
+
 #endif							/* PG_CPUCAP_H */
diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index b565a0f2949..4f0ebb9923c 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -57,7 +57,6 @@ typedef uint32 pg_crc32c;
 extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
 #endif
 
-extern bool pg_crc32c_sse42_available(void);
 extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 
 #elif defined(USE_ARMV8_CRC32C) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
@@ -76,7 +75,6 @@ extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t le
 extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
 #endif
 
-extern bool pg_crc32c_armv8_available(void);
 extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
 
 #elif defined(USE_LOONGARCH_CRC32C)
diff --git a/src/port/Makefile b/src/port/Makefile
index 5a05179e926..1fc03713b31 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -45,6 +45,8 @@ OBJS = \
 	path.o \
 	pg_bitutils.o \
 	pg_cpucap.o \
+	pg_cpucap_x86.o \
+	pg_cpucap_arm.o \
 	pg_popcount_avx512.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index e1e7ce8fb87..baa8e16200d 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -78,22 +78,29 @@ if host_system != 'windows'
   replace_funcs_neg += [['pthread_barrier_wait']]
 endif
 
+# arch-specific runtime checks
+if host_cpu == 'x86' or host_cpu == 'x86_64'
+  pgport_sources += files(
+    'pg_cpucap_x86.c'
+  )
+
+elif host_cpu == 'arm' or host_cpu == 'aarch64'
+  pgport_sources += files(
+    'pg_cpucap_arm.c'
+  )
+endif
+
 # Replacement functionality to be built if corresponding configure symbol
 # is true
 replace_funcs_pos = [
   # x86/x64
   ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'],
   ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
-  # WIP sometime we'll need to build these based on host_cpu
-  ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C'],
-  ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
 
   # arm / aarch64
   ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'],
   ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
-  ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C'],
-  ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
 
   # loongarch
diff --git a/src/port/pg_cpucap.c b/src/port/pg_cpucap.c
index eba6e31c63f..88d75827022 100644
--- a/src/port/pg_cpucap.c
+++ b/src/port/pg_cpucap.c
@@ -14,29 +14,11 @@
 #include "c.h"
 
 #include "port/pg_cpucap.h"
-#include "port/pg_crc32c.h"
 
 
 /* starts uninitialized so we can detect errors of omission */
 uint32		pg_cpucap = 0;
 
-/*
- * Check if hardware instructions for CRC computation are available.
- */
-static void
-pg_cpucap_crc32c(void)
-{
-	/* WIP: It seems like we should use CPU arch symbols instead */
-#if defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
-	if (pg_crc32c_sse42_available())
-		pg_cpucap |= PGCPUCAP_CRC32C;
-
-#elif defined(USE_ARMV8_CRC32C) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
-	if (pg_crc32c_armv8_available())
-		pg_cpucap |= PGCPUCAP_CRC32C;
-#endif
-}
-
 /*
  * This needs to be called in main() for every
  * program that calls a function that dispatches
diff --git a/src/port/pg_crc32c_armv8_choose.c b/src/port/pg_cpucap_arm.c
similarity index 92%
rename from src/port/pg_crc32c_armv8_choose.c
rename to src/port/pg_cpucap_arm.c
index e3654427c3f..19e052fecf6 100644
--- a/src/port/pg_crc32c_armv8_choose.c
+++ b/src/port/pg_cpucap_arm.c
@@ -1,6 +1,6 @@
 /*-------------------------------------------------------------------------
  *
- * pg_crc32c_armv8_choose.c
+ * pg_cpucap_arm.c
  *	  Check if the CPU we're running on supports the ARMv8 CRC Extension.
  *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  src/port/pg_crc32c_armv8_choose.c
+ *	  src/port/pg_cpucap_arm.c
  *
  *-------------------------------------------------------------------------
  */
@@ -35,7 +35,7 @@
 
 #include "port/pg_crc32c.h"
 
-bool
+static bool
 pg_crc32c_armv8_available(void)
 {
 #if defined(HAVE_ELF_AUX_INFO)
@@ -101,3 +101,13 @@ pg_crc32c_armv8_available(void)
 	return false;
 #endif
 }
+
+/*
+ * Set PGCPUCAP_CRC32C in pg_cpucap when pg_crc32c_armv8_available() is true.
+ */
+void
+pg_cpucap_crc32c(void)
+{
+	if (pg_crc32c_armv8_available())
+		pg_cpucap |= PGCPUCAP_CRC32C;
+}
diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_cpucap_x86.c
similarity index 73%
rename from src/port/pg_crc32c_sse42_choose.c
rename to src/port/pg_cpucap_x86.c
index f4d3215bc55..07462bd1d2a 100644
--- a/src/port/pg_crc32c_sse42_choose.c
+++ b/src/port/pg_cpucap_x86.c
@@ -1,6 +1,6 @@
 /*-------------------------------------------------------------------------
  *
- * pg_crc32c_sse42_choose.c
+ * pg_cpucap_x86.c
  *	  Check if the CPU we're running on supports SSE4.2.
  *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  src/port/pg_crc32c_sse42_choose.c
+ *	  src/port/pg_cpucap_x86.c
  *
  *-------------------------------------------------------------------------
  */
@@ -23,10 +23,10 @@
 #include <intrin.h>
 #endif
 
-#include "port/pg_crc32c.h"
+#include "port/pg_cpucap.h"
 
-bool
-pg_crc32c_sse42_available(void)
+static bool
+pg_sse42_available(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
@@ -40,3 +40,13 @@ pg_crc32c_sse42_available(void)
 
 	return (exx[2] & (1 << 20)) != 0;	/* SSE 4.2 */
 }
+
+/*
+ * Set PGCPUCAP_CRC32C in pg_cpucap when pg_sse42_available() is true.
+ */
+void
+pg_cpucap_crc32c(void)
+{
+	if (pg_sse42_available())
+		pg_cpucap |= PGCPUCAP_CRC32C;
+}
-- 
2.48.1

Reply via email to