On Tue, Mar 4, 2025 at 2:11 AM Nathan Bossart <nathandboss...@gmail.com> wrote:
>
> Overall, I wish we could avoid splitting things into separate files and
> adding more header file gymnastics, but maybe there isn't much better we
> can do without overhauling the CPU feature detection code.

I wanted to make an attempt to make this aspect nicer. v13-0002
incorporates deliberately compact and simple loops for inlined
constant input into the dispatch function, and leaves the existing
code alone. This avoids code churn and saves vertical space in the
copied code. It needs a bit more commentary, but I hope this is a more
digestible prerequisite to the CLMUL algorithm -- as a reminder, it'll
be simpler if we can always assume non-constant input can go through a
function pointer.

I've re-attached the modified perf test from v12 just in case anyone
wants to play with it (v13-0003), but named so that the CF bot can't
find it, since it breaks the tests in the original perf test (It's not
for commit anyway).

Adding back AVX-512 should be fairly mechanical, since Raghuveer and
Nathan have already done the work needed for that.

-- 
John Naylor
Amazon Web Services
From 298cbb26558807fe9ce87f3709d6bfb785668d5d Mon Sep 17 00:00:00 2001
From: Paul Amonson <paul.d.amon...@intel.com>
Date: Mon, 6 May 2024 08:34:17 -0700
Subject: [PATCH v13 1/3] Add a Postgres SQL function for crc32c benchmarking

Add a drive_crc32c() function to use for benchmarking crc32c
computation. The function takes 2 arguments:

(1) count: num of times CRC32C is computed in a loop.
(2) num: #bytes in the buffer to calculate crc over.

XXX not for commit

Extracted from a patch by  Raghuveer Devulapalli
---
 contrib/meson.build                          |  1 +
 contrib/test_crc32c/Makefile                 | 20 +++++++
 contrib/test_crc32c/expected/test_crc32c.out | 57 ++++++++++++++++++++
 contrib/test_crc32c/meson.build              | 34 ++++++++++++
 contrib/test_crc32c/sql/test_crc32c.sql      |  3 ++
 contrib/test_crc32c/test_crc32c--1.0.sql     |  1 +
 contrib/test_crc32c/test_crc32c.c            | 47 ++++++++++++++++
 contrib/test_crc32c/test_crc32c.control      |  4 ++
 8 files changed, 167 insertions(+)
 create mode 100644 contrib/test_crc32c/Makefile
 create mode 100644 contrib/test_crc32c/expected/test_crc32c.out
 create mode 100644 contrib/test_crc32c/meson.build
 create mode 100644 contrib/test_crc32c/sql/test_crc32c.sql
 create mode 100644 contrib/test_crc32c/test_crc32c--1.0.sql
 create mode 100644 contrib/test_crc32c/test_crc32c.c
 create mode 100644 contrib/test_crc32c/test_crc32c.control

diff --git a/contrib/meson.build b/contrib/meson.build
index 1ba73ebd67a..06673db0625 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
   'install_dir': contrib_doc_dir,
 }
 
+subdir('test_crc32c')
 subdir('amcheck')
 subdir('auth_delay')
 subdir('auto_explain')
diff --git a/contrib/test_crc32c/Makefile b/contrib/test_crc32c/Makefile
new file mode 100644
index 00000000000..5b747c6184a
--- /dev/null
+++ b/contrib/test_crc32c/Makefile
@@ -0,0 +1,20 @@
+MODULE_big = test_crc32c
+OBJS = test_crc32c.o
+PGFILEDESC = "test"
+EXTENSION = test_crc32c
+DATA = test_crc32c--1.0.sql
+
+first: all
+
+# test_crc32c.o:	CFLAGS+=-g
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_crc32c
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/test_crc32c/expected/test_crc32c.out b/contrib/test_crc32c/expected/test_crc32c.out
new file mode 100644
index 00000000000..dff6bb3133b
--- /dev/null
+++ b/contrib/test_crc32c/expected/test_crc32c.out
@@ -0,0 +1,57 @@
+CREATE EXTENSION test_crc32c;
+select drive_crc32c(1, i) from generate_series(100, 300, 4) i;
+ drive_crc32c 
+--------------
+    532139994
+   2103623867
+    785984197
+   2686825890
+   3213049059
+   3819630168
+   1389234603
+    534072900
+   2930108140
+   2496889855
+   1475239611
+    136366931
+   3067402116
+   2012717871
+   3682416023
+   2054270645
+   1817339875
+   4100939569
+   1192727539
+   3636976218
+    369764421
+   3161609879
+   1067984880
+   1235066769
+   3138425899
+    648132037
+   4203750233
+   1330187888
+   2683521348
+   1951644495
+   2574090107
+   3904902018
+   3772697795
+   1644686344
+   2868962106
+   3369218491
+   3902689890
+   3456411865
+    141004025
+   1504497996
+   3782655204
+   3544797610
+   3429174879
+   2524728016
+   3935861181
+     25498897
+    692684159
+    345705535
+   2761600287
+   2654632420
+   3945991399
+(51 rows)
+
diff --git a/contrib/test_crc32c/meson.build b/contrib/test_crc32c/meson.build
new file mode 100644
index 00000000000..d7bec4ba1cb
--- /dev/null
+++ b/contrib/test_crc32c/meson.build
@@ -0,0 +1,34 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+test_crc32c_sources = files(
+  'test_crc32c.c',
+)
+
+if host_system == 'windows'
+  test_crc32c_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'test_crc32c',
+    '--FILEDESC', 'test_crc32c - test code for crc32c library',])
+endif
+
+test_crc32c = shared_module('test_crc32c',
+  test_crc32c_sources,
+  kwargs: contrib_mod_args,
+)
+contrib_targets += test_crc32c
+
+install_data(
+  'test_crc32c.control',
+  'test_crc32c--1.0.sql',
+  kwargs: contrib_data_args,
+)
+
+tests += {
+  'name': 'test_crc32c',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'regress': {
+    'sql': [
+      'test_crc32c',
+    ],
+  },
+}
diff --git a/contrib/test_crc32c/sql/test_crc32c.sql b/contrib/test_crc32c/sql/test_crc32c.sql
new file mode 100644
index 00000000000..95c6dfe4488
--- /dev/null
+++ b/contrib/test_crc32c/sql/test_crc32c.sql
@@ -0,0 +1,3 @@
+CREATE EXTENSION test_crc32c;
+
+select drive_crc32c(1, i) from generate_series(100, 300, 4) i;
diff --git a/contrib/test_crc32c/test_crc32c--1.0.sql b/contrib/test_crc32c/test_crc32c--1.0.sql
new file mode 100644
index 00000000000..52b9772f908
--- /dev/null
+++ b/contrib/test_crc32c/test_crc32c--1.0.sql
@@ -0,0 +1 @@
+CREATE FUNCTION drive_crc32c  (count int, num int) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/test_crc32c/test_crc32c.c b/contrib/test_crc32c/test_crc32c.c
new file mode 100644
index 00000000000..28bc42de314
--- /dev/null
+++ b/contrib/test_crc32c/test_crc32c.c
@@ -0,0 +1,47 @@
+/* select drive_crc32c(1000000, 1024); */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "port/pg_crc32c.h"
+#include "common/pg_prng.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * drive_crc32c(count: int, num: int) returns bigint
+ *
+ * count is the nuimber of loops to perform
+ *
+ * num is the number byte in the buffer to calculate
+ * crc32c over.
+ */
+PG_FUNCTION_INFO_V1(drive_crc32c);
+Datum
+drive_crc32c(PG_FUNCTION_ARGS)
+{
+	int64			count	= PG_GETARG_INT32(0);
+	int64			num		= PG_GETARG_INT32(1);
+	char*		data	= malloc((size_t)num);
+	pg_crc32c crc;
+	pg_prng_state state;
+	uint64 seed = 42;
+	pg_prng_seed(&state, seed);
+	/* set random data */
+	for (uint64 i = 0; i < num; i++)
+	{
+		data[i] = pg_prng_uint32(&state) % 255;
+	}
+
+	INIT_CRC32C(crc);
+
+	while(count--)
+	{
+		INIT_CRC32C(crc);
+		COMP_CRC32C(crc, data, num);
+		FIN_CRC32C(crc);
+	}
+
+	free((void *)data);
+
+	PG_RETURN_INT64((int64_t)crc);
+}
diff --git a/contrib/test_crc32c/test_crc32c.control b/contrib/test_crc32c/test_crc32c.control
new file mode 100644
index 00000000000..878a077ee18
--- /dev/null
+++ b/contrib/test_crc32c/test_crc32c.control
@@ -0,0 +1,4 @@
+comment = 'test'
+default_version = '1.0'
+module_pathname = '$libdir/test_crc32c'
+relocatable = true
-- 
2.48.1

From e5f721d017e4431a32e2605639afea64bf11f52e Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Fri, 28 Feb 2025 16:27:30 +0700
Subject: [PATCH v13 2/3] Inline CRC computation for fixed-length input

Use a simplified copy of the loop in pg_crc32c_sse42.c to avoid
moving code to a separate header.
---
 src/include/port/pg_crc32c.h | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index 65ebeacf4b1..b9f0a8c7cca 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -43,12 +43,43 @@ typedef uint32 pg_crc32c;
 
 #if defined(USE_SSE42_CRC32C)
 /* Use Intel SSE4.2 instructions. */
+
+#include <nmmintrin.h>
+
 #define COMP_CRC32C(crc, data, len) \
-	((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
+	((crc) = pg_comp_crc32c_dispatch((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
 extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 
+pg_attribute_no_sanitize_alignment()
+static inline
+pg_crc32c
+pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len)
+{
+	if (__builtin_constant_p(len))
+	{
+		const unsigned char *p = data;
+
+		/*
+		 * For constant inputs, inline the computation to avoid the
+		 * indirect function call. This also allows the compiler to unroll
+		 * loops for small inputs.
+		 */
+#if SIZEOF_VOID_P >= 8
+		for (; len >= 8; p += 8, len -= 8)
+			crc = _mm_crc32_u64(crc, *(const uint64 *) p);
+#endif
+		for (; len >= 4; p += 4, len -= 4)
+			crc = _mm_crc32_u32(crc, *(const uint32 *) p);
+		for (; len > 0; --len)
+			crc = _mm_crc32_u8(crc, *p++);
+		return crc;
+	}
+	else
+		return pg_comp_crc32c_sse42(crc, data, len);
+}
+
 #elif defined(USE_ARMV8_CRC32C)
 /* Use ARMv8 CRC Extension instructions. */
 
-- 
2.48.1

Attachment: v13-0003-Attempt-to-make-benchmark-more-sensitive-to-late.patch.nocfbot
Description: Binary data

Reply via email to