Hi Raghuveer, You raised some interesting points, which deserve a thoughtful response. After sleeping on it, however, I came to the conclusion that a sweeping change in runtime checks, with either of our approaches, has downsides and unresolved questions. Perhaps we can come back to it at a later time. For this release cycle, I took a step back and tried to think of the least invasive way to solve the immediate problem, which is: how to allow existing builds with "-msse4.2" to take advantage of CLMUL while not adding overhead. Here's what I came up with in v11:
0001: same benchmark module as before 0002: For SSE4.2 builds, arrange for constant input to use an inlined path, so that the compiler can emit unrolled loops anywhere. This is particularly important for the WAL insertion lock, so this is possibly committable on its own just for that. 0003: the PCLMUL path, only for runtime-check builds 0004: the PCLMUL path for SSE4.2 builds. This uses a function pointer for long-ish input and the same inlined path as above for short input (whether constant or not). So it gets the best of both worlds. There is also a separate issue: On Tue, Feb 25, 2025 at 6:05 PM John Naylor <johncnaylo...@gmail.com> wrote: > > Another thing I found in Agner's manuals: AMD Zen, even as recently as > Zen 4, don't have as good a microarchitecture for PCLMUL, so if anyone > with such a machine would like to help test the cutoff David Rowley shared some results off-list, which are: Zen 4 is very good with this algorithm even at 64 bytes of input, but Zen 2 regresses up to maybe 256 bytes. Having a large cutoff to cover all bases makes this less useful, and that was one of my reservations about AVX-512. However, with the corsix generator I found it's possible to specify AVX-512 with a single accumulator (rather than 4), which still gives a minimum input of 64 bytes, so I plan to put something together to demonstrate. (Older machines could use the 3-way stream algorithm as a fallback on long inputs, as I've mentioned elsewhere, assuming that's legally unencumbered.) -- John Naylor Amazon Web Services
From f96ae8bf6913796c5d724ff9da25bbc79927ff1c Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Fri, 28 Feb 2025 18:27:40 +0700 Subject: [PATCH v11 4/4] Use runtime check even when we have SSE 4.2 at compile time This allows us to use PCLMUL for longer inputs. Short inputs are inlined to avoid the indirection through a function pointer. --- configure | 2 +- configure.ac | 2 +- src/include/port/pg_crc32c.h | 15 +++++++++++---- src/port/meson.build | 1 + src/port/pg_crc32c_sse42_choose.c | 2 ++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/configure b/configure index 93fddd69981..91c0ffc8272 100755 --- a/configure +++ b/configure @@ -17684,7 +17684,7 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then $as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sse42.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sse42_choose.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5 $as_echo "SSE 4.2" >&6; } else diff --git a/configure.ac b/configure.ac index b6d02f5ecc7..a85bdbd4ff6 100644 --- a/configure.ac +++ b/configure.ac @@ -2151,7 +2151,7 @@ fi AC_MSG_CHECKING([which CRC-32C implementation to use]) if test x"$USE_SSE42_CRC32C" = x"1"; then AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 use Intel SSE 4.2 CRC instructions.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sse42_choose.o" AC_MSG_RESULT(SSE 4.2) else if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index fe0e1b6b275..26b676dddc9 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -55,22 +55,29 @@ typedef uint32 pg_crc32c; ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); 
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#ifdef USE_PCLMUL_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t len); +#endif static inline pg_crc32c pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) { - if (__builtin_constant_p(len) && len < 64) + if (len < 64) { /* - * For small constant inputs, inline the computation. This allows the - * compiler to unroll loops. + * For small inputs, inline the computation to avoid the runtime + * check. This also allows the compiler to unroll loops for constant + * input. */ return pg_comp_crc32c_sse42_inline(crc, data, len); } else - return pg_comp_crc32c_sse42(crc, data, len); + /* For larger inputs, use a runtime check for PCLMUL instructions. */ + return pg_comp_crc32c(crc, data, len); } #elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) diff --git a/src/port/meson.build b/src/port/meson.build index 7fcfa728d43..8d70a4d510e 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -83,6 +83,7 @@ replace_funcs_pos = [ # x86/x64 ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], + ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index abea0f90eb3..89a48c76894 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -55,8 +55,10 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) pg_comp_crc32c = pg_comp_crc32c_pclmul; #endif } +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK else pg_comp_crc32c = pg_comp_crc32c_sb8; +#endif return pg_comp_crc32c(crc, data, len); } -- 2.48.1
From 9bf92b1ee09810ae734e6463fe01ad9858fa7168 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 12 Feb 2025 15:27:16 +0700 Subject: [PATCH v11 3/4] Improve CRC32C performance on x86_64 The current SSE4.2 implementation of CRC32C relies on the native CRC32 instruction, which operates on 8 bytes at a time. We can get a substantial speedup on longer inputs by using carryless multiplication on SIMD registers, processing 64 bytes per loop iteration. The PCLMULQDQ instruction has been widely available since 2011 (almost as old as SSE 4.2), so this commit now requires that, as well as SSE 4.2, to build pg_crc32c_sse42.c. The MIT-licensed implementation was generated with the "generate" program from https://github.com/corsix/fast-crc32/ Based on: "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" V. Gopal, E. Ozturk, et al., 2009 Author: Raghuveer Devulapalli <raghuveer.devulapa...@intel.com> Author: John Naylor <johncnaylo...@gmail.com> Discussion: https://postgr.es/m/ph8pr11mb82869ff741dfa4e9a029ff13fb...@ph8pr11mb8286.namprd11.prod.outlook.com --- src/include/port/pg_crc32c.h | 30 ++++++++--- src/port/pg_crc32c_sse42.c | 88 +++++++++++++++++++++++++++++++ src/port/pg_crc32c_sse42_choose.c | 26 ++++----- 3 files changed, 124 insertions(+), 20 deletions(-) diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 5ccc79295c0..fe0e1b6b275 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -37,6 +37,11 @@ typedef uint32 pg_crc32c; +/* WIP: configure checks */ +#ifdef __x86_64__ +#define USE_PCLMUL_WITH_RUNTIME_CHECK +#endif + /* The INIT and EQ macros are the same for all implementations. 
*/ #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) #define EQ_CRC32C(c1, c2) ((c1) == (c2)) @@ -68,6 +73,23 @@ pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) return pg_comp_crc32c_sse42(crc, data, len); } +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) + +/* + * Use Intel SSE 4.2 or PCLMUL instructions, but perform a runtime check first + * to check that they are available. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#ifdef USE_PCLMUL_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t len); +#endif + #elif defined(USE_ARMV8_CRC32C) /* Use ARMv8 CRC Extension instructions. */ @@ -86,7 +108,7 @@ extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t le extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len); -#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) +#elif defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) /* * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first @@ -98,13 +120,7 @@ extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_ extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); - -#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK -extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); -#endif -#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); -#endif #else /* diff --git 
a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 6a35f7fdc67..b56da2f6934 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -15,6 +15,7 @@ #include "c.h" #include <nmmintrin.h> +#include <wmmintrin.h> #include "port/pg_crc32c.h" #include "port/pg_crc32c_sse42_impl.h" @@ -26,3 +27,90 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) { return pg_comp_crc32c_sse42_inline(crc, data, len); } + +#ifdef USE_PCLMUL_WITH_RUNTIME_CHECK + +/* Generated by https://github.com/corsix/fast-crc32/ using: */ +/* ./generate -i sse -p crc32c -a v4e */ +/* MIT licensed */ + +#define clmul_lo(a, b) (_mm_clmulepi64_si128((a), (b), 0)) +#define clmul_hi(a, b) (_mm_clmulepi64_si128((a), (b), 17)) + +pg_attribute_target("sse4.2,pclmul") +pg_crc32c +pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t length) +{ + /* adjust names to match generated code */ + pg_crc32c crc0 = crc; + size_t len = length; + const char *buf = data; + + // This prolog is trying to avoid loads straddling + // cache lines, but it doesn't seem worth it if + // we're trying to be fast on small inputs as well +#if 0 + for (; len && ((uintptr_t) buf & 7); --len) + { + crc0 = _mm_crc32_u8(crc0, *buf++); + } + if (((uintptr_t) buf & 8) && len >= 8) + { + crc0 = _mm_crc32_u64(crc0, *(const uint64_t *) buf); + buf += 8; + len -= 8; + } +#endif + if (len >= 64) + { + const char *end = buf + len; + const char *limit = buf + len - 64; + + /* First vector chunk. */ + __m128i x0 = _mm_loadu_si128((const __m128i *) buf), + y0; + __m128i x1 = _mm_loadu_si128((const __m128i *) (buf + 16)), + y1; + __m128i x2 = _mm_loadu_si128((const __m128i *) (buf + 32)), + y2; + __m128i x3 = _mm_loadu_si128((const __m128i *) (buf + 48)), + y3; + __m128i k; + + k = _mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0); + x0 = _mm_xor_si128(_mm_cvtsi32_si128(crc0), x0); + buf += 64; + /* Main loop. 
*/ + while (buf <= limit) + { + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y1 = clmul_lo(x1, k), x1 = clmul_hi(x1, k); + y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); + y3 = clmul_lo(x3, k), x3 = clmul_hi(x3, k); + y0 = _mm_xor_si128(y0, _mm_loadu_si128((const __m128i *) buf)), x0 = _mm_xor_si128(x0, y0); + y1 = _mm_xor_si128(y1, _mm_loadu_si128((const __m128i *) (buf + 16))), x1 = _mm_xor_si128(x1, y1); + y2 = _mm_xor_si128(y2, _mm_loadu_si128((const __m128i *) (buf + 32))), x2 = _mm_xor_si128(x2, y2); + y3 = _mm_xor_si128(y3, _mm_loadu_si128((const __m128i *) (buf + 48))), x3 = _mm_xor_si128(x3, y3); + buf += 64; + } + + /* Reduce x0 ... x3 to just x0. */ + k = _mm_setr_epi32(0xf20c0dfe, 0, 0x493c7d27, 0); + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); + y0 = _mm_xor_si128(y0, x1), x0 = _mm_xor_si128(x0, y0); + y2 = _mm_xor_si128(y2, x3), x2 = _mm_xor_si128(x2, y2); + k = _mm_setr_epi32(0x3da6d0cb, 0, 0xba4fc28e, 0); + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y0 = _mm_xor_si128(y0, x2), x0 = _mm_xor_si128(x0, y0); + + /* Reduce 128 bits to 32 bits, and multiply by x^32. */ + crc0 = _mm_crc32_u64(0, _mm_extract_epi64(x0, 0)); + crc0 = _mm_crc32_u64(crc0, _mm_extract_epi64(x0, 1)); + len = end - buf; + } + + return pg_comp_crc32c_sse42_inline(crc0, buf, len); +} + +#endif diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 65dbc4d4249..abea0f90eb3 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -30,8 +30,12 @@ #include "port/pg_crc32c.h" -static bool -pg_crc32c_sse42_available(void) +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. 
+ */ +static pg_crc32c +pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) { unsigned int exx[4] = {0, 0, 0, 0}; @@ -43,18 +47,14 @@ pg_crc32c_sse42_available(void) #error cpuid instruction not available #endif - return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ -} - -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static pg_crc32c -pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) -{ - if (pg_crc32c_sse42_available()) + if ((exx[2] & (1 << 20)) != 0) /* SSE 4.2 */ + { pg_comp_crc32c = pg_comp_crc32c_sse42; +#ifdef USE_PCLMUL_WITH_RUNTIME_CHECK + if ((exx[2] & (1 << 1)) != 0) /* PCLMUL */ + pg_comp_crc32c = pg_comp_crc32c_pclmul; +#endif + } else pg_comp_crc32c = pg_comp_crc32c_sb8; -- 2.48.1
From 7cd98c95678752dac778c3e4c36cc3ae8d93639d Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Fri, 28 Feb 2025 16:27:30 +0700 Subject: [PATCH v11 2/4] Inline CRC computation for small fixed-length input --- src/include/port/pg_crc32c.h | 21 ++++++- src/include/port/pg_crc32c_sse42_impl.h | 74 +++++++++++++++++++++++++ src/port/pg_crc32c_sse42.c | 46 +-------------- 3 files changed, 96 insertions(+), 45 deletions(-) create mode 100644 src/include/port/pg_crc32c_sse42_impl.h diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 65ebeacf4b1..5ccc79295c0 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -43,12 +43,31 @@ typedef uint32 pg_crc32c; #if defined(USE_SSE42_CRC32C) /* Use Intel SSE4.2 instructions. */ + +#include "pg_crc32c_sse42_impl.h" + #define COMP_CRC32C(crc, data, len) \ - ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) + ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +static inline +pg_crc32c +pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) +{ + if (__builtin_constant_p(len) && len < 64) + { + /* + * For small constant inputs, inline the computation. This allows the + * compiler to unroll loops. + */ + return pg_comp_crc32c_sse42_inline(crc, data, len); + } + else + return pg_comp_crc32c_sse42(crc, data, len); +} + #elif defined(USE_ARMV8_CRC32C) /* Use ARMv8 CRC Extension instructions. 
*/ diff --git a/src/include/port/pg_crc32c_sse42_impl.h b/src/include/port/pg_crc32c_sse42_impl.h new file mode 100644 index 00000000000..e10ad777618 --- /dev/null +++ b/src/include/port/pg_crc32c_sse42_impl.h @@ -0,0 +1,74 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c_sse42_impl.h + * Inline implementation of CRC computation using SSE 4.2 + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/pg_crc32c_sse42_impl.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_CRC32C_SSE42_IMPL_H +#define PG_CRC32C_SSE42_IMPL_H + +#include "c.h" + +#include <nmmintrin.h> + +pg_attribute_no_sanitize_alignment() +pg_attribute_target("sse4.2") +static inline +pg_crc32c +pg_comp_crc32c_sse42_inline(pg_crc32c crc, const void *data, size_t len) +{ + const unsigned char *p = data; + const unsigned char *pend = p + len; + + /* + * Process eight bytes of data at a time. + * + * NB: We do unaligned accesses here. The Intel architecture allows that, + * and performance testing didn't show any performance gain from aligning + * the begin address. + */ +#ifdef __x86_64__ + while (p + 8 <= pend) + { + crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p)); + p += 8; + } + + /* Process remaining full four bytes if any */ + if (p + 4 <= pend) + { + crc = _mm_crc32_u32(crc, *((const unsigned int *) p)); + p += 4; + } +#else + + /* + * Process four bytes at a time. (The eight byte instruction is not + * available on the 32-bit x86 architecture). + */ + while (p + 4 <= pend) + { + crc = _mm_crc32_u32(crc, *((const unsigned int *) p)); + p += 4; + } +#endif /* __x86_64__ */ + + /* Process any remaining bytes one at a time. 
*/ + while (p < pend) + { + crc = _mm_crc32_u8(crc, *p); + p++; + } + + return crc; +} + +#endif /* PG_CRC32C_SSE42_IMPL_H */ diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 22c2137df31..6a35f7fdc67 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -17,54 +17,12 @@ #include <nmmintrin.h> #include "port/pg_crc32c.h" +#include "port/pg_crc32c_sse42_impl.h" pg_attribute_no_sanitize_alignment() pg_attribute_target("sse4.2") pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) { - const unsigned char *p = data; - const unsigned char *pend = p + len; - - /* - * Process eight bytes of data at a time. - * - * NB: We do unaligned accesses here. The Intel architecture allows that, - * and performance testing didn't show any performance gain from aligning - * the begin address. - */ -#ifdef __x86_64__ - while (p + 8 <= pend) - { - crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p)); - p += 8; - } - - /* Process remaining full four bytes if any */ - if (p + 4 <= pend) - { - crc = _mm_crc32_u32(crc, *((const unsigned int *) p)); - p += 4; - } -#else - - /* - * Process four bytes at a time. (The eight byte instruction is not - * available on the 32-bit x86 architecture). - */ - while (p + 4 <= pend) - { - crc = _mm_crc32_u32(crc, *((const unsigned int *) p)); - p += 4; - } -#endif /* __x86_64__ */ - - /* Process any remaining bytes one at a time. */ - while (p < pend) - { - crc = _mm_crc32_u8(crc, *p); - p++; - } - - return crc; + return pg_comp_crc32c_sse42_inline(crc, data, len); } -- 2.48.1
From f176a0cfadcb0a9971746f23677de7d4278a5391 Mon Sep 17 00:00:00 2001 From: Paul Amonson <paul.d.amon...@intel.com> Date: Mon, 6 May 2024 08:34:17 -0700 Subject: [PATCH v11 1/4] Add a Postgres SQL function for crc32c benchmarking Add a drive_crc32c() function to use for benchmarking crc32c computation. The function takes 2 arguments: (1) count: num of times CRC32C is computed in a loop. (2) num: #bytes in the buffer to calculate crc over. XXX not for commit Extracted from a patch by Raghuveer Devulapalli --- contrib/meson.build | 1 + contrib/test_crc32c/Makefile | 20 +++++++ contrib/test_crc32c/expected/test_crc32c.out | 57 ++++++++++++++++++++ contrib/test_crc32c/meson.build | 34 ++++++++++++ contrib/test_crc32c/sql/test_crc32c.sql | 3 ++ contrib/test_crc32c/test_crc32c--1.0.sql | 1 + contrib/test_crc32c/test_crc32c.c | 47 ++++++++++++++++ contrib/test_crc32c/test_crc32c.control | 4 ++ 8 files changed, 167 insertions(+) create mode 100644 contrib/test_crc32c/Makefile create mode 100644 contrib/test_crc32c/expected/test_crc32c.out create mode 100644 contrib/test_crc32c/meson.build create mode 100644 contrib/test_crc32c/sql/test_crc32c.sql create mode 100644 contrib/test_crc32c/test_crc32c--1.0.sql create mode 100644 contrib/test_crc32c/test_crc32c.c create mode 100644 contrib/test_crc32c/test_crc32c.control diff --git a/contrib/meson.build b/contrib/meson.build index 1ba73ebd67a..06673db0625 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -12,6 +12,7 @@ contrib_doc_args = { 'install_dir': contrib_doc_dir, } +subdir('test_crc32c') subdir('amcheck') subdir('auth_delay') subdir('auto_explain') diff --git a/contrib/test_crc32c/Makefile b/contrib/test_crc32c/Makefile new file mode 100644 index 00000000000..5b747c6184a --- /dev/null +++ b/contrib/test_crc32c/Makefile @@ -0,0 +1,20 @@ +MODULE_big = test_crc32c +OBJS = test_crc32c.o +PGFILEDESC = "test" +EXTENSION = test_crc32c +DATA = test_crc32c--1.0.sql + +first: all + +# test_crc32c.o: CFLAGS+=-g + 
+ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_crc32c +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/test_crc32c/expected/test_crc32c.out b/contrib/test_crc32c/expected/test_crc32c.out new file mode 100644 index 00000000000..dff6bb3133b --- /dev/null +++ b/contrib/test_crc32c/expected/test_crc32c.out @@ -0,0 +1,57 @@ +CREATE EXTENSION test_crc32c; +select drive_crc32c(1, i) from generate_series(100, 300, 4) i; + drive_crc32c +-------------- + 532139994 + 2103623867 + 785984197 + 2686825890 + 3213049059 + 3819630168 + 1389234603 + 534072900 + 2930108140 + 2496889855 + 1475239611 + 136366931 + 3067402116 + 2012717871 + 3682416023 + 2054270645 + 1817339875 + 4100939569 + 1192727539 + 3636976218 + 369764421 + 3161609879 + 1067984880 + 1235066769 + 3138425899 + 648132037 + 4203750233 + 1330187888 + 2683521348 + 1951644495 + 2574090107 + 3904902018 + 3772697795 + 1644686344 + 2868962106 + 3369218491 + 3902689890 + 3456411865 + 141004025 + 1504497996 + 3782655204 + 3544797610 + 3429174879 + 2524728016 + 3935861181 + 25498897 + 692684159 + 345705535 + 2761600287 + 2654632420 + 3945991399 +(51 rows) + diff --git a/contrib/test_crc32c/meson.build b/contrib/test_crc32c/meson.build new file mode 100644 index 00000000000..d7bec4ba1cb --- /dev/null +++ b/contrib/test_crc32c/meson.build @@ -0,0 +1,34 @@ +# Copyright (c) 2022-2024, PostgreSQL Global Development Group + +test_crc32c_sources = files( + 'test_crc32c.c', +) + +if host_system == 'windows' + test_crc32c_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_crc32c', + '--FILEDESC', 'test_crc32c - test code for crc32c library',]) +endif + +test_crc32c = shared_module('test_crc32c', + test_crc32c_sources, + kwargs: contrib_mod_args, +) +contrib_targets += test_crc32c + +install_data( + 'test_crc32c.control', + 
'test_crc32c--1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'test_crc32c', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_crc32c', + ], + }, +} diff --git a/contrib/test_crc32c/sql/test_crc32c.sql b/contrib/test_crc32c/sql/test_crc32c.sql new file mode 100644 index 00000000000..95c6dfe4488 --- /dev/null +++ b/contrib/test_crc32c/sql/test_crc32c.sql @@ -0,0 +1,3 @@ +CREATE EXTENSION test_crc32c; + +select drive_crc32c(1, i) from generate_series(100, 300, 4) i; diff --git a/contrib/test_crc32c/test_crc32c--1.0.sql b/contrib/test_crc32c/test_crc32c--1.0.sql new file mode 100644 index 00000000000..52b9772f908 --- /dev/null +++ b/contrib/test_crc32c/test_crc32c--1.0.sql @@ -0,0 +1 @@ +CREATE FUNCTION drive_crc32c (count int, num int) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/contrib/test_crc32c/test_crc32c.c b/contrib/test_crc32c/test_crc32c.c new file mode 100644 index 00000000000..28bc42de314 --- /dev/null +++ b/contrib/test_crc32c/test_crc32c.c @@ -0,0 +1,47 @@ +/* select drive_crc32c(1000000, 1024); */ + +#include "postgres.h" +#include "fmgr.h" +#include "port/pg_crc32c.h" +#include "common/pg_prng.h" + +PG_MODULE_MAGIC; + +/* + * drive_crc32c(count: int, num: int) returns bigint + * + * count is the nuimber of loops to perform + * + * num is the number byte in the buffer to calculate + * crc32c over. 
+ */ +PG_FUNCTION_INFO_V1(drive_crc32c); +Datum +drive_crc32c(PG_FUNCTION_ARGS) +{ + int64 count = PG_GETARG_INT32(0); + int64 num = PG_GETARG_INT32(1); + char* data = malloc((size_t)num); + pg_crc32c crc; + pg_prng_state state; + uint64 seed = 42; + pg_prng_seed(&state, seed); + /* set random data */ + for (uint64 i = 0; i < num; i++) + { + data[i] = pg_prng_uint32(&state) % 255; + } + + INIT_CRC32C(crc); + + while(count--) + { + INIT_CRC32C(crc); + COMP_CRC32C(crc, data, num); + FIN_CRC32C(crc); + } + + free((void *)data); + + PG_RETURN_INT64((int64_t)crc); +} diff --git a/contrib/test_crc32c/test_crc32c.control b/contrib/test_crc32c/test_crc32c.control new file mode 100644 index 00000000000..878a077ee18 --- /dev/null +++ b/contrib/test_crc32c/test_crc32c.control @@ -0,0 +1,4 @@ +comment = 'test' +default_version = '1.0' +module_pathname = '$libdir/test_crc32c' +relocatable = true -- 2.48.1