On Wed, Feb 12, 2025 at 4:34 AM Devulapalli, Raghuveer <raghuveer.devulapa...@intel.com> wrote:

> > On my machine that still regresses compared to master in that range
> > (although by not as much) so I still think 128 bytes is the right threshold.
>
> On my TGL, buffer sizes as small as 64 bytes see performance benefits.

Yeah, I'm guessing it's because newer chips will have better IPC. We need to take care not to regress for hardware that's only 5-10 years old.

Attached is v4:

0001: Same perf test, just in case

0002-4: I redid adding the implementation (without the single-block loop and with 128-byte threshold), but split out the steps for reference, as a model for possible ARM support in the future. These will all be squashed on commit. The upstream code has very long lines, even after running pgindent, so some may find that objectionable. We could easily turn some commas into semicolons, and then it'll wrap more nicely. I just wanted to change as little as possible for now. (I also need to check if I need to put more license text here.)

0005: This has a fleshed-out draft commit message, but otherwise is just the same configure/choose support as v3.

Some review comments:

1. Some of the comments that only mention SSE 4.2 in the compile- and run-time checks need to be updated.

> > Okay, Nehalem is 17 years old, and the additional cpuid check would still work on
> > hardware 14-15 years old, so I think it's fine to bump the requirement for runtime
> > hardware support.
>
> Sounds good. I updated the runtime check to include PCLMULQDQ. New algorithm
> will run only on Westmere and newer CPUs.

2. Unfortunately, there is another wrinkle that I failed to consider: If you search the web for "VirtualBox pclmulqdq" you can see a few reports from not very long ago that some hypervisors don't enable the CPUID for pclmul. I don't know how big a problem that is in practice today, but it seems we should actually have separate checks, with fallback. Sorry I didn't think of this earlier.

3. Note: I left out the new test file from v3-0001. We should have tests, but note we already have some CRC tests in src/test/regress/sql/strings.sql -- let's put new ones there. Also, for the longer strings we want to test, it's easier to read/verify to use something like

SELECT crc32c(repeat('A', 128)::bytea);

Maybe it's sufficient to have 127, 128, 129 for lengths, and maybe a couple more.

--
John Naylor
Amazon Web Services
From 57952d1f89f0c3a4a2d28399344e9335f8bee72b Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 12 Feb 2025 15:27:16 +0700 Subject: [PATCH v4 2/5] Vendor SSE implementation from https://github.com/corsix/fast-crc32/ --- src/port/pg_crc32c_sse42.c | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 22c2137df3..6cc39de175 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -68,3 +68,80 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) return crc; } + +/* Generated by https://github.com/corsix/fast-crc32/ using: */ +/* ./generate -i sse -p crc32c -a v4 */ +/* MIT licensed */ + +#include <stddef.h> +#include <stdint.h> +#include <nmmintrin.h> +#include <wmmintrin.h> + +#if defined(_MSC_VER) +#define CRC_AINLINE static __forceinline +#define CRC_ALIGN(n) __declspec(align(n)) +#else +#define CRC_AINLINE static __inline __attribute__((always_inline)) +#define CRC_ALIGN(n) __attribute__((aligned(n))) +#endif +#define CRC_EXPORT extern + +#define clmul_lo(a, b) (_mm_clmulepi64_si128((a), (b), 0)) +#define clmul_hi(a, b) (_mm_clmulepi64_si128((a), (b), 17)) + +CRC_EXPORT uint32_t crc32_impl(uint32_t crc0, const char* buf, size_t len) { + crc0 = ~crc0; + for (; len && ((uintptr_t)buf & 7); --len) { + crc0 = _mm_crc32_u8(crc0, *buf++); + } + if (((uintptr_t)buf & 8) && len >= 8) { + crc0 = _mm_crc32_u64(crc0, *(const uint64_t*)buf); + buf += 8; + len -= 8; + } + if (len >= 64) { + /* First vector chunk. 
*/ + __m128i x0 = _mm_loadu_si128((const __m128i*)buf), y0; + __m128i x1 = _mm_loadu_si128((const __m128i*)(buf + 16)), y1; + __m128i x2 = _mm_loadu_si128((const __m128i*)(buf + 32)), y2; + __m128i x3 = _mm_loadu_si128((const __m128i*)(buf + 48)), y3; + __m128i k; + k = _mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0); + x0 = _mm_xor_si128(_mm_cvtsi32_si128(crc0), x0); + buf += 64; + len -= 64; + /* Main loop. */ + while (len >= 64) { + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y1 = clmul_lo(x1, k), x1 = clmul_hi(x1, k); + y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); + y3 = clmul_lo(x3, k), x3 = clmul_hi(x3, k); + y0 = _mm_xor_si128(y0, _mm_loadu_si128((const __m128i*)buf)), x0 = _mm_xor_si128(x0, y0); + y1 = _mm_xor_si128(y1, _mm_loadu_si128((const __m128i*)(buf + 16))), x1 = _mm_xor_si128(x1, y1); + y2 = _mm_xor_si128(y2, _mm_loadu_si128((const __m128i*)(buf + 32))), x2 = _mm_xor_si128(x2, y2); + y3 = _mm_xor_si128(y3, _mm_loadu_si128((const __m128i*)(buf + 48))), x3 = _mm_xor_si128(x3, y3); + buf += 64; + len -= 64; + } + /* Reduce x0 ... x3 to just x0. */ + k = _mm_setr_epi32(0xf20c0dfe, 0, 0x493c7d27, 0); + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); + y0 = _mm_xor_si128(y0, x1), x0 = _mm_xor_si128(x0, y0); + y2 = _mm_xor_si128(y2, x3), x2 = _mm_xor_si128(x2, y2); + k = _mm_setr_epi32(0x3da6d0cb, 0, 0xba4fc28e, 0); + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y0 = _mm_xor_si128(y0, x2), x0 = _mm_xor_si128(x0, y0); + /* Reduce 128 bits to 32 bits, and multiply by x^32. */ + crc0 = _mm_crc32_u64(0, _mm_extract_epi64(x0, 0)); + crc0 = _mm_crc32_u64(crc0, _mm_extract_epi64(x0, 1)); + } + for (; len >= 8; buf += 8, len -= 8) { + crc0 = _mm_crc32_u64(crc0, *(const uint64_t*)buf); + } + for (; len; --len) { + crc0 = _mm_crc32_u8(crc0, *buf++); + } + return ~crc0; +} -- 2.48.1
From acb63cddd8c8220db97ae0b012bf4f2fb5174e8a Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 12 Feb 2025 17:07:49 +0700 Subject: [PATCH v4 5/5] Improve CRC32C performance on x86_64 The current SSE4.2 implementation of CRC32C relies on the native CRC32 instruction, which operates on 8 bytes at a time. We can get a substantial speedup on longer inputs by using carryless multiplication on SIMD registers, processing 64 bytes per loop iteration. The PCLMULQDQ instruction has been widely available since 2011 (almost as old as SSE 4.2), so this commit now requires that, as well as SSE 4.2, to build pg_crc32c_sse42.c. The MIT-licensed implementation was generated with the "generate" program from https://github.com/corsix/fast-crc32/ Based on: "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" V. Gopal, E. Ozturk, et al., 2009 Author: Raghuveer Devulapalli <raghuveer.devulapa...@intel.com> Author: John Naylor <johncnaylo...@gmail.com> Discussion: https://postgr.es/m/ph8pr11mb82869ff741dfa4e9a029ff13fb...@ph8pr11mb8286.namprd11.prod.outlook.com --- config/c-compiler.m4 | 7 ++++++- configure | 7 ++++++- meson.build | 7 +++++-- src/port/pg_crc32c_sse42.c | 4 ++++ src/port/pg_crc32c_sse42_choose.c | 9 ++++++--- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 8534cc54c1..8b255b5cc8 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -557,14 +557,19 @@ AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS], [define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics])])dnl AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32], [Ac_cachevar], [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h> + #include <wmmintrin.h> #if defined(__has_attribute) && __has_attribute (target) - __attribute__((target("sse4.2"))) + __attribute__((target("sse4.2,pclmul"))) #endif static int crc32_sse42_test(void) + { + __m128i x1 = _mm_set1_epi32(1); unsigned int crc = 0; crc 
= _mm_crc32_u8(crc, 0); crc = _mm_crc32_u32(crc, 0); + x1 = _mm_clmulepi64_si128(x1, x1, 0x00); // pclmul + crc = crc + _mm_extract_epi32(x1, 1); /* return computed value, to prevent the above being optimized away */ return crc == 0; }], diff --git a/configure b/configure index 0ffcaeb436..3f2a2a515e 100755 --- a/configure +++ b/configure @@ -17059,14 +17059,19 @@ else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <nmmintrin.h> + #include <wmmintrin.h> #if defined(__has_attribute) && __has_attribute (target) - __attribute__((target("sse4.2"))) + __attribute__((target("sse4.2,pclmul"))) #endif static int crc32_sse42_test(void) + { + __m128i x1 = _mm_set1_epi32(1); unsigned int crc = 0; crc = _mm_crc32_u8(crc, 0); crc = _mm_crc32_u32(crc, 0); + x1 = _mm_clmulepi64_si128(x1, x1, 0x00); + crc = crc + _mm_extract_epi32(x1, 1); /* return computed value, to prevent the above being optimized away */ return crc == 0; } diff --git a/meson.build b/meson.build index 1ceadb9a83..456c3fafc3 100644 --- a/meson.build +++ b/meson.build @@ -2227,15 +2227,18 @@ if host_cpu == 'x86' or host_cpu == 'x86_64' prog = ''' #include <nmmintrin.h> - +#include <wmmintrin.h> #if defined(__has_attribute) && __has_attribute (target) -__attribute__((target("sse4.2"))) +__attribute__((target("sse4.2,pclmul"))) #endif int main(void) { + __m128i x1 = _mm_set1_epi32(1); unsigned int crc = 0; crc = _mm_crc32_u8(crc, 0); crc = _mm_crc32_u32(crc, 0); + x1 = _mm_clmulepi64_si128(x1, x1, 0x00); // pclmul + crc = crc + _mm_extract_epi32(x1, 1); /* return computed value, to prevent the above being optimized away */ return crc == 0; } diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 7250eccf6b..05b11b47cb 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -3,6 +3,10 @@ * pg_crc32c_sse42.c * Compute CRC-32C checksum using Intel SSE 4.2 instructions. 
* + * For longer inputs, we use carryless multiplication on SIMD registers, + * based on: "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ + * Instruction" V. Gopal, E. Ozturk, et al., 2009 + * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 65dbc4d424..95cfe63493 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -31,7 +31,7 @@ #include "port/pg_crc32c.h" static bool -pg_crc32c_sse42_available(void) +pg_crc32c_sse42_pclmul_available(void) { unsigned int exx[4] = {0, 0, 0, 0}; @@ -43,7 +43,10 @@ pg_crc32c_sse42_available(void) #error cpuid instruction not available #endif - return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ + bool sse42 = (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ + bool pclmul = (exx[2] & (1 << 1)) != 0; /* PCLMULQDQ */ + + return sse42 && pclmul; } /* @@ -53,7 +56,7 @@ pg_crc32c_sse42_available(void) static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) { - if (pg_crc32c_sse42_available()) + if (pg_crc32c_sse42_pclmul_available()) pg_comp_crc32c = pg_comp_crc32c_sse42; else pg_comp_crc32c = pg_comp_crc32c_sb8; -- 2.48.1
From a09e918bab5b6aac134c28bebd4b6f60ed05bfc9 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 12 Feb 2025 16:03:52 +0700 Subject: [PATCH v4 4/5] Run pgindent XXX Some lines are still really long --- src/port/pg_crc32c_sse42.c | 95 +++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 3395617301..7250eccf6b 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -79,49 +79,60 @@ pg_comp_crc32c_sse42_tail(pg_crc32c crc, const void *data, size_t len) pg_attribute_target("sse4.2,pclmul") pg_crc32c -pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t length) { +pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t length) +{ /* adjust names to match generated code */ - pg_crc32c crc0 = crc; - size_t len = length; + pg_crc32c crc0 = crc; + size_t len = length; const unsigned char *buf = data; - if (len >= 128) { - /* First vector chunk. */ - __m128i x0 = _mm_loadu_si128((const __m128i*)buf), y0; - __m128i x1 = _mm_loadu_si128((const __m128i*)(buf + 16)), y1; - __m128i x2 = _mm_loadu_si128((const __m128i*)(buf + 32)), y2; - __m128i x3 = _mm_loadu_si128((const __m128i*)(buf + 48)), y3; - __m128i k; - k = _mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0); - x0 = _mm_xor_si128(_mm_cvtsi32_si128(crc0), x0); - buf += 64; - len -= 64; - /* Main loop. 
*/ - while (len >= 64) { - y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); - y1 = clmul_lo(x1, k), x1 = clmul_hi(x1, k); - y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); - y3 = clmul_lo(x3, k), x3 = clmul_hi(x3, k); - y0 = _mm_xor_si128(y0, _mm_loadu_si128((const __m128i*)buf)), x0 = _mm_xor_si128(x0, y0); - y1 = _mm_xor_si128(y1, _mm_loadu_si128((const __m128i*)(buf + 16))), x1 = _mm_xor_si128(x1, y1); - y2 = _mm_xor_si128(y2, _mm_loadu_si128((const __m128i*)(buf + 32))), x2 = _mm_xor_si128(x2, y2); - y3 = _mm_xor_si128(y3, _mm_loadu_si128((const __m128i*)(buf + 48))), x3 = _mm_xor_si128(x3, y3); - buf += 64; - len -= 64; - } - /* Reduce x0 ... x3 to just x0. */ - k = _mm_setr_epi32(0xf20c0dfe, 0, 0x493c7d27, 0); - y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); - y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); - y0 = _mm_xor_si128(y0, x1), x0 = _mm_xor_si128(x0, y0); - y2 = _mm_xor_si128(y2, x3), x2 = _mm_xor_si128(x2, y2); - k = _mm_setr_epi32(0x3da6d0cb, 0, 0xba4fc28e, 0); - y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); - y0 = _mm_xor_si128(y0, x2), x0 = _mm_xor_si128(x0, y0); - /* Reduce 128 bits to 32 bits, and multiply by x^32. */ - crc0 = _mm_crc32_u64(0, _mm_extract_epi64(x0, 0)); - crc0 = _mm_crc32_u64(crc0, _mm_extract_epi64(x0, 1)); - } - - return pg_comp_crc32c_sse42_tail(crc0, buf, len); + if (len >= 128) + { + /* First vector chunk. */ + __m128i x0 = _mm_loadu_si128((const __m128i *) buf), + y0; + __m128i x1 = _mm_loadu_si128((const __m128i *) (buf + 16)), + y1; + __m128i x2 = _mm_loadu_si128((const __m128i *) (buf + 32)), + y2; + __m128i x3 = _mm_loadu_si128((const __m128i *) (buf + 48)), + y3; + __m128i k; + + k = _mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0); + x0 = _mm_xor_si128(_mm_cvtsi32_si128(crc0), x0); + buf += 64; + len -= 64; + + /* Main loop. 
*/ + while (len >= 64) + { + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y1 = clmul_lo(x1, k), x1 = clmul_hi(x1, k); + y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); + y3 = clmul_lo(x3, k), x3 = clmul_hi(x3, k); + y0 = _mm_xor_si128(y0, _mm_loadu_si128((const __m128i *) buf)), x0 = _mm_xor_si128(x0, y0); + y1 = _mm_xor_si128(y1, _mm_loadu_si128((const __m128i *) (buf + 16))), x1 = _mm_xor_si128(x1, y1); + y2 = _mm_xor_si128(y2, _mm_loadu_si128((const __m128i *) (buf + 32))), x2 = _mm_xor_si128(x2, y2); + y3 = _mm_xor_si128(y3, _mm_loadu_si128((const __m128i *) (buf + 48))), x3 = _mm_xor_si128(x3, y3); + buf += 64; + len -= 64; + } + + /* Reduce x0 ... x3 to just x0. */ + k = _mm_setr_epi32(0xf20c0dfe, 0, 0x493c7d27, 0); + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y2 = clmul_lo(x2, k), x2 = clmul_hi(x2, k); + y0 = _mm_xor_si128(y0, x1), x0 = _mm_xor_si128(x0, y0); + y2 = _mm_xor_si128(y2, x3), x2 = _mm_xor_si128(x2, y2); + k = _mm_setr_epi32(0x3da6d0cb, 0, 0xba4fc28e, 0); + y0 = clmul_lo(x0, k), x0 = clmul_hi(x0, k); + y0 = _mm_xor_si128(y0, x2), x0 = _mm_xor_si128(x0, y0); + + /* Reduce 128 bits to 32 bits, and multiply by x^32. */ + crc0 = _mm_crc32_u64(0, _mm_extract_epi64(x0, 0)); + crc0 = _mm_crc32_u64(crc0, _mm_extract_epi64(x0, 1)); + } + + return pg_comp_crc32c_sse42_tail(crc0, buf, len); } -- 2.48.1
From 543752f816e3f9f0e312dac2be14fabb7c56101e Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 12 Feb 2025 15:27:27 +0700 Subject: [PATCH v4 3/5] Adjust previous commit to match our style, add 128-byte threshold --- src/port/pg_crc32c_sse42.c | 48 +++++++++++--------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 6cc39de175..3395617301 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -15,13 +15,14 @@ #include "c.h" #include <nmmintrin.h> +#include <wmmintrin.h> #include "port/pg_crc32c.h" pg_attribute_no_sanitize_alignment() pg_attribute_target("sse4.2") -pg_crc32c -pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) +static pg_crc32c +pg_comp_crc32c_sse42_tail(pg_crc32c crc, const void *data, size_t len) { const unsigned char *p = data; const unsigned char *pend = p + len; @@ -73,34 +74,18 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) /* ./generate -i sse -p crc32c -a v4 */ /* MIT licensed */ -#include <stddef.h> -#include <stdint.h> -#include <nmmintrin.h> -#include <wmmintrin.h> - -#if defined(_MSC_VER) -#define CRC_AINLINE static __forceinline -#define CRC_ALIGN(n) __declspec(align(n)) -#else -#define CRC_AINLINE static __inline __attribute__((always_inline)) -#define CRC_ALIGN(n) __attribute__((aligned(n))) -#endif -#define CRC_EXPORT extern - #define clmul_lo(a, b) (_mm_clmulepi64_si128((a), (b), 0)) #define clmul_hi(a, b) (_mm_clmulepi64_si128((a), (b), 17)) -CRC_EXPORT uint32_t crc32_impl(uint32_t crc0, const char* buf, size_t len) { - crc0 = ~crc0; - for (; len && ((uintptr_t)buf & 7); --len) { - crc0 = _mm_crc32_u8(crc0, *buf++); - } - if (((uintptr_t)buf & 8) && len >= 8) { - crc0 = _mm_crc32_u64(crc0, *(const uint64_t*)buf); - buf += 8; - len -= 8; - } - if (len >= 64) { +pg_attribute_target("sse4.2,pclmul") +pg_crc32c +pg_comp_crc32c_sse42(pg_crc32c crc, const 
void *data, size_t length) { + /* adjust names to match generated code */ + pg_crc32c crc0 = crc; + size_t len = length; + const unsigned char *buf = data; + + if (len >= 128) { /* First vector chunk. */ __m128i x0 = _mm_loadu_si128((const __m128i*)buf), y0; __m128i x1 = _mm_loadu_si128((const __m128i*)(buf + 16)), y1; @@ -137,11 +122,6 @@ CRC_EXPORT uint32_t crc32_impl(uint32_t crc0, const char* buf, size_t len) { crc0 = _mm_crc32_u64(0, _mm_extract_epi64(x0, 0)); crc0 = _mm_crc32_u64(crc0, _mm_extract_epi64(x0, 1)); } - for (; len >= 8; buf += 8, len -= 8) { - crc0 = _mm_crc32_u64(crc0, *(const uint64_t*)buf); - } - for (; len; --len) { - crc0 = _mm_crc32_u8(crc0, *buf++); - } - return ~crc0; + + return pg_comp_crc32c_sse42_tail(crc0, buf, len); } -- 2.48.1
From 3a27b748ec17feff4547d7ab2689d80ba6d55665 Mon Sep 17 00:00:00 2001 From: Paul Amonson <paul.d.amon...@intel.com> Date: Mon, 6 May 2024 08:34:17 -0700 Subject: [PATCH v4 1/5] Add a Postgres SQL function for crc32c benchmarking Add a drive_crc32c() function to use for benchmarking crc32c computation. The function takes 2 arguments: (1) count: num of times CRC32C is computed in a loop. (2) num: #bytes in the buffer to calculate crc over. XXX not for commit Extracted from a patch by Raghuveer Devulapalli --- contrib/meson.build | 1 + contrib/test_crc32c/Makefile | 20 +++++++ contrib/test_crc32c/expected/test_crc32c.out | 57 ++++++++++++++++++++ contrib/test_crc32c/meson.build | 34 ++++++++++++ contrib/test_crc32c/sql/test_crc32c.sql | 3 ++ contrib/test_crc32c/test_crc32c--1.0.sql | 1 + contrib/test_crc32c/test_crc32c.c | 47 ++++++++++++++++ contrib/test_crc32c/test_crc32c.control | 4 ++ 8 files changed, 167 insertions(+) create mode 100644 contrib/test_crc32c/Makefile create mode 100644 contrib/test_crc32c/expected/test_crc32c.out create mode 100644 contrib/test_crc32c/meson.build create mode 100644 contrib/test_crc32c/sql/test_crc32c.sql create mode 100644 contrib/test_crc32c/test_crc32c--1.0.sql create mode 100644 contrib/test_crc32c/test_crc32c.c create mode 100644 contrib/test_crc32c/test_crc32c.control diff --git a/contrib/meson.build b/contrib/meson.build index 1ba73ebd67..06673db062 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -12,6 +12,7 @@ contrib_doc_args = { 'install_dir': contrib_doc_dir, } +subdir('test_crc32c') subdir('amcheck') subdir('auth_delay') subdir('auto_explain') diff --git a/contrib/test_crc32c/Makefile b/contrib/test_crc32c/Makefile new file mode 100644 index 0000000000..5b747c6184 --- /dev/null +++ b/contrib/test_crc32c/Makefile @@ -0,0 +1,20 @@ +MODULE_big = test_crc32c +OBJS = test_crc32c.o +PGFILEDESC = "test" +EXTENSION = test_crc32c +DATA = test_crc32c--1.0.sql + +first: all + +# test_crc32c.o: CFLAGS+=-g + +ifdef 
USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_crc32c +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/test_crc32c/expected/test_crc32c.out b/contrib/test_crc32c/expected/test_crc32c.out new file mode 100644 index 0000000000..dff6bb3133 --- /dev/null +++ b/contrib/test_crc32c/expected/test_crc32c.out @@ -0,0 +1,57 @@ +CREATE EXTENSION test_crc32c; +select drive_crc32c(1, i) from generate_series(100, 300, 4) i; + drive_crc32c +-------------- + 532139994 + 2103623867 + 785984197 + 2686825890 + 3213049059 + 3819630168 + 1389234603 + 534072900 + 2930108140 + 2496889855 + 1475239611 + 136366931 + 3067402116 + 2012717871 + 3682416023 + 2054270645 + 1817339875 + 4100939569 + 1192727539 + 3636976218 + 369764421 + 3161609879 + 1067984880 + 1235066769 + 3138425899 + 648132037 + 4203750233 + 1330187888 + 2683521348 + 1951644495 + 2574090107 + 3904902018 + 3772697795 + 1644686344 + 2868962106 + 3369218491 + 3902689890 + 3456411865 + 141004025 + 1504497996 + 3782655204 + 3544797610 + 3429174879 + 2524728016 + 3935861181 + 25498897 + 692684159 + 345705535 + 2761600287 + 2654632420 + 3945991399 +(51 rows) + diff --git a/contrib/test_crc32c/meson.build b/contrib/test_crc32c/meson.build new file mode 100644 index 0000000000..d7bec4ba1c --- /dev/null +++ b/contrib/test_crc32c/meson.build @@ -0,0 +1,34 @@ +# Copyright (c) 2022-2024, PostgreSQL Global Development Group + +test_crc32c_sources = files( + 'test_crc32c.c', +) + +if host_system == 'windows' + test_crc32c_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_crc32c', + '--FILEDESC', 'test_crc32c - test code for crc32c library',]) +endif + +test_crc32c = shared_module('test_crc32c', + test_crc32c_sources, + kwargs: contrib_mod_args, +) +contrib_targets += test_crc32c + +install_data( + 'test_crc32c.control', + 
'test_crc32c--1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'test_crc32c', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_crc32c', + ], + }, +} diff --git a/contrib/test_crc32c/sql/test_crc32c.sql b/contrib/test_crc32c/sql/test_crc32c.sql new file mode 100644 index 0000000000..95c6dfe448 --- /dev/null +++ b/contrib/test_crc32c/sql/test_crc32c.sql @@ -0,0 +1,3 @@ +CREATE EXTENSION test_crc32c; + +select drive_crc32c(1, i) from generate_series(100, 300, 4) i; diff --git a/contrib/test_crc32c/test_crc32c--1.0.sql b/contrib/test_crc32c/test_crc32c--1.0.sql new file mode 100644 index 0000000000..52b9772f90 --- /dev/null +++ b/contrib/test_crc32c/test_crc32c--1.0.sql @@ -0,0 +1 @@ +CREATE FUNCTION drive_crc32c (count int, num int) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/contrib/test_crc32c/test_crc32c.c b/contrib/test_crc32c/test_crc32c.c new file mode 100644 index 0000000000..b350caf5ce --- /dev/null +++ b/contrib/test_crc32c/test_crc32c.c @@ -0,0 +1,47 @@ +/* select drive_crc32c(1000000, 1024); */ + +#include "postgres.h" +#include "fmgr.h" +#include "port/pg_crc32c.h" +#include "common/pg_prng.h" + +PG_MODULE_MAGIC; + +/* + * drive_crc32c(count: int, num: int) returns bigint + * + * count is the number of loops to perform + * + * num is the number of bytes in the buffer to calculate + * crc32c over. 
+ */ +PG_FUNCTION_INFO_V1(drive_crc32c); +Datum +drive_crc32c(PG_FUNCTION_ARGS) +{ + int64 count = PG_GETARG_INT64(0); + int64 num = PG_GETARG_INT64(1); + char* data = malloc((size_t)num); + pg_crc32c crc; + pg_prng_state state; + uint64 seed = 42; + pg_prng_seed(&state, seed); + /* set random data */ + for (uint64 i = 0; i < num; i++) + { + data[i] = pg_prng_uint32(&state) % 255; + } + + INIT_CRC32C(crc); + + while(count--) + { + INIT_CRC32C(crc); + COMP_CRC32C(crc, data, num); + FIN_CRC32C(crc); + } + + free((void *)data); + + PG_RETURN_INT64((int64_t)crc); +} diff --git a/contrib/test_crc32c/test_crc32c.control b/contrib/test_crc32c/test_crc32c.control new file mode 100644 index 0000000000..878a077ee1 --- /dev/null +++ b/contrib/test_crc32c/test_crc32c.control @@ -0,0 +1,4 @@ +comment = 'test' +default_version = '1.0' +module_pathname = '$libdir/test_crc32c' +relocatable = true -- 2.48.1