Hi, On 2018-01-10 05:58:19 +0000, Yuqi Gu wrote: > Currently PostgreSQL only implements hardware support for CRC32 checksums for > the x86_64 architecture. > Some ARMv8 (AArch64) CPUs implement the CRC32 extension which is implemented > by inline assembly, > so they can also benefit from hardware acceleration in IO-intensive workloads. > > I would like to propose the patch to optimize crc32c calculation with Arm64 > specific instructions. > The hardware-specific code implementation is used under #if defined > USE_ARMCE_CRC32C_WITH_RUNTIME_CHECK. > And the performance is improved on platforms: cortex-A57, cortex-A72, > cortex-A73, etc. > > I'll create a CommitFests ticket for this submission. > Any comments or feedback are welcome.
Could you show whether it actually improves performance? Usually bulk loading data with parallel COPYs is a good way to hit this codepath. > + > +AC_DEFUN([PGAC_ARM64CE_CRC32_INTRINSICS], > +[AC_CACHE_CHECK([for Arm64ce CRC32], [pgac_cv_arm64ce_crc32_intrinsics], > +[AC_LINK_IFELSE([AC_LANG_PROGRAM([], > + [unsigned int arm_flag = 0; > +#if defined(__ARM_ARCH) && (__ARM_ARCH > 7) > + arm_flag = 1; > +#endif > + return arm_flag == 1;])], > + [pgac_cv_arm64ce_crc32_intrinsics="yes"], > + [pgac_cv_arm64ce_crc32_intrinsics="no"])]) > +if test x"$pgac_cv_arm64ce_crc32_intrinsics" = x"yes"; then > + pgac_arm64ce_crc32_intrinsics=yes > +fi > +])# PGAC_ARM64CE_CRC32_INTRINSICS I don't understand what this check is supposed to be doing? AC_LINK_IFELSE doesn't run the program, so I don't see this test achieving anything at all? > * Use slicing-by-8 algorithm. > diff --git a/src/port/pg_crc32c_choose.c b/src/port/pg_crc32c_choose.c > index 40bee67..d3682ad 100644 > --- a/src/port/pg_crc32c_choose.c > +++ b/src/port/pg_crc32c_choose.c > @@ -29,6 +29,20 @@ > > #include "port/pg_crc32c.h" > > +#ifdef USE_ARMCE_CRC32C_WITH_RUNTIME_CHECK > +#include <sys/auxv.h> > +#include <asm/hwcap.h> > +#ifndef HWCAP_CRC32 > +#define HWCAP_CRC32 (1 << 7) > +#endif > +static bool > +pg_crc32c_arm64ce_available(void) { > + unsigned long auxv = getauxval(AT_HWCAP); > + return (auxv & HWCAP_CRC32) != 0; > +} > + > +#else What's the availability of these headers and functions on non-linux platforms? > +#if defined(USE_ARMCE_CRC32C_WITH_RUNTIME_CHECK) > +asm(".arch_extension crc"); So this emitted globally? > +#define LDP(x,y,p) asm("ldp %x[a], %x[b], [%x[c]], #16" : > [a]"=r"(x),[b]"=r"(y),[c]"+r"(p)) > +/* CRC32C: Castagnoli polynomial 0x1EDC6F41 */ > +#define CRC32CX(crc,value) asm("crc32cx %w[c], %w[c], %x[v]" : > [c]"+r"(*&crc) : [v]"r"(+value)) > +#define CRC32CW(crc,value) asm("crc32cw %w[c], %w[c], %w[v]" : > [c]"+r"(*&crc) : [v]"r"(+value)) > +#define CRC32CH(crc,value) asm("crc32ch %w[c], %w[c], %w[v]" : > [c]"+r"(*&crc) : [v]"r"(+value)) > +#define CRC32CB(crc,value) asm("crc32cb %w[c], %w[c], %w[v]" : > [c]"+r"(*&crc) : [v]"r"(+value)) > + > +pg_crc32c > +pg_comp_crc32c_arm64(pg_crc32c crc, const void* data, size_t len) { > + uint64 p0, p1; > + pg_crc32c crc32_c = crc; > + long length = len; > + const unsigned char *p_buf = data; > + > + /* Allow crc instructions in asm */ > + asm(".cpu generic+crc"); Hm, this switches it for the rest of the function, program, ...? Greetings, Andres Freund