Using carry-less multiply instructions from RISC-V's Zbc extension,
implement a Barrett reduction that computes CRC-32C checksums.
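
As a quick illustration of the approach, the whole reduction comes down
to two carry-less multiplies. The sketch below (illustrative only, not
part of the patch) checks the 32-bit case against a plain bitwise
CRC-32C: crc32c_bitwise() is a reference written for this example, the
constants and intrinsics are the ones used in rte_crc_riscv64.h, and it
assumes a toolchain that provides the Zbc intrinsics (e.g. GCC 14 with
-march=rv64gc_zbc):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <riscv_bitmanip.h>

    /* Reference: bit-at-a-time CRC-32C of one 32-bit word (reflected) */
    static uint32_t crc32c_bitwise(uint32_t data, uint32_t init_val)
    {
        uint32_t crc = init_val ^ data;
        for (int i = 0; i < 32; i++)
            /* 0x82F63B78 is the reflected CRC-32C polynomial */
            crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78 : 0);
        return crc;
    }

    /* The 32-bit case of the Barrett reduction used in this patch */
    static uint32_t crc32c_barrett(uint32_t data, uint32_t init_val)
    {
        uint64_t crc = (uint64_t)(data ^ init_val) << 32;
        /* multiply by mu = 2^96 / P; keeping the low half of the
         * product divides by 2^96 (everything is bit-reflected) */
        crc = __riscv_clmul_64(crc, 0x4869EC38DEA713F1UL);
        /* multiply by P, keep the high half of the product */
        crc = __riscv_clmulh_64(crc, 0x105EC76F1UL);
        return (uint32_t)crc;
    }

    int main(void)
    {
        assert(crc32c_barrett(0xdeadbeef, 0xffffffff) ==
               crc32c_bitwise(0xdeadbeef, 0xffffffff));
        puts("Barrett reduction matches bitwise CRC-32C");
        return 0;
    }

If the constants are right, the two sides should agree for any
(data, init_val) pair, which is what the test_hash.c change below
checks for 64-bit operands.
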
Based on the approach described in Intel's whitepaper "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instruction", which
is also described here:
https://web.archive.org/web/20240111232520/https://mary.rs/lab/crc32/

Signed-off-by: Daniel Gregory <daniel.greg...@bytedance.com>
---
 MAINTAINERS                |  1 +
 app/test/test_hash.c       |  7 +++
 lib/hash/meson.build       |  1 +
 lib/hash/rte_crc_riscv64.h | 89 ++++++++++++++++++++++++++++++++++++++
 lib/hash/rte_hash_crc.c    | 12 ++++-
 lib/hash/rte_hash_crc.h    |  6 ++-
 6 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 lib/hash/rte_crc_riscv64.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 472713124c..48800f39c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -318,6 +318,7 @@ M: Stanislaw Kardach <stanislaw.kard...@gmail.com>
 F: config/riscv/
 F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
 F: lib/eal/riscv/
+F: lib/hash/rte_crc_riscv64.h
 
 Intel x86
 M: Bruce Richardson <bruce.richard...@intel.com>
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 24d3b547ad..c8c4197ad8 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -205,6 +205,13 @@ test_crc32_hash_alg_equiv(void)
 			printf("Failed checking CRC32_SW against CRC32_ARM64\n");
 			break;
 		}
+
+		/* Check against 8-byte-operand RISCV64 CRC32 if available */
+		rte_hash_crc_set_alg(CRC32_RISCV64);
+		if (hash_val != rte_hash_crc(data64, data_len, init_val)) {
+			printf("Failed checking CRC32_SW against CRC32_RISCV64\n");
+			break;
+		}
 	}
 
 	/* Resetting to best available algorithm */
diff --git a/lib/hash/meson.build b/lib/hash/meson.build
index 277eb9fa93..8355869a80 100644
--- a/lib/hash/meson.build
+++ b/lib/hash/meson.build
@@ -12,6 +12,7 @@ headers = files(
 indirect_headers += files(
         'rte_crc_arm64.h',
         'rte_crc_generic.h',
+        'rte_crc_riscv64.h',
         'rte_crc_sw.h',
         'rte_crc_x86.h',
         'rte_thash_x86_gfni.h',
diff --git a/lib/hash/rte_crc_riscv64.h b/lib/hash/rte_crc_riscv64.h
new file mode 100644
index 0000000000..94f6857c69
--- /dev/null
+++ b/lib/hash/rte_crc_riscv64.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#ifndef _RTE_CRC_RISCV64_H_
+#define _RTE_CRC_RISCV64_H_
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <riscv_bitmanip.h>
+
+/*
+ * CRC-32C takes a reflected input (bit 7 is the lsb) and produces a reflected
+ * output. As reflecting the value we're checksumming is expensive, we instead
+ * reflect the polynomial P (0x11EDC6F41), the constant mu and the algorithm.
+ *
+ * The mu constant is used for a Barrett reduction. It's 2^96 / P (0x11F91CAF6)
+ * reflected. Picking 2^96 rather than 2^64 means we can calculate a 64-bit crc
+ * using only two multiplications (https://mary.rs/lab/crc32/).
+ */
+static const uint64_t p = 0x105EC76F1;
+static const uint64_t mu = 0x4869EC38DEA713F1UL;
+
+/* Calculate the CRC32C checksum using a Barrett reduction */
+static inline uint32_t
+crc32c_riscv64(uint64_t data, uint32_t init_val, uint32_t bits)
+{
+	assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+	/* Combine data with the initial value */
+	uint64_t crc = (uint64_t)(data ^ init_val) << (64 - bits);
+
+	/*
+	 * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+	 * the lower 64 bits of the result (remember we're reflected)
+	 */
+	crc = __riscv_clmul_64(crc, mu);
+	/* Multiply by P */
+	crc = __riscv_clmulh_64(crc, p);
+
+	/* XOR the unconsumed upper bits of init_val back in (8/16-bit sizes) */
+	if (bits == 16 || bits == 8)
+		crc ^= init_val >> bits;
+
+	return crc;
+}
+
+/*
+ * Use carry-less multiply to hash a value, falling back to the software
+ * implementation when the Zbc extension is not supported
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+	if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+		return crc32c_riscv64(data, init_val, 8);
+
+	return crc32c_1byte(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+	if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+		return crc32c_riscv64(data, init_val, 16);
+
+	return crc32c_2bytes(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+	if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+		return crc32c_riscv64(data, init_val, 32);
+
+	return crc32c_1word(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+	if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+		return crc32c_riscv64(data, init_val, 64);
+
+	return crc32c_2words(data, init_val);
+}
+
+#endif /* _RTE_CRC_RISCV64_H_ */
diff --git a/lib/hash/rte_hash_crc.c b/lib/hash/rte_hash_crc.c
index c037cdb0f0..ece1a84b29 100644
--- a/lib/hash/rte_hash_crc.c
+++ b/lib/hash/rte_hash_crc.c
@@ -15,7 +15,7 @@ RTE_LOG_REGISTER_SUFFIX(hash_crc_logtype, crc, INFO);
 uint8_t rte_hash_crc32_alg = CRC32_SW;
 
 /**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
  * calculation.
 *
 * @param alg
@@ -24,6 +24,7 @@ uint8_t rte_hash_crc32_alg = CRC32_SW;
  * - (CRC32_SSE42) Use SSE4.2 intrinsics if available
  * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
  * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V Zbc extension (carry-less multiply) if available
  *
  */
 void
@@ -52,6 +53,13 @@ rte_hash_crc_set_alg(uint8_t alg)
 		rte_hash_crc32_alg = CRC32_ARM64;
 #endif
 
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_ZBC)
+	if (!(alg & CRC32_RISCV64))
+		HASH_CRC_LOG(WARNING,
+			"Unsupported CRC32 algorithm requested using CRC32_RISCV64");
+	rte_hash_crc32_alg = CRC32_RISCV64;
+#endif
+
 	if (rte_hash_crc32_alg == CRC32_SW)
 		HASH_CRC_LOG(WARNING,
 			"Unsupported CRC32 algorithm requested using CRC32_SW");
@@ -64,6 +72,8 @@ RTE_INIT(rte_hash_crc_init_alg)
 	rte_hash_crc_set_alg(CRC32_SSE42_x64);
 #elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
 	rte_hash_crc_set_alg(CRC32_ARM64);
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_ZBC)
+	rte_hash_crc_set_alg(CRC32_RISCV64);
 #else
 	rte_hash_crc_set_alg(CRC32_SW);
 #endif
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..2be433fa21 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -28,6 +28,7 @@ extern "C" {
 #define CRC32_x64           (1U << 2)
 #define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
 #define CRC32_ARM64         (1U << 3)
+#define CRC32_RISCV64       (1U << 4)
 
 extern uint8_t rte_hash_crc32_alg;
 
@@ -35,12 +36,14 @@ extern uint8_t rte_hash_crc32_alg;
 #include "rte_crc_arm64.h"
 #elif defined(RTE_ARCH_X86)
 #include "rte_crc_x86.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_ZBC)
+#include "rte_crc_riscv64.h"
 #else
 #include "rte_crc_generic.h"
 #endif
 
 /**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
  * calculation.
  *
  * @param alg
@@ -49,6 +52,7 @@ extern uint8_t rte_hash_crc32_alg;
  * - (CRC32_SSE42) Use SSE4.2 intrinsics if available
  * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
  * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V carry-less multiply if available (default rv64gc_zbc)
  */
 void
 rte_hash_crc_set_alg(uint8_t alg);
-- 
2.39.2