Implement EFD lookup internal function for RISC-V architecture using RISC-V Vector Extension instruction set
Performance results (EFD perf autotest)

These performance results are the average of five test runs.
Results (in CPU cycles/operation):

Before optimization:
Keysize  Add    Lookup  Lookup_bulk  Delete
4        136.0  5.6     5.0          25.6
8        149.4  6.0     6.0          31.4
16       156.8  8.0     7.0          36.8
32       179.2  13.0    14.0         45.4
48       205.8  13.0    15.0         45.6
64       214.4  16.0    18.4         46.6
9        152.8  6.2     6.0          30.0
13       160.6  7.2     7.0          35.0
37       191.2  12.0    13.0         44.2
40       195.0  12.0    13.6         43.2

After optimization:
Keysize  Add    Lookup  Lookup_bulk  Delete
4        131.6  5.0     5.0          26.6
8        143.6  6.0     6.2          33.0
16       153.2  7.0     7.0          35.8
32       173.6  14.0    14.0         45.0
48       201.6  12.6    14.0         46.2
64       211.4  15.0    16.0         46.8
9        146.6  6.0     6.0          30.6
13       154.8  7.0     7.0          35.0
37       189.2  11.0    12.2         45.4
40       191.8  11.0    12.4         43.8

This patch is co-developed with Gong Xiaofei.

Signed-off-by: gong-flying <[email protected]>
Signed-off-by: P1erreCashon <[email protected]>
---
 lib/efd/rte_efd.c     | 18 ++++++++++
 lib/efd/rte_efd_rvv.h | 77 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 lib/efd/rte_efd_rvv.h

diff --git a/lib/efd/rte_efd.c b/lib/efd/rte_efd.c
index 4dfc0ec9a4..053ee4f232 100644
--- a/lib/efd/rte_efd.c
+++ b/lib/efd/rte_efd.c
@@ -29,6 +29,9 @@
 #include "rte_efd_x86.h"
 #elif defined(RTE_ARCH_ARM64)
 #include "rte_efd_arm64.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include <riscv_vector.h>
+#include "rte_efd_rvv.h"
 #endif
 
 RTE_LOG_REGISTER_DEFAULT(efd_logtype, INFO);
@@ -85,7 +88,8 @@ enum efd_lookup_internal_function {
 	EFD_LOOKUP_SCALAR = 0,
 	EFD_LOOKUP_AVX2,
 	EFD_LOOKUP_NEON,
+	EFD_LOOKUP_RVV,
 	EFD_LOOKUP_NUM
 };
 
 TAILQ_HEAD(rte_efd_list, rte_tailq_entry);
@@ -675,6 +679,12 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 			rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
 		table->lookup_fn = EFD_LOOKUP_NEON;
 	else
+#endif
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+	if (RTE_EFD_VALUE_NUM_BITS > 16 &&
+			__riscv_vsetvl_e32m1(4) == 4)
+		table->lookup_fn = EFD_LOOKUP_RVV;
+	else
 #endif
 		table->lookup_fn = EFD_LOOKUP_SCALAR;
 
@@ -1305,6 +1315,14 @@ efd_lookup_internal(const struct efd_online_group_entry * const group,
 					hash_val_a,
 					hash_val_b);
 		break;
+#endif
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+	case EFD_LOOKUP_RVV:
+		return efd_lookup_internal_rvv(group->hash_idx,
+					group->lookup_table,
+					hash_val_a,
+					hash_val_b);
+		break;
 #endif
 	case EFD_LOOKUP_SCALAR:
 	/* Fall-through */
diff --git a/lib/efd/rte_efd_rvv.h b/lib/efd/rte_efd_rvv.h
new file mode 100644
index 0000000000..e8ea6e0343
--- /dev/null
+++ b/lib/efd/rte_efd_rvv.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2026 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __RTE_EFD_RISCV_H__
+#define __RTE_EFD_RISCV_H__
+
+#include <riscv_vector.h>
+
+/*
+ * EFD lookup using the RISC-V Vector extension, four table entries per step.
+ *
+ * Bit i of the result is bit number
+ * ((hash_val_a + hash_val_b * group_hash_idx[i]) >> EFD_LOOKUPTBL_SHIFT)
+ * of group_lookup_table[i], for i in [0, RTE_EFD_VALUE_NUM_BITS).
+ *
+ * Both tables are read through uint16_t pointers. The caller must have
+ * checked that __riscv_vsetvl_e32m1(4) == 4, i.e. four 32-bit lanes fit.
+ */
+static inline efd_value_t
+efd_lookup_internal_rvv(const efd_hashfunc_t *group_hash_idx,
+		const efd_lookuptbl_t *group_lookup_table,
+		const uint32_t hash_val_a, const uint32_t hash_val_b)
+{
+	efd_value_t value = 0;
+	const uint32_t N = RTE_EFD_VALUE_NUM_BITS;
+	const size_t vl = 4;
+	vuint32m1_t vhash_val_a = __riscv_vmv_v_x_u32m1(hash_val_a, vl);
+	vuint32m1_t vhash_val_b = __riscv_vmv_v_x_u32m1(hash_val_b, vl);
+	/* Lane j of the step starting at entry i yields result bit (i + j). */
+	vuint32m1_t vshift = __riscv_vid_v_u32m1(vl);
+	vuint32m1_t vmask = __riscv_vmv_v_x_u32m1(0x1, vl);
+	/* Each step consumes vl entries, so bit positions advance by vl. */
+	vuint32m1_t vincr = __riscv_vmv_v_x_u32m1((uint32_t)vl, vl);
+	vuint32m1_t vzero = __riscv_vmv_v_x_u32m1(0, vl);
+
+	for (unsigned int i = 0; i < N; i += vl) {
+		/* Clamp the last step so nothing past entry N - 1 is used. */
+		const size_t n = (N - i < vl) ? (size_t)(N - i) : vl;
+
+		vuint16mf2_t vhash_idx16 =
+			__riscv_vle16_v_u16mf2(
+				(const uint16_t *)&group_hash_idx[i], n);
+		vuint32m1_t vhash_idx =
+			__riscv_vwcvtu_x_x_v_u32m1(vhash_idx16, n);
+
+		vuint16mf2_t vlookup16 =
+			__riscv_vle16_v_u16mf2(
+				(const uint16_t *)&group_lookup_table[i], n);
+		vuint32m1_t vlookup =
+			__riscv_vwcvtu_x_x_v_u32m1(vlookup16, n);
+
+		/* vmadd: vhash_idx * vhash_val_b + vhash_val_a. */
+		vuint32m1_t vhash =
+			__riscv_vmadd_vv_u32m1(vhash_idx, vhash_val_b,
+					vhash_val_a, n);
+		vuint32m1_t vbucket =
+			__riscv_vsrl_vx_u32m1(vhash, EFD_LOOKUPTBL_SHIFT, n);
+
+		/* Select the addressed bit and move it to its result position. */
+		vuint32m1_t vresult =
+			__riscv_vsrl_vv_u32m1(vlookup, vbucket, n);
+		vresult = __riscv_vand_vv_u32m1(vresult, vmask, n);
+		vresult = __riscv_vsll_vv_u32m1(vresult, vshift, n);
+
+		/* Lanes carry disjoint bits, so the sum equals a bitwise OR. */
+		vuint32m1_t vsum =
+			__riscv_vredsum_vs_u32m1_u32m1(vresult, vzero, n);
+		value |= __riscv_vmv_x_s_u32m1_u32(vsum);
+
+		vshift = __riscv_vadd_vv_u32m1(vshift, vincr, vl);
+	}
+
+	return value;
+}
+
+#endif /* __RTE_EFD_RISCV_H__ */
-- 
2.43.0

