Hi Jerin, > -----Original Message----- > From: Jerin Jacob Kollanukkaran <jer...@marvell.com> > Sent: Monday, February 11, 2019 15:49 > To: Ruifeng Wang (Arm Technology China) <ruifeng.w...@arm.com>; > yipeng1.w...@intel.com > Cc: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>; nd > <n...@arm.com>; dev@dpdk.org > Subject: Re: [EXT] [PATCH v1] hash: optimize signature compare by using > neon intrinsic > > On Mon, 2019-02-11 at 15:30 +0800, Ruifeng Wang wrote: > > > > ------------------------------------------------------------------- > > --- > > Implemented signature compare function based on neon intrinsic. > > Hash bulk lookup had 3% - 6% performance gain after optimization. > > > > Signed-off-by: Ruifeng Wang <ruifeng.w...@arm.com> > > --- > > lib/librte_hash/rte_cuckoo_hash.c | 32 > > ++++++++++++++++++++++++++++++- > > lib/librte_hash/rte_cuckoo_hash.h | 1 + > > 2 files changed, 32 insertions(+), 1 deletion(-) > > > > diff --git a/lib/librte_hash/rte_cuckoo_hash.c > > b/lib/librte_hash/rte_cuckoo_hash.c > > index c01489ba5..5745a254f 100644 > > --- a/lib/librte_hash/rte_cuckoo_hash.c > > +++ b/lib/librte_hash/rte_cuckoo_hash.c > > @@ -26,6 +26,9 @@ > > #include <rte_spinlock.h> > > #include <rte_ring.h> > > #include <rte_compat.h> > > +#if defined(RTE_ARCH_ARM64) > > +#include <arm_neon.h> > > +#endif > > > The use of rte_vector.h will remove the need for #if defined... >
OK. Will change to rte_vect.h in the next version. > > > > > #include "rte_hash.h" > > #include "rte_cuckoo_hash.h" > > @@ -407,6 +410,10 @@ rte_hash_create(const struct > rte_hash_parameters > > *params) > > if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) > > h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; > > else > > +#elif defined(RTE_ARCH_ARM64) > > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) > > + h->sig_cmp_fn = RTE_HASH_COMPARE_NEON; > > + else > > #endif > > h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; > > > > @@ -1578,10 +1585,15 @@ compare_signatures(uint32_t > > *prim_hash_matches, uint32_t *sec_hash_matches, > > enum rte_hash_sig_compare_function sig_cmp_fn) > { > > unsigned int i; > > +#ifdef RTE_MACHINE_CPUFLAG_NEON > > + uint16x8_t vmat, vsig, x; > > + uint64x2_t x64; > > + int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1}; #endif > > Is it possible to move down the variable declaration, to avoid the need for > #ifdef here? > Will move the variable declaration down into the 'case' block. 
> > > > /* For match mask the first bit of every two bits indicates the > > match */ > > switch (sig_cmp_fn) { > > -#ifdef RTE_MACHINE_CPUFLAG_SSE2 > > +#if defined(RTE_MACHINE_CPUFLAG_SSE2) > > case RTE_HASH_COMPARE_SSE: > > /* Compare all signatures in the bucket */ > > *prim_hash_matches = > _mm_movemask_epi8(_mm_cmpeq_epi16( > > @@ -1594,6 +1606,24 @@ compare_signatures(uint32_t > *prim_hash_matches, > > uint32_t *sec_hash_matches, > > (__m128i const *)sec_bkt- > > >sig_current), > > _mm_set1_epi16(sig))); > > break; > > +#elif defined(RTE_MACHINE_CPUFLAG_NEON) > > + case RTE_HASH_COMPARE_NEON: > > + vsig = vld1q_dup_u16((uint16_t const *)&sig); > > + /* Compare all signatures in the primary bucket */ > > + vmat = vceqq_u16(vsig, > > + vld1q_u16((uint16_t const *)prim_bkt- > > >sig_current)); > > + x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), > > shift); > > + x64 = vpaddlq_u32(vpaddlq_u16(x)); > > + *prim_hash_matches = (uint32_t)(vgetq_lane_u64(x64, 0) > > + > > + vgetq_lane_u64(x64, 1)); > > + /* Compare all signatures in the secondary bucket */ > > + vmat = vceqq_u16(vsig, > > + vld1q_u16((uint16_t const *)sec_bkt- > > >sig_current)); > > + x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), > > shift); > > + x64 = vpaddlq_u32(vpaddlq_u16(x)); > > + *sec_hash_matches = (uint32_t)(vgetq_lane_u64(x64, 0) + > > + vgetq_lane_u64(x64, 1)); > > + break; > > #endif > > default: > > for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { diff --git > > a/lib/librte_hash/rte_cuckoo_hash.h > > b/lib/librte_hash/rte_cuckoo_hash.h > > index eacdaa8d4..0548c97f0 100644 > > --- a/lib/librte_hash/rte_cuckoo_hash.h > > +++ b/lib/librte_hash/rte_cuckoo_hash.h > > @@ -141,6 +141,7 @@ struct rte_hash_key { enum > > rte_hash_sig_compare_function { > > RTE_HASH_COMPARE_SCALAR = 0, > > RTE_HASH_COMPARE_SSE, > > + RTE_HASH_COMPARE_NEON, > > RTE_HASH_COMPARE_NUM > > }; > >