From: Pavan Nikhilesh <pbhagavat...@marvell.com>
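Use the optimized rte_hash_k32_cmp_eq routine for key comparison on
x86 and ARM64.
Use CRC instructions for hash generation on ARM64, as is already done
on x86.
Mark the rte_hash_k*_cmp_eq helpers as static inline, as their headers
are now also included by lib/ip_frag.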
Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
---
On Neoverse-N2, performance improved by 10% when measured with
examples/ip_reassembly.
v5 Changes:
- Fix issues reported by spellcheck.
v4 Changes:
- Fix compilation failures (sys/queue)
- Update test case to use proper macros.
v3 Changes:
- Drop NEON patch.
v2 Changes:
- Fix compilation failure on non-ARM64/x86 targets.
lib/hash/rte_cmp_arm64.h | 16 ++++++++--------
lib/hash/rte_cmp_x86.h | 16 ++++++++--------
lib/ip_frag/ip_frag_common.h | 14 ++++++++++++++
lib/ip_frag/ip_frag_internal.c | 4 ++--
4 files changed, 32 insertions(+), 18 deletions(-)
diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h
index e9e26f9abd..a3e85635eb 100644
--- a/lib/hash/rte_cmp_arm64.h
+++ b/lib/hash/rte_cmp_arm64.h
@@ -3,7 +3,7 @@
*/
/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
-static int
+static inline int
rte_hash_k16_cmp_eq(const void *key1, const void *key2,
size_t key_len __rte_unused)
{
@@ -24,7 +24,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2,
return !(x0 == 0 && x1 == 0);
}
-static int
+static inline int
rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
@@ -32,7 +32,7 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 16, key_len);
}
-static int
+static inline int
rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
@@ -42,7 +42,7 @@ rte_hash_k48_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 32, key_len);
}
-static int
+static inline int
rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
@@ -50,7 +50,7 @@ rte_hash_k64_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 32, key_len);
}
-static int
+static inline int
rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
@@ -58,7 +58,7 @@ rte_hash_k80_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 64, key_len);
}
-static int
+static inline int
rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
@@ -66,7 +66,7 @@ rte_hash_k96_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 64, key_len);
}
-static int
+static inline int
rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
@@ -76,7 +76,7 @@ rte_hash_k112_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 96, key_len);
}
-static int
+static inline int
rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h
index 13a5836351..ddfbef462f 100644
--- a/lib/hash/rte_cmp_x86.h
+++ b/lib/hash/rte_cmp_x86.h
@@ -5,7 +5,7 @@
#include <rte_vect.h>
/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
-static int
+static inline int
rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len
__rte_unused)
{
const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
@@ -15,7 +15,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2,
size_t key_len __rte_unu
return !_mm_test_all_zeros(x, x);
}
-static int
+static inline int
rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
@@ -23,7 +23,7 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 16, key_len);
}
-static int
+static inline int
rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
@@ -33,7 +33,7 @@ rte_hash_k48_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 32, key_len);
}
-static int
+static inline int
rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
@@ -41,7 +41,7 @@ rte_hash_k64_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 32, key_len);
}
-static int
+static inline int
rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
@@ -49,7 +49,7 @@ rte_hash_k80_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 64, key_len);
}
-static int
+static inline int
rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
@@ -57,7 +57,7 @@ rte_hash_k96_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 64, key_len);
}
-static int
+static inline int
rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
@@ -67,7 +67,7 @@ rte_hash_k112_cmp_eq(const void *key1, const void *key2,
size_t key_len)
(const char *) key2 + 96, key_len);
}
-static int
+static inline int
rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
{
return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
index 0d8ce6a1e1..7d6c1aa98d 100644
--- a/lib/ip_frag/ip_frag_common.h
+++ b/lib/ip_frag/ip_frag_common.h
@@ -7,6 +7,14 @@
#include <sys/queue.h>
+#include <rte_common.h>
+
+#if defined(RTE_ARCH_ARM64)
+#include <rte_cmp_arm64.h>
+#elif defined(RTE_ARCH_X86)
+#include <rte_cmp_x86.h>
+#endif
+
#include "rte_ip_frag.h"
#include "ip_reassembly.h"
@@ -75,12 +83,18 @@ ip_frag_key_invalidate(struct ip_frag_key * key)
static inline uint64_t
ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2)
{
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+ return (k1->id_key_len != k2->id_key_len) ||
+ (k1->key_len == IPV4_KEYLEN ? k1->src_dst[0] != k2->src_dst[0] :
+ rte_hash_k32_cmp_eq(k1, k2, 32));
+#else
uint32_t i;
uint64_t val;
val = k1->id_key_len ^ k2->id_key_len;
for (i = 0; i < k1->key_len; i++)
val |= k1->src_dst[i] ^ k2->src_dst[i];
return val;
+#endif
}
/*
diff --git a/lib/ip_frag/ip_frag_internal.c b/lib/ip_frag/ip_frag_internal.c
index b436a4c931..7cbef647df 100644
--- a/lib/ip_frag/ip_frag_internal.c
+++ b/lib/ip_frag/ip_frag_internal.c
@@ -45,7 +45,7 @@ ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1,
uint32_t *v2)
p = (const uint32_t *)&key->src_dst;
-#ifdef RTE_ARCH_X86
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(key->id, v);
@@ -66,7 +66,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1,
uint32_t *v2)
p = (const uint32_t *) &key->src_dst;
-#ifdef RTE_ARCH_X86
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(p[2], v);
--