For architectures that can load and store unaligned longs efficiently, use
4- or 8-byte operations. This improves efficiency compared to byte-wise
operations.
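
As a minimal standalone sketch of the word-at-a-time comparison idea (not
part of the patch; the helper name equal_words and its signature are
illustrative only, assuming a hosted C99 environment):

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  /* Compare n bytes word-at-a-time when both pointers are long-aligned,
   * then finish with a byte-wise tail.  OR-ing the XOR of each pair of
   * words accumulates any difference without a branch per word. */
  static bool equal_words(const void *a, const void *b, size_t n)
  {
          const uint8_t *p1 = a, *p2 = b;
          long diffs = 0;

          if ((((uintptr_t)p1 | (uintptr_t)p2) & (sizeof(long) - 1)) == 0) {
                  const long *l1 = (const long *)p1;
                  const long *l2 = (const long *)p2;

                  for (; n >= sizeof(long); n -= sizeof(long))
                          diffs |= *l1++ ^ *l2++;

                  p1 = (const uint8_t *)l1;
                  p2 = (const uint8_t *)l2;
          }

          while (n-- > 0)
                  diffs |= *p1++ ^ *p2++;

          return diffs == 0;
  }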

This patch uses ideas and code from a patch submitted by Peter Klausler
titled "replace memcmp() with specialized comparator". The flow compare
function is essentially his implementation.  The original patch
reported a 7x speedup from this optimization.

Co-authored-by: Peter Klausler <p...@google.com>
Signed-off-by: Andy Zhou <az...@nicira.com>
---
 datapath/flow.c |   55 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/datapath/flow.c b/datapath/flow.c
index 39de931..273cbea 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -45,6 +45,13 @@
 
 #include "vlan.h"
 
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#define ADDR_IS_ALIGNED(addr)  1
+#else
+#define ADDR_IS_ALIGNED(addr)  \
+           (((long)(addr) & (sizeof(long) - 1)) == 0)
+#endif
+
 static struct kmem_cache *flow_cache;
 
 static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
@@ -343,16 +350,26 @@ static void flow_key_mask(struct sw_flow_key *dst,
                          const struct sw_flow_key *src,
                          const struct sw_flow_mask *mask)
 {
-       u8 *m = (u8 *)&mask->key + mask->range.start;
-       u8 *s = (u8 *)src + mask->range.start;
-       u8 *d = (u8 *)dst + mask->range.start;
-       int i;
+       const u8 *m = (u8 *)&mask->key;
+       const u8 *s = (u8 *)src;
+       u8 *d = (u8 *)dst;
+       int len = sizeof(*dst);
 
-       memset(dst, 0, sizeof(*dst));
-       for (i = 0; i < ovs_sw_flow_mask_size_roundup(mask); i++) {
-               *d = *s & *m;
-               d++, s++, m++;
+       if (ADDR_IS_ALIGNED((long)m | (long)s | (long)d)) {
+               const long *ml = (const long *)m;
+               const long *sl = (const long *)s;
+               long *dl = (long *)d;
+
+               for (; len >= sizeof(long); len -= sizeof(long))
+                       *dl++ = *sl++ & *ml++;
+
+               m = (const u8 *)ml;
+               s = (const u8 *)sl;
+               d = (u8 *)dl;
        }
+
+       while (len-- > 0)
+               *d++ = *s++ & *m++;
 }
 
 #define TCP_FLAGS_OFFSET 13
@@ -984,8 +1001,26 @@ static int flow_key_start(const struct sw_flow_key *key)
 static bool __cmp_key(const struct sw_flow_key *key1,
                const struct sw_flow_key *key2,  int key_start, int key_len)
 {
-       return !memcmp((u8 *)key1 + key_start,
-                       (u8 *)key2 + key_start, (key_len - key_start));
+       const u8 *cp1 = (u8 *)key1 + key_start;
+       const u8 *cp2 = (u8 *)key2 + key_start;
+       int len = key_len - key_start;
+       long diffs = 0;
+
+       if (ADDR_IS_ALIGNED((long)cp1 | (long)cp2)) {
+               const long *lp1 = (const long *)cp1;
+               const long *lp2 = (const long *)cp2;
+
+               for (; len >= sizeof(long); len -= sizeof(long))
+                       diffs |= *lp1++ ^ *lp2++;
+
+               cp1 = (const u8 *)lp1;
+               cp2 = (const u8 *)lp2;
+       }
+
+       while (len-- > 0)
+               diffs |= *cp1++ ^ *cp2++;
+
+       return diffs == 0;
 }
 
 static bool __flow_cmp_key(const struct sw_flow *flow,
-- 
1.7.9.5
