Parsing packets against all the flow-key fields and then looking
them up is expensive.
This patch makes it possible to implement minimal flow-key
extraction and/or packet lookup for a specific pattern
of flows installed in the datapath (e.g., when all the flows
match only on source and destination MAC addresses).

Signed-off-by: Michio Honda <michio.ho...@neclab.eu>
---
 datapath/actions.c      | 20 +++++++----
 datapath/datapath.c     |  9 +++--
 datapath/datapath.h     |  2 ++
 datapath/flow.c         | 48 ++++++++++++++++++++++++--
 datapath/flow.h         | 40 ++++++++++++++++++++++
 datapath/flow_netlink.c | 60 --------------------------------
 datapath/flow_table.c   | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
 datapath/flow_table.h   |  9 +++++
 datapath/vport.c        | 18 +++++++++-
 9 files changed, 225 insertions(+), 72 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index a42ad1e..3662084 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -635,6 +635,7 @@ static int output_userspace(struct datapath *dp, struct 
sk_buff *skb,
        struct ovs_tunnel_info info;
 
        upcall.cmd = OVS_PACKET_CMD_ACTION;
+       ovs_flow_key_rebuild(skb, key);
        upcall.userdata = NULL;
        upcall.portid = 0;
        upcall.egress_tun_info = NULL;
@@ -800,15 +801,22 @@ static int execute_recirc(struct datapath *dp, struct 
sk_buff *skb,
                          struct sw_flow_key *key, const struct nlattr *a, int 
rem)
 {
        struct deferred_action *da;
+       const struct flow_fastpath *fp;
+       int err;
 
-       if (!is_flow_key_valid(key)) {
-               int err;
+       fp = rcu_dereference_ovsl(dp->table.fastpath);
+       if (fp) {
+               struct sw_flow *flow;
 
+               flow = fp->lookup(skb, key, &err);
+               if (likely(!err)) {
+                       OVS_CB(skb)->flow = flow;
+                       OVS_CB(skb)->key_maybe_masked = 1;
+               }
+       } else if (!is_flow_key_valid(key))
                err = ovs_flow_key_update(skb, key);
-               if (err)
-                       return err;
-
-       }
+       if (err)
+               return err;
        BUG_ON(!is_flow_key_valid(key));
 
        if (!last_action(a, rem)) {
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 789b453..7210ab3 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -267,13 +267,18 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct 
sw_flow_key *key)
        stats = this_cpu_ptr(dp->stats_percpu);
 
        /* Look up flow. */
-       flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
-                                        &n_mask_hit);
+       flow = OVS_CB(skb)->flow;
+       if (flow)
+               n_mask_hit = 1; /* XXX pretend mask cache hit */
+       else if (!OVS_CB(skb)->key_maybe_masked)
+               flow = ovs_flow_tbl_lookup_stats(&dp->table, key,
+                               skb_get_hash(skb), &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;
 
                upcall.cmd = OVS_PACKET_CMD_MISS;
+               ovs_flow_key_rebuild(skb, key);
                upcall.userdata = NULL;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.egress_tun_info = NULL;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 7dfd5af..b85ee2a 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -104,6 +104,8 @@ struct datapath {
 struct ovs_skb_cb {
        struct ovs_tunnel_info  *egress_tun_info;
        struct vport            *input_vport;
+       struct sw_flow          *flow;
+       uint8_t key_maybe_masked;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/datapath/flow.c b/datapath/flow.c
index a3c5d2f..53ee71f 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -680,9 +680,19 @@ int ovs_flow_key_update(struct sk_buff *skb, struct 
sw_flow_key *key)
        return key_extract(skb, key);
 }
 
-int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
-                        struct sk_buff *skb,
-                        struct sw_flow_key *key)
+int ovs_flow_key_rebuild(struct sk_buff *skb, struct sw_flow_key *key)
+{
+       if (!OVS_CB(skb)->key_maybe_masked)
+               return 0;
+       else if (ovs_flow_key_update(skb, key))
+               return -1;
+       OVS_CB(skb)->key_maybe_masked = 0;
+       return 0;
+}
+
+void ovs_metadata_key_extract(const struct ovs_tunnel_info *tun_info,
+                            struct sk_buff *skb,
+                            struct sw_flow_key *key)
 {
        /* Extract metadata from packet. */
        if (tun_info) {
@@ -708,7 +718,13 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info 
*tun_info,
        key->phy.skb_mark = skb->mark;
        key->ovs_flow_hash = 0;
        key->recirc_id = 0;
+}
 
+int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
+                        struct sk_buff *skb,
+                        struct sw_flow_key *key)
+{
+       ovs_metadata_key_extract(tun_info, skb, key);
        return key_extract(skb, key);
 }
 
@@ -725,3 +741,29 @@ int ovs_flow_key_extract_userspace(const struct nlattr 
*attr,
 
        return key_extract(skb, key);
 }
+
+void update_range(struct sw_flow_match *match,
+                 size_t offset, size_t size, bool is_mask)
+{
+       struct sw_flow_key_range *range;
+       size_t start = rounddown(offset, sizeof(long));
+       size_t end = roundup(offset + size, sizeof(long));
+
+       if (!is_mask)
+               range = &match->range;
+       else
+               range = &match->mask->range;
+
+       if (range->start == range->end) {
+               range->start = start;
+               range->end = end;
+               return;
+       }
+
+       if (range->start > start)
+               range->start = start;
+
+       if (range->end < end)
+               range->end = end;
+}
+
diff --git a/datapath/flow.h b/datapath/flow.h
index c78b864..8fb1566 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -260,5 +260,45 @@ int ovs_flow_key_extract_userspace(const struct nlattr 
*attr,
                                   struct sw_flow_key *key, bool log);
 /* Update the non-metadata part of the flow key using skb. */
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_rebuild(struct sk_buff *skb, struct sw_flow_key *key);
+void ovs_metadata_key_extract(const struct ovs_tunnel_info *tun_info,
+                            struct sk_buff *skb,
+                            struct sw_flow_key *key);
+
+void update_range(struct sw_flow_match *, size_t, size_t, bool);
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+       do { \
+               update_range(match, offsetof(struct sw_flow_key, field),    \
+                            sizeof((match)->key->field), is_mask);         \
+               if (is_mask)                                                \
+                       (match)->mask->key.field = value;                   \
+               else                                                        \
+                       (match)->key->field = value;                        \
+       } while (0)
+
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)        
    \
+       do {                                                                \
+               update_range(match, offset, len, is_mask);                  \
+               if (is_mask)                                                \
+                       memcpy((u8 *)&(match)->mask->key + offset, value_p, 
len);\
+               else                                                        \
+                       memcpy((u8 *)(match)->key + offset, value_p, len);  \
+       } while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)                
      \
+       SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+                                 value_p, len, is_mask)
+
+#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)             \
+       do {                                                                \
+               update_range(match, offsetof(struct sw_flow_key, field),    \
+                            sizeof((match)->key->field), is_mask);         \
+               if (is_mask)                                                \
+                       memset((u8 *)&(match)->mask->key.field, value,      \
+                              sizeof((match)->mask->key.field));           \
+               else                                                        \
+                       memset((u8 *)&(match)->key->field, value,           \
+                              sizeof((match)->key->field));                \
+       } while (0)
 
 #endif /* flow.h */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 37b0bdd..013a4e9 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -51,66 +51,6 @@
 
 #include "flow_netlink.h"
 
-static void update_range(struct sw_flow_match *match,
-                        size_t offset, size_t size, bool is_mask)
-{
-       struct sw_flow_key_range *range;
-       size_t start = rounddown(offset, sizeof(long));
-       size_t end = roundup(offset + size, sizeof(long));
-
-       if (!is_mask)
-               range = &match->range;
-       else
-               range = &match->mask->range;
-
-       if (range->start == range->end) {
-               range->start = start;
-               range->end = end;
-               return;
-       }
-
-       if (range->start > start)
-               range->start = start;
-
-       if (range->end < end)
-               range->end = end;
-}
-
-#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
-       do { \
-               update_range(match, offsetof(struct sw_flow_key, field),    \
-                            sizeof((match)->key->field), is_mask);         \
-               if (is_mask)                                                \
-                       (match)->mask->key.field = value;                   \
-               else                                                        \
-                       (match)->key->field = value;                        \
-       } while (0)
-
-#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)        
    \
-       do {                                                                \
-               update_range(match, offset, len, is_mask);                  \
-               if (is_mask)                                                \
-                       memcpy((u8 *)&(match)->mask->key + offset, value_p, 
len);\
-               else                                                        \
-                       memcpy((u8 *)(match)->key + offset, value_p, len);  \
-       } while (0)
-
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)                
      \
-       SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
-                                 value_p, len, is_mask)
-
-#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)             \
-       do {                                                                \
-               update_range(match, offsetof(struct sw_flow_key, field),    \
-                            sizeof((match)->key->field), is_mask);         \
-               if (is_mask)                                                \
-                       memset((u8 *)&(match)->mask->key.field, value,      \
-                              sizeof((match)->mask->key.field));           \
-               else                                                        \
-                       memset((u8 *)&(match)->key->field, value,           \
-                              sizeof((match)->key->field));                \
-       } while (0)
-
 static bool match_validate(const struct sw_flow_match *match,
                           u64 key_attrs, u64 mask_attrs, bool log)
 {
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index ad410fd..72bb06d 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -18,6 +18,7 @@
 
 #include "flow.h"
 #include "datapath.h"
+#include "flow_netlink.h"
 #include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -57,6 +58,14 @@
 static struct kmem_cache *flow_cache;
 struct kmem_cache *flow_stats_cache __read_mostly;
 
+static struct flow_fastpath fastpath_array[] =
+{
+       {
+       }
+};
+#define FASTPATH_ARRAY_LEN ARRAY_SIZE(fastpath_array)
+static void fastpath_update(struct flow_table *tbl);
+
 static u16 range_n_bytes(const struct sw_flow_key_range *range)
 {
        return range->end - range->start;
@@ -263,10 +272,32 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, 
int size)
        return 0;
 }
 
+static void tbl_mask_array_delete_mask(struct mask_array *, struct 
sw_flow_mask *);
+static void flow_fastpath_destroy(struct flow_table *table)
+{
+       int i, j;
+
+       rcu_assign_pointer(table->fastpath, NULL);
+       for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
+               struct flow_fastpath *fp = &fastpath_array[i];
+               struct mask_array *ma = &fp->ma;
+
+               /* These masks are not ref-counted; free them directly. */
+               for (j = 0; j < ma->count; j++) {
+                       struct sw_flow_mask *mask = 
ovsl_dereference(ma->masks[j]);
+
+                       tbl_mask_array_delete_mask(ma, mask);
+                       call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
+               }
+               ma->max = 0;
+       }
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
        struct table_instance *ti;
        struct mask_array *ma;
+       int i;
 
        table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
                                          MC_HASH_ENTRIES, __alignof__(struct 
mask_cache_entry));
@@ -285,6 +316,11 @@ int ovs_flow_tbl_init(struct flow_table *table)
        rcu_assign_pointer(table->mask_array, ma);
        table->last_rehash = jiffies;
        table->count = 0;
+       rcu_assign_pointer(table->fastpath, NULL);
+       for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
+               struct flow_fastpath *fp = &fastpath_array[i];
+               fp->init(fp);
+       }
        return 0;
 
 free_mask_array:
@@ -337,6 +373,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 {
        struct table_instance *ti = rcu_dereference_raw(table->ti);
 
+       flow_fastpath_destroy(table);
        free_percpu(table->mask_cache);
        kfree(rcu_dereference_raw(table->mask_array));
        table_instance_destroy(ti, false);
@@ -696,6 +733,7 @@ static void flow_mask_remove(struct flow_table *tbl, struct 
sw_flow_mask *mask)
 
                        ma = ovsl_dereference(tbl->mask_array);
                        tbl_mask_array_delete_mask(ma, mask);
+                       fastpath_update(tbl);
 
                        /* Shrink the mask array if necessary. */
                        if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
@@ -811,6 +849,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct 
sw_flow *flow,
        }
 
        flow->mask = mask;
+       fastpath_update(tbl);
        return 0;
 }
 
@@ -846,6 +885,58 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct 
sw_flow *flow,
        return 0;
 }
 
+/* Return 0 if the two mask arrays contain the same masks,
+ * in any order. We assume neither array contains duplicates.
+ */
+static int mask_array_cmp(const struct mask_array *a, const struct mask_array 
*b)
+{
+       int i, j;
+
+       if (a->count != b->count)
+               return 1;
+
+       for (i = 0; i < a->count; i++) {
+               struct sw_flow_mask *x;
+
+               x = ovsl_dereference(a->masks[i]);
+               for (j = 0; j < b->count; j++) {
+                       struct sw_flow_mask *y;
+
+                       y = ovsl_dereference(b->masks[j]);
+                       if (mask_equal(x, y))
+                               break;
+               }
+               if (j == b->count)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Search for a fastpath implementation whose mask array matches the
+ * table's current one. If there is a match, install it; otherwise
+ * uninstall the current fastpath, if any.
+ * This can therefore be called on both addition and deletion of a mask.
+ */
+static void fastpath_update(struct flow_table *tbl)
+{
+       const struct mask_array *ma;
+       int i;
+
+       ma = ovsl_dereference(tbl->mask_array);
+
+       for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
+               struct flow_fastpath *fp = &fastpath_array[i];
+
+               if (mask_array_cmp(&fp->ma, ma) == 0) {
+                       rcu_assign_pointer(tbl->fastpath, fp);
+                       break;
+               }
+       }
+       if (i == FASTPATH_ARRAY_LEN && ovsl_dereference(tbl->fastpath) != NULL)
+               rcu_assign_pointer(tbl->fastpath, NULL);
+}
+
 /* Initializes the flow module.
  * Returns zero if successful or a negative error code.
  */
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
index 9eb4af9..e1fbbcf 100644
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -56,12 +56,21 @@ struct table_instance {
        bool keep_flows;
 };
 
+struct flow_fastpath {
+       void (*init)(struct flow_fastpath *);
+       struct sw_flow* (*lookup)(struct sk_buff *skb,
+                                 struct sw_flow_key *key, int *error);
+       void *data; /* opaque pointer for storing an optimized database */
+       struct mask_array ma;
+};
+
 struct flow_table {
        struct table_instance __rcu *ti;
        struct mask_cache_entry __percpu *mask_cache;
        struct mask_array __rcu *mask_array;
        unsigned long last_rehash;
        unsigned int count;
+       struct flow_fastpath __rcu *fastpath;
 };
 
 extern struct kmem_cache *flow_stats_cache;
diff --git a/datapath/vport.c b/datapath/vport.c
index 274e47f..3699a82 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -448,6 +448,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff 
*skb,
 {
        struct pcpu_sw_netstats *stats;
        struct sw_flow_key key;
+       const struct datapath *dp = vport->dp;
+       const struct flow_fastpath *fp;
        int error;
 
        stats = this_cpu_ptr(vport->percpu_stats);
@@ -459,7 +461,21 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff 
*skb,
        ovs_skb_init_inner_protocol(skb);
        OVS_CB(skb)->input_vport = vport;
        OVS_CB(skb)->egress_tun_info = NULL;
-       error = ovs_flow_key_extract(tun_info, skb, &key);
+       OVS_CB(skb)->flow = NULL;
+       fp = rcu_dereference_ovsl(dp->table.fastpath);
+       if (fp) {
+               struct sw_flow *flow;
+
+               memset(&key, 0, sizeof(key));
+               ovs_metadata_key_extract(tun_info, skb, &key);
+               flow = fp->lookup(skb, &key, &error);
+               if (likely(!error)) {
+                       OVS_CB(skb)->flow = flow;
+                       OVS_CB(skb)->key_maybe_masked = 1;
+               }
+       } else
+               /* Extract flow from 'skb' into 'key'. */
+               error = ovs_flow_key_extract(tun_info, skb, &key);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
-- 
1.9.3 (Apple Git-50)
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to