On Wed, Nov 5, 2014 at 2:23 PM, Michio Honda <michio.ho...@neclab.eu> wrote:
> Parsing packets against all the flow-key fields and lookup
> is expensive.
> This patch makes it possible to implement minimal flow
> key extraction and/or packet lookup for a specific pattern
> of flows installed in the datapath (e.g., when all the flows
> match only on source and destination MAC addresses).

This is a very specific solution, so I do not think it is a useful
addition to the generic datapath classifier.

>
> Signed-off-by: Michio Honda <michio.ho...@neclab.eu>
> ---
>  datapath/actions.c      | 20 +++++++----
>  datapath/datapath.c     |  9 +++--
>  datapath/datapath.h     |  2 ++
>  datapath/flow.c         | 48 ++++++++++++++++++++++++--
>  datapath/flow.h         | 40 ++++++++++++++++++++++
>  datapath/flow_netlink.c | 60 --------------------------------
>  datapath/flow_table.c   | 91 
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  datapath/flow_table.h   |  9 +++++
>  datapath/vport.c        | 18 +++++++++-
>  9 files changed, 225 insertions(+), 72 deletions(-)
>
> diff --git a/datapath/actions.c b/datapath/actions.c
> index a42ad1e..3662084 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -635,6 +635,7 @@ static int output_userspace(struct datapath *dp, struct 
> sk_buff *skb,
>         struct ovs_tunnel_info info;
>
>         upcall.cmd = OVS_PACKET_CMD_ACTION;
> +       ovs_flow_key_rebuild(skb, key);
>         upcall.userdata = NULL;
>         upcall.portid = 0;
>         upcall.egress_tun_info = NULL;
> @@ -800,15 +801,22 @@ static int execute_recirc(struct datapath *dp, struct 
> sk_buff *skb,
>                           struct sw_flow_key *key, const struct nlattr *a, 
> int rem)
>  {
>         struct deferred_action *da;
> +       const struct flow_fastpath *fp;
> +       int err;
>
> -       if (!is_flow_key_valid(key)) {
> -               int err;
> +       fp = rcu_dereference_ovsl(dp->table.fastpath);
> +       if (fp) {
> +               struct sw_flow *flow;
>
> +               flow = fp->lookup(skb, key, &err);
> +               if (likely(!err)) {
> +                       OVS_CB(skb)->flow = flow;
> +                       OVS_CB(skb)->key_maybe_masked = 1;
> +               }
> +       } else if (!is_flow_key_valid(key))
>                 err = ovs_flow_key_update(skb, key);
> -               if (err)
> -                       return err;
> -
> -       }
> +       if (err)
> +               return err;
>         BUG_ON(!is_flow_key_valid(key));
>
>         if (!last_action(a, rem)) {
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 789b453..7210ab3 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -267,13 +267,18 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct 
> sw_flow_key *key)
>         stats = this_cpu_ptr(dp->stats_percpu);
>
>         /* Look up flow. */
> -       flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
> -                                        &n_mask_hit);
> +       flow = OVS_CB(skb)->flow;
> +       if (flow)
> +               n_mask_hit = 1; /* XXX pretend mask cache hit */
> +       else if (!OVS_CB(skb)->key_maybe_masked)
> +               flow = ovs_flow_tbl_lookup_stats(&dp->table, key,
> +                               skb_get_hash(skb), &n_mask_hit);
>         if (unlikely(!flow)) {
>                 struct dp_upcall_info upcall;
>                 int error;
>
>                 upcall.cmd = OVS_PACKET_CMD_MISS;
> +               ovs_flow_key_rebuild(skb, key);
>                 upcall.userdata = NULL;
>                 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
>                 upcall.egress_tun_info = NULL;
> diff --git a/datapath/datapath.h b/datapath/datapath.h
> index 7dfd5af..b85ee2a 100644
> --- a/datapath/datapath.h
> +++ b/datapath/datapath.h
> @@ -104,6 +104,8 @@ struct datapath {
>  struct ovs_skb_cb {
>         struct ovs_tunnel_info  *egress_tun_info;
>         struct vport            *input_vport;
> +       struct sw_flow          *flow;
> +       uint8_t key_maybe_masked;
>  };
>  #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
>
> diff --git a/datapath/flow.c b/datapath/flow.c
> index a3c5d2f..53ee71f 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -680,9 +680,19 @@ int ovs_flow_key_update(struct sk_buff *skb, struct 
> sw_flow_key *key)
>         return key_extract(skb, key);
>  }
>
> -int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
> -                        struct sk_buff *skb,
> -                        struct sw_flow_key *key)
> +int ovs_flow_key_rebuild(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> +       if (!OVS_CB(skb)->key_maybe_masked)
> +               return 0;
> +       else if (ovs_flow_key_update(skb, key))
> +               return -1;
> +       OVS_CB(skb)->key_maybe_masked = 0;
> +       return 0;
> +}
> +
> +void ovs_metadata_key_extract(const struct ovs_tunnel_info *tun_info,
> +                            struct sk_buff *skb,
> +                            struct sw_flow_key *key)
>  {
>         /* Extract metadata from packet. */
>         if (tun_info) {
> @@ -708,7 +718,13 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info 
> *tun_info,
>         key->phy.skb_mark = skb->mark;
>         key->ovs_flow_hash = 0;
>         key->recirc_id = 0;
> +}
>
> +int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
> +                        struct sk_buff *skb,
> +                        struct sw_flow_key *key)
> +{
> +       ovs_metadata_key_extract(tun_info, skb, key);
>         return key_extract(skb, key);
>  }
>
> @@ -725,3 +741,29 @@ int ovs_flow_key_extract_userspace(const struct nlattr 
> *attr,
>
>         return key_extract(skb, key);
>  }
> +
> +void update_range(struct sw_flow_match *match,
> +                 size_t offset, size_t size, bool is_mask)
> +{
> +       struct sw_flow_key_range *range;
> +       size_t start = rounddown(offset, sizeof(long));
> +       size_t end = roundup(offset + size, sizeof(long));
> +
> +       if (!is_mask)
> +               range = &match->range;
> +       else
> +               range = &match->mask->range;
> +
> +       if (range->start == range->end) {
> +               range->start = start;
> +               range->end = end;
> +               return;
> +       }
> +
> +       if (range->start > start)
> +               range->start = start;
> +
> +       if (range->end < end)
> +               range->end = end;
> +}
> +
> diff --git a/datapath/flow.h b/datapath/flow.h
> index c78b864..8fb1566 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -260,5 +260,45 @@ int ovs_flow_key_extract_userspace(const struct nlattr 
> *attr,
>                                    struct sw_flow_key *key, bool log);
>  /* Update the non-metadata part of the flow key using skb. */
>  int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
> +int ovs_flow_key_rebuild(struct sk_buff *skb, struct sw_flow_key *key);
> +void ovs_metadata_key_extract(const struct ovs_tunnel_info *tun_info,
> +                            struct sk_buff *skb,
> +                            struct sw_flow_key *key);
> +
> +void update_range(struct sw_flow_match *, size_t, size_t, bool);
> +#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
> +       do { \
> +               update_range(match, offsetof(struct sw_flow_key, field),    \
> +                            sizeof((match)->key->field), is_mask);         \
> +               if (is_mask)                                                \
> +                       (match)->mask->key.field = value;                   \
> +               else                                                        \
> +                       (match)->key->field = value;                        \
> +       } while (0)
> +
> +#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)      
>       \
> +       do {                                                                \
> +               update_range(match, offset, len, is_mask);                  \
> +               if (is_mask)                                                \
> +                       memcpy((u8 *)&(match)->mask->key + offset, value_p, 
> len);\
> +               else                                                        \
> +                       memcpy((u8 *)(match)->key + offset, value_p, len);  \
> +       } while (0)
> +
> +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)              
>         \
> +       SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), 
> \
> +                                 value_p, len, is_mask)
> +
> +#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)             \
> +       do {                                                                \
> +               update_range(match, offsetof(struct sw_flow_key, field),    \
> +                            sizeof((match)->key->field), is_mask);         \
> +               if (is_mask)                                                \
> +                       memset((u8 *)&(match)->mask->key.field, value,      \
> +                              sizeof((match)->mask->key.field));           \
> +               else                                                        \
> +                       memset((u8 *)&(match)->key->field, value,           \
> +                              sizeof((match)->key->field));                \
> +       } while (0)
>
>  #endif /* flow.h */
> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
> index 37b0bdd..013a4e9 100644
> --- a/datapath/flow_netlink.c
> +++ b/datapath/flow_netlink.c
> @@ -51,66 +51,6 @@
>
>  #include "flow_netlink.h"
>
> -static void update_range(struct sw_flow_match *match,
> -                        size_t offset, size_t size, bool is_mask)
> -{
> -       struct sw_flow_key_range *range;
> -       size_t start = rounddown(offset, sizeof(long));
> -       size_t end = roundup(offset + size, sizeof(long));
> -
> -       if (!is_mask)
> -               range = &match->range;
> -       else
> -               range = &match->mask->range;
> -
> -       if (range->start == range->end) {
> -               range->start = start;
> -               range->end = end;
> -               return;
> -       }
> -
> -       if (range->start > start)
> -               range->start = start;
> -
> -       if (range->end < end)
> -               range->end = end;
> -}
> -
> -#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
> -       do { \
> -               update_range(match, offsetof(struct sw_flow_key, field),    \
> -                            sizeof((match)->key->field), is_mask);         \
> -               if (is_mask)                                                \
> -                       (match)->mask->key.field = value;                   \
> -               else                                                        \
> -                       (match)->key->field = value;                        \
> -       } while (0)
> -
> -#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)      
>       \
> -       do {                                                                \
> -               update_range(match, offset, len, is_mask);                  \
> -               if (is_mask)                                                \
> -                       memcpy((u8 *)&(match)->mask->key + offset, value_p, 
> len);\
> -               else                                                        \
> -                       memcpy((u8 *)(match)->key + offset, value_p, len);  \
> -       } while (0)
> -
> -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)              
>         \
> -       SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), 
> \
> -                                 value_p, len, is_mask)
> -
> -#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)             \
> -       do {                                                                \
> -               update_range(match, offsetof(struct sw_flow_key, field),    \
> -                            sizeof((match)->key->field), is_mask);         \
> -               if (is_mask)                                                \
> -                       memset((u8 *)&(match)->mask->key.field, value,      \
> -                              sizeof((match)->mask->key.field));           \
> -               else                                                        \
> -                       memset((u8 *)&(match)->key->field, value,           \
> -                              sizeof((match)->key->field));                \
> -       } while (0)
> -
>  static bool match_validate(const struct sw_flow_match *match,
>                            u64 key_attrs, u64 mask_attrs, bool log)
>  {
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index ad410fd..72bb06d 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -18,6 +18,7 @@
>
>  #include "flow.h"
>  #include "datapath.h"
> +#include "flow_netlink.h"
>  #include <linux/uaccess.h>
>  #include <linux/netdevice.h>
>  #include <linux/etherdevice.h>
> @@ -57,6 +58,14 @@
>  static struct kmem_cache *flow_cache;
>  struct kmem_cache *flow_stats_cache __read_mostly;
>
> +static struct flow_fastpath fastpath_array[] =
> +{
> +       {
> +       }
> +};
> +#define FASTPATH_ARRAY_LEN ARRAY_SIZE(fastpath_array)
> +static void fastpath_update(struct flow_table *tbl);
> +
>  static u16 range_n_bytes(const struct sw_flow_key_range *range)
>  {
>         return range->end - range->start;
> @@ -263,10 +272,32 @@ static int tbl_mask_array_realloc(struct flow_table 
> *tbl, int size)
>         return 0;
>  }
>
> +static void tbl_mask_array_delete_mask(struct mask_array *, struct 
> sw_flow_mask *);
> +static void flow_fastpath_destroy(struct flow_table *table)
> +{
> +       int i, j;
> +
> +       rcu_assign_pointer(table->fastpath, NULL);
> +       for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
> +               struct flow_fastpath *fp = &fastpath_array[i];
> +               struct mask_array *ma = &fp->ma;
> +
> +               /* we have not been ref-counted masks */
> +               for (j = 0; j < ma->count; j++) {
> +                       struct sw_flow_mask *mask = 
> ovsl_dereference(ma->masks[j]);
> +
> +                       tbl_mask_array_delete_mask(ma, mask);
> +                       call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
> +               }
> +               ma->max = 0;
> +       }
> +}
> +
>  int ovs_flow_tbl_init(struct flow_table *table)
>  {
>         struct table_instance *ti;
>         struct mask_array *ma;
> +       int i;
>
>         table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
>                                           MC_HASH_ENTRIES, __alignof__(struct 
> mask_cache_entry));
> @@ -285,6 +316,11 @@ int ovs_flow_tbl_init(struct flow_table *table)
>         rcu_assign_pointer(table->mask_array, ma);
>         table->last_rehash = jiffies;
>         table->count = 0;
> +       rcu_assign_pointer(table->fastpath, NULL);
> +       for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
> +               struct flow_fastpath *fp = &fastpath_array[i];
> +               fp->init(fp);
> +       }
>         return 0;
>
>  free_mask_array:
> @@ -337,6 +373,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  {
>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>
> +       flow_fastpath_destroy(table);
>         free_percpu(table->mask_cache);
>         kfree(rcu_dereference_raw(table->mask_array));
>         table_instance_destroy(ti, false);
> @@ -696,6 +733,7 @@ static void flow_mask_remove(struct flow_table *tbl, 
> struct sw_flow_mask *mask)
>
>                         ma = ovsl_dereference(tbl->mask_array);
>                         tbl_mask_array_delete_mask(ma, mask);
> +                       fastpath_update(tbl);
>
>                         /* Shrink the mask array if necessary. */
>                         if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
> @@ -811,6 +849,7 @@ static int flow_mask_insert(struct flow_table *tbl, 
> struct sw_flow *flow,
>         }
>
>         flow->mask = mask;
> +       fastpath_update(tbl);
>         return 0;
>  }
>
> @@ -846,6 +885,58 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct 
> sw_flow *flow,
>         return 0;
>  }
>
> +/* Return 0 if two mask arrays are identical in random
> + * order. We assume no duplicate in each of arrays.
> + */
> +static int mask_array_cmp(const struct mask_array *a, const struct 
> mask_array *b)
> +{
> +       int i, j;
> +
> +       if (a->count != b->count)
> +               return 1;
> +
> +       for (i = 0; i < a->count; i++) {
> +               struct sw_flow_mask *x;
> +
> +               x = ovsl_dereference(a->masks[i]);
> +               for (j = 0; j < b->count; j++) {
> +                       struct sw_flow_mask *y;
> +
> +                       y = ovsl_dereference(b->masks[j]);
> +                       if (mask_equal(x, y))
> +                               break;
> +               }
> +               if (j == b->count)
> +                       return 1;
> +       }
> +       return 0;
> +}
> +
> +/*
> + * Search for a corresponding fastpath implementation.
> + * If there is a match, we install the corresponding one,
> + * otherwise de-install current one.
> + * So this can be used on both addition and deletion of a mask.
> + */
> +static void fastpath_update(struct flow_table *tbl)
> +{
> +       const struct mask_array *ma;
> +       int i;
> +
> +       ma = ovsl_dereference(tbl->mask_array);
> +
> +       for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
> +               struct flow_fastpath *fp = &fastpath_array[i];
> +
> +               if (mask_array_cmp(&fp->ma, ma) == 0) {
> +                       rcu_assign_pointer(tbl->fastpath, fp);
> +                       break;
> +               }
> +       }
> +       if (i == FASTPATH_ARRAY_LEN && ovsl_dereference(tbl->fastpath) != 
> NULL)
> +               rcu_assign_pointer(tbl->fastpath, NULL);
> +}
> +
>  /* Initializes the flow module.
>   * Returns zero if successful or a negative error code.
>   */
> diff --git a/datapath/flow_table.h b/datapath/flow_table.h
> index 9eb4af9..e1fbbcf 100644
> --- a/datapath/flow_table.h
> +++ b/datapath/flow_table.h
> @@ -56,12 +56,21 @@ struct table_instance {
>         bool keep_flows;
>  };
>
> +struct flow_fastpath {
> +       void (*init)(struct flow_fastpath *);
> +       struct sw_flow* (*lookup)(struct sk_buff *skb,
> +                                 struct sw_flow_key *key, int *error);
> +       void *data; /* opaque to store optimal database */
> +       struct mask_array ma;
> +};
> +
>  struct flow_table {
>         struct table_instance __rcu *ti;
>         struct mask_cache_entry __percpu *mask_cache;
>         struct mask_array __rcu *mask_array;
>         unsigned long last_rehash;
>         unsigned int count;
> +       struct flow_fastpath __rcu *fastpath;
>  };
>
>  extern struct kmem_cache *flow_stats_cache;
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 274e47f..3699a82 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -448,6 +448,8 @@ void ovs_vport_receive(struct vport *vport, struct 
> sk_buff *skb,
>  {
>         struct pcpu_sw_netstats *stats;
>         struct sw_flow_key key;
> +       const struct datapath *dp = vport->dp;
> +       const struct flow_fastpath *fp;
>         int error;
>
>         stats = this_cpu_ptr(vport->percpu_stats);
> @@ -459,7 +461,21 @@ void ovs_vport_receive(struct vport *vport, struct 
> sk_buff *skb,
>         ovs_skb_init_inner_protocol(skb);
>         OVS_CB(skb)->input_vport = vport;
>         OVS_CB(skb)->egress_tun_info = NULL;
> -       error = ovs_flow_key_extract(tun_info, skb, &key);
> +       OVS_CB(skb)->flow = NULL;
> +       fp = rcu_dereference_ovsl(dp->table.fastpath);
> +       if (fp) {
> +               struct sw_flow *flow;
> +
> +               memset(&key, 0, sizeof(key));
> +               ovs_metadata_key_extract(tun_info, skb, &key);
> +               flow = fp->lookup(skb, &key, &error);
> +               if (likely(!error)) {
> +                       OVS_CB(skb)->flow = flow;
> +                       OVS_CB(skb)->key_maybe_masked = 1;
> +               }
> +       } else
> +               /* Extract flow from 'skb' into 'key'. */
> +               error = ovs_flow_key_extract(tun_info, skb, &key);
>         if (unlikely(error)) {
>                 kfree_skb(skb);
>                 return;
> --
> 1.9.3 (Apple Git-50)
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to