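The ovs-flow rehash does not touch the mega-flow mask list. The following patch moves that list out of the rehashed hash table and into the flow table embedded in struct datapath. This avoids one extra indirection when accessing the mega-flow list head on every received packet.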
Signed-off-by: Pravin B Shelar <pshe...@nicira.com> --- v3: No change. v2: No change. --- datapath/datapath.c | 77 +++++------------------- datapath/datapath.h | 6 +- datapath/flow_table.c | 157 +++++++++++++++++++++++++++++++++---------------- datapath/flow_table.h | 31 ++++------ 4 files changed, 137 insertions(+), 134 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 7178513..1e7806c 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -61,8 +61,6 @@ #include "vport-internal_dev.h" #include "vport-netdev.h" -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) - int ovs_net_id __read_mostly; static void ovs_notify(struct sk_buff *skb, struct genl_info *info, @@ -165,7 +163,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); + ovs_flow_tbl_destroy(&dp->table, false); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -237,7 +235,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -456,23 +454,6 @@ out: return err; } -/* Called with ovs_mutex. 
*/ -static int flush_flows(struct datapath *dp) -{ - struct flow_table *old_table; - struct flow_table *new_table; - - old_table = ovsl_dereference(dp->table); - new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); - if (!new_table) - return -ENOMEM; - - rcu_assign_pointer(dp->table, new_table); - - ovs_flow_tbl_destroy(old_table, true); - return 0; -} - static void clear_stats(struct sw_flow *flow) { flow->used = 0; @@ -587,11 +568,9 @@ static struct genl_ops dp_packet_genl_ops[] = { static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { - struct flow_table *table; int i; - table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); - stats->n_flows = ovs_flow_tbl_count(table); + stats->n_flows = ovs_flow_tbl_count(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; for_each_possible_cpu(i) { @@ -777,7 +756,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct flow_table *table; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; @@ -818,12 +796,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock_ovs; - table = ovsl_dereference(dp->table); - /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - struct flow_table *new_table = NULL; struct sw_flow_mask *mask_p; /* Bail out if we're not allowed to create a new flow. */ @@ -831,19 +806,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) goto err_unlock_ovs; - /* Expand table, if necessary, to make room. 
*/ - if (ovs_flow_tbl_need_to_expand(table)) - new_table = ovs_flow_tbl_expand(table); - else if (time_after(jiffies, dp->last_rehash + REHASH_FLOW_INTERVAL)) - new_table = ovs_flow_tbl_rehash(table); - - if (new_table && !IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(table, true); - table = ovsl_dereference(dp->table); - dp->last_rehash = jiffies; - } - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -856,7 +818,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->unmasked_key = key; /* Make sure mask is unique in the system */ - mask_p = ovs_sw_flow_mask_find(table, &mask); + mask_p = ovs_sw_flow_mask_find(&dp->table, &mask); if (!mask_p) { /* Allocate a new mask if none exsits. */ mask_p = ovs_sw_flow_mask_alloc(); @@ -864,7 +826,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; mask_p->key = mask.key; mask_p->range = mask.range; - ovs_sw_flow_mask_insert(table, mask_p); + ovs_sw_flow_mask_insert(&dp->table, mask_p); } ovs_sw_flow_mask_add_ref(mask_p); @@ -872,7 +834,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. 
*/ - ovs_flow_tbl_insert(table, flow); + ovs_flow_tbl_insert(&dp->table, flow); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -940,7 +902,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -961,8 +922,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { err = -ENOENT; goto unlock; @@ -995,7 +955,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1007,7 +966,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } if (!a[OVS_FLOW_ATTR_KEY]) { - err = flush_flows(dp); + err = ovs_flows_tbl_flush(&dp->table); goto unlock; } @@ -1016,8 +975,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock; - table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { err = -ENOENT; goto unlock; @@ -1034,7 +992,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - ovs_flow_tbl_remove(table, flow); + ovs_flow_tbl_remove(&dp->table, flow); err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); @@ -1053,8 +1011,8 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct hash_table *htable; struct datapath *dp; - struct flow_table *table; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), 
ovs_header->dp_ifindex); @@ -1062,15 +1020,15 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_unlock(); return -ENODEV; } + htable = rcu_dereference(dp->table.htable); - table = rcu_dereference(dp->table); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; - flow = ovs_flow_tbl_dump_next(table, &bucket, &obj); + flow = ovs_flow_tbl_dump_next(htable, &bucket, &obj); if (!flow) break; @@ -1234,9 +1192,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ - err = -ENOMEM; - rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); - if (!dp->table) + err = ovs_flow_tbl_init(&dp->table); + if (err) goto err_free_dp; dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); @@ -1293,7 +1250,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); + ovs_flow_tbl_destroy(&dp->table, false); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); diff --git a/datapath/datapath.h b/datapath/datapath.h index 403ea00..64920de 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -60,12 +60,11 @@ struct dp_stats_percpu { * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. - * @table: Current flow table. Protected by ovs_mutex and RCU. + * @table: flow table. * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. - * @last_rehash: Timestamp of last rehash. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -75,7 +74,7 @@ struct datapath { struct list_head list_node; /* Flow table. 
*/ - struct flow_table __rcu *table; + struct flow_table table; /* Switch ports. */ struct hlist_head *ports; @@ -87,7 +86,6 @@ struct datapath { /* Network namespace ref. */ struct net *net; #endif - unsigned long last_rehash; }; /** diff --git a/datapath/flow_table.c b/datapath/flow_table.c index 89e3110..0836ec2 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -44,8 +44,12 @@ #include <net/ipv6.h> #include <net/ndisc.h> +#include "datapath.h" #include "vlan.h" +#define TBL_MIN_BUCKETS 1024 +#define REHASH_INTERVAL (10 * 60 * HZ) + static struct kmem_cache *flow_cache; void ovs_match_init(struct sw_flow_match *match, @@ -100,6 +104,13 @@ struct sw_flow *ovs_flow_alloc(void) return flow; } +int ovs_flow_tbl_count(struct flow_table *table) +{ + struct hash_table *htable = rcu_dereference_check(table->htable, lockdep_ovsl_is_held()); + + return htable->count; +} + static struct flex_array *alloc_buckets(unsigned int n_buckets) { struct flex_array *buckets; @@ -154,7 +165,7 @@ static void free_buckets(struct flex_array *buckets) flex_array_free(buckets); } -static void __flow_tbl_destroy(struct flow_table *table) +static void __flow_tbl_destroy(struct hash_table *table) { int i; @@ -173,17 +184,14 @@ static void __flow_tbl_destroy(struct flow_table *table) } } - BUG_ON(!list_empty(table->mask_list)); - kfree(table->mask_list); - skip_flows: free_buckets(table->buckets); kfree(table); } -static struct flow_table *__flow_tbl_alloc(int new_size) +static struct hash_table *__flow_tbl_alloc(int new_size) { - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); + struct hash_table *table = kmalloc(sizeof(*table), GFP_KERNEL); if (!table) return NULL; @@ -199,37 +207,33 @@ static struct flow_table *__flow_tbl_alloc(int new_size) table->node_ver = 0; table->keep_flows = false; get_random_bytes(&table->hash_seed, sizeof(u32)); - table->mask_list = NULL; return table; } -struct flow_table *ovs_flow_tbl_alloc(int new_size) +int 
ovs_flow_tbl_init(struct flow_table *table) { - struct flow_table *table = __flow_tbl_alloc(new_size); + struct hash_table *htable; - if (!table) - return NULL; + htable = __flow_tbl_alloc(TBL_MIN_BUCKETS); - table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (!table->mask_list) { - table->keep_flows = true; - __flow_tbl_destroy(table); - return NULL; - } - INIT_LIST_HEAD(table->mask_list); + if (!htable) + return -ENOMEM; - return table; + rcu_assign_pointer(table->htable, htable); + INIT_LIST_HEAD(&table->mask_list); + table->last_rehash = jiffies; + return 0; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) { - struct flow_table *table = container_of(rcu, struct flow_table, rcu); + struct hash_table *table = container_of(rcu, struct hash_table, rcu); __flow_tbl_destroy(table); } -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +static void __ovs_flow_tbl_destroy(struct hash_table *table, bool deferred) { if (!table) return; @@ -240,8 +244,15 @@ void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) __flow_tbl_destroy(table); } -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, - u32 *bucket, u32 *last) +void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +{ + struct hash_table *htable = ovsl_dereference(table->htable); + + __ovs_flow_tbl_destroy(htable, deferred); +} + +struct sw_flow *ovs_flow_tbl_dump_next(struct hash_table *table, + u32 *bucket, u32 *last) { struct sw_flow *flow; struct hlist_head *head; @@ -267,14 +278,14 @@ struct sw_flow *ovs_flow_dump_next(struct flow_table *table, return NULL; } -static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) +static struct hlist_head *find_bucket(struct hash_table *table, u32 hash) { hash = jhash_1word(hash, table->hash_seed); return flex_array_get(table->buckets, (hash & (table->n_buckets - 1))); } -static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) +static void __tbl_insert(struct 
hash_table *table, struct sw_flow *flow) { struct hlist_head *head; @@ -284,8 +295,8 @@ static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) table->count++; } -static void flow_table_copy_flows(struct flow_table *old, - struct flow_table *new) +static void flow_table_copy_flows(struct hash_table *old, + struct hash_table *new) { int old_ver; int i; @@ -304,32 +315,37 @@ static void flow_table_copy_flows(struct flow_table *old, __tbl_insert(new, flow); } - new->mask_list = old->mask_list; old->keep_flows = true; } -static struct flow_table *__flow_tbl_rehash(struct flow_table *table, +static struct hash_table *__flow_tbl_rehash(struct hash_table *table, int n_buckets) { - struct flow_table *new_table; + struct hash_table *new_htable; - new_table = __flow_tbl_alloc(n_buckets); - if (!new_table) + new_htable = __flow_tbl_alloc(n_buckets); + if (!new_htable) return ERR_PTR(-ENOMEM); - flow_table_copy_flows(table, new_table); + flow_table_copy_flows(table, new_htable); - return new_table; + return new_htable; } -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) +int ovs_flows_tbl_flush(struct flow_table *flow_table) { - return __flow_tbl_rehash(table, table->n_buckets); -} + struct hash_table *old_table; + struct hash_table *new_htable; -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets * 2); + old_table = ovsl_dereference(flow_table->htable); + new_htable = __flow_tbl_alloc(TBL_MIN_BUCKETS); + if (!new_htable) + return -ENOMEM; + + rcu_assign_pointer(flow_table->htable, new_htable); + + __ovs_flow_tbl_destroy(old_table, true); + return 0; } static u32 flow_hash(const struct sw_flow_key *key, int key_start, @@ -392,7 +408,7 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, return __flow_cmp_unmasked_key(flow, key, key_start, key_end); } -static struct sw_flow *masked_flow_lookup(struct flow_table *table, +static struct sw_flow *masked_flow_lookup(struct 
hash_table *table, const struct sw_flow_key *unmasked, struct sw_flow_mask *mask) { @@ -415,14 +431,15 @@ static struct sw_flow *masked_flow_lookup(struct flow_table *table, return NULL; } -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) +static struct sw_flow *__flow_lookup(struct list_head *mask_list, + struct hash_table *htbl, + const struct sw_flow_key *key) { struct sw_flow *flow = NULL; struct sw_flow_mask *mask; - list_for_each_entry_rcu(mask, tbl->mask_list, list) { - flow = masked_flow_lookup(tbl, key, mask); + list_for_each_entry_rcu(mask, mask_list, list) { + flow = masked_flow_lookup(htbl, key, mask); if (flow) /* Found */ break; } @@ -430,18 +447,56 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return flow; } +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + struct hash_table *htbl = rcu_dereference(tbl->htable); + + return __flow_lookup(&tbl->mask_list, htbl, key); +} + +static struct hash_table *flow_tbl_rehash(struct hash_table *table) +{ + return __flow_tbl_rehash(table, table->n_buckets); +} + +static struct hash_table *flow_tbl_expand(struct hash_table *table) +{ + return __flow_tbl_rehash(table, table->n_buckets * 2); +} + void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) { + struct hash_table *htable = NULL; + struct hash_table *new_htable = NULL; + + htable = ovsl_dereference(table->htable); + + /* Expand table, if necessary, to make room. 
*/ + if (htable->count > htable->n_buckets) + new_htable = flow_tbl_expand(htable); + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) + new_htable = flow_tbl_rehash(htable); + + if (new_htable && !IS_ERR(new_htable)) { + rcu_assign_pointer(table->htable, new_htable); + ovs_flow_tbl_destroy(table, true); + htable = ovsl_dereference(table->htable); + table->last_rehash = jiffies; + } + flow->hash = flow_hash(&flow->key, flow->mask->range.start, flow->mask->range.end); - __tbl_insert(table, flow); + __tbl_insert(htable, flow); } void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { - BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[table->node_ver]); - table->count--; + struct hash_table *htbl = ovsl_dereference(table->htable); + + BUG_ON(htbl->count == 0); + hlist_del_rcu(&flow->hash_node[htbl->node_ver]); + htbl->count--; } struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) @@ -500,7 +555,7 @@ struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, { struct list_head *ml; - list_for_each(ml, tbl->mask_list) { + list_for_each(ml, &tbl->mask_list) { struct sw_flow_mask *m; m = container_of(ml, struct sw_flow_mask, list); if (mask_equal(mask, m)) @@ -517,7 +572,7 @@ struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, */ void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) { - list_add_rcu(&mask->list, tbl->mask_list); + list_add_rcu(&mask->list, &tbl->mask_list); } /* Initializes the flow module. 
diff --git a/datapath/flow_table.h b/datapath/flow_table.h index d0cc5cd..c0b357e 100644 --- a/datapath/flow_table.h +++ b/datapath/flow_table.h @@ -36,20 +36,23 @@ #include "flow.h" -#define TBL_MIN_BUCKETS 1024 - -struct flow_table { +struct hash_table { struct flex_array *buckets; unsigned int count, n_buckets; struct rcu_head rcu; - struct list_head *mask_list; int node_ver; u32 hash_seed; bool keep_flows; }; +struct flow_table { + struct hash_table __rcu *htable; + struct list_head mask_list; + unsigned long last_rehash; +}; + void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); + struct sw_flow_key *key, struct sw_flow_mask *mask); int ovs_flow_init(void); void ovs_flow_exit(void); @@ -57,24 +60,14 @@ void ovs_flow_exit(void); struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); -static inline int ovs_flow_tbl_count(struct flow_table *table) -{ - return table->count; -} - -static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) -{ - return (table->count > table->n_buckets); -} - -struct flow_table *ovs_flow_tbl_alloc(int new_size); -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); +int ovs_flow_tbl_init(struct flow_table *); +int ovs_flow_tbl_count(struct flow_table *table); void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); +int ovs_flows_tbl_flush(struct flow_table *flow_table); void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); -struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, +struct sw_flow *ovs_flow_tbl_dump_next(struct hash_table *table, u32 *bucket, u32 *idx); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); -- 1.7.1 _______________________________________________ dev mailing list dev@openvswitch.org 
http://openvswitch.org/mailman/listinfo/dev