If a datapath is created with the flag OVS_DP_F_INDEX_BY_UID, then an
additional table_instance is added to the flow_table, indexed by unique
identifiers ("UIDs"). Userspace implementations can specify a UID of up
to 128 bits along with a flow operation, as shorthand for the flow key.
This allows revalidation performance improvements of up to 50%.
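To make the userspace side concrete, here is a minimal sketch (not part of
this patch) of attaching a UID to a flow request. It assumes libnl-3 for
message construction; the helper name put_flow_uid and its error-handling
convention are hypothetical, and 'msg' is assumed to be an OVS_FLOW_CMD_NEW
Generic Netlink message that already carries the ovs_header and flow key:

    #include <errno.h>
    #include <stdint.h>
    #include <netlink/msg.h>
    #include <netlink/attr.h>
    #include <linux/openvswitch.h>

    /* Nest OVS_FLOW_ATTR_UID into 'msg'.  'uid' points to 'uid_len'
     * octets; the patch accepts 4, 8, 12 or 16 (up to 128 bits). */
    static int put_flow_uid(struct nl_msg *msg, const void *uid,
                            int uid_len, uint32_t flags)
    {
            struct nlattr *start;

            start = nla_nest_start(msg, OVS_FLOW_ATTR_UID);
            if (!start)
                    return -ENOBUFS;

            /* Cast drops const for older libnl prototypes. */
            if (nla_put(msg, OVS_UID_ATTR_ID, uid_len, (void *)uid) ||
                nla_put_u32(msg, OVS_UID_ATTR_FLAGS, flags)) {
                    nla_nest_cancel(msg, start);
                    return -ENOBUFS;
            }

            nla_nest_end(msg, start);
            return 0;
    }

Once a flow is installed this way, subsequent get/set/del requests can carry
only the UID instead of the full key, which is the shorthand described above.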
If a datapath is created using OVS_DP_F_INDEX_BY_UID and a UID is not
specified at flow setup time, then that operation will fail. If
OVS_UID_F_* flags are specified for an operation, then they modify what
the operation returns. For instance, OVS_UID_F_SKIP_KEY allows the
datapath to skip returning the key (e.g., during dump, to reduce memory
copying).

Signed-off-by: Joe Stringer <joestrin...@nicira.com>
---
v6: Fix documentation for supporting UIDs between 32-128 bits.
    Minor style fixes.
    Rebase.
v5: No change.
v4: Fix memory leaks.
    Log when triggering the older userspace issue above.
v3: Initial post.
---
 datapath/datapath.c                               | 215 +++++++++++++------
 datapath/flow.h                                   |  12 +-
 datapath/flow_netlink.c                           |  60 ++++++
 datapath/flow_netlink.h                           |   2 +
 datapath/flow_table.c                             | 230 +++++++++++++++++----
 datapath/flow_table.h                             |   5 +-
 datapath/linux/compat/include/linux/openvswitch.h |  29 +++
 7 files changed, 449 insertions(+), 104 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 45e7c56..7f6717b 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -675,36 +675,41 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ + + nla_total_size(ovs_uid_attr_size()) /* OVS_FLOW_ATTR_UID */ + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, - struct sk_buff *skb) + struct sk_buff *skb, u32 uid_flags) { struct nlattr *nla; int err; /* Fill flow key. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); - if (!nla) - return -EMSGSIZE; + if (!(uid_flags & OVS_UID_F_SKIP_KEY)) { + nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); + if (!nla) + return -EMSGSIZE; - err = ovs_nla_put_flow(&flow->unmasked_key, - &flow->unmasked_key, skb); - if (err) - return err; - nla_nest_end(skb, nla); + err = ovs_nla_put_flow(&flow->unmasked_key, + &flow->unmasked_key, skb); + if (err) + return err; + nla_nest_end(skb, nla); + } /* Fill flow mask. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); - if (!nla) - return -EMSGSIZE; + if (!(uid_flags & OVS_UID_F_SKIP_MASK)) { + nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); + if (!nla) + return -EMSGSIZE; - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); - if (err) - return err; - nla_nest_end(skb, nla); + err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); + if (err) + return err; + nla_nest_end(skb, nla); + } return 0; }
@@ -735,6 +740,30 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, } /* Called with ovs_mutex or RCU read lock. */ +static int ovs_flow_cmd_fill_uid(const struct sw_flow *flow, + struct sk_buff *skb) +{ + struct nlattr *start; + const struct sw_flow_id *sfid = &flow->uid; + + if (!sfid->uid) + return 0; + + start = nla_nest_start(skb, OVS_FLOW_ATTR_UID); + if (start) { + int err; + + err = nla_put(skb, OVS_UID_ATTR_ID, sfid->uid_len, sfid->uid); + if (err) + return err; + nla_nest_end(skb, start); + } else + return -EMSGSIZE; + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, struct sk_buff *skb, int skb_orig_len) {
@@ -777,7 +806,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock.
*/ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 uid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -788,18 +817,24 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, return -EMSGSIZE; ovs_header->dp_ifindex = dp_ifindex; - err = ovs_flow_cmd_fill_match(flow, skb); + err = ovs_flow_cmd_fill_match(flow, skb, uid_flags); if (err) goto error; - err = ovs_flow_cmd_fill_stats(flow, skb); + err = ovs_flow_cmd_fill_uid(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; + if (!(uid_flags & OVS_UID_F_SKIP_ACTIONS)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } + return genlmsg_end(skb, ovs_header); error: @@ -831,7 +866,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp, const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 uid_flags) { struct sk_buff *skb; int retval; @@ -843,7 +878,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp, retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, uid_flags); BUG_ON(retval < 0); return skb; } @@ -858,6 +893,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 uid_flags; int error; /* Must have key and actions. */ @@ -888,6 +925,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + /* Extract uid. */ + error = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &sfid, &uid_flags); + if (!error) + error = ovs_flow_uid(&new_flow->uid, &sfid); + if (error) + goto err_kfree_flow; + /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts); @@ -908,6 +952,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + + if (rcu_access_pointer(dp->table.uid_ti) && !new_flow->uid.uid) { + OVS_NLERR("Flow table indexes by UID but UID is not specified.\n"); + error = -EINVAL; + goto err_unlock_ovs; + } + /* Check if this is a duplicate flow */ flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); if (likely(!flow)) { @@ -925,7 +976,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + uid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -946,9 +998,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* The unmasked key has to be the same for flow updates. */ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { /* Look for any overlapping flow. 
*/ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); - if (!flow) { - error = -ENOENT; + flow = ovs_flow_tbl_lookup_exact(&dp->table, &sfid, + &match); + if (unlikely(IS_ERR(flow))) { + error = PTR_ERR(flow); goto err_unlock_ovs; } } @@ -961,7 +1014,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + uid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -1009,26 +1063,32 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct sw_flow_mask mask; struct sk_buff *reply = NULL; struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; - struct sw_flow_match match; + struct sw_flow_match match, *matchp; + struct sw_flow_id uid; + u32 uid_flags; int error; /* Extract key. */ error = -EINVAL; - if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in set flow.\n"); + if (!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR("Flow index attribute not present in set flow.\n"); goto error; } - - ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags); if (error) goto error; + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto error; + } /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { @@ -1053,9 +1113,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); - if (unlikely(!flow)) { - error = -ENOENT; + matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL; + flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp); + if (unlikely(IS_ERR(flow))) { + error = PTR_ERR(flow); goto err_unlock_ovs; } @@ -1069,14 +1130,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + uid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. 
*/ reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + uid_flags); if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1109,20 +1172,27 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; - struct sw_flow_match match; + struct sw_flow_match match, *matchp; + struct sw_flow_id uid; + u32 uid_flags; int err; - if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); + err = -EINVAL; + if (!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR("Flow get message rejected, Index attribute missing.\n"); return -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags); if (err) return err; + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + if (err) + return err; + } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1131,14 +1201,15 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); - if (!flow) { - err = -ENOENT; + matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL; + flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp); + if (unlikely(IS_ERR(flow))) { + err = PTR_ERR(flow); goto unlock; } reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, uid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1157,12 +1228,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; - struct sw_flow_match match; + struct sw_flow_match match, *matchp; + struct sw_flow_id uid; + u32 uid_flags; int err; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + err = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags); + if (err) + return err; + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (unlikely(err)) @@ -1175,13 +1251,16 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) err = -ENODEV; goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + + if (unlikely(!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY])) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); - if (unlikely(!flow)) { - err = -ENOENT; + + matchp = a[OVS_FLOW_ATTR_KEY] ? 
&match : NULL; + flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp); + if (unlikely(IS_ERR(flow))) { + err = PTR_ERR(flow); goto unlock; } @@ -1198,7 +1277,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, uid_flags); rcu_read_unlock(); BUG_ON(err < 0); ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info); @@ -1219,8 +1298,15 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct nlattr *nla, *uid; struct table_instance *ti; struct datapath *dp; + u32 uid_flags = 0; + + nla = nlmsg_attrdata(cb->nlh, sizeof(*ovs_header)); + uid = nla_find_nested(nla, OVS_FLOW_ATTR_UID); + if (uid && ovs_nla_get_uid(uid, NULL, &uid_flags)) + OVS_NLERR("Error occurred parsing UID flags on dump"); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1243,7 +1329,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, uid_flags) < 0) break; cb->args[0] = bucket; @@ -1257,6 +1343,7 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UID] = { .type = NLA_NESTED }, }; static struct genl_ops dp_flow_genl_ops[] = { @@ -1399,6 +1486,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; + bool support_uid; int err, i; err = -EINVAL; @@ -1416,8 +1504,11 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); + ovs_dp_change(dp, a); + /* Allocate table. */ - err = ovs_flow_tbl_init(&dp->table); + support_uid = dp->user_features & OVS_DP_F_INDEX_BY_UID; + err = ovs_flow_tbl_init(&dp->table, support_uid); if (err) goto err_free_dp; @@ -1445,8 +1536,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); - /* So far only local changes have been made, now need the lock. */ ovs_lock(); diff --git a/datapath/flow.h b/datapath/flow.h index eb9246a..816e69e 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -199,6 +199,11 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +struct sw_flow_id { + u32 *uid; + u32 uid_len; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -215,11 +220,14 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_hash, uid_hash; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ + struct sw_flow_id uid; struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index a3f34f1..6958d87 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -1071,6 +1071,66 @@ free_newmask: return err; } +/* Support UIDs up to 128 bits. 
*/ +#define MAX_UID_BUFSIZE (128 / 8) + +size_t ovs_uid_attr_size(void) +{ + /* Whenever adding new OVS_UID_ATTR_ FIELDS, we should consider + * updating this function. */ + return nla_total_size(4) /* OVS_UID_ATTR_FLAGS */ + + nla_total_size(MAX_UID_BUFSIZE); /* OVS_UID_ATTR_ID */ +} + +int ovs_nla_get_uid(const struct nlattr *attr, struct sw_flow_id *sfid, + u32 *flags) +{ + static const struct nla_policy ovs_uid_policy[OVS_UID_ATTR_MAX + 1] = { + [OVS_UID_ATTR_FLAGS] = { .type = NLA_U32 }, + [OVS_UID_ATTR_ID] = { .len = sizeof(u32) }, + }; + const struct nlattr *a[OVS_UID_ATTR_MAX + 1]; + int err; + + if (sfid) { + sfid->uid = NULL; + sfid->uid_len = 0; + } + if (flags) + *flags = 0; + + if (!attr) + return 0; + + err = nla_parse_nested((struct nlattr **)a, OVS_UID_ATTR_MAX, attr, + ovs_uid_policy); + if (err) + return err; + + if (sfid) { + if (a[OVS_UID_ATTR_ID]) { + size_t len; + + len = nla_len(a[OVS_UID_ATTR_ID]); + if (len > MAX_UID_BUFSIZE) { + OVS_NLERR("Flow uid size (%zu bytes) exceeds " + "maximum (%u bytes)\n", len, + MAX_UID_BUFSIZE); + return -EINVAL; + } + sfid->uid = nla_data(a[OVS_UID_ATTR_ID]); + sfid->uid_len = len; + } else { + return -EINVAL; + } + } + + if (flags && a[OVS_UID_ATTR_FLAGS]) + *flags = nla_get_u32(a[OVS_UID_ATTR_FLAGS]); + + return 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h index 90bbe37..0158c69 100644 --- a/datapath/flow_netlink.h +++ b/datapath/flow_netlink.h @@ -39,6 +39,7 @@ size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); +size_t ovs_uid_attr_size(void); void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); @@ -52,6 +53,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +int ovs_nla_get_uid(const struct nlattr *, struct sw_flow_id *, u32 *flags); int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, diff --git a/datapath/flow_table.c b/datapath/flow_table.c index 4efef13..6a92994 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -80,6 +80,20 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } +int ovs_flow_uid(struct sw_flow_id *dst, const struct sw_flow_id *src) +{ + if (src->uid_len) { + dst->uid = kmalloc(src->uid_len, GFP_KERNEL); + if (!dst->uid) + return -ENOMEM; + + memcpy(dst->uid, src->uid, src->uid_len); + dst->uid_len = src->uid_len; + } + + return 0; +} + struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; @@ -90,6 +104,8 @@ struct sw_flow *ovs_flow_alloc(void) if (!flow) return ERR_PTR(-ENOMEM); + flow->uid.uid = NULL; + flow->uid.uid_len = 0; flow->sf_acts = NULL; flow->mask = NULL; flow->stats_last_writer = NUMA_NO_NODE; @@ -147,6 +163,7 @@ static void flow_free(struct sw_flow *flow) int node; kfree((struct sw_flow_actions __force *)flow->sf_acts); + kfree((struct sw_flow_id __force *)flow->uid.uid); for_each_node(node) if (flow->stats[node]) kmem_cache_free(flow_stats_cache, @@ -263,7 +280,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size) return 0; } -int ovs_flow_tbl_init(struct flow_table *table) +int ovs_flow_tbl_init(struct flow_table *table, bool support_uid) { struct table_instance *ti; struct mask_array *ma; @@ -281,12 +298,25 @@ int 
ovs_flow_tbl_init(struct flow_table *table) if (!ti) goto free_mask_array; + if (support_uid) { + struct table_instance *uid_ti; + + uid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!uid_ti) + goto free_ti; + rcu_assign_pointer(table->uid_ti, uid_ti); + } else { + rcu_assign_pointer(table->uid_ti, NULL); + } + rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->mask_array, ma); table->last_rehash = jiffies; table->count = 0; return 0; +free_ti: + __table_instance_destroy(ti); free_mask_array: kfree(ma); free_mask_cache: @@ -301,7 +331,8 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, bool deferred, + bool uid) { int i; @@ -317,9 +348,14 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_node *n; int ver = ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); - ovs_flow_free(flow, deferred); + if (uid) { + hlist_for_each_entry_safe(flow, n, head, uid_hash.node[ver]) + hlist_del_rcu(&flow->uid_hash.node[ver]); + } else { + hlist_for_each_entry_safe(flow, n, head, flow_hash.node[ver]) { + hlist_del_rcu(&flow->flow_hash.node[ver]); + ovs_flow_free(flow, deferred); + } } } @@ -335,10 +371,12 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = (struct table_instance __force *)table->ti; + struct table_instance *uid_ti = (struct table_instance __force *)table->uid_ti; free_percpu(table->mask_cache); kfree((struct mask_array __force *)table->mask_array); - table_instance_destroy(ti, false); + table_instance_destroy(ti, false, false); + table_instance_destroy(uid_ti, false, true); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -353,7 +391,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_hash.node[ver]) { if (i < *last) { i++; continue; @@ -379,12 +417,20 @@ static void table_instance_insert(struct table_instance *ti, struct sw_flow *flo { struct hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); + head = find_bucket(ti, flow->flow_hash.hash); + hlist_add_head_rcu(&flow->flow_hash.node[ti->node_ver], head); +} + +static void uid_table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->uid_hash.hash); + hlist_add_head_rcu(&flow->uid_hash.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool uid) { int old_ver; int i; @@ -399,42 +445,81 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (uid) + hlist_for_each_entry(flow, head, uid_hash.node[old_ver]) + uid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, flow_hash.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } -static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) +static int 
flow_table_instance_alloc(struct table_instance **ti, + struct table_instance **uid_ti, + int n_buckets) { - struct table_instance *new_ti; + struct table_instance *new_ti, *new_uid_ti; new_ti = table_instance_alloc(n_buckets); if (!new_ti) - return NULL; + return -ENOMEM; - flow_table_copy_flows(ti, new_ti); + if (uid_ti) { + new_uid_ti = table_instance_alloc(n_buckets); + if (!new_uid_ti) { + __table_instance_destroy(new_ti); + return -ENOMEM; + } + *uid_ti = new_uid_ti; + } - return new_ti; + *ti = new_ti; + return 0; +} + +static int flow_table_rehash(struct table_instance *old_ti, + struct table_instance *old_uid_ti, int n_buckets, + struct table_instance **new_ti, + struct table_instance **new_uid_ti) +{ + int err; + + err = flow_table_instance_alloc(new_ti, old_uid_ti ? new_uid_ti : NULL, + n_buckets); + if (err) + return err; + + flow_table_copy_flows(old_ti, *new_ti, false); + if (old_uid_ti) + flow_table_copy_flows(old_uid_ti, *new_uid_ti, true); + + return 0; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti, *old_uid_ti; + struct table_instance *new_uid_ti = NULL; + int err; old_ti = ovsl_dereference(flow_table->ti); - new_ti = table_instance_alloc(TBL_MIN_BUCKETS); - if (!new_ti) - return -ENOMEM; + old_uid_ti = ovsl_dereference(flow_table->uid_ti); + if (old_uid_ti) + err = flow_table_instance_alloc(&new_ti, &new_uid_ti, TBL_MIN_BUCKETS); + else + err = flow_table_instance_alloc(&new_ti, NULL, TBL_MIN_BUCKETS); + if (err) + return err; rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->uid_ti, new_uid_ti); flow_table->last_rehash = jiffies; flow_table->count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, true, false); + table_instance_destroy(old_uid_ti, true, true); return 0; } @@ -507,8 +592,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, hash = flow_hash(&masked_key, key_start, key_end); head = find_bucket(ti, hash); (*n_mask_hit)++; - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && + hlist_for_each_entry_rcu(flow, head, flow_hash.node[ti->node_ver]) { + if (flow->mask == mask && flow->flow_hash.hash == hash && flow_cmp_masked_key(flow, &masked_key, key_start, key_end)) return flow; @@ -626,8 +711,8 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index); } -struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, - const struct sw_flow_match *match) +static struct sw_flow *lookup_exact(struct flow_table *tbl, + const struct sw_flow_match *match) { struct mask_array *ma = ovsl_dereference(tbl->mask_array); int i; @@ -649,6 +734,58 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, return NULL; } +static u32 uid_hash(const struct sw_flow_id *sfid) +{ + return arch_fast_hash2(sfid->uid, sfid->uid_len >> 2, 0); +} + +static bool flow_cmp_uid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->uid.uid_len != sfid->uid_len) + return false; + + return !memcmp(flow->uid.uid, sfid->uid, sfid->uid_len); +} + +static struct sw_flow *lookup_uid(struct flow_table *tbl, + const struct sw_flow_id *uid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->uid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + BUG_ON(!ti); + hash = uid_hash(uid); + head = find_bucket(ti, hash); + 
hlist_for_each_entry_rcu(flow, head, uid_hash.node[ti->node_ver]) { + if (flow->uid_hash.hash == hash && flow_cmp_uid(flow, uid)) + return flow; + } + return NULL; +} + +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + const struct sw_flow_id *uid, + const struct sw_flow_match *match) +{ + struct sw_flow *flow; + + if (rcu_access_pointer(tbl->uid_ti) && uid->uid_len) { + flow = lookup_uid(tbl, uid); + } else if (match) { + flow = lookup_exact(tbl, match); + } else { + OVS_NLERR("Flow key attribute not present in flow lookup.\n"); + return ERR_PTR(-EINVAL); + } + + if (!flow) + return ERR_PTR(-ENOENT); + return flow; +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct mask_array *ma; @@ -657,9 +794,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return ma->count; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static int flow_table_expand(struct table_instance *old_ti, + struct table_instance *old_uid_ti, + struct table_instance **new_ti, + struct table_instance **new_uid_ti) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return flow_table_rehash(old_ti, old_uid_ti, old_ti->n_buckets * 2, + new_ti, new_uid_ti); } static void tbl_mask_array_delete_mask(struct mask_array *ma, @@ -709,9 +850,12 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *uid_ti = ovsl_dereference(table->uid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_hash.node[ti->node_ver]); + if (uid_ti) + hlist_del_rcu(&flow->uid_hash.node[uid_ti->node_ver]); table->count--; /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be @@ -816,29 +960,39 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, const struct sw_flow_mask *mask) { - struct table_instance *new_ti = NULL; - struct table_instance *ti; + struct table_instance *new_ti = NULL, *new_uid_ti = NULL; + struct table_instance *ti, *uid_ti; int err; err = flow_mask_insert(table, flow, mask); if (err) return err; - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); + flow->flow_hash.hash = flow_hash(&flow->key, flow->mask->range.start, + flow->mask->range.end); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; + uid_ti = ovsl_dereference(table->uid_ti); + if (uid_ti) { + BUG_ON(!flow->uid.uid); + flow->uid_hash.hash = uid_hash(&flow->uid); + uid_table_instance_insert(uid_ti, flow); + } + /* Expand table, if necessary, to make room. 
*/ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + flow_table_expand(ti, uid_ti, &new_ti, &new_uid_ti); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + flow_table_rehash(ti, uid_ti, ti->n_buckets, + &new_ti, &new_uid_ti); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + rcu_assign_pointer(table->uid_ti, new_uid_ti); + table_instance_destroy(ti, true, false); + table_instance_destroy(uid_ti, true, true); table->last_rehash = jiffies; } return 0; diff --git a/datapath/flow_table.h b/datapath/flow_table.h index 9eb4af9..2e6582f 100644 --- a/datapath/flow_table.h +++ b/datapath/flow_table.h @@ -60,6 +60,7 @@ struct flow_table { struct table_instance __rcu *ti; struct mask_cache_entry __percpu *mask_cache; struct mask_array __rcu *mask_array; + struct table_instance __rcu *uid_ti; unsigned long last_rehash; unsigned int count; }; @@ -72,7 +73,7 @@ void ovs_flow_exit(void); struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); -int ovs_flow_tbl_init(struct flow_table *); +int ovs_flow_tbl_init(struct flow_table *, bool support_uid); int ovs_flow_tbl_count(const struct flow_table *table); void ovs_flow_tbl_destroy(struct flow_table *table); int ovs_flow_tbl_flush(struct flow_table *flow_table); @@ -90,11 +91,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *, + const struct sw_flow_id *sfid, const struct sw_flow_match *match); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, const struct sw_flow_match *match); +int ovs_flow_uid(struct sw_flow_id *dst, const struct sw_flow_id *src); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); #endif /* flow_table.h */ diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 306ea86..af463a5 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -142,6 +142,9 @@ struct ovs_vport_stats { /* Allow datapath to associate multiple Netlink PIDs to each vport */ #define OVS_DP_F_VPORT_PIDS (1 << 1) +/* Force the datapath to index flows by userspace flow ID. */ +#define OVS_DP_F_INDEX_BY_UID (1 << 2) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) @@ -470,6 +473,10 @@ struct ovs_key_nd { * a wildcarded match. Omitting attribute is treated as wildcarding all * corresponding fields. Optional for all requests. If not present, * all flow key bits are exact match bits. + * @OVS_FLOW_ATTR_UID: Nested %OVS_UID_ATTR_* attributes specifying unique + * identifiers for flows and providing alternative semantics for flow + * installation and retrieval. Required for all requests if the datapath is + * created with %OVS_DP_F_INDEX_BY_UID. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_FLOW_* commands. @@ -483,12 +490,34 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_UID, /* Unique flow identifier. 
*/ __OVS_FLOW_ATTR_MAX }; #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) /** + * enum ovs_uid_attr - Unique identifier types. + * + * @OVS_UID_ATTR_FLAGS: A 32-bit value specifying changes to the behaviour of + * the current %OVS_FLOW_CMD_* request. Optional for all requests. + * @OVS_UID_ATTR_ID: A unique identifier for a flow. + */ +enum ovs_uid_attr { + OVS_UID_ATTR_UNSPEC, + OVS_UID_ATTR_FLAGS, /* u32 of OVS_UID_F_* */ + OVS_UID_ATTR_ID, /* variable size: 4, 8, 12 or 16 octets. */ + __OVS_UID_ATTR_MAX +}; + +#define OVS_UID_ATTR_MAX (__OVS_UID_ATTR_MAX - 1) + +/* Skip attributes for notifications. */ +#define OVS_UID_F_SKIP_KEY (1 << 0) +#define OVS_UID_F_SKIP_MASK (1 << 1) +#define OVS_UID_F_SKIP_ACTIONS (1 << 2) + +/** * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of
-- 
1.7.10.4
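As a usage illustration of the OVS_UID_F_* flags defined above (again a
hedged sketch, not part of the patch: the function name build_terse_dump is
hypothetical, and socket/family setup via libnl-3's genl_connect() and
genl_ctrl_resolve() is assumed to happen elsewhere), a dump request asking
the datapath to omit key, mask, and actions from every dumped flow might
look like this:

    #include <netlink/netlink.h>
    #include <netlink/msg.h>
    #include <netlink/attr.h>
    #include <netlink/genl/genl.h>
    #include <linux/openvswitch.h>

    /* Build an OVS_FLOW_CMD_GET dump request carrying OVS_UID_ATTR_FLAGS,
     * so each reply contains the flow's UID and stats but no key, mask
     * or actions. */
    static struct nl_msg *build_terse_dump(int family, int dp_ifindex)
    {
            struct ovs_header *hdr;
            struct nlattr *uid;
            struct nl_msg *msg;

            msg = nlmsg_alloc();
            if (!msg)
                    return NULL;

            hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
                              sizeof(*hdr), NLM_F_DUMP, OVS_FLOW_CMD_GET,
                              OVS_FLOW_VERSION);
            if (!hdr)
                    goto err;
            hdr->dp_ifindex = dp_ifindex;

            uid = nla_nest_start(msg, OVS_FLOW_ATTR_UID);
            if (!uid ||
                nla_put_u32(msg, OVS_UID_ATTR_FLAGS,
                            OVS_UID_F_SKIP_KEY | OVS_UID_F_SKIP_MASK |
                            OVS_UID_F_SKIP_ACTIONS))
                    goto err;
            nla_nest_end(msg, uid);

            return msg;

    err:
            nlmsg_free(msg);
            return NULL;
    }

This matches the parsing added to ovs_flow_cmd_dump() above, which looks for
OVS_FLOW_ATTR_UID among the dump request's attributes and applies the flags
to every flow it fills in; such terse dumps are the memory-copy reduction the
commit message cites for revalidation.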