On Sat, Apr 5, 2014 at 5:23 AM, Alex Wang <[email protected]> wrote:
> In order to allow handlers directly read upcalls from datapath,
> we need to support per-handler netlink socket for each vport in
> datapath. This commit makes this happen. Also, it is guaranteed
> backward and forward compatibility with previous branch.
>
> Signed-off-by: Alex Wang <[email protected]>
> Acked-by: Thomas Graf <[email protected]>
>
This looks pretty close now, I have few comments.
> ---
> V6 -> V7:
> - rebase.
>
> V5 -> V6:
> - '__force' cast 'struct vport_portids *' in ovs_vport_free().
>
> V4 -> V5:
> - use nla_memcpy() instead of memcpy().
> - move 'struct rcu_head' back in the 'struct vport_portids'
> for cacheline efficiency.
>
> V3 -> V4:
> - define the type of OVS_VPORT_ATTR_UPCALL_PID to NLA_U32.
> - add check of the OVS_VPORT_ATTR_UPCALL_PID in attr array in
> ovs_vport_cmd_set().
> - use reciprocal_div() in the modular computation, to reduce the
> overhead.
>
> V2 -> V3:
> - use OVS_DP_ATTR_USER_FEATURES to control the type of
> OVS_VPORT_ATTR_UPCALL_PID attribute returned by the ovs_vport_cmd_get().
> - directly free the 'vport->upcall_portids' in ovs_vport_free().
> - add check to guarantee pids array length is always positive.
>
> PATCH -> V2:
> - rebase.
>
> major changes since RFC:
> - guarantee the compatibility with branch-2.1 userspace.
> - use rcu to protect upcall port_id array multi-access.
> - use skb_get_rxhash() to select the port id to send upcall.
> ---
> datapath/datapath.c | 21 ++++++---
> datapath/vport.c | 103
> ++++++++++++++++++++++++++++++++++++++++++-
> datapath/vport.h | 28 ++++++++++--
> include/linux/openvswitch.h | 13 ++++--
> 4 files changed, 149 insertions(+), 16 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index c6d42db..c2032b1 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -268,7 +268,7 @@ void ovs_dp_process_received_packet(struct vport *p,
> struct sk_buff *skb)
> upcall.cmd = OVS_PACKET_CMD_MISS;
> upcall.key = &key;
> upcall.userdata = NULL;
> - upcall.portid = p->upcall_portid;
> + upcall.portid = ovs_vport_find_portid(p, skb);
> ovs_dp_upcall(dp, skb, &upcall);
> consume_skb(skb);
> stats_counter = &stats->n_missed;
> @@ -1381,7 +1381,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct
> genl_info *info)
> parms.options = NULL;
> parms.dp = dp;
> parms.port_no = OVSP_LOCAL;
> - parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
> + parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
>
> ovs_dp_change(dp, a);
>
> @@ -1638,8 +1638,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport,
> struct sk_buff *skb,
>
> if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
> nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
> - nla_put_string(skb, OVS_VPORT_ATTR_NAME,
> vport->ops->get_name(vport)) ||
> - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
> + nla_put_string(skb, OVS_VPORT_ATTR_NAME,
> vport->ops->get_name(vport)))
> goto nla_put_failure;
>
> ovs_vport_get_stats(vport, &vport_stats);
> @@ -1647,6 +1646,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport,
> struct sk_buff *skb,
> &vport_stats))
> goto nla_put_failure;
>
> + if (ovs_vport_get_upcall_portids(vport, skb))
> + goto nla_put_failure;
> +
> err = ovs_vport_get_options(vport, skb);
> if (err == -EMSGSIZE)
> goto error;
> @@ -1768,7 +1770,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb,
> struct genl_info *info)
> parms.options = a[OVS_VPORT_ATTR_OPTIONS];
> parms.dp = dp;
> parms.port_no = port_no;
> - parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
> + parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
>
> vport = new_vport(&parms);
> err = PTR_ERR(vport);
> @@ -1825,8 +1827,13 @@ static int ovs_vport_cmd_set(struct sk_buff *skb,
> struct genl_info *info)
> if (a[OVS_VPORT_ATTR_STATS])
> ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
>
> - if (a[OVS_VPORT_ATTR_UPCALL_PID])
> - vport->upcall_portid =
> nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
> +
> + if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
> + err = ovs_vport_set_upcall_portids(vport,
> +
> a[OVS_VPORT_ATTR_UPCALL_PID]);
> + if (err)
> + goto exit_unlock_free;
> + }
>
> err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
> info->snd_seq, 0, OVS_VPORT_CMD_NEW);
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 2673b81..9deba28 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -136,10 +136,12 @@ struct vport *ovs_vport_alloc(int priv_size, const
> struct vport_ops *ops,
>
> vport->dp = parms->dp;
> vport->port_no = parms->port_no;
> - vport->upcall_portid = parms->upcall_portid;
> vport->ops = ops;
> INIT_HLIST_NODE(&vport->dp_hash_node);
>
> + if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids))
> + return ERR_PTR(-EINVAL);
> +
> vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
> if (!vport->percpu_stats) {
> kfree(vport);
> @@ -169,6 +171,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct
> vport_ops *ops,
> */
> void ovs_vport_free(struct vport *vport)
> {
> + kfree((struct vport_portids __force *)vport->upcall_portids);
> free_percpu(vport->percpu_stats);
> kfree(vport);
> }
> @@ -355,6 +358,104 @@ int ovs_vport_get_options(const struct vport *vport,
> struct sk_buff *skb)
> return 0;
> }
>
> +static void vport_portids_destroy_rcu_cb(struct rcu_head *rcu)
> +{
> + struct vport_portids *ids = container_of(rcu, struct vport_portids,
> + rcu);
> +
> + kfree(ids);
> +}
> +
> +/**
> + * ovs_vport_set_upcall_portids - set upcall portids of @vport.
> + *
> + * @vport: vport to modify.
> + * @ids: new configuration, an array of port ids.
> + *
> + * Sets the vport's upcall_portids to @ids.
> + *
> + * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be
> parsed
> + * as an array of U32.
> + *
> + * Must be called with rcu_read_lock.
> + */
> +int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids)
> +{
> + struct vport_portids *old, *vport_portids;
> +
> + if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
> + return -EINVAL;
> +
> + old = ovsl_dereference(vport->upcall_portids);
> +
> + vport_portids = kmalloc(sizeof *vport_portids + nla_len(ids),
> + GFP_KERNEL);
need to divide nla_len() by 4.
> + vport_portids->ids = (void *) vport_portids + sizeof *vport_portids;
> + vport_portids->n_ids = nla_len(ids) / sizeof(u32);
> + vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
> + nla_memcpy(vport_portids->ids, ids, nla_len(ids));
> +
> + rcu_assign_pointer(vport->upcall_portids, vport_portids);
> +
> + if (old)
> + call_rcu(&old->rcu, vport_portids_destroy_rcu_cb);
> +
> + return 0;
> +}
> +
> +/**
> + * ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
> + *
> + * @vport: vport from which to retrieve the portids.
> + * @skb: sk_buff where portids should be appended.
> + *
> + * Retrieves the configuration of the given vport, appending the
> + * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
> + * portids to @skb.
> + *
> + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
> + * If an error occurs, @skb is left unmodified. Must be called with
> + * rcu_read_lock.
> + */
> +int ovs_vport_get_upcall_portids(const struct vport *vport,
> + struct sk_buff *skb)
> +{
> + struct vport_portids *ids;
> +
> + ids = rcu_dereference_ovsl(vport->upcall_portids);
> +
> + if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
> + return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
> + ids->n_ids * sizeof(u32), (void *) ids->ids);
> + else
> + return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, *ids->ids);
> +}
> +
> +/**
> + * ovs_vport_find_portid - find the upcall portid to send upcall.
> + *
> + * @vport: vport from which the missed packet is received.
> + * @skb: skb that the missed packet was received.
> + *
> + * Uses the skb_get_rxhash() to select the upcall portid to send the
> + * upcall.
> + *
> + * Returns the portid of the target socket. Must be called with
> rcu_read_lock.
> + */
> +u32 ovs_vport_find_portid(const struct vport *p, struct sk_buff *skb)
> +{
> + struct vport_portids *ids;
> + u32 hash;
> +
> + ids = rcu_dereference_ovsl(p->upcall_portids);
> +
Is this function ever called under ovs-lock? if not we can just use
rcu_dereference.
> + if (ids->n_ids == 1 && *ids->ids == 0)
> + return 0;
> +
> + hash = skb_get_rxhash(skb);
> + return ids->ids[hash - ids->n_ids * reciprocal_divide(hash,
> ids->rn_ids)];
> +}
> +
> /**
> * ovs_vport_receive - pass up received packet to the datapath for
> processing
> *
> diff --git a/datapath/vport.h b/datapath/vport.h
> index 18b723e..528040b 100644
> --- a/datapath/vport.h
> +++ b/datapath/vport.h
> @@ -23,6 +23,7 @@
> #include <linux/list.h>
> #include <linux/netlink.h>
> #include <linux/openvswitch.h>
> +#include <linux/reciprocal_div.h>
> #include <linux/skbuff.h>
> #include <linux/spinlock.h>
> #include <linux/u64_stats_sync.h>
> @@ -50,6 +51,11 @@ void ovs_vport_get_stats(struct vport *, struct
> ovs_vport_stats *);
> int ovs_vport_set_options(struct vport *, struct nlattr *options);
> int ovs_vport_get_options(const struct vport *, struct sk_buff *);
>
> +int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
> +int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
> +
> +u32 ovs_vport_find_portid(const struct vport *, struct sk_buff *);
> +
> int ovs_vport_send(struct vport *, struct sk_buff *);
>
> /* The following definitions are for implementers of vport devices: */
> @@ -60,13 +66,27 @@ struct vport_err_stats {
> u64 tx_dropped;
> u64 tx_errors;
> };
> +/**
> + * struct vport_portids - array of netlink portids of a vport.
> + * must be protected by rcu.
> + * @rcu: RCU callback head for deferred destruction.
> + * @n_ids: Size of @ids array.
> + * @rn_ids: The reciprocal value of @n_ids.
> + * @ids: Array storing the Netlink socket pids to use for packets received
> + * on this port that miss the flow table.
> + */
> +struct vport_portids {
> + u32 n_ids;
> + struct reciprocal_value rn_ids;
> + struct rcu_head rcu;
> + u32 *ids;
> +};
>
if we use u32 ids[], we can save pointer space. Same as we have done
in struct sw_flow{} for stats.
> /**
> * struct vport - one port within a datapath
> * @rcu: RCU callback head for deferred destruction.
> * @dp: Datapath to which this port belongs.
> - * @upcall_portid: The Netlink port to use for packets received on this port
> that
> - * miss the flow table.
> + * @upcall_portids: RCU protected 'struct vport_portids'.
> * @port_no: Index into @dp's @ports array.
> * @hash_node: Element in @dev_table hash table in vport.c.
> * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
> @@ -80,7 +100,7 @@ struct vport_err_stats {
> struct vport {
> struct rcu_head rcu;
> struct datapath *dp;
> - u32 upcall_portid;
> + struct vport_portids __rcu *upcall_portids;
> u16 port_no;
>
> struct hlist_node hash_node;
> @@ -112,7 +132,7 @@ struct vport_parms {
> /* For ovs_vport_alloc(). */
> struct datapath *dp;
> u16 port_no;
> - u32 upcall_portid;
> + struct nlattr *upcall_portids;
> };
>
> /**
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index a88f6f1..b3977d1 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -138,6 +138,9 @@ struct ovs_vport_stats {
> /* Allow last Netlink attribute to be unaligned */
> #define OVS_DP_F_UNALIGNED (1 << 0)
>
> +/* Allow datapath to associate multiple Netlink PIDs to each vport */
> +#define OVS_DP_F_VPORT_PIDS (1 << 1)
> +
Have you tested downgrade case with this patch?
> /* Fixed logical ports. */
> #define OVSP_LOCAL ((__u32)0)
>
> @@ -225,9 +228,10 @@ enum ovs_vport_type {
> * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
> * plus a null terminator.
> * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
> - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
> - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
> - * this port. A value of zero indicates that upcalls should not be sent.
> + * @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
> + * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
> + * received on this port. If this is a single-element array of value 0,
> + * upcalls should not be sent.
> * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
> * packets sent or received through the vport.
> *
> @@ -251,7 +255,8 @@ enum ovs_vport_attr {
> OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */
> OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */
> OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
> - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
> + OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */
> + /* receiving upcalls */
> OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */
> __OVS_VPORT_ATTR_MAX
> };
> --
> 1.7.9.5
>
> _______________________________________________
> dev mailing list
> [email protected]
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev