On Sat, Apr 5, 2014 at 5:23 AM, Alex Wang <al...@nicira.com> wrote:
> In order to allow handlers directly read upcalls from datapath,
> we need to support per-handler netlink socket for each vport in
> datapath.  This commit makes this happen.  Also, it is guaranteed
> backward and forward compatibility with previous branch.
>
> Signed-off-by: Alex Wang <al...@nicira.com>
> Acked-by: Thomas Graf <tg...@redhat.com>
>
This looks pretty close now, I have few comments.

> ---
> V6 -> V7:
> - rebase.
>
> V5 -> V6:
> - '__force' cast 'struct vport_portids *' in ovs_vport_free().
>
> V4 -> V5:
> - use nla_memcpy() instead of memcpy().
> - move 'struct rcu_head' back in the 'struct vport_portids'
>   for cacheline efficiency.
>
> V3 -> V4:
> - define the type of OVS_VPORT_ATTR_UPCALL_PID to NLA_U32.
> - add check of the OVS_VPORT_ATTR_UPCALL_PID in attr array in
>   ovs_vport_cmd_set().
> - use reciprocal_div() in the modular computation, to reduce the
>   overhead.
>
> V2 -> V3:
> - use OVS_DP_ATTR_USER_FEATURES to control the type of
>   OVS_VPORT_ATTR_UPCALL_PID attribute returned by the ovs_vport_cmd_get().
> - directly free the 'vport->upcall_portids' in ovs_vport_free().
> - add check to guarantee pids array length is always positive.
>
> PATCH -> V2:
> - rebase.
>
> major changes since RFC:
> - guarantee the compatibility with branch-2.1 userspace.
> - use rcu to protect upcall port_id array multi-access.
> - use skb_get_rxhash() to select the port id to send upcall.
> ---
>  datapath/datapath.c         |   21 ++++++---
>  datapath/vport.c            |  103 
> ++++++++++++++++++++++++++++++++++++++++++-
>  datapath/vport.h            |   28 ++++++++++--
>  include/linux/openvswitch.h |   13 ++++--
>  4 files changed, 149 insertions(+), 16 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index c6d42db..c2032b1 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -268,7 +268,7 @@ void ovs_dp_process_received_packet(struct vport *p, 
> struct sk_buff *skb)
>                 upcall.cmd = OVS_PACKET_CMD_MISS;
>                 upcall.key = &key;
>                 upcall.userdata = NULL;
> -               upcall.portid = p->upcall_portid;
> +               upcall.portid = ovs_vport_find_portid(p, skb);
>                 ovs_dp_upcall(dp, skb, &upcall);
>                 consume_skb(skb);
>                 stats_counter = &stats->n_missed;
> @@ -1381,7 +1381,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
> genl_info *info)
>         parms.options = NULL;
>         parms.dp = dp;
>         parms.port_no = OVSP_LOCAL;
> -       parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
> +       parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
>
>         ovs_dp_change(dp, a);
>
> @@ -1638,8 +1638,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, 
> struct sk_buff *skb,
>
>         if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
>             nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
> -           nla_put_string(skb, OVS_VPORT_ATTR_NAME, 
> vport->ops->get_name(vport)) ||
> -           nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
> +           nla_put_string(skb, OVS_VPORT_ATTR_NAME, 
> vport->ops->get_name(vport)))
>                 goto nla_put_failure;
>
>         ovs_vport_get_stats(vport, &vport_stats);
> @@ -1647,6 +1646,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, 
> struct sk_buff *skb,
>                     &vport_stats))
>                 goto nla_put_failure;
>
> +       if (ovs_vport_get_upcall_portids(vport, skb))
> +               goto nla_put_failure;
> +
>         err = ovs_vport_get_options(vport, skb);
>         if (err == -EMSGSIZE)
>                 goto error;
> @@ -1768,7 +1770,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, 
> struct genl_info *info)
>         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
>         parms.dp = dp;
>         parms.port_no = port_no;
> -       parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
> +       parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
>
>         vport = new_vport(&parms);
>         err = PTR_ERR(vport);
> @@ -1825,8 +1827,13 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, 
> struct genl_info *info)
>         if (a[OVS_VPORT_ATTR_STATS])
>                 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
>
> -       if (a[OVS_VPORT_ATTR_UPCALL_PID])
> -               vport->upcall_portid = 
> nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
> +
> +       if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
> +               err = ovs_vport_set_upcall_portids(vport,
> +                                                  
> a[OVS_VPORT_ATTR_UPCALL_PID]);
> +               if (err)
> +                       goto exit_unlock_free;
> +       }
>
>         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
>                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 2673b81..9deba28 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -136,10 +136,12 @@ struct vport *ovs_vport_alloc(int priv_size, const 
> struct vport_ops *ops,
>
>         vport->dp = parms->dp;
>         vport->port_no = parms->port_no;
> -       vport->upcall_portid = parms->upcall_portid;
>         vport->ops = ops;
>         INIT_HLIST_NODE(&vport->dp_hash_node);
>
> +       if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids))
> +               return ERR_PTR(-EINVAL);
> +
>         vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
>         if (!vport->percpu_stats) {
>                 kfree(vport);
> @@ -169,6 +171,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct 
> vport_ops *ops,
>   */
>  void ovs_vport_free(struct vport *vport)
>  {
> +       kfree((struct vport_portids __force *)vport->upcall_portids);
>         free_percpu(vport->percpu_stats);
>         kfree(vport);
>  }
> @@ -355,6 +358,104 @@ int ovs_vport_get_options(const struct vport *vport, 
> struct sk_buff *skb)
>         return 0;
>  }
>
> +static void vport_portids_destroy_rcu_cb(struct rcu_head *rcu)
> +{
> +       struct vport_portids *ids = container_of(rcu, struct vport_portids,
> +                                                rcu);
> +
> +       kfree(ids);
> +}
> +
> +/**
> + *     ovs_vport_set_upcall_portids - set upcall portids of @vport.
> + *
> + * @vport: vport to modify.
> + * @ids: new configuration, an array of port ids.
> + *
> + * Sets the vport's upcall_portids to @ids.
> + *
> + * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be 
> parsed
> + * as an array of U32.
> + *
> + * Must be called with rcu_read_lock.
> + */
> +int ovs_vport_set_upcall_portids(struct vport *vport,  struct nlattr *ids)
> +{
> +       struct vport_portids *old, *vport_portids;
> +
> +       if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
> +               return -EINVAL;
> +
> +       old = ovsl_dereference(vport->upcall_portids);
> +
> +       vport_portids = kmalloc(sizeof *vport_portids + nla_len(ids),
> +                               GFP_KERNEL);
need to divide nla_len() by 4.

> +       vport_portids->ids = (void *) vport_portids + sizeof *vport_portids;
> +       vport_portids->n_ids = nla_len(ids) / sizeof(u32);
> +       vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
> +       nla_memcpy(vport_portids->ids, ids, nla_len(ids));
> +
> +       rcu_assign_pointer(vport->upcall_portids, vport_portids);
> +
> +       if (old)
> +               call_rcu(&old->rcu, vport_portids_destroy_rcu_cb);
> +
> +       return 0;
> +}
> +
> +/**
> + *     ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
> + *
> + * @vport: vport from which to retrieve the portids.
> + * @skb: sk_buff where portids should be appended.
> + *
> + * Retrieves the configuration of the given vport, appending the
> + * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
> + * portids to @skb.
> + *
> + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
> + * If an error occurs, @skb is left unmodified.  Must be called with
> + * rcu_read_lock.
> + */
> +int ovs_vport_get_upcall_portids(const struct vport *vport,
> +                                struct sk_buff *skb)
> +{
> +       struct vport_portids *ids;
> +
> +       ids = rcu_dereference_ovsl(vport->upcall_portids);
> +
> +       if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
> +               return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
> +                              ids->n_ids * sizeof(u32), (void *) ids->ids);
> +       else
> +               return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, *ids->ids);
> +}
> +
> +/**
> + *     ovs_vport_find_portid - find the upcall portid to send upcall.
> + *
> + * @vport: vport from which the missed packet is received.
> + * @skb: skb that the missed packet was received.
> + *
> + * Uses the skb_get_rxhash() to select the upcall portid to send the
> + * upcall.
> + *
> + * Returns the portid of the target socket.  Must be called with 
> rcu_read_lock.
> + */
> +u32 ovs_vport_find_portid(const struct vport *p, struct sk_buff *skb)
> +{
> +       struct vport_portids *ids;
> +       u32 hash;
> +
> +       ids = rcu_dereference_ovsl(p->upcall_portids);
> +
Is this function ever called under ovs-lock? if not we can just use
rcu_dereference.

> +       if (ids->n_ids == 1 && *ids->ids == 0)
> +               return 0;
> +
> +       hash = skb_get_rxhash(skb);
> +       return ids->ids[hash - ids->n_ids * reciprocal_divide(hash, 
> ids->rn_ids)];
> +}
> +
>  /**
>   *     ovs_vport_receive - pass up received packet to the datapath for 
> processing
>   *
> diff --git a/datapath/vport.h b/datapath/vport.h
> index 18b723e..528040b 100644
> --- a/datapath/vport.h
> +++ b/datapath/vport.h
> @@ -23,6 +23,7 @@
>  #include <linux/list.h>
>  #include <linux/netlink.h>
>  #include <linux/openvswitch.h>
> +#include <linux/reciprocal_div.h>
>  #include <linux/skbuff.h>
>  #include <linux/spinlock.h>
>  #include <linux/u64_stats_sync.h>
> @@ -50,6 +51,11 @@ void ovs_vport_get_stats(struct vport *, struct 
> ovs_vport_stats *);
>  int ovs_vport_set_options(struct vport *, struct nlattr *options);
>  int ovs_vport_get_options(const struct vport *, struct sk_buff *);
>
> +int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
> +int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
> +
> +u32 ovs_vport_find_portid(const struct vport *, struct sk_buff *);
> +
>  int ovs_vport_send(struct vport *, struct sk_buff *);
>
>  /* The following definitions are for implementers of vport devices: */
> @@ -60,13 +66,27 @@ struct vport_err_stats {
>         u64 tx_dropped;
>         u64 tx_errors;
>  };
> +/**
> + * struct vport_portids - array of netlink portids of a vport.
> + *                        must be protected by rcu.
> + * @rcu: RCU callback head for deferred destruction.
> + * @n_ids: Size of @ids array.
> + * @rn_ids: The reciprocal value of @n_ids.
> + * @ids: Array storing the Netlink socket pids to use for packets received
> + * on this port that miss the flow table.
> + */
> +struct vport_portids {
> +       u32 n_ids;
> +       struct reciprocal_value rn_ids;
> +       struct rcu_head rcu;
> +       u32 *ids;
> +};
>
if we use u32 ids[], we can save pointer space. Same as we have done
in struct sw_flow{} for stats.

>  /**
>   * struct vport - one port within a datapath
>   * @rcu: RCU callback head for deferred destruction.
>   * @dp: Datapath to which this port belongs.
> - * @upcall_portid: The Netlink port to use for packets received on this port 
> that
> - * miss the flow table.
> + * @upcall_portids: RCU protected 'struct vport_portids'.
>   * @port_no: Index into @dp's @ports array.
>   * @hash_node: Element in @dev_table hash table in vport.c.
>   * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
> @@ -80,7 +100,7 @@ struct vport_err_stats {
>  struct vport {
>         struct rcu_head rcu;
>         struct datapath *dp;
> -       u32 upcall_portid;
> +       struct vport_portids __rcu *upcall_portids;
>         u16 port_no;
>
>         struct hlist_node hash_node;
> @@ -112,7 +132,7 @@ struct vport_parms {
>         /* For ovs_vport_alloc(). */
>         struct datapath *dp;
>         u16 port_no;
> -       u32 upcall_portid;
> +       struct nlattr *upcall_portids;
>  };
>
>  /**
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index a88f6f1..b3977d1 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -138,6 +138,9 @@ struct ovs_vport_stats {
>  /* Allow last Netlink attribute to be unaligned */
>  #define OVS_DP_F_UNALIGNED     (1 << 0)
>
> +/* Allow datapath to associate multiple Netlink PIDs to each vport */
> +#define OVS_DP_F_VPORT_PIDS    (1 << 1)
> +
Have you tested downgrade case with this patch?

>  /* Fixed logical ports. */
>  #define OVSP_LOCAL      ((__u32)0)
>
> @@ -225,9 +228,10 @@ enum ovs_vport_type {
>   * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
>   * plus a null terminator.
>   * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
> - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
> - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
> - * this port.  A value of zero indicates that upcalls should not be sent.
> + * @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
> + * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
> + * received on this port.  If this is a single-element array of value 0,
> + * upcalls should not be sent.
>   * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
>   * packets sent or received through the vport.
>   *
> @@ -251,7 +255,8 @@ enum ovs_vport_attr {
>         OVS_VPORT_ATTR_TYPE,    /* u32 OVS_VPORT_TYPE_* constant. */
>         OVS_VPORT_ATTR_NAME,    /* string name, up to IFNAMSIZ bytes long */
>         OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
> -       OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
> +       OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */
> +                               /* receiving upcalls */
>         OVS_VPORT_ATTR_STATS,   /* struct ovs_vport_stats */
>         __OVS_VPORT_ATTR_MAX
>  };
> --
> 1.7.9.5
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to