On Sat, Apr 5, 2014 at 5:23 AM, Alex Wang <al...@nicira.com> wrote: > In order to allow handlers directly read upcalls from datapath, > we need to support per-handler netlink socket for each vport in > datapath. This commit makes this happen. Also, it is guaranteed > backward and forward compatibility with previous branch. > > Signed-off-by: Alex Wang <al...@nicira.com> > Acked-by: Thomas Graf <tg...@redhat.com> > This looks pretty close now, I have few comments.
> --- > V6 -> V7: > - rebase. > > V5 -> V6: > - '__force' cast 'struct vport_portids *' in ovs_vport_free(). > > V4 -> V5: > - use nla_memcpy() instead of memcpy(). > - move 'struct rcu_head' back in the 'struct vport_portids' > for cacheline efficiency. > > V3 -> V4: > - define the type of OVS_VPORT_ATTR_UPCALL_PID to NLA_U32. > - add check of the OVS_VPORT_ATTR_UPCALL_PID in attr array in > ovs_vport_cmd_set(). > - use reciprocal_div() in the modular computation, to reduce the > overhead. > > V2 -> V3: > - use OVS_DP_ATTR_USER_FEATURES to control the type of > OVS_VPORT_ATTR_UPCALL_PID attribute returned by the ovs_vport_cmd_get(). > - directly free the 'vport->upcall_portids' in ovs_vport_free(). > - add check to guarantee pids array length is always positive. > > PATCH -> V2: > - rebase. > > major changes since RFC: > - guarantee the compatibility with branch-2.1 userspace. > - use rcu to protect upcall port_id array multi-access. > - use skb_get_rxhash() to select the port id to send upcall. > --- > datapath/datapath.c | 21 ++++++--- > datapath/vport.c | 103 > ++++++++++++++++++++++++++++++++++++++++++- > datapath/vport.h | 28 ++++++++++-- > include/linux/openvswitch.h | 13 ++++-- > 4 files changed, 149 insertions(+), 16 deletions(-) > > diff --git a/datapath/datapath.c b/datapath/datapath.c > index c6d42db..c2032b1 100644 > --- a/datapath/datapath.c > +++ b/datapath/datapath.c > @@ -268,7 +268,7 @@ void ovs_dp_process_received_packet(struct vport *p, > struct sk_buff *skb) > upcall.cmd = OVS_PACKET_CMD_MISS; > upcall.key = &key; > upcall.userdata = NULL; > - upcall.portid = p->upcall_portid; > + upcall.portid = ovs_vport_find_portid(p, skb); > ovs_dp_upcall(dp, skb, &upcall); > consume_skb(skb); > stats_counter = &stats->n_missed; > @@ -1381,7 +1381,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct > genl_info *info) > parms.options = NULL; > parms.dp = dp; > parms.port_no = OVSP_LOCAL; > - parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); > + parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; > > ovs_dp_change(dp, a); > > @@ -1638,8 +1638,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, > struct sk_buff *skb, > > if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || > nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || > - nla_put_string(skb, OVS_VPORT_ATTR_NAME, > vport->ops->get_name(vport)) || > - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) > + nla_put_string(skb, OVS_VPORT_ATTR_NAME, > vport->ops->get_name(vport))) > goto nla_put_failure; > > ovs_vport_get_stats(vport, &vport_stats); > @@ -1647,6 +1646,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, > struct sk_buff *skb, > &vport_stats)) > goto nla_put_failure; > > + if (ovs_vport_get_upcall_portids(vport, skb)) > + goto nla_put_failure; > + > err = ovs_vport_get_options(vport, skb); > if (err == -EMSGSIZE) > goto error; > @@ -1768,7 +1770,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, > struct genl_info *info) > parms.options = a[OVS_VPORT_ATTR_OPTIONS]; > parms.dp = dp; > parms.port_no = port_no; > - parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); > + parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; > > vport = new_vport(&parms); > err = PTR_ERR(vport); > @@ -1825,8 +1827,13 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, > struct genl_info *info) > if (a[OVS_VPORT_ATTR_STATS]) > ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS])); > > - if (a[OVS_VPORT_ATTR_UPCALL_PID]) > - vport->upcall_portid = > nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); > + > + if (a[OVS_VPORT_ATTR_UPCALL_PID]) { > + err = ovs_vport_set_upcall_portids(vport, > + > a[OVS_VPORT_ATTR_UPCALL_PID]); > + if (err) > + goto exit_unlock_free; > + } > > err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, > info->snd_seq, 0, OVS_VPORT_CMD_NEW); > diff --git a/datapath/vport.c b/datapath/vport.c > index 2673b81..9deba28 100644 > --- a/datapath/vport.c > +++ b/datapath/vport.c > @@ -136,10 +136,12 @@ struct vport *ovs_vport_alloc(int priv_size, const > struct vport_ops *ops, > > vport->dp = parms->dp; > vport->port_no = parms->port_no; > - vport->upcall_portid = parms->upcall_portid; > vport->ops = ops; > INIT_HLIST_NODE(&vport->dp_hash_node); > > + if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) > + return ERR_PTR(-EINVAL); > + > vport->percpu_stats = alloc_percpu(struct pcpu_tstats); > if (!vport->percpu_stats) { > kfree(vport); > @@ -169,6 +171,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct > vport_ops *ops, > */ > void ovs_vport_free(struct vport *vport) > { > + kfree((struct vport_portids __force *)vport->upcall_portids); > free_percpu(vport->percpu_stats); > kfree(vport); > } > @@ -355,6 +358,104 @@ int ovs_vport_get_options(const struct vport *vport, > struct sk_buff *skb) > return 0; > } > > +static void vport_portids_destroy_rcu_cb(struct rcu_head *rcu) > +{ > + struct vport_portids *ids = container_of(rcu, struct vport_portids, > + rcu); > + > + kfree(ids); > +} > + > +/** > + * ovs_vport_set_upcall_portids - set upcall portids of @vport. > + * > + * @vport: vport to modify. > + * @ids: new configuration, an array of port ids. > + * > + * Sets the vport's upcall_portids to @ids. > + * > + * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be > parsed > + * as an array of U32. > + * > + * Must be called with rcu_read_lock. > + */ > +int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids) > +{ > + struct vport_portids *old, *vport_portids; > + > + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) > + return -EINVAL; > + > + old = ovsl_dereference(vport->upcall_portids); > + > + vport_portids = kmalloc(sizeof *vport_portids + nla_len(ids), > + GFP_KERNEL); need to divide nla_len() by 4. > + vport_portids->ids = (void *) vport_portids + sizeof *vport_portids; > + vport_portids->n_ids = nla_len(ids) / sizeof(u32); > + vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids); > + nla_memcpy(vport_portids->ids, ids, nla_len(ids)); > + > + rcu_assign_pointer(vport->upcall_portids, vport_portids); > + > + if (old) > + call_rcu(&old->rcu, vport_portids_destroy_rcu_cb); > + > + return 0; > +} > + > +/** > + * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. > + * > + * @vport: vport from which to retrieve the portids. > + * @skb: sk_buff where portids should be appended. > + * > + * Retrieves the configuration of the given vport, appending the > + * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall > + * portids to @skb. > + * > + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. > + * If an error occurs, @skb is left unmodified. Must be called with > + * rcu_read_lock. > + */ > +int ovs_vport_get_upcall_portids(const struct vport *vport, > + struct sk_buff *skb) > +{ > + struct vport_portids *ids; > + > + ids = rcu_dereference_ovsl(vport->upcall_portids); > + > + if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) > + return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, > + ids->n_ids * sizeof(u32), (void *) ids->ids); > + else > + return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, *ids->ids); > +} > + > +/** > + * ovs_vport_find_portid - find the upcall portid to send upcall. > + * > + * @vport: vport from which the missed packet is received. > + * @skb: skb that the missed packet was received. > + * > + * Uses the skb_get_rxhash() to select the upcall portid to send the > + * upcall. > + * > + * Returns the portid of the target socket. Must be called with > rcu_read_lock. > + */ > +u32 ovs_vport_find_portid(const struct vport *p, struct sk_buff *skb) > +{ > + struct vport_portids *ids; > + u32 hash; > + > + ids = rcu_dereference_ovsl(p->upcall_portids); > + Is this function ever called under ovs-lock? if not we can just use rcu_dereference. > + if (ids->n_ids == 1 && *ids->ids == 0) > + return 0; > + > + hash = skb_get_rxhash(skb); > + return ids->ids[hash - ids->n_ids * reciprocal_divide(hash, > ids->rn_ids)]; > +} > + > /** > * ovs_vport_receive - pass up received packet to the datapath for > processing > * > diff --git a/datapath/vport.h b/datapath/vport.h > index 18b723e..528040b 100644 > --- a/datapath/vport.h > +++ b/datapath/vport.h > @@ -23,6 +23,7 @@ > #include <linux/list.h> > #include <linux/netlink.h> > #include <linux/openvswitch.h> > +#include <linux/reciprocal_div.h> > #include <linux/skbuff.h> > #include <linux/spinlock.h> > #include <linux/u64_stats_sync.h> > @@ -50,6 +51,11 @@ void ovs_vport_get_stats(struct vport *, struct > ovs_vport_stats *); > int ovs_vport_set_options(struct vport *, struct nlattr *options); > int ovs_vport_get_options(const struct vport *, struct sk_buff *); > > +int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids); > +int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); > + > +u32 ovs_vport_find_portid(const struct vport *, struct sk_buff *); > + > int ovs_vport_send(struct vport *, struct sk_buff *); > > /* The following definitions are for implementers of vport devices: */ > @@ -60,13 +66,27 @@ struct vport_err_stats { > u64 tx_dropped; > u64 tx_errors; > }; > +/** > + * struct vport_portids - array of netlink portids of a vport. > + * must be protected by rcu. > + * @rcu: RCU callback head for deferred destruction. > + * @n_ids: Size of @ids array. > + * @rn_ids: The reciprocal value of @n_ids. > + * @ids: Array storing the Netlink socket pids to use for packets received > + * on this port that miss the flow table. > + */ > +struct vport_portids { > + u32 n_ids; > + struct reciprocal_value rn_ids; > + struct rcu_head rcu; > + u32 *ids; > +}; > if we use u32 ids[], we can save pointer space. Same as we have done in struct sw_flow{} for stats. > /** > * struct vport - one port within a datapath > * @rcu: RCU callback head for deferred destruction. > * @dp: Datapath to which this port belongs. > - * @upcall_portid: The Netlink port to use for packets received on this port > that > - * miss the flow table. > + * @upcall_portids: RCU protected 'struct vport_portids'. > * @port_no: Index into @dp's @ports array. > * @hash_node: Element in @dev_table hash table in vport.c. > * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. > @@ -80,7 +100,7 @@ struct vport_err_stats { > struct vport { > struct rcu_head rcu; > struct datapath *dp; > - u32 upcall_portid; > + struct vport_portids __rcu *upcall_portids; > u16 port_no; > > struct hlist_node hash_node; > @@ -112,7 +132,7 @@ struct vport_parms { > /* For ovs_vport_alloc(). */ > struct datapath *dp; > u16 port_no; > - u32 upcall_portid; > + struct nlattr *upcall_portids; > }; > > /** > diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h > index a88f6f1..b3977d1 100644 > --- a/include/linux/openvswitch.h > +++ b/include/linux/openvswitch.h > @@ -138,6 +138,9 @@ struct ovs_vport_stats { > /* Allow last Netlink attribute to be unaligned */ > #define OVS_DP_F_UNALIGNED (1 << 0) > > +/* Allow datapath to associate multiple Netlink PIDs to each vport */ > +#define OVS_DP_F_VPORT_PIDS (1 << 1) > + Have you tested downgrade case with this patch? > /* Fixed logical ports. */ > #define OVSP_LOCAL ((__u32)0) > > @@ -225,9 +228,10 @@ enum ovs_vport_type { > * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes > * plus a null terminator. > * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. > - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that > - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on > - * this port. A value of zero indicates that upcalls should not be sent. > + * @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace > + * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets > + * received on this port. If this is a single-element array of value 0, > + * upcalls should not be sent. > * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for > * packets sent or received through the vport. > * > @@ -251,7 +255,8 @@ enum ovs_vport_attr { > OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ > OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ > OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ > - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ > + OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */ > + /* receiving upcalls */ > OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ > __OVS_VPORT_ATTR_MAX > }; > -- > 1.7.9.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev