Add support for building the in-tree kernel datapath against Linux kernels up to 3.14. Changes to the kernel's generic netlink API required new compatibility code for that layer. This patch also adds initialization code for the per-CPU stats structures.
Encompasses changes from the following upstream Linux kernel commits as well: 36d5fe6a000790f56039afe26834265db0a3ad4c: "core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors" 63862b5bef7349dd1137e4c70702c67d77565785: "net: replace macros net_random and net_srandom with direct calls to prandom" Signed-off-by: Kyle Mestery <mest...@noironetworks.com> --- v4: Add support Linux 3.14 as well. Handle a few more changes based on upstream commits. v3: Correctly make genl_register_family backwards compatible. v2: Address a few comments from Pravin. Some of those comments proved challenging, please see email reply to the list. --- FAQ | 2 +- acinclude.m4 | 4 +- datapath/actions.c | 2 +- datapath/datapath.c | 36 +++++----- datapath/datapath.h | 2 +- datapath/dp_notify.c | 11 ++- datapath/linux/Modules.mk | 2 + datapath/linux/compat/genetlink-openvswitch.c | 20 ++++-- datapath/linux/compat/include/linux/percpu.h | 18 +++++ datapath/linux/compat/include/linux/random.h | 12 ++++ datapath/linux/compat/include/linux/skbuff.h | 2 +- datapath/linux/compat/include/net/genetlink.h | 96 ++++++++++++++++++++++++++- datapath/linux/compat/include/net/ip.h | 12 ++++ datapath/linux/compat/skbuff-openvswitch.c | 6 +- datapath/linux/compat/utils.c | 29 ++++++++ datapath/vport-lisp.c | 7 +- datapath/vport-vxlan.c | 3 +- datapath/vport.c | 2 + 18 files changed, 224 insertions(+), 42 deletions(-) create mode 100644 datapath/linux/compat/include/linux/percpu.h create mode 100644 datapath/linux/compat/include/linux/random.h diff --git a/FAQ b/FAQ index eec2d4f..b376320 100644 --- a/FAQ +++ b/FAQ @@ -149,7 +149,7 @@ A: The following table lists the Linux kernel versions against which the 1.11.x 2.6.18 to 3.8 2.0.x 2.6.32 to 3.10 2.1.x 2.6.32 to 3.11 - 2.2.x 2.6.32 to 3.12 + 2.2.x 2.6.32 to 3.14 Open vSwitch userspace should also work with the Linux kernel module built into Linux 3.3 and later. 
diff --git a/acinclude.m4 b/acinclude.m4 index 1f52cf1..5630213 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -134,10 +134,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [ AC_MSG_RESULT([$kversion]) if test "$version" -ge 3; then - if test "$version" = 3 && test "$patchlevel" -le 12; then + if test "$version" = 3 && test "$patchlevel" -le 13; then : # Linux 3.x else - AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 3.12.x is not supported]) + AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 3.14.x is not supported]) fi else if test "$version" -le 1 || test "$patchlevel" -le 5 || test "$sublevel" -le 31; then diff --git a/datapath/actions.c b/datapath/actions.c index 0b66e7c..59c5855 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -446,7 +446,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, a = nla_next(a, &rem)) { switch (nla_type(a)) { case OVS_SAMPLE_ATTR_PROBABILITY: - if (net_random() >= nla_get_u32(a)) + if (prandom_u32() >= nla_get_u32(a)) return 0; break; diff --git a/datapath/datapath.c b/datapath/datapath.c index 0c77045..49402c3 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -70,14 +70,16 @@ static bool ovs_must_notify(struct genl_info *info, const struct genl_multicast_group *grp) { return info->nlhdr->nlmsg_flags & NLM_F_ECHO || - netlink_has_listeners(genl_info_net(info)->genl_sock, grp->id); + netlink_has_listeners_ovs(genl_info_net(info)->genl_sock, grp, 0); } +static struct genl_family dp_packet_genl_family; + static void ovs_notify(struct sk_buff *skb, struct genl_info *info, struct genl_multicast_group *grp) { - genl_notify(skb, genl_info_net(info), info->snd_portid, - grp->id, info->nlhdr, GFP_KERNEL); + genl_notify(&dp_packet_genl_family, skb, genl_info_net(info), + info->snd_portid, 0, info->nlhdr, GFP_KERNEL); } /** @@ -479,7 +481,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } nla->nla_len = nla_attr_size(skb->len); 
- skb_zerocopy(user_skb, skb, skb->len, hlen); + err = skb_zerocopy(user_skb, skb, skb->len, hlen); + if (err) + goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { @@ -493,6 +497,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: + if (err) + skb_tx_error(skb); + kfree_skb(nskb); return err; } @@ -910,9 +917,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!IS_ERR(reply)) ovs_notify(reply, info, &ovs_dp_flow_multicast_group); else - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_flow_multicast_group.id, - PTR_ERR(reply)); + genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, + 0, PTR_ERR(reply)); } return 0; @@ -1244,6 +1250,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + ovs_init_per_cpu_dp_stats(dp->stats_percpu); + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { @@ -1489,7 +1497,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, }; -static struct genl_family dp_vport_genl_family = { +struct genl_family dp_vport_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, @@ -1892,19 +1900,13 @@ static int dp_register_genl(void) n_registered = 0; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { - const struct genl_family_and_ops *f = &dp_genl_families[i]; + const struct genl_family_and_ops_ovs *f = + (struct genl_family_and_ops_ovs *)&dp_genl_families[i]; - err = genl_register_family_with_ops(f->family, f->ops, - f->n_ops); + err = genl_register_family_ovs(f); if (err) goto error; n_registered++; - - if (f->group) { - err = genl_register_mc_group(f->family, f->group); - if (err) - goto error; - } } 
return 0; diff --git a/datapath/datapath.h b/datapath/datapath.h index d81e05c..57bffa6 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -184,7 +184,7 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n } extern struct notifier_block ovs_dp_device_notifier; -extern struct genl_multicast_group ovs_dp_vport_multicast_group; +extern struct genl_family dp_vport_genl_family; void ovs_dp_process_received_packet(struct vport *, struct sk_buff *); void ovs_dp_detach_port(struct vport *); diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c index 0b22d0c..e96b242 100644 --- a/datapath/dp_notify.c +++ b/datapath/dp_notify.c @@ -35,15 +35,14 @@ static void dp_detach_port_notify(struct vport *vport) OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { - netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, - PTR_ERR(notify)); + genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0, + 0, PTR_ERR(notify)); return; } - genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, - ovs_dp_vport_multicast_group.id, - GFP_KERNEL); + genlmsg_multicast_netns(&dp_vport_genl_family, + ovs_dp_get_net(dp), notify, 0, + 0, GFP_KERNEL); } void ovs_dp_notify_wq(struct work_struct *work) diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index 1e76305..4331222 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -46,7 +46,9 @@ openvswitch_headers += \ linux/compat/include/linux/netdevice.h \ linux/compat/include/linux/netdev_features.h \ linux/compat/include/linux/netlink.h \ + linux/compat/include/linux/percpu.h \ linux/compat/include/linux/poison.h \ + linux/compat/include/linux/random.h \ linux/compat/include/linux/rculist.h \ linux/compat/include/linux/rcupdate.h \ linux/compat/include/linux/reciprocal_div.h \ diff --git a/datapath/linux/compat/genetlink-openvswitch.c b/datapath/linux/compat/genetlink-openvswitch.c index 359f916..c573c2a 100644 --- 
a/datapath/linux/compat/genetlink-openvswitch.c +++ b/datapath/linux/compat/genetlink-openvswitch.c @@ -1,17 +1,27 @@ #include <net/genetlink.h> #include <linux/version.h> -/* This is analogous to rtnl_notify() but uses genl_sock instead of rtnl. - * - * This is not (yet) in any upstream kernel. */ -void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) +void genl_notify(struct genl_family *family, + struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; int report = 0; + struct genl_multicast_group *grp; + int i = 0; if (nlh) report = nlmsg_report(nlh); - nlmsg_notify(sk, skb, portid, group, report, flags); + list_for_each_entry(grp, &family->mcast_groups, list) { + if (group == grp->id) + break; + i++; + } + + if (WARN_ON_ONCE(grp == NULL)) + return; + nlmsg_notify(sk, skb, portid, grp->id, report, flags); } +#endif /* kernel version < 3.13.0 */ diff --git a/datapath/linux/compat/include/linux/percpu.h b/datapath/linux/compat/include/linux/percpu.h new file mode 100644 index 0000000..114e8d8 --- /dev/null +++ b/datapath/linux/compat/include/linux/percpu.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_PERCPU_WRAP_H +#define __LINUX_PERCPU_WRAP_H + +#include <linux/version.h> + +#include_next <linux/percpu.h> + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) +struct dp_stats_percpu; +struct pcpu_tstats; +void ovs_init_per_cpu_dp_stats(struct dp_stats_percpu *); +void ovs_init_per_cpu_t_stats(struct pcpu_tstats *); +#else +#define ovs_init_per_cpu_dp_stats(a) (void)(a) +#define ovs_init_per_cpu_t_stats(a) (void)(a) +#endif /* KERNEL_VERSION(3,13,0) */ + +#endif /* __LINUX_PERCPU_WRAP_H */ diff --git a/datapath/linux/compat/include/linux/random.h b/datapath/linux/compat/include/linux/random.h new file mode 100644 index 0000000..17d8ee2 --- /dev/null +++ b/datapath/linux/compat/include/linux/random.h @@ -0,0 +1,12 @@ +#ifndef 
__LINUX_RANDOM_WRAP_H +#define __LINUX_RANDOM_WRAP_H + +#include <linux/version.h> + +#include_next <linux/random.h> + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,6,12) +#define prandom_u32 random32 +#endif /* KERNEL_VERSION(3,13,0) */ + +#endif /* __LINUX_RANDOM_WRAP_H */ diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index de0c56a..c925788 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -260,7 +260,7 @@ static inline __u32 skb_get_rxhash(struct sk_buff *skb) #if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) unsigned int skb_zerocopy_headlen(const struct sk_buff *from); -void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, +int skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen); #endif diff --git a/datapath/linux/compat/include/net/genetlink.h b/datapath/linux/compat/include/net/genetlink.h index 09ee23b..1aeff12 100644 --- a/datapath/linux/compat/include/net/genetlink.h +++ b/datapath/linux/compat/include/net/genetlink.h @@ -17,8 +17,43 @@ #define portid pid #endif -extern void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, - u32 group, struct nlmsghdr *nlh, gfp_t flags); +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) +#define genl_notify rpl__genl_notify +void genl_notify(struct genl_family *family, + struct sk_buff *skb, struct net *net, u32 portid, u32 group, + struct nlmsghdr *nlh, gfp_t flags); + +#define genl_set_err rpl__genl_set_err +static inline int genl_set_err(struct genl_family *family, struct net *net, + u32 portid, u32 group, int code) +{ + struct genl_multicast_group *grp; + + list_for_each_entry(grp, &family->mcast_groups, list) { + if (group == grp->id) + break; + } + + return netlink_set_err(net->genl_sock, portid, grp->id, code); +} + +#define genlmsg_multicast_netns rpl__genlmsg_multicast_netns +static inline int genlmsg_multicast_netns(struct genl_family 
*family, + struct net *net, struct sk_buff *skb, + u32 portid, unsigned int group, gfp_t flags) +{ + struct genl_multicast_group *grp; + + list_for_each_entry(grp, &family->mcast_groups, list) { + if (group == grp->id) + break; + } + + if (WARN_ON_ONCE(grp == NULL)) + return -EINVAL; + return nlmsg_multicast(net->genl_sock, skb, portid, grp->id, flags); +} +#endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) static inline struct sk_buff *genlmsg_new_unicast(size_t payload, @@ -29,4 +64,61 @@ static inline struct sk_buff *genlmsg_new_unicast(size_t payload, } #endif +struct genl_family_and_ops_ovs { + struct genl_family *family; + struct genl_ops *ops; + int n_ops; + struct genl_multicast_group *group; +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) +static inline int genl_register_family_ovs(const struct genl_family_and_ops_ovs *f) +{ + f->family->ops = f->ops; + f->family->n_ops = f->n_ops; + f->family->mcgrps = f->group; + f->family->n_mcgrps = f->group ? 1 : 0; + + return genl_register_family(f->family); +} +#else +static inline int genl_register_family_ovs(const struct genl_family_and_ops_ovs *f) +{ + int err; + + err = genl_register_family_with_ops(f->family, f->ops, f->n_ops); + if (err) + goto error; + + if (f->group) { + err = genl_register_mc_group(f->family, f->group); + if (err) + goto error; + } +error: + return err; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) +static inline int netlink_has_listeners_ovs(struct sock *genl_sock, + const struct genl_multicast_group *grp, + unsigned int id) +{ + /* Unused argument */ + (void)(grp); + + return netlink_has_listeners(genl_sock, id); +} +#else +static inline int netlink_has_listeners_ovs(struct sock *genl_sock, + const struct genl_multicast_group *grp, + unsigned int id) +{ + /* Unused arguments */ + (void)(id); + + return netlink_has_listeners(genl_sock, grp->id); +} +#endif #endif /* genetlink.h */ diff --git a/datapath/linux/compat/include/net/ip.h 
b/datapath/linux/compat/include/net/ip.h index 4193d32..4fd0b2c 100644 --- a/datapath/linux/compat/include/net/ip.h +++ b/datapath/linux/compat/include/net/ip.h @@ -12,4 +12,16 @@ static inline bool ip_is_fragment(const struct iphdr *iph) } #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) +#define inet_get_local_port_range_ovs(a, b, c) \ + inet_get_local_port_range(a, b, c) +#else +static inline void inet_get_local_port_range_ovs(struct net *net, int *low, + int *high) +{ + (void)(net); + inet_get_local_port_range(low, high); +} +#endif + #endif diff --git a/datapath/linux/compat/skbuff-openvswitch.c b/datapath/linux/compat/skbuff-openvswitch.c index ddd7bc8..bcba930 100644 --- a/datapath/linux/compat/skbuff-openvswitch.c +++ b/datapath/linux/compat/skbuff-openvswitch.c @@ -62,7 +62,7 @@ skb_zerocopy_headlen(const struct sk_buff *from) * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. */ -void +int skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) { int i, j = 0; @@ -75,7 +75,7 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) /* dont bother with small payloads */ if (len <= skb_tailroom(to)) { skb_copy_bits(from, 0, skb_put(to, len), len); - return; + return 0; } if (hlen) { @@ -107,5 +107,7 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) j++; } skb_shinfo(to)->nr_frags = j; + + return 0; } #endif diff --git a/datapath/linux/compat/utils.c b/datapath/linux/compat/utils.c index dc4df2a..c08b74c 100644 --- a/datapath/linux/compat/utils.c +++ b/datapath/linux/compat/utils.c @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <linux/net.h> #include <net/checksum.h> +#include <net/ip.h> #include <linux/string.h> #include <linux/types.h> #include <linux/percpu.h> @@ -38,6 +39,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, } #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) bool 
__net_get_random_once(void *buf, int nbytes, bool *done, atomic_t *done_key) { @@ -58,3 +60,30 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, return true; } +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) +#include "datapath.h" +#include "vport.h" +void ovs_init_per_cpu_dp_stats(struct dp_stats_percpu *stats) +{ + int i; + + for_each_possible_cpu(i) { + struct dp_stats_percpu *dpath_stats; + dpath_stats = per_cpu_ptr(stats, i); + u64_stats_init(&dpath_stats->sync); + } +} + +void ovs_init_per_cpu_t_stats(struct pcpu_tstats *stats) +{ + int i; + + for_each_possible_cpu(i) { + struct pcpu_tstats *vport_stats; + vport_stats = per_cpu_ptr(stats, i); + u64_stats_init(&vport_stats->syncp); + } +} +#endif diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index e33cffe..a1e4ca2 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -163,7 +163,7 @@ static __be64 instance_id_to_tunnel_id(__u8 *iid) /* Compute source UDP port for outgoing packet. * Currently we use the flow hash. 
*/ -static u16 get_src_port(struct sk_buff *skb) +static u16 get_src_port(struct net *net, struct sk_buff *skb) { u32 hash = skb_get_rxhash(skb); unsigned int range; @@ -177,7 +177,7 @@ static u16 get_src_port(struct sk_buff *skb) sizeof(*pkt_key) / sizeof(u32), 0); } - inet_get_local_port_range(&low, &high); + inet_get_local_port_range_ovs(net, &low, &high); range = (high - low) + 1; return (((u64) hash * range) >> 32) + low; } @@ -185,13 +185,14 @@ static u16 get_src_port(struct sk_buff *skb) static void lisp_build_header(const struct vport *vport, struct sk_buff *skb) { + struct net *net = ovs_dp_get_net(vport->dp); struct lisp_port *lisp_port = lisp_vport(vport); struct udphdr *udph = udp_hdr(skb); struct lisphdr *lisph = (struct lisphdr *)(udph + 1); const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; udph->dest = lisp_port->dst_port; - udph->source = htons(get_src_port(skb)); + udph->source = htons(get_src_port(net, skb)); udph->check = 0; udph->len = htons(skb->len - skb_transport_offset(skb)); diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index d264785..4b36253 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -139,6 +139,7 @@ error: static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { + struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sport(vxlan_port->vs->sock->sk); struct rtable *rt; @@ -172,7 +173,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - inet_get_local_port_range(&port_min, &port_max); + inet_get_local_port_range_ovs(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, diff --git a/datapath/vport.c b/datapath/vport.c index 7f12acc..a62f214 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -145,6 +145,8 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return 
ERR_PTR(-ENOMEM); } + ovs_init_per_cpu_t_stats(vport->percpu_stats); + spin_lock_init(&vport->stats_lock); return vport; -- 1.8.5.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev