git am warning: /home/pravin/ovs/w7/.git/rebase-apply/patch:53: trailing whitespace.
} warning: 1 line adds whitespace errors. ----------------------------------------------------------- compiler warning: lib/odp-util.c:869:15: warning: cast from 'uint8_t *' (aka 'unsigned char *') to 'struct geneve_opt *' increases required alignment from 1 to 2 [-Wcast-align] opt = (struct geneve_opt *)((uint8_t *)opt + len); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lib/odp-util.c:845:63: warning: unused parameter 'tun' [-Wunused-parameter] parse_geneve_opts(const struct nlattr *attr, struct flow_tnl *tun) ^ On Tue, Jun 10, 2014 at 4:47 PM, Jesse Gross <je...@nicira.com> wrote: > This adds support for Geneve - Generic Network Virtualization > Encapsulation. The protocol is documented at > http://tools.ietf.org/html/draft-gross-geneve-00 > > The kernel implementation is completely agnostic to the options > that are in use and can handle newly defined options without > further work. It does this by simply matching on a byte array > of options and allowing userspace to setup flows on this array. > > Userspace currently implements only support for basic version of > Geneve. It can work with the base header (including the VNI) and > is capable of parsing options but does not currently support any > particular option definitions. Over time, the intention is to > allow options to be matched through OpenFlow without requiring > explicit support in OVS userspace. 
> > Signed-off-by: Jesse Gross <je...@nicira.com> > --- > datapath/Modules.mk | 1 + > datapath/flow.c | 7 + > datapath/flow.h | 16 +- > datapath/flow_netlink.c | 112 +++++- > datapath/linux/Modules.mk | 1 + > datapath/linux/compat/include/net/geneve.h | 23 ++ > datapath/linux/compat/include/net/ip_tunnels.h | 1 + > datapath/vport-geneve.c | 464 > +++++++++++++++++++++++++ > datapath/vport-gre.c | 2 +- > datapath/vport-lisp.c | 2 +- > datapath/vport-vxlan.c | 2 +- > datapath/vport.c | 1 + > datapath/vport.h | 1 + > include/linux/openvswitch.h | 3 +- > lib/dpif-linux.c | 5 + > lib/netdev-vport.c | 16 +- > lib/odp-util.c | 43 +++ > lib/packets.h | 18 + > tests/ovs-vsctl.at | 6 +- > tests/tunnel.at | 12 + > vswitchd/vswitch.xml | 15 +- > 21 files changed, 726 insertions(+), 25 deletions(-) > create mode 100644 datapath/linux/compat/include/net/geneve.h > create mode 100644 datapath/vport-geneve.c > > diff --git a/datapath/Modules.mk b/datapath/Modules.mk > index b652411..41ffbea 100644 > --- a/datapath/Modules.mk > +++ b/datapath/Modules.mk > @@ -14,6 +14,7 @@ openvswitch_sources = \ > flow_netlink.c \ > flow_table.c \ > vport.c \ > + vport-geneve.c \ > vport-gre.c \ > vport-internal_dev.c \ > vport-lisp.c \ > diff --git a/datapath/flow.c b/datapath/flow.c > index f1bb95d..7b108ed 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -455,6 +455,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, > struct sw_flow_key *key) > struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info; > memcpy(&key->tun_key, &tun_info->tunnel, > sizeof(key->tun_key)); > + if (tun_info->options) { > + memcpy(GENEVE_OPTS(key, tun_info->options_len), > + tun_info->options, tun_info->options_len); Need to check options_len before copying data from packet. 
> + key->tun_opts_len = tun_info->options_len; > + } else { > + key->tun_opts_len = 0; > + } > } else { > memset(&key->tun_key, 0, sizeof(key->tun_key)); > } > diff --git a/datapath/flow.h b/datapath/flow.h > index 0ecf78b..5d8383c 100644 > --- a/datapath/flow.h > +++ b/datapath/flow.h > @@ -53,11 +53,20 @@ struct ovs_key_ipv4_tunnel { > > struct ovs_tunnel_info { > struct ovs_key_ipv4_tunnel tunnel; > + struct geneve_opt *options; > + u8 options_len; > }; > > +#define GENEVE_OPTS(flow_key, opt_len) (struct geneve_opt *) \ > + ((flow_key)->tun_opts + \ > + FIELD_SIZEOF(struct sw_flow_key, > tun_opts) - \ > + opt_len) > + > static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, > const struct iphdr *iph, __be64 > tun_id, > - __be16 tun_flags) > + __be16 tun_flags, > + struct geneve_opt *opts, > + u8 opts_len) > { > tun_info->tunnel.tun_id = tun_id; > tun_info->tunnel.ipv4_src = iph->saddr; > @@ -69,9 +78,14 @@ static inline void ovs_flow_tun_info_init(struct > ovs_tunnel_info *tun_info, > /* clear struct padding. */ > memset((unsigned char *) &tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, > sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); > + > + tun_info->options = opts; > + tun_info->options_len = opts_len; > } > > struct sw_flow_key { > + u8 tun_opts[255]; > + u8 tun_opts_len; > struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ > struct { > u32 priority; /* Packet QoS priority. 
*/ > diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c > index c5ca2f4..2e6d08c 100644 > --- a/datapath/flow_netlink.c > +++ b/datapath/flow_netlink.c > @@ -42,6 +42,7 @@ > #include <linux/icmp.h> > #include <linux/icmpv6.h> > #include <linux/rculist.h> > +#include <net/geneve.h> > #include <net/ip.h> > #include <net/ip_tunnels.h> > #include <net/ipv6.h> > @@ -89,18 +90,21 @@ static void update_range__(struct sw_flow_match *match, > } \ > } while (0) > > -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ > +#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ > do { \ > - update_range__(match, offsetof(struct sw_flow_key, field), \ > - len, is_mask); \ > + update_range__(match, offset, len, is_mask); \ > if (is_mask) { \ > if ((match)->mask) \ > - memcpy(&(match)->mask->key.field, value_p, > len);\ > + memcpy((u8 *)&(match)->mask->key + offset, > value_p, len);\ > } else { \ > - memcpy(&(match)->key->field, value_p, len); \ > + memcpy((u8 *)(match)->key + offset, value_p, len); > \ > } \ > } while (0) > > +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ > + SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), > \ > + value_p, len, is_mask) > + > static u16 range_n_bytes(const struct sw_flow_key_range *range) > { > return range->end - range->start; > @@ -348,6 +352,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, > [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, > [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, > [OVS_TUNNEL_KEY_ATTR_OAM] = 0, > + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, > }; > > if (type > OVS_TUNNEL_KEY_ATTR_MAX) { > @@ -356,7 +361,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, > return -EINVAL; > } > > - if (ovs_tunnel_key_lens[type] != nla_len(a)) { > + if (ovs_tunnel_key_lens[type] != nla_len(a) && > + ovs_tunnel_key_lens[type] != -1) { > OVS_NLERR("IPv4 tunnel attribute type has unexpected " > " length (type=%d, length=%d, > expected=%d).\n", > type, 
nla_len(a), > ovs_tunnel_key_lens[type]); > @@ -395,6 +401,38 @@ static int ipv4_tun_from_nlattr(const struct nlattr > *attr, > case OVS_TUNNEL_KEY_ATTR_OAM: > tun_flags |= TUNNEL_OAM; > break; > + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: > + if (nla_len(a) > sizeof(match->key->tun_opts)) { > + OVS_NLERR("Geneve option length exceeds " > + "maximum size (len %d, max %zu).\n", > + nla_len(a), > + sizeof(match->key->tun_opts)); > + return -EINVAL; > + } > + > + if (nla_len(a) % 4 != 0) { > + OVS_NLERR("Geneve option length is not " > + "a multiple of 4 (len %d).\n", > + nla_len(a)); > + return -EINVAL; > + } > + > + /* We need to record the length of the options passed > + * down, otherwise packets with the same format but > + * additional options will be silently matched. > + */ > + if (!is_mask) { > + SW_FLOW_KEY_PUT(match, tun_opts_len, > nla_len(a), > + false); > + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, > + true); > + } > + > + SW_FLOW_KEY_MEMCPY_OFFSET(match, > + (unsigned long)GENEVE_OPTS((struct > sw_flow_key *)0, > + nla_len(a)), > + nla_data(a), nla_len(a), is_mask); > + break; > default: > return -EINVAL; > } > @@ -423,8 +461,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, > } > > static int ipv4_tun_to_nlattr(struct sk_buff *skb, > - const struct ovs_key_ipv4_tunnel *tun_key, > - const struct ovs_key_ipv4_tunnel *output) > + const struct ovs_key_ipv4_tunnel *output, > + const struct geneve_opt *tun_opts, > + int swkey_tun_opts_len, int out_tun_opts_len) > { > struct nlattr *nla; > > @@ -455,6 +494,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, > if ((output->tun_flags & TUNNEL_OAM) && > nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) > return -EMSGSIZE; > + if (out_tun_opts_len && > + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, > + swkey_tun_opts_len, tun_opts)); > > nla_nest_end(skb, nla); > return 0; > @@ -916,9 +958,13 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, > if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, 
output->phy.priority)) > goto nla_put_failure; > > - if ((swkey->tun_key.ipv4_dst || is_mask) && > - ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) > - goto nla_put_failure; > + if ((swkey->tun_key.ipv4_dst || is_mask)) { > + if (ipv4_tun_to_nlattr(skb, &output->tun_key, > + GENEVE_OPTS(output, > swkey->tun_opts_len), > + swkey->tun_opts_len, > + output->tun_opts_len)) > + goto nla_put_failure; > + } > > if (swkey->phy.in_port == DP_MAX_PORTS) { > if (is_mask && (output->phy.in_port == 0xffff)) > @@ -1309,17 +1355,55 @@ static int validate_and_copy_set_tun(const struct > nlattr *attr, > if (err) > return err; > > + if (key.tun_opts_len) { > + struct geneve_opt *option = GENEVE_OPTS(&key, > + key.tun_opts_len); > + int opts_len = key.tun_opts_len; > + bool crit_opt = false; > + > + while (opts_len > 0) { > + int len; > + > + if (opts_len < sizeof(*option)) > + return -EINVAL; > + > + len = sizeof(*option) + option->length * 4; > + if (len > opts_len) > + return -EINVAL; > + > + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); > + > + option = (struct geneve_opt *)((u8 *)option + len); > + opts_len -= len; > + }; > + > + key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; > + }; > + > start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); > if (start < 0) > return start; > > a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, > - sizeof(*tun_info)); > + sizeof(*tun_info) + key.tun_opts_len); > if (IS_ERR(a)) > return PTR_ERR(a); > > tun_info = nla_data(a); > tun_info->tunnel = key.tun_key; > + tun_info->options_len = key.tun_opts_len; > + > + if (tun_info->options_len) { > + /* We need to store the options in the action itself since > + * everything else will go away after flow setup. We can > append > + * it to tun_info and then point there. 
> + */ > + tun_info->options = (struct geneve_opt *)(tun_info + 1); > + memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len), > + key.tun_opts_len); > + } else { > + tun_info->options = NULL; > + } > > add_nested_action_end(*sfa, start); > > @@ -1611,7 +1695,9 @@ static int set_action_to_attr(const struct nlattr *a, > struct sk_buff *skb) > return -EMSGSIZE; > > err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, > - &tun_info->tunnel); > + tun_info->options, > + tun_info->options_len, > + tun_info->options_len); > if (err) > return err; > nla_nest_end(skb, start); > diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk > index 224eb02..46aa1f6 100644 > --- a/datapath/linux/Modules.mk > +++ b/datapath/linux/Modules.mk > @@ -63,6 +63,7 @@ openvswitch_headers += \ > linux/compat/include/net/dst.h \ > linux/compat/include/net/flow_keys.h \ > linux/compat/include/net/genetlink.h \ > + linux/compat/include/net/geneve.h \ > linux/compat/include/net/gre.h \ > linux/compat/include/net/inet_frag.h \ > linux/compat/include/net/ip.h \ > diff --git a/datapath/linux/compat/include/net/geneve.h > b/datapath/linux/compat/include/net/geneve.h > new file mode 100644 > index 0000000..2cb294f > --- /dev/null > +++ b/datapath/linux/compat/include/net/geneve.h > @@ -0,0 +1,23 @@ > +#ifndef __NET_GENEVE_WRAPPER_H > +#define __NET_GENEVE_WRAPPER_H 1 > + > +/* Not yet upstream. 
*/ > +#define GENEVE_CRIT_OPT_TYPE (1 << 7) > +struct geneve_opt { > + __be16 opt_class; > + u8 type; > +#ifdef __LITTLE_ENDIAN_BITFIELD > + u8 length:5; > + u8 r3:1; > + u8 r2:1; > + u8 r1:1; > +#else > + u8 r1:1; > + u8 r2:1; > + u8 r3:1; > + u8 length:5; > +#endif > + u8 opt_data[]; > +}; > + > +#endif > diff --git a/datapath/linux/compat/include/net/ip_tunnels.h > b/datapath/linux/compat/include/net/ip_tunnels.h > index e2f3c30..c7a14ef 100644 > --- a/datapath/linux/compat/include/net/ip_tunnels.h > +++ b/datapath/linux/compat/include/net/ip_tunnels.h > @@ -47,5 +47,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, > __be16 inner_proto); > > /* Not yet upstream */ > #define TUNNEL_OAM __cpu_to_be16(0x0200) > +#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) > > #endif /* __NET_IP_TUNNELS_H */ > diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c > new file mode 100644 > index 0000000..a6e9287 > --- /dev/null > +++ b/datapath/vport-geneve.c > @@ -0,0 +1,464 @@ > +/* > + * Copyright (c) 2014 Nicira, Inc. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of version 2 of the GNU General Public > + * License as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. 
> + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > + * 02110-1301, USA > + */ > + > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > +#include <linux/version.h> > + > +#include <linux/in.h> > +#include <linux/ip.h> > +#include <linux/net.h> > +#include <linux/rculist.h> > +#include <linux/udp.h> > + > +#include <net/geneve.h> > +#include <net/icmp.h> > +#include <net/ip.h> > +#include <net/route.h> > +#include <net/udp.h> > +#include <net/vxlan.h> > +#include <net/xfrm.h> > + > +#include "datapath.h" > +#include "gso.h" > +#include "vport.h" > + > +/* > + * Geneve Header: > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * |Ver| Opt Len |O|C| Rsvd. | Protocol Type | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * | Virtual Network Identifier (VNI) | Reserved | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * | Variable Length Options | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * > + * Option Header: > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * | Option Class | Type |R|R|R| Length | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + * | Variable Option Data | > + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > + */ > + > +struct genevehdr { > +#ifdef __LITTLE_ENDIAN_BITFIELD > + u8 opt_len:6; > + u8 ver:2; > + u8 rsvd1:6; > + u8 critical:1; > + u8 oam:1; > +#else > + u8 ver:2; > + u8 opt_len:6; > + u8 oam:1; > + u8 critical:1; > + u8 rsvd1:6; > +#endif > + __be16 proto_type; > + u8 vni[3]; > + u8 rsvd2; > + struct geneve_opt options[]; > +}; > + > +#define GENEVE_VER 0 > + > +#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) > + > +/** > + * struct geneve_port - Keeps track of open 
UDP ports > + * @dst_port: Geneve UDP port number. > + * @list: list element in @geneve_ports. > + * @geneve_rcv_socket: The socket created for this port number. > + * @name: vport name. > + */ > +struct geneve_port { > + __be16 dst_port; > + struct list_head list; > + struct socket *geneve_rcv_socket; > + char name[IFNAMSIZ]; > +}; > + > +static LIST_HEAD(geneve_ports); > + > +static inline struct geneve_port *geneve_vport(const struct vport *vport) > +{ > + return vport_priv(vport); > +} > + > +static struct geneve_port *geneve_find_port(struct net *net, __be16 port) > +{ > + struct geneve_port *geneve_port; > + > + list_for_each_entry_rcu(geneve_port, &geneve_ports, list) { > + if (geneve_port->dst_port == port && > + net_eq(sock_net(geneve_port->geneve_rcv_socket->sk), net)) > + return geneve_port; > + } > + > + return NULL; > +} > + > +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) > +{ > + return (struct genevehdr *)(udp_hdr(skb) + 1); > +} > + > +/* Convert 64 bit tunnel ID to 24 bit VNI. */ > +static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) > +{ > +#ifdef __BIG_ENDIAN > + vni[0] = (__force __u8)(tun_id >> 16); > + vni[1] = (__force __u8)(tun_id >> 8); > + vni[2] = (__force __u8)tun_id; > +#else > + vni[0] = (__force __u8)((__force u64)tun_id >> 40); > + vni[1] = (__force __u8)((__force u64)tun_id >> 48); > + vni[2] = (__force __u8)((__force u64)tun_id >> 56); > +#endif > +} > + > +/* Convert 24 bit VNI to 64 bit tunnel ID. 
*/ > +static __be64 vni_to_tunnel_id(__u8 *vni) > +{ > +#ifdef __BIG_ENDIAN > + return (vni[0] << 16) | (vni[1] << 8) | vni[2]; > +#else > + return (__force __be64)(((__force u64)vni[0] << 40) | > + ((__force u64)vni[1] << 48) | > + ((__force u64)vni[2] << 56)); > +#endif > +} > + > + > +static void geneve_build_header(const struct vport *vport, > + struct sk_buff *skb) > +{ > + struct geneve_port *geneve_port = geneve_vport(vport); > + struct udphdr *udph = udp_hdr(skb); > + struct genevehdr *geneveh = (struct genevehdr *)(udph + 1); > + const struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info; > + > + udph->dest = geneve_port->dst_port; > + udph->source = vxlan_src_port(0, USHRT_MAX, skb); > + udph->check = 0; > + udph->len = htons(skb->len - skb_transport_offset(skb)); > + > + geneveh->ver = GENEVE_VER; > + geneveh->opt_len = tun_info->options_len / 4; > + geneveh->oam = !!(tun_info->tunnel.tun_flags & TUNNEL_OAM); > + geneveh->critical = !!(tun_info->tunnel.tun_flags & TUNNEL_CRIT_OPT); > + geneveh->rsvd1 = 0; > + geneveh->proto_type = htons(ETH_P_TEB); > + tunnel_id_to_vni(tun_info->tunnel.tun_id, geneveh->vni); > + geneveh->rsvd2 = 0; > + > + memcpy(geneveh->options, tun_info->options, tun_info->options_len); > +} > + > +static int geneve_rcv(struct sock *sk, struct sk_buff *skb) > +{ > + struct geneve_port *geneve_port; > + struct genevehdr *geneveh; > + int opts_len; > + struct ovs_tunnel_info tun_info; > + __be64 key; > + __be16 flags; > + > +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) > + if (unlikely(udp_lib_checksum_complete(skb))) > + goto error; > +#endif > + > + if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) > + goto error; > + > + geneveh = geneve_hdr(skb); > + > + if (unlikely(geneveh->ver != GENEVE_VER)) > + goto error; > + > + if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) > + goto error; > + > + geneve_port = geneve_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); I guess we will start using 
rcu_dereference_sk_user_data() after upstreaming UDP tunnels. > + if (unlikely(!geneve_port)) > + goto error; > + > + opts_len = geneveh->opt_len * 4; > + if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, > + htons(ETH_P_TEB))) > + goto error; > + > + geneveh = geneve_hdr(skb); > + > + flags = TUNNEL_KEY | > + (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | > + (geneveh->oam ? TUNNEL_OAM : 0); > + I am not sure why TUNNEL_CRIT_OPT is not parsed here. > + key = vni_to_tunnel_id(geneveh->vni); > + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, > + geneveh->options, opts_len); > + > + ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info); > + goto out; > + > +error: > + kfree_skb(skb); > +out: > + return 0; > +} > + > +/* Arbitrary value. Irrelevant as long as it's not 0 since we set the > handler. */ > +#define UDP_ENCAP_GENEVE 1 > +static int geneve_socket_init(struct geneve_port *geneve_port, struct net > *net) > +{ > + struct sockaddr_in sin; > + int err; > + > + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, > + &geneve_port->geneve_rcv_socket); > + if (err) > + goto error; > + > + /* release net ref. 
*/ > + sk_change_net(geneve_port->geneve_rcv_socket->sk, net); > + > + sin.sin_family = AF_INET; > + sin.sin_addr.s_addr = htonl(INADDR_ANY); > + sin.sin_port = geneve_port->dst_port; > + > + err = kernel_bind(geneve_port->geneve_rcv_socket, > + (struct sockaddr *)&sin, sizeof(struct > sockaddr_in)); > + if (err) > + goto error_sock; > + > + udp_sk(geneve_port->geneve_rcv_socket->sk)->encap_type = > UDP_ENCAP_GENEVE; > + udp_sk(geneve_port->geneve_rcv_socket->sk)->encap_rcv = geneve_rcv; > + > + udp_encap_enable(); > + > + return 0; > + > +error_sock: > + sk_release_kernel(geneve_port->geneve_rcv_socket->sk); > +error: > + pr_warn("cannot register geneve protocol handler: %d\n", err); > + return err; > +} > + > +static int geneve_get_options(const struct vport *vport, > + struct sk_buff *skb) > +{ > + struct geneve_port *geneve_port = geneve_vport(vport); > + > + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, > + ntohs(geneve_port->dst_port))) > + return -EMSGSIZE; > + return 0; > +} > + > +static void geneve_tnl_destroy(struct vport *vport) > +{ > + struct geneve_port *geneve_port = geneve_vport(vport); > + > + list_del_rcu(&geneve_port->list); > + /* Release socket */ > + sk_release_kernel(geneve_port->geneve_rcv_socket->sk); > + > + ovs_vport_deferred_free(vport); > +} > + > +static struct vport *geneve_tnl_create(const struct vport_parms *parms) > +{ > + struct net *net = ovs_dp_get_net(parms->dp); > + struct nlattr *options = parms->options; > + struct geneve_port *geneve_port; > + struct vport *vport; > + struct nlattr *a; > + int err; > + u16 dst_port; > + > + if (!options) { > + err = -EINVAL; > + goto error; > + } > + > + a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); > + if (a && nla_len(a) == sizeof(u16)) { > + dst_port = nla_get_u16(a); > + } else { > + /* Require destination port from userspace. 
*/ > + err = -EINVAL; > + goto error; > + } > + > + /* Verify if we already have a socket created for this port */ > + if (geneve_find_port(net, htons(dst_port))) { > + err = -EEXIST; > + goto error; > + } > + > + vport = ovs_vport_alloc(sizeof(struct geneve_port), > + &ovs_geneve_vport_ops, parms); > + if (IS_ERR(vport)) > + return vport; > + > + geneve_port = geneve_vport(vport); > + geneve_port->dst_port = htons(dst_port); > + strncpy(geneve_port->name, parms->name, IFNAMSIZ); > + > + err = geneve_socket_init(geneve_port, net); > + if (err) > + goto error_free; > + > + list_add_tail_rcu(&geneve_port->list, &geneve_ports); > + return vport; > + > +error_free: > + ovs_vport_free(vport); > +error: > + return ERR_PTR(err); > +} > + > +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) > + > +static void geneve_fix_segment(struct sk_buff *skb) > +{ > + struct udphdr *udph = udp_hdr(skb); > + > + udph->len = htons(skb->len - skb_transport_offset(skb)); > +} > + > +static int handle_offloads(struct sk_buff *skb) > +{ > + if (skb_is_gso(skb)) > + OVS_GSO_CB(skb)->fix_segment = geneve_fix_segment; > + else if (skb->ip_summed != CHECKSUM_PARTIAL) > + skb->ip_summed = CHECKSUM_NONE; > + return 0; > +} > +#else > +static int handle_offloads(struct sk_buff *skb) > +{ > + if (skb_is_gso(skb)) { > + int err = skb_unclone(skb, GFP_ATOMIC); > + if (unlikely(err)) > + return err; > + > + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; > + } else if (skb->ip_summed != CHECKSUM_PARTIAL) > + skb->ip_summed = CHECKSUM_NONE; > + > + skb->encapsulation = 1; > + return 0; > +} > +#endif > + > +static int geneve_send(struct vport *vport, struct sk_buff *skb) > +{ > + struct ovs_key_ipv4_tunnel *tun_key = &OVS_CB(skb)->tun_info->tunnel; > + int network_offset = skb_network_offset(skb); > + struct rtable *rt; > + int min_headroom; > + __be32 saddr; > + __be16 df; > + int sent_len; > + int err; > + > + if (unlikely(!OVS_CB(skb)->tun_info)) > + return -EINVAL; > + > + /* Route lookup */ > + 
saddr = tun_key->ipv4_src; > + rt = find_route(ovs_dp_get_net(vport->dp), > + &saddr, tun_key->ipv4_dst, > + IPPROTO_UDP, tun_key->ipv4_tos, > + skb->mark); > + if (IS_ERR(rt)) { > + err = PTR_ERR(rt); > + goto error; > + } > + > + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + > rt_dst(rt).header_len > + + GENEVE_BASE_HLEN + sizeof(struct iphdr) > + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); > + We need to add options_len to the headroom calculation. > + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { > + int head_delta = SKB_DATA_ALIGN(min_headroom - > + skb_headroom(skb) + > + 16); > + > + err = pskb_expand_head(skb, max_t(int, head_delta, 0), > + 0, GFP_ATOMIC); > + if (unlikely(err)) > + goto err_free_rt; > + } > + > + if (vlan_tx_tag_present(skb)) { > + if (unlikely(!__vlan_put_tag(skb, > + skb->vlan_proto, > + vlan_tx_tag_get(skb)))) { > + err = -ENOMEM; > + goto err_free_rt; > + } > + vlan_set_tci(skb, 0); > + } > + > + skb_reset_inner_headers(skb); > + > + __skb_push(skb, GENEVE_BASE_HLEN + > OVS_CB(skb)->tun_info->options_len); > + skb_reset_transport_header(skb); > + > + geneve_build_header(vport, skb); > + > + /* Offloading */ > + err = handle_offloads(skb); > + if (err) > + goto err_free_rt; > + > + df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; > + > + sent_len = iptunnel_xmit(rt, skb, > + saddr, tun_key->ipv4_dst, > + IPPROTO_UDP, tun_key->ipv4_tos, > + tun_key->ipv4_ttl, > + df, false); > + > + return sent_len > 0 ? 
sent_len + network_offset : sent_len; > + > +err_free_rt: > + ip_rt_put(rt); > +error: > + return err; > +} > + > +static const char *geneve_get_name(const struct vport *vport) > +{ > + struct geneve_port *geneve_port = geneve_vport(vport); > + return geneve_port->name; > +} > + > +const struct vport_ops ovs_geneve_vport_ops = { > + .type = OVS_VPORT_TYPE_GENEVE, > + .create = geneve_tnl_create, > + .destroy = geneve_tnl_destroy, > + .get_name = geneve_get_name, > + .get_options = geneve_get_options, > + .send = geneve_send, > +}; > diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c > index f30f090..d2a2602 100644 > --- a/datapath/vport-gre.c > +++ b/datapath/vport-gre.c > @@ -111,7 +111,7 @@ static int gre_rcv(struct sk_buff *skb, > > key = key_to_tunnel_id(tpi->key, tpi->seq); > ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, > - filter_tnl_flags(tpi->flags)); > + filter_tnl_flags(tpi->flags), NULL, 0); > > ovs_vport_receive(vport, skb, &tun_info); > return PACKET_RCVD; > diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c > index dbad387..64925b8 100644 > --- a/datapath/vport-lisp.c > +++ b/datapath/vport-lisp.c > @@ -242,7 +242,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) > > /* Save outer tunnel values */ > iph = ip_hdr(skb); > - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); > + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); > > /* Drop non-IP inner packets */ > inner_iph = (struct iphdr *)(lisph + 1); > diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c > index 41c1756..8a08af8 100644 > --- a/datapath/vport-vxlan.c > +++ b/datapath/vport-vxlan.c > @@ -68,7 +68,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff > *skb, __be32 vx_vni) > /* Save outer tunnel values */ > iph = ip_hdr(skb); > key = cpu_to_be64(ntohl(vx_vni) >> 8); > - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); > + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); > > 
ovs_vport_receive(vport, skb, &tun_info); > } > diff --git a/datapath/vport.c b/datapath/vport.c > index 5fce377..02ccc89 100644 > --- a/datapath/vport.c > +++ b/datapath/vport.c > @@ -43,6 +43,7 @@ static void ovs_vport_record_error(struct vport *, > static const struct vport_ops *vport_ops_list[] = { > &ovs_netdev_vport_ops, > &ovs_internal_vport_ops, > + &ovs_geneve_vport_ops, > #if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) > &ovs_gre_vport_ops, > &ovs_gre64_vport_ops, > diff --git a/datapath/vport.h b/datapath/vport.h > index c02daf5..bdd9a89 100644 > --- a/datapath/vport.h > +++ b/datapath/vport.h > @@ -217,6 +217,7 @@ void ovs_vport_receive(struct vport *, struct sk_buff *, > * add yours to the list at the top of vport.c. */ > extern const struct vport_ops ovs_netdev_vport_ops; > extern const struct vport_ops ovs_internal_vport_ops; > +extern const struct vport_ops ovs_geneve_vport_ops; > extern const struct vport_ops ovs_gre_vport_ops; > extern const struct vport_ops ovs_gre64_vport_ops; > extern const struct vport_ops ovs_vxlan_vport_ops; > diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h > index 57d40e3..4f84045 100644 > --- a/include/linux/openvswitch.h > +++ b/include/linux/openvswitch.h > @@ -215,6 +215,7 @@ enum ovs_vport_type { > OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ > OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ > OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */ > + OVS_VPORT_TYPE_GENEVE = 6, /* Geneve tunnel */ > OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */ > OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */ > __OVS_VPORT_TYPE_MAX > @@ -341,9 +342,9 @@ enum ovs_tunnel_key_attr { > OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ > OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. > */ > OVS_TUNNEL_KEY_ATTR_OAM, /* No argument, OAM frame. 
*/ > + OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options */ > __OVS_TUNNEL_KEY_ATTR_MAX > }; > - > #define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1) > > /** > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > index 63e66f3..2f03a2f 100644 > --- a/lib/dpif-linux.c > +++ b/lib/dpif-linux.c > @@ -579,6 +579,9 @@ get_vport_type(const struct dpif_linux_vport *vport) > case OVS_VPORT_TYPE_INTERNAL: > return "internal"; > > + case OVS_VPORT_TYPE_GENEVE: > + return "geneve"; > + > case OVS_VPORT_TYPE_GRE: > return "gre"; > > @@ -610,6 +613,8 @@ netdev_to_ovs_vport_type(const struct netdev *netdev) > return OVS_VPORT_TYPE_NETDEV; > } else if (!strcmp(type, "internal")) { > return OVS_VPORT_TYPE_INTERNAL; > + } else if (!strcmp(type, "geneve")) { > + return OVS_VPORT_TYPE_GENEVE; > } else if (strstr(type, "gre64")) { > return OVS_VPORT_TYPE_GRE64; > } else if (strstr(type, "gre")) { > diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c > index 4dca091..2c20622 100644 > --- a/lib/netdev-vport.c > +++ b/lib/netdev-vport.c > @@ -42,6 +42,7 @@ > > VLOG_DEFINE_THIS_MODULE(netdev_vport); > > +#define GENEVE_DST_PORT 6081 > #define VXLAN_DST_PORT 4789 > #define LISP_DST_PORT 4341 > > @@ -133,7 +134,8 @@ netdev_vport_needs_dst_port(const struct netdev *dev) > const char *type = netdev_get_type(dev); > > return (class->get_config == get_tunnel_config && > - (!strcmp("vxlan", type) || !strcmp("lisp", type))); > + (!strcmp("geneve", type) || !strcmp("vxlan", type) || > + !strcmp("lisp", type))); > } > > const char * > @@ -490,12 +492,15 @@ set_tunnel_config(struct netdev *dev_, const struct > smap *args) > } > } > > - /* Add a default destination port for VXLAN if none specified. */ > + /* Add a default destination port for tunnel ports if none specified. 
*/ > + if (!strcmp(type, "geneve") && !tnl_cfg.dst_port) { > + tnl_cfg.dst_port = htons(GENEVE_DST_PORT); > + } > + > if (!strcmp(type, "vxlan") && !tnl_cfg.dst_port) { > tnl_cfg.dst_port = htons(VXLAN_DST_PORT); > } > > - /* Add a default destination port for LISP if none specified. */ > if (!strcmp(type, "lisp") && !tnl_cfg.dst_port) { > tnl_cfg.dst_port = htons(LISP_DST_PORT); > } > @@ -623,7 +628,8 @@ get_tunnel_config(const struct netdev *dev, struct smap > *args) > uint16_t dst_port = ntohs(tnl_cfg.dst_port); > const char *type = netdev_get_type(dev); > > - if ((!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || > + if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) || > + (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || > (!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) { > smap_add_format(args, "dst_port", "%d", dst_port); > } > @@ -824,6 +830,8 @@ void > netdev_vport_tunnel_register(void) > { > static const struct vport_class vport_classes[] = { > + TUNNEL_CLASS("geneve", "geneve_system"), > + TUNNEL_CLASS("ipsec_geneve", "geneve_system"), > TUNNEL_CLASS("gre", "gre_system"), > TUNNEL_CLASS("ipsec_gre", "gre_system"), > TUNNEL_CLASS("gre64", "gre64_system"), > diff --git a/lib/odp-util.c b/lib/odp-util.c > index 8f71c7c..2807474 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -833,12 +833,46 @@ tunnel_key_attr_len(int type) > case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; > case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; > case OVS_TUNNEL_KEY_ATTR_OAM: return 0; > + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: return -2; > case __OVS_TUNNEL_KEY_ATTR_MAX: > return -1; > } > return -1; > } > > +#define GENEVE_OPT(class, type) ((OVS_FORCE uint32_t)(class) << 8 | (type)) > +static int > +parse_geneve_opts(const struct nlattr *attr, struct flow_tnl *tun) > +{ > + int opts_len = nl_attr_get_size(attr); > + const struct geneve_opt *opt = nl_attr_get(attr); > + > + while (opts_len > 0) { > + int len; > + > + if (opts_len < 
sizeof(*opt)) { > + return -EINVAL; > + } > + > + len = sizeof(*opt) + opt->length * 4; > + if (len > opts_len) { > + return -EINVAL; > + } > + > + switch (GENEVE_OPT(opt->opt_class, opt->type)) { > + default: > + if (opt->type & GENEVE_CRIT_OPT_TYPE) { > + return -EINVAL; > + } > + }; > + > + opt = (struct geneve_opt *)((uint8_t *)opt + len); > + opts_len -= len; > + }; > + > + return 0; > +} > + > enum odp_key_fitness > odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) > { > @@ -883,6 +917,15 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct > flow_tnl *tun) > case OVS_TUNNEL_KEY_ATTR_OAM: > tun->flags |= FLOW_TNL_F_OAM; > break; > + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: { > + if (parse_geneve_opts(a, tun)) { > + return ODP_FIT_ERROR; > + } > + /* It is necessary to reproduce options exactly (including order) > + * so it's easiest to just echo them back. */ > + unknown = true; > + break; > + } > default: > /* Allow this to show up as unexpected, if there are unknown > * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. 
*/ > diff --git a/lib/packets.h b/lib/packets.h > index 4575dd0..c04e3bb 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -674,6 +674,24 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type) > || dl_type == htons(ETH_TYPE_IPV6); > } > > +#define GENEVE_CRIT_OPT_TYPE (1 << 7) > +struct geneve_opt { > + ovs_be16 opt_class; > + uint8_t type; > +#ifdef LITTLE_ENDIAN > + uint8_t length:5; > + uint8_t r3:1; > + uint8_t r2:1; > + uint8_t r1:1; > +#else > + uint8_t r1:1; > + uint8_t r2:1; > + uint8_t r3:1; > + uint8_t length:5; > +#endif > + uint8_t opt_data[]; > +}; > + > void format_ipv6_addr(char *addr_str, const struct in6_addr *addr); > void print_ipv6_addr(struct ds *string, const struct in6_addr *addr); > void print_ipv6_masked(struct ds *string, const struct in6_addr *addr, > diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at > index 440bf1a..1e9c278 100644 > --- a/tests/ovs-vsctl.at > +++ b/tests/ovs-vsctl.at > @@ -1203,6 +1203,7 @@ m4_foreach( > [reserved_name], > [[ovs-netdev], > [ovs-dummy], > +[geneve_system], > [gre_system], > [gre64_system], > [lisp_system], > @@ -1233,11 +1234,14 @@ OVS_VSWITCHD_START([add-port br0 p1 -- set Interface > p1 type=gre \ > -- add-port br0 p3 -- set Interface p3 type=lisp \ > options:remote_ip=2.2.2.2 ofport_request=3 \ > -- add-port br0 p4 -- set Interface p4 type=vxlan \ > - options:remote_ip=2.2.2.2 ofport_request=4]) > + options:remote_ip=2.2.2.2 ofport_request=4 \ > + -- add-port br0 p5 -- set Interface p5 type=geneve \ > + options:remote_ip=2.2.2.2 ofport_request=5]) > > # Test creating all reserved tunnel port names > m4_foreach( > [reserved_name], > +[geneve_system], > [[gre_system], > [gre64_system], > [lisp_system], > diff --git a/tests/tunnel.at b/tests/tunnel.at > index aa16d58..2ae8179 100644 > --- a/tests/tunnel.at > +++ b/tests/tunnel.at > @@ -310,6 +310,18 @@ Datapath actions: drop > OVS_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([tunnel - Geneve]) > +OVS_VSWITCHD_START([add-port br0 p1 -- set 
Interface p1 type=geneve \ > + options:remote_ip=1.1.1.1 ofport_request=1 > options:dst_port=5000]) > + > +AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl > + br0 65534/100: (dummy) > + p1 1/5000: (geneve: dst_port=5000, remote_ip=1.1.1.1) > +]) > + > +OVS_VSWITCHD_STOP > +AT_CLEANUP > + > AT_SETUP([tunnel - VXLAN]) > OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ > options:remote_ip=1.1.1.1 ofport_request=1]) > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index acefed2..c3e10fb 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -1386,6 +1386,16 @@ > <dt><code>tap</code></dt> > <dd>A TUN/TAP device managed by Open vSwitch.</dd> > > + <dt><code>geneve</code></dt> > + <dd> > + An Ethernet over Geneve > (<code>http://tools.ietf.org/html/draft-gross-geneve-00</code>) > + IPv4 tunnel. > + > + Geneve supports options as a means to transport additional > metadata, > + however, currently only the 24-bit VNI is supported. This is > planned > + to be extended in the future. > + </dd> > + > <dt><code>gre</code></dt> > <dd> > An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4 > @@ -1458,8 +1468,9 @@ > <group title="Tunnel Options"> > <p> > These options apply to interfaces with <ref column="type"/> of > - <code>gre</code>, <code>ipsec_gre</code>, <code>gre64</code>, > - <code>ipsec_gre64</code>, <code>vxlan</code>, and <code>lisp</code>. > + <code>geneve</code>, <code>gre</code>, <code>ipsec_gre</code>, > + <code>gre64</code>, <code>ipsec_gre64</code>, <code>vxlan</code>, > + and <code>lisp</code>. > </p> > > <p> > -- Otherwise looks good. Acked-by: Pravin B Shelar <pshe...@nicira.com> > 1.9.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev