This commit implements the VXLAN tunneling protocol described at http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00
Multicast support is not yet implemented, but it's in the works. In my simple VM-based test rig, "netperf" performance was the same as GRE. Jesse Gross <je...@nicira.com> really deserves credit for most of datapath/vport-vxlan.c. It's mostly a mixed-up and simplified copy of datapath/vport-capwap.c with a bit of search-and-replace. Justin Pettit <jpet...@nicira.com> implemented the changes to debian/ovs-monitor-ipsec. I made only small changes there. I wrote a unit test and verified that it passed, but I didn't otherwise test the IPSEC support. I build (only) tested this on 2.6.25 (where it does not change anything), 2.6.26 through 2.6.34, inclusive, all on i386, plus 2.6.36 on x86-64. I ran netperf tests with VXLAN on 2.6.37 on i386 inside KVM. Signed-off-by: Ben Pfaff <b...@nicira.com> Bug #7558. --- NEWS | 4 + README | 2 +- datapath/Modules.mk | 3 +- datapath/linux/.gitignore | 1 + datapath/tunnel.h | 1 + datapath/vport-vxlan.c | 207 +++++++++++++++++++++++++++++++ datapath/vport.c | 1 + datapath/vport.h | 1 + debian/control | 4 +- debian/openvswitch-ipsec.init | 3 +- debian/openvswitch-switch.init | 2 + debian/ovs-monitor-ipsec | 117 +++++++++++------- include/openflow/nicira-ext.h | 9 +- include/openvswitch/datapath-protocol.h | 1 + lib/netdev-vport.c | 53 ++++++--- rhel/etc_init.d_openvswitch | 2 + tests/ovs-monitor-ipsec.at | 67 ++++++++++ vswitchd/vswitch.xml | 55 +++++++-- xenserver/etc_init.d_openvswitch | 2 + 19 files changed, 457 insertions(+), 78 deletions(-) create mode 100644 datapath/vport-vxlan.c diff --git a/NEWS b/NEWS index ff3bc44..540dbff 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,9 @@ Post-v1.2.0 ------------------------ + + - New support for the experimental VXLAN tunnel protocol (see + http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00) + and VXLAN over IPSEC. - OpenFlow: - Added an OpenFlow extension which allows the "output" action to accept NXM fields. 
diff --git a/README b/README index dddad3f..8b0289a 100644 --- a/README +++ b/README @@ -24,7 +24,7 @@ vSwitch supports the following features: * NIC bonding with or without LACP on upstream switch * NetFlow, sFlow(R), SPAN, RSPAN, and ERSPAN for increased visibility * QoS (Quality of Service) configuration, plus policing - * GRE, GRE over IPSEC, and CAPWAP tunneling + * GRE, GRE over IPSEC, CAPWAP, VXLAN, and VXLAN over IPSEC tunneling * 802.1ag connectivity fault management * OpenFlow 1.0 plus numerous extensions * Transactional configuration database with C and Python bindings diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 087cf44..c59a202 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -25,7 +25,8 @@ openvswitch_sources = \ vport-gre.c \ vport-internal_dev.c \ vport-netdev.c \ - vport-patch.c + vport-patch.c \ + vport-vxlan.c openvswitch_headers = \ actions.h \ diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore index 0aee746..37cb9ff 100644 --- a/datapath/linux/.gitignore +++ b/datapath/linux/.gitignore @@ -38,4 +38,5 @@ /vport-internal_dev.c /vport-netdev.c /vport-patch.c +/vport-vxlan.c /vport.c diff --git a/datapath/tunnel.h b/datapath/tunnel.h index e7bafbc..59ad47e 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -29,6 +29,7 @@ */ #define TNL_T_PROTO_GRE 0 #define TNL_T_PROTO_CAPWAP 1 +#define TNL_T_PROTO_VXLAN 2 /* These flags are only needed when calling tnl_find_port(). */ #define TNL_T_KEY_EXACT (1 << 10) diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c new file mode 100644 index 0000000..a6e5439 --- /dev/null +++ b/datapath/vport-vxlan.c @@ -0,0 +1,207 @@ + /* + * Copyright (c) 2011 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) + +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/net.h> +#include <linux/udp.h> + +#include <net/icmp.h> + +#include "tunnel.h" +#include "vport.h" +#include "vport-generic.h" + +#define VXLAN_DST_PORT 49170 +#define VXLAN_IPSEC_SRC_PORT 49171 + +#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ + +/** + * struct vxlanhdr - VXLAN header + * @vx_flags: Must have the exact value %VXLAN_FLAGS. + * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed. + */ +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +}; + +static struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb) +{ + return (struct vxlanhdr *)(udp_hdr(skb) + 1); +} + +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + +static struct socket *vxlan_rcv_socket; + +static int vxlan_hdr_len(const struct tnl_mutable_config *mutable) +{ + return VXLAN_HLEN; +} + +static __be16 get_src_port(const struct sk_buff *skb, + const struct tnl_mutable_config *mutable) +{ + if (mutable->flags & TNL_F_IPSEC) + return htons(VXLAN_IPSEC_SRC_PORT); + + /* Convert hash into a port between 32768 and 65535. 
*/ + return (__force __be16)OVS_CB(skb)->flow->hash | htons(32768); +} +static void vxlan_build_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + void *header) +{ + struct udphdr *udph = header; + struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1); + + udph->dest = htons(VXLAN_DST_PORT); + udph->check = 0; + + vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_vni = htonl(be64_to_cpu(mutable->out_key) << 8); +} + +static struct sk_buff *vxlan_update_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + struct dst_entry *dst, + struct sk_buff *skb) +{ + struct udphdr *udph = udp_hdr(skb); + struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1); + + if (mutable->flags & TNL_F_OUT_KEY_ACTION) + vxh->vx_vni = htonl(be64_to_cpu(OVS_CB(skb)->tun_id) << 8); + + udph->source = get_src_port(skb, mutable); + udph->len = htons(skb->len - skb_transport_offset(skb)); + + return skb; +} + +/* Called with rcu_read_lock and BH disabled. */ +static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct vport *vport; + struct vxlanhdr *vxh; + const struct tnl_mutable_config *mutable; + struct iphdr *iph; + __be64 key; + + if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN))) + goto error; + + vxh = vxlan_hdr(skb); + if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) || + vxh->vx_vni & htonl(0xff))) + goto error; + + __skb_pull(skb, VXLAN_HLEN); + skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN); + + key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8); + + iph = ip_hdr(skb); + vport = tnl_find_port(iph->daddr, iph->saddr, key, + TNL_T_PROTO_VXLAN | TNL_T_KEY_EITHER, &mutable); + if (unlikely(!vport)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + goto error; + } + + if (mutable->flags & TNL_F_IN_KEY_MATCH) + OVS_CB(skb)->tun_id = key; + else + OVS_CB(skb)->tun_id = 0; + + tnl_rcv(vport, skb, iph->tos); + goto out; + +error: + kfree_skb(skb); +out: + return 0; +} + +static const struct 
tnl_ops vxlan_tnl_ops = { + .tunnel_type = TNL_T_PROTO_VXLAN, + .ipproto = IPPROTO_UDP, + .hdr_len = vxlan_hdr_len, + .build_header = vxlan_build_header, + .update_header = vxlan_update_header, +}; + +static struct vport *vxlan_create(const struct vport_parms *parms) +{ + return tnl_create(parms, &vxlan_vport_ops, &vxlan_tnl_ops); +} + +/* Random value. Irrelevant as long as it's not 0 since we set the handler. */ +#define UDP_ENCAP_VXLAN 10 +static int vxlan_init(void) +{ + int err; + struct sockaddr_in sin; + + err = sock_create(AF_INET, SOCK_DGRAM, 0, &vxlan_rcv_socket); + if (err) + goto error; + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(VXLAN_DST_PORT); + + err = kernel_bind(vxlan_rcv_socket, (struct sockaddr *)&sin, + sizeof(struct sockaddr_in)); + if (err) + goto error_sock; + + udp_sk(vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN; + udp_sk(vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv; + + return 0; + +error_sock: + sock_release(vxlan_rcv_socket); +error: + pr_warn("cannot register vxlan protocol handler\n"); + return err; +} + +static void vxlan_exit(void) +{ + sock_release(vxlan_rcv_socket); +} + +const struct vport_ops vxlan_vport_ops = { + .type = OVS_VPORT_TYPE_VXLAN, + .flags = VPORT_F_TUN_ID, + .init = vxlan_init, + .exit = vxlan_exit, + .create = vxlan_create, + .destroy = tnl_destroy, + .set_addr = tnl_set_addr, + .get_name = tnl_get_name, + .get_addr = tnl_get_addr, + .get_options = tnl_get_options, + .set_options = tnl_set_options, + .get_dev_flags = vport_gen_get_dev_flags, + .is_running = vport_gen_is_running, + .get_operstate = vport_gen_get_operstate, + .send = tnl_send, +}; +#else +#warning VXLAN tunneling will not be available on kernels before 2.6.26 +#endif /* Linux kernel < 2.6.26 */ diff --git a/datapath/vport.c b/datapath/vport.c index ad5a10e..d577639 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -31,6 +31,7 @@ static const struct vport_ops *base_vport_ops_list[] = { 
&gre_vport_ops, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) &capwap_vport_ops, + &vxlan_vport_ops, #endif }; diff --git a/datapath/vport.h b/datapath/vport.h index b6b94e0..34a3d0e 100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@ -252,5 +252,6 @@ extern const struct vport_ops internal_vport_ops; extern const struct vport_ops patch_vport_ops; extern const struct vport_ops gre_vport_ops; extern const struct vport_ops capwap_vport_ops; +extern const struct vport_ops vxlan_vport_ops; #endif /* vport.h */ diff --git a/debian/control b/debian/control index 1f3387a..4c23e59 100644 --- a/debian/control +++ b/debian/control @@ -60,9 +60,9 @@ Depends: openvswitch-common (= ${binary:Version}), openvswitch-switch (= ${binary:Version}), python-openvswitch (= ${source:Version}) -Description: Open vSwitch GRE-over-IPsec support +Description: Open vSwitch support for GRE and VXLAN over IPsec The ovs-monitor-ipsec script provides support for encrypting GRE - tunnels with IPsec. + and VXLAN tunnels with IPsec. . Open vSwitch is a full-featured software-based Ethernet switch. diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init index 17835a5..bb9a5bd 100755 --- a/debian/openvswitch-ipsec.init +++ b/debian/openvswitch-ipsec.init @@ -1,5 +1,6 @@ #!/bin/sh # +# Copyright (c) 2011 Nicira Networks # Copyright (c) 2007, 2009 Javier Fernandez-Sanguino <j...@debian.org> # # This is free software; you may redistribute it and/or modify @@ -23,7 +24,7 @@ # Required-Stop: $remote_fs # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 -# Short-Description: Open vSwitch GRE-over-IPsec daemon +# Short-Description: Open vSwitch IPsec tunnel daemon ### END INIT INFO PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init index 3d187a0..d9e57ca 100755 --- a/debian/openvswitch-switch.init +++ b/debian/openvswitch-switch.init @@ -63,7 +63,9 @@ start () { fi "$@" || exit $? 
+ # Allow tunnel traffic. ovs_ctl --protocol=gre enable-protocol + ovs_ctl --protocol=udp --dport=49170 enable-protocol } stop () { diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec index ac2cd7e..19abe1a 100755 --- a/debian/ovs-monitor-ipsec +++ b/debian/ovs-monitor-ipsec @@ -14,9 +14,10 @@ # limitations under the License. -# A daemon to monitor attempts to create GRE-over-IPsec tunnels. -# Uses racoon and setkey to support the configuration. Assumes that -# OVS has complete control over IPsec configuration for the box. +# A daemon to monitor attempts to create tunnels over IPsec. +# Racoon and setkey are used to support the configuration. It is +# assumed that OVS has complete control over IPsec configuration for +# the box. # xxx To-do: # - Doesn't actually check that Interface is connected to bridge @@ -41,7 +42,12 @@ import ovs.vlog vlog = ovs.vlog.Vlog("ovs-monitor-ipsec") root_prefix = '' # Prefix for absolute file names, for testing. -setkey = "/usr/sbin/setkey" +SETKEY = "/usr/sbin/setkey" + +# UDP ports used for VXLAN. The source port is only fixed for +# VXLAN-over-IPsec traffic. +VXLAN_DST_PORT = 49170 +VXLAN_SRC_PORT = 49171 # Class to configure the racoon daemon, which handles IKE negotiation @@ -251,17 +257,17 @@ path certificate "%s"; # Class to configure IPsec on a system using racoon for IKE and setkey # for maintaining the Security Association Database (SAD) and Security -# Policy Database (SPD). Only policies for GRE are supported. +# Policy Database (SPD). Only policies for GRE and VXLAN are supported. 
class IPsec: def __init__(self): self.sad_flush() self.spd_flush() self.racoon = Racoon() - self.entries = [] + self.entries = {} def call_setkey(self, cmds): try: - p = subprocess.Popen([root_prefix + setkey, "-c"], + p = subprocess.Popen([root_prefix + SETKEY, "-c"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) except: @@ -317,26 +323,44 @@ class IPsec: self.call_setkey("spdflush;\n") def spd_add(self, local_ip, remote_ip): - cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" % - (local_ip, remote_ip)) - cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" % - (remote_ip, local_ip)) + tunnel_type = self.entries[remote_ip] + if tunnel_type == "vxlan": + cmds = ("spdadd %s[any] %s[any] udp -P out ipsec esp/transport/%s[%s]-%s[%s]/require;\n" + % (local_ip, remote_ip, local_ip, VXLAN_SRC_PORT, + remote_ip, VXLAN_DST_PORT)) + cmds += ("spdadd %s[any] %s[any] udp -P in ipsec esp/transport/%s[%s]-%s[%s]/require;\n" + % (remote_ip, local_ip, remote_ip, VXLAN_DST_PORT, + local_ip, VXLAN_SRC_PORT)) + else: + cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" % + (local_ip, remote_ip)) + cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" % + (remote_ip, local_ip)) self.call_setkey(cmds) def spd_del(self, local_ip, remote_ip): - cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip) - cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip) + tunnel_type = self.entries[remote_ip] + if tunnel_type == "vxlan": + cmds = ("spddelete %s %s udp -P out;\n" % (local_ip, remote_ip)) + cmds += ("spddelete %s %s udp -P in;\n" % (remote_ip, local_ip)) + else: + cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip) + cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip) self.call_setkey(cmds) def add_entry(self, local_ip, remote_ip, vals): + tunnel_type = vals["tunnel_type"] + if tunnel_type not in ("gre", "vxlan"): + raise error.Error("unknown tunnel type: %s" % tunnel_type) + if remote_ip in 
self.entries: raise error.Error("host %s already configured for ipsec" % remote_ip) self.racoon.add_entry(remote_ip, vals) - self.spd_add(local_ip, remote_ip) - self.entries.append(remote_ip) + self.entries[remote_ip] = tunnel_type + self.spd_add(local_ip, remote_ip) def del_entry(self, local_ip, remote_ip): if remote_ip in self.entries: @@ -344,7 +368,7 @@ class IPsec: self.spd_del(local_ip, remote_ip) self.sad_del(local_ip, remote_ip) - self.entries.remove(remote_ip) + del self.entries[remote_ip] def keep_table_columns(schema, table_name, column_types): @@ -463,36 +487,43 @@ def main(): new_interfaces = {} for rec in idl.tables["Interface"].rows.itervalues(): if rec.type == "ipsec_gre": - name = rec.name - options = rec.options - entry = { - "remote_ip": options.get("remote_ip"), - "local_ip": options.get("local_ip", "0.0.0.0/0"), - "certificate": options.get("certificate"), - "private_key": options.get("private_key"), - "use_ssl_cert": options.get("use_ssl_cert"), - "peer_cert": options.get("peer_cert"), - "psk": options.get("psk")} - - if entry["peer_cert"] and entry["psk"]: - vlog.warn("both 'peer_cert' and 'psk' defined for %s" - % name) - continue - elif not entry["peer_cert"] and not entry["psk"]: - vlog.warn("no 'peer_cert' or 'psk' defined for %s" % name) - continue + tunnel_type = "gre" + elif rec.type == "ipsec_vxlan": + tunnel_type = "vxlan" + else: + continue + + name = rec.name + options = rec.options + entry = { + "remote_ip": options.get("remote_ip"), + "local_ip": options.get("local_ip", "0.0.0.0/0"), + "certificate": options.get("certificate"), + "private_key": options.get("private_key"), + "use_ssl_cert": options.get("use_ssl_cert"), + "peer_cert": options.get("peer_cert"), + "psk": options.get("psk"), + "tunnel_type": tunnel_type } + + if entry["peer_cert"] and entry["psk"]: + vlog.warn("both 'peer_cert' and 'psk' defined for %s" + % name) + continue + elif not entry["peer_cert"] and not entry["psk"]: + vlog.warn("no 'peer_cert' or 'psk' 
defined for %s" % name) + continue - # The "use_ssl_cert" option is deprecated and will - # likely go away in the near future. - if entry["use_ssl_cert"] == "true": - if not ssl_cert: - vlog.warn("no valid SSL entry for %s" % name) - continue + # The "use_ssl_cert" option is deprecated and will + # likely go away in the near future. + if entry["use_ssl_cert"] == "true": + if not ssl_cert: + vlog.warn("no valid SSL entry for %s" % name) + continue - entry["certificate"] = ssl_cert[0] - entry["private_key"] = ssl_cert[1] + entry["certificate"] = ssl_cert[0] + entry["private_key"] = ssl_cert[1] - new_interfaces[name] = entry + new_interfaces[name] = entry if interfaces != new_interfaces: update_ipsec(ipsec, interfaces, new_interfaces) diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index 51f21f0..67b2717 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -1444,9 +1444,12 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24); /* Tunnel ID. * - * For a packet received via GRE tunnel including a (32-bit) key, the key is - * stored in the low 32-bits and the high bits are zeroed. For other packets, - * the value is 0. + * The key, for a packet received via a keyed tunnel. If the key is less than + * 64 bits wide, this field holds the key in its low-order bits and higher bits are + * zeroed. If the key is more than 64 bits wide, this field contains the 64 + * lowest-order bits. + * + * All zero bits, for packets not received via a keyed tunnel. * * Prereqs: None. 
* diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index 6c89411..58db169 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -200,6 +200,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_PATCH, /* virtual tunnel connecting two vports */ OVS_VPORT_TYPE_GRE, /* GRE tunnel */ OVS_VPORT_TYPE_CAPWAP, /* CAPWAP tunnel */ + OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */ __OVS_VPORT_TYPE_MAX }; diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 06ec8fb..56216f2 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -134,11 +134,27 @@ netdev_vport_get_vport_type(const struct netdev *netdev) : OVS_VPORT_TYPE_UNSPEC); } -const char * -netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport) +static const char * +get_maybe_ipsec_tunnel_type(const struct dpif_linux_vport *vport, + const char *plain_type, const char *ipsec_type) { struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1]; + uint32_t flags; + + if (tnl_port_config_from_nlattr(vport->options, vport->options_len, a)) { + VLOG_WARN_RL(&rl, "dp%d: cannot parse options for port `%s' (type %u)", + vport->dp_ifindex, vport->name, + (unsigned int) vport->type); + return "unknown"; + } + flags = nl_attr_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]); + return flags & TNL_F_IPSEC ? ipsec_type : plain_type; +} + +const char * +netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport) +{ switch (vport->type) { case OVS_VPORT_TYPE_UNSPEC: break; @@ -153,16 +169,14 @@ netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport) return "patch"; case OVS_VPORT_TYPE_GRE: - if (tnl_port_config_from_nlattr(vport->options, vport->options_len, - a)) { - break; - } - return (nl_attr_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_IPSEC - ? 
"ipsec_gre" : "gre"); + return get_maybe_ipsec_tunnel_type(vport, "gre", "ipsec_gre"); case OVS_VPORT_TYPE_CAPWAP: return "capwap"; + case OVS_VPORT_TYPE_VXLAN: + return get_maybe_ipsec_tunnel_type(vport, "vxlan", "ipsec_vxlan"); + case __OVS_VPORT_TYPE_MAX: break; } @@ -566,19 +580,18 @@ static int parse_tunnel_config(const char *name, const char *type, const struct shash *args, struct ofpbuf *options) { - bool is_gre = false; - bool is_ipsec = false; + bool supports_csum; + bool is_ipsec; struct shash_node *node; bool ipsec_mech_set = false; ovs_be32 daddr = htonl(0); uint32_t flags; + supports_csum = !strcmp(type, "gre") || !strcmp(type, "ipsec_gre"); + is_ipsec = !strncmp(type, "ipsec_", 6); + flags = TNL_F_DF_DEFAULT | TNL_F_PMTUD | TNL_F_HDR_CACHE; - if (!strcmp(type, "gre")) { - is_gre = true; - } else if (!strcmp(type, "ipsec_gre")) { - is_gre = true; - is_ipsec = true; + if (is_ipsec) { flags |= TNL_F_IPSEC; flags &= ~TNL_F_HDR_CACHE; } @@ -611,7 +624,7 @@ parse_tunnel_config(const char *name, const char *type, } else { nl_msg_put_u8(options, OVS_TUNNEL_ATTR_TTL, atoi(node->data)); } - } else if (!strcmp(node->name, "csum") && is_gre) { + } else if (!strcmp(node->name, "csum") && supports_csum) { if (!strcmp(node->data, "true")) { flags |= TNL_F_CSUM; } @@ -947,6 +960,14 @@ netdev_vport_register(void) { "capwap", VPORT_FUNCTIONS(netdev_vport_get_status) }, parse_tunnel_config, unparse_tunnel_config }, + { OVS_VPORT_TYPE_VXLAN, + { "vxlan", VPORT_FUNCTIONS(netdev_vport_get_status) }, + parse_tunnel_config, unparse_tunnel_config }, + + { OVS_VPORT_TYPE_VXLAN, + { "ipsec_vxlan", VPORT_FUNCTIONS(netdev_vport_get_status) }, + parse_tunnel_config, unparse_tunnel_config }, + { OVS_VPORT_TYPE_PATCH, { "patch", VPORT_FUNCTIONS(NULL) }, parse_patch_config, unparse_patch_config } diff --git a/rhel/etc_init.d_openvswitch b/rhel/etc_init.d_openvswitch index 5501d18..113d99a 100755 --- a/rhel/etc_init.d_openvswitch +++ b/rhel/etc_init.d_openvswitch @@ -47,7 +47,9 @@ 
start () { fi "$@" + # Allow tunnel traffic. $ovs_ctl --protocol=gre enable-protocol + $ovs_ctl --protocol=udp --dport=49170 enable-protocol touch /var/lock/subsys/openvswitch } diff --git a/tests/ovs-monitor-ipsec.at b/tests/ovs-monitor-ipsec.at index f9868e7..19a834a 100644 --- a/tests/ovs-monitor-ipsec.at +++ b/tests/ovs-monitor-ipsec.at @@ -308,4 +308,71 @@ sainfo anonymous { ]) AT_CHECK([test ! -f etc/racoon/certs/ovs-3.4.5.6.pem]) +### +### Add an ipsec_vxlan psk interface and check what ovs-monitor-ipsec does +### +AT_CHECK([ovs_vsctl \ + -- add-port br0 vxlan0 \ + -- set interface vxlan0 type=ipsec_vxlan \ + options:remote_ip=4.5.6.7 \ + options:psk=mishmash]) +OVS_WAIT_UNTIL([test -f actions && grep 'spdadd 4.5.6.7' actions >/dev/null]) +AT_CHECK([sed '1,41d' actions], [0], +[[racoon: reload +setkey: +> spdadd 0.0.0.0/0[any] 4.5.6.7[any] udp -P out ipsec esp/transport/0.0.0.0/0[49171]-4.5.6.7[49170]/require; +> spdadd 4.5.6.7[any] 0.0.0.0/0[any] udp -P in ipsec esp/transport/4.5.6.7[49170]-0.0.0.0/0[49171]/require; +]]) +AT_CHECK([trim etc/racoon/psk.txt], [0], [4.5.6.7 mishmash +]) +AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl +path pre_shared_key "/etc/racoon/psk.txt"; +path certificate "/etc/racoon/certs"; +remote 4.5.6.7 { + exchange_mode main; + nat_traversal on; + proposal { + encryption_algorithm aes; + hash_algorithm sha1; + authentication_method pre_shared_key; + dh_group 2; + } +} +sainfo anonymous { + pfs_group 2; + lifetime time 1 hour; + encryption_algorithm aes; + authentication_algorithm hmac_sha1, hmac_md5; + compression_algorithm deflate; +} +]) + +### +### Delete the ipsec_vxlan interface and check what ovs-monitor-ipsec does +### +AT_CHECK([ovs_vsctl del-port vxlan0]) +OVS_WAIT_UNTIL([test `wc -l < actions` -ge 17]) +AT_CHECK([sed '1,45d' actions], [0], [dnl +racoon: reload +setkey: +> spddelete 0.0.0.0/0 4.5.6.7 udp -P out; +> spddelete 4.5.6.7 0.0.0.0/0 udp -P in; +setkey: +> dump ; +setkey: +> dump ; +]) +AT_CHECK([trim 
etc/racoon/psk.txt], [0], []) +AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl +path pre_shared_key "/etc/racoon/psk.txt"; +path certificate "/etc/racoon/certs"; +sainfo anonymous { + pfs_group 2; + lifetime time 1 hour; + encryption_algorithm aes; + authentication_algorithm hmac_sha1, hmac_md5; + compression_algorithm deflate; +} +]) + AT_CLEANUP diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index d579b87..ea50733 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -916,8 +916,7 @@ <dt><code>gre</code></dt> <dd> An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4 - tunnel. See <ref group="Tunnel Options"/> for information on - configuring GRE tunnels. + tunnel. </dd> <dt><code>ipsec_gre</code></dt> @@ -936,6 +935,28 @@ with the Linux kernel datapath with kernel version 2.6.26 or later. </dd> + <dt><code>vxlan</code></dt> + <dd> + <p> + An Ethernet tunnel over the experimental, UDP-based VXLAN + protocol described at + <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00</code>. + VXLAN is currently supported only with the Linux kernel datapath + with kernel version 2.6.26 or later. + </p> + <p> + As an experimental protocol, VXLAN has no officially assigned UDP + port. Open vSwitch currently uses UDP destination port 49170. + The source port used for VXLAN traffic varies on a per-flow basis + between 32768 and 65535 to allow load balancing. + </p> + </dd> + + <dt><code>ipsec_vxlan</code></dt> + <dd> + VXLAN over an IPSEC tunnel. + </dd> + <dt><code>patch</code></dt> <dd> A pair of virtual devices that act as a patch cable. @@ -950,7 +971,8 @@ <group title="Tunnel Options"> <p> These options apply to interfaces with <ref column="type"/> of - <code>gre</code>, <code>ipsec_gre</code>, and <code>capwap</code>. + <code>gre</code>, <code>ipsec_gre</code>, <code>capwap</code>, and + <code>vxlan</code>. </p> <p> @@ -983,8 +1005,9 @@ key="in_key"/> at all. 
</li> <li> - A positive 32-bit (for GRE) or 64-bit (for CAPWAP) number. The - tunnel receives only packets with the specified key. + A positive 24-bit (for VXLAN), 32-bit (for GRE) or 64-bit (for + CAPWAP) number. The tunnel receives only packets with the + specified key. </li> <li> The word <code>flow</code>. The tunnel accepts packets with any @@ -1009,8 +1032,9 @@ key="out_key"/> at all. </li> <li> - A positive 32-bit (for GRE) or 64-bit (for CAPWAP) number. Packets - sent through the tunnel will have the specified key. + A positive 24-bit (for VXLAN), 32-bit (for GRE) or 64-bit (for + CAPWAP) number. Packets sent through the tunnel will have the + specified key. </li> <li> The word <code>flow</code>. Packets sent through the tunnel will @@ -1068,9 +1092,10 @@ enabled; set to <code>false</code> to disable. </column> - <group title="Tunnel Options: gre only"> + <group title="Tunnel Options: gre and vxlan only"> <p> - Only <code>gre</code> interfaces support these options. + Only <code>gre</code> and <code>vxlan</code> interfaces support these + options. </p> <column name="options" key="header_cache" type='{"type": "boolean"}'> @@ -1112,11 +1137,19 @@ </column> </group> - <group title="Tunnel Options: ipsec_gre only"> + <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only"> <p> - Only <code>ipsec_gre</code> interfaces support these options. + Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces + support these options. </p> + <p> + These options are implemented through a separate daemon named + <code>ovs-monitor-ipsec</code> that so far has only been ported to + and packaged for Debian (including derivative distributions such as + Ubuntu). + </p> + <column name="options" key="peer_cert"> Required for certificate authentication. A string containing the peer's certificate in PEM format. 
Additionally the host's diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch index 8ba8aee..75d8391 100755 --- a/xenserver/etc_init.d_openvswitch +++ b/xenserver/etc_init.d_openvswitch @@ -76,7 +76,9 @@ start () { --log-file --pidfile --detach --monitor unix:/var/run/openvswitch/db.sock fi + # Allow tunnel traffic. $ovs_ctl --protocol=gre enable-protocol + $ovs_ctl --protocol=udp --dport=49170 enable-protocol touch /var/lock/subsys/openvswitch } -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev