Re: [ovs-dev] [PATCH v4] datapath: Add support for VXLAN tunnels to Open vSwitch

Chris Wright Tue, 27 Nov 2012 19:25:05 -0800

* Kyle Mestery (kmest...@cisco.com) wrote:
> @@ -23,6 +23,7 @@
>  /kmemdup.c
>  /loop_counter.c
>  /modules.order
> +/net_namespace.c
>  /netdevice.c
>  /net_namespace.c
>  /random32.c


spurious change?

> @@ -39,5 +40,6 @@
>  /vport-internal_dev.c
>  /vport-netdev.c
>  /vport-patch.c
> +/vport-vxlan.c
>  /vport.c
>  /workqueue.c
> diff --git a/datapath/tunnel.c b/datapath/tunnel.c
> index fb4854a..05a73df 100644
> --- a/datapath/tunnel.c
> +++ b/datapath/tunnel.c
> @@ -1042,6 +1042,7 @@ static const struct nla_policy 
> tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
>       [OVS_TUNNEL_ATTR_IN_KEY]   = { .type = NLA_U64 },
>       [OVS_TUNNEL_ATTR_TOS]      = { .type = NLA_U8 },
>       [OVS_TUNNEL_ATTR_TTL]      = { .type = NLA_U8 },
> +     [OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
>  };
>  
>  /* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
> @@ -1087,6 +1088,9 @@ static int tnl_set_config(struct net *net, struct 
> nlattr *options,
>       if (a[OVS_TUNNEL_ATTR_TTL])
>               mutable->ttl = nla_get_u8(a[OVS_TUNNEL_ATTR_TTL]);
>  
> +     if (a[OVS_TUNNEL_ATTR_DST_PORT])
> +             mutable->dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
> +
>       if (!a[OVS_TUNNEL_ATTR_IN_KEY]) {
>               mutable->key.tunnel_type |= TNL_T_KEY_MATCH;
>               mutable->flags |= TNL_F_IN_KEY_MATCH;
> @@ -1242,6 +1246,9 @@ int ovs_tnl_get_options(const struct vport *vport, 
> struct sk_buff *skb)
>               goto nla_put_failure;
>       if (mutable->ttl && nla_put_u8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl))
>               goto nla_put_failure;
> +     if (mutable->dst_port && nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
> +                                          mutable->dst_port))
> +             goto nla_put_failure;
>  
>       return 0;
>  
> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
> index c268057..c0b50e7 100644
> --- a/datapath/tunnel.h
> +++ b/datapath/tunnel.h
> @@ -42,6 +42,7 @@
>  #define TNL_T_PROTO_GRE              0
>  #define TNL_T_PROTO_GRE64    1
>  #define TNL_T_PROTO_CAPWAP   2
> +#define TNL_T_PROTO_VXLAN    3
>  
>  /* These flags are only needed when calling tnl_find_port(). */
>  #define TNL_T_KEY_EXACT              (1 << 10)
> @@ -116,6 +117,7 @@ struct tnl_mutable_config {
>       u32     flags;
>       u8      tos;
>       u8      ttl;
> +     u16     dst_port;
>  
>       /* Multicast configuration. */
>       int     mlink;
> diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
> new file mode 100644
> index 0000000..88e03d5
> --- /dev/null
> +++ b/datapath/vport-vxlan.c
> @@ -0,0 +1,459 @@
> + /*
> + * Copyright (c) 2011 Nicira, Inc.
> + * Copyright (c) 2012 Cisco Systems, Inc.
> + * Distributed under the terms of the GNU GPL version 2.
> + *
> + * Significant portions of this file may be copied from parts of the Linux
> + * kernel, by Linus Torvalds and others.

Only this and capwap have this unusual header bit.

> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/version.h>
> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
> +
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/net.h>
> +#include <linux/udp.h>
> +
> +#include <net/icmp.h>
> +#include <net/ip.h>
> +#include <net/udp.h>
> +
> +#include "datapath.h"
> +#include "tunnel.h"
> +#include "vport.h"
> +#include "vport-generic.h"
> +
> +/* Default to the OTV port, per the VXLAN IETF draft. */
> +#define VXLAN_DST_PORT 8472
> +
> +#define VXLAN_FLAGS 0x08000000  /* struct vxlanhdr.vx_flags required value. 
> */
> +
> +/**
> + * struct vxlanhdr - VXLAN header
> + * @vx_flags: Must have the exact value %VXLAN_FLAGS.
> + * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
> + */
> +struct vxlanhdr {
> +     __be32 vx_flags;
> +     __be32 vx_vni;
> +};
> +
> +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
> +
> +static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
> +                             const struct ovs_key_ipv4_tunnel *tun_key)
> +{
> +     return VXLAN_HLEN;
> +}
> +
> +/**
> + * struct vxlan_port - Keeps track of open UDP ports
> + * @port: The UDP port number.
> + * @socket: The socket created for this port number.
> + * @count: How many ports are using this socket/port.
> + * @hash_node: Hash node.
> + */
> +struct vxlan_port {
> +     u16 port;
> +     struct socket *vxlan_rcv_socket;
> +     int count;
> +
> +     /* Protected by RTNL lock. */
> +     struct hlist_node hash_node;
> +};
> +
> +/* Protected by RTNL lock. */
> +static struct hlist_head *vxlan_ports;
> +#define VXLAN_SOCK_HASH_BUCKETS 64
> +
> +/**
> + * struct vxlan_if - Maps port names to UDP port numbers
> + * @port: The UDP port number this interface is using.
> + * @ifname: The name of the interface.
> + * @hash_node: Hash node.
> + */
> +struct vxlan_if {
> +     u16 port;
> +     char ifname[IFNAMSIZ];
> +
> +     /* Protected by RTNL lock. */
> +     struct hlist_node hash_node;
> +};
> +
> +/* Protected by RTNL lock. */
> +static struct hlist_head *vxlan_ifs;
> +#define VXLAN_IF_HASH_BUCKETS 64
> +
> +static struct hlist_head *vxlan_hash_bucket(struct net *net, u16 port)
> +{
> +     unsigned int hash = jhash(&port, sizeof(port), (unsigned long) net);
> +     return &vxlan_ports[hash & (VXLAN_SOCK_HASH_BUCKETS - 1)];
> +}
> +
> +static struct vxlan_port *vxlan_port_exists(struct net *net, u16 port)
> +{
> +     struct hlist_head *bucket = vxlan_hash_bucket(net, port);
> +     struct vxlan_port *vxlan_port;
> +     struct hlist_node *node;
> +
> +     hlist_for_each_entry(vxlan_port, node, bucket, hash_node) {
> +             if (vxlan_port->port == port)
> +                     return vxlan_port;
> +     }
> +
> +     return NULL;
> +}
> +
> +static struct hlist_head *vxlanif_hash_bucket(struct net *net, const char 
> *name)
> +{
> +     unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
> +     return &vxlan_ifs[hash & (VXLAN_IF_HASH_BUCKETS - 1)];
> +}
> +
> +static struct vxlan_if *vxlan_if_by_name(struct net *net, const char *name)
> +{
> +     struct hlist_head *bucket = vxlanif_hash_bucket(net, name);
> +     struct vxlan_if *vxlan_if;
> +     struct hlist_node *node;
> +
> +     hlist_for_each_entry(vxlan_if, node, bucket, hash_node) {
> +             if (!strcmp(vxlan_if->ifname, name))
> +                     return vxlan_if;
> +     }
> +
> +     return NULL;
> +}
> +
> +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
> +{
> +     return (struct vxlanhdr *)(udp_hdr(skb) + 1);
> +}
> +
> +/* The below used as the min/max for the UDP port range */
> +#define VXLAN_SRC_PORT_MIN      32768
> +#define VXLAN_SRC_PORT_MAX      61000

This is consistent with Linux defaults, but inconsistent with comments
added below in vswitch.xml.  Any reason not to use inet_get_local_port_range()?

> +/* Compute source port for outgoing packet.
> + * Currently we use the flow hash.
> + */
> +static u16 get_src_port(struct sk_buff *skb)
> +{
> +     unsigned int range = (VXLAN_SRC_PORT_MAX - VXLAN_SRC_PORT_MIN) + 1;
> +     u32 hash = OVS_CB(skb)->flow->hash;
> +
> +     return (__force u16)(((u64) hash * range) >> 32) + VXLAN_SRC_PORT_MIN;
> +}
> +
> +static struct sk_buff *vxlan_build_header(const struct vport *vport,
> +                                       const struct tnl_mutable_config 
> *mutable,
> +                                       struct dst_entry *dst,
> +                                       struct sk_buff *skb,
> +                                       int tunnel_hlen)
> +{
> +     struct udphdr *udph = udp_hdr(skb);
> +     struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
> +     const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
> +     __be64 out_key;
> +
> +     if (tun_key->ipv4_dst)
> +             out_key = tun_key->tun_id;
> +     else
> +             out_key = mutable->out_key;
> +
> +     if (mutable->dst_port)
> +             udph->dest = htons(mutable->dst_port);
> +     else
> +             udph->dest = htons(VXLAN_DST_PORT);
> +     udph->source = htons(get_src_port(skb));
> +     udph->check = 0;
> +     udph->len = htons(skb->len - skb_transport_offset(skb));
> +
> +     vxh->vx_flags = htonl(VXLAN_FLAGS);
> +     vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
> +
> +     /*
> +      * Allow our local IP stack to fragment the outer packet even if the
> +      * DF bit is set as a last resort.  We also need to force selection of
> +      * an IP ID here because Linux will otherwise leave it at 0 if the
> +      * packet originally had DF set.
> +      */
> +     skb->local_df = 1;
> +     __ip_select_ident(ip_hdr(skb), dst, 0);
> +
> +     return skb;
> +}
> +
> +/* Called with rcu_read_lock and BH disabled. */
> +static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +     struct vport *vport;
> +     struct vxlanhdr *vxh;
> +     const struct tnl_mutable_config *mutable;
> +     struct iphdr *iph;
> +     struct ovs_key_ipv4_tunnel tun_key;
> +     int tunnel_type;
> +     __be64 key;
> +     u32 tunnel_flags = 0;
> +
> +     if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
> +             goto error;
> +
> +     vxh = vxlan_hdr(skb);
> +     if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
> +                  vxh->vx_vni & htonl(0xff)))
> +             goto error;
> +
> +     __skb_pull(skb, VXLAN_HLEN);
> +     skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + 
> ETH_HLEN);
> +
> +     key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
> +
> +     tunnel_type = TNL_T_PROTO_VXLAN;
> +
> +     iph = ip_hdr(skb);
> +     vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
> +             key, tunnel_type, &mutable);
> +     if (unlikely(!vport)) {
> +             icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
> +             goto error;
> +     }
> +
> +     if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
> +             tunnel_flags = OVS_TNL_F_KEY;
> +     else
> +             key = 0;
> +
> +     /* Save outer tunnel values */
> +     tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
> +     OVS_CB(skb)->tun_key = &tun_key;
> +
> +     ovs_tnl_rcv(vport, skb);
> +     goto out;
> +
> +error:
> +     kfree_skb(skb);
> +out:
> +     return 0;
> +}
> +
> +/* Random value.  Irrelevant as long as it's not 0 since we set the handler. */
> +#define UDP_ENCAP_VXLAN 10

Linux upstream uses 1.  Like you said, being non-zero is the only functional
requirement, but consistency is developer friendly ;)

> +static int vxlan_socket_init(struct vxlan_port *vxlan_port)
> +{
> +     int err;
> +     struct sockaddr_in sin;
> +
> +     err = sock_create(AF_INET, SOCK_DGRAM, 0, 
> &vxlan_port->vxlan_rcv_socket);
> +     if (err)
> +             goto error;
> +
> +     sin.sin_family = AF_INET;
> +     sin.sin_addr.s_addr = htonl(INADDR_ANY);
> +     sin.sin_port = htons(vxlan_port->port);
> +
> +     err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
> +                       sizeof(struct sockaddr_in));
> +     if (err)
> +             goto error_sock;
> +
> +     udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
> +     udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
> +
> +     udp_encap_enable();

This is a new function; did you provide a fallback/no-op for older kernels
(since it looks like this is meant to build back to kernels >= 2.6.26)?

> +
> +     return 0;
> +
> +error_sock:
> +     sock_release(vxlan_port->vxlan_rcv_socket);
> +error:
> +     pr_warn("cannot register vxlan protocol handler\n");
> +     return err;
> +}
> +
> +static const struct nla_policy vxlan_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
> +     [OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
> +};
> +
> +static int vxlan_tunnel_setup(struct net *net, const char *linkname,
> +                          struct nlattr *options)
> +{
> +     struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
> +     int err;
> +     u16 dst_port;
> +     struct vxlan_port *vxlan_port;
> +     struct vxlan_if *vxlan_if;
> +
> +     if (!options) {
> +             err = -EINVAL;
> +             goto out;
> +     }
> +
> +     err = nla_parse_nested(a, OVS_TUNNEL_ATTR_MAX, options, vxlan_policy);

This is already parsed in tnl_set_config() later.  So I'm not sure why
it's done twice during ->set_options() and ->create().


> +     if (err)
> +             goto out;
> +
> +     if (a[OVS_TUNNEL_ATTR_DST_PORT])
> +             dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
> +     else
> +             dst_port = VXLAN_DST_PORT;
> +
> +     /* Verify if we already have a socket created for this port */
> +     vxlan_port = vxlan_port_exists(net, dst_port);
> +     if (vxlan_port) {
> +             vxlan_port->count++;
> +             err = 0;
> +             goto out;
> +     }
> +
> +     /* Add a new socket for this port */
> +     vxlan_port = kmalloc(sizeof(struct vxlan_port), GFP_KERNEL);
> +     if (!vxlan_port) {
> +             err = -ENOMEM;
> +             goto out;
> +     }
> +     memset (vxlan_port, 0, sizeof(struct vxlan_port));

kzalloc()

> +
> +     vxlan_port->port = dst_port;
> +     vxlan_port->count++;
> +     hlist_add_head(&vxlan_port->hash_node,
> +                    vxlan_hash_bucket(net, dst_port));

A little unusual to have a hashtable for this.  Is this expected to be
temporary until IANA port is allocated?

> +
> +     err = vxlan_socket_init(vxlan_port);
> +     if (err)
> +             goto error_vxlan_if;
> +
> +     vxlan_if = kmalloc(sizeof(struct vxlan_if), GFP_KERNEL);
> +     if (!vxlan_if) {
> +             err = -ENOMEM;
> +             goto error_vxlan_if;
> +     }
> +     memset(vxlan_if, 0, sizeof(*vxlan_if));

kzalloc()

> +     vxlan_if->port = dst_port;
> +     memcpy(vxlan_if->ifname, linkname, IFNAMSIZ);
> +     hlist_add_head(&vxlan_if->hash_node,
> +                    vxlanif_hash_bucket(net, linkname));
> +
> +out:
> +     return err;
> +error_vxlan_if:
> +     hlist_del(&vxlan_port->hash_node);
> +     kfree(vxlan_port);
> +     goto out;
> +}
> +
> +static int vxlan_set_options(struct vport *vport, struct nlattr *options)
> +{
> +     int err;
> +     const char *vname = vport->ops->get_name(vport);
> +
> +     err = vxlan_tunnel_setup(ovs_dp_get_net(vport->dp), vname, options);
> +     if (err)
> +             goto out;
> +
> +     err = ovs_tnl_set_options(vport, options);
> +
> +out:
> +     return err;
> +}
> +
> +static const struct tnl_ops ovs_vxlan_tnl_ops = {
> +     .tunnel_type    = TNL_T_PROTO_VXLAN,
> +     .ipproto        = IPPROTO_UDP,
> +     .hdr_len        = vxlan_hdr_len,
> +     .build_header   = vxlan_build_header,
> +};
> +
> +void vxlan_tnl_destroy(struct vport *vport)
> +{
> +     struct vxlan_if *vxlan_if;
> +     struct vxlan_port *vxlan_port;
> +     const char *vname = vport->ops->get_name(vport);
> +
> +     vxlan_if = vxlan_if_by_name(ovs_dp_get_net(vport->dp), vname);
> +     if (!vxlan_if)
> +             goto out;
> +
> +     vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
> +                                      vxlan_if->port);
> +     if (!vxlan_port)
> +             goto out_if;

Are the above two actually valid failure cases on destroy?

> +     if (!--vxlan_port->count) {
> +             sock_release(vxlan_port->vxlan_rcv_socket);
> +             hlist_del(&vxlan_port->hash_node);
> +             kfree(vxlan_port);
> +     }
> +
> +out_if:
> +     hlist_del(&vxlan_if->hash_node);
> +     kfree(vxlan_if);
> +out:
> +     ovs_tnl_destroy(vport);
> +}
> +
> +static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
> +{
> +     int err;
> +
> +     err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->name,
> +                                             parms->options);
> +     return ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
> +}
> +
> +static int vxlan_init(void)
> +{
> +     int err;
> +
> +     vxlan_ifs = kzalloc(VXLAN_IF_HASH_BUCKETS * sizeof(struct hlist_head),
> +                         GFP_KERNEL);
> +     if (!vxlan_ifs) {
> +             err = -ENOMEM;
> +             goto out;
> +     }
> +
> +     vxlan_ports = kzalloc(VXLAN_SOCK_HASH_BUCKETS * sizeof(struct 
> hlist_head),
> +                             GFP_KERNEL);

Suppose these hash tables could just be statically allocated...

> +     if (!vxlan_ports) {
> +             err = -ENOMEM;
> +             goto free_ifs;
> +     }
> +
> +out:
> +     return 0;
> +free_ifs:
> +     kfree(vxlan_ifs);
> +     goto out;
> +}
> +
> +static void vxlan_exit(void)
> +{
> +     kfree(vxlan_ports);
> +     kfree(vxlan_ifs);

...which would obviate the need for ->exit()

> +}
> +
> +const struct vport_ops ovs_vxlan_vport_ops = {
> +     .type           = OVS_VPORT_TYPE_VXLAN,
> +     .flags          = VPORT_F_TUN_ID,
> +     .init           = vxlan_init,
> +     .exit           = vxlan_exit,
> +     .create         = vxlan_tnl_create,
> +     .destroy        = vxlan_tnl_destroy,
> +     .set_addr       = ovs_tnl_set_addr,
> +     .get_name       = ovs_tnl_get_name,
> +     .get_addr       = ovs_tnl_get_addr,
> +     .get_options    = ovs_tnl_get_options,
> +     .set_options    = vxlan_set_options,
> +     .get_dev_flags  = ovs_vport_gen_get_dev_flags,
> +     .is_running     = ovs_vport_gen_is_running,
> +     .get_operstate  = ovs_vport_gen_get_operstate,
> +     .send           = ovs_tnl_send,
> +};
> +#else
> +#warning VXLAN tunneling will not be available on kernels before 2.6.26
> +#endif /* Linux kernel < 2.6.26 */
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 4934ac1..a1c7542 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -45,6 +45,7 @@ static const struct vport_ops *base_vport_ops_list[] = {
>       &ovs_gre64_vport_ops,
>  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
>       &ovs_capwap_vport_ops,
> +     &ovs_vxlan_vport_ops,
>  #endif
>  };
>  
> diff --git a/datapath/vport.h b/datapath/vport.h
> index 5a7caf5..5080629 100644
> --- a/datapath/vport.h
> +++ b/datapath/vport.h
> @@ -257,5 +257,6 @@ extern const struct vport_ops ovs_gre_vport_ops;
>  extern const struct vport_ops ovs_gre_ft_vport_ops;
>  extern const struct vport_ops ovs_gre64_vport_ops;
>  extern const struct vport_ops ovs_capwap_vport_ops;
> +extern const struct vport_ops ovs_vxlan_vport_ops;
>  
>  #endif /* vport.h */
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index e7d4b49..2ae5681 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -186,6 +186,7 @@ enum ovs_vport_type {
>       OVS_VPORT_TYPE_PATCH = 100, /* virtual tunnel connecting two vports */
>       OVS_VPORT_TYPE_GRE,      /* GRE tunnel */
>       OVS_VPORT_TYPE_CAPWAP,   /* CAPWAP tunnel */
> +     OVS_VPORT_TYPE_VXLAN,    /* VXLAN tunnel */
>       OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
>       __OVS_VPORT_TYPE_MAX
>  };
> diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
> index 88eba19..11b761d 100644
> --- a/include/openflow/nicira-ext.h
> +++ b/include/openflow/nicira-ext.h
> @@ -1578,9 +1578,11 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
>  
>  /* Tunnel ID.
>   *
> - * For a packet received via GRE tunnel including a (32-bit) key, the key is
> - * stored in the low 32-bits and the high bits are zeroed.  For other packets,
> - * the value is 0.
> + * For a packet received via a GRE or VXLAN tunnel including a (32-bit) key, the
> + * key is stored in the low 32-bits and the high bits are zeroed.  For other
> + * packets, the value is 0.

Confine to 80 columns since you're touching it already?

> + *
> + * All zero bits, for packets not received via a keyed tunnel.
>   *
>   * Prereqs: None.
>   *
> diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
> index 42c3621..23d8ba7 100644
> --- a/include/openvswitch/tunnel.h
> +++ b/include/openvswitch/tunnel.h
> @@ -57,6 +57,7 @@ enum {
>       OVS_TUNNEL_ATTR_IN_KEY,   /* __be64 key to match on input. */
>       OVS_TUNNEL_ATTR_TOS,      /* 8-bit TOS value. */
>       OVS_TUNNEL_ATTR_TTL,      /* 8-bit TTL value. */
> +     OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by VXLAN. */
>       __OVS_TUNNEL_ATTR_MAX
>  };
>  
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 5171171..3dbb798 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -173,6 +173,13 @@ netdev_vport_get_netdev_type(const struct 
> dpif_linux_vport *vport)
>      case OVS_VPORT_TYPE_CAPWAP:
>          return "capwap";
>  
> +    case OVS_VPORT_TYPE_VXLAN:
> +        if (tnl_port_config_from_nlattr(vport->options, vport->options_len,
> +                                        a)) {
> +            break;

Should tnl_port_config_from_nlattr() grow an optional OVS_TUNNEL_ATTR_DST_PORT
check?  And similarly, I seem to be missing the vport_class added to
netdev_vport_register().

> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -1247,6 +1246,23 @@
>              February 2013.
>            </dd>
>  
> +          <dt><code>vxlan</code></dt>
> +          <dd>
> +         <p>
> +           An Ethernet tunnel over the experimental, UDP-based VXLAN
> +           protocol described at
> +           <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02</code>.
> +           VXLAN is currently supported only with the Linux kernel datapath
> +           with kernel version 2.6.26 or later.
> +         </p>
> +         <p>
> +           As an experimental protocol, VXLAN has no officially assigned UDP
> +           port.  Open vSwitch currently uses UDP destination port 8472.
> +           The source port used for VXLAN traffic varies on a per-flow basis
> +           between 32768 and 65535 to allow load balancing.

Inconsistent with the code, which uses 32768-61000 (and the code should use
the sysctls as the basis for the local port range).

> @@ -1427,11 +1447,19 @@
>          </column>
>        </group>
>  
> -      <group title="Tunnel Options: ipsec_gre only">
> +      <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only">

Oops, ipsec_vxlan is not completely removed...

>          <p>
> -          Only <code>ipsec_gre</code> interfaces support these options.
> +          Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces
> +          support these options.
>          </p>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Re: [ovs-dev] [PATCH v4] datapath: Add support for VXLAN tunnels to Open vSwitch

Reply via email to