The kernel tunneling code currently needs to handle a large number
of operations when tunnel packets are encapsulated and decapsulated.
Some examples of this are: finding the correct tunnel port on receive,
TTL and ToS inheritance, ECN handling, etc.  All of these can be done
on a per-flow basis in userspace now that we have both the inner and
outer header information, which allows us to both simplify the kernel
and take advantage of userspace's information.  This ports the logic
from the kernel to userspace and also pulls in the tunnel-specific
configuration handling from netdev-vport.c.  Once tunnel packets are
redirected into this code, the redundant pieces can be removed from
other places.

Signed-off-by: Jesse Gross <je...@nicira.com>
---
 NEWS                 |    2 +
 lib/automake.mk      |    2 +
 lib/tunnel.c         |  739 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/tunnel.h         |   36 +++
 tests/automake.mk    |    1 +
 tests/testsuite.at   |    1 +
 tests/tunnel.at      |  112 ++++++++
 vswitchd/vswitch.xml |   14 +-
 8 files changed, 896 insertions(+), 11 deletions(-)
 create mode 100644 lib/tunnel.c
 create mode 100644 lib/tunnel.h
 create mode 100644 tests/tunnel.at

diff --git a/NEWS b/NEWS
index 6cd2947..d02369a 100644
--- a/NEWS
+++ b/NEWS
@@ -39,6 +39,8 @@ post-v1.8.0
         - Numeric values for reserved ports (see "ovs-ofctl" note above).
     - Tunneling requires the version of the kernel module paired with this
       release (or a newer release).
+    - Inheritance of the Don't Fragment bit in IP tunnels (df_inherit) is
+      no longer supported.
 
 
 v1.8.0 - xx xxx xxxx
diff --git a/lib/automake.mk b/lib/automake.mk
index 94b86f6..9513cd9 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -181,6 +181,8 @@ lib_libopenvswitch_a_SOURCES = \
        lib/timeval.h \
        lib/token-bucket.c \
        lib/token-bucket.h \
+       lib/tunnel.c \
+       lib/tunnel.h \
        lib/type-props.h \
        lib/unaligned.h \
        lib/unicode.c \
diff --git a/lib/tunnel.c b/lib/tunnel.c
new file mode 100644
index 0000000..115ff07
--- /dev/null
+++ b/lib/tunnel.c
@@ -0,0 +1,739 @@
+/* Copyright (c) 2012 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+
+#include "byte-order.h"
+#include "daemon.h"
+#include "dirs.h"
+#include "dynamic-string.h"
+#include "hash.h"
+#include "hmap.h"
+#include "packets.h"
+#include "smap.h"
+#include "socket-util.h"
+#include "tunnel.h"
+#include "unixctl.h"
+#include "vlog.h"
+
+/* TODO:
+ *
+ * Better hooks for tunnel creation/deletion/input/output
+ * Mechanism to create DP ports
+ * Ability to generate actions on input for ECN
+ * Revalidate flows on port add/remove/reconfigure
+ * Port stats
+ * Kernel interface needs flags defined (in particular for keys)
+ * IPsec flag needed?
+ */
+
+VLOG_DEFINE_THIS_MODULE(tunnel);
+
+#define DEFAULT_TTL 64
+
+struct tnl_match {
+    ovs_be64 in_key;
+    ovs_be32 ip_src;
+    ovs_be32 ip_dst;
+    uint32_t dp_portno;
+    bool key_present;
+    bool in_key_flow;
+    uint8_t zeroed[2];
+};
+BUILD_ASSERT_DECL(sizeof(struct tnl_match) % 8 == 0);
+
+struct tnl_port {
+    struct hmap_node match_node;
+    struct hmap_node portno_node;
+
+    const struct netdev *netdev;
+    struct tnl_match match;
+    ovs_be64 out_key;
+    uint32_t tnl_portno;
+    uint8_t ttl;
+    uint8_t tos;
+    bool out_key_flow;
+    bool key_present;
+    bool ttl_inherit;
+    bool tos_inherit;
+    bool dont_fragment;
+    bool csum;
+};
+
+static struct hmap tnl_match_map = HMAP_INITIALIZER(&tnl_match_map);
+static struct hmap tnl_portno_map = HMAP_INITIALIZER(&tnl_portno_map);
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+static struct vlog_rate_limit dbg_rl = VLOG_RATE_LIMIT_INIT(60, 60);
+
+static int tnl_config_get(const struct netdev *, struct tnl_port *);
+static struct tnl_port *tnl_find(struct tnl_match *);
+static struct tnl_port *tnl_find_exact(struct tnl_match *);
+static uint32_t tnl_hash(struct tnl_match *);
+static void tnl_match_fmt(const struct tnl_match *, struct ds *);
+static unixctl_cb_func tnl_port_dump;
+static void tnl_port_mod_log(uint32_t tnl_portno, const char *action);
+static struct tnl_port *tnl_portno_find(uint32_t tnl_portno);
+
+void tnl_init(void)
+{
+    static bool inited;
+
+    if (inited) {
+        return;
+    }
+    inited = true;
+
+    unixctl_command_register("tunnel/show", "[interface]", 0, 1,
+                             tnl_port_dump, NULL);
+}
+
+static bool tnl_type_check(const char *type)
+{
+    return !strcmp(type, "gre") || !strcmp(type, "ipsec_gre") ||
+           !strcmp(type, "capwap") || !strcmp(type, "dummy_tunnel");
+}
+
+static int tnl_port_add__(uint32_t tnl_portno, uint32_t dp_portno,
+                          const struct netdev *netdev)
+{
+    int err;
+    const char *type;
+    struct tnl_port *tnl_port;
+    struct tnl_port *existing_port;
+
+    type = netdev_get_type(netdev);
+    if (!tnl_type_check(type)) {
+        return 0;
+    }
+
+    tnl_port = xzalloc(sizeof *tnl_port);
+    err = tnl_config_get(netdev, tnl_port);
+    if (err) {
+        goto err_free;
+    }
+
+    tnl_port->match.dp_portno = dp_portno;
+    tnl_port->tnl_portno = tnl_portno;
+
+    existing_port = tnl_find_exact(&tnl_port->match);
+    if (existing_port) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+
+        tnl_match_fmt(&tnl_port->match, &ds);
+        VLOG_WARN("%s: attempting to add tunnel port with same config as "
+                  "port '%s' (%s)", netdev_get_name(tnl_port->netdev),
+                                    netdev_get_name(existing_port->netdev),
+                                    ds_cstr(&ds));
+        ds_destroy(&ds);
+        err = EEXIST;
+        goto err_free;
+    }
+
+    hmap_insert(&tnl_match_map, &tnl_port->match_node,
+                tnl_hash(&tnl_port->match));
+    hmap_insert(&tnl_portno_map, &tnl_port->portno_node,
+                hash_int(tnl_port->tnl_portno, 0));
+
+    return 0;
+
+err_free:
+    free(tnl_port);
+    return err;
+}
+
+int tnl_port_add(uint32_t tnl_portno, uint32_t dp_portno,
+                 const struct netdev *netdev)
+{
+    int err;
+
+    err = tnl_port_add__(tnl_portno, dp_portno, netdev);
+    if (!err) {
+        tnl_port_mod_log(tnl_portno, "adding");
+    }
+
+    return err;
+}
+
+static void tnl_port_del__(struct tnl_port *tnl_port)
+{
+    hmap_remove(&tnl_match_map, &tnl_port->match_node);
+    hmap_remove(&tnl_portno_map, &tnl_port->portno_node);
+    free(tnl_port);
+}
+
+void tnl_port_del(uint32_t tnl_portno)
+{
+    struct tnl_port *tnl_port;
+
+    tnl_port = tnl_portno_find(tnl_portno);
+    if (!tnl_port) {
+        return;
+    }
+
+    tnl_port_mod_log(tnl_portno, "removing");
+    tnl_port_del__(tnl_port);
+}
+
+int tnl_port_reconfigure(uint32_t tnl_portno, const struct netdev *netdev)
+{
+    struct tnl_port *tnl_port;
+    uint32_t dp_portno;
+    int err;
+
+    tnl_port = tnl_portno_find(tnl_portno);
+    if (!tnl_port) {
+        return 0;
+    }
+
+    dp_portno = tnl_port->match.dp_portno;
+
+    if (!tnl_type_check(netdev_get_type(netdev))) {
+        tnl_port_mod_log(tnl_portno, "removing");
+    }
+
+    tnl_port_del__(tnl_port);
+    err = tnl_port_add__(tnl_portno, dp_portno, netdev);
+    if (!err) {
+        tnl_port_mod_log(tnl_portno, "reconfiguring");
+    }
+
+    return err;
+}
+
+int tnl_stats_get(uint32_t tnl_portno OVS_UNUSED,
+                  struct netdev_stats *stats OVS_UNUSED)
+{
+    return EOPNOTSUPP;
+}
+
+static bool is_ip(const struct flow *flow)
+{
+    return flow->dl_type == htons(ETH_TYPE_IP) ||
+           flow->dl_type == htons(ETH_TYPE_IPV6);
+}
+
+int tnl_port_receive(struct flow *flow, bool *adjusted)
+{
+    struct tnl_match match;
+    struct tnl_port *tnl_port;
+    ovs_be64 tun_id;
+    bool ecn = false;
+    bool log = false;
+
+    *adjusted = false;
+
+    if (flow->tunnel.ip_dst == 0) {
+        return 0;
+    }
+
+    memset(&match, 0, sizeof match);
+    match.dp_portno = flow->in_port;
+    match.ip_src = flow->tunnel.ip_dst;
+    match.ip_dst = flow->tunnel.ip_src;
+    match.in_key = flow->tunnel.tun_id;
+    match.key_present = flow->tunnel.flags & FLOW_TNL_F_KEY;
+
+    tnl_port = tnl_find(&match);
+    if (!tnl_port) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+
+        tnl_match_fmt(&match, &ds);
+        VLOG_WARN_RL(&rl, "receive tunnel port not found (%s)", ds_cstr(&ds));
+        ds_destroy(&ds);
+
+        return EINVAL;
+    }
+
+    if (((flow->tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE) && is_ip(flow)) {
+        if ((flow->nw_tos & IP_ECN_MASK) != IP_ECN_NOT_ECT) {
+            ecn = true;
+        } else {
+            VLOG_WARN_RL(&rl, "dropping tunnel packet marked ECN CE but is not"
+                              " ECN capable");
+            return EINVAL;
+        }
+    }
+
+    if (!VLOG_DROP_DBG(&dbg_rl)) {
+        log = true;
+        VLOG_DBG("ingress tunnel flow adjusted");
+        VLOG_DBG(" pre: %s", flow_to_string(flow));
+    }
+
+    *adjusted = true;
+
+    tun_id = flow->tunnel.tun_id;
+    flow->in_port = tnl_port->tnl_portno;
+    memset(&flow->tunnel, 0, sizeof flow->tunnel);
+    if (tnl_port->match.in_key_flow) {
+        flow->tunnel.tun_id = tun_id;
+    }
+
+    if (log) {
+        VLOG_DBG(" post: %s", flow_to_string(flow));
+    }
+
+    if (ecn) {
+        VLOG_WARN("\tECN CE");
+    }
+
+    return 0;
+}
+
+uint32_t tnl_port_send(struct flow *flow, uint32_t tnl_portno)
+{
+    struct tnl_port *tnl_port;
+    bool log = false;
+
+    tnl_port = tnl_portno_find(tnl_portno);
+    if (!tnl_port) {
+        return tnl_portno;
+    }
+
+    if (!VLOG_DROP_DBG(&dbg_rl)) {
+        log = true;
+        VLOG_DBG("egress tunnel flow adjusted");
+        VLOG_DBG(" pre: %s", flow_to_string(flow));
+    }
+
+    flow->tunnel.ip_src = tnl_port->match.ip_src;
+    flow->tunnel.ip_dst = tnl_port->match.ip_dst;
+    flow->tunnel.flags = 0;
+
+    if (!tnl_port->out_key_flow) {
+        flow->tunnel.tun_id = tnl_port->out_key;
+    }
+
+    if (tnl_port->ttl_inherit && is_ip(flow)) {
+        flow->tunnel.ip_ttl = flow->nw_ttl;
+    } else {
+        flow->tunnel.ip_ttl = tnl_port->ttl;
+    }
+
+    if (tnl_port->tos_inherit && is_ip(flow)) {
+        flow->tunnel.ip_tos = flow->nw_tos & IP_DSCP_MASK;
+    } else {
+        flow->tunnel.ip_tos = tnl_port->tos;
+    }
+    if ((flow->nw_tos & IP_ECN_MASK) == IP_ECN_CE) {
+        flow->tunnel.ip_tos |= IP_ECN_ECT_0;
+    } else {
+        flow->tunnel.ip_tos |= flow->nw_tos & IP_ECN_MASK;
+    }
+
+    flow->tunnel.flags |= tnl_port->dont_fragment ? FLOW_TNL_F_DONT_FRAGMENT : 0;
+    flow->tunnel.flags |= tnl_port->csum ? FLOW_TNL_F_CSUM : 0;
+    flow->tunnel.flags |= tnl_port->key_present ? FLOW_TNL_F_KEY : 0;
+
+    if (log) {
+        VLOG_DBG(" post: %s", flow_to_string(flow));
+        VLOG_DBG(" output dp port: %"PRIu32, tnl_port->match.dp_portno);
+    }
+
+    return tnl_port->match.dp_portno;
+}
+
+static struct tnl_port *tnl_portno_find(uint32_t tnl_portno)
+{
+    struct tnl_port *tnl_port;
+
+    HMAP_FOR_EACH_IN_BUCKET (tnl_port, portno_node, hash_int(tnl_portno, 0),
+                             &tnl_portno_map) {
+        if (tnl_port->tnl_portno == tnl_portno) {
+            return tnl_port;
+        }
+    }
+
+    return NULL;
+}
+
+static uint32_t tnl_hash(struct tnl_match *match)
+{
+    return hash_bytes(match, sizeof *match, 0);
+}
+
+static struct tnl_port *tnl_find_exact(struct tnl_match *match)
+{
+    struct tnl_port *tnl_port;
+
+    HMAP_FOR_EACH_WITH_HASH (tnl_port, match_node, tnl_hash(match),
+                             &tnl_match_map) {
+        if (!memcmp(match, &tnl_port->match, sizeof *match)) {
+            return tnl_port;
+        }
+    }
+
+    return NULL;
+}
+
+static struct tnl_port *tnl_find(struct tnl_match *match_)
+{
+    struct tnl_match match = *match_;
+    bool is_multicast = ip_is_multicast(match.ip_src);
+    struct tnl_port *tnl_port;
+
+    /* remote_ip, local_ip, in_key */
+    if (!is_multicast) {
+        tnl_port = tnl_find_exact(&match);
+        if (tnl_port) {
+            return tnl_port;
+        }
+    }
+
+    /* remote_ip, in_key */
+    match.ip_src = 0;
+    tnl_port = tnl_find_exact(&match);
+    if (tnl_port) {
+        return tnl_port;
+    }
+    match.ip_src = match_->ip_src;
+
+    /* remote_ip, local_ip */
+    if (!is_multicast) {
+        match.in_key = 0;
+        match.in_key_flow = true;
+        tnl_port = tnl_find_exact(&match);
+        if (tnl_port) {
+            return tnl_port;
+        }
+        match.in_key = match_->in_key;
+        match.in_key_flow = false;
+    }
+
+    /* remote_ip */
+    match.ip_src = 0;
+    match.in_key = 0;
+    match.in_key_flow = true;
+    tnl_port = tnl_find_exact(&match);
+    if (tnl_port) {
+        return tnl_port;
+    }
+    match.ip_src = match_->ip_src;
+    match.in_key = match_->in_key;
+    match.in_key_flow = false;
+
+    if (is_multicast) {
+        match.ip_src = 0;
+        match.ip_dst = match_->ip_src;
+
+        /* multicast remote_ip, in_key */
+        tnl_port = tnl_find_exact(&match);
+        if (tnl_port) {
+            return tnl_port;
+        }
+
+        /* multicast remote_ip */
+        match.in_key = 0;
+        match.in_key_flow = true;
+        tnl_port = tnl_find_exact(&match);
+        if (tnl_port) {
+            return tnl_port;
+        }
+    }
+
+    return NULL;
+}
+
+static ovs_be64 tnl_key_get(const struct smap *args, const char *name,
+                            bool *present, bool *flow)
+{
+    const char *s;
+
+    *present = false;
+    *flow = false;
+
+    s = smap_get(args, name);
+    if (!s) {
+        s = smap_get(args, "key");
+        if (!s) {
+            return 0;
+        }
+    }
+
+    *present = true;
+
+    if (!strcmp(s, "flow")) {
+        *flow = true;
+        return 0;
+    } else {
+        return htonll(strtoull(s, NULL, 0));
+    }
+}
+
+static int tnl_config_get(const struct netdev *netdev, struct tnl_port *tnl_port)
+{
+    int err;
+    struct smap args = SMAP_INITIALIZER(&args);
+    struct smap_node *node;
+    const char *name, *type;
+    bool has_csum = false;
+    bool is_ipsec = false;
+    bool ipsec_mech_set = false;
+
+    err = netdev_get_config(netdev, &args);
+    if (err) {
+        return err;
+    }
+
+    name = netdev_get_name(netdev);
+    type = netdev_get_type(netdev);
+
+    tnl_port->netdev = netdev;
+
+    if (!strcmp(type, "gre")) {
+        has_csum = true;
+    } else if (!strcmp(type, "ipsec_gre")) {
+        has_csum = true;
+        is_ipsec = true;
+    }
+
+    tnl_port->dont_fragment = true;
+
+    SMAP_FOR_EACH (node, &args) {
+        if (!strcmp(node->key, "remote_ip")) {
+            struct in_addr in_addr;
+            if (lookup_ip(node->value, &in_addr)) {
+                VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
+            } else {
+                tnl_port->match.ip_dst = in_addr.s_addr;
+            }
+        } else if (!strcmp(node->key, "local_ip")) {
+            struct in_addr in_addr;
+            if (lookup_ip(node->value, &in_addr)) {
+                VLOG_WARN("%s: bad %s 'local_ip'", name, type);
+            } else {
+                tnl_port->match.ip_src = in_addr.s_addr;
+            }
+        } else if (!strcmp(node->key, "key") ||
+                   !strcmp(node->key, "in_key") ||
+                   !strcmp(node->key, "out_key")) {
+            /* Handled separately below. */
+        } else if (!strcmp(node->key, "tos")) {
+            if (!strcmp(node->value, "inherit")) {
+                tnl_port->tos_inherit = true;
+            } else {
+                char *endptr;
+                int tos;
+                tos = strtol(node->value, &endptr, 0);
+                if (*endptr == '\0' && tos == (tos & IP_DSCP_MASK)) {
+                    tnl_port->tos = tos;
+                } else {
+                    VLOG_WARN("%s: invalid TOS %s", name, node->value);
+                }
+            }
+        } else if (!strcmp(node->key, "ttl")) {
+            if (!strcmp(node->value, "inherit")) {
+                tnl_port->ttl_inherit = true;
+            } else {
+                tnl_port->ttl = atoi(node->value);
+            }
+        } else if (!strcmp(node->key, "csum") && has_csum) {
+            if (!strcmp(node->value, "true")) {
+                tnl_port->csum = true;
+            }
+        } else if (!strcmp(node->key, "df_default")) {
+            if (!strcmp(node->value, "false")) {
+                tnl_port->dont_fragment = false;
+            }
+        } else if (!strcmp(node->key, "peer_cert") && is_ipsec) {
+            if (smap_get(&args, "certificate")) {
+                ipsec_mech_set = true;
+            } else {
+                const char *use_ssl_cert;
+
+                /* If the "use_ssl_cert" is true, then "certificate" and
+                 * "private_key" will be pulled from the SSL table.  The
+                 * use of this option is strongly discouraged, since it
+                 * will likely be removed when multiple SSL configurations
+                 * are supported by OVS.
+                 */
+                use_ssl_cert = smap_get(&args, "use_ssl_cert");
+                if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) {
+                    VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
+                             name);
+                    return EINVAL;
+                }
+                ipsec_mech_set = true;
+            }
+        } else if (!strcmp(node->key, "psk") && is_ipsec) {
+            ipsec_mech_set = true;
+        } else if (is_ipsec
+                && (!strcmp(node->key, "certificate")
+                    || !strcmp(node->key, "private_key")
+                    || !strcmp(node->key, "use_ssl_cert"))) {
+            /* Ignore options not used by the netdev. */
+        } else {
+            VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key);
+        }
+    }
+
+    if (!tnl_port->match.ip_dst) {
+        VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
+                 name, type);
+        return EINVAL;
+    }
+
+    if (tnl_port->match.ip_src) {
+        if (ip_is_multicast(tnl_port->match.ip_dst)) {
+            VLOG_WARN("%s: remote_ip is multicast, ignoring local_ip", name);
+            tnl_port->match.ip_src = 0;
+        }
+    }
+
+    if (is_ipsec) {
+        static pid_t pid = 0;
+        if (pid <= 0) {
+            char *file_name = xasprintf("%s/%s", ovs_rundir(),
+                                        "ovs-monitor-ipsec.pid");
+            pid = read_pidfile(file_name);
+            free(file_name);
+        }
+
+        if (pid < 0) {
+            VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
+                     name);
+            return EINVAL;
+        }
+
+        if (smap_get(&args, "peer_cert") && smap_get(&args, "psk")) {
+            VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name);
+            return EINVAL;
+        }
+
+        if (!ipsec_mech_set) {
+            VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
+                     name);
+            return EINVAL;
+        }
+    }
+
+    tnl_port->match.in_key = tnl_key_get(&args, "in_key",
+                                         &tnl_port->match.key_present,
+                                         &tnl_port->match.in_key_flow);
+    tnl_port->out_key = tnl_key_get(&args, "out_key", &tnl_port->key_present,
+                                    &tnl_port->out_key_flow);
+
+    if (tnl_port->ttl == 0) {
+        tnl_port->ttl = DEFAULT_TTL;
+    }
+
+    return 0;
+}
+
+static void tnl_match_fmt(const struct tnl_match *match, struct ds *ds)
+{
+    ds_put_format(ds, IP_FMT"->"IP_FMT, IP_ARGS(&match->ip_src),
+                  IP_ARGS(&match->ip_dst));
+
+    if (match->key_present) {
+        if (match->in_key_flow) {
+            ds_put_cstr(ds, ", key=flow");
+        } else {
+            ds_put_format(ds, ", key=%#"PRIx64, ntohll(match->in_key));
+        }
+    }
+
+    ds_put_format(ds, ", dp port=%"PRIu32, match->dp_portno);
+}
+
+static void tnl_port_mod_log(uint32_t tnl_portno, const char *action)
+{
+    if (VLOG_IS_DBG_ENABLED()) {
+        const struct tnl_port *tnl_port;
+
+        tnl_port = tnl_portno_find(tnl_portno);
+        if (tnl_port) {
+            struct ds ds = DS_EMPTY_INITIALIZER;
+
+            tnl_match_fmt(&tnl_port->match, &ds);
+            VLOG_DBG("%s tunnel port %"PRIu32" (%s)", action, tnl_portno,
+                                                      ds_cstr(&ds));
+            ds_destroy(&ds);
+        }
+    }
+}
+
+static void tnl_port_fmt(const struct tnl_port *tnl_port, struct ds *ds)
+{
+    ds_put_format(ds, "port %"PRIu32": %s (%s: ", tnl_port->match.dp_portno,
+                  netdev_get_name(tnl_port->netdev),
+                  netdev_get_type(tnl_port->netdev));
+    tnl_match_fmt(&tnl_port->match, ds);
+
+    if (tnl_port->out_key != tnl_port->match.in_key ||
+        tnl_port->key_present != tnl_port->match.key_present ||
+        tnl_port->out_key_flow != tnl_port->match.in_key_flow) {
+        ds_put_cstr(ds, ", out_key=");
+        if (!tnl_port->key_present) {
+            ds_put_cstr(ds, "none");
+        } else if (tnl_port->out_key_flow) {
+            ds_put_cstr(ds, "flow");
+        } else {
+            ds_put_format(ds, "%"PRIx64, ntohll(tnl_port->out_key));
+        }
+    }
+
+    if (tnl_port->ttl_inherit) {
+        ds_put_cstr(ds, ", ttl=inherit");
+    } else if (tnl_port->ttl != DEFAULT_TTL) {
+        ds_put_format(ds, ", ttl=%"PRIu8, tnl_port->ttl);
+    }
+
+    if (tnl_port->tos_inherit) {
+        ds_put_cstr(ds, ", tos=inherit");
+    } else if (tnl_port->tos) {
+        ds_put_format(ds, ", tos=%"PRIx8, tnl_port->tos);
+    }
+
+    if (!tnl_port->dont_fragment) {
+        ds_put_cstr(ds, ", df=false");
+    }
+
+    if (tnl_port->csum) {
+        ds_put_cstr(ds, ", csum=true");
+    }
+
+    ds_put_cstr(ds, ")\n");
+}
+
+static void
+tnl_port_dump(struct unixctl_conn *conn, int argc, const char *argv[],
+              void *aux OVS_UNUSED)
+{
+    struct tnl_port *tnl_port;
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    if (argc > 1) {
+        HMAP_FOR_EACH (tnl_port, match_node, &tnl_match_map) {
+            if (!strcmp(netdev_get_name(tnl_port->netdev), argv[1])) {
+                tnl_port_fmt(tnl_port, &ds);
+                goto out;
+            }
+        }
+
+        unixctl_command_reply_error(conn,  "Unknown tunnel");
+        return;
+    } else {
+        HMAP_FOR_EACH (tnl_port, match_node, &tnl_match_map) {
+            tnl_port_fmt(tnl_port, &ds);
+        }
+    }
+
+out:
+    unixctl_command_reply(conn, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
diff --git a/lib/tunnel.h b/lib/tunnel.h
new file mode 100644
index 0000000..e9bd754
--- /dev/null
+++ b/lib/tunnel.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2012 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TUNNEL_H
+#define TUNNEL_H 1
+
+#include <stdbool.h>
+
+#include "flow.h"
+#include "netdev.h"
+
+void tnl_init(void);
+
+int tnl_port_add(uint32_t tnl_portno, uint32_t dp_portno,
+                 const struct netdev *);
+int tnl_port_reconfigure(uint32_t tnl_portno, const struct netdev *);
+void tnl_port_del(uint32_t tnl_portno);
+
+int tnl_stats_get(uint32_t tnl_portno, struct netdev_stats *);
+
+int tnl_port_receive(struct flow *, bool *adjusted);
+uint32_t tnl_port_send(struct flow *, uint32_t tnl_portno);
+
+#endif /* tunnel.h */
diff --git a/tests/automake.mk b/tests/automake.mk
index 20f9e82..16dae30 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -32,6 +32,7 @@ TESTSUITE_AT = \
        tests/jsonrpc.at \
        tests/jsonrpc-py.at \
        tests/timeval.at \
+       tests/tunnel.at \
        tests/lockfile.at \
        tests/reconnect.at \
        tests/ofproto-dpif.at \
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 2b4ccdb..bf1d6dc 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -92,6 +92,7 @@ m4_include([tests/json.at])
 m4_include([tests/jsonrpc.at])
 m4_include([tests/jsonrpc-py.at])
 m4_include([tests/timeval.at])
+m4_include([tests/tunnel.at])
 m4_include([tests/lockfile.at])
 m4_include([tests/reconnect.at])
 m4_include([tests/ofproto.at])
diff --git a/tests/tunnel.at b/tests/tunnel.at
new file mode 100644
index 0000000..a3e10aa
--- /dev/null
+++ b/tests/tunnel.at
@@ -0,0 +1,112 @@
+AT_BANNER([tunnel])
+
+AT_SETUP([tunnel - input])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy_tunnel \
+                    options:remote_ip=1.1.1.1 \
+                    -- add-port br0 p2 -- set Interface p2 type=dummy_tunnel \
+                    options:local_ip=2.2.2.2 options:remote_ip=1.1.1.1 \
+                    -- add-port br0 p3 -- set Interface p3 type=dummy_tunnel \
+                    options:remote_ip=2.2.2.2])
+AT_DATA([flows.txt], [dnl
+actions=IN_PORT
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl remote_ip
+AT_CHECK([ovs-appctl ofproto/trace br0 'ipv4_tunnel(tun_id=0x0,flags=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x0,flags=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64)),1
+])
+
+dnl local_ip, remote_ip
+AT_CHECK([ovs-appctl ofproto/trace br0 'ipv4_tunnel(tun_id=0x0,flags=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64),in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x0,flags=0x0,src=2.2.2.2,dst=1.1.1.1,tos=0x0,ttl=64)),2
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([tunnel - ECN decapsulation])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy_tunnel \
+                    options:remote_ip=1.1.1.1])
+AT_DATA([flows.txt], [dnl
+actions=10
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace br0 'ipv4_tunnel(tun_id=0x0,flags=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: 10
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([tunnel - output])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy_tunnel \
+                    options:remote_ip=1.1.1.1 options:local_ip=2.2.2.2 \
+                    options:key=5])
+AT_DATA([flows.txt], [dnl
+actions=output:1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Basic
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(10),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x5,flags=0x0,src=2.2.2.2,dst=1.1.1.1,tos=0x0,ttl=64)),1
+])
+
+dnl ECN
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(10),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x5,flags=0x0,src=2.2.2.2,dst=1.1.1.1,tos=0x1,ttl=64)),1
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([tunnel - ToS and TTL inheritance])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy_tunnel \
+                    options:remote_ip=1.1.1.1 options:tos=inherit \
+                    options:ttl=inherit])
+AT_DATA([flows.txt], [dnl
+actions=output:1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Basic
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(10),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x0,flags=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x4,ttl=128)),1
+])
+
+dnl ECN
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(10),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x5,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x0,flags=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x5,ttl=128)),1
+])
+
+dnl non-IP
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(10),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x0,flags=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64)),1
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([tunnel - set_tunnel])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy_tunnel \
+                    options:remote_ip=1.1.1.1 options:key=flow])
+AT_DATA([flows.txt], [dnl
+actions=set_tunnel:1,output:1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(10),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4_tunnel(tun_id=0x1,flags=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64)),1
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index a1b99f8..a5b15e7 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1327,19 +1327,11 @@
         system default, typically 64).  Default is the system default TTL.
       </column>
 
-      <column name="options" key="df_inherit" type='{"type": "boolean"}'>
-        Optional.  If enabled, the Don't Fragment bit will be copied from the
-        inner IP headers (those of the encapsulated traffic) to the outer
-        (tunnel) headers.  Default is disabled; set to <code>true</code> to
-        enable.
-      </column>
-
       <column name="options" key="df_default"
               type='{"type": "boolean"}'>
-        Optional.  If enabled, the Don't Fragment bit will be set by default on
-        tunnel headers if the <code>df_inherit</code> option is not set, or if
-        the encapsulated packet is not IP.  Default is enabled; set to
-        <code>false</code> to disable.
+        Optional.  If enabled, the Don't Fragment bit will be set on tunnel
+        outer headers to allow path MTU discovery. Default is enabled; set
+        to <code>false</code> to disable.
       </column>
 
       <column name="options" key="pmtud" type='{"type": "boolean"}'>
-- 
1.7.9.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to