The kernel implementation of Geneve options stores the TLV option
data in the flow exactly as received, without any further parsing.
This is then translated to known options for the purposes of matching
on flow setup (which will then install a datapath flow in the form
the kernel is expecting).

The userspace implementation behaves a little bit differently: it
looks up known options as each packet is received. The reason for this
is that there is a much tighter coupling between datapath and flow
translation, and the representation is generally expected to be the
same. This works, but it incurs work on a per-packet basis that could
be done per-flow instead.

This patch introduces a small translation step for Geneve packets
between datapath and flow lookup for the userspace datapath in order to
allow the same kind of processing that the kernel does.

There is a second benefit to this as well: for some operations it is
preferable to keep the options exactly as they were received on the wire,
which this enables. One example is that for packets that are executed from
ofproto-dpif-upcall to the datapath, this avoids the translation of
Geneve metadata. Since this conversion is potentially lossy (for unknown
options), keeping everything in the same format removes the possibility
of dropping options if the packet comes back up to userspace and the
Geneve option translation table has changed. To help with these types of
operations, most functions can understand both formats of data and seamlessly
do the right thing.

Signed-off-by: Jesse Gross <je...@nicira.com>
---
 lib/automake.mk               |   1 +
 lib/dpif-netdev.c             |  55 ++++++-
 lib/flow.c                    |  48 ++++--
 lib/flow.h                    |  13 +-
 lib/geneve.h                  |  63 ++++++++
 lib/meta-flow.c               |   6 +-
 lib/netdev-vport.c            |  26 ++--
 lib/odp-execute.c             |   2 +-
 lib/odp-util.c                |  58 ++++---
 lib/odp-util.h                |  12 +-
 lib/packets.h                 |  41 +----
 lib/tun-metadata.c            | 352 ++++++++++++++++++++++++++++++------------
 lib/tun-metadata.h            |  74 ++++++---
 ofproto/ofproto-dpif-sflow.c  |   2 +-
 ofproto/ofproto-dpif-upcall.c |   2 +-
 tests/tunnel-push-pop.at      |   2 +-
 16 files changed, 534 insertions(+), 223 deletions(-)
 create mode 100644 lib/geneve.h

diff --git a/lib/automake.mk b/lib/automake.mk
index faca968..5b6e9e8 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -81,6 +81,7 @@ lib_libopenvswitch_la_SOURCES = \
        lib/fatal-signal.h \
        lib/flow.c \
        lib/flow.h \
+       lib/geneve.h \
        lib/guarded-list.c \
        lib/guarded-list.h \
        lib/hash.c \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f587df5..c31a7e0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1884,8 +1884,8 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, 
uint32_t key_len,
     if (mask_key_len) {
         enum odp_key_fitness fitness;
 
-        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, key, key_len,
-                                       &wc->masks, flow);
+        fitness = odp_flow_key_to_mask_udpif(mask_key, mask_key_len, key,
+                                             key_len, &wc->masks, flow);
         if (fitness) {
             /* This should not happen: it indicates that
              * odp_flow_key_from_mask() and odp_flow_key_to_mask()
@@ -1919,7 +1919,7 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, 
uint32_t key_len,
 {
     odp_port_t in_port;
 
-    if (odp_flow_key_to_flow(key, key_len, flow)) {
+    if (odp_flow_key_to_flow_udpif(key, key_len, flow)) {
         /* This should not happen: it indicates that odp_flow_key_from_flow()
          * and odp_flow_key_to_flow() disagree on the acceptable form of a
          * flow.  Log the problem as an error, with enough details to enable
@@ -3014,11 +3014,25 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
                  struct ofpbuf *actions, struct ofpbuf *put_actions)
 {
     struct dp_netdev *dp = pmd->dp;
+    struct flow_tnl orig_tunnel;
+    int err;
 
     if (OVS_UNLIKELY(!dp->upcall_cb)) {
         return ENODEV;
     }
 
+    orig_tunnel.flags = flow->tunnel.flags;
+    if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
+        orig_tunnel.metadata.present.len = flow->tunnel.metadata.present.len;
+        memcpy(orig_tunnel.metadata.opts.gnv, flow->tunnel.metadata.opts.gnv,
+               flow->tunnel.metadata.present.len);
+        err = tun_metadata_from_geneve_udpif(&orig_tunnel, &orig_tunnel,
+                                             &flow->tunnel);
+        if (err) {
+            return err;
+        }
+    }
+
     if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
         struct ds ds = DS_EMPTY_INITIALIZER;
         char *packet_str;
@@ -3046,8 +3060,39 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
         ds_destroy(&ds);
     }
 
-    return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
-                         actions, wc, put_actions, dp->upcall_aux);
+    err = dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
+                        actions, wc, put_actions, dp->upcall_aux);
+    if (err && err != ENOSPC) {
+        return err;
+    }
+
+    if (wc) {
+        if (wc->masks.tunnel.metadata.present.map) {
+            struct geneve_opt opts[GENEVE_TOT_OPT_SIZE /
+                                   sizeof(struct geneve_opt)];
+
+            tun_metadata_to_geneve_udpif_mask(&flow->tunnel,
+                                              &wc->masks.tunnel,
+                                              orig_tunnel.metadata.opts.gnv,
+                                              orig_tunnel.metadata.present.len,
+                                              opts);
+
+            memset(&wc->masks.tunnel.metadata, 0,
+                   sizeof wc->masks.tunnel.metadata);
+            memcpy(&wc->masks.tunnel.metadata.opts.gnv, opts,
+                   orig_tunnel.metadata.present.len);
+        }
+        wc->masks.tunnel.metadata.present.len = 0xff;
+    }
+
+    if (orig_tunnel.flags & FLOW_TNL_F_UDPIF) {
+        memcpy(&flow->tunnel.metadata.opts.gnv, orig_tunnel.metadata.opts.gnv,
+               orig_tunnel.metadata.present.len);
+        flow->tunnel.metadata.present.len = orig_tunnel.metadata.present.len;
+        flow->tunnel.flags |= FLOW_TNL_F_UDPIF;
+    }
+
+    return err;
 }
 
 static inline uint32_t
diff --git a/lib/flow.c b/lib/flow.c
index 352e9b8..d3d25e4 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -462,9 +462,22 @@ miniflow_extract(struct dp_packet *packet, struct miniflow 
*dst)
         miniflow_push_words(mf, tunnel, &md->tunnel,
                             offsetof(struct flow_tnl, metadata) /
                             sizeof(uint64_t));
-        if (md->tunnel.metadata.opt_map) {
-            miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
-                                 sizeof md->tunnel.metadata / 
sizeof(uint64_t));
+
+        if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
+            if (md->tunnel.metadata.present.map) {
+                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
+                                    sizeof md->tunnel.metadata /
+                                    sizeof(uint64_t));
+            }
+        } else {
+            if (md->tunnel.metadata.present.len) {
+                miniflow_push_words(mf, tunnel.metadata.present,
+                                    &md->tunnel.metadata.present, 1);
+                miniflow_push_words(mf, tunnel.metadata.opts.gnv,
+                                    md->tunnel.metadata.opts.gnv,
+                                    
DIV_ROUND_UP(md->tunnel.metadata.present.len,
+                                    sizeof(uint64_t)));
+            }
         }
     }
     if (md->skb_priority || md->pkt_mark) {
@@ -815,7 +828,7 @@ flow_get_metadata(const struct flow *flow, struct match 
*flow_metadata)
     if (flow->tunnel.gbp_flags) {
         match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags);
     }
-    tun_metadata_get_fmd(&flow->tunnel.metadata, flow_metadata);
+    tun_metadata_get_fmd(&flow->tunnel, flow_metadata);
     if (flow->metadata != htonll(0)) {
         match_set_metadata(flow_metadata, flow->metadata);
     }
@@ -1161,9 +1174,16 @@ void flow_wildcards_init_for_packet(struct 
flow_wildcards *wc,
         WC_MASK_FIELD(wc, tunnel.gbp_id);
         WC_MASK_FIELD(wc, tunnel.gbp_flags);
 
-        if (flow->tunnel.metadata.opt_map) {
-            wc->masks.tunnel.metadata.opt_map = flow->tunnel.metadata.opt_map;
-            WC_MASK_FIELD(wc, tunnel.metadata.opts);
+        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
+            if (flow->tunnel.metadata.present.map) {
+                wc->masks.tunnel.metadata.present.map =
+                                              
flow->tunnel.metadata.present.map;
+                WC_MASK_FIELD(wc, tunnel.metadata.opts.u8);
+            }
+        } else {
+            WC_MASK_FIELD(wc, tunnel.metadata.present.len);
+            memset(wc->masks.tunnel.metadata.opts.gnv, 0xff,
+                   flow->tunnel.metadata.present.len);
         }
     } else if (flow->tunnel.tun_id) {
         WC_MASK_FIELD(wc, tunnel.tun_id);
@@ -1253,9 +1273,17 @@ flow_wc_map(const struct flow *flow, struct miniflow 
*map)
 
     map->tnl_map = 0;
     if (flow->tunnel.ip_dst) {
-        map->tnl_map = MINIFLOW_TNL_MAP(tunnel);
-        if (!flow->tunnel.metadata.opt_map) {
-            map->tnl_map &= ~MINIFLOW_TNL_MAP(tunnel.metadata);
+        map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel,
+                                          offsetof(struct flow_tnl, metadata));
+        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
+            if (flow->tunnel.metadata.present.map) {
+                map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata,
+                                                 
sizeof(flow->tunnel.metadata));
+            }
+        } else {
+            map->tnl_map |= MINIFLOW_TNL_MAP(tunnel.metadata.present.len);
+            map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata.opts.gnv,
+                                             
flow->tunnel.metadata.present.len);
         }
     }
 
diff --git a/lib/flow.h b/lib/flow.h
index 96aa4aa..5bc9267 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -80,6 +80,12 @@ BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM);
 
 #define FLOW_TNL_F_MASK ((1 << 4) - 1)
 
+/* Purely internal to OVS userspace. These flags should never be exposed to
+ * the outside world and so aren't included in the flags mask. */
+
+/* Tunnel information is in userspace datapath format. */
+#define FLOW_TNL_F_UDPIF (1 << 4)
+
 const char *flow_tun_flag_to_string(uint32_t flags);
 
 /* Maximum number of supported MPLS labels. */
@@ -518,9 +524,12 @@ flow_values_get_next_in_maps(struct 
flow_for_each_in_maps_aux *aux,
 #define FLOW_U64_SIZE(FIELD)                                            \
     DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint64_t))
 
-#define MINIFLOW_TNL_MAP(FIELD)                                         \
-    (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1)                        \
+#define MINIFLOW_TNL_MAP__(FIELD, LEN)                                  \
+    (((UINT64_C(1) << DIV_ROUND_UP(LEN, sizeof(uint64_t))) - 1)         \
      << (offsetof(struct flow, FIELD) / sizeof(uint64_t)))
+
+#define MINIFLOW_TNL_MAP(FIELD)                                         \
+    MINIFLOW_TNL_MAP__(FIELD, sizeof(((struct flow *)0)->FIELD))
 #define MINIFLOW_PKT_MAP(FIELD)                                         \
     (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1)                        \
      << ((offsetof(struct flow, FIELD) / sizeof(uint64_t)) - FLOW_TNL_U64S))
diff --git a/lib/geneve.h b/lib/geneve.h
new file mode 100644
index 0000000..f0256b1
--- /dev/null
+++ b/lib/geneve.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GENEVE_H
+#define GENEVE_H 1
+
+#include "openvswitch/types.h"
+
+#define GENEVE_MAX_OPT_SIZE 124
+#define GENEVE_TOT_OPT_SIZE 252
+
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+
+struct geneve_opt {             /* Header of a single Geneve TLV option. */
+    ovs_be16  opt_class;
+    uint8_t   type;
+#ifdef WORDS_BIGENDIAN          /* Bit-field order depends on endianness. */
+    uint8_t   r1:1;
+    uint8_t   r2:1;
+    uint8_t   r3:1;
+    uint8_t   length:5;         /* Option data length, in 4-byte units. */
+#else
+    uint8_t   length:5;         /* Option data length, in 4-byte units. */
+    uint8_t   r3:1;
+    uint8_t   r2:1;
+    uint8_t   r1:1;
+#endif
+    /* Option data ('length' * 4 bytes) immediately follows this header. */
+};
+
+struct genevehdr {              /* Fixed Geneve header, then the options. */
+#ifdef WORDS_BIGENDIAN          /* Bit-field order depends on endianness. */
+    uint8_t ver:2;
+    uint8_t opt_len:6;          /* Total options length, in 4-byte units. */
+    uint8_t oam:1;
+    uint8_t critical:1;
+    uint8_t rsvd1:6;
+#else
+    uint8_t opt_len:6;          /* Total options length, in 4-byte units. */
+    uint8_t ver:2;
+    uint8_t rsvd1:6;
+    uint8_t critical:1;
+    uint8_t oam:1;
+#endif
+    ovs_be16 proto_type;
+    ovs_16aligned_be32 vni;     /* Tunnel ID is held in the upper 24 bits. */
+    struct geneve_opt options[];    /* 'opt_len' * 4 bytes of TLV options. */
+};
+
+#endif /* geneve.h */
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 0c01414..4c7cf2c 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -196,7 +196,7 @@ mf_is_all_wild(const struct mf_field *mf, const struct 
flow_wildcards *wc)
     CASE_MFF_TUN_METADATA: {
         union mf_value value;
 
-        tun_metadata_read(&wc->masks.tunnel.metadata, mf, &value);
+        tun_metadata_read(&wc->masks.tunnel, mf, &value);
         return is_all_zeros(&value.tun_metadata, mf->n_bytes);
     }
     case MFF_METADATA:
@@ -616,7 +616,7 @@ mf_get_value(const struct mf_field *mf, const struct flow 
*flow,
         value->u8 = flow->tunnel.ip_tos;
         break;
     CASE_MFF_TUN_METADATA:
-        tun_metadata_read(&flow->tunnel.metadata, mf, value);
+        tun_metadata_read(&flow->tunnel, mf, value);
         break;
 
     case MFF_METADATA:
@@ -1119,7 +1119,7 @@ mf_set_flow_value(const struct mf_field *mf,
         flow->tunnel.ip_ttl = value->u8;
         break;
     CASE_MFF_TUN_METADATA:
-        tun_metadata_write(&flow->tunnel.metadata, mf, value);
+        tun_metadata_write(&flow->tunnel, mf, value);
         break;
     case MFF_METADATA:
         flow->metadata = value->be64;
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index a3394dd..a0e53b8 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -1054,11 +1054,10 @@ parse_gre_header(struct dp_packet *packet,
 static void
 pkt_metadata_init_tnl(struct pkt_metadata *md)
 {
-    memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata));
-
-    /* If 'opt_map' is zero then none of the rest of the tunnel metadata
-     * will be read, so we can skip clearing it. */
-    md->tunnel.metadata.opt_map = 0;
+    /* Zero up through the tunnel metadata options. The length and table
+     * are before this and as long as they are empty, the options won't
+     * be looked at. */
+    memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
 }
 
 static int
@@ -1208,8 +1207,7 @@ netdev_geneve_pop_header(struct dp_packet *packet)
     struct pkt_metadata *md = &packet->md;
     struct flow_tnl *tnl = &md->tunnel;
     struct genevehdr *gnh;
-    unsigned int hlen;
-    int err;
+    unsigned int hlen, opts_len;
 
     pkt_metadata_init_tnl(md);
     if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
@@ -1223,7 +1221,8 @@ netdev_geneve_pop_header(struct dp_packet *packet)
         return EINVAL;
     }
 
-    hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4;
+    opts_len = gnh->opt_len * 4;
+    hlen = GENEVE_BASE_HLEN + opts_len;
     if (hlen > dp_packet_size(packet)) {
         VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet 
size=%u\n",
                      hlen, dp_packet_size(packet));
@@ -1245,12 +1244,9 @@ netdev_geneve_pop_header(struct dp_packet *packet)
     tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
     tnl->flags |= FLOW_TNL_F_KEY;
 
-    err = tun_metadata_from_geneve_header(gnh->options, gnh->opt_len * 4,
-                                          &tnl->metadata);
-    if (err) {
-        VLOG_WARN_RL(&err_rl, "invalid geneve options");
-        return err;
-    }
+    memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len);
+    tnl->metadata.present.len = opts_len;
+    tnl->flags |= FLOW_TNL_F_UDPIF;
 
     dp_packet_reset_packet(packet, hlen);
 
@@ -1278,7 +1274,7 @@ netdev_geneve_build_header(const struct netdev *netdev,
 
     ovs_mutex_unlock(&dev->mutex);
 
-    opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel.metadata,
+    opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel,
                                             gnh->options, &crit_opt);
 
     gnh->opt_len = opt_len / 4;
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index c676451..c4806e1 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -151,7 +151,7 @@ odp_set_tunnel_action(const struct nlattr *a, struct 
flow_tnl *tun_key)
 {
     enum odp_key_fitness fitness;
 
-    fitness = odp_tun_key_from_attr(a, tun_key);
+    fitness = odp_tun_key_from_attr(a, true, tun_key);
     ovs_assert(fitness != ODP_FIT_ERROR);
 }
 
diff --git a/lib/odp-util.c b/lib/odp-util.c
index eec0bfb..f142f03 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -1264,7 +1264,8 @@ ovs_frag_type_to_string(enum ovs_frag_type type)
 static enum odp_key_fitness
 odp_tun_key_from_attr__(const struct nlattr *attr,
                         const struct nlattr *flow_attrs, size_t flow_attr_len,
-                        const struct flow_tnl *src_tun, struct flow_tnl *tun)
+                        const struct flow_tnl *src_tun, struct flow_tnl *tun,
+                        bool udpif)
 {
     unsigned int left;
     const struct nlattr *a;
@@ -1335,8 +1336,7 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
         }
         case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
             if (tun_metadata_from_geneve_nlattr(a, flow_attrs, flow_attr_len,
-                                                &src_tun->metadata,
-                                                &tun->metadata)) {
+                                                src_tun, udpif, tun)) {
                 return ODP_FIT_ERROR;
             }
             break;
@@ -1359,10 +1359,11 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
 }
 
 enum odp_key_fitness
-odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
+odp_tun_key_from_attr(const struct nlattr *attr, bool udpif,
+                      struct flow_tnl *tun)
 {
     memset(tun, 0, sizeof *tun);
-    return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun);
+    return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun, udpif);
 }
 
 static void
@@ -1411,13 +1412,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl 
*tun_key,
                        (tun_key->gbp_flags << 16) | ntohs(tun_key->gbp_id));
         nl_msg_end_nested(a, vxlan_opts_ofs);
     }
-
-    if (tun_key == tun_flow_key) {
-        tun_metadata_to_geneve_nlattr_flow(&tun_key->metadata, a);
-    } else {
-        tun_metadata_to_geneve_nlattr_mask(key_buf, &tun_key->metadata,
-                                           &tun_flow_key->metadata, a);
-    }
+    tun_metadata_to_geneve_nlattr(tun_key, tun_flow_key, key_buf, a);
 
     nl_msg_end_nested(a, tun_key_ofs);
 }
@@ -3597,7 +3592,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t 
key_len,
         case OVS_KEY_ATTR_TUNNEL: {
             enum odp_key_fitness res;
 
-            res = odp_tun_key_from_attr(nla, &md->tunnel);
+            res = odp_tun_key_from_attr(nla, true, &md->tunnel);
             if (res == ODP_FIT_ERROR) {
                 memset(&md->tunnel, 0, sizeof md->tunnel);
             } else if (res == ODP_FIT_PERFECT) {
@@ -4107,7 +4102,8 @@ parse_8021q_onward(const struct nlattr 
*attrs[OVS_KEY_ATTR_MAX + 1],
 static enum odp_key_fitness
 odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
                        const struct nlattr *src_key, size_t src_key_len,
-                       struct flow *flow, const struct flow *src_flow)
+                       struct flow *flow, const struct flow *src_flow,
+                       bool udpif)
 {
     const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1];
     uint64_t expected_attrs;
@@ -4150,9 +4146,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t 
key_len,
     if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
         enum odp_key_fitness res;
 
-        res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL], src_key,
+        res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL],
+                                      is_mask ? src_key : NULL,
                                       src_key_len, &src_flow->tunnel,
-                                      &flow->tunnel);
+                                      &flow->tunnel, udpif);
         if (res == ODP_FIT_ERROR) {
             return ODP_FIT_ERROR;
         } else if (res == ODP_FIT_PERFECT) {
@@ -4224,7 +4221,7 @@ enum odp_key_fitness
 odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
                      struct flow *flow)
 {
-   return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow);
+   return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, false);
 }
 
 /* Converts the 'mask_key_len' bytes of OVS_KEY_ATTR_* attributes in 'mask_key'
@@ -4238,7 +4235,32 @@ odp_flow_key_to_mask(const struct nlattr *mask_key, 
size_t mask_key_len,
                      struct flow *mask, const struct flow *flow)
 {
    return odp_flow_key_to_flow__(mask_key, mask_key_len, flow_key, 
flow_key_len,
-                                 mask, flow);
+                                 mask, flow, false);
+}
+
+/* These functions are similar to their non-"_udpif" variants but output a
+ * 'flow' that is suitable for fast-path packet processing.
+ *
+ * Some fields have different representation for flow setup and per-
+ * packet processing (i.e. different between ofproto-dpif and userspace
+ * datapath). In particular, with the non-"_udpif" functions, struct
+ * tun_metadata is in the per-flow format (using 'present.map' and 'opts.u8');
+ * with these functions, struct tun_metadata is in the per-packet format
+ * (using 'present.len' and 'opts.gnv'). */
+enum odp_key_fitness
+odp_flow_key_to_flow_udpif(const struct nlattr *key, size_t key_len,
+                           struct flow *flow)
+{
+   return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, true);
+}
+
+enum odp_key_fitness
+odp_flow_key_to_mask_udpif(const struct nlattr *mask_key, size_t mask_key_len,
+                           const struct nlattr *flow_key, size_t flow_key_len,
+                           struct flow *mask, const struct flow *flow)
+{
+   return odp_flow_key_to_flow__(mask_key, mask_key_len, flow_key, 
flow_key_len,
+                                 mask, flow, true);
 }
 
 /* Returns 'fitness' as a string, for use in debug messages. */
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 1eaa06b..bc27794 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -144,7 +144,7 @@ struct odputil_keybuf {
     uint32_t keybuf[DIV_ROUND_UP(ODPUTIL_FLOW_KEY_BYTES, 4)];
 };
 
-enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *,
+enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, bool udpif,
                                            struct flow_tnl *);
 
 int odp_ufid_from_string(const char *s_, ovs_u128 *ufid);
@@ -225,6 +225,16 @@ enum odp_key_fitness odp_flow_key_to_mask(const struct 
nlattr *mask_key,
                                           size_t flow_key_len,
                                           struct flow *mask,
                                           const struct flow *flow);
+
+enum odp_key_fitness odp_flow_key_to_flow_udpif(const struct nlattr *, size_t,
+                                                struct flow *);
+enum odp_key_fitness odp_flow_key_to_mask_udpif(const struct nlattr *mask_key,
+                                                size_t mask_key_len,
+                                                const struct nlattr *flow_key,
+                                                size_t flow_key_len,
+                                                struct flow *mask,
+                                                const struct flow *flow);
+
 const char *odp_key_fitness_to_string(enum odp_key_fitness);
 
 void commit_odp_tunnel_action(const struct flow *, struct flow *base,
diff --git a/lib/packets.h b/lib/packets.h
index c709af5..38af37b 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -23,6 +23,7 @@
 #include <stdint.h>
 #include <string.h>
 #include "compiler.h"
+#include "geneve.h"
 #include "openvswitch/types.h"
 #include "random.h"
 #include "hash.h"
@@ -802,46 +803,6 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type)
 }
 
 /* Tunnel header */
-#define GENEVE_MAX_OPT_SIZE 124
-#define GENEVE_TOT_OPT_SIZE 252
-
-#define GENEVE_CRIT_OPT_TYPE (1 << 7)
-
-struct geneve_opt {
-    ovs_be16  opt_class;
-    uint8_t   type;
-#ifdef WORDS_BIGENDIAN
-    uint8_t   r1:1;
-    uint8_t   r2:1;
-    uint8_t   r3:1;
-    uint8_t   length:5;
-#else
-    uint8_t   length:5;
-    uint8_t   r3:1;
-    uint8_t   r2:1;
-    uint8_t   r1:1;
-#endif
-    /* Option data */
-};
-
-struct genevehdr {
-#ifdef WORDS_BIGENDIAN
-    uint8_t ver:2;
-    uint8_t opt_len:6;
-    uint8_t oam:1;
-    uint8_t critical:1;
-    uint8_t rsvd1:6;
-#else
-    uint8_t opt_len:6;
-    uint8_t ver:2;
-    uint8_t rsvd1:6;
-    uint8_t critical:1;
-    uint8_t oam:1;
-#endif
-    ovs_be16 proto_type;
-    ovs_16aligned_be32 vni;
-    struct geneve_opt options[];
-};
 
 /* GRE protocol header */
 struct gre_base_hdr {
diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
index 7d82fb7..216d5e4 100644
--- a/lib/tun-metadata.c
+++ b/lib/tun-metadata.c
@@ -226,7 +226,7 @@ tun_metadata_table_request(struct 
ofputil_geneve_table_reply *gtr)
     }
 }
 
-/* Copies the value of field 'mf' from 'metadata' into 'value'.
+/* Copies the value of field 'mf' from 'tnl' (which must be in non-UDPIF 
format) into 'value'.
  *
  * 'mf' must be an MFF_TUN_METADATA* field.
  *
@@ -234,7 +234,7 @@ tun_metadata_table_request(struct 
ofputil_geneve_table_reply *gtr)
  * tun_metadata_init().  If no such table has been created or if 'mf' hasn't
  * been allocated in it yet, this just zeros 'value'. */
 void
-tun_metadata_read(const struct tun_metadata *metadata,
+tun_metadata_read(const struct flow_tnl *tnl,
                   const struct mf_field *mf, union mf_value *value)
 {
     struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
@@ -250,10 +250,10 @@ tun_metadata_read(const struct tun_metadata *metadata,
 
     memset(value->tun_metadata, 0, mf->n_bytes - loc->len);
     memcpy_from_metadata(value->tun_metadata + mf->n_bytes - loc->len,
-                         metadata, loc);
+                         &tnl->metadata, loc);
 }
 
-/* Copies 'value' into field 'mf' in 'metadata'.
+/* Copies 'value' into field 'mf' in 'tnl' (in non-UDPIF format).
  *
  * 'mf' must be an MFF_TUN_METADATA* field.
  *
@@ -261,7 +261,7 @@ tun_metadata_read(const struct tun_metadata *metadata,
  * tun_metadata_init().  If no such table has been created or if 'mf' hasn't
  * been allocated in it yet, this function does nothing. */
 void
-tun_metadata_write(struct tun_metadata *metadata,
+tun_metadata_write(struct flow_tnl *tnl,
                    const struct mf_field *mf, const union mf_value *value)
 {
     struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
@@ -274,9 +274,9 @@ tun_metadata_write(struct tun_metadata *metadata,
 
     loc = &map->entries[idx].loc;
 
-    ULLONG_SET1(metadata->opt_map, idx);
-    memcpy_to_metadata(metadata, value->tun_metadata + mf->n_bytes - loc->len,
-                       loc);
+    ULLONG_SET1(tnl->metadata.present.map, idx);
+    memcpy_to_metadata(&tnl->metadata,
+                       value->tun_metadata + mf->n_bytes - loc->len, loc);
 }
 
 static const struct tun_metadata_loc *
@@ -310,7 +310,7 @@ metadata_loc_from_match(struct tun_table *map, struct match 
*match,
 
 /* Makes 'match' match 'value'/'mask' on field 'mf'.
  *
- * 'mf' must be an MFF_TUN_METADATA* field.
+ * 'mf' must be an MFF_TUN_METADATA* field. 'match' must be in non-UDPIF 
format.
  *
  * If there is global tunnel metadata matching table, this function is
  * effective only if there is already a mapping for 'mf'.  Otherwise, the
@@ -334,6 +334,8 @@ tun_metadata_set_match(const struct mf_field *mf, const 
union mf_value *value,
     unsigned int data_offset;
     union mf_value data;
 
+    ovs_assert(!(match->flow.tunnel.flags & FLOW_TNL_F_UDPIF));
+
     field_len = mf_field_len(mf, value, mask);
     loc = metadata_loc_from_match(map, match, idx, field_len);
     if (!loc) {
@@ -353,7 +355,7 @@ tun_metadata_set_match(const struct mf_field *mf, const 
union mf_value *value,
                                    mask->tun_metadata[data_offset + i];
         }
     }
-    ULLONG_SET1(match->flow.tunnel.metadata.opt_map, idx);
+    ULLONG_SET1(match->flow.tunnel.metadata.present.map, idx);
     memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata, loc);
 
     if (!value) {
@@ -363,31 +365,67 @@ tun_metadata_set_match(const struct mf_field *mf, const 
union mf_value *value,
     } else {
         memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc->len);
     }
-    ULLONG_SET1(match->wc.masks.tunnel.metadata.opt_map, idx);
+    ULLONG_SET1(match->wc.masks.tunnel.metadata.present.map, idx);
     memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata, 
loc);
 }
 
-/* Copies all MFF_TUN_METADATA* fields from 'metadata' to 'flow_metadata'. */
+static bool
+udpif_to_parsed(const struct flow_tnl *flow, const struct flow_tnl *mask,
+                struct flow_tnl *flow_xlate, struct flow_tnl *mask_xlate)
+{
+    if (flow->flags & FLOW_TNL_F_UDPIF) {
+        int err;
+
+        err = tun_metadata_from_geneve_udpif(flow, flow, flow_xlate);
+        if (err) {
+            return false;
+        }
+        /* Translate the mask as well; its result must be checked too. */
+        if (mask) {
+            err = tun_metadata_from_geneve_udpif(flow, mask, mask_xlate);
+            if (err) {
+                return false;
+            }
+        }
+    } else {
+        if (flow->metadata.present.map == 0) {
+            /* There is no tunnel metadata, don't bother copying. */
+            return false;
+        }
+
+        memcpy(flow_xlate, flow, sizeof *flow_xlate);
+        if (mask) {
+            memcpy(mask_xlate, mask, sizeof *mask_xlate);
+        }
+        /* With no table recorded in the copy, use the current global one. */
+        if (!flow_xlate->metadata.tab) {
+            flow_xlate->metadata.tab = ovsrcu_get(struct tun_table *,
+                                                  &metadata_tab);
+        }
+    }
+
+    return true;
+}
+
+/* Copies all MFF_TUN_METADATA* fields from 'tnl' to 'flow_metadata'. */
 void
-tun_metadata_get_fmd(const struct tun_metadata *metadata,
-                     struct match *flow_metadata)
+tun_metadata_get_fmd(const struct flow_tnl *tnl, struct match *flow_metadata)
 {
-    struct tun_table *map;
+    struct flow_tnl flow;
     int i;
 
-    map = metadata->tab;
-    if (!map) {
-        map = ovsrcu_get(struct tun_table *, &metadata_tab);
+    if (!udpif_to_parsed(tnl, NULL, &flow, NULL)) {
+        return;
     }
 
-    ULLONG_FOR_EACH_1 (i, metadata->opt_map) {
+    ULLONG_FOR_EACH_1 (i, flow.metadata.present.map) {
         union mf_value opts;
-        const struct tun_metadata_loc *old_loc = &map->entries[i].loc;
+        const struct tun_metadata_loc *old_loc = 
&flow.metadata.tab->entries[i].loc;
         const struct tun_metadata_loc *new_loc;
 
         new_loc = metadata_loc_from_match(NULL, flow_metadata, i, 
old_loc->len);
 
-        memcpy_from_metadata(opts.tun_metadata, metadata, old_loc);
+        memcpy_from_metadata(opts.tun_metadata, &flow.metadata, old_loc);
         memcpy_to_metadata(&flow_metadata->flow.tunnel.metadata,
                            opts.tun_metadata, new_loc);
 
@@ -424,7 +462,7 @@ memcpy_to_metadata(struct tun_metadata *dst, const void *src,
     int addr = 0;
 
     while (chain) {
-        memcpy(dst->opts + loc->c.offset + addr, (uint8_t *)src + addr,
+        memcpy(dst->opts.u8 + loc->c.offset + addr, (uint8_t *)src + addr,
                chain->len);
         addr += chain->len;
         chain = chain->next;
@@ -439,7 +477,7 @@ memcpy_from_metadata(void *dst, const struct tun_metadata *src,
     int addr = 0;
 
     while (chain) {
-        memcpy((uint8_t *)dst + addr, src->opts + loc->c.offset + addr,
+        memcpy((uint8_t *)dst + addr, src->opts.u8 + loc->c.offset + addr,
                chain->len);
         addr += chain->len;
         chain = chain->next;
@@ -579,10 +617,21 @@ tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
 }
 
 static int
-tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt,
+tun_metadata_from_geneve__(const struct tun_metadata *flow_metadata,
+                           const struct geneve_opt *opt,
                            const struct geneve_opt *flow_opt, int opts_len,
                            struct tun_metadata *metadata)
 {
+    struct tun_table *map;
+    bool is_mask = flow_opt != opt;
+
+    if (!is_mask) {
+        map = ovsrcu_get(struct tun_table *, &metadata_tab);
+        metadata->tab = map;
+    } else {
+        map = flow_metadata->tab;
+    }
+
     if (!map) {
         return 0;
     }
@@ -606,7 +655,7 @@ tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt,
         if (entry) {
             if (entry->loc.len == flow_opt->length * 4) {
                 memcpy_to_metadata(metadata, opt + 1, &entry->loc);
-                ULLONG_SET1(metadata->opt_map, entry - map->entries);
+                ULLONG_SET1(metadata->present.map, entry - map->entries);
             } else {
                 return EINVAL;
             }
@@ -622,59 +671,97 @@ tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt,
     return 0;
 }
 
+static const struct nlattr *
+tun_metadata_find_geneve_key(const struct nlattr *key, uint32_t key_len)
+{
+    const struct nlattr *tnl_key;
+
+    tnl_key = nl_attr_find__(key, key_len, OVS_KEY_ATTR_TUNNEL);
+    if (!tnl_key) {
+        return NULL;
+    }
+
+    return nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
+}
+
+/* Converts from Geneve netlink attributes in 'attr' to tunnel metadata
+ * in 'tun'. The result may be either in UDPIF format or not, as determined
+ * by 'udpif'.
+ *
+ * In the event that a mask is being converted, it is also necessary to
+ * pass in flow information. This includes the full set of netlink attributes
+ * (i.e. not just the Geneve attribute) in 'flow_attrs'/'flow_attr_len' and
+ * the previously converted tunnel metadata 'flow_tun'.
+ *
+ * If a flow rather than mask is being converted, 'flow_attrs' must be NULL. */
 int
 tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
                                 const struct nlattr *flow_attrs,
                                 size_t flow_attr_len,
-                                const struct tun_metadata *flow_metadata,
-                                struct tun_metadata *metadata)
+                                const struct flow_tnl *flow_tun, bool udpif,
+                                struct flow_tnl *tun)
 {
-    struct tun_table *map;
     bool is_mask = !!flow_attrs;
+    int attr_len = nl_attr_get_size(attr);
     const struct nlattr *flow;
 
-    if (is_mask) {
-        const struct nlattr *tnl_key;
-        int mask_len = nl_attr_get_size(attr);
+    /* No need for real translation, just copy things over. */
+    if (udpif) {
+        memcpy(tun->metadata.opts.gnv, nl_attr_get(attr), attr_len);
 
-        tnl_key = nl_attr_find__(flow_attrs, flow_attr_len, OVS_KEY_ATTR_TUNNEL);
-        if (!tnl_key) {
-            return mask_len ? EINVAL : 0;
+        if (!is_mask) {
+            tun->metadata.present.len = attr_len;
+            tun->flags |= FLOW_TNL_F_UDPIF;
+        } else {
+            /* We need to exact match on the length so we don't
+             * accidentally match on sets of options that are the same
+             * at the beginning but with additional options after. */
+            tun->metadata.present.len = 0xff;
         }
 
-        flow = nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
+        return 0;
+    }
+
+    if (is_mask) {
+        flow = tun_metadata_find_geneve_key(flow_attrs, flow_attr_len);
         if (!flow) {
-            return mask_len ? EINVAL : 0;
+            return attr_len ? EINVAL : 0;
         }
 
-        if (mask_len != nl_attr_get_size(flow)) {
+        if (attr_len != nl_attr_get_size(flow)) {
             return EINVAL;
         }
     } else {
         flow = attr;
     }
 
-    if (!is_mask) {
-        map = ovsrcu_get(struct tun_table *, &metadata_tab);
-        metadata->tab = map;
-    } else {
-        map = flow_metadata->tab;
-    }
-
-    return tun_metadata_from_geneve__(map, nl_attr_get(attr), nl_attr_get(flow),
-                                      nl_attr_get_size(flow), metadata);
+    return tun_metadata_from_geneve__(&flow_tun->metadata, nl_attr_get(attr),
+                                      nl_attr_get(flow), nl_attr_get_size(flow),
+                                      &tun->metadata);
 }
 
+/* Converts from the flat Geneve options representation extracted directly
+ * from the tunnel header to the representation that maps options to
+ * pre-allocated locations. The original version (in UDPIF form) is passed
+ * in 'src' and the translated form is stored in 'dst'.  To handle masks, the
+ * flow must also be passed in through 'flow' (in the original, raw form). */
 int
-tun_metadata_from_geneve_header(const struct geneve_opt *opts, int opt_len,
-                                struct tun_metadata *metadata)
+tun_metadata_from_geneve_udpif(const struct flow_tnl *flow,
+                               const struct flow_tnl *src,
+                               struct flow_tnl *dst)
 {
-    struct tun_table *map;
-
-    map = ovsrcu_get(struct tun_table *, &metadata_tab);
-    metadata->tab = map;
+    ovs_assert(flow->flags & FLOW_TNL_F_UDPIF);
 
-    return tun_metadata_from_geneve__(map, opts, opts, opt_len, metadata);
+    if (flow == src) {
+        dst->flags = flow->flags & ~FLOW_TNL_F_UDPIF;
+    } else {
+        dst->metadata.tab = NULL;
+    }
+    dst->metadata.present.map = 0;
+    return tun_metadata_from_geneve__(&flow->metadata, src->metadata.opts.gnv,
+                                      flow->metadata.opts.gnv,
+                                      flow->metadata.present.len,
+                                      &dst->metadata);
 }
 
 static void
@@ -691,7 +778,7 @@ tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b,
 
     *crit_opt = false;
 
-    ULLONG_FOR_EACH_1 (i, flow->opt_map) {
+    ULLONG_FOR_EACH_1 (i, flow->present.map) {
         struct tun_meta_entry *entry = &map->entries[i];
         struct geneve_opt *opt;
 
@@ -709,14 +796,14 @@ tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b,
     }
 }
 
-void
-tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow,
+static void
+tun_metadata_to_geneve_nlattr_flow(const struct flow_tnl *flow,
                                    struct ofpbuf *b)
 {
     size_t nlattr_offset;
     bool crit_opt;
 
-    if (!flow->opt_map) {
+    if (!flow->metadata.present.map) {
         return;
     }
 
@@ -725,58 +812,43 @@ tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow,
      * similar enough that we can use the same mechanism. */
     nlattr_offset = nl_msg_start_nested(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
 
-    tun_metadata_to_geneve__(flow, b, &crit_opt);
+    tun_metadata_to_geneve__(&flow->metadata, b, &crit_opt);
 
     nl_msg_end_nested(b, nlattr_offset);
 }
 
+/* Converts from processed tunnel metadata information (in non-udpif
+ * format) in 'flow' to a stream of Geneve options suitable for
+ * transmission in 'opts'. Additionally returns whether there were
+ * any critical options in 'crit_opt' as well as the total length of
+ * data. */
 int
-tun_metadata_to_geneve_header(const struct tun_metadata *flow,
+tun_metadata_to_geneve_header(const struct flow_tnl *flow,
                               struct geneve_opt *opts, bool *crit_opt)
 {
     struct ofpbuf b;
 
+    ovs_assert(!(flow->flags & FLOW_TNL_F_UDPIF));
+
     ofpbuf_use_stack(&b, opts, GENEVE_TOT_OPT_SIZE);
-    tun_metadata_to_geneve__(flow, &b, crit_opt);
+    tun_metadata_to_geneve__(&flow->metadata, &b, crit_opt);
 
     return b.size;
 }
 
-void
-tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
-                                   const struct tun_metadata *mask,
-                                   const struct tun_metadata *flow,
-                                   struct ofpbuf *b)
+static void
+tun_metadata_to_geneve_mask__(const struct tun_metadata *flow,
+                              const struct tun_metadata *mask,
+                              struct geneve_opt *opt, int opts_len)
 {
     struct tun_table *map = flow->tab;
-    const struct nlattr *tnl_key, *geneve_key;
-    struct nlattr *geneve_mask;
-    struct geneve_opt *opt;
-    int opts_len;
 
     if (!map) {
         return;
     }
 
-    tnl_key = nl_attr_find(key, 0, OVS_KEY_ATTR_TUNNEL);
-    if (!tnl_key) {
-        return;
-    }
-
-    geneve_key = nl_attr_find_nested(tnl_key,
-                                     OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
-    if (!geneve_key) {
-        return;
-    }
-
-    geneve_mask = ofpbuf_tail(b);
-    nl_msg_put(b, geneve_key, geneve_key->nla_len);
-
     /* All of these options have already been validated, so no need
      * for sanity checking. */
-    opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
-    opts_len = nl_attr_get_size(geneve_mask);
-
     while (opts_len > 0) {
         struct tun_meta_entry *entry;
         int len = sizeof(*opt) + opt->length * 4;
@@ -801,6 +873,80 @@ tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
     }
 }
 
+static void
+tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
+                                   const struct flow_tnl *mask,
+                                   const struct flow_tnl *flow,
+                                   struct ofpbuf *b)
+{
+    const struct nlattr *geneve_key;
+    struct nlattr *geneve_mask;
+    struct geneve_opt *opt;
+    int opts_len;
+
+    if (!key) {
+        return;
+    }
+
+    geneve_key = tun_metadata_find_geneve_key(key->data, key->size);
+    if (!geneve_key) {
+        return;
+    }
+
+    geneve_mask = ofpbuf_tail(b);
+    nl_msg_put(b, geneve_key, geneve_key->nla_len);
+
+    opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
+    opts_len = nl_attr_get_size(geneve_mask);
+
+    tun_metadata_to_geneve_mask__(&flow->metadata, &mask->metadata,
+                                  opt, opts_len);
+}
+
+/* Convert from the tunnel metadata in 'tun' to netlink attributes stored
+ * in 'b'. Either UDPIF or non-UDPIF input forms are accepted.
+ *
+ * To assist with parsing, it is necessary to also pass in the tunnel metadata
+ * from the flow in 'flow' as well as the original netlink form of the flow in
+ * 'key'. */
+void
+tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun,
+                              const struct flow_tnl *flow,
+                              const struct ofpbuf *key,
+                              struct ofpbuf *b)
+{
+    bool is_mask = tun != flow;
+
+    if (!(flow->flags & FLOW_TNL_F_UDPIF)) {
+        if (!is_mask) {
+            tun_metadata_to_geneve_nlattr_flow(tun, b);
+        } else {
+            tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b);
+        }
+    } else if (flow->metadata.present.len || is_mask) {
+        nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+                          tun->metadata.opts.gnv,
+                          flow->metadata.present.len);
+    }
+}
+
+/* Converts 'mask_src' (in non-UDPIF format) to a series of masked options in
+ * 'dst'. 'flow_src' (also in non-UDPIF format) and the original set of
+ * options 'flow_src_opt'/'opts_len' are needed as a guide to interpret the
+ * mask data. */
+void
+tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src,
+                                  const struct flow_tnl *mask_src,
+                                  const struct geneve_opt *flow_src_opt,
+                                  int opts_len, struct geneve_opt *dst)
+{
+    ovs_assert(!(flow_src->flags & FLOW_TNL_F_UDPIF));
+
+    memcpy(dst, flow_src_opt, opts_len);
+    tun_metadata_to_geneve_mask__(&flow_src->metadata,
+                                  &mask_src->metadata, dst, opts_len);
+}
+
 static const struct tun_metadata_loc *
 metadata_loc_from_match_read(struct tun_table *map, const struct match *match,
                              unsigned int idx)
@@ -816,19 +962,22 @@ void
 tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
                          const struct match *match)
 {
-    struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
-    const struct tun_metadata *metadata = &match->flow.tunnel.metadata;
-    const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata;
+    struct flow_tnl flow, mask;
     int i;
 
-    ULLONG_FOR_EACH_1 (i, mask->opt_map) {
+    if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel,
+                         &flow, &mask)) {
+        return;
+    }
+
+    ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) {
         const struct tun_metadata_loc *loc;
         union mf_value opts;
         union mf_value mask_opts;
 
-        loc = metadata_loc_from_match_read(map, match, i);
-        memcpy_from_metadata(opts.tun_metadata, metadata, loc);
-        memcpy_from_metadata(mask_opts.tun_metadata, mask, loc);
+        loc = metadata_loc_from_match_read(flow.metadata.tab, match, i);
+        memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc);
+        memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc);
         nxm_put(b, MFF_TUN_METADATA0 + i, oxm, opts.tun_metadata,
                 mask_opts.tun_metadata, loc->len);
     }
@@ -837,22 +986,25 @@ tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
 void
 tun_metadata_match_format(struct ds *s, const struct match *match)
 {
-    struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
-    const struct tun_metadata *metadata = &match->flow.tunnel.metadata;
-    const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata;
+    struct flow_tnl flow, mask;
     unsigned int i;
 
-    ULLONG_FOR_EACH_1 (i, mask->opt_map) {
+    if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel,
+                         &flow, &mask)) {
+        return;
+    }
+
+    ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) {
         const struct tun_metadata_loc *loc;
         union mf_value opts;
 
-        loc = metadata_loc_from_match_read(map, match, i);
+        loc = metadata_loc_from_match_read(flow.metadata.tab, match, i);
 
         ds_put_format(s, "tun_metadata%u=", i);
-        memcpy_from_metadata(opts.tun_metadata, metadata, loc);
+        memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc);
         ds_put_hex(s, opts.tun_metadata, loc->len);
 
-        memcpy_from_metadata(opts.tun_metadata, mask, loc);
+        memcpy_from_metadata(opts.tun_metadata, &mask.metadata, loc);
         if (!is_all_ones(opts.tun_metadata, loc->len)) {
             ds_put_char(s, '/');
             ds_put_hex(s, opts.tun_metadata, loc->len);
diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
index 56bdf2a..49db511 100644
--- a/lib/tun-metadata.h
+++ b/lib/tun-metadata.h
@@ -20,35 +20,56 @@
 #include <stdint.h>
 
 #include "dynamic-string.h"
+#include "geneve.h"
 #include "netlink.h"
 #include "ofpbuf.h"
 #include "openflow/openflow.h"
 
+struct flow_tnl;
 struct match;
 struct mf_field;
 union mf_value;
 struct ofputil_geneve_table_mod;
 struct ofputil_geneve_table_reply;
 struct tun_table;
-struct geneve_opt;
 
 #define TUN_METADATA_NUM_OPTS 64
 #define TUN_METADATA_TOT_OPT_SIZE 256
 
 /* Tunnel option data, plus metadata to aid in their interpretation.
  *
- * 'opt_map' is indexed by type, that is, by the <i> in TUN_METADATA<i>, so
- * that e.g. TUN_METADATA5 is present if 'opt_map & (1ULL << 5)' is nonzero.
- * The actual data for TUN_METADATA5, if present, might be anywhere in 'opts'
- * (not necessarily even contiguous), and finding it requires referring to
- * 'tab'. */
+ * The option data exists in two forms and is interpreted differently depending
+ * on whether FLOW_TNL_F_UDPIF is set in struct flow_tnl flags:
+ *
+ * When FLOW_TNL_F_UDPIF is set, the tunnel metadata is in "userspace datapath
+ * format". This is typically used for fast-path packet processing to avoid
+ * the cost of translating options and in situations where we need to maintain
+ * tunnel metadata exactly as it came in. In this case 'opts.gnv' is raw
+ * packet data from the tunnel header and 'present.len' indicates the length
+ * of the data stored there. In these situations, 'tab' is NULL.
+ *
+ * In all other cases, we are doing flow-based processing (such as during
+ * upcalls). FLOW_TNL_F_UDPIF is not set and options are reordered into
+ * pre-allocated locations. 'present.map' is indexed by type, that is, by the
+ * <i> in TUN_METADATA<i>, so that e.g. TUN_METADATA5 is present if
+ * 'present.map & (1ULL << 5)' is nonzero. The actual data for TUN_METADATA5,
+ * if present, might be anywhere in 'opts.u8' (not necessarily even contiguous),
+ * and finding it requires referring to 'tab', if set, or the global metadata
+ * table. */
 struct tun_metadata {
-    uint64_t opt_map;                        /* 1-bit for each present TLV. */
-    uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
+    union { /* Valid members of 'opts'. When 'opts' is sorted into known types,
+             * 'map' is used. When 'opts' is raw packet data, 'len' is used. */
+        uint64_t map;                      /* 1-bit for each present TLV. */
+        uint8_t len;                       /* Length of data in 'opts'. */
+    } present;
     struct tun_table *tab;      /* Types & lengths for 'opts' and 'opt_map'. */
     uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */
+    union {
+        uint8_t u8[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
+        struct geneve_opt gnv[GENEVE_TOT_OPT_SIZE / sizeof(struct geneve_opt)];
+    } opts;
 };
-BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->opt_map) * 8 >=
+BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->present.map) * 8 >=
                   TUN_METADATA_NUM_OPTS);
 
 /* The location of an option can be stored either as a single offset/len
@@ -81,31 +102,34 @@ void tun_metadata_init(void);
 enum ofperr tun_metadata_table_mod(struct ofputil_geneve_table_mod *);
 void tun_metadata_table_request(struct ofputil_geneve_table_reply *);
 
-void tun_metadata_read(const struct tun_metadata *,
+void tun_metadata_read(const struct flow_tnl *,
                        const struct mf_field *, union mf_value *);
-void tun_metadata_write(struct tun_metadata *,
+void tun_metadata_write(struct flow_tnl *,
                         const struct mf_field *, const union mf_value *);
 void tun_metadata_set_match(const struct mf_field *,
                             const union mf_value *value,
                             const union mf_value *mask, struct match *);
-void tun_metadata_get_fmd(const struct tun_metadata *,
-                          struct match *flow_metadata);
+void tun_metadata_get_fmd(const struct flow_tnl *, struct match *flow_metadata);
 
 int tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
                                     const struct nlattr *flow_attrs,
                                     size_t flow_attr_len,
-                                    const struct tun_metadata *flow_metadata,
-                                    struct tun_metadata *metadata);
-int tun_metadata_from_geneve_header(const struct geneve_opt *, int opt_len,
-                                    struct tun_metadata *metadata);
-
-void tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow,
-                                        struct ofpbuf *);
-void tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
-                                        const struct tun_metadata *mask,
-                                        const struct tun_metadata *flow,
-                                        struct ofpbuf *);
-int tun_metadata_to_geneve_header(const struct tun_metadata *flow,
+                                    const struct flow_tnl *flow_tun,
+                                    bool udpif, struct flow_tnl *tun);
+void tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun,
+                                   const struct flow_tnl *flow,
+                                   const struct ofpbuf *key,
+                                   struct ofpbuf *);
+
+int tun_metadata_from_geneve_udpif(const struct flow_tnl *flow,
+                                   const struct flow_tnl *src,
+                                   struct flow_tnl *dst);
+void tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src,
+                                       const struct flow_tnl *mask_src,
+                                       const struct geneve_opt *flow_src_opt,
+                                       int opts_len, struct geneve_opt *dst);
+
+int tun_metadata_to_geneve_header(const struct flow_tnl *flow,
                                   struct geneve_opt *, bool *crit_opt);
 
 void tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index e54d3fb..185addf 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -972,7 +972,7 @@ sflow_read_set_action(const struct nlattr *attr,
             /* Do not handle multi-encap for now. */
             sflow_actions->tunnel_err = true;
         } else {
-            if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel)
+            if (odp_tun_key_from_attr(attr, false, &sflow_actions->tunnel)
                 == ODP_FIT_ERROR) {
                 /* Tunnel parsing error. */
                 sflow_actions->tunnel_err = true;
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 440f9e9..2d75b13 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1164,7 +1164,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
             memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
 
             if (upcall->out_tun_key) {
-                odp_tun_key_from_attr(upcall->out_tun_key,
+                odp_tun_key_from_attr(upcall->out_tun_key, false,
                                       &output_tunnel_key);
             }
             dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index bd95c8e..0f1724a 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -132,7 +132,7 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port  5'], [0], [dnl
   port  5: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0
 ])
 AT_CHECK([ovs-appctl dpif/dump-flows int-br], [0], [dnl
-tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}),flags(-df-csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:drop
+tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:drop
 ])
 
 OVS_VSWITCHD_STOP
-- 
2.1.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to