The kernel implementation of Geneve options stores the TLV option
data in the flow exactly as received, without any further parsing.
This is then translated to known options for the purposes of matching
on flow setup (which will then install a datapath flow in the form
the kernel is expecting).

The userspace implementation behaves a little bit differently - it
looks up known options as each packet is received. The reason for this
is that there is a much tighter coupling between datapath and flow
translation and the representation is generally expected to be the same.
This works, but it incurs work on a per-packet basis that could be done
per-flow instead.

This patch introduces a small translation step for Geneve packets
between datapath and flow lookup for the userspace datapath in order to
allow the same kind of processing that the kernel does.

Signed-off-by: Jesse Gross <je...@nicira.com>
---
 lib/automake.mk          |   1 +
 lib/dpif-netdev.c        |  78 +++++++++++++++++++++-
 lib/flow.c               |  23 +++++--
 lib/geneve.h             |  63 ++++++++++++++++++
 lib/netdev-vport.c       |  23 +++----
 lib/odp-util.c           |  24 ++++---
 lib/odp-util.h           |   6 ++
 lib/packets.h            |  41 +-----------
 lib/tun-metadata.c       | 166 ++++++++++++++++++++++++++++++-----------------
 lib/tun-metadata.h       |  39 +++++++++--
 tests/tunnel-push-pop.at |   2 +-
 11 files changed, 327 insertions(+), 139 deletions(-)
 create mode 100644 lib/geneve.h

diff --git a/lib/automake.mk b/lib/automake.mk
index f72eb32..dae3fee 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -81,6 +81,7 @@ lib_libopenvswitch_la_SOURCES = \
        lib/fatal-signal.h \
        lib/flow.c \
        lib/flow.h \
+       lib/geneve.h \
        lib/guarded-list.c \
        lib/guarded-list.h \
        lib/hash.c \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b53d52a..50763bc 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1830,6 +1830,7 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow 
*netdev_flow,
             .flow = &netdev_flow->flow,
             .mask = &wc.masks,
             .support = dp_netdev_support,
+            .udpif_no_xlate = true,
         };
 
         miniflow_expand(&netdev_flow->cr.mask->mf, &wc.masks);
@@ -1861,6 +1862,23 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow 
*netdev_flow,
     get_dpif_flow_stats(netdev_flow, &flow->stats);
 }
 
+static void
+xlate_geneve_attr(const struct nlattr *key, uint32_t key_len,
+                  bool is_mask, struct flow *flow)
+{
+    const struct nlattr *geneve_key;
+
+    /* We don't actually want any translation for Geneve - just copy the
+     * original over. */
+    memset(&flow->tunnel.metadata, 0, sizeof flow->tunnel.metadata);
+    geneve_key = tun_metadata_find_geneve_key(key, key_len);
+    if (geneve_key) {
+        int len = nl_attr_get_size(geneve_key);
+        memcpy(&flow->tunnel.metadata.opts.gnv, nl_attr_get(geneve_key), len);
+        flow->tunnel.metadata.present.len = !is_mask ? len : 0xff;
+    }
+}
+
 static int
 dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                               const struct nlattr *mask_key,
@@ -1892,6 +1910,9 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, 
uint32_t key_len,
 
             return EINVAL;
         }
+
+        xlate_geneve_attr(mask_key, mask_key_len, true, mask);
+
     } else {
         enum mf_field_id id;
         /* No mask key, unwildcard everything except fields whose
@@ -1951,6 +1972,8 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, 
uint32_t key_len,
         return EINVAL;
     }
 
+    xlate_geneve_attr(key, key_len, false, flow);
+
     return 0;
 }
 
@@ -1983,6 +2006,19 @@ dpif_netdev_flow_get(const struct dpif *dpif, const 
struct dpif_flow_get *get)
     return error;
 }
 
+static void
+geneve_netdev_flow_to_dpif_flow(struct flow *flow, struct flow_wildcards *wc)
+{
+    struct tun_metadata orig_tunnel;
+
+    tun_metadata_from_geneve_header(&flow->tunnel.metadata,
+                                    &wc->masks.tunnel.metadata,
+                                    &orig_tunnel);
+    tun_metadata_from_geneve_header(&flow->tunnel.metadata,
+                                    &flow->tunnel.metadata,
+                                    &orig_tunnel);
+}
+
 static struct dp_netdev_flow *
 dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
                    struct match *match, const ovs_u128 *ufid,
@@ -2019,6 +2055,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 
         match.flow = flow->flow;
         miniflow_expand(&flow->cr.mask->mf, &match.wc.masks);
+        geneve_netdev_flow_to_dpif_flow(&match.flow, &match.wc);
 
         ds_put_cstr(&ds, "flow_add: ");
         odp_format_ufid(ufid, &ds);
@@ -3021,11 +3058,19 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
                  struct ofpbuf *actions, struct ofpbuf *put_actions)
 {
     struct dp_netdev *dp = pmd->dp;
+    struct tun_metadata orig_tunnel;
+    int err;
 
     if (OVS_UNLIKELY(!dp->upcall_cb)) {
         return ENODEV;
     }
 
+    err = tun_metadata_from_geneve_header(&flow->tunnel.metadata,
+                                          &flow->tunnel.metadata, &orig_tunnel);
+    if (err) {
+        return err;
+    }
+
     if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
         struct ds ds = DS_EMPTY_INITIALIZER;
         char *packet_str;
@@ -3053,8 +3098,37 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
         ds_destroy(&ds);
     }
 
-    return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
-                         actions, wc, put_actions, dp->upcall_aux);
+    err = dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
+                        actions, wc, put_actions, dp->upcall_aux);
+    if (err && err != ENOSPC) {
+        return err;
+    }
+
+    if (wc) {
+        if (wc->masks.tunnel.metadata.present.map) {
+            struct geneve_opt opts[GENEVE_TOT_OPT_SIZE /
+                                   sizeof(struct geneve_opt)];
+
+            tun_metadata_to_geneve_header_mask(&flow->tunnel.metadata,
+                                               &wc->masks.tunnel.metadata,
+                                               orig_tunnel.opts.gnv, opts,
+                                               orig_tunnel.present.len);
+
+            memset(&wc->masks.tunnel.metadata, 0,
+                   sizeof wc->masks.tunnel.metadata);
+            memcpy(&wc->masks.tunnel.metadata.opts.gnv, opts,
+                   orig_tunnel.present.len);
+        }
+        wc->masks.tunnel.metadata.present.len = 0xff;
+    }
+
+    if (orig_tunnel.present.len) {
+        memcpy(&flow->tunnel.metadata.opts.gnv, &orig_tunnel.opts.gnv,
+               orig_tunnel.present.len);
+        flow->tunnel.metadata.present.len = orig_tunnel.present.len;
+    }
+
+    return err;
 }
 
 static inline uint32_t
diff --git a/lib/flow.c b/lib/flow.c
index 6bfe738..a7b3d10 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -440,9 +440,22 @@ miniflow_extract(struct dp_packet *packet, struct miniflow 
*dst)
         miniflow_push_words(mf, tunnel, &md->tunnel,
                             offsetof(struct flow_tnl, metadata) /
                             sizeof(uint64_t));
-        if (md->tunnel.metadata.opt_map) {
-            miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
-                                 sizeof md->tunnel.metadata / 
sizeof(uint64_t));
+
+        if (md->tunnel.metadata.tab) {
+            if (md->tunnel.metadata.present.map) {
+                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
+                                    sizeof md->tunnel.metadata /
+                                    sizeof(uint64_t));
+            }
+        } else {
+            if (md->tunnel.metadata.present.len) {
+                miniflow_push_words(mf, tunnel.metadata.present,
+                                    &md->tunnel.metadata.present, 1);
+                miniflow_push_words(mf, tunnel.metadata.opts,
+                                    &md->tunnel.metadata.opts,
+                                    DIV_ROUND_UP(md->tunnel.metadata.present.len,
+                                    sizeof(uint64_t)));
+            }
         }
     }
     if (md->skb_priority || md->pkt_mark) {
@@ -964,8 +977,8 @@ void flow_wildcards_init_for_packet(struct flow_wildcards 
*wc,
         WC_MASK_FIELD(wc, tunnel.gbp_id);
         WC_MASK_FIELD(wc, tunnel.gbp_flags);
 
-        if (flow->tunnel.metadata.opt_map) {
-            wc->masks.tunnel.metadata.opt_map = flow->tunnel.metadata.opt_map;
+        if (flow->tunnel.metadata.present.map) {
+            wc->masks.tunnel.metadata.present.map = flow->tunnel.metadata.present.map;
             WC_MASK_FIELD(wc, tunnel.metadata.opts);
         }
     } else if (flow->tunnel.tun_id) {
diff --git a/lib/geneve.h b/lib/geneve.h
new file mode 100644
index 0000000..f0256b1
--- /dev/null
+++ b/lib/geneve.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GENEVE_H
+#define GENEVE_H 1
+
+#include "openvswitch/types.h"
+
+#define GENEVE_MAX_OPT_SIZE 124
+#define GENEVE_TOT_OPT_SIZE 252
+
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+
+struct geneve_opt {
+    ovs_be16  opt_class;
+    uint8_t   type;
+#ifdef WORDS_BIGENDIAN
+    uint8_t   r1:1;
+    uint8_t   r2:1;
+    uint8_t   r3:1;
+    uint8_t   length:5;
+#else
+    uint8_t   length:5;
+    uint8_t   r3:1;
+    uint8_t   r2:1;
+    uint8_t   r1:1;
+#endif
+    /* Option data */
+};
+
+struct genevehdr {
+#ifdef WORDS_BIGENDIAN
+    uint8_t ver:2;
+    uint8_t opt_len:6;
+    uint8_t oam:1;
+    uint8_t critical:1;
+    uint8_t rsvd1:6;
+#else
+    uint8_t opt_len:6;
+    uint8_t ver:2;
+    uint8_t rsvd1:6;
+    uint8_t critical:1;
+    uint8_t oam:1;
+#endif
+    ovs_be16 proto_type;
+    ovs_16aligned_be32 vni;
+    struct geneve_opt options[];
+};
+
+#endif /* geneve.h */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index a3394dd..8ac7374 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -1054,11 +1054,10 @@ parse_gre_header(struct dp_packet *packet,
 static void
 pkt_metadata_init_tnl(struct pkt_metadata *md)
 {
-    memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata));
-
-    /* If 'opt_map' is zero then none of the rest of the tunnel metadata
-     * will be read, so we can skip clearing it. */
-    md->tunnel.metadata.opt_map = 0;
+    /* Zero up through the tunnel metadata options. The length and table
+     * are before this and as long as they are empty, the options won't
+     * be looked at. */
+    memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
 }
 
 static int
@@ -1208,8 +1207,7 @@ netdev_geneve_pop_header(struct dp_packet *packet)
     struct pkt_metadata *md = &packet->md;
     struct flow_tnl *tnl = &md->tunnel;
     struct genevehdr *gnh;
-    unsigned int hlen;
-    int err;
+    unsigned int hlen, opts_len;
 
     pkt_metadata_init_tnl(md);
     if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
@@ -1223,7 +1221,8 @@ netdev_geneve_pop_header(struct dp_packet *packet)
         return EINVAL;
     }
 
-    hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4;
+    opts_len = gnh->opt_len * 4;
+    hlen = GENEVE_BASE_HLEN + opts_len;
     if (hlen > dp_packet_size(packet)) {
         VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet 
size=%u\n",
                      hlen, dp_packet_size(packet));
@@ -1245,12 +1244,8 @@ netdev_geneve_pop_header(struct dp_packet *packet)
     tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
     tnl->flags |= FLOW_TNL_F_KEY;
 
-    err = tun_metadata_from_geneve_header(gnh->options, gnh->opt_len * 4,
-                                          &tnl->metadata);
-    if (err) {
-        VLOG_WARN_RL(&err_rl, "invalid geneve options");
-        return err;
-    }
+    memcpy(&tnl->metadata.opts.gnv, gnh->options, opts_len);
+    tnl->metadata.present.len = opts_len;
 
     dp_packet_reset_packet(packet, hlen);
 
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 2eddb34..2de196b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -1470,7 +1470,7 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct 
flow_tnl *tun)
 static void
 tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
                 const struct flow_tnl *tun_flow_key,
-                const struct ofpbuf *key_buf)
+                const struct ofpbuf *key_buf, bool no_xlate)
 {
     size_t tun_key_ofs;
 
@@ -1514,11 +1514,17 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl 
*tun_key,
         nl_msg_end_nested(a, vxlan_opts_ofs);
     }
 
-    if (tun_key == tun_flow_key) {
-        tun_metadata_to_geneve_nlattr_flow(&tun_key->metadata, a);
-    } else {
-        tun_metadata_to_geneve_nlattr_mask(key_buf, &tun_key->metadata,
-                                           &tun_flow_key->metadata, a);
+    if (!no_xlate) {
+        if (tun_key == tun_flow_key) {
+            tun_metadata_to_geneve_nlattr_flow(&tun_key->metadata, a);
+        } else {
+            tun_metadata_to_geneve_nlattr_mask(key_buf, &tun_key->metadata,
+                                               &tun_flow_key->metadata, a);
+        }
+    } else if (tun_flow_key->metadata.present.len) {
+        nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+                          tun_key->metadata.opts.gnv,
+                          tun_flow_key->metadata.present.len);
     }
 
     nl_msg_end_nested(a, tun_key_ofs);
@@ -3462,7 +3468,7 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms 
*parms,
 
     if (flow->tunnel.ip_dst || export_mask) {
         tun_key_to_attr(buf, &data->tunnel, &parms->flow->tunnel,
-                        parms->key_buf);
+                        parms->key_buf, parms->udpif_no_xlate);
     }
 
     nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark);
@@ -3644,7 +3650,7 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const 
struct pkt_metadata *md)
     nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority);
 
     if (md->tunnel.ip_dst) {
-        tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL);
+        tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL, false);
     }
 
     nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, md->pkt_mark);
@@ -4410,7 +4416,7 @@ odp_put_tunnel_action(const struct flow_tnl *tunnel,
                       struct ofpbuf *odp_actions)
 {
     size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET);
-    tun_key_to_attr(odp_actions, tunnel, tunnel, NULL);
+    tun_key_to_attr(odp_actions, tunnel, tunnel, NULL, false);
     nl_msg_end_nested(odp_actions, offset);
 }
 
diff --git a/lib/odp-util.h b/lib/odp-util.h
index f65b006..cce0e28 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -191,6 +191,12 @@ struct odp_flow_key_parms {
      * the mask cannot be constructed from the OVS internal representation
      * and needs to see the original form. */
     const struct ofpbuf *key_buf;
+
+    /* Some fields have different representation for flow setup and per-
+     * packet processing (i.e. different between ofproto-dpif and userspace
+     * datapath). This flag indicates that these fields are already in the
+     * per-packet format rather than per-flow, which is the normal input. */
+    bool udpif_no_xlate;
 };
 
 void odp_flow_key_from_flow(const struct odp_flow_key_parms *, struct ofpbuf 
*);
diff --git a/lib/packets.h b/lib/packets.h
index c709af5..38af37b 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -23,6 +23,7 @@
 #include <stdint.h>
 #include <string.h>
 #include "compiler.h"
+#include "geneve.h"
 #include "openvswitch/types.h"
 #include "random.h"
 #include "hash.h"
@@ -802,46 +803,6 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type)
 }
 
 /* Tunnel header */
-#define GENEVE_MAX_OPT_SIZE 124
-#define GENEVE_TOT_OPT_SIZE 252
-
-#define GENEVE_CRIT_OPT_TYPE (1 << 7)
-
-struct geneve_opt {
-    ovs_be16  opt_class;
-    uint8_t   type;
-#ifdef WORDS_BIGENDIAN
-    uint8_t   r1:1;
-    uint8_t   r2:1;
-    uint8_t   r3:1;
-    uint8_t   length:5;
-#else
-    uint8_t   length:5;
-    uint8_t   r3:1;
-    uint8_t   r2:1;
-    uint8_t   r1:1;
-#endif
-    /* Option data */
-};
-
-struct genevehdr {
-#ifdef WORDS_BIGENDIAN
-    uint8_t ver:2;
-    uint8_t opt_len:6;
-    uint8_t oam:1;
-    uint8_t critical:1;
-    uint8_t rsvd1:6;
-#else
-    uint8_t opt_len:6;
-    uint8_t ver:2;
-    uint8_t rsvd1:6;
-    uint8_t critical:1;
-    uint8_t oam:1;
-#endif
-    ovs_be16 proto_type;
-    ovs_16aligned_be32 vni;
-    struct geneve_opt options[];
-};
 
 /* GRE protocol header */
 struct gre_base_hdr {
diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
index 7d82fb7..d57c228 100644
--- a/lib/tun-metadata.c
+++ b/lib/tun-metadata.c
@@ -274,7 +274,7 @@ tun_metadata_write(struct tun_metadata *metadata,
 
     loc = &map->entries[idx].loc;
 
-    ULLONG_SET1(metadata->opt_map, idx);
+    ULLONG_SET1(metadata->present.map, idx);
     memcpy_to_metadata(metadata, value->tun_metadata + mf->n_bytes - loc->len,
                        loc);
 }
@@ -353,7 +353,7 @@ tun_metadata_set_match(const struct mf_field *mf, const 
union mf_value *value,
                                    mask->tun_metadata[data_offset + i];
         }
     }
-    ULLONG_SET1(match->flow.tunnel.metadata.opt_map, idx);
+    ULLONG_SET1(match->flow.tunnel.metadata.present.map, idx);
     memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata, loc);
 
     if (!value) {
@@ -363,7 +363,7 @@ tun_metadata_set_match(const struct mf_field *mf, const 
union mf_value *value,
     } else {
         memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc->len);
     }
-    ULLONG_SET1(match->wc.masks.tunnel.metadata.opt_map, idx);
+    ULLONG_SET1(match->wc.masks.tunnel.metadata.present.map, idx);
     memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata, 
loc);
 }
 
@@ -380,7 +380,7 @@ tun_metadata_get_fmd(const struct tun_metadata *metadata,
         map = ovsrcu_get(struct tun_table *, &metadata_tab);
     }
 
-    ULLONG_FOR_EACH_1 (i, metadata->opt_map) {
+    ULLONG_FOR_EACH_1 (i, metadata->present.map) {
         union mf_value opts;
         const struct tun_metadata_loc *old_loc = &map->entries[i].loc;
         const struct tun_metadata_loc *new_loc;
@@ -424,7 +424,7 @@ memcpy_to_metadata(struct tun_metadata *dst, const void 
*src,
     int addr = 0;
 
     while (chain) {
-        memcpy(dst->opts + loc->c.offset + addr, (uint8_t *)src + addr,
+        memcpy(dst->opts.u8 + loc->c.offset + addr, (uint8_t *)src + addr,
                chain->len);
         addr += chain->len;
         chain = chain->next;
@@ -439,7 +439,7 @@ memcpy_from_metadata(void *dst, const struct tun_metadata 
*src,
     int addr = 0;
 
     while (chain) {
-        memcpy((uint8_t *)dst + addr, src->opts + loc->c.offset + addr,
+        memcpy((uint8_t *)dst + addr, src->opts.u8 + loc->c.offset + addr,
                chain->len);
         addr += chain->len;
         chain = chain->next;
@@ -579,10 +579,21 @@ tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
 }
 
 static int
-tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt,
+tun_metadata_from_geneve__(const struct tun_metadata *flow_metadata,
+                           const struct geneve_opt *opt,
                            const struct geneve_opt *flow_opt, int opts_len,
                            struct tun_metadata *metadata)
 {
+    struct tun_table *map;
+    bool is_mask = flow_opt != opt;
+
+    if (!is_mask) {
+        map = ovsrcu_get(struct tun_table *, &metadata_tab);
+        metadata->tab = map;
+    } else {
+        map = flow_metadata->tab;
+    }
+
     if (!map) {
         return 0;
     }
@@ -606,7 +617,7 @@ tun_metadata_from_geneve__(struct tun_table *map, const 
struct geneve_opt *opt,
         if (entry) {
             if (entry->loc.len == flow_opt->length * 4) {
                 memcpy_to_metadata(metadata, opt + 1, &entry->loc);
-                ULLONG_SET1(metadata->opt_map, entry - map->entries);
+                ULLONG_SET1(metadata->present.map, entry - map->entries);
             } else {
                 return EINVAL;
             }
@@ -629,20 +640,13 @@ tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
                                 const struct tun_metadata *flow_metadata,
                                 struct tun_metadata *metadata)
 {
-    struct tun_table *map;
     bool is_mask = !!flow_attrs;
     const struct nlattr *flow;
 
     if (is_mask) {
-        const struct nlattr *tnl_key;
         int mask_len = nl_attr_get_size(attr);
 
-        tnl_key = nl_attr_find__(flow_attrs, flow_attr_len, 
OVS_KEY_ATTR_TUNNEL);
-        if (!tnl_key) {
-            return mask_len ? EINVAL : 0;
-        }
-
-        flow = nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
+        flow = tun_metadata_find_geneve_key(flow_attrs, flow_attr_len);
         if (!flow) {
             return mask_len ? EINVAL : 0;
         }
@@ -654,27 +658,37 @@ tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
         flow = attr;
     }
 
-    if (!is_mask) {
-        map = ovsrcu_get(struct tun_table *, &metadata_tab);
-        metadata->tab = map;
-    } else {
-        map = flow_metadata->tab;
-    }
-
-    return tun_metadata_from_geneve__(map, nl_attr_get(attr), 
nl_attr_get(flow),
-                                      nl_attr_get_size(flow), metadata);
+    return tun_metadata_from_geneve__(flow_metadata, nl_attr_get(attr),
+                                      nl_attr_get(flow), nl_attr_get_size(flow),
+                                      metadata);
 }
 
+/* Converts from the flat Geneve options representation extracted directly
+ * from the tunnel header to the representation that maps options to
+ * pre-allocated locations. Since this is done in place for 'metadata',
+ * 'orig' is needed as a temporary scratch copy of the original. This
+ * can also be used by callers that need to restore the original version.
+ * To handle masks, the original flow must also be passed in through 'flow'
+ * (in the original, unconverted form). */
 int
-tun_metadata_from_geneve_header(const struct geneve_opt *opts, int opt_len,
-                                struct tun_metadata *metadata)
+tun_metadata_from_geneve_header(const struct tun_metadata *flow,
+                                struct tun_metadata *metadata,
+                                struct tun_metadata *orig)
 {
-    struct tun_table *map;
+    bool is_mask = flow != metadata;
+    int len = flow->present.len;
 
-    map = ovsrcu_get(struct tun_table *, &metadata_tab);
-    metadata->tab = map;
+    orig->present.len = len;
+    if (!len) {
+        return 0;
+    }
+
+    memcpy(&orig->opts.gnv, &metadata->opts.gnv, flow->present.len);
 
-    return tun_metadata_from_geneve__(map, opts, opts, opt_len, metadata);
+    metadata->present.map = 0;
+    return tun_metadata_from_geneve__(flow, orig->opts.gnv, is_mask ?
+                                               flow->opts.gnv : orig->opts.gnv,
+                                      len, metadata);
 }
 
 static void
@@ -691,7 +705,7 @@ tun_metadata_to_geneve__(const struct tun_metadata *flow, 
struct ofpbuf *b,
 
     *crit_opt = false;
 
-    ULLONG_FOR_EACH_1 (i, flow->opt_map) {
+    ULLONG_FOR_EACH_1 (i, flow->present.map) {
         struct tun_meta_entry *entry = &map->entries[i];
         struct geneve_opt *opt;
 
@@ -716,7 +730,7 @@ tun_metadata_to_geneve_nlattr_flow(const struct 
tun_metadata *flow,
     size_t nlattr_offset;
     bool crit_opt;
 
-    if (!flow->opt_map) {
+    if (!flow->present.map) {
         return;
     }
 
@@ -742,41 +756,19 @@ tun_metadata_to_geneve_header(const struct tun_metadata 
*flow,
     return b.size;
 }
 
-void
-tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
-                                   const struct tun_metadata *mask,
-                                   const struct tun_metadata *flow,
-                                   struct ofpbuf *b)
+static void
+tun_metadata_to_geneve_mask__(const struct tun_metadata *flow,
+                              const struct tun_metadata *mask,
+                              struct geneve_opt *opt, int opts_len)
 {
     struct tun_table *map = flow->tab;
-    const struct nlattr *tnl_key, *geneve_key;
-    struct nlattr *geneve_mask;
-    struct geneve_opt *opt;
-    int opts_len;
 
     if (!map) {
         return;
     }
 
-    tnl_key = nl_attr_find(key, 0, OVS_KEY_ATTR_TUNNEL);
-    if (!tnl_key) {
-        return;
-    }
-
-    geneve_key = nl_attr_find_nested(tnl_key,
-                                     OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
-    if (!geneve_key) {
-        return;
-    }
-
-    geneve_mask = ofpbuf_tail(b);
-    nl_msg_put(b, geneve_key, geneve_key->nla_len);
-
     /* All of these options have already been validated, so no need
      * for sanity checking. */
-    opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
-    opts_len = nl_attr_get_size(geneve_mask);
-
     while (opts_len > 0) {
         struct tun_meta_entry *entry;
         int len = sizeof(*opt) + opt->length * 4;
@@ -801,6 +793,45 @@ tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf 
*key,
     }
 }
 
+void
+tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
+                                   const struct tun_metadata *mask,
+                                   const struct tun_metadata *flow,
+                                   struct ofpbuf *b)
+{
+    const struct nlattr *geneve_key;
+    struct nlattr *geneve_mask;
+    struct geneve_opt *opt;
+    int opts_len;
+
+    if (!key) {
+        return;
+    }
+
+    geneve_key = tun_metadata_find_geneve_key(key->data, key->size);
+    if (!geneve_key) {
+        return;
+    }
+
+    geneve_mask = ofpbuf_tail(b);
+    nl_msg_put(b, geneve_key, geneve_key->nla_len);
+
+    opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
+    opts_len = nl_attr_get_size(geneve_mask);
+
+    tun_metadata_to_geneve_mask__(flow, mask, opt, opts_len);
+}
+
+void
+tun_metadata_to_geneve_header_mask(const struct tun_metadata *flow,
+                                   const struct tun_metadata *mask,
+                                   const struct geneve_opt *flow_opt,
+                                   struct geneve_opt *opt, int opts_len)
+{
+    memcpy(opt, flow_opt, opts_len);
+    tun_metadata_to_geneve_mask__(flow, mask, opt, opts_len);
+}
+
 static const struct tun_metadata_loc *
 metadata_loc_from_match_read(struct tun_table *map, const struct match *match,
                              unsigned int idx)
@@ -821,7 +852,7 @@ tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version 
oxm,
     const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata;
     int i;
 
-    ULLONG_FOR_EACH_1 (i, mask->opt_map) {
+    ULLONG_FOR_EACH_1 (i, mask->present.map) {
         const struct tun_metadata_loc *loc;
         union mf_value opts;
         union mf_value mask_opts;
@@ -842,7 +873,7 @@ tun_metadata_match_format(struct ds *s, const struct match 
*match)
     const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata;
     unsigned int i;
 
-    ULLONG_FOR_EACH_1 (i, mask->opt_map) {
+    ULLONG_FOR_EACH_1 (i, mask->present.map) {
         const struct tun_metadata_loc *loc;
         union mf_value opts;
 
@@ -860,3 +891,16 @@ tun_metadata_match_format(struct ds *s, const struct match 
*match)
         ds_put_char(s, ',');
     }
 }
+
+const struct nlattr *
+tun_metadata_find_geneve_key(const struct nlattr *key, uint32_t key_len)
+{
+    const struct nlattr *tnl_key;
+
+    tnl_key = nl_attr_find__(key, key_len, OVS_KEY_ATTR_TUNNEL);
+    if (!tnl_key) {
+        return NULL;
+    }
+
+    return nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
+}
diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
index 56bdf2a..b67367e 100644
--- a/lib/tun-metadata.h
+++ b/lib/tun-metadata.h
@@ -37,18 +37,34 @@ struct geneve_opt;
 
 /* Tunnel option data, plus metadata to aid in their interpretation.
  *
- * 'opt_map' is indexed by type, that is, by the <i> in TUN_METADATA<i>, so
- * that e.g. TUN_METADATA5 is present if 'opt_map & (1ULL << 5)' is nonzero.
+ * The option data is used in different ways, depending on the code path:
+ *
+ * In places where we are doing per-packet fast path processing (i.e. userspace
+ * datapath), 'opts' is raw packet data from the tunnel header and 'present.len'
+ * indicates the length of the data stored there. In these situations, 'tab'
+ * is always NULL.
+ *
+ * In all other cases, we are doing flow-based processing (such as during
+ * upcalls) and options are reordered into pre-allocated locations.
+ * 'present.map' is indexed by type, that is, by the <i> in TUN_METADATA<i>, so
+ * that e.g. TUN_METADATA5 is present if 'present.map & (1ULL << 5)' is nonzero.
  * The actual data for TUN_METADATA5, if present, might be anywhere in 'opts'
  * (not necessarily even contiguous), and finding it requires referring to
  * 'tab'. */
 struct tun_metadata {
-    uint64_t opt_map;                        /* 1-bit for each present TLV. */
-    uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
+    union { /* Valid members of 'opts'. When 'opts' is sorted into known types,
+             * 'map' is used. When 'opts' is raw packet data, 'len' is used. */
+        uint64_t map;                      /* 1-bit for each present TLV. */
+        uint8_t len;                       /* Length of data in 'opts'. */
+    } present;
     struct tun_table *tab;      /* Types & lengths for 'opts' and 'opt_map'. */
     uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 
bytes */
+    union {
+        uint8_t u8[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
+        struct geneve_opt gnv[GENEVE_TOT_OPT_SIZE / sizeof(struct geneve_opt)];
+    } opts;
 };
-BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->opt_map) * 8 >=
+BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->present.map) * 8 >=
                   TUN_METADATA_NUM_OPTS);
 
 /* The location of an option can be stored either as a single offset/len
@@ -96,8 +112,9 @@ int tun_metadata_from_geneve_nlattr(const struct nlattr 
*attr,
                                     size_t flow_attr_len,
                                     const struct tun_metadata *flow_metadata,
                                     struct tun_metadata *metadata);
-int tun_metadata_from_geneve_header(const struct geneve_opt *, int opt_len,
-                                    struct tun_metadata *metadata);
+int tun_metadata_from_geneve_header(const struct tun_metadata *flow,
+                                    struct tun_metadata *metadata,
+                                    struct tun_metadata *orig);
 
 void tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow,
                                         struct ofpbuf *);
@@ -105,11 +122,19 @@ void tun_metadata_to_geneve_nlattr_mask(const struct 
ofpbuf *key,
                                         const struct tun_metadata *mask,
                                         const struct tun_metadata *flow,
                                         struct ofpbuf *);
+
 int tun_metadata_to_geneve_header(const struct tun_metadata *flow,
                                   struct geneve_opt *, bool *crit_opt);
+void tun_metadata_to_geneve_header_mask(const struct tun_metadata *flow,
+                                        const struct tun_metadata *mask,
+                                        const struct geneve_opt *flow_opt,
+                                        struct geneve_opt *opt, int opts_len);
 
 void tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
                               const struct match *);
 void tun_metadata_match_format(struct ds *, const struct match *);
 
+const struct nlattr *tun_metadata_find_geneve_key(const struct nlattr *key,
+                                                  uint32_t key_len);
+
 #endif /* tun-metadata.h */
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index bd95c8e..0f1724a 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -132,7 +132,7 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port  5'], 
[0], [dnl
   port  5: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0
 ])
 AT_CHECK([ovs-appctl dpif/dump-flows int-br], [0], [dnl
-tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}),flags(-df-csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag=no),
 packets:0, bytes:0, used:never, actions:drop
+tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:drop
 ])
 
 OVS_VSWITCHD_STOP
-- 
2.1.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to