> -----Original Message----- > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Jesse Gross > Sent: Thursday, July 30, 2015 4:10 AM > To: dev@openvswitch.org > Subject: [ovs-dev] [PATCH 2/2] dpif-netdev: Translate Geneve options per- > flow, not per-packet. > > The kernel implementation of Geneve options stores the TLV option > data in the flow exactly as received, without any further parsing. > This is then translated to known options for the purposes of matching > on flow setup (which will then install a datapath flow in the form > the kernel is expecting). > > The userspace implementation behaves a little bit differently - it > looks up known options as each packet is received. The reason for this > is there is a much tighter coupling between datapath and flow translation > and the representation is generally expected to be the same. This works > but it incurs work on a per-packet basis that could be done per-flow > instead. > > This introduces a small translation step for Geneve packets between > datapath and flow lookup for the userspace datapath in order to > allow the same kind of processing that the kernel does. > > There is a second benefit to this as well: for some operations it is > preferable to keep the options exactly as they were received on the wire, > which this enables. One example is that for packets that are executed from > ofproto-dpif-upcall to the datapath, this avoids the translation of > Geneve metadata. Since this conversion is potentially lossy (for unknown > options), keeping everything in the same format removes the possibility > of dropping options if the packet comes back up to userspace and the > Geneve option translation table has changed. To help with these types of > operations, most functions can understand both formats of data and > seamlessly > do the right thing.
I tested standard bi-directional phy-to-phy flows with DPDK to see whether this patch affects their performance, and it looks fine: performance is the same with and without the patch. In general, my performance is down a few percent from a couple of weeks ago, but I think that is due to something in my setup. > > Signed-off-by: Jesse Gross <je...@nicira.com> > --- > lib/automake.mk | 1 + > lib/dpif-netdev.c | 55 ++++++- > lib/flow.c | 48 ++++-- > lib/flow.h | 13 +- > lib/geneve.h | 63 ++++++++ > lib/meta-flow.c | 6 +- > lib/netdev-vport.c | 26 ++-- > lib/odp-execute.c | 2 +- > lib/odp-util.c | 58 ++++--- > lib/odp-util.h | 12 +- > lib/packets.h | 41 +---- > lib/tun-metadata.c | 352 ++++++++++++++++++++++++++++++--------- > --- > lib/tun-metadata.h | 74 ++++++--- > ofproto/ofproto-dpif-sflow.c | 2 +- > ofproto/ofproto-dpif-upcall.c | 2 +- > tests/tunnel-push-pop.at | 2 +- > 16 files changed, 534 insertions(+), 223 deletions(-) > create mode 100644 lib/geneve.h > > diff --git a/lib/automake.mk b/lib/automake.mk > index faca968..5b6e9e8 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -81,6 +81,7 @@ lib_libopenvswitch_la_SOURCES = \ > lib/fatal-signal.h \ > lib/flow.c \ > lib/flow.h \ > + lib/geneve.h \ > lib/guarded-list.c \ > lib/guarded-list.h \ > lib/hash.c \ > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index f587df5..c31a7e0 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -1884,8 +1884,8 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr > *key, uint32_t key_len, > if (mask_key_len) { > enum odp_key_fitness fitness; > > - fitness = odp_flow_key_to_mask(mask_key, mask_key_len, key, > key_len, > - &wc->masks, flow); > + fitness = odp_flow_key_to_mask_udpif(mask_key, mask_key_len, key, > + key_len, &wc->masks, flow); > if (fitness) { > /* This should not happen: it indicates that > * odp_flow_key_from_mask() and odp_flow_key_to_mask() > @@ -1919,7 +1919,7 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr > *key, uint32_t key_len, > { > odp_port_t in_port; 
> > - if (odp_flow_key_to_flow(key, key_len, flow)) { > + if (odp_flow_key_to_flow_udpif(key, key_len, flow)) { > /* This should not happen: it indicates that > odp_flow_key_from_flow() > * and odp_flow_key_to_flow() disagree on the acceptable form of a > * flow. Log the problem as an error, with enough details to > enable > @@ -3014,11 +3014,25 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, > struct dp_packet *packet_, > struct ofpbuf *actions, struct ofpbuf *put_actions) > { > struct dp_netdev *dp = pmd->dp; > + struct flow_tnl orig_tunnel; > + int err; > > if (OVS_UNLIKELY(!dp->upcall_cb)) { > return ENODEV; > } > > + orig_tunnel.flags = flow->tunnel.flags; > + if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) { > + orig_tunnel.metadata.present.len = flow- > >tunnel.metadata.present.len; > + memcpy(orig_tunnel.metadata.opts.gnv, flow- > >tunnel.metadata.opts.gnv, > + flow->tunnel.metadata.present.len); > + err = tun_metadata_from_geneve_udpif(&orig_tunnel, &orig_tunnel, > + &flow->tunnel); > + if (err) { > + return err; > + } > + } > + > if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) { > struct ds ds = DS_EMPTY_INITIALIZER; > char *packet_str; > @@ -3046,8 +3060,39 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, > struct dp_packet *packet_, > ds_destroy(&ds); > } > > - return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, > - actions, wc, put_actions, dp->upcall_aux); > + err = dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, > + actions, wc, put_actions, dp->upcall_aux); > + if (err && err != ENOSPC) { > + return err; > + } > + > + if (wc) { > + if (wc->masks.tunnel.metadata.present.map) { > + struct geneve_opt opts[GENEVE_TOT_OPT_SIZE / > + sizeof(struct geneve_opt)]; > + > + tun_metadata_to_geneve_udpif_mask(&flow->tunnel, > + &wc->masks.tunnel, > + > orig_tunnel.metadata.opts.gnv, > + > orig_tunnel.metadata.present.len, > + opts); > + > + memset(&wc->masks.tunnel.metadata, 0, > + sizeof wc->masks.tunnel.metadata); > + 
memcpy(&wc->masks.tunnel.metadata.opts.gnv, opts, > + orig_tunnel.metadata.present.len); > + } > + wc->masks.tunnel.metadata.present.len = 0xff; > + } > + > + if (orig_tunnel.flags & FLOW_TNL_F_UDPIF) { > + memcpy(&flow->tunnel.metadata.opts.gnv, > orig_tunnel.metadata.opts.gnv, > + orig_tunnel.metadata.present.len); > + flow->tunnel.metadata.present.len = > orig_tunnel.metadata.present.len; > + flow->tunnel.flags |= FLOW_TNL_F_UDPIF; > + } > + > + return err; > } > > static inline uint32_t > diff --git a/lib/flow.c b/lib/flow.c > index 352e9b8..d3d25e4 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -462,9 +462,22 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > miniflow_push_words(mf, tunnel, &md->tunnel, > offsetof(struct flow_tnl, metadata) / > sizeof(uint64_t)); > - if (md->tunnel.metadata.opt_map) { > - miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata, > - sizeof md->tunnel.metadata / > sizeof(uint64_t)); > + > + if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) { > + if (md->tunnel.metadata.present.map) { > + miniflow_push_words(mf, tunnel.metadata, &md- > >tunnel.metadata, > + sizeof md->tunnel.metadata / > + sizeof(uint64_t)); > + } > + } else { > + if (md->tunnel.metadata.present.len) { > + miniflow_push_words(mf, tunnel.metadata.present, > + &md->tunnel.metadata.present, 1); > + miniflow_push_words(mf, tunnel.metadata.opts.gnv, > + md->tunnel.metadata.opts.gnv, > + DIV_ROUND_UP(md- > >tunnel.metadata.present.len, > + sizeof(uint64_t))); > + } > } > } > if (md->skb_priority || md->pkt_mark) { > @@ -815,7 +828,7 @@ flow_get_metadata(const struct flow *flow, struct match > *flow_metadata) > if (flow->tunnel.gbp_flags) { > match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags); > } > - tun_metadata_get_fmd(&flow->tunnel.metadata, flow_metadata); > + tun_metadata_get_fmd(&flow->tunnel, flow_metadata); > if (flow->metadata != htonll(0)) { > match_set_metadata(flow_metadata, flow->metadata); > } > @@ -1161,9 +1174,16 @@ 
void flow_wildcards_init_for_packet(struct > flow_wildcards *wc, > WC_MASK_FIELD(wc, tunnel.gbp_id); > WC_MASK_FIELD(wc, tunnel.gbp_flags); > > - if (flow->tunnel.metadata.opt_map) { > - wc->masks.tunnel.metadata.opt_map = flow- > >tunnel.metadata.opt_map; > - WC_MASK_FIELD(wc, tunnel.metadata.opts); > + if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) { > + if (flow->tunnel.metadata.present.map) { > + wc->masks.tunnel.metadata.present.map = > + flow- > >tunnel.metadata.present.map; > + WC_MASK_FIELD(wc, tunnel.metadata.opts.u8); > + } > + } else { > + WC_MASK_FIELD(wc, tunnel.metadata.present.len); > + memset(wc->masks.tunnel.metadata.opts.gnv, 0xff, > + flow->tunnel.metadata.present.len); > } > } else if (flow->tunnel.tun_id) { > WC_MASK_FIELD(wc, tunnel.tun_id); > @@ -1253,9 +1273,17 @@ flow_wc_map(const struct flow *flow, struct miniflow > *map) > > map->tnl_map = 0; > if (flow->tunnel.ip_dst) { > - map->tnl_map = MINIFLOW_TNL_MAP(tunnel); > - if (!flow->tunnel.metadata.opt_map) { > - map->tnl_map &= ~MINIFLOW_TNL_MAP(tunnel.metadata); > + map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel, > + offsetof(struct flow_tnl, > metadata)); > + if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) { > + if (flow->tunnel.metadata.present.map) { > + map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata, > + sizeof(flow- > >tunnel.metadata)); > + } > + } else { > + map->tnl_map |= MINIFLOW_TNL_MAP(tunnel.metadata.present.len); > + map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata.opts.gnv, > + flow- > >tunnel.metadata.present.len); > } > } > > diff --git a/lib/flow.h b/lib/flow.h > index 96aa4aa..5bc9267 100644 > --- a/lib/flow.h > +++ b/lib/flow.h > @@ -80,6 +80,12 @@ BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM); > > #define FLOW_TNL_F_MASK ((1 << 4) - 1) > > +/* Purely internal to OVS userspace. These flags should never be exposed to > + * the outside world and so aren't included in the flags mask. */ > + > +/* Tunnel information is in userspace datapath format. 
*/ > +#define FLOW_TNL_F_UDPIF (1 << 4) > + > const char *flow_tun_flag_to_string(uint32_t flags); > > /* Maximum number of supported MPLS labels. */ > @@ -518,9 +524,12 @@ flow_values_get_next_in_maps(struct > flow_for_each_in_maps_aux *aux, > #define FLOW_U64_SIZE(FIELD) \ > DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint64_t)) > > -#define MINIFLOW_TNL_MAP(FIELD) \ > - (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1) \ > +#define MINIFLOW_TNL_MAP__(FIELD, LEN) \ > + (((UINT64_C(1) << DIV_ROUND_UP(LEN, sizeof(uint64_t))) - 1) \ > << (offsetof(struct flow, FIELD) / sizeof(uint64_t))) > + > +#define MINIFLOW_TNL_MAP(FIELD) \ > + MINIFLOW_TNL_MAP__(FIELD, sizeof(((struct flow *)0)->FIELD)) > #define MINIFLOW_PKT_MAP(FIELD) \ > (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1) \ > << ((offsetof(struct flow, FIELD) / sizeof(uint64_t)) - > FLOW_TNL_U64S)) > diff --git a/lib/geneve.h b/lib/geneve.h > new file mode 100644 > index 0000000..f0256b1 > --- /dev/null > +++ b/lib/geneve.h > @@ -0,0 +1,63 @@ > +/* > + * Copyright (c) 2015 Nicira, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#ifndef GENEVE_H > +#define GENEVE_H 1 > + > +#include "openvswitch/types.h" > + > +#define GENEVE_MAX_OPT_SIZE 124 > +#define GENEVE_TOT_OPT_SIZE 252 > + > +#define GENEVE_CRIT_OPT_TYPE (1 << 7) > + > +struct geneve_opt { > + ovs_be16 opt_class; > + uint8_t type; > +#ifdef WORDS_BIGENDIAN > + uint8_t r1:1; > + uint8_t r2:1; > + uint8_t r3:1; > + uint8_t length:5; > +#else > + uint8_t length:5; > + uint8_t r3:1; > + uint8_t r2:1; > + uint8_t r1:1; > +#endif > + /* Option data */ > +}; > + > +struct genevehdr { > +#ifdef WORDS_BIGENDIAN > + uint8_t ver:2; > + uint8_t opt_len:6; > + uint8_t oam:1; > + uint8_t critical:1; > + uint8_t rsvd1:6; > +#else > + uint8_t opt_len:6; > + uint8_t ver:2; > + uint8_t rsvd1:6; > + uint8_t critical:1; > + uint8_t oam:1; > +#endif > + ovs_be16 proto_type; > + ovs_16aligned_be32 vni; > + struct geneve_opt options[]; > +}; > + > +#endif /* geneve.h */ > diff --git a/lib/meta-flow.c b/lib/meta-flow.c > index 0c01414..4c7cf2c 100644 > --- a/lib/meta-flow.c > +++ b/lib/meta-flow.c > @@ -196,7 +196,7 @@ mf_is_all_wild(const struct mf_field *mf, const struct > flow_wildcards *wc) > CASE_MFF_TUN_METADATA: { > union mf_value value; > > - tun_metadata_read(&wc->masks.tunnel.metadata, mf, &value); > + tun_metadata_read(&wc->masks.tunnel, mf, &value); > return is_all_zeros(&value.tun_metadata, mf->n_bytes); > } > case MFF_METADATA: > @@ -616,7 +616,7 @@ mf_get_value(const struct mf_field *mf, const struct > flow *flow, > value->u8 = flow->tunnel.ip_tos; > break; > CASE_MFF_TUN_METADATA: > - tun_metadata_read(&flow->tunnel.metadata, mf, value); > + tun_metadata_read(&flow->tunnel, mf, value); > break; > > case MFF_METADATA: > @@ -1119,7 +1119,7 @@ mf_set_flow_value(const struct mf_field *mf, > flow->tunnel.ip_ttl = value->u8; > break; > CASE_MFF_TUN_METADATA: > - tun_metadata_write(&flow->tunnel.metadata, mf, value); > + tun_metadata_write(&flow->tunnel, mf, value); > break; > case MFF_METADATA: > flow->metadata = value->be64; > 
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c > index a3394dd..a0e53b8 100644 > --- a/lib/netdev-vport.c > +++ b/lib/netdev-vport.c > @@ -1054,11 +1054,10 @@ parse_gre_header(struct dp_packet *packet, > static void > pkt_metadata_init_tnl(struct pkt_metadata *md) > { > - memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata)); > - > - /* If 'opt_map' is zero then none of the rest of the tunnel metadata > - * will be read, so we can skip clearing it. */ > - md->tunnel.metadata.opt_map = 0; > + /* Zero up through the tunnel metadata options. The length and table > + * are before this and as long as they are empty, the options won't > + * be looked at. */ > + memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts)); > } > > static int > @@ -1208,8 +1207,7 @@ netdev_geneve_pop_header(struct dp_packet *packet) > struct pkt_metadata *md = &packet->md; > struct flow_tnl *tnl = &md->tunnel; > struct genevehdr *gnh; > - unsigned int hlen; > - int err; > + unsigned int hlen, opts_len; > > pkt_metadata_init_tnl(md); > if (GENEVE_BASE_HLEN > dp_packet_size(packet)) { > @@ -1223,7 +1221,8 @@ netdev_geneve_pop_header(struct dp_packet *packet) > return EINVAL; > } > > - hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4; > + opts_len = gnh->opt_len * 4; > + hlen = GENEVE_BASE_HLEN + opts_len; > if (hlen > dp_packet_size(packet)) { > VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u > packet size=%u\n", > hlen, dp_packet_size(packet)); > @@ -1245,12 +1244,9 @@ netdev_geneve_pop_header(struct dp_packet *packet) > tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8); > tnl->flags |= FLOW_TNL_F_KEY; > > - err = tun_metadata_from_geneve_header(gnh->options, gnh->opt_len * 4, > - &tnl->metadata); > - if (err) { > - VLOG_WARN_RL(&err_rl, "invalid geneve options"); > - return err; > - } > + memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len); > + tnl->metadata.present.len = opts_len; > + tnl->flags |= FLOW_TNL_F_UDPIF; > > 
dp_packet_reset_packet(packet, hlen); > > @@ -1278,7 +1274,7 @@ netdev_geneve_build_header(const struct netdev > *netdev, > > ovs_mutex_unlock(&dev->mutex); > > - opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel.metadata, > + opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel, > gnh->options, &crit_opt); > > gnh->opt_len = opt_len / 4; > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index c676451..c4806e1 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -151,7 +151,7 @@ odp_set_tunnel_action(const struct nlattr *a, struct > flow_tnl *tun_key) > { > enum odp_key_fitness fitness; > > - fitness = odp_tun_key_from_attr(a, tun_key); > + fitness = odp_tun_key_from_attr(a, true, tun_key); > ovs_assert(fitness != ODP_FIT_ERROR); > } > > diff --git a/lib/odp-util.c b/lib/odp-util.c > index eec0bfb..f142f03 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -1264,7 +1264,8 @@ ovs_frag_type_to_string(enum ovs_frag_type type) > static enum odp_key_fitness > odp_tun_key_from_attr__(const struct nlattr *attr, > const struct nlattr *flow_attrs, size_t > flow_attr_len, > - const struct flow_tnl *src_tun, struct flow_tnl > *tun) > + const struct flow_tnl *src_tun, struct flow_tnl > *tun, > + bool udpif) > { > unsigned int left; > const struct nlattr *a; > @@ -1335,8 +1336,7 @@ odp_tun_key_from_attr__(const struct nlattr *attr, > } > case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: > if (tun_metadata_from_geneve_nlattr(a, flow_attrs, > flow_attr_len, > - &src_tun->metadata, > - &tun->metadata)) { > + src_tun, udpif, tun)) { > return ODP_FIT_ERROR; > } > break; > @@ -1359,10 +1359,11 @@ odp_tun_key_from_attr__(const struct nlattr *attr, > } > > enum odp_key_fitness > -odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) > +odp_tun_key_from_attr(const struct nlattr *attr, bool udpif, > + struct flow_tnl *tun) > { > memset(tun, 0, sizeof *tun); > - return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun); > + return 
odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun, udpif); > } > > static void > @@ -1411,13 +1412,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct > flow_tnl *tun_key, > (tun_key->gbp_flags << 16) | ntohs(tun_key- > >gbp_id)); > nl_msg_end_nested(a, vxlan_opts_ofs); > } > - > - if (tun_key == tun_flow_key) { > - tun_metadata_to_geneve_nlattr_flow(&tun_key->metadata, a); > - } else { > - tun_metadata_to_geneve_nlattr_mask(key_buf, &tun_key->metadata, > - &tun_flow_key->metadata, a); > - } > + tun_metadata_to_geneve_nlattr(tun_key, tun_flow_key, key_buf, a); > > nl_msg_end_nested(a, tun_key_ofs); > } > @@ -3597,7 +3592,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, > size_t key_len, > case OVS_KEY_ATTR_TUNNEL: { > enum odp_key_fitness res; > > - res = odp_tun_key_from_attr(nla, &md->tunnel); > + res = odp_tun_key_from_attr(nla, true, &md->tunnel); > if (res == ODP_FIT_ERROR) { > memset(&md->tunnel, 0, sizeof md->tunnel); > } else if (res == ODP_FIT_PERFECT) { > @@ -4107,7 +4102,8 @@ parse_8021q_onward(const struct nlattr > *attrs[OVS_KEY_ATTR_MAX + 1], > static enum odp_key_fitness > odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, > const struct nlattr *src_key, size_t src_key_len, > - struct flow *flow, const struct flow *src_flow) > + struct flow *flow, const struct flow *src_flow, > + bool udpif) > { > const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1]; > uint64_t expected_attrs; > @@ -4150,9 +4146,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, > size_t key_len, > if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { > enum odp_key_fitness res; > > - res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL], src_key, > + res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL], > + is_mask ? 
src_key : NULL, > src_key_len, &src_flow->tunnel, > - &flow->tunnel); > + &flow->tunnel, udpif); > if (res == ODP_FIT_ERROR) { > return ODP_FIT_ERROR; > } else if (res == ODP_FIT_PERFECT) { > @@ -4224,7 +4221,7 @@ enum odp_key_fitness > odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, > struct flow *flow) > { > - return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow); > + return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, false); > } > > /* Converts the 'mask_key_len' bytes of OVS_KEY_ATTR_* attributes in > 'mask_key' > @@ -4238,7 +4235,32 @@ odp_flow_key_to_mask(const struct nlattr *mask_key, > size_t mask_key_len, > struct flow *mask, const struct flow *flow) > { > return odp_flow_key_to_flow__(mask_key, mask_key_len, flow_key, > flow_key_len, > - mask, flow); > + mask, flow, false); > +} > + > +/* These functions are similar to their non-"_udpif" variants but output a > + * 'flow' that is suitable for fast-path packet processing. > + * > + * Some fields have different representation for flow setup and per- > + * packet processing (i.e. different between ofproto-dpif and userspace > + * datapath). In particular, with the non-"_udpif" functions, struct > + * tun_metadata is in the per-flow format (using 'present.map' and > 'opts.u8'); > + * with these functions, struct tun_metadata is in the per-packet format > + * (using 'present.len' and 'opts.gnv'). 
*/ > +enum odp_key_fitness > +odp_flow_key_to_flow_udpif(const struct nlattr *key, size_t key_len, > + struct flow *flow) > +{ > + return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, true); > +} > + > +enum odp_key_fitness > +odp_flow_key_to_mask_udpif(const struct nlattr *mask_key, size_t > mask_key_len, > + const struct nlattr *flow_key, size_t > flow_key_len, > + struct flow *mask, const struct flow *flow) > +{ > + return odp_flow_key_to_flow__(mask_key, mask_key_len, flow_key, > flow_key_len, > + mask, flow, true); > } > > /* Returns 'fitness' as a string, for use in debug messages. */ > diff --git a/lib/odp-util.h b/lib/odp-util.h > index 1eaa06b..bc27794 100644 > --- a/lib/odp-util.h > +++ b/lib/odp-util.h > @@ -144,7 +144,7 @@ struct odputil_keybuf { > uint32_t keybuf[DIV_ROUND_UP(ODPUTIL_FLOW_KEY_BYTES, 4)]; > }; > > -enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, > +enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, bool > udpif, > struct flow_tnl *); > > int odp_ufid_from_string(const char *s_, ovs_u128 *ufid); > @@ -225,6 +225,16 @@ enum odp_key_fitness odp_flow_key_to_mask(const struct > nlattr *mask_key, > size_t flow_key_len, > struct flow *mask, > const struct flow *flow); > + > +enum odp_key_fitness odp_flow_key_to_flow_udpif(const struct nlattr *, > size_t, > + struct flow *); > +enum odp_key_fitness odp_flow_key_to_mask_udpif(const struct nlattr > *mask_key, > + size_t mask_key_len, > + const struct nlattr > *flow_key, > + size_t flow_key_len, > + struct flow *mask, > + const struct flow *flow); > + > const char *odp_key_fitness_to_string(enum odp_key_fitness); > > void commit_odp_tunnel_action(const struct flow *, struct flow *base, > diff --git a/lib/packets.h b/lib/packets.h > index c709af5..38af37b 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -23,6 +23,7 @@ > #include <stdint.h> > #include <string.h> > #include "compiler.h" > +#include "geneve.h" > #include "openvswitch/types.h" > 
#include "random.h" > #include "hash.h" > @@ -802,46 +803,6 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type) > } > > /* Tunnel header */ > -#define GENEVE_MAX_OPT_SIZE 124 > -#define GENEVE_TOT_OPT_SIZE 252 > - > -#define GENEVE_CRIT_OPT_TYPE (1 << 7) > - > -struct geneve_opt { > - ovs_be16 opt_class; > - uint8_t type; > -#ifdef WORDS_BIGENDIAN > - uint8_t r1:1; > - uint8_t r2:1; > - uint8_t r3:1; > - uint8_t length:5; > -#else > - uint8_t length:5; > - uint8_t r3:1; > - uint8_t r2:1; > - uint8_t r1:1; > -#endif > - /* Option data */ > -}; > - > -struct genevehdr { > -#ifdef WORDS_BIGENDIAN > - uint8_t ver:2; > - uint8_t opt_len:6; > - uint8_t oam:1; > - uint8_t critical:1; > - uint8_t rsvd1:6; > -#else > - uint8_t opt_len:6; > - uint8_t ver:2; > - uint8_t rsvd1:6; > - uint8_t critical:1; > - uint8_t oam:1; > -#endif > - ovs_be16 proto_type; > - ovs_16aligned_be32 vni; > - struct geneve_opt options[]; > -}; > > /* GRE protocol header */ > struct gre_base_hdr { > diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c > index 7d82fb7..216d5e4 100644 > --- a/lib/tun-metadata.c > +++ b/lib/tun-metadata.c > @@ -226,7 +226,7 @@ tun_metadata_table_request(struct > ofputil_geneve_table_reply *gtr) > } > } > > -/* Copies the value of field 'mf' from 'metadata' into 'value'. > +/* Copies the value of field 'mf' from 'tnl' (which must be in non-UDPIF > format) * into 'value'. > * > * 'mf' must be an MFF_TUN_METADATA* field. > * > @@ -234,7 +234,7 @@ tun_metadata_table_request(struct > ofputil_geneve_table_reply *gtr) > * tun_metadata_init(). If no such table has been created or if 'mf' > hasn't > * been allocated in it yet, this just zeros 'value'. 
*/ > void > -tun_metadata_read(const struct tun_metadata *metadata, > +tun_metadata_read(const struct flow_tnl *tnl, > const struct mf_field *mf, union mf_value *value) > { > struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); > @@ -250,10 +250,10 @@ tun_metadata_read(const struct tun_metadata *metadata, > > memset(value->tun_metadata, 0, mf->n_bytes - loc->len); > memcpy_from_metadata(value->tun_metadata + mf->n_bytes - loc->len, > - metadata, loc); > + &tnl->metadata, loc); > } > > -/* Copies 'value' into field 'mf' in 'metadata'. > +/* Copies 'value' into field 'mf' in 'tnl' (in non-UDPIF format). > * > * 'mf' must be an MFF_TUN_METADATA* field. > * > @@ -261,7 +261,7 @@ tun_metadata_read(const struct tun_metadata *metadata, > * tun_metadata_init(). If no such table has been created or if 'mf' > hasn't > * been allocated in it yet, this function does nothing. */ > void > -tun_metadata_write(struct tun_metadata *metadata, > +tun_metadata_write(struct flow_tnl *tnl, > const struct mf_field *mf, const union mf_value *value) > { > struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); > @@ -274,9 +274,9 @@ tun_metadata_write(struct tun_metadata *metadata, > > loc = &map->entries[idx].loc; > > - ULLONG_SET1(metadata->opt_map, idx); > - memcpy_to_metadata(metadata, value->tun_metadata + mf->n_bytes - loc- > >len, > - loc); > + ULLONG_SET1(tnl->metadata.present.map, idx); > + memcpy_to_metadata(&tnl->metadata, > + value->tun_metadata + mf->n_bytes - loc->len, loc); > } > > static const struct tun_metadata_loc * > @@ -310,7 +310,7 @@ metadata_loc_from_match(struct tun_table *map, struct > match *match, > > /* Makes 'match' match 'value'/'mask' on field 'mf'. > * > - * 'mf' must be an MFF_TUN_METADATA* field. > + * 'mf' must be an MFF_TUN_METADATA* field. 'match' must be in non-UDPIF > format. > * > * If there is global tunnel metadata matching table, this function is > * effective only if there is already a mapping for 'mf'. 
Otherwise, the > @@ -334,6 +334,8 @@ tun_metadata_set_match(const struct mf_field *mf, const > union mf_value *value, > unsigned int data_offset; > union mf_value data; > > + ovs_assert(!(match->flow.tunnel.flags & FLOW_TNL_F_UDPIF)); > + > field_len = mf_field_len(mf, value, mask); > loc = metadata_loc_from_match(map, match, idx, field_len); > if (!loc) { > @@ -353,7 +355,7 @@ tun_metadata_set_match(const struct mf_field *mf, const > union mf_value *value, > mask->tun_metadata[data_offset + i]; > } > } > - ULLONG_SET1(match->flow.tunnel.metadata.opt_map, idx); > + ULLONG_SET1(match->flow.tunnel.metadata.present.map, idx); > memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata, > loc); > > if (!value) { > @@ -363,31 +365,67 @@ tun_metadata_set_match(const struct mf_field *mf, > const union mf_value *value, > } else { > memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc- > >len); > } > - ULLONG_SET1(match->wc.masks.tunnel.metadata.opt_map, idx); > + ULLONG_SET1(match->wc.masks.tunnel.metadata.present.map, idx); > memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata, > loc); > } > > -/* Copies all MFF_TUN_METADATA* fields from 'metadata' to 'flow_metadata'. > */ > +static bool > +udpif_to_parsed(const struct flow_tnl *flow, const struct flow_tnl *mask, > + struct flow_tnl *flow_xlate, struct flow_tnl *mask_xlate) > +{ > + if (flow->flags & FLOW_TNL_F_UDPIF) { > + int err; > + > + err = tun_metadata_from_geneve_udpif(flow, flow, flow_xlate); > + if (err) { > + return false; > + } > + > + if (mask) { > + tun_metadata_from_geneve_udpif(flow, mask, mask_xlate); > + if (err) { > + return false; > + } > + } > + } else { > + if (flow->metadata.present.map == 0) { > + /* There is no tunnel metadata, don't bother copying. 
*/ > + return false; > + } > + > + memcpy(flow_xlate, flow, sizeof *flow_xlate); > + if (mask) { > + memcpy(mask_xlate, mask, sizeof *mask_xlate); > + } > + > + if (!flow_xlate->metadata.tab) { > + flow_xlate->metadata.tab = ovsrcu_get(struct tun_table *, > + &metadata_tab); > + } > + } > + > + return true; > +} > + > +/* Copies all MFF_TUN_METADATA* fields from 'tnl' to 'flow_metadata'. */ > void > -tun_metadata_get_fmd(const struct tun_metadata *metadata, > - struct match *flow_metadata) > +tun_metadata_get_fmd(const struct flow_tnl *tnl, struct match > *flow_metadata) > { > - struct tun_table *map; > + struct flow_tnl flow; > int i; > > - map = metadata->tab; > - if (!map) { > - map = ovsrcu_get(struct tun_table *, &metadata_tab); > + if (!udpif_to_parsed(tnl, NULL, &flow, NULL)) { > + return; > } > > - ULLONG_FOR_EACH_1 (i, metadata->opt_map) { > + ULLONG_FOR_EACH_1 (i, flow.metadata.present.map) { > union mf_value opts; > - const struct tun_metadata_loc *old_loc = &map->entries[i].loc; > + const struct tun_metadata_loc *old_loc = &flow.metadata.tab- > >entries[i].loc; > const struct tun_metadata_loc *new_loc; > > new_loc = metadata_loc_from_match(NULL, flow_metadata, i, old_loc- > >len); > > - memcpy_from_metadata(opts.tun_metadata, metadata, old_loc); > + memcpy_from_metadata(opts.tun_metadata, &flow.metadata, old_loc); > memcpy_to_metadata(&flow_metadata->flow.tunnel.metadata, > opts.tun_metadata, new_loc); > > @@ -424,7 +462,7 @@ memcpy_to_metadata(struct tun_metadata *dst, const void > *src, > int addr = 0; > > while (chain) { > - memcpy(dst->opts + loc->c.offset + addr, (uint8_t *)src + addr, > + memcpy(dst->opts.u8 + loc->c.offset + addr, (uint8_t *)src + addr, > chain->len); > addr += chain->len; > chain = chain->next; > @@ -439,7 +477,7 @@ memcpy_from_metadata(void *dst, const struct > tun_metadata *src, > int addr = 0; > > while (chain) { > - memcpy((uint8_t *)dst + addr, src->opts + loc->c.offset + addr, > + memcpy((uint8_t *)dst + addr, src->opts.u8 
+ loc->c.offset + addr, > chain->len); > addr += chain->len; > chain = chain->next; > @@ -579,10 +617,21 @@ tun_metadata_del_entry(struct tun_table *map, uint8_t > idx) > } > > static int > -tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt > *opt, > +tun_metadata_from_geneve__(const struct tun_metadata *flow_metadata, > + const struct geneve_opt *opt, > const struct geneve_opt *flow_opt, int opts_len, > struct tun_metadata *metadata) > { > + struct tun_table *map; > + bool is_mask = flow_opt != opt; > + > + if (!is_mask) { > + map = ovsrcu_get(struct tun_table *, &metadata_tab); > + metadata->tab = map; > + } else { > + map = flow_metadata->tab; > + } > + > if (!map) { > return 0; > } > @@ -606,7 +655,7 @@ tun_metadata_from_geneve__(struct tun_table *map, const > struct geneve_opt *opt, > if (entry) { > if (entry->loc.len == flow_opt->length * 4) { > memcpy_to_metadata(metadata, opt + 1, &entry->loc); > - ULLONG_SET1(metadata->opt_map, entry - map->entries); > + ULLONG_SET1(metadata->present.map, entry - map->entries); > } else { > return EINVAL; > } > @@ -622,59 +671,97 @@ tun_metadata_from_geneve__(struct tun_table *map, > const struct geneve_opt *opt, > return 0; > } > > +static const struct nlattr * > +tun_metadata_find_geneve_key(const struct nlattr *key, uint32_t key_len) > +{ > + const struct nlattr *tnl_key; > + > + tnl_key = nl_attr_find__(key, key_len, OVS_KEY_ATTR_TUNNEL); > + if (!tnl_key) { > + return NULL; > + } > + > + return nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); > +} > + > +/* Converts from Geneve netlink attributes in 'attr' to tunnel metadata > + * in 'tun'. The result may either in be UDPIF format or not, as determined > + * by 'udpif'. > + * > + * In the event that a mask is being converted, it is also necessary to > + * pass in flow information. This includes the full set of netlink > attributes > + * (i.e. 
not just the Geneve attribute) in 'flow_attrs'/'flow_attr_len' and > + * the previously converted tunnel metadata 'flow_tun'. > + * > + * If a flow rather than mask is being converted, 'flow_attrs' must be > NULL. */ > int > tun_metadata_from_geneve_nlattr(const struct nlattr *attr, > const struct nlattr *flow_attrs, > size_t flow_attr_len, > - const struct tun_metadata *flow_metadata, > - struct tun_metadata *metadata) > + const struct flow_tnl *flow_tun, bool > udpif, > + struct flow_tnl *tun) > { > - struct tun_table *map; > bool is_mask = !!flow_attrs; > + int attr_len = nl_attr_get_size(attr); > const struct nlattr *flow; > > - if (is_mask) { > - const struct nlattr *tnl_key; > - int mask_len = nl_attr_get_size(attr); > + /* No need for real translation, just copy things over. */ > + if (udpif) { > + memcpy(tun->metadata.opts.gnv, nl_attr_get(attr), attr_len); > > - tnl_key = nl_attr_find__(flow_attrs, flow_attr_len, > OVS_KEY_ATTR_TUNNEL); > - if (!tnl_key) { > - return mask_len ? EINVAL : 0; > + if (!is_mask) { > + tun->metadata.present.len = attr_len; > + tun->flags |= FLOW_TNL_F_UDPIF; > + } else { > + /* We need to exact match on the length so we don't > + * accidentally match on sets of options that are the same > + * at the beginning but with additional options after. */ > + tun->metadata.present.len = 0xff; > } > > - flow = nl_attr_find_nested(tnl_key, > OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); > + return 0; > + } > + > + if (is_mask) { > + flow = tun_metadata_find_geneve_key(flow_attrs, flow_attr_len); > if (!flow) { > - return mask_len ? EINVAL : 0; > + return attr_len ? 
EINVAL : 0; > } > > - if (mask_len != nl_attr_get_size(flow)) { > + if (attr_len != nl_attr_get_size(flow)) { > return EINVAL; > } > } else { > flow = attr; > } > > - if (!is_mask) { > - map = ovsrcu_get(struct tun_table *, &metadata_tab); > - metadata->tab = map; > - } else { > - map = flow_metadata->tab; > - } > - > - return tun_metadata_from_geneve__(map, nl_attr_get(attr), > nl_attr_get(flow), > - nl_attr_get_size(flow), metadata); > + return tun_metadata_from_geneve__(&flow_tun->metadata, > nl_attr_get(attr), > + nl_attr_get(flow), > nl_attr_get_size(flow), > + &tun->metadata); > } > > +/* Converts from the flat Geneve options representation extracted directly > + * from the tunnel header to the representation that maps options to > + * pre-allocated locations. The original version (in UDPIF form) is passed > + * in 'src' and the translated form is stored in 'dst'. To handle masks, > the > + * flow must also be passed in through 'flow' (in the original, raw form). > */ > int > -tun_metadata_from_geneve_header(const struct geneve_opt *opts, int opt_len, > - struct tun_metadata *metadata) > +tun_metadata_from_geneve_udpif(const struct flow_tnl *flow, > + const struct flow_tnl *src, > + struct flow_tnl *dst) > { > - struct tun_table *map; > - > - map = ovsrcu_get(struct tun_table *, &metadata_tab); > - metadata->tab = map; > + ovs_assert(flow->flags & FLOW_TNL_F_UDPIF); > > - return tun_metadata_from_geneve__(map, opts, opts, opt_len, metadata); > + if (flow == src) { > + dst->flags = flow->flags & ~FLOW_TNL_F_UDPIF; > + } else { > + dst->metadata.tab = NULL; > + } > + dst->metadata.present.map = 0; > + return tun_metadata_from_geneve__(&flow->metadata, src- > >metadata.opts.gnv, > + flow->metadata.opts.gnv, > + flow->metadata.present.len, > + &dst->metadata); > } > > static void > @@ -691,7 +778,7 @@ tun_metadata_to_geneve__(const struct tun_metadata > *flow, struct ofpbuf *b, > > *crit_opt = false; > > - ULLONG_FOR_EACH_1 (i, flow->opt_map) { > +
ULLONG_FOR_EACH_1 (i, flow->present.map) { > struct tun_meta_entry *entry = &map->entries[i]; > struct geneve_opt *opt; > > @@ -709,14 +796,14 @@ tun_metadata_to_geneve__(const struct tun_metadata > *flow, struct ofpbuf *b, > } > } > > -void > -tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow, > +static void > +tun_metadata_to_geneve_nlattr_flow(const struct flow_tnl *flow, > struct ofpbuf *b) > { > size_t nlattr_offset; > bool crit_opt; > > - if (!flow->opt_map) { > + if (!flow->metadata.present.map) { > return; > } > > @@ -725,58 +812,43 @@ tun_metadata_to_geneve_nlattr_flow(const struct > tun_metadata *flow, > * similar enough that we can use the same mechanism. */ > nlattr_offset = nl_msg_start_nested(b, > OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); > > - tun_metadata_to_geneve__(flow, b, &crit_opt); > + tun_metadata_to_geneve__(&flow->metadata, b, &crit_opt); > > nl_msg_end_nested(b, nlattr_offset); > } > > +/* Converts from processed tunnel metadata information (in non-udpif > + * format) in 'flow' to a stream of Geneve options suitable for > + * transmission in 'opts'. Additionally returns whether there were > + * any critical options in 'crit_opt' as well as the total length of > + * data. 
*/ > int > -tun_metadata_to_geneve_header(const struct tun_metadata *flow, > +tun_metadata_to_geneve_header(const struct flow_tnl *flow, > struct geneve_opt *opts, bool *crit_opt) > { > struct ofpbuf b; > > + ovs_assert(!(flow->flags & FLOW_TNL_F_UDPIF)); > + > ofpbuf_use_stack(&b, opts, GENEVE_TOT_OPT_SIZE); > - tun_metadata_to_geneve__(flow, &b, crit_opt); > + tun_metadata_to_geneve__(&flow->metadata, &b, crit_opt); > > return b.size; > } > > -void > -tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, > - const struct tun_metadata *mask, > - const struct tun_metadata *flow, > - struct ofpbuf *b) > +static void > +tun_metadata_to_geneve_mask__(const struct tun_metadata *flow, > + const struct tun_metadata *mask, > + struct geneve_opt *opt, int opts_len) > { > struct tun_table *map = flow->tab; > - const struct nlattr *tnl_key, *geneve_key; > - struct nlattr *geneve_mask; > - struct geneve_opt *opt; > - int opts_len; > > if (!map) { > return; > } > > - tnl_key = nl_attr_find(key, 0, OVS_KEY_ATTR_TUNNEL); > - if (!tnl_key) { > - return; > - } > - > - geneve_key = nl_attr_find_nested(tnl_key, > - OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); > - if (!geneve_key) { > - return; > - } > - > - geneve_mask = ofpbuf_tail(b); > - nl_msg_put(b, geneve_key, geneve_key->nla_len); > - > /* All of these options have already been validated, so no need > * for sanity checking. 
*/ > - opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask)); > - opts_len = nl_attr_get_size(geneve_mask); > - > while (opts_len > 0) { > struct tun_meta_entry *entry; > int len = sizeof(*opt) + opt->length * 4; > @@ -801,6 +873,80 @@ tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf > *key, > } > } > > +static void > +tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, > + const struct flow_tnl *mask, > + const struct flow_tnl *flow, > + struct ofpbuf *b) > +{ > + const struct nlattr *geneve_key; > + struct nlattr *geneve_mask; > + struct geneve_opt *opt; > + int opts_len; > + > + if (!key) { > + return; > + } > + > + geneve_key = tun_metadata_find_geneve_key(key->data, key->size); > + if (!geneve_key) { > + return; > + } > + > + geneve_mask = ofpbuf_tail(b); > + nl_msg_put(b, geneve_key, geneve_key->nla_len); > + > + opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask)); > + opts_len = nl_attr_get_size(geneve_mask); > + > + tun_metadata_to_geneve_mask__(&flow->metadata, &mask->metadata, > + opt, opts_len); > +} > + > +/* Convert from the tunnel metadata in 'tun' to netlink attributes stored > + * in 'b'. Either UDPIF or non-UDPIF input forms are accepted. > + * > + * To assist with parsing, it is necessary to also pass in the tunnel > metadata > + * from the flow in 'flow' as well as the original netlink form of the flow > in > + * 'key'.
*/ > +void > +tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun, > + const struct flow_tnl *flow, > + const struct ofpbuf *key, > + struct ofpbuf *b) > +{ > + bool is_mask = tun != flow; > + > + if (!(flow->flags & FLOW_TNL_F_UDPIF)) { > + if (!is_mask) { > + tun_metadata_to_geneve_nlattr_flow(tun, b); > + } else { > + tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b); > + } > + } else if (flow->metadata.present.len || is_mask) { > + nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, > + tun->metadata.opts.gnv, > + flow->metadata.present.len); > + } > +} > + > +/* Converts 'mask_src' (in non-UDPIF format) to a series of masked options > in > + * 'dst'. 'flow_src' (also in non-UDPIF format) and the original set of > + * options 'flow_src_opt'/'opts_len' are needed as a guide to interpret the > + * mask data. */ > +void > +tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src, > + const struct flow_tnl *mask_src, > + const struct geneve_opt *flow_src_opt, > + int opts_len, struct geneve_opt *dst) > +{ > + ovs_assert(!(flow_src->flags & FLOW_TNL_F_UDPIF)); > + > + memcpy(dst, flow_src_opt, opts_len); > + tun_metadata_to_geneve_mask__(&flow_src->metadata, > + &mask_src->metadata, dst, opts_len); > +} > + > static const struct tun_metadata_loc * > metadata_loc_from_match_read(struct tun_table *map, const struct match > *match, > unsigned int idx) > @@ -816,19 +962,22 @@ void > tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm, > const struct match *match) > { > - struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); > - const struct tun_metadata *metadata = &match->flow.tunnel.metadata; > - const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata; > + struct flow_tnl flow, mask; > int i; > > - ULLONG_FOR_EACH_1 (i, mask->opt_map) { > + if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel, > + &flow, &mask)) { > + return; > + } > + > + ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) { > 
const struct tun_metadata_loc *loc; > union mf_value opts; > union mf_value mask_opts; > > - loc = metadata_loc_from_match_read(map, match, i); > - memcpy_from_metadata(opts.tun_metadata, metadata, loc); > - memcpy_from_metadata(mask_opts.tun_metadata, mask, loc); > + loc = metadata_loc_from_match_read(flow.metadata.tab, match, i); > + memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc); > + memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc); > nxm_put(b, MFF_TUN_METADATA0 + i, oxm, opts.tun_metadata, > mask_opts.tun_metadata, loc->len); > } > @@ -837,22 +986,25 @@ tun_metadata_to_nx_match(struct ofpbuf *b, enum > ofp_version oxm, > void > tun_metadata_match_format(struct ds *s, const struct match *match) > { > - struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); > - const struct tun_metadata *metadata = &match->flow.tunnel.metadata; > - const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata; > + struct flow_tnl flow, mask; > unsigned int i; > > - ULLONG_FOR_EACH_1 (i, mask->opt_map) { > + if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel, > + &flow, &mask)) { > + return; > + } > + > + ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) { > const struct tun_metadata_loc *loc; > union mf_value opts; > > - loc = metadata_loc_from_match_read(map, match, i); > + loc = metadata_loc_from_match_read(flow.metadata.tab, match, i); > > ds_put_format(s, "tun_metadata%u=", i); > - memcpy_from_metadata(opts.tun_metadata, metadata, loc); > + memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc); > ds_put_hex(s, opts.tun_metadata, loc->len); > > - memcpy_from_metadata(opts.tun_metadata, mask, loc); > + memcpy_from_metadata(opts.tun_metadata, &mask.metadata, loc); > if (!is_all_ones(opts.tun_metadata, loc->len)) { > ds_put_char(s, '/'); > ds_put_hex(s, opts.tun_metadata, loc->len); > diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h > index 56bdf2a..49db511 100644 > --- a/lib/tun-metadata.h > +++ 
b/lib/tun-metadata.h > @@ -20,35 +20,56 @@ > #include <stdint.h> > > #include "dynamic-string.h" > +#include "geneve.h" > #include "netlink.h" > #include "ofpbuf.h" > #include "openflow/openflow.h" > > +struct flow_tnl; > struct match; > struct mf_field; > union mf_value; > struct ofputil_geneve_table_mod; > struct ofputil_geneve_table_reply; > struct tun_table; > -struct geneve_opt; > > #define TUN_METADATA_NUM_OPTS 64 > #define TUN_METADATA_TOT_OPT_SIZE 256 > > /* Tunnel option data, plus metadata to aid in their interpretation. > * > - * 'opt_map' is indexed by type, that is, by the <i> in TUN_METADATA<i>, so > - * that e.g. TUN_METADATA5 is present if 'opt_map & (1ULL << 5)' is > nonzero. > - * The actual data for TUN_METADATA5, if present, might be anywhere in > 'opts' > - * (not necessarily even contiguous), and finding it requires referring to > - * 'tab'. */ > + * The option data exists in two forms and is interpreted differently > depending > + * on whether FLOW_TNL_F_UDPIF is set in struct flow_tnl flags: > + * > + * When FLOW_TNL_F_UDPIF is set, the tunnel metadata is in "userspace > datapath > + * format". This is typically used for fast-path packet processing to avoid > + * the cost of translating options and in situations where we need to > maintain > + * tunnel metadata exactly as it came in. In this case 'opts.gnv' is raw > + * packet data from the tunnel header and 'present.len' indicates the > length > + * of the data stored there. In these situations, 'tab' is NULL. > + * > + * In all other cases, we are doing flow-based processing (such as during > + * upcalls). FLOW_TNL_F_UDPIF is not set and options are reordered into > + * pre-allocated locations. 'present.map' is indexed by type, that is, by > the > + * <i> in TUN_METADATA<i>, so that e.g. TUN_METADATA5 is present if > + * 'present.map & (1ULL << 5)' is nonzero. 
The actual data for > TUN_METADATA5, > + * if present, might be anywhere in 'opts.u8' (not necessarily even > contiguous), > + * and finding it requires referring to 'tab', if set, or the global > metadata > + * table. */ > struct tun_metadata { > - uint64_t opt_map; /* 1-bit for each present TLV. > */ > - uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */ > + union { /* Valid members of 'opts'. When 'opts' is sorted into known > types, > + * 'map' is used. When 'opts' is raw packet data, 'len' is > used. */ > + uint64_t map; /* 1-bit for each present TLV. > */ > + uint8_t len; /* Length of data in 'opts'. */ > + } present; > struct tun_table *tab; /* Types & lengths for 'opts' and > 'opt_map'. */ > uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 > bytes */ > + union { > + uint8_t u8[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. > */ > + struct geneve_opt gnv[GENEVE_TOT_OPT_SIZE / sizeof(struct > geneve_opt)]; > + } opts; > }; > -BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->opt_map) * 8 >= > +BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->present.map) * 8 >= > TUN_METADATA_NUM_OPTS); > > /* The location of an option can be stored either as a single offset/len > @@ -81,31 +102,34 @@ void tun_metadata_init(void); > enum ofperr tun_metadata_table_mod(struct ofputil_geneve_table_mod *); > void tun_metadata_table_request(struct ofputil_geneve_table_reply *); > > -void tun_metadata_read(const struct tun_metadata *, > +void tun_metadata_read(const struct flow_tnl *, > const struct mf_field *, union mf_value *); > -void tun_metadata_write(struct tun_metadata *, > +void tun_metadata_write(struct flow_tnl *, > const struct mf_field *, const union mf_value *); > void tun_metadata_set_match(const struct mf_field *, > const union mf_value *value, > const union mf_value *mask, struct match *); > -void tun_metadata_get_fmd(const struct tun_metadata *, > - struct match *flow_metadata); > +void 
tun_metadata_get_fmd(const struct flow_tnl *, struct match > *flow_metadata); > > int tun_metadata_from_geneve_nlattr(const struct nlattr *attr, > const struct nlattr *flow_attrs, > size_t flow_attr_len, > - const struct tun_metadata > *flow_metadata, > - struct tun_metadata *metadata); > -int tun_metadata_from_geneve_header(const struct geneve_opt *, int opt_len, > - struct tun_metadata *metadata); > - > -void tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow, > - struct ofpbuf *); > -void tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, > - const struct tun_metadata *mask, > - const struct tun_metadata *flow, > - struct ofpbuf *); > -int tun_metadata_to_geneve_header(const struct tun_metadata *flow, > + const struct flow_tnl *flow_tun, > + bool udpif, struct flow_tnl *tun); > +void tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun, > + const struct flow_tnl *flow, > + const struct ofpbuf *key, > + struct ofpbuf *); > + > +int tun_metadata_from_geneve_udpif(const struct flow_tnl *flow, > + const struct flow_tnl *src, > + struct flow_tnl *dst); > +void tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src, > + const struct flow_tnl *mask_src, > + const struct geneve_opt > *flow_src_opt, > + int opts_len, struct geneve_opt > *dst); > + > +int tun_metadata_to_geneve_header(const struct flow_tnl *flow, > struct geneve_opt *, bool *crit_opt); > > void tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm, > diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c > index e54d3fb..185addf 100644 > --- a/ofproto/ofproto-dpif-sflow.c > +++ b/ofproto/ofproto-dpif-sflow.c > @@ -972,7 +972,7 @@ sflow_read_set_action(const struct nlattr *attr, > /* Do not handle multi-encap for now. 
*/ > sflow_actions->tunnel_err = true; > } else { > - if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel) > + if (odp_tun_key_from_attr(attr, false, &sflow_actions->tunnel) > == ODP_FIT_ERROR) { > /* Tunnel parsing error. */ > sflow_actions->tunnel_err = true; > diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c > index 440f9e9..2d75b13 100644 > --- a/ofproto/ofproto-dpif-upcall.c > +++ b/ofproto/ofproto-dpif-upcall.c > @@ -1164,7 +1164,7 @@ process_upcall(struct udpif *udpif, struct upcall > *upcall, > memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix); > > if (upcall->out_tun_key) { > - odp_tun_key_from_attr(upcall->out_tun_key, > + odp_tun_key_from_attr(upcall->out_tun_key, false, > &output_tunnel_key); > } > dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow, > diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at > index bd95c8e..0f1724a 100644 > --- a/tests/tunnel-push-pop.at > +++ b/tests/tunnel-push-pop.at > @@ -132,7 +132,7 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], > [0], [dnl > port 5: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0 > ]) > AT_CHECK([ovs-appctl dpif/dump-flows int-br], [0], [dnl > - > tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,typ > e=0x80,len=4,0xa/0xf}),flags(-df- > csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag > =no), packets:0, bytes:0, used:never, actions:drop > +tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,ty > pe=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df- > csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag > =no), packets:0, bytes:0, used:never, actions:drop > ]) > > OVS_VSWITCHD_STOP > -- > 2.1.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org 
http://openvswitch.org/mailman/listinfo/dev