Adding a new field called dl_type in flow tunnel structure to verify the validity of tunnel metadata. This field avoids the need of resetting and validating the entire ipv4/ipv6 tunnel destination address which caused a serious performance drop.
Fixes: 3ae91c019019 ("tunneling: add IPv6 support to netdev_tunnel_config") Signed-off-by: Sugesh Chandran <sugesh.chand...@intel.com> --- lib/flow.c | 7 ++--- lib/match.c | 12 ++++---- lib/meta-flow.c | 10 +++++-- lib/netdev-vport.c | 10 +++---- lib/odp-util.c | 10 +++---- lib/packets.c | 7 ++++- lib/packets.h | 69 ++++++++++++++++++++++++++++++++------------ ofproto/ofproto-dpif-ipfix.c | 3 +- ofproto/ofproto-dpif-rid.c | 3 +- ofproto/ofproto-dpif-sflow.c | 6 ++-- ofproto/tunnel.c | 12 ++++---- 11 files changed, 96 insertions(+), 53 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index 5668d0c..33aadd1 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata) if (flow->tunnel.ip_src) { match_set_tun_src(flow_metadata, flow->tunnel.ip_src); } - if (flow->tunnel.ip_dst) { + if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst); + } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) { + match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst); } if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) { match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src); } - if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) { - match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst); - } if (flow->tunnel.gbp_id != htons(0)) { match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id); } diff --git a/lib/match.c b/lib/match.c index 95d34bc..97a623e 100644 --- a/lib/match.c +++ b/lib/match.c @@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32 dst) void match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32 mask) { - match->wc.masks.tunnel.ip_dst = mask; - match->flow.tunnel.ip_dst = dst & mask; + set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask); + set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask); } void @@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match *match, const struct in6_addr *src, void match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst) { - match->flow.tunnel.ipv6_dst = *dst; - match->wc.masks.tunnel.ipv6_dst = in6addr_exact; + set_ipv6_dst_tnl(&match->flow.tunnel, *dst); + set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact); } void match_set_tun_ipv6_dst_masked(struct match *match, const struct in6_addr *dst, const struct in6_addr *mask) { - match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask); - match->wc.masks.tunnel.ipv6_dst = *mask; + set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask)); + set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask); } void diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 6bd0b99..9d939be 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf, flow->tunnel.ip_src = value->be32; break; case MFF_TUN_DST: - flow->tunnel.ip_dst = value->be32; + set_ipv4_dst_tnl(&flow->tunnel, value->be32); break; case MFF_TUN_IPV6_SRC: flow->tunnel.ipv6_src = value->ipv6; break; case MFF_TUN_IPV6_DST: - flow->tunnel.ipv6_dst = value->ipv6; + set_ipv6_dst_tnl(&flow->tunnel, value->ipv6); break; case MFF_TUN_FLAGS: flow->tunnel.flags = (flow->tunnel.flags & ~FLOW_TNL_PUB_F_MASK) | @@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str) sizeof match->wc.masks.tunnel.ipv6_dst); memset(&match->flow.tunnel.ipv6_dst, 0, sizeof match->flow.tunnel.ipv6_dst); + /* What if flow have a valid ipv4 tunnel data?? + * Reset the dl_type only if thats not the case. + */ + match->wc.masks.tunnel.dl_type = (match->wc.masks.tunnel.dl_type == + htons(ETH_TYPE_IP)) ? + match->wc.masks.tunnel.dl_type : 0; break; case MFF_TUN_FLAGS: match_set_tun_flags_masked(match, 0, 0); diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 88f5022..94ddf59 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, { void *nh; struct ip_header *ip; - struct ovs_16aligned_ip6_hdr *ip6; + struct ip6_hdr *ip6; void *l4; int l3_size; @@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, ip_dst = get_16aligned_be32(&ip->ip_dst); tnl->ip_src = ip_src; - tnl->ip_dst = ip_dst; tnl->ip_tos = ip->ip_tos; tnl->ip_ttl = ip->ip_ttl; + set_ipv4_dst_tnl(tnl, ip_dst); *hlen += IP_HEADER_LEN; } else if (IP_VER(ip->ip_ihl_ver) == 6) { - memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src); - memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst); + memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr, + sizeof ip6->ip6_src); tnl->ip_tos = 0; tnl->ip_ttl = ip6->ip6_hlim; - + set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst); *hlen += IPV6_HEADER_LEN; } else { diff --git a/lib/odp-util.c b/lib/odp-util.c index f16e113..309bb9c 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data) gnh->oam ? "oam," : "", gnh->critical ? "crit," : "", ntohl(get_16aligned_be32(&gnh->vni)) >> 8); - + if (gnh->opt_len) { ds_put_cstr(ds, ",options("); format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4, @@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr, tun->ip_src = nl_attr_get_be32(a); break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - tun->ip_dst = nl_attr_get_be32(a); + set_ipv4_dst_tnl(tun, nl_attr_get_be32(a)); break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: tun->ipv6_src = nl_attr_get_in6_addr(a); break; case OVS_TUNNEL_KEY_ATTR_IPV6_DST: - tun->ipv6_dst = nl_attr_get_in6_addr(a); + set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a)); break; case OVS_TUNNEL_KEY_ATTR_TOS: tun->ip_tos = nl_attr_get_u8(a); @@ -1961,13 +1961,13 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, if (tun_key->ip_src) { nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ip_src); } - if (tun_key->ip_dst) { + if (tun_key->dl_type == htons(ETH_TYPE_IP)) { nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst); } if (ipv6_addr_is_set(&tun_key->ipv6_src)) { nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, &tun_key->ipv6_src); } - if (ipv6_addr_is_set(&tun_key->ipv6_dst)) { + if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) { nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST, &tun_key->ipv6_dst); } if (tun_key->ip_tos) { diff --git a/lib/packets.c b/lib/packets.c index d82341d..4f00c0e 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT; struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl) { - return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : tnl->ipv6_dst; + if (tnl->dl_type == htons(ETH_TYPE_IP)) { + return in6_addr_mapped_ipv4(tnl->ip_dst); + } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) { + return tnl->ipv6_dst; + } + return in6addr_any; } struct in6_addr diff --git a/lib/packets.h b/lib/packets.h index 834e8a4..7e8dd91 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -35,8 +35,26 @@ struct dp_packet; struct ds; +#define ETH_TYPE_IP 0x0800 +#define ETH_TYPE_ARP 0x0806 +#define ETH_TYPE_TEB 0x6558 +#define ETH_TYPE_VLAN_8021Q 0x8100 +#define ETH_TYPE_VLAN ETH_TYPE_VLAN_8021Q +#define ETH_TYPE_VLAN_8021AD 0x88a8 +#define ETH_TYPE_IPV6 0x86dd +#define ETH_TYPE_LACP 0x8809 +#define ETH_TYPE_RARP 0x8035 +#define ETH_TYPE_MPLS 0x8847 +#define ETH_TYPE_MPLS_MCAST 0x8848 + /* Tunnel information used in flow key and metadata. */ struct flow_tnl { + /* The tunnel destination ip/ipv6 address are not initializing to zero + * due to the performance constrains. the ethernet type field 'dl_type' is + * used to validate the destination addresses. 'dl_type' is set to zero + * for a invalid flow_tnl entry. + */ + ovs_be16 dl_type; ovs_be32 ip_dst; struct in6_addr ipv6_dst; ovs_be32 ip_src; @@ -49,7 +67,7 @@ struct flow_tnl { ovs_be16 tp_dst; ovs_be16 gbp_id; uint8_t gbp_flags; - uint8_t pad1[5]; /* Pad to 64 bits. */ + uint8_t pad1[1]; /* Pad to 64 bits. */ struct tun_metadata metadata; }; @@ -76,10 +94,36 @@ struct flow_tnl { static inline bool ipv6_addr_is_set(const struct in6_addr *addr); +static inline void +set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst) +{ + if (tnl) { + tnl->ip_dst = ip_dst; + if (tnl->ip_dst) { + tnl->dl_type = htons(ETH_TYPE_IP); + } else { + tnl->dl_type = 0; + } + } +} + +static inline void +set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst) +{ + if (tnl) { + tnl->ipv6_dst = ipv6_dst; + if (ipv6_addr_is_set(&tnl->ipv6_dst)) { + tnl->dl_type = htons(ETH_TYPE_IPV6); + } else { + tnl->dl_type = 0; + } + } +} + static inline bool flow_tnl_dst_is_set(const struct flow_tnl *tnl) { - return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst); + return (tnl->dl_type != 0); } struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl); @@ -90,8 +134,8 @@ static inline size_t flow_tnl_size(const struct flow_tnl *src) { if (!flow_tnl_dst_is_set(src)) { - /* Covers ip_dst and ipv6_dst only. */ - return offsetof(struct flow_tnl, ip_src); + /* Covers dl_type field only. */ + return offsetof(struct flow_tnl, ip_dst); } if (src->flags & FLOW_TNL_F_UDPIF) { /* Datapath format, cover all options we have. */ @@ -154,11 +198,10 @@ static inline void pkt_metadata_init(struct pkt_metadata *md, odp_port_t port) { /* It can be expensive to zero out all of the tunnel metadata. However, - * we can just zero out ip_dst and the rest of the data will never be + * we can just zero out dl_type and the rest of the data will never be * looked at. */ memset(md, 0, offsetof(struct pkt_metadata, in_port)); - md->tunnel.ip_dst = 0; - md->tunnel.ipv6_dst = in6addr_any; + md->tunnel.dl_type = 0; md->in_port.odp_port = port; } @@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, #define ETH_ADDR_SCAN_ARGS(EA) \ &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4], &(EA).ea[5] -#define ETH_TYPE_IP 0x0800 -#define ETH_TYPE_ARP 0x0806 -#define ETH_TYPE_TEB 0x6558 -#define ETH_TYPE_VLAN_8021Q 0x8100 -#define ETH_TYPE_VLAN ETH_TYPE_VLAN_8021Q -#define ETH_TYPE_VLAN_8021AD 0x88a8 -#define ETH_TYPE_IPV6 0x86dd -#define ETH_TYPE_LACP 0x8809 -#define ETH_TYPE_RARP 0x8035 -#define ETH_TYPE_MPLS 0x8847 -#define ETH_TYPE_MPLS_MCAST 0x8848 - static inline bool eth_type_mpls(ovs_be16 eth_type) { return eth_type == htons(ETH_TYPE_MPLS) || diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index a610c53..ca06583 100644 --- a/ofproto/ofproto-dpif-ipfix.c +++ b/ofproto/ofproto-dpif-ipfix.c @@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di, const struct dp_packet *packet, * of matched packets. */ packet_delta_count = UINT32_MAX / di->bridge_exporter.probability; if (di->bridge_exporter.options->enable_tunnel_sampling) { - if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) { + if (output_odp_port == ODPP_NONE && + flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { /* Input tunnel. */ tunnel_key = &flow->tunnel; tunnel_port = dpif_ipfix_find_port(di, input_odp_port); diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c index d142933..735b5f3 100644 --- a/ofproto/ofproto-dpif-rid.c +++ b/ofproto/ofproto-dpif-rid.c @@ -297,8 +297,7 @@ uint32_t recirc_alloc_id(struct ofproto_dpif *ofproto) { struct flow_tnl tunnel; - tunnel.ip_dst = htonl(0); - tunnel.ipv6_dst = in6addr_any; + tunnel.dl_type = 0; struct recirc_state state = { .table_id = TBL_INTERNAL, .ofproto = ofproto, diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index f11699c..ea21a1a 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr, /* IPv4 */ /* Cannot assume alignment so just use memcpy. */ sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src); - sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst); + set_ipv4_dst_tnl(&sflow_actions->tunnel, get_16aligned_be32(&ip->ip_dst)); sflow_actions->tunnel.ip_tos = ip->ip_tos; sflow_actions->tunnel.ip_ttl = ip->ip_ttl; /* The tnl_push action can supply the ip_protocol too. */ @@ -991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr, sflow_actions->tunnel.ip_src = key->ipv4_src; } if (key->ipv4_dst) { - sflow_actions->tunnel.ip_dst = key->ipv4_dst; + set_ipv4_dst_tnl(&sflow_actions->tunnel, key->ipv4_dst); } if (key->ipv4_proto) { sflow_actions->tunnel_ipproto = key->ipv4_proto; @@ -1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet, fs.output = cookie->sflow.output; /* Input tunnel. */ - if (flow->tunnel.ip_dst) { + if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { memset(&tnlInElem, 0, sizeof(tnlInElem)); tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS; tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type); diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 5cf6c75..b2bd254 100644 --- a/ofproto/tunnel.c +++ b/ofproto/tunnel.c @@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct flow_wildcards *wc) { if (tnl_port_should_receive(flow)) { wc->masks.tunnel.tun_id = OVS_BE64_MAX; - if (flow->tunnel.ip_dst) { + if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { wc->masks.tunnel.ip_src = OVS_BE32_MAX; - wc->masks.tunnel.ip_dst = OVS_BE32_MAX; - } else { + set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX); + } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) { wc->masks.tunnel.ipv6_src = in6addr_exact; - wc->masks.tunnel.ipv6_dst = in6addr_exact; + set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact); } wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM | @@ -424,7 +424,9 @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow, if (!cfg->ip_dst_flow) { flow->tunnel.ip_dst = in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst); if (!flow->tunnel.ip_dst) { - flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst; + set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst); + } else { + set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst); } } flow->pkt_mark = tnl_port->match.pkt_mark; -- 1.9.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev