Hi Daniele, Thank you for sending out the patch. The proposal looks fine for me and also I verified that the performance is restored with the patch.
One suggestion is , Do we really have to pass two parameters to the "dp_netdev_input" to init and validate the metadata.? Can we use the pointer to the port_id/dp_netdev_port. This way there wont be a dependency between arguments to a function. What do you think?? Does it make more complicated?? Regards _Sugesh > -----Original Message----- > From: Daniele Di Proietto [mailto:diproiet...@vmware.com] > Sent: Friday, January 29, 2016 2:01 AM > To: Chandran, Sugesh <sugesh.chand...@intel.com>; jb...@redhat.com; > casca...@redhat.com > Cc: dev@openvswitch.org > Subject: Re: [ovs-dev] [PATCH v3] ipv6 tunneling: Fix for performance drop > introduced by ipv6 tunnel support. > > Hi Sugesh, > > Sorry for the delay, I've been trying to test different approaches. > > I tested the patch and I see that the performance is up again. I'm not sure > whether introducing the 'dl_type' flag is going to be very easy to maintain, > since we have to remember to update when we touch the structure. > Unfortunately that structure is used everywhere in OVS, not just in the fast > path. > > I have prepared a workaround to speed up the metadata initialization here: > > http://openvswitch.org/pipermail/dev/2016-January/065263.html > > In my setup it restores the throughput to the same numbers. It complicates > the code slightly, but the modifications are local to dpif-netdev. > > What do you think? > > Thanks, > > Daniele > > On 19/01/2016 09:23, "Sugesh Chandran" <sugesh.chand...@intel.com> > wrote: > > >Adding a new field called dl_type in flow tunnel structure to verify > >the validity of tunnel metadata. This field avoids the need of > >resetting and validating the entire ipv4/ipv6 tunnel destination > >address which caused a serious performance drop. > > > >Fixes: 3ae91c019019 ("tunneling: add IPv6 support to > >netdev_tunnel_config") > >Signed-off-by: Sugesh Chandran <sugesh.chand...@intel.com> > >--- > > lib/flow.c | 7 ++--- > > lib/match.c | 12 ++++---- > > lib/meta-flow.c | 10 +++++-- > > lib/netdev-vport.c | 10 +++---- > > lib/odp-util.c | 10 +++---- > > lib/packets.c | 7 ++++- > > lib/packets.h | 69 > >++++++++++++++++++++++++++++++++------------ > > ofproto/ofproto-dpif-ipfix.c | 3 +- > > ofproto/ofproto-dpif-rid.c | 3 +- > > ofproto/ofproto-dpif-sflow.c | 6 ++-- > > ofproto/tunnel.c | 12 ++++---- > > 11 files changed, 96 insertions(+), 53 deletions(-) > > > >diff --git a/lib/flow.c b/lib/flow.c > >index 5668d0c..33aadd1 100644 > >--- a/lib/flow.c > >+++ b/lib/flow.c > >@@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow, > struct > >match *flow_metadata) > > if (flow->tunnel.ip_src) { > > match_set_tun_src(flow_metadata, flow->tunnel.ip_src); > > } > >- if (flow->tunnel.ip_dst) { > >+ if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { > > match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst); > >+ } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) { > >+ match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst); > > } > > if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) { > > match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src); > > } > >- if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) { > >- match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst); > >- } > > if (flow->tunnel.gbp_id != htons(0)) { > > match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id); > > } > >diff --git a/lib/match.c b/lib/match.c > >index 95d34bc..97a623e 100644 > >--- a/lib/match.c > >+++ b/lib/match.c > >@@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32 > >dst) void match_set_tun_dst_masked(struct match *match, ovs_be32 dst, > >ovs_be32 > >mask) > > { > >- match->wc.masks.tunnel.ip_dst = mask; > >- match->flow.tunnel.ip_dst = dst & mask; > >+ set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask); > >+ set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask); > > } > > > > void > >@@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match > >*match, const struct in6_addr *src, void > >match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst) > >{ > >- match->flow.tunnel.ipv6_dst = *dst; > >- match->wc.masks.tunnel.ipv6_dst = in6addr_exact; > >+ set_ipv6_dst_tnl(&match->flow.tunnel, *dst); > >+ set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact); > > } > > > > void > > match_set_tun_ipv6_dst_masked(struct match *match, const struct > >in6_addr *dst, > > const struct in6_addr *mask) { > >- match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask); > >- match->wc.masks.tunnel.ipv6_dst = *mask; > >+ set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask)); > >+ set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask); > > } > > > > void > >diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 6bd0b99..9d939be > >100644 > >--- a/lib/meta-flow.c > >+++ b/lib/meta-flow.c > >@@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf, > > flow->tunnel.ip_src = value->be32; > > break; > > case MFF_TUN_DST: > >- flow->tunnel.ip_dst = value->be32; > >+ set_ipv4_dst_tnl(&flow->tunnel, value->be32); > > break; > > case MFF_TUN_IPV6_SRC: > > flow->tunnel.ipv6_src = value->ipv6; > > break; > > case MFF_TUN_IPV6_DST: > >- flow->tunnel.ipv6_dst = value->ipv6; > >+ set_ipv6_dst_tnl(&flow->tunnel, value->ipv6); > > break; > > case MFF_TUN_FLAGS: > > flow->tunnel.flags = (flow->tunnel.flags & > > ~FLOW_TNL_PUB_F_MASK) > >| > >@@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct > >match *match, char **err_str) > > sizeof match->wc.masks.tunnel.ipv6_dst); > > memset(&match->flow.tunnel.ipv6_dst, 0, > > sizeof match->flow.tunnel.ipv6_dst); > >+ /* What if flow have a valid ipv4 tunnel data?? > >+ * Reset the dl_type only if thats not the case. > >+ */ > >+ match->wc.masks.tunnel.dl_type = > >+ (match->wc.masks.tunnel.dl_type > >== > >+ htons(ETH_TYPE_IP)) ? > >+ > >+ match->wc.masks.tunnel.dl_type > >: 0; > > break; > > case MFF_TUN_FLAGS: > > match_set_tun_flags_masked(match, 0, 0); diff --git > >a/lib/netdev-vport.c b/lib/netdev-vport.c index 88f5022..94ddf59 100644 > >--- a/lib/netdev-vport.c > >+++ b/lib/netdev-vport.c > >@@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct > >flow_tnl *tnl, { > > void *nh; > > struct ip_header *ip; > >- struct ovs_16aligned_ip6_hdr *ip6; > >+ struct ip6_hdr *ip6; > > void *l4; > > int l3_size; > > > >@@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet, > >struct flow_tnl *tnl, > > ip_dst = get_16aligned_be32(&ip->ip_dst); > > > > tnl->ip_src = ip_src; > >- tnl->ip_dst = ip_dst; > > tnl->ip_tos = ip->ip_tos; > > tnl->ip_ttl = ip->ip_ttl; > >+ set_ipv4_dst_tnl(tnl, ip_dst); > > > > *hlen += IP_HEADER_LEN; > > > > } else if (IP_VER(ip->ip_ihl_ver) == 6) { > > > >- memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof > >ip6->ip6_src); > >- memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof > >ip6->ip6_dst); > >+ memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr, > >+ sizeof ip6->ip6_src); > > tnl->ip_tos = 0; > > tnl->ip_ttl = ip6->ip6_hlim; > >- > >+ set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst); > > *hlen += IPV6_HEADER_LEN; > > > > } else { > >diff --git a/lib/odp-util.c b/lib/odp-util.c index f16e113..309bb9c > >100644 > >--- a/lib/odp-util.c > >+++ b/lib/odp-util.c > >@@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct > >ovs_action_push_tnl *data) > > gnh->oam ? "oam," : "", > > gnh->critical ? "crit," : "", > > ntohl(get_16aligned_be32(&gnh->vni)) >> 8); > >- > >+ > > if (gnh->opt_len) { > > ds_put_cstr(ds, ",options("); > > format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4, > >@@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr > *attr, > > tun->ip_src = nl_attr_get_be32(a); > > break; > > case OVS_TUNNEL_KEY_ATTR_IPV4_DST: > >- tun->ip_dst = nl_attr_get_be32(a); > >+ set_ipv4_dst_tnl(tun, nl_attr_get_be32(a)); > > break; > > case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: > > tun->ipv6_src = nl_attr_get_in6_addr(a); > > break; > > case OVS_TUNNEL_KEY_ATTR_IPV6_DST: > >- tun->ipv6_dst = nl_attr_get_in6_addr(a); > >+ set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a)); > > break; > > case OVS_TUNNEL_KEY_ATTR_TOS: > > tun->ip_tos = nl_attr_get_u8(a); @@ -1961,13 +1961,13 @@ > >tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, > > if (tun_key->ip_src) { > > nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, > >tun_key->ip_src); > > } > >- if (tun_key->ip_dst) { > >+ if (tun_key->dl_type == htons(ETH_TYPE_IP)) { > > nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, > >tun_key->ip_dst); > > } > > if (ipv6_addr_is_set(&tun_key->ipv6_src)) { > > nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, > >&tun_key->ipv6_src); > > } > >- if (ipv6_addr_is_set(&tun_key->ipv6_dst)) { > >+ if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) { > > nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST, > >&tun_key->ipv6_dst); > > } > > if (tun_key->ip_tos) { > >diff --git a/lib/packets.c b/lib/packets.c index d82341d..4f00c0e > >100644 > >--- a/lib/packets.c > >+++ b/lib/packets.c > >@@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts = > >IN6ADDR_ALL_HOSTS_INIT; struct in6_addr flow_tnl_dst(const struct > >flow_tnl *tnl) { > >- return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : > >tnl->ipv6_dst; > >+ if (tnl->dl_type == htons(ETH_TYPE_IP)) { > >+ return in6_addr_mapped_ipv4(tnl->ip_dst); > >+ } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) { > >+ return tnl->ipv6_dst; > >+ } > >+ return in6addr_any; > > } > > > > struct in6_addr > >diff --git a/lib/packets.h b/lib/packets.h index 834e8a4..7e8dd91 > >100644 > >--- a/lib/packets.h > >+++ b/lib/packets.h > >@@ -35,8 +35,26 @@ > > struct dp_packet; > > struct ds; > > > >+#define ETH_TYPE_IP 0x0800 > >+#define ETH_TYPE_ARP 0x0806 > >+#define ETH_TYPE_TEB 0x6558 > >+#define ETH_TYPE_VLAN_8021Q 0x8100 > >+#define ETH_TYPE_VLAN ETH_TYPE_VLAN_8021Q > >+#define ETH_TYPE_VLAN_8021AD 0x88a8 > >+#define ETH_TYPE_IPV6 0x86dd > >+#define ETH_TYPE_LACP 0x8809 > >+#define ETH_TYPE_RARP 0x8035 > >+#define ETH_TYPE_MPLS 0x8847 > >+#define ETH_TYPE_MPLS_MCAST 0x8848 > >+ > > /* Tunnel information used in flow key and metadata. */ struct > > flow_tnl { > >+ /* The tunnel destination ip/ipv6 address are not initializing to > >zero > >+ * due to the performance constrains. the ethernet type field > >'dl_type' is > >+ * used to validate the destination addresses. 'dl_type' is set to > >zero > >+ * for a invalid flow_tnl entry. > >+ */ > >+ ovs_be16 dl_type; > > ovs_be32 ip_dst; > > struct in6_addr ipv6_dst; > > ovs_be32 ip_src; > >@@ -49,7 +67,7 @@ struct flow_tnl { > > ovs_be16 tp_dst; > > ovs_be16 gbp_id; > > uint8_t gbp_flags; > >- uint8_t pad1[5]; /* Pad to 64 bits. */ > >+ uint8_t pad1[1]; /* Pad to 64 bits. */ > > struct tun_metadata metadata; > > }; > > > >@@ -76,10 +94,36 @@ struct flow_tnl { > > > > static inline bool ipv6_addr_is_set(const struct in6_addr *addr); > > > >+static inline void > >+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst) { > >+ if (tnl) { > >+ tnl->ip_dst = ip_dst; > >+ if (tnl->ip_dst) { > >+ tnl->dl_type = htons(ETH_TYPE_IP); > >+ } else { > >+ tnl->dl_type = 0; > >+ } > >+ } > >+} > >+ > >+static inline void > >+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst) { > >+ if (tnl) { > >+ tnl->ipv6_dst = ipv6_dst; > >+ if (ipv6_addr_is_set(&tnl->ipv6_dst)) { > >+ tnl->dl_type = htons(ETH_TYPE_IPV6); > >+ } else { > >+ tnl->dl_type = 0; > >+ } > >+ } > >+} > >+ > > static inline bool > > flow_tnl_dst_is_set(const struct flow_tnl *tnl) { > >- return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst); > >+ return (tnl->dl_type != 0); > > } > > > > struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl); @@ -90,8 > >+134,8 @@ static inline size_t flow_tnl_size(const struct flow_tnl > >*src) { > > if (!flow_tnl_dst_is_set(src)) { > >- /* Covers ip_dst and ipv6_dst only. */ > >- return offsetof(struct flow_tnl, ip_src); > >+ /* Covers dl_type field only. */ > >+ return offsetof(struct flow_tnl, ip_dst); > > } > > if (src->flags & FLOW_TNL_F_UDPIF) { > > /* Datapath format, cover all options we have. */ @@ -154,11 > >+198,10 @@ static inline void pkt_metadata_init(struct pkt_metadata > >*md, odp_port_t port) { > > /* It can be expensive to zero out all of the tunnel metadata. > >However, > >- * we can just zero out ip_dst and the rest of the data will never be > >+ * we can just zero out dl_type and the rest of the data will > >+ never > >be > > * looked at. */ > > memset(md, 0, offsetof(struct pkt_metadata, in_port)); > >- md->tunnel.ip_dst = 0; > >- md->tunnel.ipv6_dst = in6addr_any; > >+ md->tunnel.dl_type = 0; > > > > md->in_port.odp_port = port; > > } > >@@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t > >tc, uint8_t bos, #define ETH_ADDR_SCAN_ARGS(EA) \ > > &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4], > >&(EA).ea[5] > > > >-#define ETH_TYPE_IP 0x0800 > >-#define ETH_TYPE_ARP 0x0806 > >-#define ETH_TYPE_TEB 0x6558 > >-#define ETH_TYPE_VLAN_8021Q 0x8100 > >-#define ETH_TYPE_VLAN ETH_TYPE_VLAN_8021Q > >-#define ETH_TYPE_VLAN_8021AD 0x88a8 > >-#define ETH_TYPE_IPV6 0x86dd > >-#define ETH_TYPE_LACP 0x8809 > >-#define ETH_TYPE_RARP 0x8035 > >-#define ETH_TYPE_MPLS 0x8847 > >-#define ETH_TYPE_MPLS_MCAST 0x8848 > >- > > static inline bool eth_type_mpls(ovs_be16 eth_type) { > > return eth_type == htons(ETH_TYPE_MPLS) || diff --git > >a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index > >a610c53..ca06583 100644 > >--- a/ofproto/ofproto-dpif-ipfix.c > >+++ b/ofproto/ofproto-dpif-ipfix.c > >@@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di, > >const struct dp_packet *packet, > > * of matched packets. */ > > packet_delta_count = UINT32_MAX / di->bridge_exporter.probability; > > if (di->bridge_exporter.options->enable_tunnel_sampling) { > >- if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) { > >+ if (output_odp_port == ODPP_NONE && > >+ flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { > > /* Input tunnel. */ > > tunnel_key = &flow->tunnel; > > tunnel_port = dpif_ipfix_find_port(di, input_odp_port); > >diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c > >index d142933..735b5f3 100644 > >--- a/ofproto/ofproto-dpif-rid.c > >+++ b/ofproto/ofproto-dpif-rid.c > >@@ -297,8 +297,7 @@ uint32_t > > recirc_alloc_id(struct ofproto_dpif *ofproto) { > > struct flow_tnl tunnel; > >- tunnel.ip_dst = htonl(0); > >- tunnel.ipv6_dst = in6addr_any; > >+ tunnel.dl_type = 0; > > struct recirc_state state = { > > .table_id = TBL_INTERNAL, > > .ofproto = ofproto, > >diff --git a/ofproto/ofproto-dpif-sflow.c > >b/ofproto/ofproto-dpif-sflow.c index f11699c..ea21a1a 100644 > >--- a/ofproto/ofproto-dpif-sflow.c > >+++ b/ofproto/ofproto-dpif-sflow.c > >@@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr > *attr, > > /* IPv4 */ > > /* Cannot assume alignment so just use memcpy. */ > > sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src); > >- sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst); > >+ set_ipv4_dst_tnl(&sflow_actions->tunnel, > >get_16aligned_be32(&ip->ip_dst)); > > sflow_actions->tunnel.ip_tos = ip->ip_tos; > > sflow_actions->tunnel.ip_ttl = ip->ip_ttl; > > /* The tnl_push action can supply the ip_protocol too. */ @@ > >-991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr, > > sflow_actions->tunnel.ip_src = key->ipv4_src; > > } > > if (key->ipv4_dst) { > >- sflow_actions->tunnel.ip_dst = key->ipv4_dst; > >+ set_ipv4_dst_tnl(&sflow_actions->tunnel, > >+ key->ipv4_dst); > > } > > if (key->ipv4_proto) { > > sflow_actions->tunnel_ipproto = key->ipv4_proto; @@ > >-1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const > >struct dp_packet *packet, > > fs.output = cookie->sflow.output; > > > > /* Input tunnel. */ > >- if (flow->tunnel.ip_dst) { > >+ if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { > > memset(&tnlInElem, 0, sizeof(tnlInElem)); > > tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS; > > tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type); > >diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 5cf6c75..b2bd254 > >100644 > >--- a/ofproto/tunnel.c > >+++ b/ofproto/tunnel.c > >@@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct > >flow_wildcards *wc) { > > if (tnl_port_should_receive(flow)) { > > wc->masks.tunnel.tun_id = OVS_BE64_MAX; > >- if (flow->tunnel.ip_dst) { > >+ if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) { > > wc->masks.tunnel.ip_src = OVS_BE32_MAX; > >- wc->masks.tunnel.ip_dst = OVS_BE32_MAX; > >- } else { > >+ set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX); > >+ } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) { > > wc->masks.tunnel.ipv6_src = in6addr_exact; > >- wc->masks.tunnel.ipv6_dst = in6addr_exact; > >+ set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact); > > } > > wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT | > > FLOW_TNL_F_CSUM | @@ -424,7 +424,9 > >@@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow, > > if (!cfg->ip_dst_flow) { > > flow->tunnel.ip_dst = > >in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst); > > if (!flow->tunnel.ip_dst) { > >- flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst; > >+ set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst); > >+ } else { > >+ set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst); > > } > > } > > flow->pkt_mark = tnl_port->match.pkt_mark; > >-- > >1.9.1 > > > >_______________________________________________ > >dev mailing list > >dev@openvswitch.org > >http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev