It partially backports commit: commit d71785ffc7e7cae3fbdc4ea8a9d05b7a1c59f7b8 Author: Paolo Abeni <pab...@redhat.com>
net: add dst_cache to ovs vxlan lwtunnel In case of UDP traffic with datagram length below MTU this give about 2% performance increase when tunneling over ipv4 and about 60% when tunneling over ipv6 Signed-off-by: Paolo Abeni <pab...@redhat.com> Suggested-and-acked-by: Hannes Frederic Sowa <han...@stressinduktion.org> Signed-off-by: David S. Miller <da...@davemloft.net> Bug fix commit db3c6139e6e ("bpf, vxlan, geneve, gre: fix usage of dst_cache on xmit"). is also included. Geneve changes were added in 468dfffcd762cbb2777ec5a76bc21e3748ebf47e ("geneve: add dst caching support") Signed-off-by: Pravin B Shelar <pshe...@ovn.org> --- datapath/flow_netlink.c | 5 +++ datapath/linux/compat/geneve.c | 42 ++++++++++++++++++++++++++ datapath/linux/compat/gso.h | 3 ++ datapath/linux/compat/include/net/ip_tunnels.h | 18 +++++++++++ datapath/linux/compat/include/net/vxlan.h | 2 ++ datapath/linux/compat/vxlan.c | 35 ++++++++++++++++++--- 6 files changed, 101 insertions(+), 4 deletions(-) diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index cbfa233..b6020ab 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -1964,6 +1964,11 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (!tun_dst) return -ENOMEM; + err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL); + if (err) { + dst_release((struct dst_entry *)tun_dst); + return err; + } a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, sizeof(*ovs_tun), log); if (IS_ERR(a)) { diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c index f5daefb..061ceb5 100644 --- a/datapath/linux/compat/geneve.c +++ b/datapath/linux/compat/geneve.c @@ -19,6 +19,7 @@ #include <linux/if_vlan.h> #include <net/addrconf.h> +#include <net/dst_cache.h> #include <net/dst_metadata.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -88,6 +89,7 @@ struct geneve_dev { __be16 dst_port; bool collect_md; u32 flags; + struct dst_cache dst_cache; }; /* Geneve 
device flags */ @@ -301,15 +303,27 @@ drop: /* Setup stats when device is created */ static int geneve_init(struct net_device *dev) { + struct geneve_dev *geneve = netdev_priv(dev); + int err; + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; } static void geneve_uninit(struct net_device *dev) { + struct geneve_dev *geneve = netdev_priv(dev); + + dst_cache_destroy(&geneve->dst_cache); free_percpu(dev->tstats); } @@ -799,7 +813,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct flowi4 *fl4, struct ip_tunnel_info *info) { + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); + struct dst_cache *dst_cache; struct rtable *rt = NULL; __u8 tos; @@ -811,16 +827,25 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; fl4->flowi4_tos = RT_TOS(info->key.tos); + dst_cache = &info->dst_cache; } else { tos = geneve->tos; if (tos == 1) { const struct iphdr *iip = ip_hdr(skb); tos = ip_tunnel_get_dsfield(iip, skb); + use_cache = false; } fl4->flowi4_tos = RT_TOS(tos); fl4->daddr = geneve->remote.sin.sin_addr.s_addr; + dst_cache = &geneve->dst_cache; + } + + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); + if (rt) + return rt; } rt = ip_route_output_key(geneve->net, fl4); @@ -833,6 +858,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ip_rt_put(rt); return ERR_PTR(-ELOOP); } + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); return rt; } @@ -842,9 +869,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct flowi6 *fl6, struct ip_tunnel_info *info) { + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs6 = 
geneve->sock6; struct dst_entry *dst = NULL; + struct dst_cache *dst_cache; __u8 prio; memset(fl6, 0, sizeof(*fl6)); @@ -856,17 +885,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->saddr = info->key.u.ipv6.src; fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos), info->key.label); + dst_cache = &info->dst_cache; } else { prio = geneve->tos; if (prio == 1) { const struct iphdr *iip = ip_hdr(skb); prio = ip_tunnel_get_dsfield(iip, skb); + use_cache = false; } fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio), geneve->label); fl6->daddr = geneve->remote.sin6.sin6_addr; + dst_cache = &geneve->dst_cache; + } + + if (use_cache) { + dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); + if (dst) + return dst; } #ifdef HAVE_IPV6_DST_LOOKUP_NET @@ -887,6 +925,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, return ERR_PTR(-ELOOP); } + if (use_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6->saddr); return dst; } #endif @@ -1374,6 +1414,8 @@ static int geneve_configure(struct net *net, struct net_device *dev, return -EPERM; } + dst_cache_reset(&geneve->dst_cache); + err = register_netdevice(dev); if (err) return err; diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h index f082be1..9a38a19 100644 --- a/datapath/linux/compat/gso.h +++ b/datapath/linux/compat/gso.h @@ -191,6 +191,9 @@ static inline void ovs_dst_hold(void *dst) static inline void ovs_dst_release(struct dst_entry *dst) { + struct metadata_dst *tun_dst = (struct metadata_dst *) dst; + + dst_cache_destroy(&tun_dst->u.tun_info.dst_cache); kfree(dst); } diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h index 7fe6a04..e3f9b60 100644 --- a/datapath/linux/compat/include/net/ip_tunnels.h +++ b/datapath/linux/compat/include/net/ip_tunnels.h @@ -197,6 +197,24 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, #define ip_tunnel_collect_metadata() true +#if LINUX_VERSION_CODE < 
KERNEL_VERSION(4,6,0) +#define TUNNEL_NOCACHE 0 + +static inline bool +ip_tunnel_dst_cache_usable(const struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + if (skb->mark) + return false; + if (!info) + return true; + if (info->key.tun_flags & TUNNEL_NOCACHE) + return false; + + return true; +} +#endif + #define ip_tunnel rpl_ip_tunnel struct ip_tunnel { diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h index a6a5f30..8212d3a 100644 --- a/datapath/linux/compat/include/net/vxlan.h +++ b/datapath/linux/compat/include/net/vxlan.h @@ -25,6 +25,7 @@ static inline void rpl_vxlan_cleanup_module(void) #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/udp.h> +#include <net/dst_cache.h> #include <net/dst_metadata.h> #include "compat.h" @@ -227,6 +228,7 @@ struct vxlan_rdst { u32 remote_ifindex; struct list_head list; struct rcu_head rcu; + struct dst_cache dst_cache; }; struct vxlan_config { diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 5d05047..bb230cb 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -907,11 +907,21 @@ out_free: static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct sk_buff *skb, int oif, u8 tos, __be32 daddr, __be32 *saddr, + struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct rtable *rt = NULL; struct flowi4 fl4; + if (tos && !info) + use_cache = false; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } + memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_oif = oif; fl4.flowi4_tos = RT_TOS(tos); @@ -923,6 +933,8 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { *saddr = fl4.saddr; + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); } return rt; } @@ -933,12 +945,22 @@ static struct dst_entry 
*vxlan6_get_route(struct vxlan_dev *vxlan, __be32 label, const struct in6_addr *daddr, struct in6_addr *saddr, + struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct dst_entry *ndst; struct flowi6 fl6; int err; + if (tos && !info) + use_cache = false; + if (use_cache) { + ndst = dst_cache_get_ip6(dst_cache, saddr); + if (ndst) + return ndst; + } + memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.daddr = *daddr; @@ -963,6 +985,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, return ERR_PTR(err); *saddr = fl6.saddr; + if (use_cache) + dst_cache_set_ip6(dst_cache, ndst, saddr); return ndst; } #endif @@ -978,6 +1002,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { + struct dst_cache *dst_cache; struct ip_tunnel_info *info; struct vxlan_dev *vxlan = netdev_priv(dev); struct sock *sk; @@ -1002,6 +1027,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; + dst_cache = &rdst->dst_cache; } else { if (!info) { WARN_ONCE(1, "%s: Missing encapsulation instructions\n", @@ -1016,6 +1042,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, else remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; dst = &remote_ip; + dst_cache = &info->dst_cache; } if (vxlan_addr_any(dst)) { @@ -1063,7 +1090,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rt = vxlan_get_route(vxlan, skb, rdst ? 
rdst->remote_ifindex : 0, tos, dst->sin.sin_addr.s_addr, &saddr, - info); + dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &dst->sin.sin_addr.s_addr); @@ -1121,7 +1148,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, label, &dst->sin6.sin6_addr, &saddr, - info); + dst_cache, info); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", &dst->sin6.sin6_addr); @@ -1409,7 +1436,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, info->key.u.ipv4.dst, - &info->key.u.ipv4.src, info); + &info->key.u.ipv4.src, NULL, info); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); @@ -1421,7 +1448,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, - &info->key.u.ipv6.src, info); + &info->key.u.ipv6.src, NULL, info); if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); -- 1.9.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev