The following patch backports the updated iptunnel pull function.
It also brings in the following upstream fix:

    commit a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168
    Author: Jesse Gross <je...@kernel.org>
    Date:   Sat Mar 19 09:32:02 2016 -0700

    tunnels: Remove encapsulation offloads on decap.

    If a packet is either locally encapsulated or processed through GRO
    it is marked with the offloads that it requires. However, when it is
    decapsulated these tunnel offload indications are not removed. This
    means that if we receive an encapsulated TCP packet, aggregate it with
    GRO, decapsulate, and retransmit the resulting frame on a NIC that does
    not support encapsulation, we won't be able to take advantage of hardware
    offloads even though it is just a simple TCP packet at this point.

    This fixes the problem by stripping off encapsulation offload indications
    when packets are decapsulated.

    The performance impacts of this bug are significant. In a test where a
    Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated,
    and bridged to a VM performance is improved by 60% (5Gbps->8Gbps) as a
    result of avoiding unnecessary segmentation at the VM tap interface.

    Reported-by: Ramu Ramamurthy <srama...@linux.vnet.ibm.com>
    Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE")
    Signed-off-by: Jesse Gross <je...@kernel.org>
    Signed-off-by: David S. Miller <da...@davemloft.net>

Signed-off-by: Pravin B Shelar <pshe...@ovn.org>
---
 acinclude.m4                                   |  1 +
 datapath/linux/compat/geneve.c                 |  2 +-
 datapath/linux/compat/gre.c                    |  2 +-
 datapath/linux/compat/include/linux/skbuff.h   |  7 +++++
 datapath/linux/compat/include/net/ip_tunnels.h | 20 +++++++++-----
 datapath/linux/compat/ip_tunnels_core.c        | 38 ++++++++++++++++++--------
 datapath/linux/compat/lisp.c                   |  2 +-
 datapath/linux/compat/stt.c                    |  3 +-
 datapath/linux/compat/vxlan.c                  |  2 +-
 9 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index fc9f11e..45ae15e 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -496,6 +496,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
   OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_ensure_writable])
   OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_vlan_pop])
   OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_vlan_push])
+  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_clear_hash_if_not_l4])
 
   OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [bool],
                   [OVS_DEFINE([HAVE_BOOL_TYPE])])
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
index f38d4a3..0f6bda0 100644
--- a/datapath/linux/compat/geneve.c
+++ b/datapath/linux/compat/geneve.c
@@ -247,7 +247,7 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
        opts_len = geneveh->opt_len * 4;
        if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
-                                htons(ETH_P_TEB)))
+                                htons(ETH_P_TEB), false))
                goto drop;
 
        gs = rcu_dereference_sk_user_data(sk);
diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
index fa8d936..bb49c8c 100644
--- a/datapath/linux/compat/gre.c
+++ b/datapath/linux/compat/gre.c
@@ -243,7 +243,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                }
        }
 
-       return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+       return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
 }
 
 static struct gre_cisco_protocol __rcu *gre_cisco_proto;
diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h
index 376dfda..03ea707 100644
--- a/datapath/linux/compat/include/linux/skbuff.h
+++ b/datapath/linux/compat/include/linux/skbuff.h
@@ -333,4 +333,11 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
 {
        skb->mac_header = skb->network_header;
 }
+
+#ifndef HAVE_SKB_CLEAR_HASH_IF_NOT_L4
+static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
+{
+       skb_clear_hash(skb);
+}
+#endif
 #endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index 5eda8a2..bf67ac4 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -18,6 +18,19 @@
 #include <net/ip.h>
 #include <net/rtnetlink.h>
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
+#define __iptunnel_pull_header rpl___iptunnel_pull_header
+int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
+                          __be16 inner_proto, bool raw_proto, bool xnet);
+
+#define iptunnel_pull_header rpl_iptunnel_pull_header
+static inline int rpl_iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
+                                      __be16 inner_proto, bool xnet)
+{
+       return rpl___iptunnel_pull_header(skb, hdr_len, inner_proto, false, 
xnet);
+}
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
 struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb,
                                             bool csum_help, int gso_type_mask,
@@ -28,18 +41,11 @@ int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
                      __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
                      __be16 df, bool xnet);
 
-#define iptunnel_pull_header rpl_iptunnel_pull_header
-int rpl_iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 
inner_proto);
-
 #else
 
 #define ovs_iptunnel_handle_offloads(skb, csum_help, gso_type_mask, 
fix_segment) \
        iptunnel_handle_offloads(skb, csum_help, gso_type_mask)
 
-/* This macro is to make OVS build happy about declared functions name. */
-#define rpl_iptunnel_pull_header iptunnel_pull_header
-int rpl_iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 
inner_proto);
-
 #define rpl_iptunnel_xmit iptunnel_xmit
 int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
                      __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c
index 0858d02..2d91abb 100644
--- a/datapath/linux/compat/ip_tunnels_core.c
+++ b/datapath/linux/compat/ip_tunnels_core.c
@@ -139,23 +139,41 @@ error:
        return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(ovs_iptunnel_handle_offloads);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
+static inline int iptunnel_pull_offloads(struct sk_buff *skb)
+{
+       if (skb_is_gso(skb)) {
+               int err;
+
+               err = skb_unclone(skb, GFP_ATOMIC);
+               if (unlikely(err))
+                       return err;
+               skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
+                                              NETIF_F_GSO_SHIFT);
+       }
 
-int rpl_iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 
inner_proto)
+       skb->encapsulation = 0;
+       return 0;
+}
+
+int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
+                              __be16 inner_proto, bool raw_proto, bool xnet)
 {
        if (unlikely(!pskb_may_pull(skb, hdr_len)))
                return -ENOMEM;
 
        skb_pull_rcsum(skb, hdr_len);
 
-       if (inner_proto == htons(ETH_P_TEB)) {
+       if (!raw_proto && inner_proto == htons(ETH_P_TEB)) {
                struct ethhdr *eh;
 
                if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
                        return -ENOMEM;
 
                eh = (struct ethhdr *)skb->data;
-
-               if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+               if (likely(eth_proto_is_802_3(eh->h_proto)))
                        skb->protocol = eh->h_proto;
                else
                        skb->protocol = htons(ETH_P_802_2);
@@ -164,16 +182,14 @@ int rpl_iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_prot
                skb->protocol = inner_proto;
        }
 
-       nf_reset(skb);
-       secpath_reset(skb);
-       skb_clear_hash(skb);
-       skb_dst_drop(skb);
+       skb_clear_hash_if_not_l4(skb);
        skb->vlan_tci = 0;
        skb_set_queue_mapping(skb, 0);
-       skb->pkt_type = PACKET_HOST;
-       return 0;
+       skb_scrub_packet(skb, xnet);
+
+       return iptunnel_pull_offloads(skb);
 }
-EXPORT_SYMBOL_GPL(rpl_iptunnel_pull_header);
+EXPORT_SYMBOL_GPL(rpl___iptunnel_pull_header);
 
 #endif
 
diff --git a/datapath/linux/compat/lisp.c b/datapath/linux/compat/lisp.c
index 0a92dca..aa0d4da 100644
--- a/datapath/linux/compat/lisp.c
+++ b/datapath/linux/compat/lisp.c
@@ -222,7 +222,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
        if (unlikely(!dev))
                goto error;
 
-       if (iptunnel_pull_header(skb, LISP_HLEN, 0))
+       if (iptunnel_pull_header(skb, LISP_HLEN, 0, false))
                goto error;
 
        lisph = lisp_hdr(skb);
diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index 1528cbe..532918f 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -1462,7 +1462,8 @@ static void stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
 
        err = iptunnel_pull_header(skb,
                                   sizeof(struct stthdr) + STT_ETH_PAD,
-                                  htons(ETH_P_TEB));
+                                  htons(ETH_P_TEB),
+                                  false);
        if (unlikely(err))
                goto drop;
 
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index b92a902..a25db87 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -924,7 +924,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                goto bad_flags;
        }
 
-       if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+       if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB), false))
                goto drop;
        vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
-- 
2.5.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to