There are two important GSO tunnel features that were introduced after the 3.12 cutoff for our current out-of-tree GSO implementation:
 * 3.16 introduced support for outer UDP checksums.
 * 3.18 introduced support for verifying hardware support for protocols other than VXLAN.
In cases where these features are used, we should use OVS GSO to ensure correct behavior. However, we also want to continue to use kernel GSO or hardware TSO in existing situations. Therefore, this extends the range of kernels where OVS GSO is available to 3.18 and makes it easier to select which one to use. Signed-off-by: Jesse Gross <je...@nicira.com> --- v2: Fix compilation on kernels 3.12 and 3.17. --- datapath/linux/compat/gso.c | 46 +++++++++----- datapath/linux/compat/gso.h | 28 +++++---- datapath/linux/compat/include/net/ip_tunnels.h | 36 ++++++----- datapath/linux/compat/ip_tunnels_core.c | 87 ++++++++++++++++++++++++-- 4 files changed, 147 insertions(+), 50 deletions(-) diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c index b20ad8a..fbc0356 100644 --- a/datapath/linux/compat/gso.c +++ b/datapath/linux/compat/gso.c @@ -167,7 +167,9 @@ drop: kfree_skb(skb); return err; } +#endif /* 3.16 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) static __be16 __skb_network_protocol(struct sk_buff *skb) { __be16 type = skb->protocol; @@ -190,16 +192,6 @@ static __be16 __skb_network_protocol(struct sk_buff *skb) return type; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) -static void tnl_fix_segment(struct sk_buff *skb) -{ - if (OVS_GSO_CB(skb)->fix_segment) - OVS_GSO_CB(skb)->fix_segment(skb); -} -#else -static void tnl_fix_segment(struct sk_buff *skb) { } -#endif - static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) @@ -240,7 +232,9 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, memcpy(ip_hdr(skb), iph, pkt_hlen); memcpy(skb->cb, cb, sizeof(cb)); - tnl_fix_segment(skb); + + if (OVS_GSO_CB(skb)->fix_segment) + OVS_GSO_CB(skb)->fix_segment(skb); skb->protocol = proto; skb = skb->next; @@ -293,10 +287,9 @@ int rpl_ip_local_out(struct sk_buff *skb) } return ret; } -#endif /* 3.16 */ +#endif /* 3.18 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) || \ - !defined 
USE_UPSTREAM_VXLAN +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb, bool csum_help, void (*fix_segment)(struct sk_buff *)) @@ -345,4 +338,27 @@ error: kfree_skb(skb); return ERR_PTR(err); } -#endif /* 3.12 || !USE_UPSTREAM_VXLAN */ +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb, + bool csum_help, + void (*fix_segment)(struct sk_buff *)) +{ + int err; + + if (skb_is_gso(skb)) { + if (skb_is_encapsulated(skb)) { + err = -ENOSYS; + goto error; + } + } + + OVS_GSO_CB(skb)->fix_segment = fix_segment; + + /* Pass in zero for the GSO type, it won't get used anyways. */ + return iptunnel_handle_offloads(skb, csum_help, 0); + +error: + kfree_skb(skb); + return ERR_PTR(err); +} +#endif /* 3.18 */ diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h index 023d6d3..fe9ff78 100644 --- a/datapath/linux/compat/gso.h +++ b/datapath/linux/compat/gso.h @@ -2,8 +2,7 @@ #define __LINUX_GSO_WRAPPER_H #include <linux/version.h> -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) || \ - !defined USE_UPSTREAM_VXLAN +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) #include <linux/netdevice.h> #include <linux/skbuff.h> @@ -15,15 +14,25 @@ typedef void (*gso_fix_segment_t)(struct sk_buff *); struct ovs_gso_cb { struct ovs_skb_cb dp_cb; gso_fix_segment_t fix_segment; - sk_buff_data_t inner_mac_header; /* Offset from skb->head */ #if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) __be16 inner_protocol; #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) + sk_buff_data_t inner_mac_header; /* Offset from skb->head */ u16 inner_network_header; /* Offset from * inner_mac_header */ +#endif }; #define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb) +struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb, + bool csum_help, + gso_fix_segment_t fix_segment); + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) + #define 
skb_inner_network_header rpl_skb_inner_network_header #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -66,24 +75,17 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb) OVS_GSO_CB(skb)->fix_segment = NULL; } +#endif /* 3.12 */ -struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb, - bool csum_help, - gso_fix_segment_t fix_segment); - - -#endif /* 3.12 || !USE_UPSTREAM_VXLAN */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) #define ip_local_out rpl_ip_local_out int ip_local_out(struct sk_buff *skb); +#endif /* 3.18 */ -#define skb_inner_mac_offset rpl_skb_inner_mac_offset static inline int skb_inner_mac_offset(const struct sk_buff *skb) { return skb_inner_mac_header(skb) - skb->data; } -#endif /* 3.16 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) { diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h index 74c6a2c..ad72be4 100644 --- a/datapath/linux/compat/include/net/ip_tunnels.h +++ b/datapath/linux/compat/include/net/ip_tunnels.h @@ -11,24 +11,30 @@ #define USE_KERNEL_TUNNEL_API #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +int ovs_iptunnel_xmit(struct sock *sk, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet); +#endif + #ifdef USE_KERNEL_TUNNEL_API #include_next <net/ip_tunnels.h> -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,15,0) -static inline int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, - struct sk_buff *skb, __be32 src, - __be32 dst, __u8 proto, __u8 tos, - __u8 ttl, __be16 df, bool xnet) -{ -#ifdef HAVE_IPTUNNEL_XMIT_NET - return iptunnel_xmit(NULL, rt, skb, src, dst, proto, tos, ttl, df); -#else - return iptunnel_xmit(rt, skb, src, dst, proto, tos, ttl, df, xnet); -#endif -} +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +int rpl_iptunnel_xmit(struct sock *sk, 
struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, + __be16 df, bool xnet); + #define iptunnel_xmit rpl_iptunnel_xmit #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb, + bool csum_help, int gso_type_mask); +#define iptunnel_handle_offloads rpl_iptunnel_handle_offloads +#endif + #else #include <linux/if_tunnel.h> @@ -61,11 +67,7 @@ struct tnl_ptk_info { #define PACKET_RCVD 0 #define PACKET_REJECT 1 -int iptunnel_xmit(struct sock *sk, struct rtable *rt, - struct sk_buff *skb, - __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet); - +#define iptunnel_xmit ovs_iptunnel_xmit int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); #endif diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c index e71ba4e..d466494 100644 --- a/datapath/linux/compat/ip_tunnels_core.c +++ b/datapath/linux/compat/ip_tunnels_core.c @@ -35,11 +35,11 @@ #include "compat.h" #include "gso.h" -#ifndef USE_KERNEL_TUNNEL_API -int iptunnel_xmit(struct sock *sk, struct rtable *rt, - struct sk_buff *skb, - __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +int ovs_iptunnel_xmit(struct sock *sk, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len; struct iphdr *iph; @@ -82,6 +82,82 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, return pkt_len; } +#ifdef USE_KERNEL_TUNNEL_API +int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, + __be16 df, bool xnet) +{ + if (OVS_GSO_CB(skb)->fix_segment) + return ovs_iptunnel_xmit(sk, rt, skb, src, dst, proto, tos, + ttl, df, xnet); + +#undef iptunnel_xmit + +#if LINUX_VERSION_CODE < 
KERNEL_VERSION(3,15,0) +#ifdef HAVE_IPTUNNEL_XMIT_NET + return iptunnel_xmit(NULL, rt, skb, src, dst, proto, tos, ttl, df); +#else + return iptunnel_xmit(rt, skb, src, dst, proto, tos, ttl, df, xnet); +#endif +#else + return iptunnel_xmit(sk, rt, skb, src, dst, proto, tos, ttl, df, xnet); +#endif +} + +#undef iptunnel_handle_offloads +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) +static struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, + bool csum_help, + int gso_type_mask) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= gso_type_mask; + return skb; + } + + /* If packet is not gso and we are resolving any partial checksum, + * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL + * on the outer header without confusing devices that implement + * NETIF_F_IP_CSUM with encapsulation. 
+ */ + if (csum_help) + skb->encapsulation = 0; + + if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +#endif + +struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb, + bool csum_help, int gso_type_mask) +{ + BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + OVS_GSO_CB(skb)->fix_segment = NULL; + + return iptunnel_handle_offloads(skb, csum_help, gso_type_mask); +} + +#else + int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) { if (unlikely(!pskb_may_pull(skb, hdr_len))) @@ -117,6 +193,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) } #endif +#endif bool skb_is_encapsulated(struct sk_buff *skb) { -- 1.9.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev