On Thu, May 23, 2013 at 11:12 PM, Rajahalme, Jarno (NSN - FI/Espoo) <jarno.rajaha...@nsn.com> wrote: > Pravin, > > Please find some review comments below,
Thanks for comments. > > Jarno > > On May 23, 2013, at 23:01 , ext Pravin B Shelar wrote: > ... >> diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c >> new file mode 100644 >> index 0000000..c352ff8 >> --- /dev/null >> +++ b/datapath/linux/compat/gre.c >> @@ -0,0 +1,352 @@ >> +/* >> + * Copyright (c) 2007-2013 Nicira, Inc. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of version 2 of the GNU General Public >> + * License as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, but >> + * WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >> + * 02110-1301, USA >> + */ >> + >> +#include <linux/module.h> >> +#include <linux/if.h> >> +#include <linux/if_tunnel.h> >> +#include <linux/icmp.h> >> +#include <linux/in.h> >> +#include <linux/ip.h> >> +#include <linux/kernel.h> >> +#include <linux/kmod.h> >> +#include <linux/netdevice.h> >> +#include <linux/skbuff.h> >> +#include <linux/spinlock.h> >> + >> +#include <net/gre.h> >> +#include <net/icmp.h> >> +#include <net/protocol.h> >> +#include <net/route.h> >> +#include <net/xfrm.h> >> + >> +#include "gso.h" >> + >> +static struct gre_cisco_protocol __rcu *gre_cisco_proto; >> + >> +static void gre_csum_fix(struct sk_buff *skb) >> +{ >> + struct gre_base_hdr *greh; >> + __be32 *options; >> + int gre_offset = skb_transport_offset(skb); >> + >> + greh = (struct gre_base_hdr *)skb_transport_header(skb); >> + options = ((__be32 *)greh + 1); >> + >> + *options = 0; >> + *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset, >> + skb->len - gre_offset, >> 0)); >> +} >> + >> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) >> +{ >> + int err; >> + >> + skb_reset_inner_headers(skb); >> + >> + if (skb_is_gso(skb)) { >> + if (gre_csum) >> + OVS_GSO_CB(skb)->fix_segment = gre_csum_fix; >> + } else { >> + if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { >> + err = skb_checksum_help(skb); >> + if (err) >> + goto error; >> + >> + } else if (skb->ip_summed != CHECKSUM_PARTIAL) >> + skb->ip_summed = CHECKSUM_NONE; >> + } >> + return skb; >> +error: >> + kfree_skb(skb); >> + return ERR_PTR(err); >> +} >> + >> +static bool is_gre_gso(struct sk_buff *skb) >> +{ >> + return skb_is_gso(skb); >> +} > > What is the value of this? You already use skb_is_gso() above. > right, I will get rid of it. >> + >> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, >> + int hdr_len) >> +{ >> + struct gre_base_hdr *greh; >> + >> + __skb_push(skb, hdr_len); >> + >> + greh = (struct gre_base_hdr *)skb->data; >> + greh->flags = tnl_flags_to_gre_flags(tpi->flags); >> + greh->protocol = tpi->proto; >> + >> + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { > > Add some spaces? Also below. > ok. >> + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); >> + >> + if (tpi->flags&TUNNEL_SEQ) { >> + *ptr = tpi->seq; >> + ptr--; >> + } >> + if (tpi->flags&TUNNEL_KEY) { >> + *ptr = tpi->key; >> + ptr--; >> + } >> + if (tpi->flags&TUNNEL_CSUM && !is_gre_gso(skb)) { >> + *ptr = 0; >> + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, >> + skb->len, 0)); >> + } >> + } >> +} >> + >> +static __sum16 check_checksum(struct sk_buff *skb) >> +{ >> + __sum16 csum = 0; >> + >> + switch (skb->ip_summed) { >> + case CHECKSUM_COMPLETE: >> + csum = csum_fold(skb->csum); >> + >> + if (!csum) >> + break; >> + /* Fall through. */ >> + >> + case CHECKSUM_NONE: >> + skb->csum = 0; >> + csum = __skb_checksum_complete(skb); >> + skb->ip_summed = CHECKSUM_COMPLETE; >> + break; >> + } >> + >> + return csum; >> +} >> + >> +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, >> + bool *csum_err) > > It seems the csum_err returned via the parameter pointer is ignored by the > caller, hence it could be removed. > This function will be defied in upstream gre module, so I will keep it same as that. >> +{ >> + unsigned int ip_hlen = ip_hdrlen(skb); >> + struct gre_base_hdr *greh; >> + __be32 *options; >> + int hdr_len; >> + >> + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) >> + return -EINVAL; >> + >> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); >> + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) >> + return -EINVAL; >> + >> + tpi->flags = gre_flags_to_tnl_flags(greh->flags); >> + hdr_len = ip_gre_calc_hlen(tpi->flags); >> + >> + if (!pskb_may_pull(skb, hdr_len)) >> + return -EINVAL; >> + >> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); >> + tpi->proto = greh->protocol; >> + >> + options = (__be32 *)(greh + 1); >> + if (greh->flags & GRE_CSUM) { >> + if (check_checksum(skb)) { >> + *csum_err = true; >> + return -EINVAL; >> + } >> + options++; >> + } >> + >> + if (greh->flags & GRE_KEY) { >> + tpi->key = *options; >> + options++; >> + } else >> + tpi->key = 0; >> + >> + if (unlikely(greh->flags & GRE_SEQ)) { >> + tpi->seq = *options; >> + options++; >> + } else >> + tpi->seq = 0; >> + >> + /* WCCP version 1 and 2 protocol decoding. >> + * - Change protocol to IP >> + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header >> + */ >> + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { >> + tpi->proto = htons(ETH_P_IP); >> + if ((*(u8 *)options & 0xF0) != 0x40) { >> + hdr_len += 4; >> + if (!pskb_may_pull(skb, hdr_len)) >> + return -EINVAL; >> + } >> + } >> + >> + return iptunnel_pull_header(skb, hdr_len, tpi->proto); >> +} >> + >> +static int gre_cisco_rcv(struct sk_buff *skb) >> +{ >> + struct tnl_ptk_info tpi; >> + bool csum_err = false; >> + struct gre_cisco_protocol *proto; >> + >> + rcu_read_lock(); >> + proto = rcu_dereference(gre_cisco_proto); >> + if (!proto) >> + goto drop; >> + >> + if (parse_gre_header(skb, &tpi, &csum_err) < 0) >> + goto drop; >> + proto->handler(skb, &tpi); >> + rcu_read_unlock(); >> + return 0; >> + >> +drop: >> + rcu_read_unlock(); >> + kfree_skb(skb); >> + return 0; >> +} >> + >> +static const struct gre_protocol ipgre_protocol = { >> + .handler = gre_cisco_rcv, >> +}; >> + >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >> +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] >> __read_mostly; >> + >> +int gre_add_protocol(const struct gre_protocol *proto, u8 version) >> +{ >> + if (version >= GREPROTO_MAX) >> + return -EINVAL; >> + >> + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], >> NULL, proto) == NULL) ? >> + 0 : -EBUSY; >> +} >> + >> +int gre_del_protocol(const struct gre_protocol *proto, u8 version) >> +{ >> + int ret; >> + >> + if (version >= GREPROTO_MAX) >> + return -EINVAL; >> + >> + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], >> proto, NULL) == proto) ? >> + 0 : -EBUSY; >> + >> + if (ret) >> + return ret; >> + >> + synchronize_net(); >> + return 0; >> +} >> + >> +static int gre_rcv(struct sk_buff *skb) >> +{ >> + const struct gre_protocol *proto; >> + u8 ver; >> + int ret; >> + >> + if (!pskb_may_pull(skb, 12)) >> + goto drop; > > Is there a symbol you could use here instead the literal 12? > same as above, this function is copied from gre module. >> + >> + ver = skb->data[1]&0x7f; > > Spaces... > ok. >> + if (ver >= GREPROTO_MAX) >> + goto drop; >> + >> + rcu_read_lock(); >> + proto = rcu_dereference(gre_proto[ver]); >> + if (!proto || !proto->handler) >> + goto drop_unlock; >> + ret = proto->handler(skb); >> + rcu_read_unlock(); >> + return ret; >> + >> +drop_unlock: >> + rcu_read_unlock(); >> +drop: >> + kfree_skb(skb); >> + return NET_RX_DROP; >> +} >> + > ... >> diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c >> new file mode 100644 >> index 0000000..f6a4ad3 >> --- /dev/null >> +++ b/datapath/linux/compat/gso.c >> @@ -0,0 +1,166 @@ >> +/* >> + * Copyright (c) 2007-2013 Nicira, Inc. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of version 2 of the GNU General Public >> + * License as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, but >> + * WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >> + * 02110-1301, USA >> + */ >> + >> +#include <linux/module.h> >> +#include <linux/if.h> >> +#include <linux/if_tunnel.h> >> +#include <linux/icmp.h> >> +#include <linux/in.h> >> +#include <linux/ip.h> >> +#include <linux/kernel.h> >> +#include <linux/kmod.h> >> +#include <linux/netdevice.h> >> +#include <linux/skbuff.h> >> +#include <linux/spinlock.h> >> + >> +#include <net/gre.h> >> +#include <net/icmp.h> >> +#include <net/protocol.h> >> +#include <net/route.h> >> +#include <net/xfrm.h> >> + >> +#include "gso.h" >> + >> +static __be16 skb_network_protocol(struct sk_buff *skb) >> +{ >> + __be16 type = skb->protocol; >> + int vlan_depth = ETH_HLEN; > > Shouldn't this be 2*ETH_ALEN instead, since that is where VLAN header starts > at if there is one? > Linux vlan_hdr start at ETH_HLEN, ref: struct vlan_hd and struct vlan_ethhdr in if_vlan.h. >> + >> + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { >> + struct vlan_hdr *vh; >> + >> + if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) >> + return 0; >> + >> + vh = (struct vlan_hdr *)(skb->data + vlan_depth); >> + type = vh->h_vlan_encapsulated_proto; >> + vlan_depth += VLAN_HLEN; >> + } >> + >> + return type; >> +} >> + >> +static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, >> + netdev_features_t features, >> + bool tx_path) >> +{ >> + struct iphdr *iph = ip_hdr(skb); >> + int tnl_hlen = skb_inner_network_offset(skb); >> + struct sk_buff *skb1 = skb; >> + struct sk_buff *segs; >> + >> + __skb_pull(skb, tnl_hlen); >> + skb_reset_mac_header(skb); >> + skb_reset_network_header(skb); >> + skb_reset_transport_header(skb); >> + skb->protocol = skb_network_protocol(skb); >> + >> + segs = __skb_gso_segment(skb, 0, true); > > Are you sure you want to ignore the tx_path and insist on 'true' here? Using > 'false' selects a looser check of skb->ip_summed at __skb_gso_segment. We > have seen the check implied by 'true' having the kernel fill the disk with > non-rate-limited warnings, so this may be a significant choice you are making > here. > right, I will change it. >> + if (!segs || IS_ERR(segs)) >> + goto free; >> + >> + skb = segs; >> + while (skb) { >> + __skb_push(skb, tnl_hlen); >> + skb_reset_mac_header(skb); >> + skb_reset_network_header(skb); >> + skb_set_transport_header(skb, sizeof(struct iphdr)); >> + skb->mac_len = 0; >> + >> + memcpy(ip_hdr(skb), iph, tnl_hlen); > > Could you educate me a bit here and explain shortly why it is necessary to > overwrite the IP headers of the segmented skbs? > we have segmented inner skb, now we need to copy outer header to each segment from first segment. >> + if (OVS_GSO_CB(skb)->fix_segment) >> + OVS_GSO_CB(skb)->fix_segment(skb); >> + skb = skb->next; >> + } >> +free: >> + consume_skb(skb1); >> + return segs; >> +} >> + >> +static bool need_linearize(const struct sk_buff *skb) >> +{ >> + int i; >> + >> + if (unlikely(skb_shinfo(skb)->frag_list)) >> + return true; >> + >> + /* >> + * Generally speaking we should linearize if there are paged frags. >> + * However, if all of the refcounts are 1 we know nobody else can >> + * change them from underneath us and we can skip the linearization. >> + */ >> + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) >> + if >> (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) >> + return true; >> + >> + return false; >> +} >> + >> +int ip_local_out(struct sk_buff *skb) > > IMO it would be less surprising if you used 'rpl_ip_local_out' here. Don't > know if that would go against a convention though. > ok. >> +{ >> + struct dst_entry *dst = skb_dst(skb); >> + int ret = NETDEV_TX_OK; >> + >> + if (skb_is_gso(skb)) { >> + skb = tnl_skb_gso_segment(skb, 0, true); > > Maybe should use 'false' as the tx_path argument (Jesse?). > ok. >> + if (!skb || IS_ERR(skb)) >> + return 0; >> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { >> + int err; >> + >> + if (unlikely(need_linearize(skb))) { >> + err = __skb_linearize(skb); >> + if (unlikely(err)) >> + return 0; >> + } >> + >> + err = skb_checksum_help(skb); >> + if (unlikely(err)) >> + return 0; >> + } >> + >> + while (skb) { >> + struct sk_buff *next_skb = skb->next; >> + int err; >> + >> + if (next_skb) >> + skb_dst_set(skb, dst_clone(dst)); >> + else >> + skb_dst_set(skb, dst); >> + >> + skb->next = NULL; >> + >> + /* >> + * Allow our local IP stack to fragment the outer packet even >> + * if the DF bit is set as a last resort. We also need to >> + * force selection of an IP ID here because Linux will >> + * otherwise leave it at 0 if the packet originally had DF set. >> + */ > > This comment needs editing as the IP ID is not set here. > right, I missed it. >> + >> + skb->local_df = 1; >> + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); >> + > > Is clearing of the skb control buffer necessary here? > yes, we have seen issues due to garbage IPCB used by networking stack. >> +#undef ip_local_out >> + err = ip_local_out(skb); >> + if (unlikely(net_xmit_eval(err))) >> + ret = err; >> + >> + skb = next_skb; >> + } >> + return ret; >> +} > ... >> diff --git a/datapath/linux/compat/include/net/gre.h >> b/datapath/linux/compat/include/net/gre.h >> new file mode 100644 >> index 0000000..16dc54e >> --- /dev/null >> +++ b/datapath/linux/compat/include/net/gre.h >> @@ -0,0 +1,109 @@ >> +#ifndef __LINUX_GRE_WRAPPER_H >> +#define __LINUX_GRE_WRAPPER_H >> + >> +#include <linux/skbuff.h> >> +#include <net/ip_tunnels.h> >> + >> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37) >> +#include_next <net/gre.h> >> + >> +#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37) */ >> + >> +#define GREPROTO_CISCO 0 >> +#define GREPROTO_PPTP 1 >> +#define GREPROTO_MAX 2 >> +#define GRE_IP_PROTO_MAX 2 >> + >> +struct gre_protocol { >> + int (*handler)(struct sk_buff *skb); >> + void (*err_handler)(struct sk_buff *skb, u32 info); >> +}; >> + >> +int gre_add_protocol(const struct gre_protocol *proto, u8 version); >> +int gre_del_protocol(const struct gre_protocol *proto, u8 version); >> + >> +#endif >> + >> +#define GRE_IP_PROTO_MAX 2 >> + >> +struct gre_base_hdr { >> + __be16 flags; >> + __be16 protocol; >> +}; >> +#define GRE_HEADER_SECTION 4 >> + >> +#define MAX_GRE_PROTO_PRIORITY 255 >> +struct gre_cisco_protocol { >> + int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); >> + int (*err_handler)(struct sk_buff *skb, u32 info, >> + const struct tnl_ptk_info *tpi); >> + u8 priority; >> +}; >> + >> +#define gre_build_header rpl_gre_build_header >> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, >> + int hdr_len); >> + >> +#define gre_handle_offloads rpl_gre_handle_offloads >> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum); >> + >> +int gre_cisco_register(struct gre_cisco_protocol *proto); >> +int gre_cisco_unregister(struct gre_cisco_protocol *proto); >> + >> +static inline int ip_gre_calc_hlen(__be16 o_flags) >> +{ >> + int addend = 4; >> + >> + if (o_flags&TUNNEL_CSUM) > > Spaces... > ok. >> + addend += 4; >> + if (o_flags&TUNNEL_KEY) >> + addend += 4; >> + if (o_flags&TUNNEL_SEQ) >> + addend += 4; >> + return addend; >> +} >> + > ... >> diff --git a/datapath/linux/compat/include/net/ip_tunnels.h >> b/datapath/linux/compat/include/net/ip_tunnels.h >> new file mode 100644 >> index 0000000..ad17c9d >> --- /dev/null >> +++ b/datapath/linux/compat/include/net/ip_tunnels.h >> @@ -0,0 +1,54 @@ >> +#ifndef __NET_IP_TUNNELS_WRAPPER_H >> +#define __NET_IP_TUNNELS_WRAPPER_H 1 >> + >> +#include <linux/if_tunnel.h> >> +#include <linux/netdevice.h> >> +#include <linux/skbuff.h> >> +#include <linux/types.h> >> +#include <net/dsfield.h> >> +#include <net/flow.h> >> +#include <net/inet_ecn.h> >> +#include <net/ip.h> >> +#include <net/rtnetlink.h> >> + >> +#define TUNNEL_CSUM __cpu_to_be16(0x01) >> +#define TUNNEL_ROUTING __cpu_to_be16(0x02) >> +#define TUNNEL_KEY __cpu_to_be16(0x04) >> +#define TUNNEL_SEQ __cpu_to_be16(0x08) >> +#define TUNNEL_STRICT __cpu_to_be16(0x10) >> +#define TUNNEL_REC __cpu_to_be16(0x20) >> +#define TUNNEL_VERSION __cpu_to_be16(0x40) >> +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) >> +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) >> + >> +struct tnl_ptk_info { >> + __be16 flags; >> + __be16 proto; >> + __be32 key; >> + __be32 seq; >> +}; >> + >> +#define PACKET_RCVD 0 >> +#define PACKET_REJECT 1 >> + >> +static inline void tunnel_ip_select_ident(struct sk_buff *skb, >> + const struct iphdr *old_iph, >> + struct dst_entry *dst) >> +{ >> + struct iphdr *iph = ip_hdr(skb); >> + >> + /* Use inner packet iph-id if possible. */ >> + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) >> + iph->id = old_iph->id; > > Was it concluded that it is safe to use the inner packet ip id? I get that > this is a copy of upstream code, but would like to know. > I still need to think about it and propose fix for upstream kernel too. >> + else >> + __ip_select_ident(iph, dst, >> + (skb_shinfo(skb)->gso_segs ?: 1) - 1); >> +} >> + >> +int iptunnel_xmit(struct net *net, struct rtable *rt, >> + struct sk_buff *skb, >> + __be32 src, __be32 dst, __u8 proto, >> + __u8 tos, __u8 ttl, __be16 df); >> + >> +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 >> inner_proto); >> +#endif /* __NET_IP_TUNNELS_H */ >> diff --git a/datapath/linux/compat/ip_tunnels_core.c >> b/datapath/linux/compat/ip_tunnels_core.c >> new file mode 100644 >> index 0000000..f8c8488 >> --- /dev/null >> +++ b/datapath/linux/compat/ip_tunnels_core.c >> @@ -0,0 +1,115 @@ >> +/* >> + * Copyright (c) 2007-2013 Nicira, Inc. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of version 2 of the GNU General Public >> + * License as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, but >> + * WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >> + * 02110-1301, USA >> + */ >> + >> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt >> + >> +#include <linux/in.h> >> +#include <linux/in_route.h> >> +#include <linux/inetdevice.h> >> +#include <linux/jhash.h> >> +#include <linux/list.h> >> +#include <linux/kernel.h> >> +#include <linux/version.h> >> +#include <linux/workqueue.h> >> +#include <linux/rculist.h> >> +#include <net/ip_tunnels.h> >> +#include <net/route.h> >> +#include <net/xfrm.h> >> + >> +#include "gso.h" >> +#include "compat.h" >> + >> +int iptunnel_xmit(struct net *net, struct rtable *rt, >> + struct sk_buff *skb, >> + __be32 src, __be32 dst, __u8 proto, >> + __u8 tos, __u8 ttl, __be16 df) >> +{ >> + int pkt_len = skb->len; >> + struct iphdr *iph; >> + int err; >> + >> + nf_reset(skb); >> + secpath_reset(skb); >> + skb_clear_rxhash(skb); >> + skb_dst_drop(skb); >> + skb_dst_set(skb, &rt_dst(rt)); >> +#if 0 >> + /* Do not clear ovs-callback. It will be done in gso code. */ >> + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); >> +#endif > > Why does the skb control buffer need to be cleared in the first place? > it has inner packet offset that compat-gso code needs. >> + >> + /* Push down and install the IP header. */ >> + __skb_push(skb, sizeof(struct iphdr)); >> + skb_reset_network_header(skb); >> + >> + iph = ip_hdr(skb); >> + >> + iph->version = 4; >> + iph->ihl = sizeof(struct iphdr) >> 2; >> + iph->frag_off = df; >> + iph->protocol = proto; >> + iph->tos = tos; >> + iph->daddr = dst; >> + iph->saddr = src; >> + iph->ttl = ttl; >> + tunnel_ip_select_ident(skb, >> + (const struct iphdr >> *)skb_inner_network_header(skb), >> + &rt_dst(rt)); >> + >> + err = ip_local_out(skb); >> + if (unlikely(net_xmit_eval(err))) >> + pkt_len = 0; >> + return pkt_len; >> +} >> + > ... >> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c >> index add17d9..1707eab 100644 >> --- a/datapath/vport-gre.c >> +++ b/datapath/vport-gre.c >> @@ -24,50 +24,29 @@ >> #include <linux/if_tunnel.h> >> #include <linux/if_vlan.h> >> #include <linux/in.h> >> +#include <linux/if_vlan.h> >> +#include <linux/in.h> >> +#include <linux/in_route.h> >> +#include <linux/inetdevice.h> >> +#include <linux/jhash.h> >> +#include <linux/list.h> >> +#include <linux/kernel.h> >> +#include <linux/workqueue.h> >> +#include <linux/rculist.h> >> +#include <net/route.h> >> +#include <net/xfrm.h> >> + >> >> #include <net/icmp.h> >> #include <net/ip.h> >> +#include <net/ip_tunnels.h> >> +#include <net/gre.h> >> #include <net/protocol.h> >> >> #include "datapath.h" >> #include "tunnel.h" >> #include "vport.h" >> >> -/* >> - * The GRE header is composed of a series of sections: a base and then a >> variable >> - * number of options. >> - */ >> -#define GRE_HEADER_SECTION 4 >> - >> -struct gre_base_hdr { >> - __be16 flags; >> - __be16 protocol; >> -}; >> - >> -static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key) >> -{ >> - int len = GRE_HEADER_SECTION; >> - >> - if (tun_key->tun_flags & OVS_TNL_F_KEY) >> - len += GRE_HEADER_SECTION; >> - if (tun_key->tun_flags & OVS_TNL_F_CSUM) >> - len += GRE_HEADER_SECTION; >> - return len; >> -} >> - >> -static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key) >> -{ >> - /* Set key for GRE64 tunnels, even when key if is zero. */ >> - int len = GRE_HEADER_SECTION + /* GRE Hdr */ >> - GRE_HEADER_SECTION + /* GRE Key */ >> - GRE_HEADER_SECTION; /* GRE SEQ */ >> - >> - if (tun_key->tun_flags & OVS_TNL_F_CSUM) >> - len += GRE_HEADER_SECTION; >> - >> - return len; >> -} >> - >> /* Returns the least-significant 32 bits of a __be64. */ >> static __be32 be64_get_low32(__be64 x) >> { >> @@ -78,61 +57,31 @@ static __be32 be64_get_low32(__be64 x) >> #endif >> } >> >> -static __be32 be64_get_high32(__be64 x) >> +static __be16 filter_tnl_flags(__be16 flags) >> { >> -#ifdef __BIG_ENDIAN >> - return (__force __be32)((__force u64)x >> 32); >> -#else >> - return (__force __be32)x; >> -#endif >> + return (flags & (TUNNEL_CSUM | TUNNEL_KEY)); >> } >> >> -static void __gre_build_header(struct sk_buff *skb, >> - int tunnel_hlen, >> - bool is_gre64) >> +static struct sk_buff *__build_header(const struct vport *vport, > > 'vport' is not used, so it could be removed here. ok. > >> + struct sk_buff *skb, >> + int tunnel_hlen, >> + __be32 seq, __be16 gre64_flag) >> { >> const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; >> - __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen >> - - GRE_HEADER_SECTION); >> - struct gre_base_hdr *greh = (struct gre_base_hdr *) >> skb_transport_header(skb); >> - greh->protocol = htons(ETH_P_TEB); >> - greh->flags = 0; >> - >> - /* Work backwards over the options so the checksum is last. */ >> - if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) { >> - greh->flags |= GRE_KEY; >> - if (is_gre64) { >> - /* Set higher 32 bits to seq. */ >> - *options = be64_get_high32(tun_key->tun_id); >> - options--; >> - greh->flags |= GRE_SEQ; >> - } >> - *options = be64_get_low32(tun_key->tun_id); >> - options--; >> - } >> + struct tnl_ptk_info tpi; >> >> - if (tun_key->tun_flags & OVS_TNL_F_CSUM) { >> - greh->flags |= GRE_CSUM; >> - *options = 0; >> - *(__sum16 *)options = csum_fold(skb_checksum(skb, >> - skb_transport_offset(skb), >> - skb->len - >> skb_transport_offset(skb), >> - 0)); >> - } >> -} >> + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); >> + if (IS_ERR(skb)) >> + return NULL; >> >> -static void gre_build_header(const struct vport *vport, >> - struct sk_buff *skb, >> - int tunnel_hlen) >> -{ >> - __gre_build_header(skb, tunnel_hlen, false); >> -} >> + tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag; >> >> -static void gre64_build_header(const struct vport *vport, >> - struct sk_buff *skb, >> - int tunnel_hlen) >> -{ >> - __gre_build_header(skb, tunnel_hlen, true); >> + tpi.proto = htons(ETH_P_TEB); >> + tpi.key = be64_get_low32(tun_key->tun_id); >> + tpi.seq = seq; >> + gre_build_header(skb, &tpi, tunnel_hlen); >> + >> + return skb; >> } >> > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev