Pravin,

Please find some review comments below,

  Jarno

On May 23, 2013, at 23:01 , ext Pravin B Shelar wrote:
...
> diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
> new file mode 100644
> index 0000000..c352ff8
> --- /dev/null
> +++ b/datapath/linux/compat/gre.c
> @@ -0,0 +1,352 @@
> +/*
> + * Copyright (c) 2007-2013 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#include <linux/module.h>
> +#include <linux/if.h>
> +#include <linux/if_tunnel.h>
> +#include <linux/icmp.h>
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/kernel.h>
> +#include <linux/kmod.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/spinlock.h>
> +
> +#include <net/gre.h>
> +#include <net/icmp.h>
> +#include <net/protocol.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> +#include "gso.h"
> +
> +static struct gre_cisco_protocol __rcu *gre_cisco_proto;
> +
> +static void gre_csum_fix(struct sk_buff *skb)
> +{
> +     struct gre_base_hdr *greh;
> +     __be32 *options;
> +     int gre_offset = skb_transport_offset(skb);
> +
> +     greh = (struct gre_base_hdr *)skb_transport_header(skb);
> +     options = ((__be32 *)greh + 1);
> +
> +     *options = 0;
> +     *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
> +                                                  skb->len - gre_offset, 0));
> +}
> +
> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
> +{
> +     int err;
> +
> +     skb_reset_inner_headers(skb);
> +
> +     if (skb_is_gso(skb)) {
> +             if (gre_csum)
> +                     OVS_GSO_CB(skb)->fix_segment = gre_csum_fix;
> +     } else {
> +             if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
> +                     err = skb_checksum_help(skb);
> +                     if (err)
> +                             goto error;
> +
> +             } else if (skb->ip_summed != CHECKSUM_PARTIAL)
> +                     skb->ip_summed = CHECKSUM_NONE;
> +     }
> +     return skb;
> +error:
> +     kfree_skb(skb);
> +     return ERR_PTR(err);
> +}
> +
> +static bool is_gre_gso(struct sk_buff *skb)
> +{
> +     return skb_is_gso(skb);
> +}

What is the value of this? You already use skb_is_gso() above.

> +
> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
> +                   int hdr_len)
> +{
> +     struct gre_base_hdr *greh;
> +
> +     __skb_push(skb, hdr_len);
> +
> +     greh = (struct gre_base_hdr *)skb->data;
> +     greh->flags = tnl_flags_to_gre_flags(tpi->flags);
> +     greh->protocol = tpi->proto;
> +
> +     if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {

Add some spaces? Also below.

> +             __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
> +
> +             if (tpi->flags&TUNNEL_SEQ) {
> +                     *ptr = tpi->seq;
> +                     ptr--;
> +             }
> +             if (tpi->flags&TUNNEL_KEY) {
> +                     *ptr = tpi->key;
> +                     ptr--;
> +             }
> +             if (tpi->flags&TUNNEL_CSUM && !is_gre_gso(skb)) {
> +                     *ptr = 0;
> +                     *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
> +                                             skb->len, 0));
> +             }
> +     }
> +}
> +
> +static __sum16 check_checksum(struct sk_buff *skb)
> +{
> +     __sum16 csum = 0;
> +
> +     switch (skb->ip_summed) {
> +     case CHECKSUM_COMPLETE:
> +             csum = csum_fold(skb->csum);
> +
> +             if (!csum)
> +                     break;
> +             /* Fall through. */
> +
> +     case CHECKSUM_NONE:
> +             skb->csum = 0;
> +             csum = __skb_checksum_complete(skb);
> +             skb->ip_summed = CHECKSUM_COMPLETE;
> +             break;
> +     }
> +
> +     return csum;
> +}
> +
> +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
> +                         bool *csum_err)

It seems the csum_err returned via the parameter pointer is ignored by the 
caller, hence it could be removed. 

> +{
> +     unsigned int ip_hlen = ip_hdrlen(skb);
> +     struct gre_base_hdr *greh;
> +     __be32 *options;
> +     int hdr_len;
> +
> +     if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
> +             return -EINVAL;
> +
> +     greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
> +     if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
> +             return -EINVAL;
> +
> +     tpi->flags = gre_flags_to_tnl_flags(greh->flags);
> +     hdr_len = ip_gre_calc_hlen(tpi->flags);
> +
> +     if (!pskb_may_pull(skb, hdr_len))
> +             return -EINVAL;
> +
> +     greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
> +     tpi->proto = greh->protocol;
> +
> +     options = (__be32 *)(greh + 1);
> +     if (greh->flags & GRE_CSUM) {
> +             if (check_checksum(skb)) {
> +                     *csum_err = true;
> +                     return -EINVAL;
> +             }
> +             options++;
> +     }
> +
> +     if (greh->flags & GRE_KEY) {
> +             tpi->key = *options;
> +             options++;
> +     } else
> +             tpi->key = 0;
> +
> +     if (unlikely(greh->flags & GRE_SEQ)) {
> +             tpi->seq = *options;
> +             options++;
> +     } else
> +             tpi->seq = 0;
> +
> +     /* WCCP version 1 and 2 protocol decoding.
> +      * - Change protocol to IP
> +      * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
> +      */
> +     if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
> +             tpi->proto = htons(ETH_P_IP);
> +             if ((*(u8 *)options & 0xF0) != 0x40) {
> +                     hdr_len += 4;
> +                     if (!pskb_may_pull(skb, hdr_len))
> +                             return -EINVAL;
> +             }
> +     }
> +
> +     return iptunnel_pull_header(skb, hdr_len, tpi->proto);
> +}
> +
> +static int gre_cisco_rcv(struct sk_buff *skb)
> +{
> +     struct tnl_ptk_info tpi;
> +     bool csum_err = false;
> +     struct gre_cisco_protocol *proto;
> +
> +     rcu_read_lock();
> +     proto = rcu_dereference(gre_cisco_proto);
> +     if (!proto)
> +             goto drop;
> +
> +     if (parse_gre_header(skb, &tpi, &csum_err) < 0)
> +             goto drop;
> +     proto->handler(skb, &tpi);
> +     rcu_read_unlock();
> +     return 0;
> +
> +drop:
> +     rcu_read_unlock();
> +     kfree_skb(skb);
> +     return 0;
> +}
> +
> +static const struct gre_protocol ipgre_protocol = {
> +     .handler        =       gre_cisco_rcv,
> +};
> +
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
> +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] 
> __read_mostly;
> +
> +int gre_add_protocol(const struct gre_protocol *proto, u8 version)
> +{
> +     if (version >= GREPROTO_MAX)
> +             return -EINVAL;
> +
> +     return (cmpxchg((const struct gre_protocol **)&gre_proto[version], 
> NULL, proto) == NULL) ?
> +             0 : -EBUSY;
> +}
> +
> +int gre_del_protocol(const struct gre_protocol *proto, u8 version)
> +{
> +     int ret;
> +
> +     if (version >= GREPROTO_MAX)
> +             return -EINVAL;
> +
> +     ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], 
> proto, NULL) == proto) ?
> +             0 : -EBUSY;
> +
> +     if (ret)
> +             return ret;
> +
> +     synchronize_net();
> +     return 0;
> +}
> +
> +static int gre_rcv(struct sk_buff *skb)
> +{
> +     const struct gre_protocol *proto;
> +     u8 ver;
> +     int ret;
> +
> +     if (!pskb_may_pull(skb, 12))
> +             goto drop;

Is there a symbol you could use here instead the literal 12?

> +
> +     ver = skb->data[1]&0x7f;

Spaces...

> +     if (ver >= GREPROTO_MAX)
> +             goto drop;
> +
> +     rcu_read_lock();
> +     proto = rcu_dereference(gre_proto[ver]);
> +     if (!proto || !proto->handler)
> +             goto drop_unlock;
> +     ret = proto->handler(skb);
> +     rcu_read_unlock();
> +     return ret;
> +
> +drop_unlock:
> +     rcu_read_unlock();
> +drop:
> +     kfree_skb(skb);
> +     return NET_RX_DROP;
> +}
> +
...
> diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
> new file mode 100644
> index 0000000..f6a4ad3
> --- /dev/null
> +++ b/datapath/linux/compat/gso.c
> @@ -0,0 +1,166 @@
> +/*
> + * Copyright (c) 2007-2013 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#include <linux/module.h>
> +#include <linux/if.h>
> +#include <linux/if_tunnel.h>
> +#include <linux/icmp.h>
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/kernel.h>
> +#include <linux/kmod.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/spinlock.h>
> +
> +#include <net/gre.h>
> +#include <net/icmp.h>
> +#include <net/protocol.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> +#include "gso.h"
> +
> +static __be16 skb_network_protocol(struct sk_buff *skb)
> +{
> +     __be16 type = skb->protocol;
> +     int vlan_depth = ETH_HLEN;

Shouldn't this be 2*ETH_ALEN instead, since that is where VLAN header starts at 
if there is one?

> +
> +     while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
> +             struct vlan_hdr *vh;
> +
> +             if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
> +                     return 0;
> +
> +             vh = (struct vlan_hdr *)(skb->data + vlan_depth);
> +             type = vh->h_vlan_encapsulated_proto;
> +             vlan_depth += VLAN_HLEN;
> +     }
> +
> +     return type;
> +}
> +
> +static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
> +                                        netdev_features_t features,
> +                                        bool tx_path)
> +{
> +     struct iphdr *iph       = ip_hdr(skb);
> +     int tnl_hlen            = skb_inner_network_offset(skb);
> +     struct sk_buff *skb1    = skb;
> +     struct sk_buff *segs;
> +
> +     __skb_pull(skb, tnl_hlen);
> +     skb_reset_mac_header(skb);
> +     skb_reset_network_header(skb);
> +     skb_reset_transport_header(skb);
> +     skb->protocol = skb_network_protocol(skb);
> +
> +     segs = __skb_gso_segment(skb, 0, true);

Are you sure you want to ignore the tx_path and insist on 'true' here? Using 
'false' selects a looser check of skb->ip_summed at __skb_gso_segment. We have 
seen the check implied by 'true' having the kernel fill the disk with 
non-rate-limited warnings, so this may be a significant choice you are making 
here.

> +     if (!segs || IS_ERR(segs))
> +             goto free;
> +
> +     skb = segs;
> +     while (skb) {
> +             __skb_push(skb, tnl_hlen);
> +             skb_reset_mac_header(skb);
> +             skb_reset_network_header(skb);
> +             skb_set_transport_header(skb, sizeof(struct iphdr));
> +             skb->mac_len = 0;
> +
> +             memcpy(ip_hdr(skb), iph, tnl_hlen);

Could you educate me a bit here and explain shortly why it is necessary to 
overwrite the IP headers of the segmented skbs?

> +             if (OVS_GSO_CB(skb)->fix_segment)
> +                     OVS_GSO_CB(skb)->fix_segment(skb);
> +             skb = skb->next;
> +     }
> +free:
> +     consume_skb(skb1);
> +     return segs;
> +}
> +
> +static bool need_linearize(const struct sk_buff *skb)
> +{
> +     int i;
> +
> +     if (unlikely(skb_shinfo(skb)->frag_list))
> +             return true;
> +
> +     /*
> +      * Generally speaking we should linearize if there are paged frags.
> +      * However, if all of the refcounts are 1 we know nobody else can
> +      * change them from underneath us and we can skip the linearization.
> +      */
> +     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> +             if 
> (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
> +                     return true;
> +
> +     return false;
> +}
> +
> +int ip_local_out(struct sk_buff *skb)

IMO it would be less surprising if you used 'rpl_ip_local_out' here. Don't know 
if that would go against a convention though.

> +{
> +     struct dst_entry *dst = skb_dst(skb);
> +     int ret = NETDEV_TX_OK;
> +
> +     if (skb_is_gso(skb)) {
> +             skb = tnl_skb_gso_segment(skb, 0, true);

Maybe should use 'false' as the tx_path argument (Jesse?).

> +             if (!skb || IS_ERR(skb))
> +                     return 0;
> +     }  else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> +             int err;
> +
> +             if (unlikely(need_linearize(skb))) {
> +                     err = __skb_linearize(skb);
> +                     if (unlikely(err))
> +                             return 0;
> +             }
> +
> +             err = skb_checksum_help(skb);
> +             if (unlikely(err))
> +                     return 0;
> +     }
> +
> +     while (skb) {
> +             struct sk_buff *next_skb = skb->next;
> +             int err;
> +
> +             if (next_skb)
> +                     skb_dst_set(skb, dst_clone(dst));
> +             else
> +                     skb_dst_set(skb, dst);
> +
> +             skb->next = NULL;
> +
> +             /*
> +              * Allow our local IP stack to fragment the outer packet even
> +              * if the DF bit is set as a last resort.  We also need to
> +              * force selection of an IP ID here because Linux will
> +              * otherwise leave it at 0 if the packet originally had DF set.
> +              */

This comment needs editing as the IP ID is not set here.

> +
> +             skb->local_df = 1;
> +             memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
> +

Is clearing of the skb control buffer necessary here?

> +#undef ip_local_out
> +             err = ip_local_out(skb);
> +             if (unlikely(net_xmit_eval(err)))
> +                     ret = err;
> +
> +             skb = next_skb;
> +     }
> +     return ret;
> +}
...
> diff --git a/datapath/linux/compat/include/net/gre.h 
> b/datapath/linux/compat/include/net/gre.h
> new file mode 100644
> index 0000000..16dc54e
> --- /dev/null
> +++ b/datapath/linux/compat/include/net/gre.h
> @@ -0,0 +1,109 @@
> +#ifndef __LINUX_GRE_WRAPPER_H
> +#define __LINUX_GRE_WRAPPER_H
> +
> +#include <linux/skbuff.h>
> +#include <net/ip_tunnels.h>
> +
> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
> +#include_next <net/gre.h>
> +
> +#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37) */
> +
> +#define GREPROTO_CISCO               0
> +#define GREPROTO_PPTP                1
> +#define GREPROTO_MAX         2
> +#define GRE_IP_PROTO_MAX     2
> +
> +struct gre_protocol {
> +     int  (*handler)(struct sk_buff *skb);
> +     void (*err_handler)(struct sk_buff *skb, u32 info);
> +};
> +
> +int gre_add_protocol(const struct gre_protocol *proto, u8 version);
> +int gre_del_protocol(const struct gre_protocol *proto, u8 version);
> +
> +#endif
> +
> +#define GRE_IP_PROTO_MAX     2
> +
> +struct gre_base_hdr {
> +     __be16 flags;
> +     __be16 protocol;
> +};
> +#define GRE_HEADER_SECTION 4
> +
> +#define MAX_GRE_PROTO_PRIORITY 255
> +struct gre_cisco_protocol {
> +     int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
> +     int (*err_handler)(struct sk_buff *skb, u32 info,
> +                        const struct tnl_ptk_info *tpi);
> +     u8 priority;
> +};
> +
> +#define gre_build_header rpl_gre_build_header
> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
> +                   int hdr_len);
> +
> +#define gre_handle_offloads rpl_gre_handle_offloads
> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
> +
> +int gre_cisco_register(struct gre_cisco_protocol *proto);
> +int gre_cisco_unregister(struct gre_cisco_protocol *proto);
> +
> +static inline int ip_gre_calc_hlen(__be16 o_flags)
> +{
> +     int addend = 4;
> +
> +     if (o_flags&TUNNEL_CSUM)

Spaces...

> +             addend += 4;
> +     if (o_flags&TUNNEL_KEY)
> +             addend += 4;
> +     if (o_flags&TUNNEL_SEQ)
> +             addend += 4;
> +     return addend;
> +}
> +
...
> diff --git a/datapath/linux/compat/include/net/ip_tunnels.h 
> b/datapath/linux/compat/include/net/ip_tunnels.h
> new file mode 100644
> index 0000000..ad17c9d
> --- /dev/null
> +++ b/datapath/linux/compat/include/net/ip_tunnels.h
> @@ -0,0 +1,54 @@
> +#ifndef __NET_IP_TUNNELS_WRAPPER_H
> +#define __NET_IP_TUNNELS_WRAPPER_H 1
> +
> +#include <linux/if_tunnel.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/types.h>
> +#include <net/dsfield.h>
> +#include <net/flow.h>
> +#include <net/inet_ecn.h>
> +#include <net/ip.h>
> +#include <net/rtnetlink.h>
> +
> +#define TUNNEL_CSUM  __cpu_to_be16(0x01)
> +#define TUNNEL_ROUTING       __cpu_to_be16(0x02)
> +#define TUNNEL_KEY   __cpu_to_be16(0x04)
> +#define TUNNEL_SEQ   __cpu_to_be16(0x08)
> +#define TUNNEL_STRICT        __cpu_to_be16(0x10)
> +#define TUNNEL_REC   __cpu_to_be16(0x20)
> +#define TUNNEL_VERSION       __cpu_to_be16(0x40)
> +#define TUNNEL_NO_KEY        __cpu_to_be16(0x80)
> +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
> +
> +struct tnl_ptk_info {
> +     __be16 flags;
> +     __be16 proto;
> +     __be32 key;
> +     __be32 seq;
> +};
> +
> +#define PACKET_RCVD  0
> +#define PACKET_REJECT        1
> +
> +static inline void tunnel_ip_select_ident(struct sk_buff *skb,
> +                                       const struct iphdr  *old_iph,
> +                                       struct dst_entry *dst)
> +{
> +     struct iphdr *iph = ip_hdr(skb);
> +
> +     /* Use inner packet iph-id if possible. */
> +     if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
> +             iph->id = old_iph->id;

Was it concluded that it is safe to use the inner packet ip id? I get that this 
is a copy of upstream code, but would like to know.

> +     else
> +             __ip_select_ident(iph, dst,
> +                             (skb_shinfo(skb)->gso_segs ?: 1) - 1);
> +}
> +
> +int iptunnel_xmit(struct net *net, struct rtable *rt,
> +               struct sk_buff *skb,
> +               __be32 src, __be32 dst, __u8 proto,
> +               __u8 tos, __u8 ttl, __be16 df);
> +
> +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 
> inner_proto);
> +#endif /* __NET_IP_TUNNELS_H */
> diff --git a/datapath/linux/compat/ip_tunnels_core.c 
> b/datapath/linux/compat/ip_tunnels_core.c
> new file mode 100644
> index 0000000..f8c8488
> --- /dev/null
> +++ b/datapath/linux/compat/ip_tunnels_core.c
> @@ -0,0 +1,115 @@
> +/*
> + * Copyright (c) 2007-2013 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/in.h>
> +#include <linux/in_route.h>
> +#include <linux/inetdevice.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/version.h>
> +#include <linux/workqueue.h>
> +#include <linux/rculist.h>
> +#include <net/ip_tunnels.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> +#include "gso.h"
> +#include "compat.h"
> +
> +int iptunnel_xmit(struct net *net, struct rtable *rt,
> +               struct sk_buff *skb,
> +               __be32 src, __be32 dst, __u8 proto,
> +               __u8 tos, __u8 ttl, __be16 df)
> +{
> +     int pkt_len = skb->len;
> +     struct iphdr *iph;
> +     int err;
> +
> +     nf_reset(skb);
> +     secpath_reset(skb);
> +     skb_clear_rxhash(skb);
> +     skb_dst_drop(skb);
> +     skb_dst_set(skb, &rt_dst(rt));
> +#if 0
> +     /* Do not clear ovs-callback.  It will be done in gso code. */
> +     memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
> +#endif

Why does the skb control buffer need to be cleared in the first place?

> +
> +     /* Push down and install the IP header. */
> +     __skb_push(skb, sizeof(struct iphdr));
> +     skb_reset_network_header(skb);
> +
> +     iph = ip_hdr(skb);
> +
> +     iph->version    =       4;
> +     iph->ihl        =       sizeof(struct iphdr) >> 2;
> +     iph->frag_off   =       df;
> +     iph->protocol   =       proto;
> +     iph->tos        =       tos;
> +     iph->daddr      =       dst;
> +     iph->saddr      =       src;
> +     iph->ttl        =       ttl;
> +     tunnel_ip_select_ident(skb,
> +                            (const struct iphdr 
> *)skb_inner_network_header(skb),
> +                            &rt_dst(rt));
> +
> +     err = ip_local_out(skb);
> +     if (unlikely(net_xmit_eval(err)))
> +             pkt_len = 0;
> +     return pkt_len;
> +}
> +
...
> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
> index add17d9..1707eab 100644
> --- a/datapath/vport-gre.c
> +++ b/datapath/vport-gre.c
> @@ -24,50 +24,29 @@
> #include <linux/if_tunnel.h>
> #include <linux/if_vlan.h>
> #include <linux/in.h>
> +#include <linux/if_vlan.h>
> +#include <linux/in.h>
> +#include <linux/in_route.h>
> +#include <linux/inetdevice.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/workqueue.h>
> +#include <linux/rculist.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> 
> #include <net/icmp.h>
> #include <net/ip.h>
> +#include <net/ip_tunnels.h>
> +#include <net/gre.h>
> #include <net/protocol.h>
> 
> #include "datapath.h"
> #include "tunnel.h"
> #include "vport.h"
> 
> -/*
> - * The GRE header is composed of a series of sections: a base and then a 
> variable
> - * number of options.
> - */
> -#define GRE_HEADER_SECTION 4
> -
> -struct gre_base_hdr {
> -     __be16 flags;
> -     __be16 protocol;
> -};
> -
> -static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
> -{
> -     int len = GRE_HEADER_SECTION;
> -
> -     if (tun_key->tun_flags & OVS_TNL_F_KEY)
> -             len += GRE_HEADER_SECTION;
> -     if (tun_key->tun_flags & OVS_TNL_F_CSUM)
> -             len += GRE_HEADER_SECTION;
> -     return len;
> -}
> -
> -static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
> -{
> -     /* Set key for GRE64 tunnels, even when key if is zero. */
> -     int len = GRE_HEADER_SECTION +          /* GRE Hdr */
> -               GRE_HEADER_SECTION +          /* GRE Key */
> -               GRE_HEADER_SECTION;           /* GRE SEQ */
> -
> -     if (tun_key->tun_flags & OVS_TNL_F_CSUM)
> -             len += GRE_HEADER_SECTION;
> -
> -     return len;
> -}
> -
> /* Returns the least-significant 32 bits of a __be64. */
> static __be32 be64_get_low32(__be64 x)
> {
> @@ -78,61 +57,31 @@ static __be32 be64_get_low32(__be64 x)
> #endif
> }
> 
> -static __be32 be64_get_high32(__be64 x)
> +static __be16 filter_tnl_flags(__be16 flags)
> {
> -#ifdef __BIG_ENDIAN
> -     return (__force __be32)((__force u64)x >> 32);
> -#else
> -     return (__force __be32)x;
> -#endif
> +     return (flags & (TUNNEL_CSUM | TUNNEL_KEY));
> }
> 
> -static void __gre_build_header(struct sk_buff *skb,
> -                            int tunnel_hlen,
> -                            bool is_gre64)
> +static struct sk_buff *__build_header(const struct vport *vport,

'vport' is not used, so it could be removed here. 

> +                                   struct sk_buff *skb,
> +                                   int tunnel_hlen,
> +                                   __be32 seq, __be16 gre64_flag)
> {
>       const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
> -     __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
> -                     - GRE_HEADER_SECTION);
> -     struct gre_base_hdr *greh = (struct gre_base_hdr *) 
> skb_transport_header(skb);
> -     greh->protocol = htons(ETH_P_TEB);
> -     greh->flags = 0;
> -
> -     /* Work backwards over the options so the checksum is last. */
> -     if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
> -             greh->flags |= GRE_KEY;
> -             if (is_gre64) {
> -                     /* Set higher 32 bits to seq. */
> -                     *options = be64_get_high32(tun_key->tun_id);
> -                     options--;
> -                     greh->flags |= GRE_SEQ;
> -             }
> -             *options = be64_get_low32(tun_key->tun_id);
> -             options--;
> -     }
> +     struct tnl_ptk_info tpi;
> 
> -     if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
> -             greh->flags |= GRE_CSUM;
> -             *options = 0;
> -             *(__sum16 *)options = csum_fold(skb_checksum(skb,
> -                                             skb_transport_offset(skb),
> -                                             skb->len - 
> skb_transport_offset(skb),
> -                                             0));
> -     }
> -}
> +     skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
> +     if (IS_ERR(skb))
> +             return NULL;
> 
> -static void gre_build_header(const struct vport *vport,
> -                          struct sk_buff *skb,
> -                          int tunnel_hlen)
> -{
> -     __gre_build_header(skb, tunnel_hlen, false);
> -}
> +     tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
> 
> -static void gre64_build_header(const struct vport *vport,
> -                            struct sk_buff *skb,
> -                            int tunnel_hlen)
> -{
> -     __gre_build_header(skb, tunnel_hlen, true);
> +     tpi.proto = htons(ETH_P_TEB);
> +     tpi.key = be64_get_low32(tun_key->tun_id);
> +     tpi.seq = seq;
> +     gre_build_header(skb, &tpi, tunnel_hlen);
> +
> +     return skb;
> }
> 

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to