On Thu, May 23, 2013 at 11:12 PM, Rajahalme, Jarno (NSN - FI/Espoo)
<jarno.rajaha...@nsn.com> wrote:
> Pravin,
>
> Please find some review comments below,

Thanks for comments.

>
>   Jarno
>
> On May 23, 2013, at 23:01 , ext Pravin B Shelar wrote:
> ...
>> diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
>> new file mode 100644
>> index 0000000..c352ff8
>> --- /dev/null
>> +++ b/datapath/linux/compat/gre.c
>> @@ -0,0 +1,352 @@
>> +/*
>> + * Copyright (c) 2007-2013 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of version 2 of the GNU General Public
>> + * License as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301, USA
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/if.h>
>> +#include <linux/if_tunnel.h>
>> +#include <linux/icmp.h>
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <linux/kernel.h>
>> +#include <linux/kmod.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/spinlock.h>
>> +
>> +#include <net/gre.h>
>> +#include <net/icmp.h>
>> +#include <net/protocol.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>> +#include "gso.h"
>> +
>> +static struct gre_cisco_protocol __rcu *gre_cisco_proto;
>> +
>> +static void gre_csum_fix(struct sk_buff *skb)
>> +{
>> +     struct gre_base_hdr *greh;
>> +     __be32 *options;
>> +     int gre_offset = skb_transport_offset(skb);
>> +
>> +     greh = (struct gre_base_hdr *)skb_transport_header(skb);
>> +     options = ((__be32 *)greh + 1);
>> +
>> +     *options = 0;
>> +     *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
>> +                                                  skb->len - gre_offset, 
>> 0));
>> +}
>> +
>> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
>> +{
>> +     int err;
>> +
>> +     skb_reset_inner_headers(skb);
>> +
>> +     if (skb_is_gso(skb)) {
>> +             if (gre_csum)
>> +                     OVS_GSO_CB(skb)->fix_segment = gre_csum_fix;
>> +     } else {
>> +             if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
>> +                     err = skb_checksum_help(skb);
>> +                     if (err)
>> +                             goto error;
>> +
>> +             } else if (skb->ip_summed != CHECKSUM_PARTIAL)
>> +                     skb->ip_summed = CHECKSUM_NONE;
>> +     }
>> +     return skb;
>> +error:
>> +     kfree_skb(skb);
>> +     return ERR_PTR(err);
>> +}
>> +
>> +static bool is_gre_gso(struct sk_buff *skb)
>> +{
>> +     return skb_is_gso(skb);
>> +}
>
> What is the value of this? You already use skb_is_gso() above.
>
right, I will get rid of it.

>> +
>> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
>> +                   int hdr_len)
>> +{
>> +     struct gre_base_hdr *greh;
>> +
>> +     __skb_push(skb, hdr_len);
>> +
>> +     greh = (struct gre_base_hdr *)skb->data;
>> +     greh->flags = tnl_flags_to_gre_flags(tpi->flags);
>> +     greh->protocol = tpi->proto;
>> +
>> +     if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
>
> Add some spaces? Also below.
>
ok.

>> +             __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
>> +
>> +             if (tpi->flags&TUNNEL_SEQ) {
>> +                     *ptr = tpi->seq;
>> +                     ptr--;
>> +             }
>> +             if (tpi->flags&TUNNEL_KEY) {
>> +                     *ptr = tpi->key;
>> +                     ptr--;
>> +             }
>> +             if (tpi->flags&TUNNEL_CSUM && !is_gre_gso(skb)) {
>> +                     *ptr = 0;
>> +                     *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
>> +                                             skb->len, 0));
>> +             }
>> +     }
>> +}
>> +
>> +static __sum16 check_checksum(struct sk_buff *skb)
>> +{
>> +     __sum16 csum = 0;
>> +
>> +     switch (skb->ip_summed) {
>> +     case CHECKSUM_COMPLETE:
>> +             csum = csum_fold(skb->csum);
>> +
>> +             if (!csum)
>> +                     break;
>> +             /* Fall through. */
>> +
>> +     case CHECKSUM_NONE:
>> +             skb->csum = 0;
>> +             csum = __skb_checksum_complete(skb);
>> +             skb->ip_summed = CHECKSUM_COMPLETE;
>> +             break;
>> +     }
>> +
>> +     return csum;
>> +}
>> +
>> +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
>> +                         bool *csum_err)
>
> It seems the csum_err returned via the parameter pointer is ignored by the 
> caller, hence it could be removed.
>
This function will be defied in upstream gre module, so I will keep it
same as that.

>> +{
>> +     unsigned int ip_hlen = ip_hdrlen(skb);
>> +     struct gre_base_hdr *greh;
>> +     __be32 *options;
>> +     int hdr_len;
>> +
>> +     if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
>> +             return -EINVAL;
>> +
>> +     greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
>> +     if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
>> +             return -EINVAL;
>> +
>> +     tpi->flags = gre_flags_to_tnl_flags(greh->flags);
>> +     hdr_len = ip_gre_calc_hlen(tpi->flags);
>> +
>> +     if (!pskb_may_pull(skb, hdr_len))
>> +             return -EINVAL;
>> +
>> +     greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
>> +     tpi->proto = greh->protocol;
>> +
>> +     options = (__be32 *)(greh + 1);
>> +     if (greh->flags & GRE_CSUM) {
>> +             if (check_checksum(skb)) {
>> +                     *csum_err = true;
>> +                     return -EINVAL;
>> +             }
>> +             options++;
>> +     }
>> +
>> +     if (greh->flags & GRE_KEY) {
>> +             tpi->key = *options;
>> +             options++;
>> +     } else
>> +             tpi->key = 0;
>> +
>> +     if (unlikely(greh->flags & GRE_SEQ)) {
>> +             tpi->seq = *options;
>> +             options++;
>> +     } else
>> +             tpi->seq = 0;
>> +
>> +     /* WCCP version 1 and 2 protocol decoding.
>> +      * - Change protocol to IP
>> +      * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
>> +      */
>> +     if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
>> +             tpi->proto = htons(ETH_P_IP);
>> +             if ((*(u8 *)options & 0xF0) != 0x40) {
>> +                     hdr_len += 4;
>> +                     if (!pskb_may_pull(skb, hdr_len))
>> +                             return -EINVAL;
>> +             }
>> +     }
>> +
>> +     return iptunnel_pull_header(skb, hdr_len, tpi->proto);
>> +}
>> +
>> +static int gre_cisco_rcv(struct sk_buff *skb)
>> +{
>> +     struct tnl_ptk_info tpi;
>> +     bool csum_err = false;
>> +     struct gre_cisco_protocol *proto;
>> +
>> +     rcu_read_lock();
>> +     proto = rcu_dereference(gre_cisco_proto);
>> +     if (!proto)
>> +             goto drop;
>> +
>> +     if (parse_gre_header(skb, &tpi, &csum_err) < 0)
>> +             goto drop;
>> +     proto->handler(skb, &tpi);
>> +     rcu_read_unlock();
>> +     return 0;
>> +
>> +drop:
>> +     rcu_read_unlock();
>> +     kfree_skb(skb);
>> +     return 0;
>> +}
>> +
>> +static const struct gre_protocol ipgre_protocol = {
>> +     .handler        =       gre_cisco_rcv,
>> +};
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
>> +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] 
>> __read_mostly;
>> +
>> +int gre_add_protocol(const struct gre_protocol *proto, u8 version)
>> +{
>> +     if (version >= GREPROTO_MAX)
>> +             return -EINVAL;
>> +
>> +     return (cmpxchg((const struct gre_protocol **)&gre_proto[version], 
>> NULL, proto) == NULL) ?
>> +             0 : -EBUSY;
>> +}
>> +
>> +int gre_del_protocol(const struct gre_protocol *proto, u8 version)
>> +{
>> +     int ret;
>> +
>> +     if (version >= GREPROTO_MAX)
>> +             return -EINVAL;
>> +
>> +     ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], 
>> proto, NULL) == proto) ?
>> +             0 : -EBUSY;
>> +
>> +     if (ret)
>> +             return ret;
>> +
>> +     synchronize_net();
>> +     return 0;
>> +}
>> +
>> +static int gre_rcv(struct sk_buff *skb)
>> +{
>> +     const struct gre_protocol *proto;
>> +     u8 ver;
>> +     int ret;
>> +
>> +     if (!pskb_may_pull(skb, 12))
>> +             goto drop;
>
> Is there a symbol you could use here instead the literal 12?
>
same as above, this function is copied from gre module.

>> +
>> +     ver = skb->data[1]&0x7f;
>
> Spaces...
>
ok.

>> +     if (ver >= GREPROTO_MAX)
>> +             goto drop;
>> +
>> +     rcu_read_lock();
>> +     proto = rcu_dereference(gre_proto[ver]);
>> +     if (!proto || !proto->handler)
>> +             goto drop_unlock;
>> +     ret = proto->handler(skb);
>> +     rcu_read_unlock();
>> +     return ret;
>> +
>> +drop_unlock:
>> +     rcu_read_unlock();
>> +drop:
>> +     kfree_skb(skb);
>> +     return NET_RX_DROP;
>> +}
>> +
> ...
>> diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
>> new file mode 100644
>> index 0000000..f6a4ad3
>> --- /dev/null
>> +++ b/datapath/linux/compat/gso.c
>> @@ -0,0 +1,166 @@
>> +/*
>> + * Copyright (c) 2007-2013 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of version 2 of the GNU General Public
>> + * License as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301, USA
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/if.h>
>> +#include <linux/if_tunnel.h>
>> +#include <linux/icmp.h>
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <linux/kernel.h>
>> +#include <linux/kmod.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/spinlock.h>
>> +
>> +#include <net/gre.h>
>> +#include <net/icmp.h>
>> +#include <net/protocol.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>> +#include "gso.h"
>> +
>> +static __be16 skb_network_protocol(struct sk_buff *skb)
>> +{
>> +     __be16 type = skb->protocol;
>> +     int vlan_depth = ETH_HLEN;
>
> Shouldn't this be 2*ETH_ALEN instead, since that is where VLAN header starts 
> at if there is one?
>
Linux vlan_hdr start at ETH_HLEN, ref: struct vlan_hd and struct
vlan_ethhdr in if_vlan.h.

>> +
>> +     while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
>> +             struct vlan_hdr *vh;
>> +
>> +             if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
>> +                     return 0;
>> +
>> +             vh = (struct vlan_hdr *)(skb->data + vlan_depth);
>> +             type = vh->h_vlan_encapsulated_proto;
>> +             vlan_depth += VLAN_HLEN;
>> +     }
>> +
>> +     return type;
>> +}
>> +
>> +static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
>> +                                        netdev_features_t features,
>> +                                        bool tx_path)
>> +{
>> +     struct iphdr *iph       = ip_hdr(skb);
>> +     int tnl_hlen            = skb_inner_network_offset(skb);
>> +     struct sk_buff *skb1    = skb;
>> +     struct sk_buff *segs;
>> +
>> +     __skb_pull(skb, tnl_hlen);
>> +     skb_reset_mac_header(skb);
>> +     skb_reset_network_header(skb);
>> +     skb_reset_transport_header(skb);
>> +     skb->protocol = skb_network_protocol(skb);
>> +
>> +     segs = __skb_gso_segment(skb, 0, true);
>
> Are you sure you want to ignore the tx_path and insist on 'true' here? Using 
> 'false' selects a looser check of skb->ip_summed at __skb_gso_segment. We 
> have seen the check implied by 'true' having the kernel fill the disk with 
> non-rate-limited warnings, so this may be a significant choice you are making 
> here.
>
right, I will change it.

>> +     if (!segs || IS_ERR(segs))
>> +             goto free;
>> +
>> +     skb = segs;
>> +     while (skb) {
>> +             __skb_push(skb, tnl_hlen);
>> +             skb_reset_mac_header(skb);
>> +             skb_reset_network_header(skb);
>> +             skb_set_transport_header(skb, sizeof(struct iphdr));
>> +             skb->mac_len = 0;
>> +
>> +             memcpy(ip_hdr(skb), iph, tnl_hlen);
>
> Could you educate me a bit here and explain shortly why it is necessary to 
> overwrite the IP headers of the segmented skbs?
>
we have segmented inner skb, now we need to copy outer header to each
segment from first segment.

>> +             if (OVS_GSO_CB(skb)->fix_segment)
>> +                     OVS_GSO_CB(skb)->fix_segment(skb);
>> +             skb = skb->next;
>> +     }
>> +free:
>> +     consume_skb(skb1);
>> +     return segs;
>> +}
>> +
>> +static bool need_linearize(const struct sk_buff *skb)
>> +{
>> +     int i;
>> +
>> +     if (unlikely(skb_shinfo(skb)->frag_list))
>> +             return true;
>> +
>> +     /*
>> +      * Generally speaking we should linearize if there are paged frags.
>> +      * However, if all of the refcounts are 1 we know nobody else can
>> +      * change them from underneath us and we can skip the linearization.
>> +      */
>> +     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
>> +             if 
>> (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
>> +                     return true;
>> +
>> +     return false;
>> +}
>> +
>> +int ip_local_out(struct sk_buff *skb)
>
> IMO it would be less surprising if you used 'rpl_ip_local_out' here. Don't 
> know if that would go against a convention though.
>
ok.

>> +{
>> +     struct dst_entry *dst = skb_dst(skb);
>> +     int ret = NETDEV_TX_OK;
>> +
>> +     if (skb_is_gso(skb)) {
>> +             skb = tnl_skb_gso_segment(skb, 0, true);
>
> Maybe should use 'false' as the tx_path argument (Jesse?).
>
ok.

>> +             if (!skb || IS_ERR(skb))
>> +                     return 0;
>> +     }  else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>> +             int err;
>> +
>> +             if (unlikely(need_linearize(skb))) {
>> +                     err = __skb_linearize(skb);
>> +                     if (unlikely(err))
>> +                             return 0;
>> +             }
>> +
>> +             err = skb_checksum_help(skb);
>> +             if (unlikely(err))
>> +                     return 0;
>> +     }
>> +
>> +     while (skb) {
>> +             struct sk_buff *next_skb = skb->next;
>> +             int err;
>> +
>> +             if (next_skb)
>> +                     skb_dst_set(skb, dst_clone(dst));
>> +             else
>> +                     skb_dst_set(skb, dst);
>> +
>> +             skb->next = NULL;
>> +
>> +             /*
>> +              * Allow our local IP stack to fragment the outer packet even
>> +              * if the DF bit is set as a last resort.  We also need to
>> +              * force selection of an IP ID here because Linux will
>> +              * otherwise leave it at 0 if the packet originally had DF set.
>> +              */
>
> This comment needs editing as the IP ID is not set here.
>
right, I missed it.

>> +
>> +             skb->local_df = 1;
>> +             memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
>> +
>
> Is clearing of the skb control buffer necessary here?
>
yes, we have seen issues due to garbage IPCB used by networking stack.

>> +#undef ip_local_out
>> +             err = ip_local_out(skb);
>> +             if (unlikely(net_xmit_eval(err)))
>> +                     ret = err;
>> +
>> +             skb = next_skb;
>> +     }
>> +     return ret;
>> +}
> ...
>> diff --git a/datapath/linux/compat/include/net/gre.h 
>> b/datapath/linux/compat/include/net/gre.h
>> new file mode 100644
>> index 0000000..16dc54e
>> --- /dev/null
>> +++ b/datapath/linux/compat/include/net/gre.h
>> @@ -0,0 +1,109 @@
>> +#ifndef __LINUX_GRE_WRAPPER_H
>> +#define __LINUX_GRE_WRAPPER_H
>> +
>> +#include <linux/skbuff.h>
>> +#include <net/ip_tunnels.h>
>> +
>> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
>> +#include_next <net/gre.h>
>> +
>> +#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37) */
>> +
>> +#define GREPROTO_CISCO               0
>> +#define GREPROTO_PPTP                1
>> +#define GREPROTO_MAX         2
>> +#define GRE_IP_PROTO_MAX     2
>> +
>> +struct gre_protocol {
>> +     int  (*handler)(struct sk_buff *skb);
>> +     void (*err_handler)(struct sk_buff *skb, u32 info);
>> +};
>> +
>> +int gre_add_protocol(const struct gre_protocol *proto, u8 version);
>> +int gre_del_protocol(const struct gre_protocol *proto, u8 version);
>> +
>> +#endif
>> +
>> +#define GRE_IP_PROTO_MAX     2
>> +
>> +struct gre_base_hdr {
>> +     __be16 flags;
>> +     __be16 protocol;
>> +};
>> +#define GRE_HEADER_SECTION 4
>> +
>> +#define MAX_GRE_PROTO_PRIORITY 255
>> +struct gre_cisco_protocol {
>> +     int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
>> +     int (*err_handler)(struct sk_buff *skb, u32 info,
>> +                        const struct tnl_ptk_info *tpi);
>> +     u8 priority;
>> +};
>> +
>> +#define gre_build_header rpl_gre_build_header
>> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
>> +                   int hdr_len);
>> +
>> +#define gre_handle_offloads rpl_gre_handle_offloads
>> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
>> +
>> +int gre_cisco_register(struct gre_cisco_protocol *proto);
>> +int gre_cisco_unregister(struct gre_cisco_protocol *proto);
>> +
>> +static inline int ip_gre_calc_hlen(__be16 o_flags)
>> +{
>> +     int addend = 4;
>> +
>> +     if (o_flags&TUNNEL_CSUM)
>
> Spaces...
>
ok.

>> +             addend += 4;
>> +     if (o_flags&TUNNEL_KEY)
>> +             addend += 4;
>> +     if (o_flags&TUNNEL_SEQ)
>> +             addend += 4;
>> +     return addend;
>> +}
>> +
> ...
>> diff --git a/datapath/linux/compat/include/net/ip_tunnels.h 
>> b/datapath/linux/compat/include/net/ip_tunnels.h
>> new file mode 100644
>> index 0000000..ad17c9d
>> --- /dev/null
>> +++ b/datapath/linux/compat/include/net/ip_tunnels.h
>> @@ -0,0 +1,54 @@
>> +#ifndef __NET_IP_TUNNELS_WRAPPER_H
>> +#define __NET_IP_TUNNELS_WRAPPER_H 1
>> +
>> +#include <linux/if_tunnel.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/types.h>
>> +#include <net/dsfield.h>
>> +#include <net/flow.h>
>> +#include <net/inet_ecn.h>
>> +#include <net/ip.h>
>> +#include <net/rtnetlink.h>
>> +
>> +#define TUNNEL_CSUM  __cpu_to_be16(0x01)
>> +#define TUNNEL_ROUTING       __cpu_to_be16(0x02)
>> +#define TUNNEL_KEY   __cpu_to_be16(0x04)
>> +#define TUNNEL_SEQ   __cpu_to_be16(0x08)
>> +#define TUNNEL_STRICT        __cpu_to_be16(0x10)
>> +#define TUNNEL_REC   __cpu_to_be16(0x20)
>> +#define TUNNEL_VERSION       __cpu_to_be16(0x40)
>> +#define TUNNEL_NO_KEY        __cpu_to_be16(0x80)
>> +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
>> +
>> +struct tnl_ptk_info {
>> +     __be16 flags;
>> +     __be16 proto;
>> +     __be32 key;
>> +     __be32 seq;
>> +};
>> +
>> +#define PACKET_RCVD  0
>> +#define PACKET_REJECT        1
>> +
>> +static inline void tunnel_ip_select_ident(struct sk_buff *skb,
>> +                                       const struct iphdr  *old_iph,
>> +                                       struct dst_entry *dst)
>> +{
>> +     struct iphdr *iph = ip_hdr(skb);
>> +
>> +     /* Use inner packet iph-id if possible. */
>> +     if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
>> +             iph->id = old_iph->id;
>
> Was it concluded that it is safe to use the inner packet ip id? I get that 
> this is a copy of upstream code, but would like to know.
>
I still need to think about it and propose fix for upstream kernel too.

>> +     else
>> +             __ip_select_ident(iph, dst,
>> +                             (skb_shinfo(skb)->gso_segs ?: 1) - 1);
>> +}
>> +
>> +int iptunnel_xmit(struct net *net, struct rtable *rt,
>> +               struct sk_buff *skb,
>> +               __be32 src, __be32 dst, __u8 proto,
>> +               __u8 tos, __u8 ttl, __be16 df);
>> +
>> +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 
>> inner_proto);
>> +#endif /* __NET_IP_TUNNELS_H */
>> diff --git a/datapath/linux/compat/ip_tunnels_core.c 
>> b/datapath/linux/compat/ip_tunnels_core.c
>> new file mode 100644
>> index 0000000..f8c8488
>> --- /dev/null
>> +++ b/datapath/linux/compat/ip_tunnels_core.c
>> @@ -0,0 +1,115 @@
>> +/*
>> + * Copyright (c) 2007-2013 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of version 2 of the GNU General Public
>> + * License as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301, USA
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/in.h>
>> +#include <linux/in_route.h>
>> +#include <linux/inetdevice.h>
>> +#include <linux/jhash.h>
>> +#include <linux/list.h>
>> +#include <linux/kernel.h>
>> +#include <linux/version.h>
>> +#include <linux/workqueue.h>
>> +#include <linux/rculist.h>
>> +#include <net/ip_tunnels.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>> +#include "gso.h"
>> +#include "compat.h"
>> +
>> +int iptunnel_xmit(struct net *net, struct rtable *rt,
>> +               struct sk_buff *skb,
>> +               __be32 src, __be32 dst, __u8 proto,
>> +               __u8 tos, __u8 ttl, __be16 df)
>> +{
>> +     int pkt_len = skb->len;
>> +     struct iphdr *iph;
>> +     int err;
>> +
>> +     nf_reset(skb);
>> +     secpath_reset(skb);
>> +     skb_clear_rxhash(skb);
>> +     skb_dst_drop(skb);
>> +     skb_dst_set(skb, &rt_dst(rt));
>> +#if 0
>> +     /* Do not clear ovs-callback.  It will be done in gso code. */
>> +     memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
>> +#endif
>
> Why does the skb control buffer need to be cleared in the first place?
>
it has inner packet offset that compat-gso code needs.

>> +
>> +     /* Push down and install the IP header. */
>> +     __skb_push(skb, sizeof(struct iphdr));
>> +     skb_reset_network_header(skb);
>> +
>> +     iph = ip_hdr(skb);
>> +
>> +     iph->version    =       4;
>> +     iph->ihl        =       sizeof(struct iphdr) >> 2;
>> +     iph->frag_off   =       df;
>> +     iph->protocol   =       proto;
>> +     iph->tos        =       tos;
>> +     iph->daddr      =       dst;
>> +     iph->saddr      =       src;
>> +     iph->ttl        =       ttl;
>> +     tunnel_ip_select_ident(skb,
>> +                            (const struct iphdr 
>> *)skb_inner_network_header(skb),
>> +                            &rt_dst(rt));
>> +
>> +     err = ip_local_out(skb);
>> +     if (unlikely(net_xmit_eval(err)))
>> +             pkt_len = 0;
>> +     return pkt_len;
>> +}
>> +
> ...
>> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
>> index add17d9..1707eab 100644
>> --- a/datapath/vport-gre.c
>> +++ b/datapath/vport-gre.c
>> @@ -24,50 +24,29 @@
>> #include <linux/if_tunnel.h>
>> #include <linux/if_vlan.h>
>> #include <linux/in.h>
>> +#include <linux/if_vlan.h>
>> +#include <linux/in.h>
>> +#include <linux/in_route.h>
>> +#include <linux/inetdevice.h>
>> +#include <linux/jhash.h>
>> +#include <linux/list.h>
>> +#include <linux/kernel.h>
>> +#include <linux/workqueue.h>
>> +#include <linux/rculist.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>>
>> #include <net/icmp.h>
>> #include <net/ip.h>
>> +#include <net/ip_tunnels.h>
>> +#include <net/gre.h>
>> #include <net/protocol.h>
>>
>> #include "datapath.h"
>> #include "tunnel.h"
>> #include "vport.h"
>>
>> -/*
>> - * The GRE header is composed of a series of sections: a base and then a 
>> variable
>> - * number of options.
>> - */
>> -#define GRE_HEADER_SECTION 4
>> -
>> -struct gre_base_hdr {
>> -     __be16 flags;
>> -     __be16 protocol;
>> -};
>> -
>> -static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
>> -{
>> -     int len = GRE_HEADER_SECTION;
>> -
>> -     if (tun_key->tun_flags & OVS_TNL_F_KEY)
>> -             len += GRE_HEADER_SECTION;
>> -     if (tun_key->tun_flags & OVS_TNL_F_CSUM)
>> -             len += GRE_HEADER_SECTION;
>> -     return len;
>> -}
>> -
>> -static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
>> -{
>> -     /* Set key for GRE64 tunnels, even when key if is zero. */
>> -     int len = GRE_HEADER_SECTION +          /* GRE Hdr */
>> -               GRE_HEADER_SECTION +          /* GRE Key */
>> -               GRE_HEADER_SECTION;           /* GRE SEQ */
>> -
>> -     if (tun_key->tun_flags & OVS_TNL_F_CSUM)
>> -             len += GRE_HEADER_SECTION;
>> -
>> -     return len;
>> -}
>> -
>> /* Returns the least-significant 32 bits of a __be64. */
>> static __be32 be64_get_low32(__be64 x)
>> {
>> @@ -78,61 +57,31 @@ static __be32 be64_get_low32(__be64 x)
>> #endif
>> }
>>
>> -static __be32 be64_get_high32(__be64 x)
>> +static __be16 filter_tnl_flags(__be16 flags)
>> {
>> -#ifdef __BIG_ENDIAN
>> -     return (__force __be32)((__force u64)x >> 32);
>> -#else
>> -     return (__force __be32)x;
>> -#endif
>> +     return (flags & (TUNNEL_CSUM | TUNNEL_KEY));
>> }
>>
>> -static void __gre_build_header(struct sk_buff *skb,
>> -                            int tunnel_hlen,
>> -                            bool is_gre64)
>> +static struct sk_buff *__build_header(const struct vport *vport,
>
> 'vport' is not used, so it could be removed here.
ok.

>
>> +                                   struct sk_buff *skb,
>> +                                   int tunnel_hlen,
>> +                                   __be32 seq, __be16 gre64_flag)
>> {
>>       const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
>> -     __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
>> -                     - GRE_HEADER_SECTION);
>> -     struct gre_base_hdr *greh = (struct gre_base_hdr *) 
>> skb_transport_header(skb);
>> -     greh->protocol = htons(ETH_P_TEB);
>> -     greh->flags = 0;
>> -
>> -     /* Work backwards over the options so the checksum is last. */
>> -     if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
>> -             greh->flags |= GRE_KEY;
>> -             if (is_gre64) {
>> -                     /* Set higher 32 bits to seq. */
>> -                     *options = be64_get_high32(tun_key->tun_id);
>> -                     options--;
>> -                     greh->flags |= GRE_SEQ;
>> -             }
>> -             *options = be64_get_low32(tun_key->tun_id);
>> -             options--;
>> -     }
>> +     struct tnl_ptk_info tpi;
>>
>> -     if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
>> -             greh->flags |= GRE_CSUM;
>> -             *options = 0;
>> -             *(__sum16 *)options = csum_fold(skb_checksum(skb,
>> -                                             skb_transport_offset(skb),
>> -                                             skb->len - 
>> skb_transport_offset(skb),
>> -                                             0));
>> -     }
>> -}
>> +     skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
>> +     if (IS_ERR(skb))
>> +             return NULL;
>>
>> -static void gre_build_header(const struct vport *vport,
>> -                          struct sk_buff *skb,
>> -                          int tunnel_hlen)
>> -{
>> -     __gre_build_header(skb, tunnel_hlen, false);
>> -}
>> +     tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
>>
>> -static void gre64_build_header(const struct vport *vport,
>> -                            struct sk_buff *skb,
>> -                            int tunnel_hlen)
>> -{
>> -     __gre_build_header(skb, tunnel_hlen, true);
>> +     tpi.proto = htons(ETH_P_TEB);
>> +     tpi.key = be64_get_low32(tun_key->tun_id);
>> +     tpi.seq = seq;
>> +     gre_build_header(skb, &tpi, tunnel_hlen);
>> +
>> +     return skb;
>> }
>>
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to