Currently the gre module allows only one handler per GRE protocol.
The following patch extends the gre module's registration by adding an API
to register multiple handlers for a GRE protocol.
This allows OVS and Linux gre devices to co-exist at the same time.

Signed-off-by: Pravin B Shelar <pshe...@nicira.com>
---
 include/net/gre.h |   24 ++++++
 net/ipv4/gre.c    |  228 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/ip_gre.c |  172 ++++++---------------------------------
 3 files changed, 274 insertions(+), 150 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index 9f03a39..b7d9fa6 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,6 +7,7 @@
 #define GREPROTO_CISCO         0
 #define GREPROTO_PPTP          1
 #define GREPROTO_MAX           2
+#define GRE_IP_PROTO_MAX       2
 
 struct gre_protocol {
        int  (*handler)(struct sk_buff *skb);
@@ -22,6 +23,29 @@ struct gre_base_hdr {
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
+struct gre_cisco_protocol {
+       int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
+       int (*err_handler)(struct sk_buff *skb, u32 info,
+                          const struct tnl_ptk_info *tpi);
+       u8 priority;
+};
+
+int gre_cisco_register(struct gre_cisco_protocol *proto);
+void gre_cisco_unregister(struct gre_cisco_protocol *proto);
+
+static inline int ip_gre_calc_hlen(__be16 o_flags)
+{
+       int addend = 4;
+
+       if (o_flags&TUNNEL_CSUM)
+               addend += 4;
+       if (o_flags&TUNNEL_KEY)
+               addend += 4;
+       if (o_flags&TUNNEL_SEQ)
+               addend += 4;
+       return addend;
+}
+
 static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
 {
        __be16 tflags = 0;
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index d2d5a99..1744fc3 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -13,6 +13,8 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/if.h>
+#include <linux/icmp.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/skbuff.h>
@@ -23,9 +25,12 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
-
+#include <net/icmp.h>
+#include <net/route.h>
+#include <net/xfrm.h>
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
+static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
 static DEFINE_SPINLOCK(gre_proto_lock);
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -69,6 +74,172 @@ err_out:
 }
 EXPORT_SYMBOL_GPL(gre_del_protocol);
 
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+       __sum16 csum = 0;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               csum = csum_fold(skb->csum);
+
+               if (!csum)
+                       break;
+               /* Fall through. */
+
+       case CHECKSUM_NONE:
+               skb->csum = 0;
+               csum = __skb_checksum_complete(skb);
+               skb->ip_summed = CHECKSUM_COMPLETE;
+               break;
+       }
+
+       return csum;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+                           bool *csum_err)
+{
+       struct iphdr *iph = ip_hdr(skb);
+       struct gre_base_hdr *greh;
+       __be32 *options;
+       int hdr_len;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+               return -EINVAL;
+
+       greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
+       if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+               return -EINVAL;
+
+       tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+       hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+       if (!pskb_may_pull(skb, hdr_len))
+               return -EINVAL;
+
+       tpi->proto = greh->protocol;
+
+       options = (__be32 *)(greh + 1);
+       if (greh->flags & GRE_CSUM) {
+               if (check_checksum(skb)) {
+                       *csum_err = true;
+                       return -EINVAL;
+               }
+               options++;
+       }
+
+       if (greh->flags & GRE_KEY) {
+               tpi->key = *options;
+               options++;
+       } else
+               tpi->key = 0;
+
+       if (unlikely(greh->flags & GRE_SEQ)) {
+               tpi->seq = *options;
+               options++;
+       } else
+               tpi->seq = 0;
+
+       /* WCCP version 1 and 2 protocol decoding.
+        * - Change protocol to IP
+        * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+        */
+       if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+               tpi->proto = htons(ETH_P_IP);
+               if ((*(u8 *)options & 0xF0) != 0x40) {
+                       hdr_len += 4;
+                       if (!pskb_may_pull(skb, hdr_len))
+                               return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+static int gre_cisco_rcv(struct sk_buff *skb)
+{
+       struct tnl_ptk_info tpi;
+       int i;
+       bool csum_err = false;
+
+       if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+               goto drop;
+
+       rcu_read_lock();
+       for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
+               struct gre_cisco_protocol *proto;
+               int ret;
+
+               proto = rcu_dereference(gre_cisco_proto_list[i]);
+               if (!proto)
+                       continue;
+               ret = proto->handler(skb, &tpi);
+               if (ret == PACKET_RCVD) {
+                       rcu_read_unlock();
+                       return 0;
+               }
+       }
+       rcu_read_unlock();
+
+       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
+       kfree_skb(skb);
+       return 0;
+}
+
+/* GREPROTO_CISCO ICMP error hook: do PMTU/redirect, then ask the handlers. */
+static void gre_cisco_err(struct sk_buff *skb, u32 info)
+{
+       /* All the routers (except for Linux) return only
+        * 8 bytes of packet payload. It means, that precise relaying of
+        * ICMP in the real Internet is absolutely infeasible.
+        *
+        * Moreover, Cisco "wise men" put GRE key to the third word
+        * in GRE header. It makes impossible maintaining even soft
+        * state for keyed
+        * GRE tunnels with enabled checksum. Tell them "thank you".
+        *
+        * Well, I wonder, rfc1812 was written by Cisco employee,
+        * what the hell these idiots break standards established
+        * by themselves???
+        */
+       const int type = icmp_hdr(skb)->type;
+       const int code = icmp_hdr(skb)->code;
+       /* Zeroed: on csum failure parsing stops before key/seq are set. */
+       struct tnl_ptk_info tpi = { 0 };
+       bool csum_err = false;
+       int i;
+
+       if (parse_gre_header(skb, &tpi, &csum_err)) {
+               if (!csum_err)          /* ignore csum errors. */
+                       return;
+       }
+
+       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+               ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+                               skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+               return;
+       }
+       if (type == ICMP_REDIRECT) {
+               ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
+                               IPPROTO_GRE, 0);
+               return;
+       }
+
+       /* Offer the error to each registered handler until one claims it. */
+       rcu_read_lock();
+       for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
+               struct gre_cisco_protocol *proto;
+
+               proto = rcu_dereference(gre_cisco_proto_list[i]);
+               if (!proto)
+                       continue;
+
+               if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
+                       break;
+       }
+       rcu_read_unlock();
+}
+
 static int gre_rcv(struct sk_buff *skb)
 {
        const struct gre_protocol *proto;
@@ -224,27 +395,75 @@ static const struct net_offload gre_offload = {
        },
 };
 
+static const struct gre_protocol ipgre_protocol = {
+       .handler     = gre_cisco_rcv,
+       .err_handler = gre_cisco_err,
+};
+
+/* Claim the demux slot selected by newp->priority.  Returns 0 on success,
+ * -EINVAL for an out-of-range priority, -EEXIST if the slot is already taken.
+ */
+int gre_cisco_register(struct gre_cisco_protocol *newp)
+{
+       int err = 0;
+
+       if (newp->priority >= GRE_IP_PROTO_MAX)
+               return -EINVAL;
+
+       spin_lock(&gre_proto_lock);
+       if (gre_cisco_proto_list[newp->priority])
+               err = -EEXIST;  /* report the clash instead of hiding it */
+       else
+               rcu_assign_pointer(gre_cisco_proto_list[newp->priority], newp);
+       spin_unlock(&gre_proto_lock);
+       return err;
+}
+EXPORT_SYMBOL_GPL(gre_cisco_register);
+
+void gre_cisco_unregister(struct gre_cisco_protocol *proto)
+{
+       if (proto->priority >= GRE_IP_PROTO_MAX)
+               return;
+
+       spin_lock(&gre_proto_lock);
+       RCU_INIT_POINTER(gre_cisco_proto_list[proto->priority], NULL);
+       spin_unlock(&gre_proto_lock);
+       synchronize_net();
+}
+EXPORT_SYMBOL_GPL(gre_cisco_unregister);
+
 static int __init gre_init(void)
 {
        pr_info("GRE over IPv4 demultiplexor driver\n");
 
        if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
                pr_err("can't add protocol\n");
-               return -EAGAIN;
+               goto err;
+       }
+
+       if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
+               pr_info("%s: can't add ipgre handler\n", __func__);
+               goto err_gre;
        }
 
        if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
                pr_err("can't add protocol offload\n");
-               inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
-               return -EAGAIN;
+               goto err_gso;
        }
 
        return 0;
+err_gso:
+       gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+err_gre:
+       inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+err:
+       return -EAGAIN;
 }
 
 static void __exit gre_exit(void)
 {
        inet_del_offload(&gre_offload, IPPROTO_GRE);
+       gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
        inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
@@ -254,4 +473,3 @@ module_exit(gre_exit);
 MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
 MODULE_AUTHOR("D. Kozlov (x...@mail.ru)");
 MODULE_LICENSE("GPL");
-
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ad662e9..4e6ffe5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -121,101 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev);
 static int ipgre_net_id __read_mostly;
 static int gre_tap_net_id __read_mostly;
 
-static __sum16 check_checksum(struct sk_buff *skb)
-{
-       __sum16 csum = 0;
-
-       switch (skb->ip_summed) {
-       case CHECKSUM_COMPLETE:
-               csum = csum_fold(skb->csum);
-
-               if (!csum)
-                       break;
-               /* Fall through. */
-
-       case CHECKSUM_NONE:
-               skb->csum = 0;
-               csum = __skb_checksum_complete(skb);
-               skb->ip_summed = CHECKSUM_COMPLETE;
-               break;
-       }
-
-       return csum;
-}
-
-static int ip_gre_calc_hlen(__be16 o_flags)
-{
-       int addend = 4;
-
-       if (o_flags&TUNNEL_CSUM)
-               addend += 4;
-       if (o_flags&TUNNEL_KEY)
-               addend += 4;
-       if (o_flags&TUNNEL_SEQ)
-               addend += 4;
-       return addend;
-}
-
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-                           bool *csum_err, int *hdr_len)
-{
-       struct iphdr *iph = ip_hdr(skb);
-       struct gre_base_hdr *greh;
-       __be32 *options;
-
-       if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
-               return -EINVAL;
-
-       greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
-       if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
-               return -EINVAL;
-
-       tpi->flags = gre_flags_to_tnl_flags(greh->flags);
-       *hdr_len = ip_gre_calc_hlen(tpi->flags);
-
-       if (!pskb_may_pull(skb, *hdr_len))
-               return -EINVAL;
-
-       tpi->proto = greh->protocol;
-
-       options = (__be32 *)(greh + 1);
-       if (greh->flags & GRE_CSUM) {
-               if (check_checksum(skb)) {
-                       *csum_err = true;
-                       return -EINVAL;
-               }
-               options++;
-       }
-
-       if (greh->flags & GRE_KEY) {
-               tpi->key = *options;
-               options++;
-       } else
-               tpi->key = 0;
-
-       if (unlikely(greh->flags & GRE_SEQ)) {
-               tpi->seq = *options;
-               options++;
-       } else
-               tpi->seq = 0;
-
-       /* WCCP version 1 and 2 protocol decoding.
-        * - Change protocol to IP
-        * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
-        */
-       if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
-               tpi->proto = htons(ETH_P_IP);
-               if ((*(u8 *)options & 0xF0) != 0x40) {
-                       *hdr_len += 4;
-                       if (!pskb_may_pull(skb, *hdr_len))
-                               return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
-static void ipgre_err(struct sk_buff *skb, u32 info)
+static int ipgre_err(struct sk_buff *skb, u32 info,
+                    const struct tnl_ptk_info *tpi)
 {
 
        /* All the routers (except for Linux) return only
@@ -237,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
-       struct tnl_ptk_info tpi;
-       int hdr_len;
-       bool csum_err = false;
-
-       if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
-               if (!csum_err)          /* ignore csum errors. */
-                       return;
-       }
 
        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
-               return;
+               return PACKET_RCVD;
 
        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
-                       return;
+                       return PACKET_RCVD;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -267,78 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
-                       return;
+                       return PACKET_RCVD;
                break;
 
        case ICMP_REDIRECT:
                break;
        }
 
-       if (tpi.proto == htons(ETH_P_TEB))
+       if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else
                itn = net_generic(net, ipgre_net_id);
 
-       t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
-                            iph->daddr, iph->saddr, tpi.key);
+       t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+                            iph->daddr, iph->saddr, tpi->key);
 
        if (t == NULL)
-               return;
+               return PACKET_REJECT;
 
-       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-               ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-                                t->parms.link, 0, IPPROTO_GRE, 0);
-               return;
-       }
-       if (type == ICMP_REDIRECT) {
-               ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-                             IPPROTO_GRE, 0);
-               return;
-       }
        if (t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
-               return;
+               return PACKET_RCVD;
 
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
-               return;
+               return PACKET_RCVD;
 
        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
+       return PACKET_RCVD;
 }
 
-static int ipgre_rcv(struct sk_buff *skb)
+static int ipgre_rcv(struct sk_buff *skb,
+                    const struct tnl_ptk_info *tpi)
 {
        struct net *net = dev_net(skb->dev);
        struct ip_tunnel_net *itn;
        const struct iphdr *iph;
        struct ip_tunnel *tunnel;
-       struct tnl_ptk_info tpi;
-       int hdr_len;
-       bool csum_err = false;
-
-       if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
-               goto drop;
 
-       if (tpi.proto == htons(ETH_P_TEB))
+       if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else
                itn = net_generic(net, ipgre_net_id);
 
        iph = ip_hdr(skb);
-       tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
-                                 iph->saddr, iph->daddr, tpi.key);
+       tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+                                 iph->saddr, iph->daddr, tpi->key);
 
        if (tunnel) {
-               ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
-               return 0;
+               ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
+               return PACKET_RCVD;
        }
-       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-drop:
-       kfree_skb(skb);
-       return 0;
+       return PACKET_REJECT;
 }
 
 static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct 
sk_buff *skb)
@@ -706,9 +588,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
        return ip_tunnel_init(dev);
 }
 
-static const struct gre_protocol ipgre_protocol = {
-       .handler     = ipgre_rcv,
-       .err_handler = ipgre_err,
+static struct gre_cisco_protocol ipgre_protocol = {
+       .handler        = ipgre_rcv,
+       .err_handler    = ipgre_err,
+       .priority       = 0,
 };
 
 static int __net_init ipgre_init_net(struct net *net)
@@ -976,7 +859,7 @@ static int __init ipgre_init(void)
        if (err < 0)
                goto pnet_tap_faied;
 
-       err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+       err = gre_cisco_register(&ipgre_protocol);
        if (err < 0) {
                pr_info("%s: can't add protocol\n", __func__);
                goto add_proto_failed;
@@ -995,7 +878,7 @@ static int __init ipgre_init(void)
 tap_ops_failed:
        rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
-       gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+       gre_cisco_unregister(&ipgre_protocol);
 add_proto_failed:
        unregister_pernet_device(&ipgre_tap_net_ops);
 pnet_tap_faied:
@@ -1007,8 +890,7 @@ static void __exit ipgre_fini(void)
 {
        rtnl_link_unregister(&ipgre_tap_ops);
        rtnl_link_unregister(&ipgre_link_ops);
-       if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
-               pr_info("%s: can't remove protocol\n", __func__);
+       gre_cisco_unregister(&ipgre_protocol);
        unregister_pernet_device(&ipgre_tap_net_ops);
        unregister_pernet_device(&ipgre_net_ops);
 }
-- 
1.7.1

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to