From: Roopa Prabhu <ro...@cumulusnetworks.com>

Provide ops to parse, build and output encapsulated
packets for drivers that want to attach tunnel encap
information to routes.

Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com>
---
 include/linux/lwtunnel.h      |    6 ++
 include/net/lwtunnel.h        |   84 +++++++++++++++++++++
 include/uapi/linux/lwtunnel.h |   11 +++
 net/Kconfig                   |    5 ++
 net/core/Makefile             |    1 +
 net/core/lwtunnel.c           |  162 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 269 insertions(+)
 create mode 100644 include/linux/lwtunnel.h
 create mode 100644 include/net/lwtunnel.h
 create mode 100644 include/uapi/linux/lwtunnel.h
 create mode 100644 net/core/lwtunnel.c

diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
new file mode 100644
index 0000000..97f32f8
--- /dev/null
+++ b/include/linux/lwtunnel.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_LWTUNNEL_H_
+#define _LINUX_LWTUNNEL_H_
+
+#include <uapi/linux/lwtunnel.h>
+
+#endif /* _LINUX_LWTUNNEL_H_ */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
new file mode 100644
index 0000000..649da3c
--- /dev/null
+++ b/include/net/lwtunnel.h
@@ -0,0 +1,84 @@
+#ifndef __NET_LWTUNNEL_H
+#define __NET_LWTUNNEL_H 1
+
+#include <linux/lwtunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/dsfield.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+
+#define LWTUNNEL_HASH_BITS   7
+#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
+
+struct lwtunnel_hdr {  /* variable-length blob embedded in lwtunnel_state */
+       int             len; /* presumably bytes in data[] - TODO confirm */
+       __u8            data[0]; /* storage trailing the enclosing allocation */
+};
+
+/* lw tunnel state flags; lwtunnel_output_redirect() accepts a NULL state */
+#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
+
+#define lwtunnel_output_redirect(lwtstate) ((lwtstate) && \
+               ((lwtstate)->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
+
+struct lwtunnel_state {
+       __u16           type;   /* encap type; indexes lwtun_encaps[] */
+       __u16           flags;  /* LWTUNNEL_STATE_* bits */
+       atomic_t        refcnt; /* see lwtunnel_state_get()/_put() */
+       struct lwtunnel_hdr tunnel; /* keep last: data[] trails the struct */
+};
+
+struct lwtunnel_net {  /* NOTE(review): unused in this patch - confirm need */
+       struct hlist_head tunnels[LWTUNNEL_HASH_SIZE]; /* 128 list heads */
+};
+
+struct lwtunnel_encap_ops { /* optional hooks; wrappers NULL-check each */
+       int (*build_state)(struct net_device *dev, struct nlattr *encap,
+                          struct lwtunnel_state **ts); /* ts: caller's lws */
+       int (*output)(struct sock *sk, struct sk_buff *skb);
+       int (*fill_encap)(struct sk_buff *skb,
+                         struct lwtunnel_state *lwtstate); /* 0 = success */
+       int (*get_encap_size)(struct lwtunnel_state *lwtstate);
+};
+
+#define MAX_LWTUNNEL_ENCAP_OPS 8
+extern const struct lwtunnel_encap_ops __rcu *
+               lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS];
+
+/* Take a reference on lws; NULL is ignored, matching lwtunnel_state_put(). */
+static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+{
+       if (lws)
+               atomic_inc(&lws->refcnt);
+}
+
+/* Drop a reference on lws (NULL is ignored); the last put kfree()s it. */
+static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+{
+       if (lws && atomic_dec_and_test(&lws->refcnt))
+               kfree(lws);
+}
+
+/* Return rt_lwtstate of skb's dst entry, viewed as a struct rtable. */
+static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct sk_buff *skb)
+{
+       /* NOTE(review): skb_dst(skb) is used unchecked - confirm non-NULL. */
+       return ((struct rtable *)skb_dst(skb))->rt_lwtstate;
+}
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
+                          unsigned int num);
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
+                          unsigned int num);
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+                        struct nlattr *encap,
+                        struct lwtunnel_state **lws);
+int lwtunnel_fill_encap(struct sk_buff *skb,
+                       struct lwtunnel_state *lwtstate);
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
+
+#endif /* __NET_LWTUNNEL_H */
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
new file mode 100644
index 0000000..11150c0
--- /dev/null
+++ b/include/uapi/linux/lwtunnel.h
@@ -0,0 +1,11 @@
+#ifndef _UAPI_LWTUNNEL_H_
+#define _UAPI_LWTUNNEL_H_
+
+#include <linux/types.h>
+
+enum tunnel_encap_types { /* append only; keep < MAX_LWTUNNEL_ENCAP_OPS */
+       LWTUNNEL_ENCAP_NONE,    /* 0: treated as no-encap by net/core */
+       LWTUNNEL_ENCAP_MPLS,    /* 1: MPLS encap */
+};
+
+#endif /* _UAPI_LWTUNNEL_H_ */
diff --git a/net/Kconfig b/net/Kconfig
index 57a7c5a..e296d6f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -374,9 +374,14 @@ source "net/caif/Kconfig"
 source "net/ceph/Kconfig"
 source "net/nfc/Kconfig"
 
+config LWTUNNEL
+       bool "Network light weight tunnels"
+       ---help---
+         Core infrastructure for attaching tunnel encap state to routes.
 
 endif   # if NET
 
 # Used by archs to tell that they support BPF_JIT
 config HAVE_BPF_JIT
        bool
+
diff --git a/net/core/Makefile b/net/core/Makefile
index fec0856..086b01f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
 obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
+obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
new file mode 100644
index 0000000..29c7802
--- /dev/null
+++ b/net/core/lwtunnel.c
@@ -0,0 +1,162 @@
+/*
+ * lwtunnel    Infrastructure for light weight tunnels like mpls
+ *
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/err.h>
+
+#include <net/lwtunnel.h>
+#include <net/rtnetlink.h>
+
+/* Allocate a zeroed lwtunnel_state followed by hdr_len bytes; NULL on OOM. */
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
+{
+       /* Must not sleep: ->build_state() runs under rcu_read_lock(). */
+       return kzalloc(sizeof(struct lwtunnel_state) + hdr_len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+const struct lwtunnel_encap_ops __rcu *
+               lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS] __read_mostly;
+
+/* Register ops for encap type num: 0 on success, -1 if the slot is taken. */
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+                          unsigned int num)
+{
+       if (num >= MAX_LWTUNNEL_ENCAP_OPS)
+               return -ERANGE;
+
+       return cmpxchg((const struct lwtunnel_encap_ops **)&lwtun_encaps[num],
+                      NULL, ops) ? -1 : 0;
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+/* Unregister ops from slot num: 0 on success, -1 if ops was not in it. */
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+                          unsigned int num)
+{
+       const struct lwtunnel_encap_ops *prev;
+
+       if (num >= MAX_LWTUNNEL_ENCAP_OPS)
+               return -ERANGE;
+
+       prev = cmpxchg((const struct lwtunnel_encap_ops **)&lwtun_encaps[num],
+                      ops, NULL);
+       /* Called unconditionally, even when nothing was removed. */
+       synchronize_net();
+
+       return prev == ops ? 0 : -1;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+/* Dispatch to the ->build_state hook registered for encap_type. */
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+                        struct nlattr *encap, struct lwtunnel_state **lws)
+{
+       const struct lwtunnel_encap_ops *ops;
+       int err = -EOPNOTSUPP;  /* kept if the type has no ->build_state */
+
+       if (encap_type == LWTUNNEL_ENCAP_NONE ||
+           encap_type >= MAX_LWTUNNEL_ENCAP_OPS)
+               return -EINVAL;
+
+       rcu_read_lock();
+       ops = rcu_dereference(lwtun_encaps[encap_type]);
+       if (likely(ops && ops->build_state))
+               err = ops->build_state(dev, encap, lws);
+       rcu_read_unlock();
+
+       return err;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+/*
+ * Emit lwtstate as a nested RTA_ENCAP attribute: 0 on success or no-op,
+ * -EMSGSIZE if the nest cannot start, else -EOPNOTSUPP or the hook error.
+ */
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+{
+       const struct lwtunnel_encap_ops *ops;
+       struct nlattr *nest;
+       int ret = -EOPNOTSUPP;
+
+       if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+           lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
+               return 0;
+
+       nest = nla_nest_start(skb, RTA_ENCAP);
+       if (!nest)      /* nla_nest_end() would dereference the NULL nest */
+               return -EMSGSIZE;
+
+       rcu_read_lock();
+       ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+       if (likely(ops && ops->fill_encap))
+               ret = ops->fill_encap(skb, lwtstate);
+       rcu_read_unlock();
+       if (ret) {
+               nla_nest_cancel(skb, nest);     /* drop the partial nest */
+               return ret;
+       }
+       nla_nest_end(skb, nest);
+       return 0;
+}
+EXPORT_SYMBOL(lwtunnel_fill_encap);
+
+/* nla_total_size() of the ->get_encap_size result; 0 without slot or hook. */
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
+{
+       const struct lwtunnel_encap_ops *ops;
+       int size = 0;
+
+       if (lwtstate->type != LWTUNNEL_ENCAP_NONE &&
+           lwtstate->type < MAX_LWTUNNEL_ENCAP_OPS) {
+               rcu_read_lock();
+               ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+               if (likely(ops && ops->get_encap_size))
+                       size = nla_total_size(ops->get_encap_size(lwtstate));
+               rcu_read_unlock();
+       }
+
+       return size;
+}
+EXPORT_SYMBOL(lwtunnel_get_encap_size);
+
+/* Run the ->output hook for skb's route state; 0 if none is registered. */
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+{
+       struct lwtunnel_state *state = lwtunnel_skb_lwstate(skb);
+       const struct lwtunnel_encap_ops *ops;
+       int err = 0;
+
+       if (!state)
+               return -EINVAL;
+       if (state->type == LWTUNNEL_ENCAP_NONE ||
+           state->type >= MAX_LWTUNNEL_ENCAP_OPS)
+               return 0;
+
+       rcu_read_lock();
+       ops = rcu_dereference(lwtun_encaps[state->type]);
+       if (likely(ops && ops->output))
+               err = ops->output(sk, skb);
+       rcu_read_unlock();
+
+       return err;
+}
+EXPORT_SYMBOL(lwtunnel_output);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to