From: Roopa Prabhu <ro...@cumulusnetworks.com> This patch provides ops to parse, build and output encapsulated packets for drivers that want to attach tunnel encapsulation information to routes.
Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> --- include/linux/lwtunnel.h | 6 ++ include/net/lwtunnel.h | 84 +++++++++++++++++++++ include/uapi/linux/lwtunnel.h | 11 +++ net/Kconfig | 5 ++ net/core/Makefile | 1 + net/core/lwtunnel.c | 162 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 269 insertions(+) create mode 100644 include/linux/lwtunnel.h create mode 100644 include/net/lwtunnel.h create mode 100644 include/uapi/linux/lwtunnel.h create mode 100644 net/core/lwtunnel.c diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h new file mode 100644 index 0000000..97f32f8 --- /dev/null +++ b/include/linux/lwtunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_LWTUNNEL_H_ +#define _LINUX_LWTUNNEL_H_ + +#include <uapi/linux/lwtunnel.h> + +#endif /* _LINUX_LWTUNNEL_H_ */ diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h new file mode 100644 index 0000000..649da3c --- /dev/null +++ b/include/net/lwtunnel.h @@ -0,0 +1,84 @@ +#ifndef __NET_LWTUNNEL_H +#define __NET_LWTUNNEL_H 1 + +#include <linux/lwtunnel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/dsfield.h> +#include <net/ip.h> +#include <net/rtnetlink.h> + +#define LWTUNNEL_HASH_BITS 7 +#define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) + +struct lwtunnel_hdr { + int len; + __u8 data[0]; +}; + +/* lw tunnel state flags */ +#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1 + +#define lwtunnel_output_redirect(lwtstate) (lwtstate && \ + (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) + +struct lwtunnel_state { + __u16 type; + __u16 flags; + atomic_t refcnt; + struct lwtunnel_hdr tunnel; +}; + +struct lwtunnel_net { + struct hlist_head tunnels[LWTUNNEL_HASH_SIZE]; +}; + +struct lwtunnel_encap_ops { + int (*build_state)(struct net_device *dev, struct nlattr *encap, + struct lwtunnel_state **ts); + int (*output)(struct sock *sk, struct sk_buff *skb); + int (*fill_encap)(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); + int 
(*get_encap_size)(struct lwtunnel_state *lwtstate); +}; + +#define MAX_LWTUNNEL_ENCAP_OPS 8 +extern const struct lwtunnel_encap_ops __rcu * + lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS]; + +static inline void lwtunnel_state_get(struct lwtunnel_state *lws) +{ + atomic_inc(&lws->refcnt); +} + +static inline void lwtunnel_state_put(struct lwtunnel_state *lws) +{ + if (!lws) + return; + + if (atomic_dec_and_test(&lws->refcnt)) + kfree(lws); +} + +static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct sk_buff *skb) +{ + struct rtable *rt = (struct rtable *)skb_dst(skb); + + return rt->rt_lwtstate; +} + +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, + struct lwtunnel_state **lws); +int lwtunnel_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); +int lwtunnel_output(struct sock *sk, struct sk_buff *skb); + +#endif /* __NET_LWTUNNEL_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h new file mode 100644 index 0000000..11150c0 --- /dev/null +++ b/include/uapi/linux/lwtunnel.h @@ -0,0 +1,11 @@ +#ifndef _UAPI_LWTUNNEL_H_ +#define _UAPI_LWTUNNEL_H_ + +#include <linux/types.h> + +enum tunnel_encap_types { + LWTUNNEL_ENCAP_NONE, + LWTUNNEL_ENCAP_MPLS, +}; + +#endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/net/Kconfig b/net/Kconfig index 57a7c5a..e296d6f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -374,9 +374,14 @@ source "net/caif/Kconfig" source "net/ceph/Kconfig" source "net/nfc/Kconfig" +config LWTUNNEL + bool "Network light weight tunnels" + ---help--- + light weight tunnels endif # if NET # Used by archs to tell that they support BPF_JIT config HAVE_BPF_JIT bool + diff --git a/net/core/Makefile 
b/net/core/Makefile index fec0856..086b01f 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o +obj-$(CONFIG_LWTUNNEL) += lwtunnel.o diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c new file mode 100644 index 0000000..29c7802 --- /dev/null +++ b/net/core/lwtunnel.c @@ -0,0 +1,162 @@ +/* + * lwtunnel Infrastructure for light weight tunnels like mpls + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/init.h> +#include <linux/err.h> + +#include <net/lwtunnel.h> +#include <net/rtnetlink.h> + +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) +{ + struct lwtunnel_state *lws; + + return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL); +} +EXPORT_SYMBOL(lwtunnel_state_alloc); + +const struct lwtunnel_encap_ops __rcu * + lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS] __read_mostly; + +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, + unsigned int num) +{ + if (num >= MAX_LWTUNNEL_ENCAP_OPS) + return -ERANGE; + + return !cmpxchg((const struct lwtunnel_encap_ops **) + &lwtun_encaps[num], + NULL, ops) ? 
0 : -1; +} +EXPORT_SYMBOL(lwtunnel_encap_add_ops); + +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, + unsigned int num) +{ + int ret; + + if (num >= MAX_LWTUNNEL_ENCAP_OPS) + return -ERANGE; + + ret = (cmpxchg((const struct lwtunnel_encap_ops **) + &lwtun_encaps[num], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_encap_del_ops); + +int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, struct lwtunnel_state **lws) +{ + const struct lwtunnel_encap_ops *ops; + int ret = -EINVAL; + + if (encap_type == LWTUNNEL_ENCAP_NONE || + encap_type >= MAX_LWTUNNEL_ENCAP_OPS) + return ret; + + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); + if (likely(ops && ops->build_state)) + ret = ops->build_state(dev, encap, lws); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_build_state); + +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) +{ + const struct lwtunnel_encap_ops *ops; + struct nlattr *nest; + int ret = -EINVAL; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS) + return 0; + + ret = -EOPNOTSUPP; + nest = nla_nest_start(skb, RTA_ENCAP); + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->fill_encap)) + ret = ops->fill_encap(skb, lwtstate); + rcu_read_unlock(); + + if (ret) + goto errout; + + nla_nest_end(skb, nest); + + return 0; + +errout: + nla_nest_cancel(skb, nest); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_fill_encap); + +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) +{ + const struct lwtunnel_encap_ops *ops; + int ret = 0; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS) + return 0; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->get_encap_size)) + ret = 
nla_total_size(ops->get_encap_size(lwtstate)); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_get_encap_size); + +int lwtunnel_output(struct sock *sk, struct sk_buff *skb) +{ + const struct lwtunnel_encap_ops *ops; + struct lwtunnel_state *lwtstate = lwtunnel_skb_lwstate(skb); + int ret = 0; + + if (!lwtstate) + return -EINVAL; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS) + return 0; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->output)) + ret = ops->output(sk, skb); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_output); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html