From: Björn Töpel <bjorn.to...@intel.com> Add support for XDP programs to redirect frames to a bound AF_XDP socket.
Signed-off-by: Björn Töpel <bjorn.to...@intel.com> --- include/linux/filter.h | 2 +- include/net/xdp_sock.h | 28 ++++++++++++++++++++ net/core/dev.c | 28 +++++++++++--------- net/core/filter.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 111 insertions(+), 19 deletions(-) create mode 100644 include/net/xdp_sock.h diff --git a/include/linux/filter.h b/include/linux/filter.h index 276932d75975..43cacfe2cc2a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -747,7 +747,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *prog); + struct xdp_buff *xdp, struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h new file mode 100644 index 000000000000..132489fe0e70 --- /dev/null +++ b/include/net/xdp_sock.h @@ -0,0 +1,28 @@ +#ifndef _LINUX_AF_XDP_SOCK_H +#define _LINUX_AF_XDP_SOCK_H + +struct xdp_sock; +struct xdp_buff; + +#ifdef CONFIG_XDP_SOCKETS +int xsk_generic_rcv(struct xdp_buff *xdp); +struct xdp_sock *xsk_rcv(struct xdp_sock *xsk, struct xdp_buff *xdp); +void xsk_flush(struct xdp_sock *xsk); +#else +static inline int xsk_generic_rcv(struct xdp_buff *xdp) +{ + return -ENOTSUPP; +} + +static inline struct xdp_sock *xsk_rcv(struct xdp_sock *xsk, + struct xdp_buff *xdp) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void xsk_flush(struct xdp_sock *xsk) +{ +} +#endif /* CONFIG_XDP_SOCKETS */ + +#endif /* _LINUX_AF_XDP_SOCK_H */ diff --git a/net/core/dev.c b/net/core/dev.c index dda9d7b9a840..94d2950fc33d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3951,11 +3951,11 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) } static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct 
xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct netdev_rx_queue *rxqueue; u32 metalen, act = XDP_DROP; - struct xdp_buff xdp; void *orig_data; int hlen, off; u32 mac_len; @@ -3991,18 +3991,18 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, */ mac_len = skb->data - skb_mac_header(skb); hlen = skb_headlen(skb) + mac_len; - xdp.data = skb->data - mac_len; - xdp.data_meta = xdp.data; - xdp.data_end = xdp.data + hlen; - xdp.data_hard_start = skb->data - skb_headroom(skb); - orig_data = xdp.data; + xdp->data = skb->data - mac_len; + xdp->data_meta = xdp->data; + xdp->data_end = xdp->data + hlen; + xdp->data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp->data; rxqueue = netif_get_rxqueue(skb); - xdp.rxq = &rxqueue->xdp_rxq; + xdp->rxq = &rxqueue->xdp_rxq; - act = bpf_prog_run_xdp(xdp_prog, &xdp); + act = bpf_prog_run_xdp(xdp_prog, xdp); - off = xdp.data - orig_data; + off = xdp->data - orig_data; if (off > 0) __skb_pull(skb, off); else if (off < 0) @@ -4015,7 +4015,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, __skb_push(skb, mac_len); break; case XDP_PASS: - metalen = xdp.data - xdp.data_meta; + metalen = xdp->data - xdp->data_meta; if (metalen) skb_metadata_set(skb, metalen); break; @@ -4065,17 +4065,19 @@ static struct static_key generic_xdp_needed __read_mostly; int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - u32 act = netif_receive_generic_xdp(skb, xdp_prog); + struct xdp_buff xdp; + u32 act; int err; + act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - xdp_prog); + &xdp, xdp_prog); if (err) goto out_redir; - /* fallthru to submit skb */ + break; case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; diff --git a/net/core/filter.c b/net/core/filter.c index aedf57489cb5..eab47173bc9e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -57,6 +57,7 @@ #include 
<net/busy_poll.h> #include <net/tcp.h> #include <linux/bpf_trace.h> +#include <net/xdp_sock.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -1809,8 +1810,8 @@ struct redirect_info { struct bpf_map *map; struct bpf_map *map_to_flush; unsigned long map_owner; - bool to_xsk; - /* XXX cache xsk socket here, to avoid lookup? */ + bool xsk; + struct xdp_sock *xsk_to_flush; }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); @@ -2575,6 +2576,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, void xdp_do_flush_map(void) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct xdp_sock *xsk = ri->xsk_to_flush; struct bpf_map *map = ri->map_to_flush; ri->map_to_flush = NULL; @@ -2590,6 +2592,10 @@ void xdp_do_flush_map(void) break; } } + + ri->xsk_to_flush = NULL; + if (xsk) + xsk_flush(xsk); } EXPORT_SYMBOL_GPL(xdp_do_flush_map); @@ -2611,6 +2617,29 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, return (unsigned long)xdp_prog->aux != aux; } +static int xdp_do_xsk_redirect(struct xdp_buff *xdp, struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct xdp_sock *xsk; + + ri->ifindex = 0; + ri->map = NULL; + ri->map_owner = 0; + ri->xsk = false; + + xsk = xsk_rcv(ri->xsk_to_flush, xdp); + if (IS_ERR(xsk)) { + _trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1, + PTR_ERR(xsk)); + return PTR_ERR(xsk); + } + + ri->xsk_to_flush = xsk; + _trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1); + + return 0; +} + static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { @@ -2624,6 +2653,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, ri->ifindex = 0; ri->map = NULL; ri->map_owner = 0; + ri->xsk = false; if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { err = -EFAULT; @@ -2659,6 +2689,9 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, u32 index = ri->ifindex; 
int err; + if (ri->xsk) + return xdp_do_xsk_redirect(xdp, xdp_prog); + if (ri->map) return xdp_do_redirect_map(dev, xdp, xdp_prog); @@ -2681,6 +2714,30 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, } EXPORT_SYMBOL_GPL(xdp_do_redirect); +static int xdp_do_generic_xsk_redirect(struct sk_buff *skb, + struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + int err; + + ri->ifindex = 0; + ri->map = NULL; + ri->map_owner = 0; + ri->xsk = false; + + err = xsk_generic_rcv(xdp); + if (err) { + _trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1, err); + return err; + } + + consume_skb(skb); + _trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1); /* XXX fix tracing to support xsk */ + + return 0; +} + static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) { unsigned int len; @@ -2709,7 +2766,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ri->ifindex = 0; ri->map = NULL; ri->map_owner = 0; - ri->to_xsk = false; + ri->xsk = false; if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { err = -EFAULT; @@ -2733,6 +2790,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, } _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + generic_xdp_tx(skb, xdp_prog); return 0; err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); @@ -2740,13 +2798,16 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, } int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); u32 index = ri->ifindex; struct net_device *fwd; int err = 0; + if (ri->xsk) + return xdp_do_generic_xsk_redirect(skb, xdp, xdp_prog); + if (ri->map) return xdp_do_generic_redirect_map(dev, skb, xdp_prog); @@ -2762,6 +2823,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, skb->dev = 
fwd; _trace_xdp_redirect(dev, xdp_prog, index); + generic_xdp_tx(skb, xdp_prog); return 0; err: _trace_xdp_redirect_err(dev, xdp_prog, index, err); @@ -2828,7 +2890,7 @@ BPF_CALL_0(bpf_xdpsk_redirect) * and XDP_ABORTED on failure? Also, then we can populate xsk * in ri, and don't have to do the lookup multiple times. */ - ri->to_xsk = true; + ri->xsk = true; return XDP_REDIRECT; } -- 2.14.1