From: Björn Töpel <bjorn.to...@intel.com>

In this commit we add support for XDP programs to redirect frames to a
bound AF_XDP socket.

Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 include/linux/filter.h |  2 +-
 include/net/xdp_sock.h | 28 ++++++++++++++++++++
 net/core/dev.c         | 28 +++++++++++---------
 net/core/filter.c      | 72 ++++++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 111 insertions(+), 19 deletions(-)
 create mode 100644 include/net/xdp_sock.h

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..43cacfe2cc2a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -747,7 +747,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
  * This does not appear to be a real limitation for existing software.
  */
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-                           struct bpf_prog *prog);
+                           struct xdp_buff *xdp, struct bpf_prog *prog);
 int xdp_do_redirect(struct net_device *dev,
                    struct xdp_buff *xdp,
                    struct bpf_prog *prog);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
new file mode 100644
index 000000000000..132489fe0e70
--- /dev/null
+++ b/include/net/xdp_sock.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_AF_XDP_SOCK_H
+#define _LINUX_AF_XDP_SOCK_H
+
+struct xdp_sock;
+struct xdp_buff;
+
+#ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_rcv(struct xdp_buff *xdp);
+struct xdp_sock *xsk_rcv(struct xdp_sock *xsk, struct xdp_buff *xdp);
+void xsk_flush(struct xdp_sock *xsk);
+#else
+static inline int xsk_generic_rcv(struct xdp_buff *xdp)
+{
+       return -ENOTSUPP;
+}
+
+static inline struct xdp_sock *xsk_rcv(struct xdp_sock *xsk,
+                                      struct xdp_buff *xdp)
+{
+       return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void xsk_flush(struct xdp_sock *xsk)
+{
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_AF_XDP_SOCK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b9a840..94d2950fc33d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3951,11 +3951,11 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
 }
 
 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+                                    struct xdp_buff *xdp,
                                     struct bpf_prog *xdp_prog)
 {
        struct netdev_rx_queue *rxqueue;
        u32 metalen, act = XDP_DROP;
-       struct xdp_buff xdp;
        void *orig_data;
        int hlen, off;
        u32 mac_len;
@@ -3991,18 +3991,18 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
         */
        mac_len = skb->data - skb_mac_header(skb);
        hlen = skb_headlen(skb) + mac_len;
-       xdp.data = skb->data - mac_len;
-       xdp.data_meta = xdp.data;
-       xdp.data_end = xdp.data + hlen;
-       xdp.data_hard_start = skb->data - skb_headroom(skb);
-       orig_data = xdp.data;
+       xdp->data = skb->data - mac_len;
+       xdp->data_meta = xdp->data;
+       xdp->data_end = xdp->data + hlen;
+       xdp->data_hard_start = skb->data - skb_headroom(skb);
+       orig_data = xdp->data;
 
        rxqueue = netif_get_rxqueue(skb);
-       xdp.rxq = &rxqueue->xdp_rxq;
+       xdp->rxq = &rxqueue->xdp_rxq;
 
-       act = bpf_prog_run_xdp(xdp_prog, &xdp);
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
 
-       off = xdp.data - orig_data;
+       off = xdp->data - orig_data;
        if (off > 0)
                __skb_pull(skb, off);
        else if (off < 0)
@@ -4015,7 +4015,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                __skb_push(skb, mac_len);
                break;
        case XDP_PASS:
-               metalen = xdp.data - xdp.data_meta;
+               metalen = xdp->data - xdp->data_meta;
                if (metalen)
                        skb_metadata_set(skb, metalen);
                break;
@@ -4065,17 +4065,19 @@ static struct static_key generic_xdp_needed __read_mostly;
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 {
        if (xdp_prog) {
-               u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+               struct xdp_buff xdp;
+               u32 act;
                int err;
 
+               act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
                if (act != XDP_PASS) {
                        switch (act) {
                        case XDP_REDIRECT:
                                err = xdp_do_generic_redirect(skb->dev, skb,
-                                                             xdp_prog);
+                                                             &xdp, xdp_prog);
                                if (err)
                                        goto out_redir;
-                       /* fallthru to submit skb */
+                               break;
                        case XDP_TX:
                                generic_xdp_tx(skb, xdp_prog);
                                break;
diff --git a/net/core/filter.c b/net/core/filter.c
index aedf57489cb5..eab47173bc9e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -57,6 +57,7 @@
 #include <net/busy_poll.h>
 #include <net/tcp.h>
 #include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
 
 /**
  *     sk_filter_trim_cap - run a packet through a socket filter
@@ -1809,8 +1810,8 @@ struct redirect_info {
        struct bpf_map *map;
        struct bpf_map *map_to_flush;
        unsigned long   map_owner;
-       bool to_xsk;
-       /* XXX cache xsk socket here, to avoid lookup? */
+       bool xsk;
+       struct xdp_sock *xsk_to_flush;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -2575,6 +2576,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 void xdp_do_flush_map(void)
 {
        struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       struct xdp_sock *xsk = ri->xsk_to_flush;
        struct bpf_map *map = ri->map_to_flush;
 
        ri->map_to_flush = NULL;
@@ -2590,6 +2592,10 @@ void xdp_do_flush_map(void)
                        break;
                }
        }
+
+       ri->xsk_to_flush = NULL;
+       if (xsk)
+               xsk_flush(xsk);
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
@@ -2611,6 +2617,29 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
        return (unsigned long)xdp_prog->aux != aux;
 }
 
+static int xdp_do_xsk_redirect(struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       struct xdp_sock *xsk;
+
+       ri->ifindex = 0;
+       ri->map = NULL;
+       ri->map_owner = 0;
+       ri->xsk = false;
+
+       xsk = xsk_rcv(ri->xsk_to_flush, xdp);
+       if (IS_ERR(xsk)) {
+               _trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1,
+                                       PTR_ERR(xsk));
+               return PTR_ERR(xsk);
+       }
+
+       ri->xsk_to_flush = xsk;
+       _trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1);
+
+       return 0;
+}
+
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
                               struct bpf_prog *xdp_prog)
 {
@@ -2624,6 +2653,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
        ri->ifindex = 0;
        ri->map = NULL;
        ri->map_owner = 0;
+       ri->xsk = false;
 
        if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
                err = -EFAULT;
@@ -2659,6 +2689,9 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        u32 index = ri->ifindex;
        int err;
 
+       if (ri->xsk)
+               return xdp_do_xsk_redirect(xdp, xdp_prog);
+
        if (ri->map)
                return xdp_do_redirect_map(dev, xdp, xdp_prog);
 
@@ -2681,6 +2714,30 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
+static int xdp_do_generic_xsk_redirect(struct sk_buff *skb,
+                                      struct xdp_buff *xdp,
+                                      struct bpf_prog *xdp_prog)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       int err;
+
+       ri->ifindex = 0;
+       ri->map = NULL;
+       ri->map_owner = 0;
+       ri->xsk = false;
+
+       err = xsk_generic_rcv(xdp);
+       if (err) {
+               _trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1, err);
+               return err;
+       }
+
+       consume_skb(skb);
+       _trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1);  /* XXX fix tracing to support xsk */
+
+       return 0;
+}
+
 static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
 {
        unsigned int len;
@@ -2709,7 +2766,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
        ri->ifindex = 0;
        ri->map = NULL;
        ri->map_owner = 0;
-       ri->to_xsk = false;
+       ri->xsk = false;
 
        if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
                err = -EFAULT;
@@ -2733,6 +2790,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
        }
 
        _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+       generic_xdp_tx(skb, xdp_prog);
        return 0;
 err:
        _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
@@ -2740,13 +2798,16 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 }
 
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-                           struct bpf_prog *xdp_prog)
+                           struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
        struct redirect_info *ri = this_cpu_ptr(&redirect_info);
        u32 index = ri->ifindex;
        struct net_device *fwd;
        int err = 0;
 
+       if (ri->xsk)
+               return xdp_do_generic_xsk_redirect(skb, xdp, xdp_prog);
+
        if (ri->map)
                return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
 
@@ -2762,6 +2823,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 
        skb->dev = fwd;
        _trace_xdp_redirect(dev, xdp_prog, index);
+       generic_xdp_tx(skb, xdp_prog);
        return 0;
 err:
        _trace_xdp_redirect_err(dev, xdp_prog, index, err);
@@ -2828,7 +2890,7 @@ BPF_CALL_0(bpf_xdpsk_redirect)
         * and XDP_ABORTED on failure? Also, then we can populate xsk
         * in ri, and don't have to do the lookup multiple times.
         */
-       ri->to_xsk = true;
+       ri->xsk = true;
 
        return XDP_REDIRECT;
 }
-- 
2.14.1

Reply via email to