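This patch changes the XDP-BPF implementation to use the generic XDP
hook infrastructure: the BPF program is now registered as an XDP hook
through xdp_change_dev_hook() instead of being handed to the driver via
ndo_xdp, and XDP support is advertised with the NETIF_F_XDP feature bit.
This includes the corresponding changes to the Mellanox mlx4 XDP code,
which now runs the generic hooks in its receive path and only sets up or
tears down its XDP TX rings on XDP_DEV_INIT and XDP_DEV_FINISH.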

Signed-off-by: Tom Herbert <t...@herbertland.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 64 ++++----------------------
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 25 ++++------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 -
 include/linux/filter.h                         | 13 ------
 net/core/dev.c                                 | 40 +++++++++++++---
 net/core/filter.c                              |  7 +--
 net/core/rtnetlink.c                           | 16 +++----
 7 files changed, 63 insertions(+), 103 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 62516f8..47990b7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2622,39 +2622,15 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m
        return err;
 }
 
-static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+static int mlx4_xdp_make_tx_rings(struct net_device *dev, int xdp_ring_num)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
        struct mlx4_en_dev *mdev = priv->mdev;
-       struct bpf_prog *old_prog;
-       int xdp_ring_num;
        int port_up = 0;
        int err;
-       int i;
-
-       xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0;
 
-       /* No need to reconfigure buffers when simply swapping the
-        * program for a new one.
-        */
-       if (priv->xdp_ring_num == xdp_ring_num) {
-               if (prog) {
-                       prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
-                       if (IS_ERR(prog))
-                               return PTR_ERR(prog);
-               }
-               mutex_lock(&mdev->state_lock);
-               for (i = 0; i < priv->rx_ring_num; i++) {
-                       old_prog = rcu_dereference_protected(
-                                       priv->rx_ring[i]->xdp_prog,
-                                       lockdep_is_held(&mdev->state_lock));
-                       rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
-                       if (old_prog)
-                               bpf_prog_put(old_prog);
-               }
-               mutex_unlock(&mdev->state_lock);
+       if (priv->xdp_ring_num == xdp_ring_num)
                return 0;
-       }
 
        if (priv->num_frags > 1) {
                en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
@@ -2668,12 +2644,6 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                return -EINVAL;
        }
 
-       if (prog) {
-               prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
-               if (IS_ERR(prog))
-                       return PTR_ERR(prog);
-       }
-
        mutex_lock(&mdev->state_lock);
        if (priv->port_up) {
                port_up = 1;
@@ -2684,15 +2654,6 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
        netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
                                                        priv->xdp_ring_num);
 
-       for (i = 0; i < priv->rx_ring_num; i++) {
-               old_prog = rcu_dereference_protected(
-                                       priv->rx_ring[i]->xdp_prog,
-                                       lockdep_is_held(&mdev->state_lock));
-               rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
-               if (old_prog)
-                       bpf_prog_put(old_prog);
-       }
-
        if (port_up) {
                err = mlx4_en_start_port(dev);
                if (err) {
@@ -2706,23 +2667,18 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
        return 0;
 }
 
-static bool mlx4_xdp_attached(struct net_device *dev)
+static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
 
-       return !!priv->xdp_ring_num;
-}
-
-static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
-{
        switch (xdp->command) {
-       case XDP_SETUP_PROG:
-               return mlx4_xdp_set(dev, xdp->prog);
-       case XDP_QUERY_PROG:
-               xdp->prog_attached = mlx4_xdp_attached(dev);
-               return 0;
+       case XDP_DEV_INIT:
+               return mlx4_xdp_make_tx_rings(dev,
+                   ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP));
+       case XDP_DEV_FINISH:
+               return mlx4_xdp_make_tx_rings(dev, 0);
        default:
-               return -EINVAL;
+               return 0;
        }
 }
 
@@ -3210,7 +3166,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 
        dev->vlan_features = dev->hw_features;
 
-       dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH;
+       dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_XDP;
        dev->features = dev->hw_features | NETIF_F_HIGHDMA |
                        NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                        NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c80073e..e06ac63 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -42,6 +42,7 @@
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
 #include <linux/irq.h>
+#include <net/xdp.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_checksum.h>
@@ -535,13 +536,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 {
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_rx_ring *ring = *pring;
-       struct bpf_prog *old_prog;
 
-       old_prog = rcu_dereference_protected(
-                                       ring->xdp_prog,
-                                       lockdep_is_held(&mdev->state_lock));
-       if (old_prog)
-               bpf_prog_put(old_prog);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
        vfree(ring->rx_info);
        ring->rx_info = NULL;
@@ -783,7 +778,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
        struct mlx4_en_rx_alloc *frags;
        struct mlx4_en_rx_desc *rx_desc;
-       struct bpf_prog *xdp_prog;
        int doorbell_pending;
        struct sk_buff *skb;
        int tx_index;
@@ -795,6 +789,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        int factor = priv->cqe_factor;
        u64 timestamp;
        bool l2_tunnel;
+       bool run_xdp;
 
        if (!priv->port_up)
                return 0;
@@ -802,9 +797,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        if (budget <= 0)
                return polled;
 
-       /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
+       /* Protect accesses to: XDP hooks, priv->mac_hash list */
        rcu_read_lock();
-       xdp_prog = rcu_dereference(ring->xdp_prog);
+       run_xdp = xdp_hook_run_needed_check(&cq->napi);
        doorbell_pending = 0;
        tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
 
@@ -880,10 +875,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
                        (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
-               /* A bpf program gets first chance to drop the packet. It may
+               /* An xdp program gets first chance to drop the packet. It may
                 * read bytes but not past the end of the frag.
                 */
-               if (xdp_prog) {
+               if (run_xdp) {
                        struct xdp_buff xdp;
                        dma_addr_t dma;
                        u32 act;
@@ -897,7 +892,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                                                        frags[0].page_offset;
                        xdp.data_end = xdp.data + length;
 
-                       act = bpf_prog_run_xdp(xdp_prog, &xdp);
+                       act = xdp_hook_run(&cq->napi, &xdp);
                        switch (act) {
                        case XDP_PASS:
                                break;
@@ -906,14 +901,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                                                        length, tx_index,
                                                        &doorbell_pending))
                                        goto consumed;
-                               goto next; /* Drop on xmit failure */
-                       default:
-                               bpf_warn_invalid_xdp_action(act);
+                               break;
                        case XDP_ABORTED:
                        case XDP_DROP:
                                if (mlx4_en_rx_recycle(ring, frags))
                                        goto consumed;
                                goto next;
+                       default:
+                               xdp_warn_invalid_action(act);
                        }
                }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index a3528dd..56d5950 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -340,7 +340,6 @@ struct mlx4_en_rx_ring {
        u8  fcs_del;
        void *buf;
        void *rx_info;
-       struct bpf_prog __rcu *xdp_prog;
        struct mlx4_en_page_cache page_cache;
        unsigned long bytes;
        unsigned long packets;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2a26133..f9863ee 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -494,18 +494,6 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
        return BPF_PROG_RUN(prog, skb);
 }
 
-static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
-                                  struct xdp_buff *xdp)
-{
-       u32 ret;
-
-       rcu_read_lock();
-       ret = BPF_PROG_RUN(prog, (void *)xdp);
-       rcu_read_unlock();
-
-       return ret;
-}
-
 static inline unsigned int bpf_prog_size(unsigned int proglen)
 {
        return max(sizeof(struct bpf_prog),
@@ -590,7 +578,6 @@ bool bpf_helper_changes_skb_data(void *func);
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                       const struct bpf_insn *patch, u32 len);
-void bpf_warn_invalid_xdp_action(u32 act);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0d2c826..d35ee4d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -142,6 +142,7 @@
 #include <linux/sctp.h>
 #include <linux/crash_dump.h>
 #include <net/xdp.h>
+#include <linux/filter.h>
 
 #include "net-sysfs.h"
 
@@ -6635,6 +6636,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+static u32 dev_bpf_prog_run_xdp(const void *priv,
+                               struct xdp_buff *xdp)
+{
+       const struct bpf_prog *prog = (const struct bpf_prog *)priv;
+       u32 ret;
+
+       rcu_read_lock();
+       ret = BPF_PROG_RUN(prog, (void *)xdp);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static void dev_bpf_prog_put_xdp(const void *priv)
+{
+       bpf_prog_put((struct bpf_prog *)priv);
+}
+
+struct xdp_hook_ops xdp_bpf_hook_ops = {
+       .hook = dev_bpf_prog_run_xdp,
+       .put_priv = dev_bpf_prog_put_xdp,
+       .priority = 0,
+};
+
+static DEFINE_MUTEX(xdp_bpf_lock);
+
 /**
  *     dev_change_xdp_fd - set or clear a bpf program for a device rx path
  *     @dev: device
@@ -6644,22 +6671,23 @@ EXPORT_SYMBOL(dev_change_proto_down);
  */
 int dev_change_xdp_fd(struct net_device *dev, int fd)
 {
-       const struct net_device_ops *ops = dev->netdev_ops;
        struct bpf_prog *prog = NULL;
-       struct netdev_xdp xdp = {};
        int err;
 
-       if (!ops->ndo_xdp)
+       if (!(dev->features & NETIF_F_XDP))
                return -EOPNOTSUPP;
+
        if (fd >= 0) {
                prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
                if (IS_ERR(prog))
                        return PTR_ERR(prog);
        }
 
-       xdp.command = XDP_SETUP_PROG;
-       xdp.prog = prog;
-       err = ops->ndo_xdp(dev, &xdp);
+       mutex_lock(&xdp_bpf_lock); /* Since xdp_bpf_hook_ops is modified */
+       xdp_bpf_hook_ops.priv = prog;
+       err = xdp_change_dev_hook(dev, &xdp_bpf_hook_ops);
+       mutex_unlock(&xdp_bpf_lock);
+
        if (err < 0 && prog)
                bpf_prog_put(prog);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 298b146..f4a1ea8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,6 +51,7 @@
 #include <net/dst_metadata.h>
 #include <net/dst.h>
 #include <net/sock_reuseport.h>
+#include <net/xdp.h>
 
 /**
  *     sk_filter_trim_cap - run a packet through a socket filter
@@ -2595,12 +2596,6 @@ static bool xdp_is_valid_access(int off, int size,
        return __is_valid_xdp_access(off, size, type);
 }
 
-void bpf_warn_invalid_xdp_action(u32 act)
-{
-       WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
-}
-EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-
 static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                                        int src_reg, int ctx_off,
                                        struct bpf_insn *insn_buf,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0dbae42..c1aeb71 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -56,6 +56,7 @@
 #include <net/fib_rules.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <net/xdp.h>
 
 struct rtnl_link {
        rtnl_doit_func          doit;
@@ -897,7 +898,7 @@ static size_t rtnl_xdp_size(const struct net_device *dev)
 {
        size_t xdp_size = nla_total_size(1);    /* XDP_ATTACHED */
 
-       if (!dev->netdev_ops->ndo_xdp)
+       if (!(dev->features & NETIF_F_XDP))
                return 0;
        else
                return xdp_size;
@@ -1226,20 +1227,19 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
-       struct netdev_xdp xdp_op = {};
        struct nlattr *xdp;
+       struct xdp_hook_ops ret;
        int err;
 
-       if (!dev->netdev_ops->ndo_xdp)
-               return 0;
        xdp = nla_nest_start(skb, IFLA_XDP);
        if (!xdp)
                return -EMSGSIZE;
-       xdp_op.command = XDP_QUERY_PROG;
-       err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
-       if (err)
+
+       err = xdp_find_dev_hook(dev, &xdp_bpf_hook_ops, &ret);
+       if (err && err != -ENOENT)
                goto err_cancel;
-       err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+
+       err = nla_put_u8(skb, IFLA_XDP_ATTACHED, !err);
        if (err)
                goto err_cancel;
 
-- 
2.8.0.rc2

Reply via email to