Add support for the BPF_PROG_TYPE_PHYS_DEV hook in the mlx4 driver. Since
bpf programs require an skb context to navigate the packet, build a
per-cpu fake skb with only the minimal fields set. This avoids a costly
skb allocation for packets that end up being dropped.

Since mlx4 is so far the only user of bpf_phys_dev_md, the function that
builds it is defined locally in the driver.

Signed-off-by: Brenden Blanco <bbla...@plumgrid.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 65 ++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 25 ++++++++--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  6 +++
 3 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index b4b258c..b228651 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -31,6 +31,7 @@
  *
  */
 
+#include <linux/bpf.h>
 #include <linux/etherdevice.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
@@ -1966,6 +1967,9 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
                        mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
        }
 
+       if (priv->prog)
+               bpf_prog_put(priv->prog);
+
 }
 
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
@@ -2078,6 +2082,11 @@ static int mlx4_en_change_mtu(struct net_device *dev, 
int new_mtu)
                en_err(priv, "Bad MTU size:%d.\n", new_mtu);
                return -EPERM;
        }
+       if (priv->prog && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
+               en_err(priv, "MTU size:%d requires frags but bpf prog running",
+                      new_mtu);
+               return -EOPNOTSUPP;
+       }
        dev->mtu = new_mtu;
 
        if (netif_running(dev)) {
@@ -2456,6 +2465,58 @@ static int mlx4_en_set_tx_maxrate(struct net_device 
*dev, int queue_index, u32 m
        return err;
 }
 
+/* Per-cpu fake skb that is passed to bpf programs as their context. */
+static DEFINE_PER_CPU(struct sk_buff, percpu_bpf_phys_dev_md);
+
+/* Aim @skb at the @length bytes of packet data starting at @data. */
+static void build_bpf_phys_dev_md(struct sk_buff *skb, void *data,
+                                 unsigned int length)
+{
+       /* data_len is deliberately left untouched so skb_is_nonlinear()
+        * returns false. */
+       skb->data = data;
+       skb->head = data;
+       skb->len = length;
+}
+
+/* Run @prog on @length bytes at @data via this CPU's fake skb; the program's
+ * return value is handed back to the caller unchanged. */
+int mlx4_call_bpf(struct bpf_prog *prog, void *data, unsigned int length)
+{
+       struct sk_buff *md = this_cpu_ptr(&percpu_bpf_phys_dev_md);
+       int verdict;
+
+       build_bpf_phys_dev_md(md, data, length);
+       rcu_read_lock();
+       verdict = BPF_PROG_RUN(prog, (void *)md);
+       rcu_read_unlock();
+       return verdict;
+}
+
+/* Make @prog the bpf program of @dev (NULL just clears it) and put the old
+ * one after synchronize_net(). Returns -EOPNOTSUPP if priv->num_frags > 1. */
+static int mlx4_bpf_set(struct net_device *dev, struct bpf_prog *prog)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct bpf_prog *prev;
+
+       if (priv->num_frags > 1)
+               return -EOPNOTSUPP;
+       prev = xchg(&priv->prog, prog);
+       if (!prev)
+               return 0;
+       synchronize_net();
+       bpf_prog_put(prev);
+       return 0;
+}
+
+/* Tell whether a bpf program is currently installed on @dev, i.e. whether
+ * priv->prog is non-NULL. */
+static bool mlx4_bpf_get(struct net_device *dev)
+{
+       return ((struct mlx4_en_priv *)netdev_priv(dev))->prog != NULL;
+}
+
 static const struct net_device_ops mlx4_netdev_ops = {
        .ndo_open               = mlx4_en_open,
        .ndo_stop               = mlx4_en_close,
@@ -2486,6 +2547,8 @@ static const struct net_device_ops mlx4_netdev_ops = {
        .ndo_features_check     = mlx4_en_features_check,
 #endif
        .ndo_set_tx_maxrate     = mlx4_en_set_tx_maxrate,
+       .ndo_bpf_set            = mlx4_bpf_set,
+       .ndo_bpf_get            = mlx4_bpf_get,
 };
 
 static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2524,6 +2587,8 @@ static const struct net_device_ops mlx4_netdev_ops_master 
= {
        .ndo_features_check     = mlx4_en_features_check,
 #endif
        .ndo_set_tx_maxrate     = mlx4_en_set_tx_maxrate,
+       .ndo_bpf_set            = mlx4_bpf_set,
+       .ndo_bpf_get            = mlx4_bpf_get,
 };
 
 struct mlx4_en_bond {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 86bcfe5..287da02 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -748,6 +748,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct 
mlx4_en_cq *cq, int bud
        struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
        struct mlx4_en_rx_alloc *frags;
        struct mlx4_en_rx_desc *rx_desc;
+       struct bpf_prog *prog;
        struct sk_buff *skb;
        int index;
        int nr;
@@ -764,6 +765,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct 
mlx4_en_cq *cq, int bud
        if (budget <= 0)
                return polled;
 
+       prog = READ_ONCE(priv->prog);
+
        /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
         * descriptor offset can be deduced from the CQE index instead of
         * reading 'cqe->index' */
@@ -840,6 +843,23 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct 
mlx4_en_cq *cq, int bud
                l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
                        (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
+               /* A bpf program gets first chance to drop the packet. It may
+                * read bytes but not past the end of the frag.
+                */
+               if (prog) {
+                       struct ethhdr *ethh;
+                       dma_addr_t dma;
+
+                       dma = be64_to_cpu(rx_desc->data[0].addr);
+                       dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
+                                               DMA_FROM_DEVICE);
+                       ethh = page_address(frags[0].page) +
+                                                       frags[0].page_offset;
+                       if (mlx4_call_bpf(prog, ethh, frags[0].page_size) ==
+                                                       BPF_PHYS_DEV_DROP)
+                               goto next;
+               }
+
                if (likely(dev->features & NETIF_F_RXCSUM)) {
                        if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
                                                      MLX4_CQE_STATUS_UDP)) {
@@ -1067,10 +1087,7 @@ static const int frag_sizes[] = {
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
-       /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
-        * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
-        */
-       int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN);
+       int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
        int buf_size = 0;
        int i = 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h 
b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d12ab6a..40eb32d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -164,6 +164,10 @@ enum {
 #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
 
 #define MLX4_EN_MIN_MTU                46
+/* VLAN_HLEN is added twice, to support skbs that are VLAN tagged with
+ * multiple headers (for example: ETH_P_8021Q and ETH_P_8021AD).
+ */
+#define MLX4_EN_EFF_MTU(mtu)   ((mtu) + ETH_HLEN + (2 * VLAN_HLEN))
 #define ETH_BCAST              0xffffffffffffULL
 
 #define MLX4_EN_LOOPBACK_RETRIES       5
@@ -568,6 +572,7 @@ struct mlx4_en_priv {
        struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
        struct hwtstamp_config hwtstamp_config;
        u32 counter_index;
+       struct bpf_prog *prog;
 
 #ifdef CONFIG_MLX4_EN_DCB
        struct ieee_ets ets;
@@ -682,6 +687,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv);
 void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv);
 int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
 void mlx4_en_rx_irq(struct mlx4_cq *mcq);
+int mlx4_call_bpf(struct bpf_prog *prog, void *data, unsigned int length);
 
 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 
mode);
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv);
-- 
2.8.0

Reply via email to