Introduce an API to install BPF-based filters on the ethdev RX/TX path.
The current implementation is a pure software one, based on the ethdev
RX/TX callback mechanism.

Signed-off-by: Konstantin Ananyev <konstantin.anan...@intel.com>
---
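Usage sketch (illustrative only, not part of the patch): the ELF file name
"filter.o" and section name "flt" below are hypothetical, and any external
symbols the program might need via prm.xsym are omitted:

#include <string.h>
#include <rte_bpf_ethdev.h>

static int
setup_rx_filter(uint16_t port, uint16_t queue)
{
	struct rte_bpf_prm prm;
	int rc;

	/* program works on raw packet data and uses no external symbols */
	memset(&prm, 0, sizeof(prm));
	prm.prog_type = RTE_BPF_PROG_TYPE_UNSPEC;

	/*
	 * load section "flt" from the ELF file and install it as RX callback
	 * for (port, queue); with RTE_BPF_ETH_F_JIT the load fails if no JIT
	 * is available for the target. Packets for which the program returns
	 * zero are dropped inside the callback.
	 */
	rc = rte_bpf_eth_rx_elf_load(port, queue, &prm,
		"filter.o", "flt", RTE_BPF_ETH_F_JIT);
	if (rc != 0)
		return rc;

	/* ... RX traffic on (port, queue) is now filtered ... */

	/* remove the callback and destroy the BPF program */
	rte_bpf_eth_rx_unload(port, queue);
	return 0;
}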
 lib/librte_bpf/Makefile            |   2 +
 lib/librte_bpf/bpf_pkt.c           | 524 +++++++++++++++++++++++++++++++++++++
 lib/librte_bpf/rte_bpf_ethdev.h    |  50 ++++
 lib/librte_bpf/rte_bpf_version.map |   4 +
 4 files changed, 580 insertions(+)
 create mode 100644 lib/librte_bpf/bpf_pkt.c
 create mode 100644 lib/librte_bpf/rte_bpf_ethdev.h

diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
index 44b12c439..501c49c60 100644
--- a/lib/librte_bpf/Makefile
+++ b/lib/librte_bpf/Makefile
@@ -22,6 +22,7 @@ LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
 ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
@@ -29,5 +30,6 @@ endif
 
 # install header files
 SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf_pkt.c b/lib/librte_bpf/bpf_pkt.c
new file mode 100644
index 000000000..b0177ad82
--- /dev/null
+++ b/lib/librte_bpf/bpf_pkt.c
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include <rte_bpf_ethdev.h>
+
+/*
+ * information about all installed BPF rx/tx callbacks
+ */
+
+struct bpf_eth_cbi {
+       uint32_t use;    /* usage counter */
+       void *cb;        /* callback handle */
+       struct rte_bpf *bpf;
+       struct rte_bpf_jit jit;
+} __rte_cache_aligned;
+
+/*
+ * An odd value means the callback is in use by the datapath.
+ * An even value means the callback is not in use by the datapath.
+ */
+#define BPF_ETH_CBI_INUSE  1
+
+static struct bpf_eth_cbi rx_cbi[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+static struct bpf_eth_cbi tx_cbi[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+
+/*
+ * Marks the given callback as in use by the datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
+{
+       cbi->use++;
+       /* make sure no store/load reordering could happen */
+       rte_smp_mb();
+}
+
+/*
+ * Marks the given callback as no longer in use by the datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
+{
+       /* make sure all previous loads are completed */
+       rte_smp_rmb();
+       cbi->use++;
+}
+
+/*
+ * Waits until the datapath has finished using the given callback.
+ */
+static void
+bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+{
+       uint32_t nuse, puse;
+
+       /* make sure all previous loads and stores are completed */
+       rte_smp_mb();
+
+       puse = cbi->use;
+
+       /* in use, busy wait till current RX/TX iteration is finished */
+       if ((puse & BPF_ETH_CBI_INUSE) != 0) {
+               do {
+                       rte_pause();
+                       rte_compiler_barrier();
+                       nuse = cbi->use;
+               } while (nuse == puse);
+       }
+}
+
+static void
+bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
+{
+       bc->bpf = NULL;
+       memset(&bc->jit, 0, sizeof(bc->jit));
+}
+
+/*
+ * BPF packet processing routines.
+ */
+
+static inline uint32_t
+apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
+       uint32_t drop)
+{
+       uint32_t i, j, k;
+       struct rte_mbuf *dr[num];
+
+       for (i = 0, j = 0, k = 0; i != num; i++) {
+
+               /* filter matches */
+               if (rc[i] != 0)
+                       mb[j++] = mb[i];
+               /* no match */
+               else
+                       dr[k++] = mb[i];
+       }
+
+       if (drop != 0) {
+               /* free filtered out mbufs */
+               for (i = 0; i != k; i++)
+                       rte_pktmbuf_free(dr[i]);
+       } else {
+               /* copy filtered out mbufs beyond good ones */
+               for (i = 0; i != k; i++)
+                       mb[j + i] = dr[i];
+       }
+
+       return j;
+}
+
+static inline uint32_t
+pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+       uint32_t drop)
+{
+       uint32_t i;
+       void *dp[num];
+       uint64_t rc[num];
+
+       for (i = 0; i != num; i++)
+               dp[i] = rte_pktmbuf_mtod(mb[i], void *);
+
+       rte_bpf_exec_burst(bpf, dp, rc, num);
+       return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+       uint32_t num, uint32_t drop)
+{
+       uint32_t i;
+       void *dp;
+       uint64_t rc[num];
+
+       for (i = 0; i != num; i++) {
+               dp = rte_pktmbuf_mtod(mb[i], void *);
+               rc[i] = (jit->func(dp) != 0);
+       }
+
+       return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+       uint32_t drop)
+{
+       uint64_t rc[num];
+
+       rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
+       return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+       uint32_t num, uint32_t drop)
+{
+       uint32_t i;
+       uint64_t rc[num];
+
+       for (i = 0; i != num; i++)
+               rc[i] = (jit->func(mb[i]) != 0);
+
+       return apply_filter(mb, rc, num, drop);
+}
+
+/*
+ * RX/TX callbacks for raw data bpf.
+ */
+
+static uint16_t
+bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts,
+       __rte_unused uint16_t max_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static uint16_t
+bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts,
+       __rte_unused uint16_t max_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static uint16_t
+bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static uint16_t
+bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+/*
+ * RX/TX callbacks for mbuf.
+ */
+
+static uint16_t
+bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts,
+       __rte_unused uint16_t max_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static uint16_t
+bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts,
+       __rte_unused uint16_t max_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+       struct bpf_eth_cbi *cbi;
+       uint16_t rc;
+
+       cbi = user_param;
+       bpf_eth_cbi_inuse(cbi);
+       rc = (cbi->cb != NULL) ?
+               pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
+               nb_pkts;
+       bpf_eth_cbi_unuse(cbi);
+       return rc;
+}
+
+static rte_rx_callback_fn
+select_rx_callback(enum rte_bpf_prog_type ptype, uint32_t flags)
+{
+       if (flags & RTE_BPF_ETH_F_JIT) {
+               if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+                       return bpf_rx_callback_jit;
+               else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+                       return bpf_rx_callback_mb_jit;
+       } else if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+               return bpf_rx_callback_vm;
+       else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+               return bpf_rx_callback_mb_vm;
+
+       return NULL;
+}
+
+static rte_tx_callback_fn
+select_tx_callback(enum rte_bpf_prog_type ptype, uint32_t flags)
+{
+       if (flags & RTE_BPF_ETH_F_JIT) {
+               if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+                       return bpf_tx_callback_jit;
+               else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+                       return bpf_tx_callback_mb_jit;
+       } else if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+               return bpf_tx_callback_vm;
+       else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+               return bpf_tx_callback_mb_vm;
+
+       return NULL;
+}
+
+/*
+ * Helper function to perform BPF unload for given port/queue.
+ * It has to introduce extra complexity (and slowdown), as right now
+ * there is no safe generic way to remove an RX/TX callback while IO
+ * is active.
+ * Note that the memory allocated for the callback handle itself is not
+ * freed either, as right now there is no safe way to do that without
+ * stopping RX/TX on the given port/queue first.
+ */
+static void
+bpf_eth_unload(struct bpf_eth_cbi *bc)
+{
+       /* mark this cbi as empty */
+       bc->cb = NULL;
+       rte_smp_mb();
+
+       /* make sure datapath doesn't use bpf anymore, then destroy bpf */
+       bpf_eth_cbi_wait(bc);
+       rte_bpf_destroy(bc->bpf);
+       bpf_eth_cbi_cleanup(bc);
+}
+
+__rte_experimental void
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
+{
+       struct bpf_eth_cbi *bc;
+       void *cb;
+
+       bc = &rx_cbi[port][queue];
+       cb = bc->cb;
+
+       if (cb == NULL)
+               return;
+
+       rte_eth_remove_rx_callback(port, queue, cb);
+       bpf_eth_unload(bc);
+}
+
+__rte_experimental void
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
+{
+       struct bpf_eth_cbi *bc;
+       void *cb;
+
+       bc = &tx_cbi[port][queue];
+       cb = bc->cb;
+
+       if (cb == NULL)
+               return;
+
+       rte_eth_remove_tx_callback(port, queue, cb);
+       bpf_eth_unload(bc);
+}
+
+__rte_experimental int
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+       const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+       uint32_t flags)
+{
+       int32_t rc;
+       struct bpf_eth_cbi *bc;
+       struct rte_bpf *bpf;
+       rte_rx_callback_fn fn;
+
+       if (prm == NULL)
+               return -EINVAL;
+
+       /* remove old one, if any */
+       rte_bpf_eth_rx_unload(port, queue);
+
+       fn = select_rx_callback(prm->prog_type, flags);
+       if (fn == NULL) {
+               RTE_LOG(ERR, USER1, "%s(%u, %u): no callback selected;\n",
+                       __func__, port, queue);
+               return -EINVAL;
+       }
+
+       bpf = rte_bpf_elf_load(prm, fname, sname);
+       if (bpf == NULL)
+               return -rte_errno;
+
+       /* update global callback info */
+       bc = &rx_cbi[port][queue];
+       bc->bpf = bpf;
+       rte_bpf_get_jit(bpf, &bc->jit);
+
+       rc = 0;
+
+       if ((flags & RTE_BPF_ETH_F_JIT) != 0 && bc->jit.func == NULL) {
+               RTE_LOG(ERR, USER1, "%s(%u, %u): no JIT generated;\n",
+                       __func__, port, queue);
+               rc = -EINVAL;
+       } else {
+               bc->cb = rte_eth_add_rx_callback(port, queue, fn, bc);
+               if (bc->cb == NULL)
+                       rc = -rte_errno;
+       }
+
+       if (rc != 0) {
+               rte_bpf_destroy(bpf);
+               bpf_eth_cbi_cleanup(bc);
+       }
+
+       return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+       const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+       uint32_t flags)
+{
+       int32_t rc;
+       struct bpf_eth_cbi *bc;
+       struct rte_bpf *bpf;
+       rte_tx_callback_fn fn;
+
+       if (prm == NULL)
+               return -EINVAL;
+
+       /* remove old one, if any */
+       rte_bpf_eth_tx_unload(port, queue);
+
+       fn = select_tx_callback(prm->prog_type, flags);
+       if (fn == NULL) {
+               RTE_LOG(ERR, USER1, "%s(%u, %u): no callback selected;\n",
+                       __func__, port, queue);
+               return -EINVAL;
+       }
+
+       bpf = rte_bpf_elf_load(prm, fname, sname);
+       if (bpf == NULL)
+               return -rte_errno;
+
+       /* update global callback info */
+       bc = &tx_cbi[port][queue];
+       bc->bpf = bpf;
+       rte_bpf_get_jit(bpf, &bc->jit);
+
+       rc = 0;
+
+       if ((flags & RTE_BPF_ETH_F_JIT) != 0 && bc->jit.func == NULL) {
+               RTE_LOG(ERR, USER1, "%s(%u, %u): no JIT generated;\n",
+                       __func__, port, queue);
+               rc = -EINVAL;
+       } else {
+               bc->cb = rte_eth_add_tx_callback(port, queue, fn, bc);
+               if (bc->cb == NULL)
+                       rc = -rte_errno;
+       }
+
+       if (rc != 0) {
+               rte_bpf_destroy(bpf);
+               bpf_eth_cbi_cleanup(bc);
+       }
+
+       return rc;
+}
diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h
new file mode 100644
index 000000000..abc3b8e5f
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_ethdev.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_ETHDEV_H_
+#define _RTE_BPF_ETHDEV_H_
+
+#include <rte_bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+       RTE_BPF_ETH_F_NONE = 0,
+       RTE_BPF_ETH_F_JIT  = 0x1, /**< compile BPF into native ISA */
+};
+
+/*
+ * API to install BPF filter as RX/TX callbacks for eth devices.
+ * Note that right now:
+ * - it is not MT safe, i.e. it is not allowed to do load/unload for the
+ *   same port/queue from different threads in parallel.
+ * - though it allows to do load/unload at runtime
+ *   (while RX/TX is ongoing on given port/queue).
+ * - allows only one BPF program per port/queue, i.e. a new load will
+ *   replace the BPF program previously loaded for that port/queue.
+ * Filter behaviour: if the BPF program returns zero for a given packet,
+ * the packet is considered filtered out:
+ *   on RX it will be dropped inside the callback and no further processing
+ *   for that packet will happen;
+ *   on TX the packet will remain unsent, and it is the responsibility of the
+ *   user to handle such a situation (drop, try to send again, etc.).
+ */
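+
+/*
+ * Illustrative call sequence (hypothetical ELF file/section names), for a
+ * BPF program that expects a pointer to struct rte_mbuf as its argument:
+ *
+ *	struct rte_bpf_prm prm = { .prog_type = RTE_BPF_PROG_TYPE_MBUF, };
+ *
+ *	rc = rte_bpf_eth_tx_elf_load(port, queue, &prm, "filter.o", "ftx",
+ *		RTE_BPF_ETH_F_JIT);
+ *	...
+ *	rte_bpf_eth_tx_unload(port, queue);
+ *
+ * A subsequent load for the same port/queue first unloads (replaces) the
+ * previously installed program.
+ */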
+
+void rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue);
+void rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
+
+int rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+       const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+       uint32_t flags);
+int rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+       const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+       uint32_t flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_ETHDEV_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
index ff65144df..a203e088e 100644
--- a/lib/librte_bpf/rte_bpf_version.map
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -3,6 +3,10 @@ EXPERIMENTAL {
 
        rte_bpf_destroy;
        rte_bpf_elf_load;
+       rte_bpf_eth_rx_elf_load;
+       rte_bpf_eth_rx_unload;
+       rte_bpf_eth_tx_elf_load;
+       rte_bpf_eth_tx_unload;
        rte_bpf_exec;
        rte_bpf_exec_burst;
        rte_bpf_get_jit;
-- 
2.13.6
