Hi Magnus

On 02/19, Magnus Karlsson wrote:
[snip]
>+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
>+{
>+      bool prog_attached = false;
>+      __u32 prog_id = 0;
>+      int err;
>+
>+      err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
>+                                xsk->config.xdp_flags);
>+      if (err)
>+              return err;
>+
>+      if (!prog_id) {
>+              prog_attached = true;
>+              err = xsk_create_bpf_maps(xsk);
>+              if (err)
>+                      return err;
>+
>+              err = xsk_load_xdp_prog(xsk);
>+              if (err)
>+                      goto out_maps;
>+      } else {
>+              xsk->fd = bpf_prog_get_fd_by_id(prog_id);

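I suppose this should assign to xsk->prog_fd instead, since xsk->fd already
holds the socket fd (it is set in xsk_socket__create() and used for the
setsockopt()/bind() calls), i.e.: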

                xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);

>+      }
>+
>+      err = xsk_update_bpf_maps(xsk, true, xsk->fd);
>+      if (err)
>+              goto out_load;
>+
>+      return 0;
>+
>+out_load:
>+      if (prog_attached)
>+              close(xsk->prog_fd);
>+out_maps:
>+      if (prog_attached)
>+              xsk_delete_bpf_maps(xsk);
>+      return err;
>+}
>+
>+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
>+                     __u32 queue_id, struct xsk_umem *umem,
>+                     struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
>+                     const struct xsk_socket_config *usr_config)
>+{
>+      struct sockaddr_xdp sxdp = {};
>+      struct xdp_mmap_offsets off;
>+      struct xsk_socket *xsk;
>+      socklen_t optlen;
>+      void *map;
>+      int err;
>+
>+      if (!umem || !xsk_ptr || !rx || !tx)
>+              return -EFAULT;
>+
>+      if (umem->refcount) {
>+              pr_warning("Error: shared umems not supported by libbpf.\n");
>+              return -EBUSY;
>+      }
>+
>+      xsk = calloc(1, sizeof(*xsk));
>+      if (!xsk)
>+              return -ENOMEM;
>+
>+      if (umem->refcount++ > 0) {
>+              xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
>+              if (xsk->fd < 0) {
>+                      err = -errno;
>+                      goto out_xsk_alloc;
>+              }
>+      } else {
>+              xsk->fd = umem->fd;
>+      }
>+
>+      xsk->outstanding_tx = 0;
>+      xsk->queue_id = queue_id;
>+      xsk->umem = umem;
>+      xsk->ifindex = if_nametoindex(ifname);
>+      if (!xsk->ifindex) {
>+              err = -errno;
>+              goto out_socket;
>+      }
>+      strncpy(xsk->ifname, ifname, IFNAMSIZ);
>+
>+      xsk_set_xdp_socket_config(&xsk->config, usr_config);
>+
>+      if (rx) {
>+              err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
>+                               &xsk->config.rx_size,
>+                               sizeof(xsk->config.rx_size));
>+              if (err) {
>+                      err = -errno;
>+                      goto out_socket;
>+              }
>+      }
>+      if (tx) {
>+              err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
>+                               &xsk->config.tx_size,
>+                               sizeof(xsk->config.tx_size));
>+              if (err) {
>+                      err = -errno;
>+                      goto out_socket;
>+              }
>+      }
>+
>+      optlen = sizeof(off);
>+      err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>+      if (err) {
>+              err = -errno;
>+              goto out_socket;
>+      }
>+
>+      if (rx) {
>+              map = xsk_mmap(NULL, off.rx.desc +
>+                             xsk->config.rx_size * sizeof(struct xdp_desc),
>+                             PROT_READ | PROT_WRITE,
>+                             MAP_SHARED | MAP_POPULATE,
>+                             xsk->fd, XDP_PGOFF_RX_RING);
>+              if (map == MAP_FAILED) {
>+                      err = -errno;
>+                      goto out_socket;
>+              }
>+
>+              rx->mask = xsk->config.rx_size - 1;
>+              rx->size = xsk->config.rx_size;
>+              rx->producer = map + off.rx.producer;
>+              rx->consumer = map + off.rx.consumer;
>+              rx->ring = map + off.rx.desc;
>+      }
>+      xsk->rx = rx;
>+
>+      if (tx) {
>+              map = xsk_mmap(NULL, off.tx.desc +
>+                             xsk->config.tx_size * sizeof(struct xdp_desc),
>+                             PROT_READ | PROT_WRITE,
>+                             MAP_SHARED | MAP_POPULATE,
>+                             xsk->fd, XDP_PGOFF_TX_RING);
>+              if (map == MAP_FAILED) {
>+                      err = -errno;
>+                      goto out_mmap_rx;
>+              }
>+
>+              tx->mask = xsk->config.tx_size - 1;
>+              tx->size = xsk->config.tx_size;
>+              tx->producer = map + off.tx.producer;
>+              tx->consumer = map + off.tx.consumer;
>+              tx->ring = map + off.tx.desc;
>+              tx->cached_cons = xsk->config.tx_size;
>+      }
>+      xsk->tx = tx;
>+
>+      sxdp.sxdp_family = PF_XDP;
>+      sxdp.sxdp_ifindex = xsk->ifindex;
>+      sxdp.sxdp_queue_id = xsk->queue_id;
>+      sxdp.sxdp_flags = xsk->config.bind_flags;
>+
>+      err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
>+      if (err) {
>+              err = -errno;
>+              goto out_mmap_tx;
>+      }
>+
>+      if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
>+              err = xsk_setup_xdp_prog(xsk);
>+              if (err)
>+                      goto out_mmap_tx;
>+      }
>+
>+      *xsk_ptr = xsk;
>+      return 0;
>+
>+out_mmap_tx:
>+      if (tx)
>+              munmap(xsk->tx,
>+                     off.tx.desc +
>+                     xsk->config.tx_size * sizeof(struct xdp_desc));
>+out_mmap_rx:
>+      if (rx)
>+              munmap(xsk->rx,
>+                     off.rx.desc +
>+                     xsk->config.rx_size * sizeof(struct xdp_desc));
>+out_socket:
>+      if (--umem->refcount)
>+              close(xsk->fd);
>+out_xsk_alloc:
>+      free(xsk);
>+      return err;
>+}
>+
>+int xsk_umem__delete(struct xsk_umem *umem)
>+{
>+      struct xdp_mmap_offsets off;
>+      socklen_t optlen;
>+      int err;
>+
>+      if (!umem)
>+              return 0;
>+
>+      if (umem->refcount)
>+              return -EBUSY;
>+
>+      optlen = sizeof(off);
>+      err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>+      if (!err) {
>+              munmap(umem->fill->ring,
>+                     off.fr.desc + umem->config.fill_size * sizeof(__u64));
>+              munmap(umem->comp->ring,
>+                     off.cr.desc + umem->config.comp_size * sizeof(__u64));
>+      }
>+
>+      close(umem->fd);
>+      free(umem);
>+
>+      return 0;
>+}
>+
>+void xsk_socket__delete(struct xsk_socket *xsk)
>+{
>+      struct xdp_mmap_offsets off;
>+      socklen_t optlen;
>+      int err;
>+
>+      if (!xsk)
>+              return;
>+
>+      (void)xsk_update_bpf_maps(xsk, 0, 0);
>+
>+      optlen = sizeof(off);
>+      err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>+      if (!err) {
>+              if (xsk->rx)
>+                      munmap(xsk->rx->ring,
>+                             off.rx.desc +
>+                             xsk->config.rx_size * sizeof(struct xdp_desc));
>+              if (xsk->tx)
>+                      munmap(xsk->tx->ring,
>+                             off.tx.desc +
>+                             xsk->config.tx_size * sizeof(struct xdp_desc));
>+      }
>+
>+      xsk->umem->refcount--;
>+      /* Do not close an fd that also has an associated umem connected
>+       * to it.
>+       */
>+      if (xsk->fd != xsk->umem->fd)
>+              close(xsk->fd);
>+      free(xsk);
>+}
>diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
>new file mode 100644
>index 0000000..a497f00
>--- /dev/null
>+++ b/tools/lib/bpf/xsk.h
>@@ -0,0 +1,203 @@
>+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
>+
>+/*
>+ * AF_XDP user-space access library.
>+ *
>+ * Copyright(c) 2018 - 2019 Intel Corporation.
>+ *
>+ * Author(s): Magnus Karlsson <magnus.karls...@intel.com>
>+ */
>+
>+#ifndef __LIBBPF_XSK_H
>+#define __LIBBPF_XSK_H
>+
>+#include <stdio.h>
>+#include <stdint.h>
>+#include <linux/if_xdp.h>
>+
>+#include "libbpf.h"
>+
>+#ifdef __cplusplus
>+extern "C" {
>+#endif
>+
>+/* Do not access these members directly. Use the functions below. */
>+#define DEFINE_XSK_RING(name) \
>+struct name { \
>+      __u32 cached_prod; \
>+      __u32 cached_cons; \
>+      __u32 mask; \
>+      __u32 size; \
>+      __u32 *producer; \
>+      __u32 *consumer; \
>+      void *ring; \
>+}
>+
>+DEFINE_XSK_RING(xsk_ring_prod);
>+DEFINE_XSK_RING(xsk_ring_cons);
>+
>+struct xsk_umem;
>+struct xsk_socket;
>+
>+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
>+                                            __u32 idx)
>+{
>+      __u64 *addrs = (__u64 *)fill->ring;
>+
>+      return &addrs[idx & fill->mask];
>+}
>+
>+static inline const __u64 *
>+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
>+{
>+      const __u64 *addrs = (const __u64 *)comp->ring;
>+
>+      return &addrs[idx & comp->mask];
>+}
>+
>+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
>+                                                    __u32 idx)
>+{
>+      struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
>+
>+      return &descs[idx & tx->mask];
>+}
>+
>+static inline const struct xdp_desc *
>+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
>+{
>+      const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
>+
>+      return &descs[idx & rx->mask];
>+}
>+
>+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
>+{
>+      __u32 free_entries = r->cached_cons - r->cached_prod;
>+
>+      if (free_entries >= nb)
>+              return free_entries;
>+
>+      /* Refresh the local tail pointer.
>+       * cached_cons is r->size bigger than the real consumer pointer so
>+       * that this addition can be avoided in the more frequently
>+       * executed code that computes free_entries in the beginning of
>+       * this function. Without this optimization it would have been
>+       * free_entries = r->cached_prod - r->cached_cons + r->size.
>+       */
>+      r->cached_cons = *r->consumer + r->size;
>+
>+      return r->cached_cons - r->cached_prod;
>+}
>+
>+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
>+{
>+      __u32 entries = r->cached_prod - r->cached_cons;
>+
>+      if (entries == 0) {
>+              r->cached_prod = *r->producer;
>+              entries = r->cached_prod - r->cached_cons;
>+      }
>+
>+      return (entries > nb) ? nb : entries;
>+}
>+
>+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
>+                                          size_t nb, __u32 *idx)
>+{
>+      if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
>+              return 0;
>+
>+      *idx = prod->cached_prod;
>+      prod->cached_prod += nb;
>+
>+      return nb;
>+}
>+
>+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
>+{
>+      /* Make sure everything has been written to the ring before signalling
>+       * this to the kernel.
>+       */
>+      smp_wmb();
>+
>+      *prod->producer += nb;
>+}
>+
>+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
>+                                       size_t nb, __u32 *idx)
>+{
>+      size_t entries = xsk_cons_nb_avail(cons, nb);
>+
>+      if (likely(entries > 0)) {
>+              /* Make sure we do not speculatively read the data before
>+               * we have received the packet buffers from the ring.
>+               */
>+              smp_rmb();
>+
>+              *idx = cons->cached_cons;
>+              cons->cached_cons += entries;
>+      }
>+
>+      return entries;
>+}
>+
>+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
>+{
>+      *cons->consumer += nb;
>+}
>+
>+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
>+{
>+      return &((char *)umem_area)[addr];
>+}
>+
>+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
>+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
>+
>+#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
>+#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
>+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    11 /* 2048 bytes */
>+#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
>+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
>+
>+struct xsk_umem_config {
>+      __u32 fill_size;
>+      __u32 comp_size;
>+      __u32 frame_size;
>+      __u32 frame_headroom;
>+};
>+
>+/* Flags for the libbpf_flags field. */
>+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
>+
>+struct xsk_socket_config {
>+      __u32 rx_size;
>+      __u32 tx_size;
>+      __u32 libbpf_flags;
>+      __u32 xdp_flags;
>+      __u16 bind_flags;
>+};
>+
>+/* Set config to NULL to get the default configuration. */
>+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
>+                              void *umem_area, __u64 size,
>+                              struct xsk_ring_prod *fill,
>+                              struct xsk_ring_cons *comp,
>+                              const struct xsk_umem_config *config);
>+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
>+                                const char *ifname, __u32 queue_id,
>+                                struct xsk_umem *umem,
>+                                struct xsk_ring_cons *rx,
>+                                struct xsk_ring_prod *tx,
>+                                const struct xsk_socket_config *config);
>+
>+/* Returns 0 for success and -EBUSY if the umem is still in use. */
>+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
>+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
>+
>+#ifdef __cplusplus
>+} /* extern "C" */
>+#endif
>+
>+#endif /* __LIBBPF_XSK_H */
>-- 
>2.7.4
>

Reply via email to