Hi Magnus,

On 02/19, Magnus Karlsson wrote:

[snip]

>+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
>+{
>+	bool prog_attached = false;
>+	__u32 prog_id = 0;
>+	int err;
>+
>+	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
>+				  xsk->config.xdp_flags);
>+	if (err)
>+		return err;
>+
>+	if (!prog_id) {
>+		prog_attached = true;
>+		err = xsk_create_bpf_maps(xsk);
>+		if (err)
>+			return err;
>+
>+		err = xsk_load_xdp_prog(xsk);
>+		if (err)
>+			goto out_maps;
>+	} else {
>+		xsk->fd = bpf_prog_get_fd_by_id(prog_id);

I suppose it should be:

	xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
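For what it's worth, the corrected branch could look something like this
(untested sketch; the error check is my addition, since a failing
bpf_prog_get_fd_by_id() would otherwise go unnoticed):

	} else {
		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
		if (xsk->prog_fd < 0)
			return -errno;
	}

As written, the program fd overwrites the socket fd stored in xsk->fd,
which the rest of the code, e.g. the getsockopt()/close() calls in
xsk_socket__delete() below, still treats as the socket.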
>+	}
>+
>+	err = xsk_update_bpf_maps(xsk, true, xsk->fd);
>+	if (err)
>+		goto out_load;
>+
>+	return 0;
>+
>+out_load:
>+	if (prog_attached)
>+		close(xsk->prog_fd);
>+out_maps:
>+	if (prog_attached)
>+		xsk_delete_bpf_maps(xsk);
>+	return err;
>+}
>+
>+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
>+		       __u32 queue_id, struct xsk_umem *umem,
>+		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
>+		       const struct xsk_socket_config *usr_config)
>+{
>+	struct sockaddr_xdp sxdp = {};
>+	struct xdp_mmap_offsets off;
>+	struct xsk_socket *xsk;
>+	socklen_t optlen;
>+	void *map;
>+	int err;
>+
>+	if (!umem || !xsk_ptr || !rx || !tx)
>+		return -EFAULT;
>+
>+	if (umem->refcount) {
>+		pr_warning("Error: shared umems not supported by libbpf.\n");
>+		return -EBUSY;
>+	}
>+
>+	xsk = calloc(1, sizeof(*xsk));
>+	if (!xsk)
>+		return -ENOMEM;
>+
>+	if (umem->refcount++ > 0) {
>+		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
>+		if (xsk->fd < 0) {
>+			err = -errno;
>+			goto out_xsk_alloc;
>+		}
>+	} else {
>+		xsk->fd = umem->fd;
>+	}
>+
>+	xsk->outstanding_tx = 0;
>+	xsk->queue_id = queue_id;
>+	xsk->umem = umem;
>+	xsk->ifindex = if_nametoindex(ifname);
>+	if (!xsk->ifindex) {
>+		err = -errno;
>+		goto out_socket;
>+	}
>+	strncpy(xsk->ifname, ifname, IFNAMSIZ);
>+
>+	xsk_set_xdp_socket_config(&xsk->config, usr_config);
>+
>+	if (rx) {
>+		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
>+				 &xsk->config.rx_size,
>+				 sizeof(xsk->config.rx_size));
>+		if (err) {
>+			err = -errno;
>+			goto out_socket;
>+		}
>+	}
>+	if (tx) {
>+		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
>+				 &xsk->config.tx_size,
>+				 sizeof(xsk->config.tx_size));
>+		if (err) {
>+			err = -errno;
>+			goto out_socket;
>+		}
>+	}
>+
>+	optlen = sizeof(off);
>+	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>+	if (err) {
>+		err = -errno;
>+		goto out_socket;
>+	}
>+
>+	if (rx) {
>+		map = xsk_mmap(NULL, off.rx.desc +
>+			       xsk->config.rx_size * sizeof(struct xdp_desc),
>+			       PROT_READ | PROT_WRITE,
>+			       MAP_SHARED | MAP_POPULATE,
>+			       xsk->fd, XDP_PGOFF_RX_RING);
>+		if (map == MAP_FAILED) {
>+			err = -errno;
>+			goto out_socket;
>+		}
>+
>+		rx->mask = xsk->config.rx_size - 1;
>+		rx->size = xsk->config.rx_size;
>+		rx->producer = map + off.rx.producer;
>+		rx->consumer = map + off.rx.consumer;
>+		rx->ring = map + off.rx.desc;
>+	}
>+	xsk->rx = rx;
>+
>+	if (tx) {
>+		map = xsk_mmap(NULL, off.tx.desc +
>+			       xsk->config.tx_size * sizeof(struct xdp_desc),
>+			       PROT_READ | PROT_WRITE,
>+			       MAP_SHARED | MAP_POPULATE,
>+			       xsk->fd, XDP_PGOFF_TX_RING);
>+		if (map == MAP_FAILED) {
>+			err = -errno;
>+			goto out_mmap_rx;
>+		}
>+
>+		tx->mask = xsk->config.tx_size - 1;
>+		tx->size = xsk->config.tx_size;
>+		tx->producer = map + off.tx.producer;
>+		tx->consumer = map + off.tx.consumer;
>+		tx->ring = map + off.tx.desc;
>+		tx->cached_cons = xsk->config.tx_size;
>+	}
>+	xsk->tx = tx;
>+
>+	sxdp.sxdp_family = PF_XDP;
>+	sxdp.sxdp_ifindex = xsk->ifindex;
>+	sxdp.sxdp_queue_id = xsk->queue_id;
>+	sxdp.sxdp_flags = xsk->config.bind_flags;
>+
>+	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
>+	if (err) {
>+		err = -errno;
>+		goto out_mmap_tx;
>+	}
>+
>+	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
>+		err = xsk_setup_xdp_prog(xsk);
>+		if (err)
>+			goto out_mmap_tx;
>+	}
>+
>+	*xsk_ptr = xsk;
>+	return 0;
>+
>+out_mmap_tx:
>+	if (tx)
>+		munmap(xsk->tx,
>+		       off.tx.desc +
>+		       xsk->config.tx_size * sizeof(struct xdp_desc));
>+out_mmap_rx:
>+	if (rx)
>+		munmap(xsk->rx,
>+		       off.rx.desc +
>+		       xsk->config.rx_size * sizeof(struct xdp_desc));
>+out_socket:
>+	if (--umem->refcount)
>+		close(xsk->fd);
>+out_xsk_alloc:
>+	free(xsk);
>+	return err;
>+}
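As a side note for anyone trying out the series: with the declarations
from xsk.h further down, my understanding is that a minimal setup would
look roughly like this (untested sketch; NUM_FRAMES and "eth0" are
placeholders, and the ring structs must of course outlive the call):

	struct xsk_ring_prod fill, tx;
	struct xsk_ring_cons comp, rx;
	struct xsk_umem *umem;
	struct xsk_socket *xsk;
	__u64 size = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
	void *bufs;
	int err;

	/* Page-aligned packet buffer area backing the umem. */
	bufs = mmap(NULL, size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (bufs == MAP_FAILED)
		return -errno;

	/* NULL configs select the default ring sizes. */
	err = xsk_umem__create(&umem, bufs, size, &fill, &comp, NULL);
	if (!err)
		err = xsk_socket__create(&xsk, "eth0", 0, umem,
					 &rx, &tx, NULL);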
>+
>+int xsk_umem__delete(struct xsk_umem *umem)
>+{
>+	struct xdp_mmap_offsets off;
>+	socklen_t optlen;
>+	int err;
>+
>+	if (!umem)
>+		return 0;
>+
>+	if (umem->refcount)
>+		return -EBUSY;
>+
>+	optlen = sizeof(off);
>+	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>+	if (!err) {
>+		munmap(umem->fill->ring,
>+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
>+		munmap(umem->comp->ring,
>+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
>+	}
>+
>+	close(umem->fd);
>+	free(umem);
>+
>+	return 0;
>+}
>+
>+void xsk_socket__delete(struct xsk_socket *xsk)
>+{
>+	struct xdp_mmap_offsets off;
>+	socklen_t optlen;
>+	int err;
>+
>+	if (!xsk)
>+		return;
>+
>+	(void)xsk_update_bpf_maps(xsk, 0, 0);
>+
>+	optlen = sizeof(off);
>+	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
>+	if (!err) {
>+		if (xsk->rx)
>+			munmap(xsk->rx->ring,
>+			       off.rx.desc +
>+			       xsk->config.rx_size * sizeof(struct xdp_desc));
>+		if (xsk->tx)
>+			munmap(xsk->tx->ring,
>+			       off.tx.desc +
>+			       xsk->config.tx_size * sizeof(struct xdp_desc));
>+	}
>+
>+	xsk->umem->refcount--;
>+	/* Do not close an fd that also has an associated umem connected
>+	 * to it.
>+	 */
>+	if (xsk->fd != xsk->umem->fd)
>+		close(xsk->fd);
>+	free(xsk);
>+}
>diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
>new file mode 100644
>index 0000000..a497f00
>--- /dev/null
>+++ b/tools/lib/bpf/xsk.h
>@@ -0,0 +1,203 @@
>+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
>+
>+/*
>+ * AF_XDP user-space access library.
>+ *
>+ * Copyright(c) 2018 - 2019 Intel Corporation.
>+ *
>+ * Author(s): Magnus Karlsson <magnus.karls...@intel.com>
>+ */
>+
>+#ifndef __LIBBPF_XSK_H
>+#define __LIBBPF_XSK_H
>+
>+#include <stdio.h>
>+#include <stdint.h>
>+#include <linux/if_xdp.h>
>+
>+#include "libbpf.h"
>+
>+#ifdef __cplusplus
>+extern "C" {
>+#endif
>+
>+/* Do not access these members directly. Use the functions below. */
>+#define DEFINE_XSK_RING(name) \
>+struct name { \
>+	__u32 cached_prod; \
>+	__u32 cached_cons; \
>+	__u32 mask; \
>+	__u32 size; \
>+	__u32 *producer; \
>+	__u32 *consumer; \
>+	void *ring; \
>+}
>+
>+DEFINE_XSK_RING(xsk_ring_prod);
>+DEFINE_XSK_RING(xsk_ring_cons);
>+
>+struct xsk_umem;
>+struct xsk_socket;
>+
>+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
>+					      __u32 idx)
>+{
>+	__u64 *addrs = (__u64 *)fill->ring;
>+
>+	return &addrs[idx & fill->mask];
>+}
>+
>+static inline const __u64 *
>+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
>+{
>+	const __u64 *addrs = (const __u64 *)comp->ring;
>+
>+	return &addrs[idx & comp->mask];
>+}
>+
>+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
>+						      __u32 idx)
>+{
>+	struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
>+
>+	return &descs[idx & tx->mask];
>+}
>+
>+static inline const struct xdp_desc *
>+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
>+{
>+	const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
>+
>+	return &descs[idx & rx->mask];
>+}
>+
>+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
>+{
>+	__u32 free_entries = r->cached_cons - r->cached_prod;
>+
>+	if (free_entries >= nb)
>+		return free_entries;
>+
>+	/* Refresh the local tail pointer.
>+	 * cached_cons is r->size bigger than the real consumer pointer so
>+	 * that this addition can be avoided in the more frequently
>+	 * executed code that computes free_entries in the beginning of
>+	 * this function. Without this optimization it would have been
>+	 * free_entries = r->cached_cons - r->cached_prod + r->size.
>+	 */
>+	r->cached_cons = *r->consumer + r->size;
>+
>+	return r->cached_cons - r->cached_prod;
>+}
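The comment above becomes clearer with numbers: for size = 8, a real
consumer pointer of 3 and cached_prod = 5, the refresh sets cached_cons
= 3 + 8 = 11, so free_entries = 11 - 5 = 6, which is exactly size -
(prod - cons) = 8 - 2 = 6. Since everything is __u32, the scheme keeps
working once the pointers wrap around.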
>+
>+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
>+{
>+	__u32 entries = r->cached_prod - r->cached_cons;
>+
>+	if (entries == 0) {
>+		r->cached_prod = *r->producer;
>+		entries = r->cached_prod - r->cached_cons;
>+	}
>+
>+	return (entries > nb) ? nb : entries;
>+}
>+
>+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
>+					    size_t nb, __u32 *idx)
>+{
>+	if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
>+		return 0;
>+
>+	*idx = prod->cached_prod;
>+	prod->cached_prod += nb;
>+
>+	return nb;
>+}
>+
>+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
>+{
>+	/* Make sure everything has been written to the ring before signalling
>+	 * this to the kernel.
>+	 */
>+	smp_wmb();
>+
>+	*prod->producer += nb;
>+}
>+
>+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
>+					 size_t nb, __u32 *idx)
>+{
>+	size_t entries = xsk_cons_nb_avail(cons, nb);
>+
>+	if (likely(entries > 0)) {
>+		/* Make sure we do not speculatively read the data before
>+		 * we have received the packet buffers from the ring.
>+		 */
>+		smp_rmb();
>+
>+		*idx = cons->cached_cons;
>+		cons->cached_cons += entries;
>+	}
>+
>+	return entries;
>+}
>+
>+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
>+{
>+	*cons->consumer += nb;
>+}
>+
>+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
>+{
>+	return &((char *)umem_area)[addr];
>+}
>+
>+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
>+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
>+
>+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
>+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
>+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */
>+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
>+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
>+
>+struct xsk_umem_config {
>+	__u32 fill_size;
>+	__u32 comp_size;
>+	__u32 frame_size;
>+	__u32 frame_headroom;
>+};
>+
>+/* Flags for the libbpf_flags field. */
>+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
>+
>+struct xsk_socket_config {
>+	__u32 rx_size;
>+	__u32 tx_size;
>+	__u32 libbpf_flags;
>+	__u32 xdp_flags;
>+	__u16 bind_flags;
>+};
>+
>+/* Set config to NULL to get the default configuration. */
>+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
>+				void *umem_area, __u64 size,
>+				struct xsk_ring_prod *fill,
>+				struct xsk_ring_cons *comp,
>+				const struct xsk_umem_config *config);
>+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
>+				  const char *ifname, __u32 queue_id,
>+				  struct xsk_umem *umem,
>+				  struct xsk_ring_cons *rx,
>+				  struct xsk_ring_prod *tx,
>+				  const struct xsk_socket_config *config);
>+
>+/* Returns 0 for success and -EBUSY if the umem is still in use. */
>+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
>+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
>+
>+#ifdef __cplusplus
>+} /* extern "C" */
>+#endif
>+
>+#endif /* __LIBBPF_XSK_H */
>--
>2.7.4
>
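One more aside, since the ring helpers are the heart of the API: this is
how I read the intended RX flow, condensed into a sketch (untested; bufs
is the umem area and BATCH_SIZE a placeholder):

	__u32 idx_rx = 0;
	size_t i, rcvd;

	rcvd = xsk_ring_cons__peek(&rx, BATCH_SIZE, &idx_rx);
	for (i = 0; i < rcvd; i++) {
		const struct xdp_desc *desc =
			xsk_ring_cons__rx_desc(&rx, idx_rx++);
		char *pkt = xsk_umem__get_data(bufs, desc->addr);

		/* desc->len valid bytes at pkt */
	}
	/* Hand the slots back to the kernel. */
	xsk_ring_cons__release(&rx, rcvd);

The TX side would mirror this with xsk_ring_prod__reserve() followed by
xsk_ring_prod__submit() once the descriptors are filled in.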