On Thu, Feb 21, 2019 at 7:06 AM Ye Xiaolong <xiaolong...@intel.com> wrote:
>
> Hi Magnus
>
> On 02/19, Magnus Karlsson wrote:
> [snip]
> >+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
> >+{
> >+        bool prog_attached = false;
> >+        __u32 prog_id = 0;
> >+        int err;
> >+
> >+        err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
> >+                                  xsk->config.xdp_flags);
> >+        if (err)
> >+                return err;
> >+
> >+        if (!prog_id) {
> >+                prog_attached = true;
> >+                err = xsk_create_bpf_maps(xsk);
> >+                if (err)
> >+                        return err;
> >+
> >+                err = xsk_load_xdp_prog(xsk);
> >+                if (err)
> >+                        goto out_maps;
> >+        } else {
> >+                xsk->fd = bpf_prog_get_fd_by_id(prog_id);
>
> I suppose it should be
>
> xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);

My bad, that is correct. Will spin a v6. Thanks: Magnus
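To spell out the bug for the archives: xsk->fd holds the AF_XDP socket fd,
so assigning the program fd to it clobbers the socket. The corrected branch
stores the result in xsk->prog_fd instead, roughly like this (the error
check is my addition here, not in the patch above; bpf_prog_get_fd_by_id()
returns a negative value and sets errno on failure):

        } else {
                /* Reuse the XDP program already attached to the ifindex
                 * instead of loading a second copy.
                 */
                xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
                if (xsk->prog_fd < 0)
                        return -errno;
        }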
> >+        }
> >+
> >+        err = xsk_update_bpf_maps(xsk, true, xsk->fd);
> >+        if (err)
> >+                goto out_load;
> >+
> >+        return 0;
> >+
> >+out_load:
> >+        if (prog_attached)
> >+                close(xsk->prog_fd);
> >+out_maps:
> >+        if (prog_attached)
> >+                xsk_delete_bpf_maps(xsk);
> >+        return err;
> >+}
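As an aside, since it is relevant to this function: an application that
wants to load and manage its own XDP program can opt out of all of the
above by setting XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD in the socket config
(the flag and struct are declared in xsk.h further down). A minimal sketch:

        struct xsk_socket_config cfg = {
                .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
                .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
                .libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD,
        };

With that flag set, xsk_socket__create() below skips xsk_setup_xdp_prog()
entirely and the caller is responsible for attaching a program and
populating its own xskmap.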
> >+
> >+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
> >+                       __u32 queue_id, struct xsk_umem *umem,
> >+                       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
> >+                       const struct xsk_socket_config *usr_config)
> >+{
> >+        struct sockaddr_xdp sxdp = {};
> >+        struct xdp_mmap_offsets off;
> >+        struct xsk_socket *xsk;
> >+        socklen_t optlen;
> >+        void *map;
> >+        int err;
> >+
> >+        if (!umem || !xsk_ptr || !rx || !tx)
> >+                return -EFAULT;
> >+
> >+        if (umem->refcount) {
> >+                pr_warning("Error: shared umems not supported by libbpf.\n");
> >+                return -EBUSY;
> >+        }
> >+
> >+        xsk = calloc(1, sizeof(*xsk));
> >+        if (!xsk)
> >+                return -ENOMEM;
> >+
> >+        if (umem->refcount++ > 0) {
> >+                xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
> >+                if (xsk->fd < 0) {
> >+                        err = -errno;
> >+                        goto out_xsk_alloc;
> >+                }
> >+        } else {
> >+                xsk->fd = umem->fd;
> >+        }
> >+
> >+        xsk->outstanding_tx = 0;
> >+        xsk->queue_id = queue_id;
> >+        xsk->umem = umem;
> >+        xsk->ifindex = if_nametoindex(ifname);
> >+        if (!xsk->ifindex) {
> >+                err = -errno;
> >+                goto out_socket;
> >+        }
> >+        strncpy(xsk->ifname, ifname, IFNAMSIZ);
> >+
> >+        xsk_set_xdp_socket_config(&xsk->config, usr_config);
> >+
> >+        if (rx) {
> >+                err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
> >+                                 &xsk->config.rx_size,
> >+                                 sizeof(xsk->config.rx_size));
> >+                if (err) {
> >+                        err = -errno;
> >+                        goto out_socket;
> >+                }
> >+        }
> >+        if (tx) {
> >+                err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
> >+                                 &xsk->config.tx_size,
> >+                                 sizeof(xsk->config.tx_size));
> >+                if (err) {
> >+                        err = -errno;
> >+                        goto out_socket;
> >+                }
> >+        }
> >+
> >+        optlen = sizeof(off);
> >+        err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >+        if (err) {
> >+                err = -errno;
> >+                goto out_socket;
> >+        }
> >+
> >+        if (rx) {
> >+                map = xsk_mmap(NULL, off.rx.desc +
> >+                               xsk->config.rx_size * sizeof(struct xdp_desc),
> >+                               PROT_READ | PROT_WRITE,
> >+                               MAP_SHARED | MAP_POPULATE,
> >+                               xsk->fd, XDP_PGOFF_RX_RING);
> >+                if (map == MAP_FAILED) {
> >+                        err = -errno;
> >+                        goto out_socket;
> >+                }
> >+
> >+                rx->mask = xsk->config.rx_size - 1;
> >+                rx->size = xsk->config.rx_size;
> >+                rx->producer = map + off.rx.producer;
> >+                rx->consumer = map + off.rx.consumer;
> >+                rx->ring = map + off.rx.desc;
> >+        }
> >+        xsk->rx = rx;
> >+
> >+        if (tx) {
> >+                map = xsk_mmap(NULL, off.tx.desc +
> >+                               xsk->config.tx_size * sizeof(struct xdp_desc),
> >+                               PROT_READ | PROT_WRITE,
> >+                               MAP_SHARED | MAP_POPULATE,
> >+                               xsk->fd, XDP_PGOFF_TX_RING);
> >+                if (map == MAP_FAILED) {
> >+                        err = -errno;
> >+                        goto out_mmap_rx;
> >+                }
> >+
> >+                tx->mask = xsk->config.tx_size - 1;
> >+                tx->size = xsk->config.tx_size;
> >+                tx->producer = map + off.tx.producer;
> >+                tx->consumer = map + off.tx.consumer;
> >+                tx->ring = map + off.tx.desc;
> >+                tx->cached_cons = xsk->config.tx_size;
> >+        }
> >+        xsk->tx = tx;
> >+
> >+        sxdp.sxdp_family = PF_XDP;
> >+        sxdp.sxdp_ifindex = xsk->ifindex;
> >+        sxdp.sxdp_queue_id = xsk->queue_id;
> >+        sxdp.sxdp_flags = xsk->config.bind_flags;
> >+
> >+        err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
> >+        if (err) {
> >+                err = -errno;
> >+                goto out_mmap_tx;
> >+        }
> >+
> >+        if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
> >+                err = xsk_setup_xdp_prog(xsk);
> >+                if (err)
> >+                        goto out_mmap_tx;
> >+        }
> >+
> >+        *xsk_ptr = xsk;
> >+        return 0;
> >+
> >+out_mmap_tx:
> >+        if (tx)
> >+                munmap(xsk->tx,
> >+                       off.tx.desc +
> >+                       xsk->config.tx_size * sizeof(struct xdp_desc));
> >+out_mmap_rx:
> >+        if (rx)
> >+                munmap(xsk->rx,
> >+                       off.rx.desc +
> >+                       xsk->config.rx_size * sizeof(struct xdp_desc));
> >+out_socket:
> >+        if (--umem->refcount)
> >+                close(xsk->fd);
> >+out_xsk_alloc:
> >+        free(xsk);
> >+        return err;
> >+}
> >+
> >+int xsk_umem__delete(struct xsk_umem *umem)
> >+{
> >+        struct xdp_mmap_offsets off;
> >+        socklen_t optlen;
> >+        int err;
> >+
> >+        if (!umem)
> >+                return 0;
> >+
> >+        if (umem->refcount)
> >+                return -EBUSY;
> >+
> >+        optlen = sizeof(off);
> >+        err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >+        if (!err) {
> >+                munmap(umem->fill->ring,
> >+                       off.fr.desc + umem->config.fill_size * sizeof(__u64));
> >+                munmap(umem->comp->ring,
> >+                       off.cr.desc + umem->config.comp_size * sizeof(__u64));
> >+        }
> >+
> >+        close(umem->fd);
> >+        free(umem);
> >+
> >+        return 0;
> >+}
> >+
> >+void xsk_socket__delete(struct xsk_socket *xsk)
> >+{
> >+        struct xdp_mmap_offsets off;
> >+        socklen_t optlen;
> >+        int err;
> >+
> >+        if (!xsk)
> >+                return;
> >+
> >+        (void)xsk_update_bpf_maps(xsk, 0, 0);
> >+
> >+        optlen = sizeof(off);
> >+        err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >+        if (!err) {
> >+                if (xsk->rx)
> >+                        munmap(xsk->rx->ring,
> >+                               off.rx.desc +
> >+                               xsk->config.rx_size * sizeof(struct xdp_desc));
> >+                if (xsk->tx)
> >+                        munmap(xsk->tx->ring,
> >+                               off.tx.desc +
> >+                               xsk->config.tx_size * sizeof(struct xdp_desc));
> >+        }
> >+
> >+        xsk->umem->refcount--;
> >+        /* Do not close an fd that also has an associated umem connected
> >+         * to it.
> >+         */
> >+        if (xsk->fd != xsk->umem->fd)
> >+                close(xsk->fd);
> >+        free(xsk);
> >+}
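One usage note that follows from the refcounting above: sockets have to be
torn down before the umem, because xsk_umem__delete() returns -EBUSY as long
as any socket still holds a reference. So the teardown order is always:

        xsk_socket__delete(xsk);        /* drops the umem reference */
        err = xsk_umem__delete(umem);   /* succeeds once refcount is 0 */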
> >diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
> >new file mode 100644
> >index 0000000..a497f00
> >--- /dev/null
> >+++ b/tools/lib/bpf/xsk.h
> >@@ -0,0 +1,203 @@
> >+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> >+
> >+/*
> >+ * AF_XDP user-space access library.
> >+ *
> >+ * Copyright(c) 2018 - 2019 Intel Corporation.
> >+ *
> >+ * Author(s): Magnus Karlsson <magnus.karls...@intel.com>
> >+ */
> >+
> >+#ifndef __LIBBPF_XSK_H
> >+#define __LIBBPF_XSK_H
> >+
> >+#include <stdio.h>
> >+#include <stdint.h>
> >+#include <linux/if_xdp.h>
> >+
> >+#include "libbpf.h"
> >+
> >+#ifdef __cplusplus
> >+extern "C" {
> >+#endif
> >+
> >+/* Do not access these members directly. Use the functions below. */
> >+#define DEFINE_XSK_RING(name) \
> >+struct name { \
> >+        __u32 cached_prod; \
> >+        __u32 cached_cons; \
> >+        __u32 mask; \
> >+        __u32 size; \
> >+        __u32 *producer; \
> >+        __u32 *consumer; \
> >+        void *ring; \
> >+}
> >+
> >+DEFINE_XSK_RING(xsk_ring_prod);
> >+DEFINE_XSK_RING(xsk_ring_cons);
> >+
> >+struct xsk_umem;
> >+struct xsk_socket;
> >+
> >+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
> >+                                              __u32 idx)
> >+{
> >+        __u64 *addrs = (__u64 *)fill->ring;
> >+
> >+        return &addrs[idx & fill->mask];
> >+}
> >+
> >+static inline const __u64 *
> >+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
> >+{
> >+        const __u64 *addrs = (const __u64 *)comp->ring;
> >+
> >+        return &addrs[idx & comp->mask];
> >+}
> >+
> >+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
> >+                                                      __u32 idx)
> >+{
> >+        struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
> >+
> >+        return &descs[idx & tx->mask];
> >+}
> >+
> >+static inline const struct xdp_desc *
> >+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
> >+{
> >+        const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
> >+
> >+        return &descs[idx & rx->mask];
> >+}
> >+
> >+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
> >+{
> >+        __u32 free_entries = r->cached_cons - r->cached_prod;
> >+
> >+        if (free_entries >= nb)
> >+                return free_entries;
> >+
> >+        /* Refresh the local tail pointer.
> >+         * cached_cons is r->size bigger than the real consumer pointer so
> >+         * that this addition can be avoided in the more frequently
> >+         * executed code that computes free_entries at the beginning of
> >+         * this function. Without this optimization it would have been
> >+         * free_entries = r->cached_cons - r->cached_prod + r->size.
> >+         */
> >+        r->cached_cons = *r->consumer + r->size;
> >+
> >+        return r->cached_cons - r->cached_prod;
> >+}
> >+
> >+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
> >+{
> >+        __u32 entries = r->cached_prod - r->cached_cons;
> >+
> >+        if (entries == 0) {
> >+                r->cached_prod = *r->producer;
> >+                entries = r->cached_prod - r->cached_cons;
> >+        }
> >+
> >+        return (entries > nb) ? nb : entries;
> >+}
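A worked example of the cached_cons trick, for a ring with size = 8: if the
real consumer pointer is 10 and cached_prod is 14, cached_cons is kept at
10 + 8 = 18, so free_entries = 18 - 14 = 4. That is the same result as the
direct formula 10 - 14 + 8 = 4, but the addition of r->size is paid only on
the slow path that refreshes the cache. Unsigned wraparound keeps this
correct when the counters cross the 32-bit boundary.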
> >+
> >+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
> >+                                            size_t nb, __u32 *idx)
> >+{
> >+        if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
> >+                return 0;
> >+
> >+        *idx = prod->cached_prod;
> >+        prod->cached_prod += nb;
> >+
> >+        return nb;
> >+}
> >+
> >+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
> >+{
> >+        /* Make sure everything has been written to the ring before signalling
> >+         * this to the kernel.
> >+         */
> >+        smp_wmb();
> >+
> >+        *prod->producer += nb;
> >+}
> >+
> >+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
> >+                                         size_t nb, __u32 *idx)
> >+{
> >+        size_t entries = xsk_cons_nb_avail(cons, nb);
> >+
> >+        if (likely(entries > 0)) {
> >+                /* Make sure we do not speculatively read the data before
> >+                 * we have received the packet buffers from the ring.
> >+                 */
> >+                smp_rmb();
> >+
> >+                *idx = cons->cached_cons;
> >+                cons->cached_cons += entries;
> >+        }
> >+
> >+        return entries;
> >+}
> >+
> >+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
> >+{
> >+        *cons->consumer += nb;
> >+}
> >+
> >+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
> >+{
> >+        return &((char *)umem_area)[addr];
> >+}
> >+
> >+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
> >+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
> >+
> >+#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
> >+#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
> >+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    11 /* 2048 bytes */
> >+#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
> >+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
> >+
> >+struct xsk_umem_config {
> >+        __u32 fill_size;
> >+        __u32 comp_size;
> >+        __u32 frame_size;
> >+        __u32 frame_headroom;
> >+};
> >+
> >+/* Flags for the libbpf_flags field. */
> >+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
> >+
> >+struct xsk_socket_config {
> >+        __u32 rx_size;
> >+        __u32 tx_size;
> >+        __u32 libbpf_flags;
> >+        __u32 xdp_flags;
> >+        __u16 bind_flags;
> >+};
> >+
> >+/* Set config to NULL to get the default configuration. */
> >+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
> >+                                void *umem_area, __u64 size,
> >+                                struct xsk_ring_prod *fill,
> >+                                struct xsk_ring_cons *comp,
> >+                                const struct xsk_umem_config *config);
> >+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
> >+                                  const char *ifname, __u32 queue_id,
> >+                                  struct xsk_umem *umem,
> >+                                  struct xsk_ring_cons *rx,
> >+                                  struct xsk_ring_prod *tx,
> >+                                  const struct xsk_socket_config *config);
> >+
> >+/* Returns 0 for success and -EBUSY if the umem is still in use. */
> >+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
> >+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
> >+
> >+#ifdef __cplusplus
> >+} /* extern "C" */
> >+#endif
> >+
> >+#endif /* __LIBBPF_XSK_H */
> >--
> >2.7.4
> >
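For anyone wanting to try the API out: with a struct xsk_ring_cons rx that
was passed to xsk_socket__create(), and umem_area being the buffer the
application registered through xsk_umem__create(), the intended receive
pattern is roughly the following (the batch size of 64 is an arbitrary
choice):

        __u32 idx;
        size_t i, rcvd;

        /* Grab up to 64 received descriptors in one batch. */
        rcvd = xsk_ring_cons__peek(&rx, 64, &idx);
        for (i = 0; i < rcvd; i++) {
                const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&rx, idx + i);
                void *pkt = xsk_umem__get_data(umem_area, desc->addr);

                /* ... process desc->len bytes at pkt ... */
        }
        if (rcvd)
                xsk_ring_cons__release(&rx, rcvd);

The matching producer calls (xsk_ring_prod__reserve(), then
xsk_ring_prod__fill_addr() or xsk_ring_prod__tx_desc(), then
xsk_ring_prod__submit()) follow the same reserve/fill/submit pattern for
the fill and TX rings.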