On Wed, 2019-06-05 at 18:21 +0000, Eads, Gage wrote: > Hi Ola, > > Is it possible to add burst enqueue and dequeue functions as well, so we can > plug this into a mempool handler? Proper burst enqueue is difficult or at least not very efficient.
> Besides the mempool handler, I think the all-or-nothing semantics would be > useful for applications. Besides that, this RFC looks good at a high level. > > For a complete submission, a few more changes are needed: > - Builds: Need to add 'lfring' to lib/meson.build and mk/rte.app.mk > - Documentation: need to update release notes, add a new section in the > programmer's guide, and update the doxygen configuration files > - Tests: This patchset should add a set of lfring tests as well > > Code comments follow. Thanks for the review comments, I only had time to look at a few of them. I will return with more complete answers and a new version of the patch. -- Ola > > <snip> > > > diff --git a/lib/Makefile b/lib/Makefile index d6239d2..cd46339 100644 > --- a/lib/Makefile > +++ b/lib/Makefile > @@ -12,6 +12,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci > DEPDIRS-librte_pci := librte_eal > DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring DEPDIRS-librte_ring := > librte_eal > +DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_lfring DEPDIRS-librte_lfring > +:= librte_eal > > LIBRTE_RING -> LIBRTE_LFRING > > > DIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += librte_mempool DEPDIRS- > librte_mempool := librte_eal librte_ring > DIRS-$(CONFIG_RTE_LIBRTE_MBUF) += librte_mbuf diff --git > a/lib/librte_lfring/Makefile b/lib/librte_lfring/Makefile new file mode 100644 > index 0000000..c04d2e9 > --- /dev/null > +++ b/lib/librte_lfring/Makefile > @@ -0,0 +1,22 @@ > +# SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2010-2014 Intel > +Corporation > > Need to correct the copyright > > > + > +include $(RTE_SDK)/mk/rte.vars.mk > + > +# library name > +LIB = librte_lfring.a > + > +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 LDLIBS += -lrte_eal > + > +EXPORT_MAP := rte_lfring_version.map > + > +LIBABIVER := 1 > + > +# all source are stored in SRCS-y > +SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_lfring.c > > LIBRTE_RING -> LIBRTE_LFRING > > > + > +# install includes > +SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_lfring.h lockfree16.h > > LIBRTE_RING -> LIBRTE_LFRING > > > + > +include $(RTE_SDK)/mk/rte.lib.mk > diff --git a/lib/librte_lfring/lockfree16.h b/lib/librte_lfring/lockfree16.h > new > file mode 100644 index 0000000..199add9 > --- /dev/null > +++ b/lib/librte_lfring/lockfree16.h > > This needs to be refactored to use the rte_atomic128_cmp_exchange interface. > > <snip> > > > diff --git a/lib/librte_lfring/meson.build b/lib/librte_lfring/meson.build new > file mode 100644 index 0000000..c8b90cb > --- /dev/null > +++ b/lib/librte_lfring/meson.build > @@ -0,0 +1,6 @@ > +# SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel > +Corporation > + > > Correct copyright > > > +version = 1 > +sources = files('rte_lfring.c') > +headers = files('rte_lfring.h','lockfree16.h') > diff --git a/lib/librte_lfring/rte_lfring.c b/lib/librte_lfring/rte_lfring.c > new file > mode 100644 index 0000000..e130f71 > --- /dev/null > +++ b/lib/librte_lfring/rte_lfring.c > @@ -0,0 +1,264 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * > + * Copyright (c) 2010-2015 Intel Corporation > + * Copyright (c) 2019 Arm Ltd > + * All rights reserved. > + */ > + > +#include <stdio.h> > +#include <stdarg.h> > +#include <string.h> > +#include <stdint.h> > +#include <inttypes.h> > +#include <errno.h> > +#include <sys/queue.h> > + > +#include <rte_common.h> > +#include <rte_log.h> > +#include <rte_memory.h> > +#include <rte_memzone.h> > +#include <rte_malloc.h> > +#include <rte_launch.h> > +#include <rte_eal.h> > +#include <rte_eal_memconfig.h> > +#include <rte_atomic.h> > +#include <rte_per_lcore.h> > +#include <rte_lcore.h> > +#include <rte_branch_prediction.h> > +#include <rte_errno.h> > +#include <rte_string_fns.h> > +#include <rte_spinlock.h> > + > > You can reduce the system and DPDK includes down to: > > #include <inttypes.h> > #include <string.h> > > #include <rte_atomic.h> > #include <rte_eal.h> > #include <rte_eal_memconfig.h> > #include <rte_errno.h> > #include <rte_malloc.h> > #include <rte_memzone.h> > #include <rte_rwlock.h> > #include <rte_tailq.h> > > > +#include "rte_lfring.h" > + > +TAILQ_HEAD(rte_lfring_list, rte_tailq_entry); > + > +static struct rte_tailq_elem rte_lfring_tailq = { > + .name = RTE_TAILQ_LFRING_NAME, > +}; > +EAL_REGISTER_TAILQ(rte_lfring_tailq) > + > +/* true if x is a power of 2 */ > +#define POWEROF2(x) ((((x)-1) & (x)) == 0) > > DPDK provides this functionality with RTE_IS_POWER_OF_2(), in rte_common.h > > > + > +/* return the size of memory occupied by a ring */ ssize_t > +rte_lfring_get_memsize(unsigned int count) { > + ssize_t sz; > + > + /* count must be a power of 2 */ > + if ((!POWEROF2(count)) || (count > 0x80000000U)) { > + RTE_LOG(ERR, RING, > > This library needs to replace the RING logging with its own LFRING logging. > > <snip> > > > +/* create the ring */ > > The comments before function definitions (e.g. "create the ring", "free the > ring") probably aren't necessary, the names are self-explanatory. > > > +struct rte_lfring * > +rte_lfring_create(const char *name, unsigned int count, int socket_id, > + unsigned int flags) > +{ > + char mz_name[RTE_MEMZONE_NAMESIZE]; > + struct rte_lfring *r; > + struct rte_tailq_entry *te; > + const struct rte_memzone *mz; > + ssize_t ring_size; > + int mz_flags = 0; > + struct rte_lfring_list *ring_list = NULL; > + const unsigned int requested_count = count; > + int ret; > + > + ring_list = RTE_TAILQ_CAST(rte_lfring_tailq.head, rte_lfring_list); > + > + count = rte_align32pow2(count); > + > + ring_size = rte_lfring_get_memsize(count); > + if (ring_size < 0) { > + rte_errno = ring_size; > + return NULL; > + } > + > + ret = snprintf(mz_name, sizeof(mz_name), "%s%s", > + RTE_LFRING_MZ_PREFIX, name); > + if (ret < 0 || ret >= (int)sizeof(mz_name)) { > + rte_errno = ENAMETOOLONG; > + return NULL; > + } > + > + te = rte_zmalloc("LFRING_TAILQ_ENTRY", sizeof(*te), 0); > + if (te == NULL) { > + RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n"); > + rte_errno = ENOMEM; > + return NULL; > + } > + > + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); > + > + /* reserve a memory zone for this ring. If we can't get rte_config or > + * we are secondary process, the memzone_reserve function will set > + * rte_errno for us appropriately - hence no check in this this > + * function > + */ > > The "or we are secondary process" comment is out-of-date; the memzone reserve > functions can be called by a secondary process. (For that matter, the > documentation in rte_memzone.h is out-of-date as well.) This restriction was > removed in commit 916e4f4f4e45. > > <snip> > > > +/* free the ring */ > +void > +rte_lfring_free(struct rte_lfring *r) > +{ > + struct rte_lfring_list *ring_list = NULL; > + struct rte_tailq_entry *te; > + > + if (r == NULL) > + return; > + > + /* > + * Ring was not created with rte_lfring_create, > + * therefore, there is no memzone to free. > + */ > + if (r->memzone == NULL) { > + RTE_LOG(ERR, RING, "Cannot free ring (not created with > rte_lfring_create()"); > + return; > + } > + > + if (rte_memzone_free(r->memzone) != 0) { > + RTE_LOG(ERR, RING, "Cannot free memory\n"); > + return; > + } > > Better to free the ring's memzone after taking it off the list, in the > unlikely event another thread looks it up at the same time and attempts to use > it. > > > + > + ring_list = RTE_TAILQ_CAST(rte_lfring_tailq.head, rte_lfring_list); > + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); > + > + /* find out tailq entry */ > + TAILQ_FOREACH(te, ring_list, next) { > + if (te->data == (void *) r) > + break; > + } > + > + if (te == NULL) { > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); > + return; > + } > + > + TAILQ_REMOVE(ring_list, te, next); > + > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); > + > + rte_free(te); > +} > > <snip> > > > +/* search a ring from its name */ > +struct rte_lfring * > +rte_lfring_lookup(const char *name) > +{ > + struct rte_tailq_entry *te; > + struct rte_lfring *r = NULL; > + struct rte_lfring_list *ring_list; > + > + ring_list = RTE_TAILQ_CAST(rte_lfring_tailq.head, rte_lfring_list); > + > + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); > + > + TAILQ_FOREACH(te, ring_list, next) { > + r = (struct rte_lfring *) te->data; > + if (strncmp(name, r->name, RTE_LFRING_NAMESIZE) == 0) > > Missing a NULL pointer check before dereferencing 'name' Why shouldn't the program crash if someone passes a NULL pointer parameter? Callers will be internal, external users should be able to control whether NULL is passed instead of a valid pointer. A crash and a core dump is the best way to detect and debug errors. > > > + break; > + } > + > + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); > + > + if (te == NULL) { > + rte_errno = ENOENT; > + return NULL; > + } > + > + return r; > +} > diff --git a/lib/librte_lfring/rte_lfring.h b/lib/librte_lfring/rte_lfring.h > new file > mode 100644 index 0000000..22d3e1c > --- /dev/null > +++ b/lib/librte_lfring/rte_lfring.h > @@ -0,0 +1,621 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * > + * Copyright (c) 2010-2017 Intel Corporation > + * Copyright (c) 2019 Arm Ltd > + * All rights reserved. > + */ > + > +#ifndef _RTE_LFRING_H_ > +#define _RTE_LFRING_H_ > + > +/** > + * @file > + * RTE Lfring > + * > + * The Lfring Manager is a fixed-size queue, implemented as a table of > + * (pointers + counters). > + * > + * - FIFO (First In First Out) > + * - Maximum size is fixed; the pointers are stored in a table. > + * - Lock-free implementation. > + * - Multi- or single-consumer dequeue. > + * - Multi- or single-producer enqueue. > + * > + */ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include <stdio.h> > +#include <stdint.h> > +#include <sys/queue.h> > +#include <errno.h> > +#include <rte_common.h> > +#include <rte_config.h> > +#include <rte_memory.h> > +#include <rte_lcore.h> > +#include <rte_atomic.h> > +#include <rte_branch_prediction.h> > +#include <rte_memzone.h> > +#include <rte_pause.h> > + > > You should be able to clean up the includes here too, e.g. rte_pause.h doesn't > appear to be used. > > > +#include "lockfree16.h" > + > +#define RTE_TAILQ_LFRING_NAME "RTE_LFRING" > + > +#define RTE_LFRING_MZ_PREFIX "RG_" > +/**< The maximum length of an lfring name. */ #define > > Change "/**<" to "/**", otherwise doxygen applies the comment to > RTE_LFRING_MZ_PREFIX > > > +RTE_LFRING_NAMESIZE (RTE_MEMZONE_NAMESIZE - \ > + sizeof(RTE_LFRING_MZ_PREFIX) + 1) > + > +struct rte_memzone; /* forward declaration, so as not to require > +memzone.h */ > > This forward declaration is no longer necessary > > <snip> > > > +/** > + * Calculate the memory size needed for a lfring > + * > + * This function returns the number of bytes needed for a lfring, given > + * the number of elements in it. This value is the sum of the size of > + * the structure rte_lfring and the size of the memory needed by the > + * objects pointers. The value is aligned to a cache line size. > + * > + * @param count > + * The number of elements in the lfring (must be a power of 2). > + * @return > + * - The memory size needed for the lfring on success. > + * - -EINVAL if count is not a power of 2. > > Missing error case: -EINVAL if size exceeds size limit > > > + */ > +ssize_t rte_lfring_get_memsize(unsigned int count); > + > +/** > + * Initialize a lfring structure. > + * > + * Initialize a lfring structure in memory pointed by "r". The size of > +the > + * memory area must be large enough to store the lfring structure and > +the > + * object table. It is advised to use rte_lfring_get_memsize() to get > +the > + * appropriate size. > + * > + * The lfring size is set to *count*, which must be a power of two. > +Water > + * marking is disabled by default. The real usable lfring size is > + * *count-1* instead of *count* to differentiate a free lfring from an > + * empty lfring. > > "free lfring" -> should say "full lfring"? Same applies to > rte_lfring_create(). > > > + * > + * The lfring is not added in RTE_TAILQ_LFRING global list. Indeed, the > + * memory given by the caller may not be shareable among dpdk > + * processes. > > Here and in rte_lfring_create(), the documentation mentions whether or not the > lfring is added to the RTE_TAILQ_LFRING list. I don't think this makes sense > in the documentation, as it pertains to the implementation and not the > interface. > > > + * > + * @param r > + * The pointer to the lfring structure followed by the objects table. > + * @param name > + * The name of the lfring. > + * @param count > + * The number of elements in the lfring (must be a power of 2). > + * @param flags > + * @return > + * 0 on success, or a negative value on error. > + */ > +int rte_lfring_init(struct rte_lfring *r, const char *name, unsigned int > count, > + unsigned int flags); > + > +/** > + * Create a new lfring named *name* in memory. > + * > + * This function uses ``memzone_reserve()`` to allocate memory. Then it > + * calls rte_lfring_init() to initialize an empty lfring. > + * > + * The new lfring size is set to *count*, which must be a power of > + * two. Water marking is disabled by default. The real usable lfring > +size > + * is *count-1* instead of *count* to differentiate a free lfring from > +an > + * empty lfring. > + * > + * The lfring is added in RTE_TAILQ_LFRING list. > + * > + * @param name > + * The name of the lfring. > + * @param count > + * The size of the lfring (must be a power of 2). > + * @param socket_id > + * The *socket_id* argument is the socket identifier in case of > + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA > + * constraint for the reserved zone. > + * @param flags > + * An OR of the following: > + * - LFRING_F_SP_ENQ: If this flag is set, the default behavior when > + * using ``rte_lfring_enqueue()`` or ``rte_lfring_enqueue_bulk()`` > + * is "single-producer". Otherwise, it is "multi-producers". > + * - LFRING_F_SC_DEQ: If this flag is set, the default behavior when > + * using ``rte_lfring_dequeue()`` or ``rte_lfring_dequeue_bulk()`` > + * is "single-consumer". Otherwise, it is "multi-consumers". > + * @return > + * On success, the pointer to the new allocated lfring. NULL on error with > + * rte_errno set appropriately. Possible errno values include: > + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config > structure > + * - E_RTE_SECONDARY - function was called from a secondary process > instance > > E_RTE_SECONDARY and E_RTE_NO_CONFIG are not possible error cases > > <snip> > > > +#define ENQ_RETRY_LIMIT 32 > > Per the coding guidelines ( > https://doc.dpdk.org/guides/contributing/coding_style.html#macros), macros > should be prefixed with RTE_. > > <snip> > > > +/** > + * Return the number of elements which can be stored in the lfring. > + * > + * @param r > + * A pointer to the lfring structure. > + * @return > + * The usable size of the lfring. > + */ > +static inline unsigned int > +rte_lfring_get_capacity(const struct rte_lfring *r) { > + return r->size; > > I believe this should return r->mask, to account for the one unusable ring > entry. I think this is a mistake, all ring entries should be usable. > > <snip> > > > diff --git a/lib/librte_lfring/rte_lfring_version.map > b/lib/librte_lfring/rte_lfring_version.map > new file mode 100644 > index 0000000..d935efd > --- /dev/null > +++ b/lib/librte_lfring/rte_lfring_version.map > @@ -0,0 +1,19 @@ > +DPDK_2.0 { > + global: > + > + rte_ring_create; > + rte_ring_dump; > + rte_ring_get_memsize; > + rte_ring_init; > + rte_ring_list_dump; > + rte_ring_lookup; > > Need to fix function names and DPDK version number > > Thanks, > Gage > > -- Ola Liljedahl, System Architect, Arm