-----Original Message----- > Date: Fri, 7 Jul 2017 17:41:01 +0100 > From: Harry van Haaren <harry.van.haa...@intel.com> > To: dev@dpdk.org > CC: tho...@monjalon.net, jerin.ja...@caviumnetworks.com, > keith.wi...@intel.com, bruce.richard...@intel.com, Harry van Haaren > <harry.van.haa...@intel.com> > Subject: [PATCH v4 1/7] service cores: header and implementation > X-Mailer: git-send-email 2.7.4 > > Add header files, update .map files with new service > functions, and add the service header to the doxygen > for building. > > This service header API allows DPDK to use services as > a concept of something that requires CPU cycles. An example > is a PMD that runs in software to schedule events, where a > hardware version exists that does not require a CPU. > > The code presented here is based on an initial RFC: > http://dpdk.org/ml/archives/dev/2017-May/065207.html > This was then reworked, and RFC v2 with the changes posted: > http://dpdk.org/ml/archives/dev/2017-June/067194.html > > This is the fourth iteration of the service core concept, > with 2 RFCs and this being v2 of the implementation.
Remove above info from the git commit. > > Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> > > --- > doc/api/doxy-api-index.md | 1 + > lib/librte_eal/bsdapp/eal/Makefile | 1 + > lib/librte_eal/bsdapp/eal/rte_eal_version.map | 22 + > lib/librte_eal/common/Makefile | 1 + > lib/librte_eal/common/eal_common_lcore.c | 1 + > lib/librte_eal/common/include/rte_eal.h | 4 + > lib/librte_eal/common/include/rte_lcore.h | 3 +- > lib/librte_eal/common/include/rte_service.h | 383 ++++++++++++ > .../common/include/rte_service_private.h | 140 +++++ > lib/librte_eal/common/rte_service.c | 687 > +++++++++++++++++++++ > lib/librte_eal/linuxapp/eal/Makefile | 1 + > lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- > lib/librte_eal/linuxapp/eal/rte_eal_version.map | 22 + > 13 files changed, 1273 insertions(+), 2 deletions(-) > create mode 100644 lib/librte_eal/common/include/rte_service.h > create mode 100644 lib/librte_eal/common/include/rte_service_private.h > create mode 100644 lib/librte_eal/common/rte_service.c > > diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md > index 3b83288..e2abdf4 100644 > --- a/doc/api/doxy-api-index.md > +++ b/doc/api/doxy-api-index.md > @@ -159,6 +159,7 @@ There are many libraries, so their headers may be grouped > by topics: > [common] (@ref rte_common.h), > [ABI compat] (@ref rte_compat.h), > [keepalive] (@ref rte_keepalive.h), > + [service cores] (@ref rte_service.h), > [device metrics] (@ref rte_metrics.h), > [bitrate statistics] (@ref rte_bitrate.h), > [latency statistics] (@ref rte_latencystats.h), Fix the below mentioned documentation warning. +/export/dpdk.org/lib/librte_eal/common/include/rte_service.h:338: warning: argument 'enabled' of command @param is not found in the argument list of rte_service_set_stats_enable(int enable) +/export/dpdk.org/lib/librte_eal/common/include/rte_service.h:346: warning: The following parameters of rte_service_set_stats_enable(int enable) are not documented: + parameter 'enable' +/export/dpdk.org/lib/librte_eal/common/include/rte_service.h:349: warning: argument 'The' of command @param is not found in the argument list of rte_service_lcore_list(uint32_t array[], uint32_t n) +/export/dpdk.org/lib/librte_eal/common/include/rte_service.h:367: warning: The following parameters of rte_service_lcore_list(uint32_t array[], uint32_t n) are not documented: + parameter 'n' command to reproduce: ./devtools/test-build.sh -j8 x86_64-native-linuxapp-gcc+shared x86_64-native-linuxapp-gcc+debug > } DPDK_17.08; > diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile > index f2fe052..942f03c 100644 > --- a/lib/librte_eal/common/Makefile > +++ b/lib/librte_eal/common/Makefile > @@ -41,6 +41,7 @@ INC += rte_eal_memconfig.h rte_malloc_heap.h > INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h > INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h > INC += rte_malloc.h rte_keepalive.h rte_time.h > +INC += rte_service.h rte_service_private.h IMO, We don't need to expose rte_service_private.h to application. If you agree, add the following or similar change diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index 857a87cc5..442652e93 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -36,6 +36,8 @@ LIB = librte_pmd_sw_event.a # build flags CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include/ + # for older GCC versions, allow us to initialize an event using # designated initializers. ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index 942f03cdd..ea6c1f8f1 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -41,7 +41,7 @@ INC += rte_eal_memconfig.h rte_malloc_heap.h INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h -INC += rte_service.h rte_service_private.h +INC += rte_service.h diff --git a/test/test/Makefile b/test/test/Makefile index 42d9a49e2..b603b6563 100644 --- a/test/test/Makefile +++ b/test/test/Makefile @@ -152,6 +152,7 @@ SRCS-y += test_version.c SRCS-y += test_func_reentrancy.c SRCS-y += test_service_cores.c +CFLAGS_test_service_cores.o += -I$(RTE_SDK)/lib/librte_eal/common/include/ SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_num. > > GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h > GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h > diff --git a/lib/librte_eal/common/eal_common_lcore.c > b/lib/librte_eal/common/eal_common_lcore.c > index 84fa0cb..0db1555 100644 > --- a/lib/librte_eal/common/eal_common_lcore.c > +++ b/lib/librte_eal/common/eal_common_lcore.c > @@ -81,6 +81,7 @@ rte_eal_cpu_init(void) > > /* By default, each detected core is enabled */ > config->lcore_role[lcore_id] = ROLE_RTE; > + lcore_config[lcore_id].core_role = ROLE_RTE; > lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); > lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); > if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) { > diff --git a/lib/librte_eal/common/include/rte_eal.h > b/lib/librte_eal/common/include/rte_eal.h > index abf020b..4dd0518 100644 > --- a/lib/librte_eal/common/include/rte_eal.h > +++ b/lib/librte_eal/common/include/rte_eal.h > @@ -61,6 +61,7 @@ extern "C" { > enum rte_lcore_role_t { > ROLE_RTE, > ROLE_OFF, > + ROLE_SERVICE, > }; > > + > +#define SERVICE_F_REGISTERED 0 > + > +/* runstates for services and lcores, denoting if they are active or not */ > +#define RUNSTATE_STOPPED 0 > +#define RUNSTATE_RUNNING 1 > + > +/* internal representation of a service */ > +struct rte_service_spec_impl { > + /* public part of the struct */ > + struct rte_service_spec spec; > + > + /* atomic lock that when set indicates a service core is currently > + * running this service callback. When not set, a core may take the > + * lock and then run the service callback. > + */ > + rte_atomic32_t execute_lock; > + > + /* API set/get-able variables */ > + int32_t runstate; > + uint8_t internal_flags; > + > + /* per service statistics */ > + uint32_t num_mapped_cores; > + uint64_t calls; > + uint64_t cycles_spent; > +} __rte_cache_aligned; > + > +/* the internal values of a service core */ > +struct core_state { Change to lcore_state. > + /* map of services IDs are run on this core */ > + uint64_t service_mask; > + uint8_t runstate; /* running or stopped */ > + uint8_t is_service_core; /* set if core is currently a service core */ > + uint8_t collect_statistics; /* if set, measure cycle counts */ > + > + /* extreme statistics */ > + uint64_t calls_per_service[RTE_SERVICE_NUM_MAX]; > +} __rte_cache_aligned; > + > +static uint32_t rte_service_count; > +static struct rte_service_spec_impl *rte_services; > +static struct core_state *cores_state; > +static uint32_t rte_service_library_initialized; > + > +int32_t rte_service_init(void) > +{ > + if (rte_service_library_initialized) { > + printf("service library init() called, init flag %d\n", > + rte_service_library_initialized); > + return -EALREADY; > + } > + > + rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX, > + sizeof(struct rte_service_spec_impl), > + RTE_CACHE_LINE_SIZE); > + if (!rte_services) { > + printf("error allocating rte services array\n"); > + return -ENOMEM; > + } > + > + cores_state = rte_calloc("rte_service_core_states", RTE_MAX_LCORE, > + sizeof(struct core_state), RTE_CACHE_LINE_SIZE); > + if (!cores_state) { > + printf("error allocating core states array\n"); > + return -ENOMEM; > + } > + > + int i; > + int count = 0; > + struct rte_config *cfg = rte_eal_get_configuration(); > + for (i = 0; i < RTE_MAX_LCORE; i++) { > + if (lcore_config[i].core_role == ROLE_SERVICE) { > + if ((unsigned int)i == cfg->master_lcore) > + continue; > + rte_service_lcore_add(i); > + count++; > + } > + } > + > + rte_service_library_initialized = 1; > + return 0; > +} > + > +void rte_service_set_stats_enable(int enabled) IMO, It should be per service i.e rte_service_set_stats_enable(const struct rte_service_spec *spec, int enable) > +{ > + uint32_t i; > + for (i = 0; i < RTE_MAX_LCORE; i++) > + cores_state[i].collect_statistics = enabled; > +} > + > +/* returns 1 if service is registered and has not been unregistered > + * Returns 0 if service never registered, or has been unregistered > + */ > +static inline int > +service_valid(uint32_t id) { > + return !!(rte_services[id].internal_flags & > + (1 << SERVICE_F_REGISTERED)); > +} > + > +uint32_t > +rte_service_get_count(void) > +{ > + return rte_service_count; > +} > + > +struct rte_service_spec * > +rte_service_get_by_id(uint32_t id) > +{ > + struct rte_service_spec *service = NULL; > + if (id < rte_service_count) > + service = (struct rte_service_spec *)&rte_services[id]; > + > + return service; > +} > + > +struct rte_service_spec *rte_service_get_by_name(const char *name) > +{ > + struct rte_service_spec *service = NULL; > + int i; > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { > + if (service_valid(i) && > + strcmp(name, rte_services[i].spec.name) == 0) { > + service = (struct rte_service_spec *)&rte_services[i]; > + break; > + } > + } > + > + return service; > +} > + > +const char * > +rte_service_get_name(const struct rte_service_spec *service) > +{ > + return service->name; > +} > + > +int32_t > +rte_service_probe_capability(const struct rte_service_spec *service, > + uint32_t capability) > +{ > + return service->capabilities & capability; > +} > + > +int32_t > +rte_service_is_running(const struct rte_service_spec *spec) > +{ > + const struct rte_service_spec_impl *impl = > + (const struct rte_service_spec_impl *)spec; > + if (!impl) > + return -EINVAL; > + > + return (impl->runstate == RUNSTATE_RUNNING) && > + (impl->num_mapped_cores > 0); > +} > + > +int32_t > +rte_service_register(const struct rte_service_spec *spec) > +{ > + uint32_t i; > + int32_t free_slot = -1; > + > + if (spec->callback == NULL || strlen(spec->name) == 0) > + return -EINVAL; > + > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { > + if (!service_valid(i)) { > + free_slot = i; > + break; > + } > + } > + > + if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX)) > + return -ENOSPC; > + > + struct rte_service_spec_impl *s = &rte_services[free_slot]; > + s->spec = *spec; > + s->internal_flags |= (1 << SERVICE_F_REGISTERED); > + > + rte_smp_wmb(); > + rte_service_count++; > + > + return 0; > +} > + > +int32_t > +rte_service_unregister(struct rte_service_spec *spec) > +{ > + struct rte_service_spec_impl *s = NULL; > + struct rte_service_spec_impl *spec_impl = > + (struct rte_service_spec_impl *)spec; > + > + uint32_t i; > + uint32_t service_id; > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { > + if (&rte_services[i] == spec_impl) { > + s = spec_impl; > + service_id = i; > + break; > + } > + } > + > + if (!s) > + return -EINVAL; > + > + rte_service_count--; > + rte_smp_wmb(); > + > + s->internal_flags &= ~(1 << SERVICE_F_REGISTERED); > + > + for (i = 0; i < RTE_MAX_LCORE; i++) > + cores_state[i].service_mask &= ~(1 << service_id); > + > + memset(&rte_services[service_id], 0, > + sizeof(struct rte_service_spec_impl)); > + > + return 0; > +} > + > +int32_t > +rte_service_start(struct rte_service_spec *service) > +{ > + struct rte_service_spec_impl *s = > + (struct rte_service_spec_impl *)service; > + s->runstate = RUNSTATE_RUNNING; > + rte_smp_wmb(); > + return 0; > +} > + > +int32_t > +rte_service_stop(struct rte_service_spec *service) > +{ > + struct rte_service_spec_impl *s = > + (struct rte_service_spec_impl *)service; > + s->runstate = RUNSTATE_STOPPED; > + rte_smp_wmb(); > + return 0; > +} > + > +static int32_t > +rte_service_runner_func(void *arg) > +{ > + RTE_SET_USED(arg); > + uint32_t i; > + const int lcore = rte_lcore_id(); > + struct core_state *cs = &cores_state[lcore]; > + > + while (cores_state[lcore].runstate == RUNSTATE_RUNNING) { > + const uint64_t service_mask = cs->service_mask; > + for (i = 0; i < rte_service_count; i++) { > + struct rte_service_spec_impl *s = &rte_services[i]; > + if (s->runstate != RUNSTATE_RUNNING || > + !(service_mask & (1 << i))) > + continue; > + > + /* check if this is the only core mapped, else use > + * atomic to serialize cores mapped to this service > + */ > + uint32_t *lock = (uint32_t *)&s->execute_lock; > + if ((s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE) || > + (s->num_mapped_cores == 1 || > + rte_atomic32_cmpset(lock, 0, 1))) { > + void *userdata = s->spec.callback_userdata; > + > + if (cs->collect_statistics) { > + uint64_t start = rte_rdtsc(); > + s->spec.callback(userdata); > + uint64_t end = rte_rdtsc(); > + s->cycles_spent += end - start; > + cs->calls_per_service[i]++; > + s->calls++; > + } else > + s->spec.callback(userdata); > + > + if ((s->spec.capabilities & > + RTE_SERVICE_CAP_MT_SAFE) == 0 && > + s->num_mapped_cores > 1) How about computing the non rte_atomic32_cmpset() mode value first and using in both place i.e here and in the top "if" loop const int need_cmpset = (s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE)... if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) .. if (need_cmpset) rte_atomic32_clear().. > + rte_atomic32_clear(&s->execute_lock); > + } > + } > + > + rte_smp_rmb(); > + } > + > + lcore_config[lcore].state = WAIT; > + > + return 0; > +} > + > +int32_t > +rte_service_lcore_count(void) > +{ > + int32_t count = 0; > + uint32_t i; > + for (i = 0; i < RTE_MAX_LCORE; i++) > + count += cores_state[i].is_service_core; > + return count; > +} > + > +int32_t > +rte_service_lcore_list(uint32_t array[], uint32_t n) > +{ > + uint32_t count = rte_service_lcore_count(); > + if (count > n) > + return -ENOMEM; > + > + if (!array) > + return -EINVAL; > + > + uint32_t i; > + uint32_t idx = 0; > + for (i = 0; i < RTE_MAX_LCORE; i++) { > + struct core_state *cs = &cores_state[i]; > + if (cs->is_service_core) { > + array[idx] = i; > + idx++; > + } > + } > + > + return count; > +} > + > +int32_t > +rte_service_set_default_mapping(void) > +{ > + /* create a default mapping from cores to services, then start the > + * services to make them transparent to unaware applications. > + */ > + uint32_t i; > + int ret; > + uint32_t count = rte_service_get_count(); > + > + int32_t lcore_iter = 0; > + uint32_t ids[RTE_MAX_LCORE]; > + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); > + > + for (i = 0; i < count; i++) { > + struct rte_service_spec *s = rte_service_get_by_id(i); > + if (!s) > + return -EINVAL; > + > + /* if no lcores available as services cores, don't setup map. > + * This means app logic must add cores, and setup mappings > + */ > + if (lcore_count > 0) { > + /* do 1:1 core mapping here, with each service getting > + * assigned a single core by default. Adding multiple > + * services should multiplex to a single core, or 1:1 > + * if services == cores > + */ > + ret = rte_service_enable_on_lcore(s, ids[lcore_iter]); > + if (ret) > + return -ENODEV; > + } > + > + lcore_iter++; > + if (lcore_iter >= lcore_count) > + lcore_iter = 0; > + > + ret = rte_service_start(s); IMO, we don't need to start the service if lcore_count == 0. How about moving the "if (lcore_count > 0)" check on top of for the loop and exist from the function if lcore_count == 0. > + if (ret) > + return -ENOEXEC; > + } > + > + return 0; > +} > + > +static int32_t > +service_update(struct rte_service_spec *service, uint32_t lcore, > + uint32_t *set, uint32_t *enabled) > +{ > + uint32_t i; > + int32_t sid = -1; > + > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { > + if ((struct rte_service_spec *)&rte_services[i] == service && > + service_valid(i)) { > + sid = i; > + break; > + } > + } > + > + if (sid == -1 || lcore >= RTE_MAX_LCORE) > + return -EINVAL; > + > + if (!cores_state[lcore].is_service_core) > + return -EINVAL; > + > + if (set) { > + if (*set) { > + cores_state[lcore].service_mask |= (1 << sid); > + rte_services[sid].num_mapped_cores++; > + } else { > + cores_state[lcore].service_mask &= ~(1 << sid); > + rte_services[sid].num_mapped_cores--; > + } > + } > + > + if (enabled) > + *enabled = (cores_state[lcore].service_mask & (1 << sid)); > + > + rte_smp_wmb(); > + > + return 0; > +} > + > +int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, > + uint32_t lcore) > +{ > + uint32_t enabled; > + int ret = service_update(service, lcore, 0, &enabled); > + if (ret == 0) > + return enabled; > + return -EINVAL; > +} > + > +int32_t > +rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore) > +{ > + uint32_t on = 1; > + return service_update(service, lcore, &on, 0); > +} > + > +int32_t > +rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t > lcore) > +{ > + uint32_t off = 0; > + return service_update(service, lcore, &off, 0); > +} > + > +int32_t rte_service_lcore_reset_all(void) > +{ > + /* loop over cores, reset all to mask 0 */ > + uint32_t i; > + for (i = 0; i < RTE_MAX_LCORE; i++) { > + cores_state[i].service_mask = 0; > + cores_state[i].is_service_core = 0; > + cores_state[i].runstate = RUNSTATE_STOPPED; > + } > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) > + rte_services[i].num_mapped_cores = 0; > + > + rte_smp_wmb(); > + > + return 0; > +} > + > +static void > +set_lcore_state(uint32_t lcore, int32_t state) > +{ > + /* mark core state in hugepage backed config */ > + struct rte_config *cfg = rte_eal_get_configuration(); > + cfg->lcore_role[lcore] = state; > + > + /* mark state in process local lcore_config */ > + lcore_config[lcore].core_role = state; > + > + /* update per-lcore optimized state tracking */ > + cores_state[lcore].is_service_core = (state == ROLE_SERVICE); > +} > + > +int32_t > +rte_service_lcore_add(uint32_t lcore) > +{ > + if (lcore >= RTE_MAX_LCORE) > + return -EINVAL; > + if (cores_state[lcore].is_service_core) > + return -EALREADY; > + > + set_lcore_state(lcore, ROLE_SERVICE); > + > + /* ensure that after adding a core the mask and state are defaults */ > + cores_state[lcore].service_mask = 0; > + cores_state[lcore].runstate = RUNSTATE_STOPPED; If worker core can call rte_service_lcore_add() then add rte_smp_wmb() here. Applies to rte_service_lcore_del() as well.