-----Original Message----- > Date: Sun, 2 Jul 2017 22:35:08 +0100 > From: Harry van Haaren <harry.van.haa...@intel.com> > To: dev@dpdk.org > CC: jerin.ja...@caviumnetworks.com, tho...@monjalon.net, > keith.wi...@intel.com, bruce.richard...@intel.com, Harry van Haaren > <harry.van.haa...@intel.com> > Subject: [PATCH v3 1/7] service cores: header and implementation > X-Mailer: git-send-email 2.7.4 > > Add header files, update .map files with new service > functions, and add the service header to the doxygen > for building. > > This service header API allows DPDK to use services as > a concept of something that requires CPU cycles. An example > is a PMD that runs in software to schedule events, where a > hardware version exists that does not require a CPU. > > The code presented here is based on an initial RFC: > http://dpdk.org/ml/archives/dev/2017-May/065207.html > This was then reworked, and RFC v2 with the changes posted: > http://dpdk.org/ml/archives/dev/2017-June/067194.html > > This is the fourth iteration of the service core concept, > with 2 RFCs and this being v2 of the implementation. > > Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> > diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile > index a5bd108..2a93397 100644 > --- a/lib/librte_eal/common/Makefile > +++ b/lib/librte_eal/common/Makefile > @@ -41,6 +41,7 @@ INC += rte_eal_memconfig.h rte_malloc_heap.h > INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h > INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h > INC += rte_malloc.h rte_keepalive.h rte_time.h > +INC += rte_service.h rte_service_private.h
This will install rte_service_private.h file $RTE_TARGET. Based on earlier email, You don't want to expose rte_service_private.h to application. Right? If so, I think, we remove rte_service_private.h from here and add CFLAGS to point this header in local makefile of the _component_ which interested in getting eal component specific functions. > > GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h > GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h > diff --git a/lib/librte_eal/common/eal_common_lcore.c > b/lib/librte_eal/common/eal_common_lcore.c > + * called on it. > + * @retval 0 Service successfully switched off > + */ > +int32_t rte_service_stop(struct rte_service_spec *service); > + > +/** Returns if *service* is currently running. > + * > + * This function retuns true if the service has been started using s/retuns/returns/ > +int32_t rte_service_lcore_reset_all(void); > + > +/** Enable or disable statistics collection. > + * > + * This function enables per core, per-service cycle count collection. > + * @param enabled Zero to turn off statistics collection, non-zero to enable. > + */ > +void rte_service_set_stats_enable(int enabled); Since it it is a "set" function, better argument is "int enable" > + > +/** Retrieve the list of currently enabled service cores. > + * > + * This function fills in an application supplied array, with each element > + * indicating the lcore_id of a service core. > + * > + * Adding and removing service cores can be performed using > + * *rte_service_lcore_add* and *rte_service_lcore_del*. > + * @param [out] array An array of at least N items. What is N here. It should be RTE_MAX_LCORE. Right? > + * @param [out] The size of *array*. > + * @retval >=0 Number of service cores that have been populated in the array > + * @retval -ENOMEM The provided array is not large enough to fill in the > + * service core list. No items have been populated, call this > function > + * with a size of at least *rte_service_core_count* items. > + */ > +int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); > + > + cores_state = rte_calloc("rte_service_core_states", RTE_MAX_LCORE, > + sizeof(struct core_state), RTE_CACHE_LINE_SIZE); > + if (!cores_state) { > + printf("error allocating core states array\n"); > + return -ENOMEM; > + } > + > + int i; > + int count = 0; > + struct rte_config *cfg = rte_eal_get_configuration(); > + for (i = 0; i < RTE_MAX_LCORE; i++) { > + if (lcore_config[i].core_role == ROLE_SERVICE) { > + if ((unsigned)i == cfg->master_lcore) > + continue; > + rte_service_lcore_add(i); > + count++; > + } > + } > + > + rte_service_library_initialized = 1; > + return 0; > +} > + > +void rte_service_set_stats_enable(int enabled) Since it it is a "set" function, better argument is "int enable" > +{ > + uint32_t i; > + for (i = 0; i < RTE_MAX_LCORE; i++) > + cores_state[i].collect_statistics = enabled; > +} > + > +/* returns 1 if service is registered and has not been unregistered > + * Returns 0 if service never registered, or has been unregistered > + */ > +static inline int > +service_valid(uint32_t id) { > + return !!(rte_services[id].internal_flags & > + (1 << SERVICE_F_REGISTERED)); > +} > + > +uint32_t > +rte_service_get_count(void) > +{ > + return rte_service_count; > +} > + > +struct rte_service_spec * > +rte_service_get_by_id(uint32_t id) > +{ > + struct rte_service_spec *service = NULL; > + if (id < rte_service_count) > + service = (struct rte_service_spec *)&rte_services[id]; > + > + return service; > +} > + > +struct rte_service_spec *rte_service_get_by_name(const char *name) > +{ > + struct rte_service_spec *service = NULL; > + int i; > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { > + if (service_valid(i) && > + strcmp(name, rte_services[i].spec.name) == 0) > + service = (struct rte_service_spec *)&rte_services[i]; > + break; "break" should be under "if" condition. ie { and } for the "if" condition is missing > + } > + > + return service; > +} > + > +int32_t > +rte_service_register(const struct rte_service_spec *spec) > +{ > + uint32_t i; > + int32_t free_slot = -1; > + > + if (spec->callback == NULL || strlen(spec->name) == 0) > + return -EINVAL; > + > + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { > + if (!service_valid(i)) { > + free_slot = i; > + break; > + } > + } > + > + if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX)) > + return -ENOSPC; > + > + struct rte_service_spec_impl *s = &rte_services[free_slot]; > + s->spec = *spec; > + s->internal_flags |= (1 << SERVICE_F_REGISTERED); > + > + rte_smp_wmb(); > + rte_service_count++; My earlier comments on rte_smp_wmb() was in assumption that rte_service_register() called be from worker cores. Can rte_service_register() called from worker threads. No. Right? if yes then we need an atomic variable to increment rte_service_count. > + > + return 0; > +} > + > +int32_t > +rte_service_start(struct rte_service_spec *service) > +{ > + struct rte_service_spec_impl *s = > + (struct rte_service_spec_impl *)service; > + s->runstate = RUNSTATE_RUNNING; > + rte_smp_wmb(); > + return 0; > +} > + > +int32_t > +rte_service_stop(struct rte_service_spec *service) > +{ > + struct rte_service_spec_impl *s = > + (struct rte_service_spec_impl *)service; > + s->runstate = RUNSTATE_STOPPED; > + rte_smp_wmb(); > + return 0; > +} > + > +static int32_t > +rte_service_runner_func(void *arg) > +{ > + RTE_SET_USED(arg); > + uint32_t i; > + const int lcore = rte_lcore_id(); > + struct core_state *cs = &cores_state[lcore]; Couple of comments in fastpath code. IMO, better to move "service_mask" and access to global variable(so that it allocated from local stack) outside the loop. global variable may share with other frequent write variables. const uint64_t service_mask = cs->service_mask; const uint32_t __rte_service_count = rte_service_count; > + > + while (cores_state[lcore].runstate == RUNSTATE_RUNNING) { > + for (i = 0; i < rte_service_count; i++) { > + struct rte_service_spec_impl *s = &rte_services[i]; > + if (s->runstate != RUNSTATE_RUNNING || > + !(service_mask & (1 << i))) > + continue; > + > + /* check if this is the only core mapped, else use > + * atomic to serialize cores mapped to this service > + */ > + uint32_t *lock = (uint32_t *)&s->execute_lock; > + if ((s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE) || > + (s->num_mapped_cores == 1 || > + rte_atomic32_cmpset(lock, 0, 1))) { > + void *userdata = s->spec.callback_userdata; > + > + if (cs->collect_statistics) { > + uint64_t start = rte_rdtsc(); > + s->spec.callback(userdata); > + uint64_t end = rte_rdtsc(); > + s->cycles_spent += end - start; > + cs->calls_per_service[i]++; > + s->calls++; > + } else { > + cs->calls_per_service[i]++; Should we need this in non stat configuration ? > + s->spec.callback(userdata); > + s->calls++; Should we need this in non stat configuration ? > + } > + > + rte_atomic32_clear(&s->execute_lock); Call this only when "rte_atomic32_cmpset" in action as it is costly for normal case. IMO, we need to have rte_smp_rmb() here to update the status of cores_state[lcore].runstate or s->runstate(as it can be updated from other lcores) > + } > + } > + } > + > + lcore_config[lcore].state = WAIT; > + > + return 0; > +} > +