Use a multi-word bitset to track which services are mapped to which lcores, allowing the RTE_SERVICE_NUM_MAX compile-time constant to exceed 64.
Replace array-of-bytes service-currently-active flags with a more compact multi-word bitset-based representation, reducing memory footprint somewhat.

Signed-off-by: Mattias Rönnblom <mattias.ronnb...@ericsson.com>
---
 lib/eal/common/rte_service.c | 70 ++++++++++++++----------------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index 56379930b6..ec0f47e141 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@
 #include <eal_trace_internal.h>
 
 #include <rte_lcore.h>
+#include <rte_bitset.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -63,11 +64,11 @@ struct service_stats {
 /* the internal values of a service core */
 struct __rte_cache_aligned core_state {
 	/* map of services IDs are run on this core */
-	uint64_t service_mask;
+	RTE_BITSET_DECLARE(mapped_services, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
 	RTE_ATOMIC(uint8_t) thread_active; /* indicates when thread is in service_run() */
 	uint8_t is_service_core; /* set if core is currently a service core */
-	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+	RTE_BITSET_DECLARE(service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
@@ -81,11 +82,6 @@ static uint32_t rte_service_library_initialized;
 int32_t
 rte_service_init(void)
 {
-	/* Hard limit due to the use of an uint64_t-based bitmask (and the
-	 * clzl intrinsic).
-	 */
-	RTE_BUILD_BUG_ON(RTE_SERVICE_NUM_MAX > 64);
-
 	if (rte_service_library_initialized) {
 		EAL_LOG(NOTICE,
 			"service library init() called, init flag %d",
@@ -296,7 +292,7 @@ rte_service_component_unregister(uint32_t id)
 
 	/* clear the run-bit in all cores */
 	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+		rte_bitset_clear(lcore_states[i].mapped_services, id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -410,7 +406,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid.
  */
 static int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+service_run(uint32_t i, struct core_state *cs, const uint64_t *mapped_services,
 	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
@@ -424,12 +420,12 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 			RUNSTATE_RUNNING ||
 	    rte_atomic_load_explicit(&s->app_runstate, rte_memory_order_acquire) !=
 			RUNSTATE_RUNNING ||
-	    !(service_mask & (UINT64_C(1) << i))) {
-		cs->service_active_on_lcore[i] = 0;
+	    !rte_bitset_test(mapped_services, i)) {
+		rte_bitset_clear(cs->service_active_on_lcore, i);
 		return -ENOEXEC;
 	}
 
-	cs->service_active_on_lcore[i] = 1;
+	rte_bitset_set(cs->service_active_on_lcore, i);
 
 	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_spinlock_trylock(&s->execute_lock))
@@ -454,7 +450,7 @@ rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		if (rte_bitset_test(lcore_states[ids[i]].service_active_on_lcore, id))
 			return 1;
 	}
 
@@ -474,7 +470,9 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 	 */
 	rte_atomic_fetch_add_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
-	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+	RTE_BITSET_DECLARE(all_services, RTE_SERVICE_NUM_MAX);
+	rte_bitset_set_all(all_services, RTE_SERVICE_NUM_MAX);
+	int ret = service_run(id, cs, all_services, s, serialize_mt_unsafe);
 
 	rte_atomic_fetch_sub_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
@@ -485,7 +483,6 @@ static int32_t
 service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
-	uint8_t i;
 	const int lcore = rte_lcore_id();
 	struct core_state *cs = &lcore_states[lcore];
 
@@ -497,20 +494,11 @@ service_runner_func(void *arg)
 	 */
 	while (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) == RUNSTATE_RUNNING) {
+		ssize_t id;
 
-		const uint64_t service_mask = cs->service_mask;
-		uint8_t start_id;
-		uint8_t end_id;
-
-		if (service_mask == 0)
-			continue;
-
-		start_id = rte_ctz64(service_mask);
-		end_id = 64 - rte_clz64(service_mask);
-
-		for (i = start_id; i < end_id; i++) {
+		RTE_BITSET_FOREACH_SET(id, cs->mapped_services, RTE_SERVICE_NUM_MAX) {
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i), 1);
+			service_run(id, cs, cs->mapped_services, service_get(id), 1);
 		}
 
 		rte_atomic_store_explicit(&cs->loops, cs->loops + 1, rte_memory_order_relaxed);
 	}
@@ -519,8 +507,7 @@
 	/* Switch off this core for all services, to ensure that future
 	 * calls to may_be_active() know this core is switched off.
 	 */
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
-		cs->service_active_on_lcore[i] = 0;
+	rte_bitset_clear_all(cs->service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 
 	/* Use SEQ CST memory ordering to avoid any re-ordering around
 	 * this store, ensuring that once this store is visible, the service
@@ -586,7 +573,7 @@ rte_service_lcore_count_services(uint32_t lcore)
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
-	return rte_popcount64(cs->service_mask);
+	return rte_bitset_count_set(cs->mapped_services, RTE_SERVICE_NUM_MAX);
 }
 
 int32_t
@@ -639,25 +626,23 @@ service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 	    !lcore_states[lcore].is_service_core)
 		return -EINVAL;
 
-	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			rte_bitset_set(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores, 1,
 				rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			rte_bitset_clear(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores, 1,
 				rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 	return 0;
 }
@@ -699,11 +684,11 @@ set_lcore_state(uint32_t lcore, int32_t state)
 int32_t
 rte_service_lcore_reset_all(void)
 {
-	/* loop over cores, reset all to mask 0 */
+	/* loop over cores, reset all mapped services */
 	uint32_t i;
	for (i = 0; i < RTE_MAX_LCORE; i++) {
 		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+			rte_bitset_clear_all(lcore_states[i].mapped_services, RTE_SERVICE_NUM_MAX);
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
@@ -731,7 +716,7 @@ rte_service_lcore_add(uint32_t lcore)
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	rte_bitset_clear_all(lcore_states[lcore].mapped_services, RTE_SERVICE_NUM_MAX);
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
@@ -814,12 +799,11 @@ rte_service_lcore_stop(uint32_t lcore)
 
 	uint32_t i;
 	struct core_state *cs = &lcore_states[lcore];
-	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		int32_t enabled = service_mask & (UINT64_C(1) << i);
-		int32_t service_running = rte_service_runstate_get(i);
-		int32_t only_core = (1 ==
+		bool enabled = rte_bitset_test(cs->mapped_services, i);
+		bool service_running = rte_service_runstate_get(i);
+		bool only_core = (1 ==
 			rte_atomic_load_explicit(&rte_services[i].num_mapped_cores,
 				rte_memory_order_relaxed));
 
--
2.34.1
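
For readers new to <rte_bitset.h>, the following is a minimal, self-contained sketch of the bookkeeping pattern the patch switches to. NUM_SERVICES and struct toy_core_state are hypothetical stand-ins for RTE_SERVICE_NUM_MAX and core_state, and are not part of DPDK or of this patch; the rte_bitset calls are the ones the diff itself uses.

/*
 * Sketch only: toy_core_state and NUM_SERVICES are illustrative
 * stand-ins for core_state and RTE_SERVICE_NUM_MAX.
 */
#include <stdio.h>

#include <rte_bitset.h>

#define NUM_SERVICES 192	/* hypothetical value above the old 64-service limit */

struct toy_core_state {
	/* one bit per service, stored in an array of 64-bit words */
	RTE_BITSET_DECLARE(mapped_services, NUM_SERVICES);
};

int
main(void)
{
	struct toy_core_state cs;

	/* start from an empty mapping, as rte_service_lcore_add() does */
	rte_bitset_clear_all(cs.mapped_services, NUM_SERVICES);

	/* map services 3 and 130; id 130 would not fit in a uint64_t mask */
	rte_bitset_set(cs.mapped_services, 3);
	rte_bitset_set(cs.mapped_services, 130);

	printf("service 130 mapped: %d\n",
		rte_bitset_test(cs.mapped_services, 130) ? 1 : 0);
	printf("mapped count: %zu\n",
		(size_t)rte_bitset_count_set(cs.mapped_services, NUM_SERVICES));

	/* unmap one service, as service_update() does when *set == 0 */
	rte_bitset_clear(cs.mapped_services, 3);

	return 0;
}

RTE_BITSET_DECLARE() expands to an ordinary uint64_t array sized to hold one bit per entry, so the struct stays flat and requires no allocation.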
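The other substantive change is the scan in service_runner_func(): the rte_ctz64()/rte_clz64() bounds computation over a single 64-bit mask is replaced with RTE_BITSET_FOREACH_SET(), which visits only the set bits of a bitset of any compile-time size. A sketch under the same assumptions as above (run_one_service() is a hypothetical stand-in for service_run()):

/*
 * Sketch only: run_one_service() stands in for service_run();
 * NUM_SERVICES stands in for RTE_SERVICE_NUM_MAX.
 */
#include <stdio.h>
#include <sys/types.h>

#include <rte_bitset.h>

#define NUM_SERVICES 192

static void
run_one_service(ssize_t id)
{
	printf("running service %zd\n", id);
}

int
main(void)
{
	RTE_BITSET_DECLARE(mapped, NUM_SERVICES);
	ssize_t id;

	rte_bitset_clear_all(mapped, NUM_SERVICES);
	rte_bitset_set(mapped, 5);
	rte_bitset_set(mapped, 70);
	rte_bitset_set(mapped, 191);

	/* visits ids 5, 70 and 191 only; cleared bits are skipped, so no
	 * per-iteration test against a mask is needed
	 */
	RTE_BITSET_FOREACH_SET(id, mapped, NUM_SERVICES) {
		run_one_service(id);
	}

	return 0;
}

Note that the old empty-mask special case (the "if (service_mask == 0) continue;" branch) disappears: with no bits set, the loop body simply never executes.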