Replace static array of cache-aligned structs with an lcore variable, to slightly benefit code simplicity and performance.
Signed-off-by: Mattias Rönnblom <mattias.ronnb...@ericsson.com> --- lib/eal/common/rte_service.c | 119 ++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c index d959c91459..de205c5da5 100644 --- a/lib/eal/common/rte_service.c +++ b/lib/eal/common/rte_service.c @@ -11,6 +11,7 @@ #include <eal_trace_internal.h> #include <rte_lcore.h> +#include <rte_lcore_var.h> #include <rte_branch_prediction.h> #include <rte_common.h> #include <rte_cycles.h> @@ -75,7 +76,7 @@ struct core_state { static uint32_t rte_service_count; static struct rte_service_spec_impl *rte_services; -static struct core_state *lcore_states; +static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states); static uint32_t rte_service_library_initialized; int32_t @@ -101,11 +102,12 @@ rte_service_init(void) goto fail_mem; } - lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE, - sizeof(struct core_state), RTE_CACHE_LINE_SIZE); - if (!lcore_states) { - EAL_LOG(ERR, "error allocating core states array"); - goto fail_mem; + if (lcore_states == NULL) + RTE_LCORE_VAR_ALLOC(lcore_states); + else { + struct core_state *cs; + RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states) + memset(cs, 0, sizeof(struct core_state)); } int i; @@ -122,7 +124,6 @@ rte_service_init(void) return 0; fail_mem: rte_free(rte_services); - rte_free(lcore_states); return -ENOMEM; } @@ -136,7 +137,6 @@ rte_service_finalize(void) rte_eal_mp_wait_lcore(); rte_free(rte_services); - rte_free(lcore_states); rte_service_library_initialized = 0; } @@ -286,7 +286,6 @@ rte_service_component_register(const struct rte_service_spec *spec, int32_t rte_service_component_unregister(uint32_t id) { - uint32_t i; struct rte_service_spec_impl *s; SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); @@ -294,9 +293,10 @@ rte_service_component_unregister(uint32_t id) s->internal_flags &= ~(SERVICE_F_REGISTERED); + struct core_state *cs; /* clear the run-bit in all cores */ - for (i = 0; i < RTE_MAX_LCORE; i++) - lcore_states[i].service_mask &= ~(UINT64_C(1) << id); + RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states) + cs->service_mask &= ~(UINT64_C(1) << id); memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl)); @@ -454,7 +454,10 @@ rte_service_may_be_active(uint32_t id) return -EINVAL; for (i = 0; i < lcore_count; i++) { - if (lcore_states[ids[i]].service_active_on_lcore[id]) + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(ids[i], lcore_states); + + if (cs->service_active_on_lcore[id]) return 1; } @@ -464,7 +467,7 @@ rte_service_may_be_active(uint32_t id) int32_t rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) { - struct core_state *cs = &lcore_states[rte_lcore_id()]; + struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states); struct rte_service_spec_impl *s; SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); @@ -486,8 +489,7 @@ service_runner_func(void *arg) { RTE_SET_USED(arg); uint8_t i; - const int lcore = rte_lcore_id(); - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states); rte_atomic_store_explicit(&cs->thread_active, 1, rte_memory_order_seq_cst); @@ -533,13 +535,16 @@ service_runner_func(void *arg) int32_t rte_service_lcore_may_be_active(uint32_t lcore) { - if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core) + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); + + if (lcore >= RTE_MAX_LCORE || !cs->is_service_core) return -EINVAL; /* Load thread_active using ACQUIRE to avoid instructions dependent on * the result being re-ordered before this load completes. */ - return rte_atomic_load_explicit(&lcore_states[lcore].thread_active, + return rte_atomic_load_explicit(&cs->thread_active, rte_memory_order_acquire); } @@ -547,9 +552,11 @@ int32_t rte_service_lcore_count(void) { int32_t count = 0; - uint32_t i; - for (i = 0; i < RTE_MAX_LCORE; i++) - count += lcore_states[i].is_service_core; + + struct core_state *cs; + RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states) + count += cs->is_service_core; + return count; } @@ -566,7 +573,8 @@ rte_service_lcore_list(uint32_t array[], uint32_t n) uint32_t i; uint32_t idx = 0; for (i = 0; i < RTE_MAX_LCORE; i++) { - struct core_state *cs = &lcore_states[i]; + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(i, lcore_states); if (cs->is_service_core) { array[idx] = i; idx++; @@ -582,7 +590,7 @@ rte_service_lcore_count_services(uint32_t lcore) if (lcore >= RTE_MAX_LCORE) return -EINVAL; - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); if (!cs->is_service_core) return -ENOTSUP; @@ -634,30 +642,31 @@ rte_service_start_with_defaults(void) static int32_t service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled) { + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); + /* validate ID, or return error value */ if (!service_valid(sid) || lcore >= RTE_MAX_LCORE || - !lcore_states[lcore].is_service_core) + !cs->is_service_core) return -EINVAL; uint64_t sid_mask = UINT64_C(1) << sid; if (set) { - uint64_t lcore_mapped = lcore_states[lcore].service_mask & - sid_mask; + uint64_t lcore_mapped = cs->service_mask & sid_mask; if (*set && !lcore_mapped) { - lcore_states[lcore].service_mask |= sid_mask; + cs->service_mask |= sid_mask; rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores, 1, rte_memory_order_relaxed); } if (!*set && lcore_mapped) { - lcore_states[lcore].service_mask &= ~(sid_mask); + cs->service_mask &= ~(sid_mask); rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores, 1, rte_memory_order_relaxed); } } if (enabled) - *enabled = !!(lcore_states[lcore].service_mask & (sid_mask)); + *enabled = !!(cs->service_mask & (sid_mask)); return 0; } @@ -685,13 +694,14 @@ set_lcore_state(uint32_t lcore, int32_t state) { /* mark core state in hugepage backed config */ struct rte_config *cfg = rte_eal_get_configuration(); + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); cfg->lcore_role[lcore] = state; /* mark state in process local lcore_config */ lcore_config[lcore].core_role = state; /* update per-lcore optimized state tracking */ - lcore_states[lcore].is_service_core = (state == ROLE_SERVICE); + cs->is_service_core = (state == ROLE_SERVICE); rte_eal_trace_service_lcore_state_change(lcore, state); } @@ -702,14 +712,16 @@ rte_service_lcore_reset_all(void) /* loop over cores, reset all to mask 0 */ uint32_t i; for (i = 0; i < RTE_MAX_LCORE; i++) { - if (lcore_states[i].is_service_core) { - lcore_states[i].service_mask = 0; + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(i, lcore_states); + if (cs->is_service_core) { + cs->service_mask = 0; set_lcore_state(i, ROLE_RTE); /* runstate act as guard variable Use * store-release memory order here to synchronize * with load-acquire in runstate read functions. */ - rte_atomic_store_explicit(&lcore_states[i].runstate, + rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED, rte_memory_order_release); } } @@ -725,17 +737,19 @@ rte_service_lcore_add(uint32_t lcore) { if (lcore >= RTE_MAX_LCORE) return -EINVAL; - if (lcore_states[lcore].is_service_core) + + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); + if (cs->is_service_core) return -EALREADY; set_lcore_state(lcore, ROLE_SERVICE); /* ensure that after adding a core the mask and state are defaults */ - lcore_states[lcore].service_mask = 0; + cs->service_mask = 0; /* Use store-release memory order here to synchronize with * load-acquire in runstate read functions. */ - rte_atomic_store_explicit(&lcore_states[lcore].runstate, RUNSTATE_STOPPED, + rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED, rte_memory_order_release); return rte_eal_wait_lcore(lcore); @@ -747,7 +761,7 @@ rte_service_lcore_del(uint32_t lcore) if (lcore >= RTE_MAX_LCORE) return -EINVAL; - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); if (!cs->is_service_core) return -EINVAL; @@ -771,7 +785,7 @@ rte_service_lcore_start(uint32_t lcore) if (lcore >= RTE_MAX_LCORE) return -EINVAL; - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); if (!cs->is_service_core) return -EINVAL; @@ -801,6 +815,8 @@ rte_service_lcore_start(uint32_t lcore) int32_t rte_service_lcore_stop(uint32_t lcore) { + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); + if (lcore >= RTE_MAX_LCORE) return -EINVAL; @@ -808,12 +824,11 @@ rte_service_lcore_stop(uint32_t lcore) * memory order here to synchronize with store-release * in runstate update functions. */ - if (rte_atomic_load_explicit(&lcore_states[lcore].runstate, rte_memory_order_acquire) == + if (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) == RUNSTATE_STOPPED) return -EALREADY; uint32_t i; - struct core_state *cs = &lcore_states[lcore]; uint64_t service_mask = cs->service_mask; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { @@ -834,7 +849,7 @@ rte_service_lcore_stop(uint32_t lcore) /* Use store-release memory order here to synchronize with * load-acquire in runstate read functions. */ - rte_atomic_store_explicit(&lcore_states[lcore].runstate, RUNSTATE_STOPPED, + rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED, rte_memory_order_release); rte_eal_trace_service_lcore_stop(lcore); @@ -845,7 +860,7 @@ rte_service_lcore_stop(uint32_t lcore) static uint64_t lcore_attr_get_loops(unsigned int lcore) { - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); return rte_atomic_load_explicit(&cs->loops, rte_memory_order_relaxed); } @@ -853,7 +868,7 @@ lcore_attr_get_loops(unsigned int lcore) static uint64_t lcore_attr_get_cycles(unsigned int lcore) { - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); return rte_atomic_load_explicit(&cs->cycles, rte_memory_order_relaxed); } @@ -861,7 +876,7 @@ lcore_attr_get_cycles(unsigned int lcore) static uint64_t lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore) { - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); return rte_atomic_load_explicit(&cs->service_stats[service_id].calls, rte_memory_order_relaxed); @@ -870,7 +885,7 @@ lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore) static uint64_t lcore_attr_get_service_cycles(uint32_t service_id, unsigned int lcore) { - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); return rte_atomic_load_explicit(&cs->service_stats[service_id].cycles, rte_memory_order_relaxed); @@ -886,7 +901,10 @@ attr_get(uint32_t id, lcore_attr_get_fun lcore_attr_get) uint64_t sum = 0; for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { - if (lcore_states[lcore].is_service_core) + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); + + if (cs->is_service_core) sum += lcore_attr_get(id, lcore); } @@ -930,12 +948,11 @@ int32_t rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, uint64_t *attr_value) { - struct core_state *cs; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); if (lcore >= RTE_MAX_LCORE || !attr_value) return -EINVAL; - cs = &lcore_states[lcore]; if (!cs->is_service_core) return -ENOTSUP; @@ -960,7 +977,8 @@ rte_service_attr_reset_all(uint32_t id) return -EINVAL; for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); cs->service_stats[id] = (struct service_stats) {}; } @@ -971,12 +989,11 @@ rte_service_attr_reset_all(uint32_t id) int32_t rte_service_lcore_attr_reset_all(uint32_t lcore) { - struct core_state *cs; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); if (lcore >= RTE_MAX_LCORE) return -EINVAL; - cs = &lcore_states[lcore]; if (!cs->is_service_core) return -ENOTSUP; @@ -1011,7 +1028,7 @@ static void service_dump_calls_per_lcore(FILE *f, uint32_t lcore) { uint32_t i; - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); fprintf(f, "%02d\t", lcore); for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { -- 2.34.1