When service_run() is called for a valid service, the lcore's service mask is checked for that service and the corresponding entry in the service's active_on_lcore array is set or cleared.
Up to 8 cores share the same cache line for their service active_on_lcore
array entries, since each entry is a uint8_t. Some of these entries also
share a cache line with the internal_flags member of struct
rte_service_spec_impl, so this false sharing also makes the
service_valid() check expensive.

Eliminate the false sharing by moving the active_on_lcore array into a
per-core data structure. The array is now indexed by service id.

Signed-off-by: Nikhil Rao <nikhil....@intel.com>
---
 lib/librte_eal/common/rte_service.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index c3653eb..5d52a81 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -51,7 +51,6 @@ struct rte_service_spec_impl {
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
-	uint8_t active_on_lcore[RTE_MAX_LCORE];
 } __rte_cache_aligned;

 /* the internal values of a service core */
@@ -60,7 +59,7 @@ struct core_state {
 	uint64_t service_mask;
 	uint8_t runstate; /* running or stopped */
 	uint8_t is_service_core; /* set if core is currently a service core */
-
+	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
 	uint64_t loops;
 	uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
 } __rte_cache_aligned;
@@ -347,7 +346,7 @@ int32_t rte_service_init(void)

 static inline int32_t
-service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
 {
 	if (!service_valid(i))
 		return -EINVAL;
@@ -355,11 +354,11 @@ int32_t rte_service_init(void)
 	if (s->comp_runstate != RUNSTATE_RUNNING ||
 	    s->app_runstate != RUNSTATE_RUNNING ||
 	    !(service_mask & (UINT64_C(1) << i))) {
-		s->active_on_lcore[lcore] = 0;
+		cs->service_active_on_lcore[i] = 0;
 		return -ENOEXEC;
 	}

-	s->active_on_lcore[lcore] = 1;
+	cs->service_active_on_lcore[i] = 1;

 	/* check do we need cmpset, if MT safe or <= 1 core
 	 * mapped, atomic ops are not required.
 	 */
@@ -382,7 +381,6 @@ int32_t rte_service_init(void)
 rte_service_may_be_active(uint32_t id)
 {
 	uint32_t ids[RTE_MAX_LCORE] = {0};
-	struct rte_service_spec_impl *s = &rte_services[id];
 	int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
 	int i;

@@ -390,7 +388,7 @@ int32_t rte_service_init(void)
 		return -EINVAL;

 	for (i = 0; i < lcore_count; i++) {
-		if (s->active_on_lcore[ids[i]])
+		if (lcore_states[i].service_active_on_lcore[id])
 			return 1;
 	}

@@ -421,7 +419,7 @@ int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
 		return -EBUSY;
 	}

-	int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
+	int ret = service_run(id, cs, UINT64_MAX);

 	if (serialize_mt_unsafe)
 		rte_atomic32_dec(&s->num_mapped_cores);
@@ -442,7 +440,7 @@ int32_t rte_service_run_iter_on_app_lcore(uint32_t id,

 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
 		/* return value ignored as no change to code flow */
-		service_run(i, lcore, cs, service_mask);
+		service_run(i, cs, service_mask);
 	}

 	cs->loops++;
-- 
1.8.3.1
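
For reviewers, below is a minimal standalone sketch of the before/after
layouts described in the commit message. The struct names, the 64-byte
cache-line size, and the array bounds are illustrative assumptions, not
the actual DPDK definitions.

/* Hypothetical sketch of the layout change; not DPDK code. */
#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE_SIZE 64   /* assumed cache-line size */
#define MAX_LCORE       128  /* stand-in for RTE_MAX_LCORE */
#define NUM_SERVICES    64   /* stand-in for RTE_SERVICE_NUM_MAX */

/* Before: each service keeps one byte per lcore. Flags written by many
 * different lcores land in the same cache line, and part of the array
 * can share a line with other members of the service structure. */
struct service_before {
	uint8_t internal_flags;
	uint8_t active_on_lcore[MAX_LCORE];
} __attribute__((aligned(CACHE_LINE_SIZE)));

/* After: each lcore keeps one byte per service inside its own
 * cache-aligned core state, so only that lcore writes to the line. */
struct core_state_after {
	uint64_t service_mask;
	uint8_t service_active_on_lcore[NUM_SERVICES];
} __attribute__((aligned(CACHE_LINE_SIZE)));

int main(void)
{
	printf("before: %zu bytes per service\n", sizeof(struct service_before));
	printf("after:  %zu bytes per core\n", sizeof(struct core_state_after));
	return 0;
}

In the per-core layout each lcore only ever writes flags inside its own
cache-aligned structure, which is the property the patch relies on to
remove the false sharing.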