From: Pavan Nikhilesh <pbhagavat...@marvell.com>

Increase the timer arm burst size to 16 and the default chunk size
from 256 to 1024 for optimum performance.
Use a fixed-size chunk pool cache to avoid high alloc cycles.

Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
---
 drivers/event/cnxk/cnxk_tim_evdev.c  |  7 ++-----
 drivers/event/cnxk/cnxk_tim_evdev.h  |  5 ++---
 drivers/event/cnxk/cnxk_tim_worker.h | 12 +++++++++---
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 5dd79cbd47..6ff3ca72f7 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -14,12 +14,11 @@ static int
 cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
                          struct rte_event_timer_adapter_conf *rcfg)
 {
-       unsigned int cache_sz = (tim_ring->nb_chunks / 1.5);
        unsigned int mp_flags = 0;
+       unsigned int cache_sz;
        char pool_name[25];
        int rc;
 
-       cache_sz /= rte_lcore_count();
        /* Create chunk pool. */
        if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) {
                mp_flags = RTE_MEMPOOL_F_SP_PUT | RTE_MEMPOOL_F_SC_GET;
@@ -30,9 +29,7 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
        snprintf(pool_name, sizeof(pool_name), "cnxk_tim_chunk_pool%d",
                 tim_ring->ring_id);
 
-       if (cache_sz > CNXK_TIM_MAX_POOL_CACHE_SZ)
-               cache_sz = CNXK_TIM_MAX_POOL_CACHE_SZ;
-       cache_sz = cache_sz != 0 ? cache_sz : 2;
+       cache_sz = CNXK_TIM_MAX_POOL_CACHE_SZ;
        tim_ring->nb_chunks += (cache_sz * rte_lcore_count());
        if (!tim_ring->disable_npa) {
                tim_ring->chunk_pool = rte_mempool_create_empty(
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 0c192346c7..8c69d15c80 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -24,10 +24,9 @@
 
 #define CNXK_TIM_EVDEV_NAME        cnxk_tim_eventdev
 #define CNXK_TIM_MAX_BUCKETS       (0xFFFFF)
-#define CNXK_TIM_RING_DEF_CHUNK_SZ  (256)
+#define CNXK_TIM_RING_DEF_CHUNK_SZ  (1024)
 #define CNXK_TIM_CHUNK_ALIGNMENT    (16)
-#define CNXK_TIM_MAX_BURST         \
-                       (RTE_CACHE_LINE_SIZE / CNXK_TIM_CHUNK_ALIGNMENT)
+#define CNXK_TIM_MAX_BURST         (16)
 #define CNXK_TIM_NB_CHUNK_SLOTS(sz) (((sz) / CNXK_TIM_CHUNK_ALIGNMENT) - 1)
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index 6be31f6f9d..87ac91f387 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -106,11 +106,17 @@ cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 }
 
 static inline void
-cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
+cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
 }
 
+static inline void
+cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
+{
+       __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+}
+
 static inline uint64_t
 cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 {
@@ -530,7 +536,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
                        index = cnxk_tim_cpy_wrk(index, chunk_remainder, chunk,
                                                 tim, ents, bkt);
                        cnxk_tim_bkt_sub_rem(bkt, chunk_remainder);
-                       cnxk_tim_bkt_add_nent(bkt, chunk_remainder);
+                       cnxk_tim_bkt_add_nent_relaxed(bkt, chunk_remainder);
                }
 
                if (flags & CNXK_TIM_ENA_FB)
@@ -561,7 +567,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
                cnxk_tim_bkt_add_nent(bkt, nb_timers);
        }
 
-       cnxk_tim_bkt_dec_lock(bkt);
+       cnxk_tim_bkt_dec_lock_relaxed(bkt);
 
        return nb_timers;
 }
-- 
2.25.1

Reply via email to