>Add a worker for handling vector events to the perf tests; vector events
>can be generated by the crypto adapter producer.
>
>Example:
>	./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
>	--prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_queue \
>	--stlist=a --wlcores 1 --plcores 2 --prod_enq_burst_sz 32 \
>	--enable_vector --vector_tmo_ns 0 --nb_flows 2
>
>Signed-off-by: Volodymyr Fialko <vfia...@marvell.com>

Acked-by: Shijith Thotton <sthot...@marvell.com>
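For completeness, the same flags should exercise the new all-types-queue
worker as well; an illustrative invocation (device arguments are
placeholders, as in the example above):

	./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
	--prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_atq \
	--stlist=a --wlcores 1 --plcores 2 --prod_enq_burst_sz 32 \
	--enable_vector --vector_tmo_ns 0 --nb_flows 2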
>---
> app/test-eventdev/test_perf_atq.c    |  62 ++++++++++++++--
> app/test-eventdev/test_perf_common.c |  68 +++++++++++++++---
> app/test-eventdev/test_perf_common.h | 102 ++++++++++++++++++++++++++-
> app/test-eventdev/test_perf_queue.c  |  63 +++++++++++++++--
> doc/guides/tools/testeventdev.rst    |  12 ++--
> 5 files changed, 279 insertions(+), 28 deletions(-)
>
>diff --git a/app/test-eventdev/test_perf_atq.c b/app/test-eventdev/test_perf_atq.c
>index 9d30081117..4ac60cc38b 100644
>--- a/app/test-eventdev/test_perf_atq.c
>+++ b/app/test-eventdev/test_perf_atq.c
>@@ -24,14 +24,22 @@ atq_fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
> 	ev->event_type = RTE_EVENT_TYPE_CPU;
> }
> 
>+static __rte_always_inline void
>+atq_fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
>+		const uint8_t nb_stages)
>+{
>+	ev->sub_event_type++;
>+	ev->sched_type = sched_type_list[ev->sub_event_type % nb_stages];
>+	ev->op = RTE_EVENT_OP_FORWARD;
>+	ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
>+}
>+
> static int
> perf_atq_worker(void *arg, const int enable_fwd_latency)
> {
>-	struct perf_elt *pe = NULL;
> 	uint16_t enq = 0, deq = 0;
> 	struct rte_event ev;
> 	PERF_WORKER_INIT;
>-	uint8_t stage;
> 
> 	while (t->done == false) {
> 		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
>@@ -79,9 +87,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
> 	/* +1 to avoid prefetch out of array check */
> 	struct rte_event ev[BURST_SIZE + 1];
> 	uint16_t enq = 0, nb_rx = 0;
>-	struct perf_elt *pe = NULL;
> 	PERF_WORKER_INIT;
>-	uint8_t stage;
> 	uint16_t i;
> 
> 	while (t->done == false) {
>@@ -134,6 +140,50 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
> 	return 0;
> }
> 
>+static int
>+perf_atq_worker_vector(void *arg, const int enable_fwd_latency)
>+{
>+	uint16_t enq = 0, deq = 0;
>+	struct rte_event ev;
>+	PERF_WORKER_INIT;
>+
>+	RTE_SET_USED(sz);
>+	RTE_SET_USED(cnt);
>+	RTE_SET_USED(prod_crypto_type);
>+
>+	while (t->done == false) {
>+		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
>+
>+		if (!deq)
>+			continue;
>+
>+		if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
>+			if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
>+				continue;
>+		}
>+
>+		stage = ev.sub_event_type % nb_stages;
>+		/* First q in pipeline, mark timestamp to compute fwd latency */
>+		if (enable_fwd_latency && !prod_timer_type && stage == 0)
>+			perf_mark_fwd_latency(pe);
>+
>+		/* Last stage in pipeline */
>+		if (unlikely(stage == laststage)) {
>+			perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
>+					enable_fwd_latency);
>+		} else {
>+			atq_fwd_event_vector(&ev, sched_type_list, nb_stages);
>+			do {
>+				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
>+			} while (!enq && !t->done);
>+		}
>+	}
>+
>+	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);
>+
>+	return 0;
>+}
>+
> static int
> worker_wrapper(void *arg)
> {
>@@ -144,7 +194,9 @@ worker_wrapper(void *arg)
> 	const int fwd_latency = opt->fwd_latency;
> 
> 	/* allow compiler to optimize */
>-	if (!burst && !fwd_latency)
>+	if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
>+		return perf_atq_worker_vector(arg, fwd_latency);
>+	else if (!burst && !fwd_latency)
> 		return perf_atq_worker(arg, 0);
> 	else if (!burst && fwd_latency)
> 		return perf_atq_worker(arg, 1);
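A note on the capability model the setup code below relies on: event
vectorization is negotiated per eventdev/cryptodev pair. A minimal
standalone probe might look like this, assuming already-initialized
devices (the helper name is mine):

    #include <stdint.h>
    #include <rte_event_crypto_adapter.h>

    /* Return non-zero if the adapter pair can aggregate crypto
     * completions into RTE_EVENT_TYPE_CRYPTODEV_VECTOR events.
     */
    static int
    crypto_vector_supported(uint8_t evdev_id, uint8_t cdev_id)
    {
            uint32_t cap;

            if (rte_event_crypto_adapter_caps_get(evdev_id, cdev_id, &cap) != 0)
                    return 0;
            return !!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_EVENT_VECTOR);
    }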
>diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
>index 140c0c2dc3..8d7e483c55 100644
>--- a/app/test-eventdev/test_perf_common.c
>+++ b/app/test-eventdev/test_perf_common.c
>@@ -827,10 +827,13 @@ perf_event_timer_adapter_setup(struct test_perf *t)
> static int
> perf_event_crypto_adapter_setup(struct test_perf *t, struct prod_data *p)
> {
>+	struct rte_event_crypto_adapter_queue_conf conf;
> 	struct evt_options *opt = t->opt;
> 	uint32_t cap;
> 	int ret;
> 
>+	memset(&conf, 0, sizeof(conf));
>+
> 	ret = rte_event_crypto_adapter_caps_get(p->dev_id, p->ca.cdev_id, &cap);
> 	if (ret) {
> 		evt_err("Failed to get crypto adapter capabilities");
>@@ -849,19 +852,53 @@ perf_event_crypto_adapter_setup(struct test_perf *t, struct prod_data *p)
> 		return -ENOTSUP;
> 	}
> 
>-	if (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) {
>-		struct rte_event_crypto_adapter_queue_conf conf;
>+	if (opt->ena_vector) {
>+		struct rte_event_crypto_adapter_vector_limits limits;
>+
>+		if (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_EVENT_VECTOR)) {
>+			evt_err("Crypto adapter doesn't support event vector");
>+			return -EINVAL;
>+		}
>+
>+		ret = rte_event_crypto_adapter_vector_limits_get(p->dev_id, p->ca.cdev_id, &limits);
>+		if (ret) {
>+			evt_err("Failed to get crypto adapter's vector limits");
>+			return ret;
>+		}
> 
>-		memset(&conf, 0, sizeof(conf));
>+		if (opt->vector_size < limits.min_sz || opt->vector_size > limits.max_sz) {
>+			evt_err("Vector size [%d] not within limits max[%d] min[%d]",
>+				opt->vector_size, limits.max_sz, limits.min_sz);
>+			return -EINVAL;
>+		}
>+
>+		if (limits.log2_sz && !rte_is_power_of_2(opt->vector_size)) {
>+			evt_err("Vector size [%d] not power of 2", opt->vector_size);
>+			return -EINVAL;
>+		}
>+
>+		if (opt->vector_tmo_nsec > limits.max_timeout_ns ||
>+		    opt->vector_tmo_nsec < limits.min_timeout_ns) {
>+			evt_err("Vector timeout [%" PRIu64 "] not within limits "
>+				"max[%" PRIu64 "] min[%" PRIu64 "]",
>+				opt->vector_tmo_nsec, limits.max_timeout_ns, limits.min_timeout_ns);
>+			return -EINVAL;
>+		}
>+
>+		conf.vector_mp = t->ca_vector_pool;
>+		conf.vector_sz = opt->vector_size;
>+		conf.vector_timeout_ns = opt->vector_tmo_nsec;
>+		conf.flags |= RTE_EVENT_CRYPTO_ADAPTER_EVENT_VECTOR;
>+	}
>+
>+	if (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) {
> 		conf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
> 		conf.ev.queue_id = p->queue_id;
>-		ret = rte_event_crypto_adapter_queue_pair_add(
>-			TEST_PERF_CA_ID, p->ca.cdev_id, p->ca.cdev_qp_id, &conf);
>-	} else {
>-		ret = rte_event_crypto_adapter_queue_pair_add(
>-			TEST_PERF_CA_ID, p->ca.cdev_id, p->ca.cdev_qp_id, NULL);
> 	}
> 
>+	ret = rte_event_crypto_adapter_queue_pair_add(
>+		TEST_PERF_CA_ID, p->ca.cdev_id, p->ca.cdev_qp_id, &conf);
>+
> 	return ret;
> }
> 
>@@ -1411,6 +1448,19 @@ perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt)
> 		goto err;
> 	}
> 
>+	if (opt->ena_vector) {
>+		unsigned int nb_elem = (opt->pool_sz / opt->vector_size) * 2;
>+		nb_elem = RTE_MAX(512U, nb_elem);
>+		nb_elem += evt_nr_active_lcores(opt->wlcores) * 32;
>+		t->ca_vector_pool = rte_event_vector_pool_create("vector_pool", nb_elem, 32,
>+				opt->vector_size, opt->socket_id);
>+		if (t->ca_vector_pool == NULL) {
>+			evt_err("Failed to create event vector pool");
>+			ret = -ENOMEM;
>+			goto err;
>+		}
>+	}
>+
> 	/*
> 	 * Calculate number of needed queue pairs, based on the amount of
> 	 * available number of logical cores and crypto devices. For instance,
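To make the vector pool sizing above concrete, here is the same rule as a
standalone helper (hypothetical function, parameters mirror the tool's
options):

    #include <rte_common.h>

    static unsigned int
    vector_pool_elems(unsigned int pool_sz, unsigned int vector_size,
                      unsigned int nb_workers)
    {
            /* Two vectors' worth of the op pool, floored at 512 ... */
            unsigned int nb_elem = (pool_sz / vector_size) * 2;

            nb_elem = RTE_MAX(512U, nb_elem);
            /* ... plus per-worker headroom for vectors in flight. */
            return nb_elem + nb_workers * 32;
    }

With what I believe are the tool's defaults (--pool_sz 16384,
--vector_size 64) and one worker lcore, this gives
max(512, (16384 / 64) * 2) + 1 * 32 = 544 vector elements.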
>@@ -1467,6 +1517,7 @@ perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt)
> 	rte_mempool_free(t->ca_op_pool);
> 	rte_mempool_free(t->ca_sess_pool);
> 	rte_mempool_free(t->ca_asym_sess_pool);
>+	rte_mempool_free(t->ca_vector_pool);
> 
> 	return ret;
> }
>@@ -1507,6 +1558,7 @@ perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt)
> 	rte_mempool_free(t->ca_op_pool);
> 	rte_mempool_free(t->ca_sess_pool);
> 	rte_mempool_free(t->ca_asym_sess_pool);
>+	rte_mempool_free(t->ca_vector_pool);
> }
> 
> int
>diff --git a/app/test-eventdev/test_perf_common.h b/app/test-eventdev/test_perf_common.h
>index 503b6aa1db..faedd471c6 100644
>--- a/app/test-eventdev/test_perf_common.h
>+++ b/app/test-eventdev/test_perf_common.h
>@@ -71,6 +71,7 @@ struct test_perf {
> 	struct rte_mempool *ca_op_pool;
> 	struct rte_mempool *ca_sess_pool;
> 	struct rte_mempool *ca_asym_sess_pool;
>+	struct rte_mempool *ca_vector_pool;
> } __rte_cache_aligned;
> 
> struct perf_elt {
>@@ -103,6 +104,8 @@ struct perf_elt {
> 	uint8_t cnt = 0;\
> 	void *bufs[16] __rte_cache_aligned;\
> 	int const sz = RTE_DIM(bufs);\
>+	uint8_t stage;\
>+	struct perf_elt *pe = NULL;\
> 	if (opt->verbose_level > 1)\
> 		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
> 			rte_lcore_id(), dev, port)
>@@ -143,6 +146,64 @@ perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd
> 	return 0;
> }
> 
>+static __rte_always_inline struct perf_elt *
>+perf_elt_from_vec_get(struct rte_event_vector *vec)
>+{
>+	/* Timestamp for vector event stored in first element */
>+	struct rte_crypto_op *cop = vec->ptrs[0];
>+	struct rte_mbuf *m;
>+
>+	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
>+		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
>+		return rte_pktmbuf_mtod(m, struct perf_elt *);
>+	} else {
>+		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
>+	}
>+}
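The helper above encodes the timestamping convention: only the first
element of each vector carries a perf_elt, stored in the mbuf data area
for symmetric ops or after the modex result for asymmetric ones. A sketch
of consuming it, assuming the surrounding test_perf_common.h context
(perf_elt, perf_elt_from_vec_get):

    #include <rte_common.h>
    #include <rte_cycles.h>

    /* Forwarding latency of a whole vector, in TSC cycles; the
     * timestamp rides in element 0 only.
     */
    static __rte_always_inline uint64_t
    vector_fwd_latency(struct rte_event_vector *vec)
    {
            struct perf_elt *pe = perf_elt_from_vec_get(vec);

            return rte_get_timer_cycles() - pe->timestamp;
    }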
>+
>+static __rte_always_inline int
>+perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
>+		const int enable_fwd_latency)
>+{
>+	struct rte_event_vector *vec = ev->vec;
>+	struct rte_crypto_op *cop;
>+	struct rte_mbuf *m;
>+	int i, n = 0;
>+	void *data;
>+
>+	for (i = 0; i < vec->nb_elem; i++) {
>+		cop = vec->ptrs[i];
>+		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
>+			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
>+				m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
>+				rte_pktmbuf_free(m);
>+			} else {
>+				data = cop->asym->modex.result.data;
>+				rte_mempool_put(rte_mempool_from_obj(data), data);
>+			}
>+			rte_crypto_op_free(cop);
>+			continue;
>+		}
>+		vec->ptrs[n++] = cop;
>+	}
>+
>+	/* All cops failed, free the vector */
>+	if (n == 0) {
>+		rte_mempool_put(rte_mempool_from_obj(vec), vec);
>+		return -ENOENT;
>+	}
>+
>+	vec->nb_elem = n;
>+
>+	/* Forward latency not enabled - perf data will not be accessed */
>+	if (!enable_fwd_latency)
>+		return 0;
>+
>+	/* Get pointer to perf data */
>+	*pe = perf_elt_from_vec_get(vec);
>+
>+	return 0;
>+}
> 
> static __rte_always_inline int
> perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type,
>@@ -195,9 +256,8 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_cry
> 	struct perf_elt *pe;
> 	void *to_free_in_bulk;
> 
>-	/* release fence here ensures event_prt is
>-	 * stored before updating the number of
>-	 * processed packets for worker lcores
>+	/* Release fence here ensures event_ptr is stored before updating the number of processed
>+	 * packets for worker lcores.
> 	 */
> 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
> 	w->processed_pkts++;
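perf_handle_crypto_vector_ev() filters failed ops with an in-place
compaction, so the surviving ops keep their order and no scratch array is
needed. The idiom in isolation (generic sketch, not tied to crypto ops):

    #include <stdint.h>

    /* Keep only the elements for which keep() returns non-zero,
     * preserving order; returns the new element count.
     */
    static inline uint16_t
    vec_compact(void **ptrs, uint16_t nb_elem, int (*keep)(void *))
    {
            uint16_t i, n = 0;

            for (i = 0; i < nb_elem; i++) {
                    if (keep(ptrs[i]))
                            ptrs[n++] = ptrs[i];
            }
            return n;
    }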
>@@ -237,6 +297,42 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_cry
> 	return count;
> }
> 
>+static __rte_always_inline void
>+perf_process_vector_last_stage(struct rte_mempool *const pool,
>+		struct rte_mempool *const ca_pool, struct rte_event *const ev,
>+		struct worker_data *const w, const bool enable_fwd_latency)
>+{
>+	struct rte_event_vector *vec = ev->vec;
>+	struct rte_crypto_op *cop;
>+	void *bufs[vec->nb_elem];
>+	struct perf_elt *pe;
>+	uint64_t latency;
>+	int i;
>+
>+	/* Release fence here ensures event_ptr is stored before updating the number of processed
>+	 * packets for worker lcores.
>+	 */
>+	rte_atomic_thread_fence(__ATOMIC_RELEASE);
>+	w->processed_pkts += vec->nb_elem;
>+
>+	if (enable_fwd_latency) {
>+		pe = perf_elt_from_vec_get(vec);
>+		latency = rte_get_timer_cycles() - pe->timestamp;
>+		w->latency += latency;
>+	}
>+
>+	for (i = 0; i < vec->nb_elem; i++) {
>+		cop = vec->ptrs[i];
>+		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
>+			bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
>+		else
>+			bufs[i] = cop->asym->modex.result.data;
>+	}
>+
>+	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
>+	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
>+	rte_mempool_put(rte_mempool_from_obj(vec), vec);
>+}
> 
> static inline int
> perf_nb_event_ports(struct evt_options *opt)
>diff --git a/app/test-eventdev/test_perf_queue.c b/app/test-eventdev/test_perf_queue.c
>index 69ef0ebbac..2399cfb69b 100644
>--- a/app/test-eventdev/test_perf_queue.c
>+++ b/app/test-eventdev/test_perf_queue.c
>@@ -25,15 +25,22 @@ fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
> 	ev->event_type = RTE_EVENT_TYPE_CPU;
> }
> 
>+static __rte_always_inline void
>+fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
>+		const uint8_t nb_stages)
>+{
>+	ev->queue_id++;
>+	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
>+	ev->op = RTE_EVENT_OP_FORWARD;
>+	ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
>+}
>+
> static int
> perf_queue_worker(void *arg, const int enable_fwd_latency)
> {
>-	struct perf_elt *pe = NULL;
> 	uint16_t enq = 0, deq = 0;
> 	struct rte_event ev;
> 	PERF_WORKER_INIT;
>-	uint8_t stage;
>-
> 
> 	while (t->done == false) {
> 		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
>@@ -82,9 +89,7 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
> 	/* +1 to avoid prefetch out of array check */
> 	struct rte_event ev[BURST_SIZE + 1];
> 	uint16_t enq = 0, nb_rx = 0;
>-	struct perf_elt *pe = NULL;
> 	PERF_WORKER_INIT;
>-	uint8_t stage;
> 	uint16_t i;
> 
> 	while (t->done == false) {
>@@ -137,6 +142,50 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
> 	return 0;
> }
> 
>+static int
>+perf_queue_worker_vector(void *arg, const int enable_fwd_latency)
>+{
>+	uint16_t enq = 0, deq = 0;
>+	struct rte_event ev;
>+	PERF_WORKER_INIT;
>+
>+	RTE_SET_USED(sz);
>+	RTE_SET_USED(cnt);
>+	RTE_SET_USED(prod_crypto_type);
>+
>+	while (t->done == false) {
>+		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
>+
>+		if (!deq)
>+			continue;
>+
>+		if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
>+			if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
>+				continue;
>+		}
>+
>+		stage = ev.queue_id % nb_stages;
>+		/* First q in pipeline, mark timestamp to compute fwd latency */
>+		if (enable_fwd_latency && !prod_timer_type && stage == 0)
>+			perf_mark_fwd_latency(pe);
>+
>+		/* Last stage in pipeline */
>+		if (unlikely(stage == laststage)) {
>+			perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
>+					enable_fwd_latency);
>+		} else {
>+			fwd_event_vector(&ev, sched_type_list, nb_stages);
>+			do {
>+				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
>+			} while (!enq && !t->done);
>+		}
>+	}
>+
>+	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);
>+
>+	return 0;
>+}
>+
> static int
> worker_wrapper(void *arg)
> {
>@@ -147,7 +196,9 @@ worker_wrapper(void *arg)
> 	const int fwd_latency = opt->fwd_latency;
> 
> 	/* allow compiler to optimize */
>-	if (!burst && !fwd_latency)
>+	if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
>+		return perf_queue_worker_vector(arg, fwd_latency);
>+	else if (!burst && !fwd_latency)
> 		return perf_queue_worker(arg, 0);
> 	else if (!burst && fwd_latency)
> 		return perf_queue_worker(arg, 1);
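Worth noting the one real difference between the two vector workers:
perf_queue forwards by bumping queue_id (one event queue per stage),
while perf_atq bumps sub_event_type (all stages share one all-types
queue). A sketch of the stage derivation both sides use (helper name is
mine):

    #include <stdbool.h>
    #include <stdint.h>
    #include <rte_eventdev.h>

    /* Stage of a dequeued event: encoded in the queue id for
     * perf_queue, in the sub event type for perf_atq.
     */
    static inline uint8_t
    current_stage(const struct rte_event *ev, uint8_t nb_stages, bool atq)
    {
            return (atq ? ev->sub_event_type : ev->queue_id) % nb_stages;
    }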
>diff --git a/doc/guides/tools/testeventdev.rst b/doc/guides/tools/testeventdev.rst
>index cd278e8998..6f065b9752 100644
>--- a/doc/guides/tools/testeventdev.rst
>+++ b/doc/guides/tools/testeventdev.rst
>@@ -185,18 +185,18 @@ The following are the application command-line options:
> 
> * ``--enable_vector``
> 
>-        Enable event vector for Rx/Tx adapters.
>-        Only applicable for `pipeline_atq` and `pipeline_queue` tests.
>+        Enable event vector for Rx/Tx/crypto adapters.
>+        Only applicable for `pipeline_*` and `perf_*` tests.
> 
> * ``--vector_size``
> 
>-        Vector size to configure for the Rx adapter.
>-        Only applicable for `pipeline_atq` and `pipeline_queue` tests.
>+        Vector size to configure for the Rx/crypto adapter.
>+        Only applicable for `pipeline_*` and `perf_*` tests.
> 
> * ``--vector_tmo_ns``
> 
>-        Vector timeout nanoseconds to be configured for the Rx adapter.
>-        Only applicable for `pipeline_atq` and `pipeline_queue` tests.
>+        Vector timeout in nanoseconds to be configured for the Rx/crypto adapter.
>+        Only applicable for `pipeline_*` and `perf_*` tests.
> 
> * ``--per_port_pool``
> 
>-- 
>2.25.1
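One last usage note on the documented options: the commit-message example
passes --vector_tmo_ns 0. To exercise a non-zero aggregation timeout
together with an explicit vector size, something along these lines should
work (values are illustrative and must fall within the adapter's reported
limits; device arguments remain placeholders):

	./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
	--prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_queue \
	--stlist=a --wlcores 1 --plcores 2 --enable_vector \
	--vector_size 64 --vector_tmo_ns 100000 --nb_flows 2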