Added support for burst enqueue for cn10k event crypto adapter. Instruction will be grouped based on the queue pair and sent in a burst.
Signed-off-by: Volodymyr Fialko <vfia...@marvell.com> --- drivers/crypto/cnxk/cn10k_cryptodev_ops.c | 147 ++++++++++++++++------ drivers/crypto/cnxk/cn10k_cryptodev_ops.h | 7 +- drivers/crypto/cnxk/meson.build | 2 +- drivers/event/cnxk/cn10k_eventdev.c | 2 +- drivers/event/cnxk/cn10k_worker.c | 10 -- drivers/event/cnxk/cn10k_worker.h | 2 - 6 files changed, 113 insertions(+), 57 deletions(-) diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c index f761ba36e2..2fbf374da3 100644 --- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c +++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c @@ -9,11 +9,12 @@ #include "cn10k_cryptodev.h" #include "cn10k_cryptodev_ops.h" -#include "cn10k_ipsec_la_ops.h" #include "cn10k_ipsec.h" +#include "cn10k_ipsec_la_ops.h" #include "cnxk_ae.h" #include "cnxk_cryptodev.h" #include "cnxk_cryptodev_ops.h" +#include "cnxk_eventdev.h" #include "cnxk_se.h" #include "roc_api.h" @@ -391,74 +392,140 @@ cn10k_ca_meta_info_extract(struct rte_crypto_op *op, return 0; } -uint16_t -cn10k_cpt_crypto_adapter_enqueue(uintptr_t base, struct rte_crypto_op *op) +static inline uint16_t +ca_lmtst_burst_submit(struct cn10k_sso_hws *ws, uint64_t w2[], + struct cnxk_cpt_qp *qp, struct rte_crypto_op *op[], + uint16_t nb_ops) { + struct cpt_inflight_req *infl_reqs[PKTS_PER_LOOP]; + uint64_t lmt_base, lmt_arg, io_addr; + struct cpt_inst_s *inst, *inst_base; struct cpt_inflight_req *infl_req; - uint64_t lmt_base, lmt_arg, w2; - struct cpt_inst_s *inst; union cpt_fc_write_s fc; - struct cnxk_cpt_qp *qp; uint64_t *fc_addr; uint16_t lmt_id; - int ret; + int ret, i; - ret = cn10k_ca_meta_info_extract(op, &qp, &w2); - if (unlikely(ret)) { - rte_errno = EINVAL; - return 0; - } + lmt_base = qp->lmtline.lmt_base; + io_addr = qp->lmtline.io_addr; + fc_addr = qp->lmtline.fc_addr; + + const uint32_t fc_thresh = qp->lmtline.fc_thresh; + + ROC_LMT_BASE_ID_GET(lmt_base, lmt_id); + inst_base = (struct cpt_inst_s *)lmt_base; if (unlikely(!qp->ca.enabled)) { rte_errno = EINVAL; return 0; } - if (unlikely(rte_mempool_get(qp->ca.req_mp, (void **)&infl_req))) { + if (unlikely(rte_mempool_get_bulk(qp->ca.req_mp, (void **)infl_reqs, + nb_ops))) { rte_errno = ENOMEM; return 0; } - infl_req->op_flags = 0; - lmt_base = qp->lmtline.lmt_base; - fc_addr = qp->lmtline.fc_addr; - - const uint32_t fc_thresh = qp->lmtline.fc_thresh; + for (i = 0; i < nb_ops; i++) { + inst = &inst_base[2 * i]; + infl_req = infl_reqs[i]; + infl_req->op_flags = 0; - ROC_LMT_BASE_ID_GET(lmt_base, lmt_id); - inst = (struct cpt_inst_s *)lmt_base; + ret = cn10k_cpt_fill_inst(qp, &op[i], inst, infl_req); + if (unlikely(ret != 1)) { + plt_dp_err("Could not process op: %p", op[i]); + if (i != 0) + goto submit; + else + goto put; + } - ret = cn10k_cpt_fill_inst(qp, &op, inst, infl_req); - if (unlikely(ret != 1)) { - plt_dp_err("Could not process op: %p", op); - rte_mempool_put(qp->ca.req_mp, infl_req); - return 0; + infl_req->res.cn10k.compcode = CPT_COMP_NOT_DONE; + infl_req->qp = qp; + inst->w0.u64 = 0; + inst->res_addr = (uint64_t)&infl_req->res; + inst->w2.u64 = w2[i]; + inst->w3.u64 = CNXK_CPT_INST_W3(1, infl_req); } - infl_req->cop = op; - infl_req->res.cn10k.compcode = CPT_COMP_NOT_DONE; - infl_req->qp = qp; - inst->w0.u64 = 0; - inst->res_addr = (uint64_t)&infl_req->res; - inst->w2.u64 = w2; - inst->w3.u64 = CNXK_CPT_INST_W3(1, infl_req); - fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED); if (unlikely(fc.s.qsize > fc_thresh)) { - rte_mempool_put(qp->ca.req_mp, infl_req); rte_errno = EAGAIN; - return 0; + i = 0; + goto put; } - if (inst->w2.s.tt == RTE_SCHED_TYPE_ORDERED) - roc_sso_hws_head_wait(base); +submit: + if (CNXK_TT_FROM_TAG(ws->gw_rdata) == SSO_TT_ORDERED) + roc_sso_hws_head_wait(ws->base); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, qp->lmtline.io_addr); + if (i > PKTS_PER_STEORL) { + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (PKTS_PER_STEORL - 1) << 12 | + (uint64_t)lmt_id; + roc_lmt_submit_steorl(lmt_arg, io_addr); + lmt_arg = ROC_CN10K_CPT_LMT_ARG | + (i - PKTS_PER_STEORL - 1) << 12 | + (uint64_t)(lmt_id + PKTS_PER_STEORL); + roc_lmt_submit_steorl(lmt_arg, io_addr); + } else { + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | + (uint64_t)lmt_id; + roc_lmt_submit_steorl(lmt_arg, io_addr); + } rte_io_wmb(); - return 1; +put: + if (unlikely(i != nb_ops)) + rte_mempool_put_bulk(qp->ca.req_mp, (void *)&infl_reqs[i], + nb_ops - i); + + return i; +} + +uint16_t __rte_hot +cn10k_cpt_crypto_adapter_enqueue(void *ws, struct rte_event ev[], + uint16_t nb_events) +{ + struct rte_crypto_op *ops[PKTS_PER_LOOP], *op; + struct cnxk_cpt_qp *qp, *curr_qp = NULL; + uint64_t w2s[PKTS_PER_LOOP], w2; + uint16_t submitted, count = 0; + int ret, i, ops_len = 0; + + for (i = 0; i < nb_events; i++) { + op = ev[i].event_ptr; + ret = cn10k_ca_meta_info_extract(op, &qp, &w2); + if (unlikely(ret)) { + rte_errno = EINVAL; + return count; + } + + if (qp != curr_qp) { + if (ops_len) { + submitted = ca_lmtst_burst_submit( + ws, w2s, curr_qp, ops, ops_len); + count += submitted; + if (unlikely(submitted != ops_len)) + return count; + ops_len = 0; + } + curr_qp = qp; + } + w2s[ops_len] = w2; + ops[ops_len] = op; + if (++ops_len == PKTS_PER_LOOP) { + submitted = ca_lmtst_burst_submit(ws, w2s, curr_qp, ops, + ops_len); + count += submitted; + if (unlikely(submitted != ops_len)) + return count; + ops_len = 0; + } + } + if (ops_len) + count += ca_lmtst_burst_submit(ws, w2s, curr_qp, ops, ops_len); + return count; } static inline void diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.h b/drivers/crypto/cnxk/cn10k_cryptodev_ops.h index 1ad4c16873..628d6a567c 100644 --- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.h +++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.h @@ -5,16 +5,17 @@ #ifndef _CN10K_CRYPTODEV_OPS_H_ #define _CN10K_CRYPTODEV_OPS_H_ -#include <rte_cryptodev.h> #include <cryptodev_pmd.h> +#include <rte_cryptodev.h> +#include <rte_eventdev.h> extern struct rte_cryptodev_ops cn10k_cpt_ops; void cn10k_cpt_set_enqdeq_fns(struct rte_cryptodev *dev); __rte_internal -uint16_t cn10k_cpt_crypto_adapter_enqueue(uintptr_t base, - struct rte_crypto_op *op); +uint16_t __rte_hot cn10k_cpt_crypto_adapter_enqueue(void *ws, struct rte_event ev[], + uint16_t nb_events); __rte_internal uintptr_t cn10k_cpt_crypto_adapter_dequeue(uintptr_t get_work1); diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build index 23a1cc3aac..952554ac12 100644 --- a/drivers/crypto/cnxk/meson.build +++ b/drivers/crypto/cnxk/meson.build @@ -24,7 +24,7 @@ sources = files( deps += ['bus_pci', 'common_cnxk', 'security', 'eventdev'] -includes += include_directories('../../../lib/net') +includes += include_directories('../../../lib/net', '../../event/cnxk') if get_option('buildtype').contains('debug') cflags += [ '-DLA_IPSEC_DEBUG' ] diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 7adf80236f..b0982d6c3b 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -594,7 +594,7 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } } - event_dev->ca_enqueue = cn10k_sso_hws_ca_enq; + event_dev->ca_enqueue = cn10k_cpt_crypto_adapter_enqueue; if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) CN10K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index f953e19dd0..4581c41233 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -64,13 +64,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_ca_enq(void *port, struct rte_event ev[], uint16_t nb_events) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(nb_events); - - return cn10k_cpt_crypto_adapter_enqueue(ws->base, ev->event_ptr); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index a71e076ff8..56bf4cec50 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -353,8 +353,6 @@ uint16_t __rte_hot cn10k_sso_hws_enq_new_burst(void *port, uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_ca_enq(void *port, struct rte_event ev[], - uint16_t nb_events); #define R(name, flags) \ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ -- 2.25.1