Added cnxk driver support for DMA event enqueue and dequeue. Also added
work queue entry completion status handling and dual workslot DMA event
enqueue support.
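
An illustrative application-side flow for the new enqueue path (not part
of this patch; op_pool, dma_dev_id, vchan, evdev_id, port_id,
dma_compl_queue, src_iova/dst_iova and len are placeholders, and the op
layout assumed is the 23.11-era rte_event_dma_adapter one, where
src_seg/dst_seg are pointers and the response event sits immediately
after the op, as this driver expects):

    struct rte_dma_sge src = { .addr = src_iova, .length = len };
    struct rte_dma_sge dst = { .addr = dst_iova, .length = len };
    struct rte_event_dma_adapter_op *op;
    struct rte_event *rsp, ev = {0};

    /* Pool elements hold the op plus a trailing struct rte_event that
     * the driver reads as the completion event template.
     */
    if (rte_mempool_get(op_pool, (void **)&op) < 0)
        return;
    op->dma_dev_id = dma_dev_id;
    op->vchan = vchan;
    op->src_seg = &src;
    op->dst_seg = &dst;
    op->nb_src = 1;
    op->nb_dst = 1;
    op->flags = RTE_DMA_OP_FLAG_SUBMIT;

    /* Completion event delivered by the SSO once the DMA finishes. */
    rsp = (struct rte_event *)(op + 1);
    rsp->queue_id = dma_compl_queue;
    rsp->sched_type = RTE_SCHED_TYPE_ATOMIC;

    ev.event_ptr = op;
    rte_event_dma_adapter_enqueue(evdev_id, port_id, &ev, 1);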
Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
Signed-off-by: Amit Prakash Shukla <amitpraka...@marvell.com>
---
v3:
- Rebased and fixed compilation error.

v2:
- Added dual workslot enqueue support.
- Fixed compilation error.

 doc/guides/eventdevs/cnxk.rst        |   5 +
 drivers/dma/cnxk/cnxk_dma_event_dp.h |  24 +++
 drivers/dma/cnxk/cnxk_dmadev.c       |   3 +-
 drivers/dma/cnxk/cnxk_dmadev.h       |  20 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c    | 290 +++++++++++++++++++++++++++
 drivers/dma/cnxk/meson.build         |   9 +-
 drivers/dma/cnxk/version.map         |  10 +
 drivers/event/cnxk/cn9k_eventdev.c   |   2 +
 8 files changed, 360 insertions(+), 3 deletions(-)
 create mode 100644 drivers/dma/cnxk/cnxk_dma_event_dp.h
 create mode 100644 drivers/dma/cnxk/version.map

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index cccb8a0304..9ff1052c53 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -227,3 +227,8 @@ ethernet devices connected to event device to override this
 applications can use `force_rx_bp=1` device arguments.
 Using unique mempool per each ethernet device is recommended when they are
 connected to event device.
+
+DMA adapter new mode support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+DMA driver does not support DMA adapter configured in new mode.
diff --git a/drivers/dma/cnxk/cnxk_dma_event_dp.h b/drivers/dma/cnxk/cnxk_dma_event_dp.h
new file mode 100644
index 0000000000..5f890ab18b
--- /dev/null
+++ b/drivers/dma/cnxk/cnxk_dma_event_dp.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#ifndef _CNXK_DMA_EVENT_DP_H_
+#define _CNXK_DMA_EVENT_DP_H_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_eventdev.h>
+
+__rte_internal
+uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
+
+__rte_internal
+uint16_t cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
+
+__rte_internal
+uint16_t cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
+
+__rte_internal
+uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1);
+#endif /* _CNXK_DMA_EVENT_DP_H_ */
diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 48ab09cc38..4ab3cfbdf2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -589,10 +589,11 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
 		dmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg;
 	}
 
+	dpivf->mcs_lock = NULL;
 	rdpi = &dpivf->rdpi;
 
 	rdpi->pci_dev = pci_dev;
-	rc = roc_dpi_dev_init(rdpi, 0);
+	rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
 	if (rc < 0)
 		goto err_out_free;
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 350ae73b5c..610a360ba2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -14,11 +14,14 @@
 #include <rte_eal.h>
 #include <rte_lcore.h>
 #include <rte_mbuf_pool_ops.h>
+#include <rte_mcslock.h>
 #include <rte_mempool.h>
 #include <rte_pci.h>
 
 #include <roc_api.h>
 
+#include "cnxk_dma_event_dp.h"
+
 #define CNXK_DPI_MAX_POINTER		15
 #define CNXK_DPI_STRM_INC(s, var)	((s).var = ((s).var + 1) & (s).max_cnt)
 #define CNXK_DPI_STRM_DEC(s, var)	((s).var = ((s).var - 1) == -1 ? (s).max_cnt :	\
@@ -40,6 +43,11 @@
  */
 #define CNXK_DPI_REQ_CDATA		0xFF
 
+/* Set completion data to 0xDEADBEEF when a request is submitted via SSO.
+ * This helps differentiate whether the dequeue is called after a cnxk enqueue.
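+ * (Plain dmadev submissions use CNXK_DPI_REQ_CDATA instead; the dequeue
+ * path keys off this value to tell the two cases apart.)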
+ */
+#define CNXK_DPI_REQ_SSO_CDATA		0xDEADBEEF
+
 union cnxk_dpi_instr_cmd {
 	uint64_t u;
 	struct cn9k_dpi_instr_cmd {
@@ -85,7 +93,10 @@ union cnxk_dpi_instr_cmd {
 
 struct cnxk_dpi_compl_s {
 	uint64_t cdata;
-	void *cb_data;
+	void *op;
+	uint16_t dev_id;
+	uint16_t vchan;
+	uint32_t wqecs;
 };
 
 struct cnxk_dpi_cdesc_data_s {
@@ -95,6 +106,11 @@ struct cnxk_dpi_cdesc_data_s {
 	uint16_t tail;
 };
 
+struct cnxk_dma_adapter_info {
+	bool enabled; /* Set if vchan queue is added to dma adapter. */
+	struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+};
+
 struct cnxk_dpi_conf {
 	union cnxk_dpi_instr_cmd cmd;
 	struct cnxk_dpi_cdesc_data_s c_desc;
@@ -103,6 +119,7 @@ struct cnxk_dpi_conf {
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
+	struct cnxk_dma_adapter_info adapter_info;
 };
 
 struct cnxk_dpi_vf_s {
@@ -112,6 +129,7 @@ struct cnxk_dpi_vf_s {
 	uint16_t chunk_size_m1;
 	struct rte_mempool *chunk_pool;
 	struct cnxk_dpi_conf conf[CNXK_DPI_MAX_VCHANS_PER_QUEUE];
+	RTE_ATOMIC(rte_mcslock_t *) mcs_lock;
 	/* Slow path */
 	struct roc_dpi rdpi;
 	uint32_t aura;
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 95df19a2db..009a871e43 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -5,6 +5,10 @@
 #include <rte_vect.h>
 
 #include "cnxk_dmadev.h"
+#include <rte_event_dma_adapter.h>
+
+#include <cn10k_eventdev.h>
+#include <cnxk_eventdev.h>
 
 static __plt_always_inline void
 __dpi_cpy_scalar(uint64_t *src, uint64_t *dst, uint8_t n)
@@ -434,3 +438,289 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 
 	return dpi_conf->desc_idx++;
 }
+
+static inline uint64_t
+cnxk_dma_adapter_format_event(uint64_t event)
+{
+	uint64_t w0;
+
+	w0 = (event & 0xFFC000000000) >> 6 |
+	     (event & 0xFFFFFFF) | RTE_EVENT_TYPE_DMADEV << 28;
+
+	return w0;
+}
+
+uint16_t
+cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+	const struct rte_dma_sge *src, *dst;
+	struct rte_event_dma_adapter_op *op;
+	struct cnxk_dpi_compl_s *comp_ptr;
+	struct cnxk_dpi_conf *dpi_conf;
+	struct cnxk_dpi_vf_s *dpivf;
+	struct rte_event *rsp_info;
+	struct cn10k_sso_hws *work;
+	uint16_t nb_src, nb_dst;
+	rte_mcslock_t mcs_lock_me;
+	uint64_t hdr[4];
+	uint16_t count;
+	int rc;
+
+	work = (struct cn10k_sso_hws *)ws;
+
+	for (count = 0; count < nb_events; count++) {
+		op = ev[count].event_ptr;
+		rsp_info = (struct rte_event *)((uint8_t *)op +
+						sizeof(struct rte_event_dma_adapter_op));
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpi_conf = &dpivf->conf[op->vchan];
+
+		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+			return count;
+
+		comp_ptr->op = op;
+		comp_ptr->dev_id = op->dma_dev_id;
+		comp_ptr->vchan = op->vchan;
+		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+
+		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
+		hdr[0] |= (nb_dst << 6) | nb_src;
+		hdr[1] = ((uint64_t)comp_ptr);
+		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+
+		src = &op->src_seg[0];
+		dst = &op->dst_seg[0];
+
+		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
+		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
+		     (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
+			     RTE_SCHED_TYPE_ORDERED))
+			roc_sso_hws_head_wait(work->base);
+
+		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+		rc = __dpi_queue_write_sg(dpivf, hdr, src, dst, nb_src, nb_dst);
+		if (unlikely(rc)) {
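+			/* Descriptor write failed; release the lock before
+			 * returning the error to the caller.
+			 */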
+			rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+			return rc;
+		}
+
+		if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+			rte_wmb();
+			plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+			dpi_conf->stats.submitted += dpi_conf->pending + 1;
+			dpi_conf->pnum_words = 0;
+			dpi_conf->pending = 0;
+		} else {
+			dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+			dpi_conf->pending++;
+		}
+		rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+	}
+
+	return count;
+}
+
+uint16_t
+cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+	const struct rte_dma_sge *fptr, *lptr;
+	struct rte_event_dma_adapter_op *op;
+	struct cnxk_dpi_compl_s *comp_ptr;
+	struct cn9k_sso_hws_dual *work;
+	struct cnxk_dpi_conf *dpi_conf;
+	struct cnxk_dpi_vf_s *dpivf;
+	struct rte_event *rsp_info;
+	uint16_t nb_src, nb_dst;
+	rte_mcslock_t mcs_lock_me;
+	uint64_t hdr[4];
+	uint16_t count;
+	int rc;
+
+	work = (struct cn9k_sso_hws_dual *)ws;
+
+	for (count = 0; count < nb_events; count++) {
+		op = ev[count].event_ptr;
+		rsp_info = (struct rte_event *)((uint8_t *)op +
+						sizeof(struct rte_event_dma_adapter_op));
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpi_conf = &dpivf->conf[op->vchan];
+
+		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+			return count;
+
+		comp_ptr->op = op;
+		comp_ptr->dev_id = op->dma_dev_id;
+		comp_ptr->vchan = op->vchan;
+		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
+		hdr[2] = (uint64_t)comp_ptr;
+
+		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+		/*
+		 * For the inbound case, src pointers are the last pointers;
+		 * for all other cases, src pointers are the first pointers.
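+		 * The header encodes the counts to match, hence the RTE_SWAP
+		 * of nb_src and nb_dst below.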
+		 */
+		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+			fptr = &op->dst_seg[0];
+			lptr = &op->src_seg[0];
+			RTE_SWAP(nb_src, nb_dst);
+		} else {
+			fptr = &op->src_seg[0];
+			lptr = &op->dst_seg[0];
+		}
+
+		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
+		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+
+		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+			roc_sso_hws_head_wait(work->base[!work->vws]);
+
+		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+		rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);
+		if (unlikely(rc)) {
+			rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+			return rc;
+		}
+
+		if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+			rte_wmb();
+			plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+			dpi_conf->stats.submitted += dpi_conf->pending + 1;
+			dpi_conf->pnum_words = 0;
+			dpi_conf->pending = 0;
+		} else {
+			dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+			dpi_conf->pending++;
+		}
+		rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+	}
+
+	return count;
+}
+
+uint16_t
+cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+	const struct rte_dma_sge *fptr, *lptr;
+	struct rte_event_dma_adapter_op *op;
+	struct cnxk_dpi_compl_s *comp_ptr;
+	struct cnxk_dpi_conf *dpi_conf;
+	struct cnxk_dpi_vf_s *dpivf;
+	struct rte_event *rsp_info;
+	struct cn9k_sso_hws *work;
+	uint16_t nb_src, nb_dst;
+	rte_mcslock_t mcs_lock_me;
+	uint64_t hdr[4];
+	uint16_t count;
+	int rc;
+
+	work = (struct cn9k_sso_hws *)ws;
+
+	for (count = 0; count < nb_events; count++) {
+		op = ev[count].event_ptr;
+		rsp_info = (struct rte_event *)((uint8_t *)op +
+						sizeof(struct rte_event_dma_adapter_op));
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpi_conf = &dpivf->conf[op->vchan];
+
+		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+			return count;
+
+		comp_ptr->op = op;
+		comp_ptr->dev_id = op->dma_dev_id;
+		comp_ptr->vchan = op->vchan;
+		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
+		hdr[2] = (uint64_t)comp_ptr;
+
+		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+		/*
+		 * For the inbound case, src pointers are the last pointers;
+		 * for all other cases, src pointers are the first pointers.
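+		 * (Same pointer-ordering rule as the dual-workslot path above.)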
+		 */
+		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+			fptr = &op->dst_seg[0];
+			lptr = &op->src_seg[0];
+			RTE_SWAP(nb_src, nb_dst);
+		} else {
+			fptr = &op->src_seg[0];
+			lptr = &op->dst_seg[0];
+		}
+
+		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
+		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+
+		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+			roc_sso_hws_head_wait(work->base);
+
+		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+		rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);
+		if (unlikely(rc)) {
+			rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+			return rc;
+		}
+
+		if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+			rte_wmb();
+			plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+			dpi_conf->stats.submitted += dpi_conf->pending + 1;
+			dpi_conf->pnum_words = 0;
+			dpi_conf->pending = 0;
+		} else {
+			dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+			dpi_conf->pending++;
+		}
+		rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+	}
+
+	return count;
+}
+
+uintptr_t
+cnxk_dma_adapter_dequeue(uintptr_t get_work1)
+{
+	struct rte_event_dma_adapter_op *op;
+	struct cnxk_dpi_compl_s *comp_ptr;
+	struct cnxk_dpi_conf *dpi_conf;
+	struct cnxk_dpi_vf_s *dpivf;
+	rte_mcslock_t mcs_lock_me;
+	uint8_t *wqecs;
+
+	comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
+
+	/* Dequeue can be called without a prior cnxk enqueue in the DMA
+	 * adapter case. The DMA op is then not embedded in a completion
+	 * pointer; get_work1 already points at the op, so return it as-is.
+	 */
+	if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
+		return (uintptr_t)comp_ptr;
+
+	dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
+	dpi_conf = &dpivf->conf[comp_ptr->vchan];
+
+	rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+	wqecs = (uint8_t *)&comp_ptr->wqecs;
+	if (__atomic_load_n(wqecs, __ATOMIC_RELAXED) != 0)
+		dpi_conf->stats.errors++;
+
+	/* Errors are also counted as completed, similar to
+	 * cnxk_dmadev_completed_status().
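+	 * The completed counter therefore always advances, even when the
+	 * WQE completion status (wqecs) reported an error.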
+	 */
+	dpi_conf->stats.completed++;
+	rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+
+	op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
+
+	rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+
+	return (uintptr_t)op;
+}
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index e557349368..8ccc1c2cb7 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -8,6 +8,13 @@ foreach flag: error_cflags
     endif
 endforeach
 
-deps += ['bus_pci', 'common_cnxk', 'dmadev']
+driver_sdk_headers = files(
+        'cnxk_dma_event_dp.h',
+)
+
+deps += ['bus_pci', 'common_cnxk', 'dmadev', 'eventdev']
+
+includes += include_directories('../../event/cnxk')
+
 sources = files('cnxk_dmadev.c', 'cnxk_dmadev_fp.c')
 
 require_iova_in_mbuf = false
diff --git a/drivers/dma/cnxk/version.map b/drivers/dma/cnxk/version.map
new file mode 100644
index 0000000000..a1490abf97
--- /dev/null
+++ b/drivers/dma/cnxk/version.map
@@ -0,0 +1,10 @@
+INTERNAL {
+	global:
+
+	cn10k_dma_adapter_enqueue;
+	cn9k_dma_adapter_enqueue;
+	cn9k_dma_adapter_dual_enqueue;
+	cnxk_dma_adapter_dequeue;
+
+	local: *;
+};
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 5e6305947b..00a87b3bcd 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -460,6 +460,7 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 	event_dev->ca_enqueue = cn9k_sso_hws_ca_enq;
+	event_dev->dma_enqueue = cn9k_dma_adapter_enqueue;
 
 	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
 		CN9K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue,
@@ -475,6 +476,7 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst;
 
 	event_dev->ca_enqueue = cn9k_sso_hws_dual_ca_enq;
+	event_dev->dma_enqueue = cn9k_dma_adapter_dual_enqueue;
 	event_dev->profile_switch = cn9k_sso_hws_dual_profile_switch;
 
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
-- 
2.34.1