Added cnxk driver support for DMA event enqueue and dequeue.
Also added handling of the work queue entry completion status (WQECS)
and dual workslot DMA event enqueue.

Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
Signed-off-by: Amit Prakash Shukla <amitpraka...@marvell.com>
---
v3:
- Rebased and fixed compilation error.

v2:
- Added dual workslot enqueue support.
- Fixed compilation error.

 doc/guides/eventdevs/cnxk.rst        |   5 +
 drivers/dma/cnxk/cnxk_dma_event_dp.h |  24 +++
 drivers/dma/cnxk/cnxk_dmadev.c       |   3 +-
 drivers/dma/cnxk/cnxk_dmadev.h       |  20 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c    | 290 +++++++++++++++++++++++++++
 drivers/dma/cnxk/meson.build         |   9 +-
 drivers/dma/cnxk/version.map         |  10 +
 drivers/event/cnxk/cn9k_eventdev.c   |   2 +
 8 files changed, 360 insertions(+), 3 deletions(-)
 create mode 100644 drivers/dma/cnxk/cnxk_dma_event_dp.h
 create mode 100644 drivers/dma/cnxk/version.map

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index cccb8a0304..9ff1052c53 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -227,3 +227,8 @@ ethernet devices connected to event device to override this 
applications can
 use `force_rx_bp=1` device arguments.
 Using unique mempool per each ethernet device is recommended when they are
 connected to event device.
+
+DMA adapter new mode support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The cnxk DMA driver does not support the event DMA adapter configured in
+``RTE_EVENT_DMA_ADAPTER_OP_NEW`` mode.
diff --git a/drivers/dma/cnxk/cnxk_dma_event_dp.h 
b/drivers/dma/cnxk/cnxk_dma_event_dp.h
new file mode 100644
index 0000000000..5f890ab18b
--- /dev/null
+++ b/drivers/dma/cnxk/cnxk_dma_event_dp.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#ifndef _CNXK_DMA_EVENT_DP_H_
+#define _CNXK_DMA_EVENT_DP_H_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_eventdev.h>
+
+__rte_internal
+uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t 
nb_events);
+
+__rte_internal
+uint16_t cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t 
nb_events);
+
+__rte_internal
+uint16_t cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], 
uint16_t nb_events);
+
+__rte_internal
+uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1);
+#endif /* _CNXK_DMA_EVENT_DP_H_ */
diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 48ab09cc38..4ab3cfbdf2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -589,10 +589,11 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv 
__rte_unused, struct rte_pci_de
                dmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg;
        }
 
+       dpivf->mcs_lock = NULL;
        rdpi = &dpivf->rdpi;
 
        rdpi->pci_dev = pci_dev;
-       rc = roc_dpi_dev_init(rdpi, 0);
+       rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
        if (rc < 0)
                goto err_out_free;
 
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 350ae73b5c..610a360ba2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -14,11 +14,14 @@
 #include <rte_eal.h>
 #include <rte_lcore.h>
 #include <rte_mbuf_pool_ops.h>
+#include <rte_mcslock.h>
 #include <rte_mempool.h>
 #include <rte_pci.h>
 
 #include <roc_api.h>
 
+#include "cnxk_dma_event_dp.h"
+
 #define CNXK_DPI_MAX_POINTER               15
 #define CNXK_DPI_STRM_INC(s, var)          ((s).var = ((s).var + 1) & 
(s).max_cnt)
 #define CNXK_DPI_STRM_DEC(s, var)          ((s).var = ((s).var - 1) == -1 ? 
(s).max_cnt :      \
@@ -40,6 +43,11 @@
  */
 #define CNXK_DPI_REQ_CDATA 0xFF
 
+/* Set completion data to 0xDEADBEEF when a request is submitted for SSO.
+ * This helps differentiate whether dequeue is called after a cnxk enqueue.
+ */
+#define CNXK_DPI_REQ_SSO_CDATA    0xDEADBEEF
+
 union cnxk_dpi_instr_cmd {
        uint64_t u;
        struct cn9k_dpi_instr_cmd {
@@ -85,7 +93,10 @@ union cnxk_dpi_instr_cmd {
 
 struct cnxk_dpi_compl_s {
        uint64_t cdata;
-       void *cb_data;
+       void *op;
+       uint16_t dev_id;
+       uint16_t vchan;
+       uint32_t wqecs;
 };
 
 struct cnxk_dpi_cdesc_data_s {
@@ -95,6 +106,11 @@ struct cnxk_dpi_cdesc_data_s {
        uint16_t tail;
 };
 
+struct cnxk_dma_adapter_info {
+       bool enabled;               /* Set if the vchan queue is added to the DMA adapter. */
+       struct rte_mempool *req_mp; /* Mempool of in-flight DMA request completion entries. */
+};
+
 struct cnxk_dpi_conf {
        union cnxk_dpi_instr_cmd cmd;
        struct cnxk_dpi_cdesc_data_s c_desc;
@@ -103,6 +119,7 @@ struct cnxk_dpi_conf {
        uint16_t desc_idx;
        struct rte_dma_stats stats;
        uint64_t completed_offset;
+       struct cnxk_dma_adapter_info adapter_info;
 };
 
 struct cnxk_dpi_vf_s {
@@ -112,6 +129,7 @@ struct cnxk_dpi_vf_s {
        uint16_t chunk_size_m1;
        struct rte_mempool *chunk_pool;
        struct cnxk_dpi_conf conf[CNXK_DPI_MAX_VCHANS_PER_QUEUE];
+       RTE_ATOMIC(rte_mcslock_t *) mcs_lock;
        /* Slow path */
        struct roc_dpi rdpi;
        uint32_t aura;
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c 
b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 95df19a2db..009a871e43 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -5,6 +5,10 @@
 #include <rte_vect.h>
 
 #include "cnxk_dmadev.h"
+#include <rte_event_dma_adapter.h>
+
+#include <cn10k_eventdev.h>
+#include <cnxk_eventdev.h>
 
 static __plt_always_inline void
 __dpi_cpy_scalar(uint64_t *src, uint64_t *dst, uint8_t n)
@@ -434,3 +438,289 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, 
const struct rte_dma_sge
 
        return dpi_conf->desc_idx++;
 }
+
+/*
+ * Build the event word placed in the DPI instruction header from the 64-bit
+ * response event supplied with a DMA adapter op.
+ */
+static inline uint64_t
+cnxk_dma_adapter_format_event(uint64_t event)
+{
+       /* Bits [47:38] of the response event, moved down by 6 to bits [41:32]. */
+       const uint64_t upper = (event & 0xFFC000000000) >> 6;
+       /* Bits [27:0] of the response event are kept in place. */
+       const uint64_t lower = event & 0xFFFFFFF;
+
+       /* Bits [31:28] of the input are dropped; RTE_EVENT_TYPE_DMADEV is
+        * OR-ed in starting at bit 28.
+        */
+       return upper | lower | RTE_EVENT_TYPE_DMADEV << 28;
+}
+
+/*
+ * Enqueue DMA adapter ops carried by events from a CN10K SSO workslot.
+ *
+ * ws        - workslot, used as struct cn10k_sso_hws.
+ * ev        - events whose event_ptr is a struct rte_event_dma_adapter_op; the
+ *             response event is read from the bytes right after the op.
+ * nb_events - number of entries in ev.
+ *
+ * Returns the number of events queued. Processing stops at the first event
+ * for which no completion entry is available or the queue write fails.
+ */
+uint16_t
+cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+       const struct rte_dma_sge *src, *dst;
+       struct rte_event_dma_adapter_op *op;
+       struct cnxk_dpi_compl_s *comp_ptr;
+       struct cnxk_dpi_conf *dpi_conf;
+       struct cnxk_dpi_vf_s *dpivf;
+       struct rte_event *rsp_info;
+       struct cn10k_sso_hws *work;
+       uint16_t nb_src, nb_dst;
+       rte_mcslock_t mcs_lock_me;
+       uint64_t hdr[4];
+       uint16_t count;
+       int rc;
+
+       work = (struct cn10k_sso_hws *)ws;
+
+       for (count = 0; count < nb_events; count++) {
+               op = ev[count].event_ptr;
+               /* The response event is stored immediately after the op. */
+               rsp_info = (struct rte_event *)((uint8_t *)op +
+                            sizeof(struct rte_event_dma_adapter_op));
+               dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+               dpi_conf = &dpivf->conf[op->vchan];
+
+               if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+                       return count;
+
+               /* Record what dequeue needs to find the op and its vchan again. */
+               comp_ptr->op = op;
+               comp_ptr->dev_id = op->dma_dev_id;
+               comp_ptr->vchan = op->vchan;
+               comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+               nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+               nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+
+               /* NOTE(review): hdr[3] is not written in this function; confirm
+                * __dpi_queue_write_sg() does not read it.
+                */
+               hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
+               hdr[0] |= (nb_dst << 6) | nb_src;
+               hdr[1] = ((uint64_t)comp_ptr);
+               hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+
+               src = &op->src_seg[0];
+               dst = &op->dst_seg[0];
+
+               if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
+                   ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
+                   (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
+                           RTE_SCHED_TYPE_ORDERED))
+                       roc_sso_hws_head_wait(work->base);
+
+               rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+               rc = __dpi_queue_write_sg(dpivf, hdr, src, dst, nb_src, nb_dst);
+               if (unlikely(rc)) {
+                       rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+                       /* The queue write failed for this op: give the completion
+                        * entry back and report only the events already queued.
+                        * rc is an int error code and must not be returned as an
+                        * event count.
+                        */
+                       rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+                       return count;
+               }
+
+               if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+                       rte_wmb();
+                       /* Write the accumulated word count, including this
+                        * command, to the doorbell register.
+                        */
+                       plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+                       dpi_conf->stats.submitted += dpi_conf->pending + 1;
+                       dpi_conf->pnum_words = 0;
+                       dpi_conf->pending = 0;
+               } else {
+                       /* Defer the doorbell: remember the words and the op. */
+                       dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+                       dpi_conf->pending++;
+               }
+               rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+       }
+
+       return count;
+}
+
+/*
+ * Enqueue DMA adapter ops carried by events from a CN9K dual workslot.
+ *
+ * ws        - workslot, used as struct cn9k_sso_hws_dual.
+ * ev        - events whose event_ptr is a struct rte_event_dma_adapter_op; the
+ *             response event is read from the bytes right after the op.
+ * nb_events - number of entries in ev.
+ *
+ * Returns the number of events queued. Processing stops at the first event
+ * for which no completion entry is available or the queue write fails.
+ */
+uint16_t
+cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+       const struct rte_dma_sge *fptr, *lptr;
+       struct rte_event_dma_adapter_op *op;
+       struct cnxk_dpi_compl_s *comp_ptr;
+       struct cn9k_sso_hws_dual *work;
+       struct cnxk_dpi_conf *dpi_conf;
+       struct cnxk_dpi_vf_s *dpivf;
+       struct rte_event *rsp_info;
+       uint16_t nb_src, nb_dst;
+       rte_mcslock_t mcs_lock_me;
+       uint64_t hdr[4];
+       uint16_t count;
+       int rc;
+
+       work = (struct cn9k_sso_hws_dual *)ws;
+
+       for (count = 0; count < nb_events; count++) {
+               op = ev[count].event_ptr;
+               /* The response event is stored immediately after the op. */
+               rsp_info = (struct rte_event *)((uint8_t *)op +
+                                               sizeof(struct rte_event_dma_adapter_op));
+               dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+               dpi_conf = &dpivf->conf[op->vchan];
+
+               if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+                       return count;
+
+               /* Record what dequeue needs to find the op and its vchan again. */
+               comp_ptr->op = op;
+               comp_ptr->dev_id = op->dma_dev_id;
+               comp_ptr->vchan = op->vchan;
+               comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+               /* NOTE(review): hdr[3] is not written in this function; confirm
+                * __dpi_queue_write_sg() does not read it.
+                */
+               hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
+               hdr[2] = (uint64_t)comp_ptr;
+
+               nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+               nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+               /*
+                * For inbound case, src pointers are last pointers.
+                * For all other cases, src pointers are first pointers.
+                * After the swap nb_src/nb_dst hold the first/last pointer counts.
+                */
+               if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+                       fptr = &op->dst_seg[0];
+                       lptr = &op->src_seg[0];
+                       RTE_SWAP(nb_src, nb_dst);
+               } else {
+                       fptr = &op->src_seg[0];
+                       lptr = &op->dst_seg[0];
+               }
+
+               hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
+               hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+
+               if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+                       roc_sso_hws_head_wait(work->base[!work->vws]);
+
+               rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+               rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);
+               if (unlikely(rc)) {
+                       rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+                       /* The queue write failed for this op: give the completion
+                        * entry back and report only the events already queued.
+                        * rc is an int error code and must not be returned as an
+                        * event count.
+                        */
+                       rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+                       return count;
+               }
+
+               if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+                       rte_wmb();
+                       /* Write the accumulated word count, including this
+                        * command, to the doorbell register.
+                        */
+                       plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+                       dpi_conf->stats.submitted += dpi_conf->pending + 1;
+                       dpi_conf->pnum_words = 0;
+                       dpi_conf->pending = 0;
+               } else {
+                       /* Defer the doorbell: remember the words and the op. */
+                       dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+                       dpi_conf->pending++;
+               }
+               rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+       }
+
+       return count;
+}
+
+/*
+ * Enqueue DMA adapter ops carried by events from a CN9K SSO workslot.
+ *
+ * ws        - workslot, used as struct cn9k_sso_hws.
+ * ev        - events whose event_ptr is a struct rte_event_dma_adapter_op; the
+ *             response event is read from the bytes right after the op.
+ * nb_events - number of entries in ev.
+ *
+ * Returns the number of events queued. Processing stops at the first event
+ * for which no completion entry is available or the queue write fails.
+ */
+uint16_t
+cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+       const struct rte_dma_sge *fptr, *lptr;
+       struct rte_event_dma_adapter_op *op;
+       struct cnxk_dpi_compl_s *comp_ptr;
+       struct cnxk_dpi_conf *dpi_conf;
+       struct cnxk_dpi_vf_s *dpivf;
+       struct rte_event *rsp_info;
+       struct cn9k_sso_hws *work;
+       uint16_t nb_src, nb_dst;
+       rte_mcslock_t mcs_lock_me;
+       uint64_t hdr[4];
+       uint16_t count;
+       int rc;
+
+       work = (struct cn9k_sso_hws *)ws;
+
+       for (count = 0; count < nb_events; count++) {
+               op = ev[count].event_ptr;
+               /* The response event is stored immediately after the op. */
+               rsp_info = (struct rte_event *)((uint8_t *)op +
+                           sizeof(struct rte_event_dma_adapter_op));
+               dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+               dpi_conf = &dpivf->conf[op->vchan];
+
+               if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+                       return count;
+
+               /* Record what dequeue needs to find the op and its vchan again. */
+               comp_ptr->op = op;
+               comp_ptr->dev_id = op->dma_dev_id;
+               comp_ptr->vchan = op->vchan;
+               comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+               /* NOTE(review): hdr[3] is not written in this function; confirm
+                * __dpi_queue_write_sg() does not read it.
+                */
+               hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
+               hdr[2] = (uint64_t)comp_ptr;
+
+               nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+               nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+               /*
+                * For inbound case, src pointers are last pointers.
+                * For all other cases, src pointers are first pointers.
+                * After the swap nb_src/nb_dst hold the first/last pointer counts.
+                */
+               if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+                       fptr = &op->dst_seg[0];
+                       lptr = &op->src_seg[0];
+                       RTE_SWAP(nb_src, nb_dst);
+               } else {
+                       fptr = &op->src_seg[0];
+                       lptr = &op->dst_seg[0];
+               }
+
+               hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
+               hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+
+               if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+                       roc_sso_hws_head_wait(work->base);
+
+               rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+               rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);
+               if (unlikely(rc)) {
+                       rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+                       /* The queue write failed for this op: give the completion
+                        * entry back and report only the events already queued.
+                        * rc is an int error code and must not be returned as an
+                        * event count.
+                        */
+                       rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+                       return count;
+               }
+
+               if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+                       rte_wmb();
+                       /* Write the accumulated word count, including this
+                        * command, to the doorbell register.
+                        */
+                       plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+                       dpi_conf->stats.submitted += dpi_conf->pending + 1;
+                       dpi_conf->pnum_words = 0;
+                       dpi_conf->pending = 0;
+               } else {
+                       /* Defer the doorbell: remember the words and the op. */
+                       dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+                       dpi_conf->pending++;
+               }
+               rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+       }
+
+       return count;
+}
+
+/*
+ * Convert a dequeued work pointer into the DMA adapter op it refers to.
+ *
+ * get_work1 is interpreted as a struct cnxk_dpi_compl_s. If it carries the
+ * CNXK_DPI_REQ_SSO_CDATA marker, the vchan statistics are updated, the
+ * completion entry is returned to its mempool and the stored op pointer is
+ * returned. Otherwise get_work1 is returned unchanged.
+ */
+uintptr_t
+cnxk_dma_adapter_dequeue(uintptr_t get_work1)
+{
+       struct cnxk_dpi_compl_s *entry = (struct cnxk_dpi_compl_s *)get_work1;
+       struct cnxk_dpi_conf *vchan_conf;
+       struct cnxk_dpi_vf_s *vf;
+       rte_mcslock_t lock_node;
+       uintptr_t op_addr;
+
+       /* Dequeue can be called without a preceding cnxk enqueue when the DMA
+        * adapter is used. When it is called from the adapter, the DMA op is
+        * not embedded in a completion entry, so hand the pointer back as is.
+        */
+       if (entry->cdata != CNXK_DPI_REQ_SSO_CDATA)
+               return get_work1;
+
+       vf = rte_dma_fp_objs[entry->dev_id].dev_private;
+       vchan_conf = &vf->conf[entry->vchan];
+
+       rte_mcslock_lock(&vf->mcs_lock, &lock_node);
+       /* Only the first byte of wqecs is inspected; non-zero counts as an error. */
+       if (__atomic_load_n((uint8_t *)&entry->wqecs, __ATOMIC_RELAXED) != 0)
+               vchan_conf->stats.errors++;
+       /* Errors are counted as completed too, similar to
+        * cnxk_dmadev_completed_status().
+        */
+       vchan_conf->stats.completed++;
+       rte_mcslock_unlock(&vf->mcs_lock, &lock_node);
+
+       /* Save the op before the entry goes back to the pool. */
+       op_addr = (uintptr_t)entry->op;
+       rte_mempool_put(vchan_conf->adapter_info.req_mp, entry);
+
+       return op_addr;
+}
diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build
index e557349368..8ccc1c2cb7 100644
--- a/drivers/dma/cnxk/meson.build
+++ b/drivers/dma/cnxk/meson.build
@@ -8,6 +8,13 @@ foreach flag: error_cflags
     endif
 endforeach
 
-deps += ['bus_pci', 'common_cnxk', 'dmadev']
+driver_sdk_headers = files(
+        'cnxk_dma_event_dp.h',
+)
+
+deps += ['bus_pci', 'common_cnxk', 'dmadev', 'eventdev']
+
+includes += include_directories('../../event/cnxk')
+
 sources = files('cnxk_dmadev.c', 'cnxk_dmadev_fp.c')
 require_iova_in_mbuf = false
diff --git a/drivers/dma/cnxk/version.map b/drivers/dma/cnxk/version.map
new file mode 100644
index 0000000000..a1490abf97
--- /dev/null
+++ b/drivers/dma/cnxk/version.map
@@ -0,0 +1,10 @@
+INTERNAL {
+       global:
+
+       cn10k_dma_adapter_enqueue;
+       cn9k_dma_adapter_enqueue;
+       cn9k_dma_adapter_dual_enqueue;
+       cnxk_dma_adapter_dequeue;
+
+       local: *;
+};
diff --git a/drivers/event/cnxk/cn9k_eventdev.c 
b/drivers/event/cnxk/cn9k_eventdev.c
index 5e6305947b..00a87b3bcd 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -460,6 +460,7 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
                }
        }
        event_dev->ca_enqueue = cn9k_sso_hws_ca_enq;
+       event_dev->dma_enqueue = cn9k_dma_adapter_enqueue;
 
        if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
                CN9K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue,
@@ -475,6 +476,7 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
                event_dev->enqueue_forward_burst =
                        cn9k_sso_hws_dual_enq_fwd_burst;
                event_dev->ca_enqueue = cn9k_sso_hws_dual_ca_enq;
+               event_dev->dma_enqueue = cn9k_dma_adapter_dual_enqueue;
                event_dev->profile_switch = cn9k_sso_hws_dual_profile_switch;
 
                if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
-- 
2.34.1

Reply via email to