From: Nipun Gupta <nipun.gu...@nxp.com>

This patch adds support for the copy, submit, completed and
completed status operations of the DMA driver.

Signed-off-by: Nipun Gupta <nipun.gu...@nxp.com>
---
 doc/guides/dmadevs/dpaa2.rst           |   10 +
 drivers/dma/dpaa2/dpaa2_qdma.c         | 1173 ++++++++++++++++++++++++
 drivers/dma/dpaa2/dpaa2_qdma.h         |   71 +-
 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h |   77 ++
 drivers/dma/dpaa2/version.map          |    2 +
 5 files changed, 1268 insertions(+), 65 deletions(-)

diff --git a/doc/guides/dmadevs/dpaa2.rst b/doc/guides/dmadevs/dpaa2.rst
index 84e0db10d6..0fad9fabe0 100644
--- a/doc/guides/dmadevs/dpaa2.rst
+++ b/doc/guides/dmadevs/dpaa2.rst
@@ -12,6 +12,16 @@ via using the DPDMAI device exposed by MC.
 More information can be found at `NXP Official Website
 <http://www.nxp.com/products/microcontrollers-and-processors/arm-processors/qoriq-arm-processors:QORIQ-ARM>`_.
 
+Features
+--------
+
+The DPAA2 QDMA implements the following features in the dmadev API:
+
+- Supports issuing DMA of data within memory without hogging CPU while
+  performing DMA operation.
+- Supports configuring to optionally get status of the DMA translation on
+  per DMA operation basis.
+
 Supported DPAA2 SoCs
 --------------------
 
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 54db806736..f1f92b5465 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -13,12 +13,1102 @@
 #include "rte_pmd_dpaa2_qdma.h"
 #include "dpaa2_qdma.h"
 #include "dpaa2_qdma_logs.h"
+
+#define DPAA2_QDMA_PREFETCH "prefetch"
+
 /* Dynamic log type identifier */
 int dpaa2_qdma_logtype;
 
 uint32_t dpaa2_coherent_no_alloc_cache;
 uint32_t dpaa2_coherent_alloc_cache;
 
+/*
+ * Fill the "simple_pci" view of a frame descriptor for one copy.
+ *
+ * @src, @dest: source and destination addresses, stored split into their
+ *              low and high 32-bit halves.
+ * @len:        transfer length.
+ * @fd:         frame descriptor to fill.
+ * @rbp:        supplies the source/destination port IDs and RBP flags.
+ * @ser:        stored verbatim in the descriptor's ser field.
+ *
+ * Always returns 0.
+ */
+static inline int
+qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
+                    uint32_t len, struct qbman_fd *fd,
+                    struct rte_dpaa2_qdma_rbp *rbp, int ser)
+{
+       const uint64_t src_addr = (uint64_t)src;
+       const uint64_t dst_addr = (uint64_t)dest;
+
+       /* Source address and transfer length */
+       fd->simple_pci.saddr_lo = lower_32_bits(src_addr);
+       fd->simple_pci.saddr_hi = upper_32_bits(src_addr);
+       fd->simple_pci.len_sl = len;
+
+       /* Fixed descriptor attributes plus the caller supplied ser value */
+       fd->simple_pci.bmt = 1;
+       fd->simple_pci.fmt = 3;
+       fd->simple_pci.sl = 1;
+       fd->simple_pci.ser = ser;
+
+       /* Read side: transaction type is 0 when the source uses RBP */
+       fd->simple_pci.sportid = rbp->sportid;
+       fd->simple_pci.srbp = rbp->srbp;
+       fd->simple_pci.rdttype = rbp->srbp ? 0 : dpaa2_coherent_alloc_cache;
+
+       /* Write side: transaction type is 0 when the destination uses RBP */
+       fd->simple_pci.dportid = rbp->dportid;
+       fd->simple_pci.drbp = rbp->drbp;
+       fd->simple_pci.wrttype = rbp->drbp ? 0 : dpaa2_coherent_no_alloc_cache;
+
+       /* Destination address */
+       fd->simple_pci.daddr_lo = lower_32_bits(dst_addr);
+       fd->simple_pci.daddr_hi = upper_32_bits(dst_addr);
+
+       return 0;
+}
+
+/*
+ * Fill the "simple_ddr" view of a frame descriptor for one copy.
+ *
+ * @src, @dest: source and destination addresses, stored split into their
+ *              low and high 32-bit halves.
+ * @len:        transfer length.
+ * @fd:         frame descriptor to fill.
+ * @ser:        stored verbatim in the descriptor's ser field.
+ *
+ * Always returns 0.
+ */
+static inline int
+qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
+                    uint32_t len, struct qbman_fd *fd, int ser)
+{
+       const uint64_t src_addr = (uint64_t)src;
+       const uint64_t dst_addr = (uint64_t)dest;
+
+       /* Source address and transfer length */
+       fd->simple_ddr.saddr_lo = lower_32_bits(src_addr);
+       fd->simple_ddr.saddr_hi = upper_32_bits(src_addr);
+       fd->simple_ddr.len = len;
+
+       /* Fixed descriptor attributes plus the caller supplied ser value */
+       fd->simple_ddr.bmt = 1;
+       fd->simple_ddr.fmt = 3;
+       fd->simple_ddr.sl = 1;
+       fd->simple_ddr.ser = ser;
+
+       /*
+        * Read attributes, RBP=0 case: {NS,RDTTYPE[3:0]}.
+        * NOTE(review): the previous comment described this as 0_1011,
+        * "coherent copy of cacheable memory, lookup in downstream cache,
+        * no allocate on miss", yet the value programmed is
+        * dpaa2_coherent_alloc_cache - confirm which one is intended.
+        */
+       fd->simple_ddr.rns = 0;
+       fd->simple_ddr.rdttype = dpaa2_coherent_alloc_cache;
+
+       /*
+        * Write attributes, RBP=0 case: {NS,WRTTYPE[3:0]}.
+        * Previously documented as 0_0111, coherent write of cacheable
+        * memory, lookup in downstream cache, no allocate on miss.
+        */
+       fd->simple_ddr.wns = 0;
+       fd->simple_ddr.wrttype = dpaa2_coherent_no_alloc_cache;
+
+       /* Destination address */
+       fd->simple_ddr.daddr_lo = lower_32_bits(dst_addr);
+       fd->simple_ddr.daddr_hi = upper_32_bits(dst_addr);
+
+       return 0;
+}
+
+/*
+ * Build the three frame list entries that start at @fle and the pair of
+ * source/destination descriptors (SDDs) that belong to them.
+ *
+ * @fle, @fle_iova: virtual address and IOVA of the first frame list entry.
+ *                  The SDD pair is located relative to both through
+ *                  QDMA_FLE_FLE_OFFSET and QDMA_FLE_SDD_OFFSET.
+ * @rbp:            when non-NULL and enabled, its port/pf/vf IDs and RBP
+ *                  flags are copied into the SDDs; otherwise only the
+ *                  default read/write types are set.
+ * @src, @dest:     used as-is when the matching RTE_DPAA2_QDMA_JOB_*_PHY bit
+ *                  is set in @flags, otherwise passed through
+ *                  DPAA2_VADDR_TO_IOVA() first.
+ * @len, @fmt:      written to both the source and the destination entry.
+ *
+ * Only the fields listed here are written; nothing is cleared.
+ */
+static void
+dpaa2_qdma_populate_fle(struct qbman_fle *fle,
+                       uint64_t fle_iova,
+                       struct rte_dpaa2_qdma_rbp *rbp,
+                       uint64_t src, uint64_t dest,
+                       size_t len, uint32_t flags, uint32_t fmt)
+{
+       struct qbman_fle *sdd_fle = fle;
+       struct qbman_fle *src_fle = fle + 1;
+       struct qbman_fle *dst_fle = fle + 2;
+       struct qdma_sdd *rd_sdd, *wr_sdd;
+       uint64_t sdd_iova;
+
+       rd_sdd = (struct qdma_sdd *)
+                       ((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET +
+                       QDMA_FLE_SDD_OFFSET);
+       wr_sdd = rd_sdd + 1;
+       sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
+
+       /* Entry 0 references the two SDDs */
+       DPAA2_SET_FLE_ADDR(sdd_fle, sdd_iova);
+       DPAA2_SET_FLE_LEN(sdd_fle, (2 * (sizeof(struct qdma_sdd))));
+
+       if (!rbp || !rbp->enable) {
+               /* No RBP information: default read and write types only */
+               rd_sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
+               wr_sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+       } else {
+               /* Read descriptor */
+               rd_sdd->read_cmd.portid = rbp->sportid;
+               rd_sdd->rbpcmd_simple.pfid = rbp->spfid;
+               rd_sdd->rbpcmd_simple.vfid = rbp->svfid;
+               if (rbp->srbp) {
+                       rd_sdd->read_cmd.rbp = rbp->srbp;
+                       rd_sdd->read_cmd.rdtype = DPAA2_RBP_MEM_RW;
+               } else {
+                       rd_sdd->read_cmd.rdtype =
+                               dpaa2_coherent_no_alloc_cache;
+               }
+
+               /* Write descriptor */
+               wr_sdd->write_cmd.portid = rbp->dportid;
+               wr_sdd->rbpcmd_simple.pfid = rbp->dpfid;
+               wr_sdd->rbpcmd_simple.vfid = rbp->dvfid;
+               if (rbp->drbp) {
+                       wr_sdd->write_cmd.rbp = rbp->drbp;
+                       wr_sdd->write_cmd.wrttype = DPAA2_RBP_MEM_RW;
+               } else {
+                       wr_sdd->write_cmd.wrttype =
+                               dpaa2_coherent_alloc_cache;
+               }
+       }
+
+       /* Entry 1 references the source buffer */
+       if (flags & RTE_DPAA2_QDMA_JOB_SRC_PHY) {
+               DPAA2_SET_FLE_ADDR(src_fle, src);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+               DPAA2_SET_FLE_BMT(src_fle);
+#endif
+       } else {
+               DPAA2_SET_FLE_ADDR(src_fle, DPAA2_VADDR_TO_IOVA(src));
+       }
+       src_fle->word4.fmt = fmt;
+       DPAA2_SET_FLE_LEN(src_fle, len);
+
+       /* Entry 2 references the destination buffer */
+       if (flags & RTE_DPAA2_QDMA_JOB_DEST_PHY) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+               DPAA2_SET_FLE_BMT(dst_fle);
+#endif
+               DPAA2_SET_FLE_ADDR(dst_fle, dest);
+       } else {
+               DPAA2_SET_FLE_ADDR(dst_fle, DPAA2_VADDR_TO_IOVA(dest));
+       }
+       dst_fle->word4.fmt = fmt;
+       DPAA2_SET_FLE_LEN(dst_fle, len);
+
+       /* The destination entry is the last one of the list */
+       DPAA2_SET_FLE_FIN(dst_fle);
+}
+
+/*
+ * Build one short-format frame descriptor per job.
+ *
+ * For every job the job pointer is stored in the pointer-sized slot located
+ * immediately before the virtual address of one of its buffers, so that
+ * dpdmai_dev_get_job_us() can recover it from the completed descriptor.
+ * The destination buffer is used when the source address has any
+ * QDMA_RBP_UPPER_ADDRESS_MASK bit set, the source buffer otherwise.
+ * NOTE(review): this assumes the caller reserved that slot in front of the
+ * buffer - confirm against the public API documentation.
+ *
+ * @qdma_vq: virtual queue; supplies the RBP settings, vq_id and flags.
+ * @fd:      array of at least @nb_jobs descriptors to fill.
+ * @job:     array of @nb_jobs job pointers.
+ * @nb_jobs: number of jobs.
+ *
+ * Returns the result of the last descriptor populate call (0 when
+ * @nb_jobs is 0).
+ */
+static inline int
+dpdmai_dev_set_fd_us(struct qdma_virt_queue *qdma_vq,
+                    struct qbman_fd *fd,
+                    struct rte_dpaa2_qdma_job **job,
+                    uint16_t nb_jobs)
+{
+       struct rte_dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
+       struct rte_dpaa2_qdma_job **ppjob;
+       size_t iova;
+       int ret = 0, loop;
+       /* A status response is requested unless the queue disabled it */
+       int ser = (qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE) ?
+                               0 : 1;
+
+       for (loop = 0; loop < nb_jobs; loop++) {
+               if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
+                       iova = (size_t)job[loop]->dest;
+               else
+                       iova = (size_t)job[loop]->src;
+
+               /* Set the metadata */
+               job[loop]->vq_id = qdma_vq->vq_id;
+               /* The cast binds first: step back one pointer-sized slot */
+               ppjob = (struct rte_dpaa2_qdma_job **)
+                               DPAA2_IOVA_TO_VADDR(iova) - 1;
+               *ppjob = job[loop];
+
+               if ((rbp->drbp == 1) || (rbp->srbp == 1))
+                       ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
+                                       (phys_addr_t)job[loop]->dest,
+                                       job[loop]->len, &fd[loop], rbp, ser);
+               else
+                       ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
+                                       (phys_addr_t)job[loop]->dest,
+                                       job[loop]->len, &fd[loop], ser);
+       }
+
+       return ret;
+}
+
+/*
+ * Fill @nb_jobs consecutive source and destination scatter-gather entries,
+ * one pair per job.
+ *
+ * An address is used as-is when the job carries the matching
+ * RTE_DPAA2_QDMA_JOB_*_PHY flag and is passed through DPAA2_VADDR_TO_IOVA()
+ * otherwise. The entries built for the last job get QDMA_SG_F in their f
+ * field, all others get 0.
+ *
+ * Returns the sum of the job lengths.
+ */
+static uint32_t
+qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
+                      struct qdma_sg_entry *src_sge,
+                      struct qdma_sg_entry *dst_sge,
+                      uint16_t nb_jobs)
+{
+       uint32_t sum = 0;
+       uint64_t addr;
+       uint16_t idx;
+
+       for (idx = 0; idx < nb_jobs; idx++) {
+               struct rte_dpaa2_qdma_job *cur = jobs[idx];
+               struct qdma_sg_entry *s_ent = &src_sge[idx];
+               struct qdma_sg_entry *d_ent = &dst_sge[idx];
+
+               /* Source entry */
+               if (likely(cur->flags & RTE_DPAA2_QDMA_JOB_SRC_PHY)) {
+                       s_ent->addr_lo = (uint32_t)cur->src;
+                       s_ent->addr_hi = (cur->src >> 32);
+               } else {
+                       addr = DPAA2_VADDR_TO_IOVA(cur->src);
+                       s_ent->addr_lo = (uint32_t)addr;
+                       s_ent->addr_hi = addr >> 32;
+               }
+               s_ent->data_len.data_len_sl0 = cur->len;
+               s_ent->ctrl.sl = QDMA_SG_SL_LONG;
+               s_ent->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+               s_ent->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+               s_ent->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+
+               /* Destination entry */
+               if (likely(cur->flags & RTE_DPAA2_QDMA_JOB_DEST_PHY)) {
+                       d_ent->addr_lo = (uint32_t)cur->dest;
+                       d_ent->addr_hi = (cur->dest >> 32);
+               } else {
+                       addr = DPAA2_VADDR_TO_IOVA(cur->dest);
+                       d_ent->addr_lo = (uint32_t)addr;
+                       d_ent->addr_hi = addr >> 32;
+               }
+               d_ent->data_len.data_len_sl0 = cur->len;
+               d_ent->ctrl.sl = QDMA_SG_SL_LONG;
+               d_ent->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+               d_ent->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+               d_ent->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+
+               sum += cur->len;
+
+               /* Only the final pair carries the f marker */
+               if (idx == (nb_jobs - 1)) {
+                       s_ent->ctrl.f = QDMA_SG_F;
+                       d_ent->ctrl.f = QDMA_SG_F;
+               } else {
+                       s_ent->ctrl.f = 0;
+                       d_ent->ctrl.f = 0;
+               }
+       }
+
+       return sum;
+}
+
+/*
+ * Build one compound-format frame descriptor per job, using the element
+ * each job provides in its usr_elem field as FLE/SDD storage.
+ *
+ * For every job: the job pointer is saved inside the element at
+ * QDMA_FLE_SINGLE_JOB_OFFSET, the job's vq_id is set, the descriptor is
+ * pointed at the frame list (element + QDMA_FLE_FLE_OFFSET), and the frame
+ * list area is cleared and then populated.
+ *
+ * Always returns 0.
+ */
+static inline int
+dpdmai_dev_set_multi_fd_lf_no_rsp(struct qdma_virt_queue *qdma_vq,
+                                 struct qbman_fd *fd,
+                                 struct rte_dpaa2_qdma_job **job,
+                                 uint16_t nb_jobs)
+{
+       /* Bytes cleared at the frame list before it is populated */
+       const size_t clear_len =
+               DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+               DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd);
+       struct rte_dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
+       uint16_t idx;
+
+       for (idx = 0; idx < nb_jobs; idx++) {
+               struct rte_dpaa2_qdma_job *cur = job[idx];
+               struct qbman_fd *cur_fd = &fd[idx];
+               struct rte_dpaa2_qdma_job **slot;
+               struct qbman_fle *fle;
+               uint64_t elem_iova, fle_iova;
+               void *elem = cur->usr_elem;
+               uintptr_t base = (uintptr_t)(uint64_t)elem;
+
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+               elem_iova = rte_mempool_virt2iova(elem);
+#else
+               elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+#endif
+
+               /* Remember which job this element belongs to */
+               slot = (struct rte_dpaa2_qdma_job **)
+                       (base + QDMA_FLE_SINGLE_JOB_OFFSET);
+               *slot = cur;
+
+               cur->vq_id = qdma_vq->vq_id;
+
+               fle = (struct qbman_fle *)(base + QDMA_FLE_FLE_OFFSET);
+               fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+               DPAA2_SET_FD_ADDR(cur_fd, fle_iova);
+               DPAA2_SET_FD_COMPOUND_FMT(cur_fd);
+
+               memset(fle, 0, clear_len);
+
+               dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+                       cur->src, cur->dest, cur->len,
+                       cur->flags, QBMAN_FLE_WORD4_FMT_SBF);
+       }
+
+       return 0;
+}
+
+/*
+ * Build one compound-format frame descriptor per job, taking the FLE/SDD
+ * storage for all jobs from the queue's fle_pool in a single bulk get.
+ *
+ * For every job: the job pointer is saved inside the element at
+ * QDMA_FLE_SINGLE_JOB_OFFSET, the job's vq_id is set, the descriptor is
+ * pointed at the frame list (element + QDMA_FLE_FLE_OFFSET) with
+ * QDMA_SER_CTX as its FRC value, and the frame list area is cleared and
+ * then populated.
+ *
+ * The local element array holds DPAA2_QDMA_MAX_DESC entries, so @nb_jobs
+ * must not exceed that.
+ *
+ * Returns 0 on success, or the rte_mempool_get_bulk() error code when the
+ * elements could not be obtained.
+ */
+static inline int
+dpdmai_dev_set_multi_fd_lf(struct qdma_virt_queue *qdma_vq,
+                          struct qbman_fd *fd,
+                          struct rte_dpaa2_qdma_job **job,
+                          uint16_t nb_jobs)
+{
+       /* Bytes cleared at the frame list before it is populated */
+       const size_t clear_len =
+               DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+               DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd);
+       struct rte_dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
+       void *elem[DPAA2_QDMA_MAX_DESC];
+       uint16_t idx;
+       int err;
+
+       err = rte_mempool_get_bulk(qdma_vq->fle_pool, elem, nb_jobs);
+       if (err) {
+               DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+               return err;
+       }
+
+       for (idx = 0; idx < nb_jobs; idx++) {
+               struct rte_dpaa2_qdma_job *cur = job[idx];
+               struct qbman_fd *cur_fd = &fd[idx];
+               struct rte_dpaa2_qdma_job **slot;
+               struct qbman_fle *fle;
+               uint64_t elem_iova, fle_iova;
+               uintptr_t base = (uintptr_t)(uint64_t)elem[idx];
+
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+               elem_iova = rte_mempool_virt2iova(elem[idx]);
+#else
+               elem_iova = DPAA2_VADDR_TO_IOVA(elem[idx]);
+#endif
+
+               /* Remember which job this element belongs to */
+               slot = (struct rte_dpaa2_qdma_job **)
+                       (base + QDMA_FLE_SINGLE_JOB_OFFSET);
+               *slot = cur;
+
+               cur->vq_id = qdma_vq->vq_id;
+
+               fle = (struct qbman_fle *)(base + QDMA_FLE_FLE_OFFSET);
+               fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+               DPAA2_SET_FD_ADDR(cur_fd, fle_iova);
+               DPAA2_SET_FD_COMPOUND_FMT(cur_fd);
+               DPAA2_SET_FD_FRC(cur_fd, QDMA_SER_CTX);
+
+               memset(fle, 0, clear_len);
+
+               dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+                               cur->src, cur->dest, cur->len,
+                               cur->flags, QBMAN_FLE_WORD4_FMT_SBF);
+       }
+
+       return 0;
+}
+
+/*
+ * Build a single compound-format frame descriptor that covers @nb_jobs jobs.
+ *
+ * The FLE/SDD element comes from job[0]->usr_elem when the queue has
+ * DPAA2_QDMA_VQ_NO_RESPONSE set, and from the queue's fle_pool otherwise.
+ * The job count and all job pointers are saved inside the element (at
+ * QDMA_FLE_JOB_NB_OFFSET and QDMA_FLE_SG_JOBS_OFFSET) so that the dequeue
+ * side can hand them back. Only the first job gets its vq_id set.
+ *
+ * With more than one job the transfer is described through scatter-gather
+ * tables stored in the element; with exactly one job the job's own
+ * source/destination/flags are used directly.
+ *
+ * @fd:  the one descriptor to fill.
+ * @job: array of @nb_jobs job pointers; job[0] is always dereferenced.
+ *
+ * Returns 0 on success, or the rte_mempool_get() error code when no element
+ * could be obtained.
+ */
+static inline int
+dpdmai_dev_set_sg_fd_lf(struct qdma_virt_queue *qdma_vq,
+                       struct qbman_fd *fd,
+                       struct rte_dpaa2_qdma_job **job,
+                       uint16_t nb_jobs)
+{
+       struct rte_dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
+       struct rte_dpaa2_qdma_job **ppjob;
+       void *elem;
+       struct qbman_fle *fle;
+       uint64_t elem_iova, fle_iova, src, dst;
+       int ret, i;
+       struct qdma_sg_entry *src_sge, *dst_sge;
+       uint32_t len, fmt, flags;
+
+       /*
+        * Get an FLE/SDD from FLE pool.
+        * Note: IO metadata is before the FLE and SDD memory.
+        */
+       if (qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE) {
+               elem = job[0]->usr_elem;
+       } else {
+               ret = rte_mempool_get(qdma_vq->fle_pool, &elem);
+               if (ret) {
+                       DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+                       return ret;
+               }
+       }
+
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+       elem_iova = rte_mempool_virt2iova(elem);
+#else
+       elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+#endif
+
+       /* Set the metadata */
+       /* Save job context. */
+       *((uint16_t *)
+       ((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
+       ppjob = (struct rte_dpaa2_qdma_job **)
+               ((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_JOBS_OFFSET);
+       for (i = 0; i < nb_jobs; i++)
+               ppjob[i] = job[i];
+
+       ppjob[0]->vq_id = qdma_vq->vq_id;
+
+       fle = (struct qbman_fle *)
+               ((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
+       fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+       DPAA2_SET_FD_ADDR(fd, fle_iova);
+       DPAA2_SET_FD_COMPOUND_FMT(fd);
+       /* The FRC value is only set when a response is expected */
+       if (!(qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE))
+               DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+
+       /* Populate FLE */
+       if (likely(nb_jobs > 1)) {
+               /* The destination table follows the source table */
+               src_sge = (struct qdma_sg_entry *)
+                       ((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
+               dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
+               src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
+               dst = src +
+                       DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
+               len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
+               fmt = QBMAN_FLE_WORD4_FMT_SGE;
+               /* The table addresses computed above are already IOVAs */
+               flags = RTE_DPAA2_QDMA_JOB_SRC_PHY |
+                       RTE_DPAA2_QDMA_JOB_DEST_PHY;
+       } else {
+               src = job[0]->src;
+               dst = job[0]->dest;
+               len = job[0]->len;
+               fmt = QBMAN_FLE_WORD4_FMT_SBF;
+               flags = job[0]->flags;
+       }
+
+       memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+                       DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+       dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+                                       src, dst, len, flags, fmt);
+
+       return 0;
+}
+
+/*
+ * Recover the job that belongs to a completed short-format descriptor.
+ *
+ * The job pointer is read from the pointer-sized slot located immediately
+ * before the virtual address of one of the job's buffers: the destination
+ * buffer when the descriptor's source address has any of the (shifted)
+ * QDMA_RBP_UPPER_ADDRESS_MASK bits set, the source buffer otherwise.
+ * The job status is built from the descriptor's acc_err (bits 8 and up)
+ * and error (low bits) fields.
+ *
+ * @job receives the job pointer, @nb_jobs is set to 1.
+ * Returns the vq_id stored in the job.
+ */
+static inline uint16_t
+dpdmai_dev_get_job_us(struct qdma_virt_queue *qdma_vq __rte_unused,
+                     const struct qbman_fd *fd,
+                     struct rte_dpaa2_qdma_job **job, uint16_t *nb_jobs)
+{
+       struct rte_dpaa2_qdma_job **slot;
+       uint64_t addr_hi, addr_lo;
+       size_t buf_iova;
+
+       if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32)) {
+               addr_hi = (uint64_t)fd->simple_pci.daddr_hi;
+               addr_lo = (uint64_t)fd->simple_pci.daddr_lo;
+       } else {
+               addr_hi = (uint64_t)fd->simple_pci.saddr_hi;
+               addr_lo = (uint64_t)fd->simple_pci.saddr_lo;
+       }
+       buf_iova = (size_t)((addr_hi << 32) | addr_lo);
+
+       /* The cast binds first: step back one pointer-sized slot */
+       slot = (struct rte_dpaa2_qdma_job **)DPAA2_IOVA_TO_VADDR(buf_iova) - 1;
+       *job = (struct rte_dpaa2_qdma_job *)*slot;
+       (*job)->status = (fd->simple_pci.acc_err << 8) |
+                                       (fd->simple_pci.error);
+       *nb_jobs = 1;
+
+       return (*job)->vq_id;
+}
+
+/*
+ * Recover the single job that belongs to a completed compound-format
+ * descriptor and return its FLE element to the queue's fle_pool.
+ *
+ * The descriptor address points at the frame list; the element starts
+ * QDMA_FLE_FLE_OFFSET bytes before it and holds the job pointer at
+ * QDMA_FLE_SINGLE_JOB_OFFSET. The job status combines the descriptor's
+ * error value (bits 8 and up) with the low byte of its FRC value.
+ *
+ * @job receives the job pointer, @nb_jobs is set to 1.
+ * Returns the vq_id stored in the job.
+ */
+static inline uint16_t
+dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq,
+                            const struct qbman_fd *fd,
+                            struct rte_dpaa2_qdma_job **job,
+                            uint16_t *nb_jobs)
+{
+       struct rte_dpaa2_qdma_job **slot;
+       struct qbman_fle *fle;
+       uintptr_t elem_base;
+       uint16_t fd_status;
+
+       /* Job and vq_id were stored in the element at enqueue time */
+       fle = (struct qbman_fle *)
+                       DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+       elem_base = (uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET;
+
+       slot = (struct rte_dpaa2_qdma_job **)
+                       (elem_base + QDMA_FLE_SINGLE_JOB_OFFSET);
+       fd_status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
+
+       *nb_jobs = 1;
+       *job = *slot;
+       (*job)->status = fd_status;
+
+       /* Hand the element back to the pool */
+       rte_mempool_put(qdma_vq->fle_pool, (void *)elem_base);
+
+       return (*job)->vq_id;
+}
+
+/*
+ * Recover all jobs that belong to a completed scatter-gather descriptor and
+ * return its FLE element to the queue's fle_pool.
+ *
+ * The descriptor address points at the frame list; the element starts
+ * QDMA_FLE_FLE_OFFSET bytes before it and holds the job count at
+ * QDMA_FLE_JOB_NB_OFFSET and the job pointers at QDMA_FLE_SG_JOBS_OFFSET.
+ * Every job gets the same status: the descriptor's error value (bits 8 and
+ * up) combined with the low byte of its FRC value.
+ *
+ * @job must have room for the stored number of jobs; @nb_jobs receives
+ * that number.
+ * Returns the vq_id stored in the first job.
+ */
+static inline uint16_t
+dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq,
+                        const struct qbman_fd *fd,
+                        struct rte_dpaa2_qdma_job **job,
+                        uint16_t *nb_jobs)
+{
+       struct rte_dpaa2_qdma_job **saved;
+       struct qbman_fle *fle;
+       uintptr_t elem_base;
+       uint16_t idx, fd_status;
+
+       /* Job context was stored in the element at enqueue time */
+       fle = (struct qbman_fle *)
+                       DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+       elem_base = (uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET;
+
+       *nb_jobs = *((uint16_t *)(elem_base + QDMA_FLE_JOB_NB_OFFSET));
+       saved = (struct rte_dpaa2_qdma_job **)
+                       (elem_base + QDMA_FLE_SG_JOBS_OFFSET);
+       fd_status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
+
+       for (idx = 0; idx < (*nb_jobs); idx++) {
+               job[idx] = saved[idx];
+               job[idx]->status = fd_status;
+       }
+
+       /* Hand the element back to the pool */
+       rte_mempool_put(qdma_vq->fle_pool, (void *)elem_base);
+
+       return job[0]->vq_id;
+}
+
+/*
+ * Receive completed QDMA jobs from rx_queue[0] of the queue's device, using
+ * two dequeue storages alternately: the results of the previously issued
+ * pull command are consumed while the next pull command is prepared, and
+ * that next command is issued before returning.
+ *
+ * @qdma_vq: virtual queue; supplies the device, the flags and the get_job
+ *           callback that converts a frame descriptor into job pointers.
+ * @vq_id:   optional array; when non-NULL the id returned by get_job is
+ *           stored at the index of the first job of each descriptor.
+ * @job:     array receiving the job pointers.
+ * @nb_jobs: capacity of @job. For DPAA2_QDMA_VQ_FD_SG_FORMAT queues it must
+ *           be at least DPAA2_QDMA_MAX_SG_NB and a single descriptor is
+ *           pulled per call; otherwise at most dpaa2_dqrr_size descriptors
+ *           are pulled.
+ *
+ * Returns the number of jobs stored in @job, 0 when no portal could be
+ * affined to the calling thread, or -EINVAL when @nb_jobs is too small for
+ * a scatter-gather queue.
+ */
+static int
+dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
+                                    uint16_t *vq_id,
+                                    struct rte_dpaa2_qdma_job **job,
+                                    uint16_t nb_jobs)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
+       struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]);
+       struct qbman_result *dq_storage, *dq_storage1 = NULL;
+       struct qbman_pull_desc pulldesc;
+       struct qbman_swp *swp;
+       struct queue_storage_info_t *q_storage;
+       uint8_t status, pending;
+       /* Indexes job[] and vq_id[]: same width as nb_jobs */
+       uint16_t num_rx = 0;
+       const struct qbman_fd *fd;
+       uint16_t vqid, num_rx_ret;
+       uint16_t rx_fqid = rxq->fqid;
+       int ret, pull_size;
+
+       if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
+               /** Make sure there are enough space to get jobs.*/
+               if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+                       return -EINVAL;
+               nb_jobs = 1;
+       }
+
+       if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+               ret = dpaa2_affine_qbman_swp();
+               if (ret) {
+                       DPAA2_QDMA_ERR(
+                               "Failed to allocate IO portal, tid: %d\n",
+                               rte_gettid());
+                       return 0;
+               }
+       }
+       swp = DPAA2_PER_LCORE_PORTAL;
+
+       pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
+       q_storage = rxq->q_storage;
+
+       /* No pull command outstanding yet: issue the very first one */
+       if (unlikely(!q_storage->active_dqs)) {
+               q_storage->toggle = 0;
+               dq_storage = q_storage->dq_storage[q_storage->toggle];
+               q_storage->last_num_pkts = pull_size;
+               qbman_pull_desc_clear(&pulldesc);
+               qbman_pull_desc_set_numframes(&pulldesc,
+                                             q_storage->last_num_pkts);
+               qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+               qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+                               (size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+               /* Wait for any command still active on this portal */
+               if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+                       while (!qbman_check_command_complete(
+                               get_swp_active_dqs(
+                               DPAA2_PER_LCORE_DPIO->index)))
+                               ;
+                       clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+               }
+               while (1) {
+                       if (qbman_swp_pull(swp, &pulldesc)) {
+                               DPAA2_QDMA_DP_WARN(
+                                       "VDQ command not issued.QBMAN busy\n");
+                                       /* Portal was busy, try again */
+                               continue;
+                       }
+                       break;
+               }
+               q_storage->active_dqs = dq_storage;
+               q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+               set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
+                                  dq_storage);
+       }
+
+       dq_storage = q_storage->active_dqs;
+       rte_prefetch0((void *)(size_t)(dq_storage));
+       rte_prefetch0((void *)(size_t)(dq_storage + 1));
+
+       /* Prepare next pull descriptor. This will give space for the
+        * prefetching done on DQRR entries
+        */
+       q_storage->toggle ^= 1;
+       dq_storage1 = q_storage->dq_storage[q_storage->toggle];
+       qbman_pull_desc_clear(&pulldesc);
+       qbman_pull_desc_set_numframes(&pulldesc, pull_size);
+       qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+       qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
+               (size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
+
+       /* Check if the previous issued command is completed.
+        * Also seems like the SWP is shared between the Ethernet Driver
+        * and the SEC driver.
+        */
+       while (!qbman_check_command_complete(dq_storage))
+               ;
+       if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
+               clear_swp_active_dqs(q_storage->active_dpio_id);
+
+       pending = 1;
+
+       do {
+               /* Loop until the dq_storage is updated with
+                * new token by QBMAN
+                */
+               while (!qbman_check_new_result(dq_storage))
+                       ;
+               rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+               /* Check whether Last Pull command is Expired and
+                * setting Condition for Loop termination
+                */
+               if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+                       pending = 0;
+                       /* Check for valid frame. */
+                       status = qbman_result_DQ_flags(dq_storage);
+                       /* pending is 0, so this continue ends the loop */
+                       if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
+                               continue;
+               }
+               fd = qbman_result_DQ_fd(dq_storage);
+
+               vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
+                                                               &num_rx_ret);
+               if (vq_id)
+                       vq_id[num_rx] = vqid;
+
+               dq_storage++;
+               num_rx += num_rx_ret;
+       } while (pending);
+
+       if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+               while (!qbman_check_command_complete(
+                       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+                       ;
+               clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+       }
+       /* issue a volatile dequeue command for next pull */
+       while (1) {
+               if (qbman_swp_pull(swp, &pulldesc)) {
+                       DPAA2_QDMA_DP_WARN(
+                               "VDQ command is not issued. QBMAN is busy (2)\n");
+                       continue;
+               }
+               break;
+       }
+
+       q_storage->active_dqs = dq_storage1;
+       q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+       set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
+
+       return num_rx;
+}
+
+/*
+ * Receive completed QDMA jobs from rx_queue[0] of the queue's device by
+ * issuing pull commands and consuming their results synchronously.
+ *
+ * Pull commands of at most dpaa2_dqrr_size descriptors are repeated until
+ * @nb_jobs descriptors were requested or a pull delivered fewer descriptors
+ * than dpaa2_dqrr_size.
+ *
+ * @qdma_vq: virtual queue; supplies the device, the flags and the get_job
+ *           callback that converts a frame descriptor into job pointers.
+ * @vq_id:   optional array; when non-NULL the id returned by get_job is
+ *           stored at the index of the first job of each descriptor.
+ * @job:     array receiving the job pointers.
+ * @nb_jobs: capacity of @job. For DPAA2_QDMA_VQ_FD_SG_FORMAT queues it must
+ *           be at least DPAA2_QDMA_MAX_SG_NB and a single descriptor is
+ *           pulled per call.
+ *
+ * Returns the number of jobs stored in @job, 0 when no portal could be
+ * affined to the calling thread, or -EINVAL when @nb_jobs is too small for
+ * a scatter-gather queue.
+ */
+static int
+dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq,
+                                       uint16_t *vq_id,
+                                       struct rte_dpaa2_qdma_job **job,
+                                       uint16_t nb_jobs)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
+       struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]);
+       struct qbman_result *dq_storage;
+       struct qbman_pull_desc pulldesc;
+       struct qbman_swp *swp;
+       uint8_t status, pending;
+       /*
+        * Indexes job[] and vq_id[] across several pulls; it must be as wide
+        * as nb_jobs, an 8-bit counter would wrap after 255 jobs.
+        */
+       uint16_t num_rx = 0;
+       const struct qbman_fd *fd;
+       uint16_t vqid, num_rx_ret;
+       uint16_t rx_fqid = rxq->fqid;
+       int ret, next_pull, num_pulled = 0;
+
+       if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
+               /** Make sure there are enough space to get jobs.*/
+               if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+                       return -EINVAL;
+               nb_jobs = 1;
+       }
+
+       next_pull = nb_jobs;
+
+       if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+               ret = dpaa2_affine_qbman_swp();
+               if (ret) {
+                       DPAA2_QDMA_ERR(
+                               "Failed to allocate IO portal, tid: %d\n",
+                               rte_gettid());
+                       return 0;
+               }
+       }
+       swp = DPAA2_PER_LCORE_PORTAL;
+
+       do {
+               dq_storage = rxq->q_storage->dq_storage[0];
+               /* Prepare dequeue descriptor */
+               qbman_pull_desc_clear(&pulldesc);
+               qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+               qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+                       (uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+
+               /* Request at most dpaa2_dqrr_size descriptors per pull */
+               if (next_pull > dpaa2_dqrr_size) {
+                       qbman_pull_desc_set_numframes(&pulldesc,
+                                       dpaa2_dqrr_size);
+                       next_pull -= dpaa2_dqrr_size;
+               } else {
+                       qbman_pull_desc_set_numframes(&pulldesc, next_pull);
+                       next_pull = 0;
+               }
+
+               while (1) {
+                       if (qbman_swp_pull(swp, &pulldesc)) {
+                               DPAA2_QDMA_DP_WARN(
+                                       "VDQ command not issued. QBMAN busy");
+                               /* Portal was busy, try again */
+                               continue;
+                       }
+                       break;
+               }
+
+               rte_prefetch0((void *)((size_t)(dq_storage + 1)));
+               /* Check if the previous issued command is completed. */
+               while (!qbman_check_command_complete(dq_storage))
+                       ;
+
+               num_pulled = 0;
+               pending = 1;
+
+               do {
+                       /* Loop until dq_storage is updated
+                        * with new token by QBMAN
+                        */
+                       while (!qbman_check_new_result(dq_storage))
+                               ;
+                       rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+
+                       if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+                               pending = 0;
+                               /* Check for valid frame. */
+                               status = qbman_result_DQ_flags(dq_storage);
+                               /* pending is 0: continue ends this loop */
+                               if (unlikely((status &
+                                       QBMAN_DQ_STAT_VALIDFRAME) == 0))
+                                       continue;
+                       }
+                       fd = qbman_result_DQ_fd(dq_storage);
+
+                       vqid = qdma_vq->get_job(qdma_vq, fd,
+                                               &job[num_rx], &num_rx_ret);
+                       if (vq_id)
+                               vq_id[num_rx] = vqid;
+
+                       dq_storage++;
+                       num_rx += num_rx_ret;
+                       num_pulled++;
+
+               } while (pending);
+       /* Last VDQ provided all packets and more packets are requested */
+       } while (next_pull && num_pulled == dpaa2_dqrr_size);
+
+       return num_rx;
+}
+
+/**
+ * Build frame descriptors (FDs) for a burst of jobs and enqueue them on the
+ * first TX queue of the DPDMAI device behind @p qdma_vq.
+ *
+ * With DPAA2_QDMA_VQ_FD_SG_FORMAT set, up to DPAA2_QDMA_MAX_SG_NB jobs are
+ * packed into each FD. Otherwise set_fd() is invoked on batches of at most
+ * dpaa2_eqcr_size jobs and the same number of FDs is enqueued per batch.
+ * In both modes the enqueue is retried until the portal has accepted every
+ * FD that was built.
+ *
+ * @param qdma_vq
+ *   Virtual queue the jobs belong to; its num_enqueues counter is advanced
+ *   by the number of jobs submitted.
+ * @param job
+ *   Array of jobs to submit.
+ * @param nb_jobs
+ *   Number of entries in @p job. At most DPAA2_QDMA_MAX_DESC jobs are
+ *   consumed per call because that is the size of the on-stack FD array.
+ *
+ * @return
+ *   Number of jobs handed to hardware. 0 when no IO portal could be
+ *   allocated, or (scatter-gather mode) when set_fd() fails. In the
+ *   non scatter-gather mode a set_fd() failure stops the submission and
+ *   the count of jobs already enqueued is returned.
+ */
+static int
+dpdmai_dev_submit_multi(struct qdma_virt_queue *qdma_vq,
+                       struct rte_dpaa2_qdma_job **job,
+                       uint16_t nb_jobs)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
+       uint16_t txq_id = dpdmai_dev->tx_queue[0].fqid;
+       struct qbman_fd fd[DPAA2_QDMA_MAX_DESC];
+       struct qbman_eq_desc eqdesc;
+       struct qbman_swp *swp;
+       uint32_t num_to_send = 0;
+       uint16_t num_tx = 0;
+       uint32_t enqueue_loop, loop;
+       int ret;
+
+       /* fd[] only has room for DPAA2_QDMA_MAX_DESC descriptors; never
+        * build more than that in one call.
+        */
+       if (nb_jobs > DPAA2_QDMA_MAX_DESC)
+               nb_jobs = DPAA2_QDMA_MAX_DESC;
+
+       if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+               ret = dpaa2_affine_qbman_swp();
+               if (ret) {
+                       DPAA2_QDMA_ERR(
+                               "Failed to allocate IO portal, tid: %d\n",
+                               rte_gettid());
+                       return 0;
+               }
+       }
+       swp = DPAA2_PER_LCORE_PORTAL;
+
+       /* Prepare enqueue descriptor */
+       qbman_eq_desc_clear(&eqdesc);
+       qbman_eq_desc_set_fq(&eqdesc, txq_id);
+       qbman_eq_desc_set_no_orp(&eqdesc, 0);
+       qbman_eq_desc_set_response(&eqdesc, 0, 0);
+
+       if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
+               /* Number of FDs needed, rounding the last partial one up. */
+               uint16_t fd_nb = (nb_jobs + DPAA2_QDMA_MAX_SG_NB - 1) /
+                                       DPAA2_QDMA_MAX_SG_NB;
+               uint16_t job_idx = 0;
+               uint16_t sg_entry_nb;
+
+               memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
+
+               for (loop = 0; loop < fd_nb; loop++) {
+                       /* Every FD is full except possibly the last one. */
+                       sg_entry_nb =
+                               (nb_jobs - job_idx) > DPAA2_QDMA_MAX_SG_NB ?
+                               DPAA2_QDMA_MAX_SG_NB : (nb_jobs - job_idx);
+
+                       ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
+                                             sg_entry_nb);
+                       if (unlikely(ret < 0))
+                               return 0;
+                       job_idx += sg_entry_nb;
+               }
+
+               /* Enqueue the packet to the QBMAN */
+               enqueue_loop = 0;
+
+               while (enqueue_loop < fd_nb) {
+                       ret = qbman_swp_enqueue_multiple(swp,
+                                       &eqdesc, &fd[enqueue_loop],
+                                       NULL, fd_nb - enqueue_loop);
+                       if (likely(ret >= 0))
+                               enqueue_loop += ret;
+               }
+
+               /* The loop above only ends once every FD was accepted, so
+                * all nb_jobs jobs are in flight. Account for them the same
+                * way the non scatter-gather path does, otherwise the
+                * dequeue side sees num_enqueues == num_dequeues and never
+                * polls for their completion.
+                */
+               qdma_vq->num_enqueues += nb_jobs;
+
+               return nb_jobs;
+       }
+
+       memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
+
+       while (nb_jobs > 0) {
+               num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
+                       dpaa2_eqcr_size : nb_jobs;
+
+               ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
+                                               &job[num_tx], num_to_send);
+               if (unlikely(ret < 0))
+                       break;
+
+               /* Enqueue the packet to the QBMAN */
+               enqueue_loop = 0;
+               loop = num_to_send;
+
+               while (enqueue_loop < loop) {
+                       ret = qbman_swp_enqueue_multiple(swp,
+                                               &eqdesc,
+                                               &fd[num_tx + enqueue_loop],
+                                               NULL,
+                                               loop - enqueue_loop);
+                       if (likely(ret >= 0))
+                               enqueue_loop += ret;
+               }
+               num_tx += num_to_send;
+               nb_jobs -= loop;
+       }
+
+       qdma_vq->num_enqueues += num_tx;
+
+       return num_tx;
+}
+
+/**
+ * dmadev "submit" callback: push every job staged in the virtual queue's
+ * job_list to hardware.
+ *
+ * Jobs that could not be handed to hardware are kept at the front of
+ * job_list (instead of being silently dropped) so that a later submit can
+ * retry them.
+ *
+ * @param dev_private
+ *   Driver private data (struct dpaa2_dpdmai_dev).
+ * @param vchan
+ *   Virtual channel whose staged jobs are submitted.
+ *
+ * @return
+ *   0 when all staged jobs were submitted (or none were staged),
+ *   -EIO when at least one job is still staged after the call.
+ */
+static inline int
+dpaa2_qdma_submit(void *dev_private, uint16_t vchan)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+       struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+       struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+       int pending = qdma_vq->num_valid_jobs;
+       int sent, i;
+
+       if (pending <= 0)
+               return 0;
+
+       sent = dpdmai_dev_submit_multi(qdma_vq, qdma_vq->job_list, pending);
+       if (sent < 0)
+               sent = 0;
+
+       if (likely(sent >= pending)) {
+               qdma_vq->num_valid_jobs = 0;
+               return 0;
+       }
+
+       /* Partial (or no) submission: slide the unsent jobs to the head of
+        * the list so they stay owned by the queue and can be retried.
+        */
+       for (i = sent; i < pending; i++)
+               qdma_vq->job_list[i - sent] = qdma_vq->job_list[i];
+       qdma_vq->num_valid_jobs = pending - sent;
+
+       return -EIO;
+}
+
+/**
+ * dmadev "copy" callback: stage one copy job on a virtual channel and
+ * optionally submit everything staged so far.
+ *
+ * @param dev_private
+ *   Driver private data (struct dpaa2_dpdmai_dev).
+ * @param vchan
+ *   Virtual channel to stage the job on.
+ * @param src
+ *   Source address of the copy.
+ * @param dst
+ *   Destination address of the copy.
+ * @param length
+ *   Number of bytes to copy.
+ * @param flags
+ *   Operation flags; when RTE_DMA_OP_FLAG_SUBMIT is set the staged jobs
+ *   are submitted before returning.
+ *
+ * @return
+ *   Index of the job, computed as the low 16 bits of
+ *   (num_enqueues + num_valid_jobs) before the job is staged;
+ *   -ENOSPC when the staging list is full or no job object is available.
+ */
+static int
+dpaa2_qdma_enqueue(void *dev_private, uint16_t vchan,
+                  rte_iova_t src, rte_iova_t dst,
+                  uint32_t length, uint64_t flags)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+       struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+       struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+       struct rte_dpaa2_qdma_job *job;
+       int idx, ret;
+
+       /* job_list[] has DPAA2_QDMA_MAX_DESC slots; refuse to stage more
+        * jobs than it can hold until the caller submits.
+        */
+       if (unlikely(qdma_vq->num_valid_jobs >= DPAA2_QDMA_MAX_DESC))
+               return -ENOSPC;
+
+       idx = (uint16_t)(qdma_vq->num_enqueues + qdma_vq->num_valid_jobs);
+
+       ret = rte_mempool_get(qdma_vq->job_pool, (void **)&job);
+       if (ret) {
+               DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+               return -ENOSPC;
+       }
+
+       job->src = src;
+       job->dest = dst;
+       job->len = length;
+       job->flags = flags;
+       job->status = 0;
+       job->vq_id = vchan;
+
+       qdma_vq->job_list[qdma_vq->num_valid_jobs] = job;
+       qdma_vq->num_valid_jobs++;
+
+       if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+               dpaa2_qdma_submit(dev_private, vchan);
+
+       return idx;
+}
+
+/*
+ * Public burst-copy entry point: look up the virtual queue for
+ * (dev_id, vchan) and hand the caller's jobs straight to the
+ * multi-job submit routine.
+ */
+int
+rte_dpaa2_qdma_copy_multi(int16_t dev_id, uint16_t vchan,
+                         struct rte_dpaa2_qdma_job **jobs,
+                         uint16_t nb_cpls)
+{
+       struct dpaa2_dpdmai_dev *priv = rte_dma_fp_objs[dev_id].dev_private;
+       struct qdma_virt_queue *vq = &priv->qdma_dev->vqs[vchan];
+
+       return dpdmai_dev_submit_multi(vq, jobs, nb_cpls);
+}
+
+/**
+ * Collect completed jobs for one virtual queue.
+ *
+ * With an exclusive hardware queue the jobs are fetched directly through
+ * the queue's dequeue_job() callback. Otherwise completions pulled from
+ * hardware are first distributed to the status ring of the virtual queue
+ * they belong to, and the caller is served from its own status ring.
+ *
+ * @param qdma_dev
+ *   QDMA device owning all virtual queues.
+ * @param qdma_vq
+ *   Virtual queue to collect completions for.
+ * @param jobs
+ *   Output array receiving the completed jobs.
+ * @param nb_jobs
+ *   Capacity of @p jobs. In scatter-gather mode it must be at least
+ *   DPAA2_QDMA_MAX_SG_NB.
+ *
+ * @return
+ *   Number of jobs stored in @p jobs, never more than @p nb_jobs.
+ *   The return type is unsigned, so every failure is reported as 0.
+ */
+static uint16_t
+dpaa2_qdma_dequeue_multi(struct qdma_device *qdma_dev,
+                        struct qdma_virt_queue *qdma_vq,
+                        struct rte_dpaa2_qdma_job **jobs,
+                        uint16_t nb_jobs)
+{
+       struct qdma_virt_queue *temp_qdma_vq;
+       int ring_count;
+       int ret = 0, i;
+
+       if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
+               /* Make sure there is enough space to get jobs. A negative
+                * errno cannot be returned through uint16_t (it would look
+                * like a huge job count), so report "nothing dequeued".
+                */
+               if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+                       return 0;
+       }
+
+       /* Only dequeue when there are pending jobs on VQ */
+       if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
+               return 0;
+
+       if (!(qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) &&
+               qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
+               nb_jobs = RTE_MIN((qdma_vq->num_enqueues -
+                               qdma_vq->num_dequeues), nb_jobs);
+
+       if (qdma_vq->exclusive_hw_queue) {
+               /* In case of exclusive queue directly fetch from HW queue */
+               ret = qdma_vq->dequeue_job(qdma_vq, NULL, jobs, nb_jobs);
+               if (ret < 0) {
+                       DPAA2_QDMA_ERR(
+                               "Dequeue from DPDMAI device failed: %d", ret);
+                       return 0;
+               }
+       } else {
+               uint16_t temp_vq_id[DPAA2_QDMA_MAX_DESC];
+
+               /* temp_vq_id[] gets one entry per dequeued job. */
+               if (nb_jobs > DPAA2_QDMA_MAX_DESC)
+                       nb_jobs = DPAA2_QDMA_MAX_DESC;
+
+               /* Get the QDMA completed jobs from the software ring.
+                * In case they are not available on the ring poke the HW
+                * to fetch completed jobs from corresponding HW queues
+                */
+               ring_count = rte_ring_count(qdma_vq->status_ring);
+               if (ring_count < nb_jobs) {
+                       ret = qdma_vq->dequeue_job(qdma_vq,
+                                       temp_vq_id, jobs, nb_jobs);
+                       for (i = 0; i < ret; i++) {
+                               temp_qdma_vq = &qdma_dev->vqs[temp_vq_id[i]];
+                               rte_ring_enqueue(temp_qdma_vq->status_ring,
+                                       (void *)(jobs[i]));
+                       }
+                       ring_count = rte_ring_count(
+                                       qdma_vq->status_ring);
+               }
+
+               /* The jobs pulled from hardware now live in the status
+                * rings (possibly of other virtual queues); only what is
+                * taken from this queue's ring below counts for the caller.
+                */
+               ret = 0;
+
+               /* Never hand back more than the caller's array can hold. */
+               if (ring_count > nb_jobs)
+                       ring_count = nb_jobs;
+
+               if (ring_count) {
+                       /* Dequeue job from the software ring
+                        * to provide to the user
+                        */
+                       ret = rte_ring_dequeue_bulk(qdma_vq->status_ring,
+                                                   (void **)jobs,
+                                                   ring_count, NULL);
+               }
+       }
+
+       qdma_vq->num_dequeues += ret;
+       return ret;
+}
+
+/**
+ * dmadev "completed_status" callback: reap finished jobs from a virtual
+ * channel, report each job's status and return the job objects to the
+ * queue's job pool.
+ *
+ * @param dev_private
+ *   Driver private data (struct dpaa2_dpdmai_dev).
+ * @param vchan
+ *   Virtual channel to poll.
+ * @param nb_cpls
+ *   Maximum number of completions to report; values above
+ *   DPAA2_QDMA_MAX_DESC are clamped to it.
+ * @param last_idx
+ *   When not NULL, receives the low 16 bits of (num_dequeues - 1).
+ * @param st
+ *   Output array; st[i] receives the status field of the i-th job.
+ *
+ * @return
+ *   Number of completions reported.
+ */
+static uint16_t
+dpaa2_qdma_dequeue_status(void *dev_private, uint16_t vchan,
+                         const uint16_t nb_cpls,
+                         uint16_t *last_idx,
+                         enum rte_dma_status_code *st)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+       struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+       struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+       struct rte_dpaa2_qdma_job *jobs[DPAA2_QDMA_MAX_DESC];
+       uint16_t nb = nb_cpls;
+       int ret, i;
+
+       /* jobs[] is a fixed-size local array; never ask for more than it
+        * can hold.
+        */
+       if (nb > DPAA2_QDMA_MAX_DESC)
+               nb = DPAA2_QDMA_MAX_DESC;
+
+       ret = dpaa2_qdma_dequeue_multi(qdma_dev, qdma_vq, jobs, nb);
+
+       /* The helper returns uint16_t, so a count larger than jobs[] can
+        * only be an error value squeezed through the unsigned return
+        * type; treat it as "nothing dequeued" instead of walking past
+        * the end of jobs[].
+        */
+       if (unlikely(ret > DPAA2_QDMA_MAX_DESC))
+               ret = 0;
+
+       for (i = 0; i < ret; i++)
+               st[i] = jobs[i]->status;
+
+       rte_mempool_put_bulk(qdma_vq->job_pool, (void **)jobs, ret);
+
+       if (last_idx != NULL)
+               *last_idx = (uint16_t)(qdma_vq->num_dequeues - 1);
+
+       return ret;
+}
+
+/**
+ * dmadev "completed" callback: reap finished jobs from a virtual channel
+ * and return the job objects to the queue's job pool.
+ *
+ * @param dev_private
+ *   Driver private data (struct dpaa2_dpdmai_dev).
+ * @param vchan
+ *   Virtual channel to poll.
+ * @param nb_cpls
+ *   Maximum number of completions to reap; values above
+ *   DPAA2_QDMA_MAX_DESC are clamped to it.
+ * @param last_idx
+ *   When not NULL, receives the low 16 bits of (num_dequeues - 1).
+ * @param has_error
+ *   Not written by this implementation.
+ *
+ * @return
+ *   Number of completions reaped.
+ */
+static uint16_t
+dpaa2_qdma_dequeue(void *dev_private,
+                  uint16_t vchan, const uint16_t nb_cpls,
+                  uint16_t *last_idx, bool *has_error)
+{
+       struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+       struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+       struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+       struct rte_dpaa2_qdma_job *jobs[DPAA2_QDMA_MAX_DESC];
+       uint16_t nb = nb_cpls;
+       int ret;
+
+       RTE_SET_USED(has_error);
+
+       /* jobs[] is a fixed-size local array; never ask for more than it
+        * can hold.
+        */
+       if (nb > DPAA2_QDMA_MAX_DESC)
+               nb = DPAA2_QDMA_MAX_DESC;
+
+       ret = dpaa2_qdma_dequeue_multi(qdma_dev, qdma_vq, jobs, nb);
+
+       /* The helper returns uint16_t, so a count larger than jobs[] can
+        * only be an error value squeezed through the unsigned return
+        * type; treat it as "nothing dequeued" instead of freeing
+        * uninitialised pointers.
+        */
+       if (unlikely(ret > DPAA2_QDMA_MAX_DESC))
+               ret = 0;
+
+       rte_mempool_put_bulk(qdma_vq->job_pool, (void **)jobs, ret);
+
+       if (last_idx != NULL)
+               *last_idx = (uint16_t)(qdma_vq->num_dequeues - 1);
+
+       return ret;
+}
+
+/*
+ * Public burst-completion entry point: resolve (dev_id, vchan) to its
+ * QDMA device and virtual queue, then let the common dequeue routine fill
+ * the caller's job array.
+ */
+uint16_t
+rte_dpaa2_qdma_completed_multi(int16_t dev_id, uint16_t vchan,
+                              struct rte_dpaa2_qdma_job **jobs,
+                              uint16_t nb_cpls)
+{
+       struct dpaa2_dpdmai_dev *priv = rte_dma_fp_objs[dev_id].dev_private;
+       struct qdma_device *qdma = priv->qdma_dev;
+
+       return dpaa2_qdma_dequeue_multi(qdma, &qdma->vqs[vchan],
+                                       jobs, nb_cpls);
+}
+
 static int
 dpaa2_qdma_info_get(const struct rte_dma_dev *dev,
                    struct rte_dma_info *dev_info,
@@ -74,6 +1164,44 @@ dpaa2_qdma_configure(struct rte_dma_dev *dev,
        return 0;
 }
 
+/**
+ * kvargs handler that accepts a devargs key only when its value is the
+ * literal string "1".
+ *
+ * @param key
+ *   Unused.
+ * @param value
+ *   Value given for the key; may be NULL when the key was passed without
+ *   a value (NOTE(review): depends on the rte_kvargs version in use).
+ * @param opaque
+ *   Unused.
+ *
+ * @return
+ *   0 when @p value is "1", -1 otherwise (including a missing value).
+ */
+static int
+check_devargs_handler(__rte_unused const char *key,
+                     const char *value,
+                     __rte_unused void *opaque)
+{
+       /* strcmp() on a NULL pointer is undefined behaviour. */
+       if (value == NULL || strcmp(value, "1") != 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Tell whether a devargs key is present and accepted by
+ * check_devargs_handler().
+ *
+ * Returns 1 when the key is present and the handler accepts it, 0 in every
+ * other case (no devargs, parse failure, key absent, value rejected).
+ */
+static int
+dpaa2_qdma_get_devargs(struct rte_devargs *devargs, const char *key)
+{
+       struct rte_kvargs *parsed;
+       int enabled = 0;
+
+       if (devargs == NULL)
+               return 0;
+
+       parsed = rte_kvargs_parse(devargs->args, NULL);
+       if (parsed == NULL)
+               return 0;
+
+       /* The handler only runs when the key actually appears. */
+       if (rte_kvargs_count(parsed, key) != 0 &&
+           rte_kvargs_process(parsed, key,
+                              check_devargs_handler, NULL) >= 0)
+               enabled = 1;
+
+       rte_kvargs_free(parsed);
+
+       return enabled;
+}
+
 /* Enable FD in Ultra Short format */
 void
 rte_dpaa2_qdma_vchan_fd_us_enable(int16_t dev_id, uint16_t vchan)
@@ -187,8 +1315,38 @@ dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
                return -ENOMEM;
        }
 
+       if (fd_long_format) {
+               if (sg_enable) {
+                       qdma_dev->vqs[vchan].set_fd = dpdmai_dev_set_sg_fd_lf;
+                       qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_sg_job_lf;
+               } else {
+                       if (dev->data->dev_conf.enable_silent)
+                               qdma_dev->vqs[vchan].set_fd =
+                                       dpdmai_dev_set_multi_fd_lf_no_rsp;
+                       else
+                               qdma_dev->vqs[vchan].set_fd =
+                                       dpdmai_dev_set_multi_fd_lf;
+                       qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_single_job_lf;
+               }
+       } else {
+               qdma_dev->vqs[vchan].set_fd = dpdmai_dev_set_fd_us;
+               qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_job_us;
+       }
+
+       if (dpaa2_qdma_get_devargs(dev->device->devargs,
+                       DPAA2_QDMA_PREFETCH)) {
+               /* Prefetch was requested through devargs. */
+               qdma_dev->vqs[vchan].dequeue_job =
+                               dpdmai_dev_dequeue_multijob_prefetch;
+               DPAA2_QDMA_INFO("Prefetch RX Mode enabled");
+       } else {
+               qdma_dev->vqs[vchan].dequeue_job =
+                       dpdmai_dev_dequeue_multijob_no_prefetch;
+       }
+
        qdma_dev->vqs[vchan].dpdmai_dev = dpdmai_dev;
        qdma_dev->vqs[vchan].nb_desc = conf->nb_desc;
+       qdma_dev->vqs[vchan].enqueue_job = dpdmai_dev_submit_multi;
 
        return 0;
 }
@@ -269,6 +1427,16 @@ dpaa2_qdma_close(__rte_unused struct rte_dma_dev *dev)
        return 0;
 }
 
+/**
+ * dmadev "burst_capacity" callback: number of further jobs that can be
+ * staged on a virtual channel, computed as nb_desc minus the jobs already
+ * staged and not yet submitted.
+ *
+ * @param dev_private
+ *   Driver private data (struct dpaa2_dpdmai_dev).
+ * @param vchan
+ *   Virtual channel to query.
+ *
+ * @return
+ *   Remaining capacity, or 0 when the staged jobs already reach or exceed
+ *   nb_desc.
+ */
+static uint16_t
+dpaa2_qdma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+       const struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+       struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+       struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+       int room = (int)qdma_vq->nb_desc - qdma_vq->num_valid_jobs;
+
+       /* Clamp instead of letting a negative difference wrap around to a
+        * huge uint16_t capacity.
+        */
+       return room > 0 ? (uint16_t)room : 0;
+}
+
 static struct rte_dma_dev_ops dpaa2_qdma_ops = {
        .dev_info_get     = dpaa2_qdma_info_get,
        .dev_configure    = dpaa2_qdma_configure,
@@ -451,6 +1619,11 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
        dmadev->dev_ops = &dpaa2_qdma_ops;
        dmadev->device = &dpaa2_dev->device;
        dmadev->fp_obj->dev_private = dmadev->data->dev_private;
+       dmadev->fp_obj->copy = dpaa2_qdma_enqueue;
+       dmadev->fp_obj->submit = dpaa2_qdma_submit;
+       dmadev->fp_obj->completed = dpaa2_qdma_dequeue;
+       dmadev->fp_obj->completed_status = dpaa2_qdma_dequeue_status;
+       dmadev->fp_obj->burst_capacity = dpaa2_qdma_burst_capacity;
 
        /* Invoke PMD device initialization function */
        ret = dpaa2_dpdmai_dev_init(dmadev, dpaa2_dev->object_id);
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h
index da63f3998c..5941b5a5d3 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/dpaa2_qdma.h
@@ -80,65 +80,6 @@
 
 #define QDMA_RBP_UPPER_ADDRESS_MASK (0xfff0000000000)
 
-/** Determines a QDMA job */
-struct dpaa2_qdma_job {
-       /** Source Address from where DMA is (to be) performed */
-       uint64_t src;
-       /** Destination Address where DMA is (to be) done */
-       uint64_t dest;
-       /** Length of the DMA operation in bytes. */
-       uint32_t len;
-       /** See RTE_QDMA_JOB_ flags */
-       uint32_t flags;
-       /**
-        * Status of the transaction.
-        * This is filled in the dequeue operation by the driver.
-        * upper 8bits acc_err for route by port.
-        * lower 8bits fd error
-        */
-       uint16_t status;
-       uint16_t vq_id;
-       /**
-        * FLE pool element maintained by user, in case no qDMA response.
-        * Note: the address must be allocated from DPDK memory pool.
-        */
-       void *usr_elem;
-};
-
-struct dpaa2_qdma_rbp {
-       uint32_t use_ultrashort:1;
-       uint32_t enable:1;
-       /**
-        * dportid:
-        * 0000 PCI-Express 1
-        * 0001 PCI-Express 2
-        * 0010 PCI-Express 3
-        * 0011 PCI-Express 4
-        * 0100 PCI-Express 5
-        * 0101 PCI-Express 6
-        */
-       uint32_t dportid:4;
-       uint32_t dpfid:2;
-       uint32_t dvfid:6;
-       /*using route by port for destination */
-       uint32_t drbp:1;
-       /**
-        * sportid:
-        * 0000 PCI-Express 1
-        * 0001 PCI-Express 2
-        * 0010 PCI-Express 3
-        * 0011 PCI-Express 4
-        * 0100 PCI-Express 5
-        * 0101 PCI-Express 6
-        */
-       uint32_t sportid:4;
-       uint32_t spfid:2;
-       uint32_t svfid:6;
-       /* using route by port for source */
-       uint32_t srbp:1;
-       uint32_t rsv:4;
-};
-
 /** Source/Destination Descriptor */
 struct qdma_sdd {
        uint32_t rsv;
@@ -248,22 +189,22 @@ struct qdma_virt_queue;
 
 typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
                                        const struct qbman_fd *fd,
-                                       struct dpaa2_qdma_job **job,
+                                       struct rte_dpaa2_qdma_job **job,
                                        uint16_t *nb_jobs);
 typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
                                        struct qbman_fd *fd,
-                                       struct dpaa2_qdma_job **job,
+                                       struct rte_dpaa2_qdma_job **job,
                                        uint16_t nb_jobs);
 
 typedef int (qdma_dequeue_multijob_t)(
                                struct qdma_virt_queue *qdma_vq,
                                uint16_t *vq_id,
-                               struct dpaa2_qdma_job **job,
+                               struct rte_dpaa2_qdma_job **job,
                                uint16_t nb_jobs);
 
 typedef int (qdma_enqueue_multijob_t)(
                        struct qdma_virt_queue *qdma_vq,
-                       struct dpaa2_qdma_job **job,
+                       struct rte_dpaa2_qdma_job **job,
                        uint16_t nb_jobs);
 
 /** Represents a QDMA virtual queue */
@@ -275,7 +216,7 @@ struct qdma_virt_queue {
        /** FLE pool for the queue */
        struct rte_mempool *fle_pool;
        /** Route by port */
-       struct dpaa2_qdma_rbp rbp;
+       struct rte_dpaa2_qdma_rbp rbp;
        /** States if this vq is in use or not */
        uint8_t in_use;
        /** States if this vq has exclusively associated hw queue */
@@ -290,7 +231,7 @@ struct qdma_virt_queue {
        uint16_t vq_id;
        uint32_t flags;
 
-       struct dpaa2_qdma_job *job_list[DPAA2_QDMA_MAX_DESC];
+       struct rte_dpaa2_qdma_job *job_list[DPAA2_QDMA_MAX_DESC];
        struct rte_mempool *job_pool;
        int num_valid_jobs;
 
diff --git a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
index a75cdd7e36..7744dacb2c 100644
--- a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
@@ -45,6 +45,31 @@ struct rte_dpaa2_qdma_rbp {
        uint32_t rsv:4;
 };
 
+/** Describes a single QDMA copy job. */
+struct rte_dpaa2_qdma_job {
+       /** Source address from where the DMA is (to be) performed. */
+       uint64_t src;
+       /** Destination address where the DMA is (to be) done. */
+       uint64_t dest;
+       /** Length of the DMA operation in bytes. */
+       uint32_t len;
+       /** See RTE_QDMA_JOB_ flags */
+       uint32_t flags;
+       /**
+        * Status of the transaction.
+        * This is filled in by the driver during the dequeue operation.
+        * Upper 8 bits: acc_err for route by port.
+        * Lower 8 bits: FD error.
+        */
+       uint16_t status;
+       uint16_t vq_id; /**< Virtual queue id; the driver's dmadev copy path stores the vchan here. */
+       /**
+        * FLE pool element maintained by the user, in case there is no qDMA
+        * response.
+        * Note: the address must be allocated from a DPDK memory pool.
+        */
+       void *usr_elem;
+};
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
@@ -93,4 +118,56 @@ __rte_experimental
 void rte_dpaa2_qdma_vchan_rbp_enable(int16_t dev_id, uint16_t vchan,
                struct rte_dpaa2_qdma_rbp *rbp_config);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Enqueue a burst of copy jobs onto the virtual DMA channel for silent mode,
+ * when dequeue is not required.
+ *
+ * The jobs are handed to the hardware queue by this call itself. There is no
+ * 'flags' parameter, and no separate submit (doorbell) step is needed
+ * afterwards.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param vchan
+ *   The identifier of virtual DMA channel.
+ * @param jobs
+ *   Jobs to be submitted to QDMA.
+ * @param nb_cpls
+ *   Number of DMA jobs.
+ *
+ * @return
+ *   Number of jobs enqueued to hardware. 0 means that nothing was enqueued,
+ *   for example because no IO portal could be allocated for the calling
+ *   thread. This function does not return negative values.
+ */
+__rte_experimental
+int rte_dpaa2_qdma_copy_multi(int16_t dev_id, uint16_t vchan,
+               struct rte_dpaa2_qdma_job **jobs, uint16_t nb_cpls);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Return the number of operations that have been successfully completed.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param vchan
+ *   The identifier of virtual DMA channel.
+ * @param jobs
+ *   Array that receives pointers to the jobs completed by QDMA.
+ * @param nb_cpls
+ *   Maximum number of completed jobs to return, i.e. the capacity of jobs.
+ *
+ * @return
+ *   The number of operations that successfully completed. This return value
+ *   must be less than or equal to the value of nb_cpls.
+ */
+__rte_experimental
+uint16_t rte_dpaa2_qdma_completed_multi(int16_t dev_id, uint16_t vchan,
+               struct rte_dpaa2_qdma_job **jobs, uint16_t nb_cpls);
+
 #endif /* _RTE_PMD_DPAA2_QDMA_H_ */
diff --git a/drivers/dma/dpaa2/version.map b/drivers/dma/dpaa2/version.map
index 3b3019267f..38d3d225db 100644
--- a/drivers/dma/dpaa2/version.map
+++ b/drivers/dma/dpaa2/version.map
@@ -3,6 +3,8 @@ DPDK_22 {
 };
 
 EXPERIMENTAL {
+       rte_dpaa2_qdma_completed_multi;
+       rte_dpaa2_qdma_copy_multi;
        rte_dpaa2_qdma_vchan_fd_us_enable;
        rte_dpaa2_qdma_vchan_internal_sg_enable;
        rte_dpaa2_qdma_vchan_rbp_enable;
-- 
2.17.1

Reply via email to