From: Jun Yang <jun.y...@nxp.com>

This patch refactors the DPAA DMA driver code with the following changes:
 - rename HW descriptors and update them with details.
 - update qdma engine and queue structures.
 - use rte_ring APIs for enqueue and dequeue.

Signed-off-by: Jun Yang <jun.y...@nxp.com>
Signed-off-by: Gagandeep Singh <g.si...@nxp.com>
---
 drivers/dma/dpaa/dpaa_qdma.c | 1334 +++++++++++++++++++---------------
 drivers/dma/dpaa/dpaa_qdma.h |  222 +++---
 2 files changed, 868 insertions(+), 688 deletions(-)

diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c
index 3d4fd818f8..d162d84c45 100644
--- a/drivers/dma/dpaa/dpaa_qdma.c
+++ b/drivers/dma/dpaa/dpaa_qdma.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2021 NXP
+ * Copyright 2021-2024 NXP
  */
 
 #include <bus_dpaa_driver.h>
@@ -8,69 +8,71 @@
 #include "dpaa_qdma.h"
 #include "dpaa_qdma_logs.h"
 
+static uint32_t s_sg_max_entry_sz = 2000;
+
 static inline void
-qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr)
+qdma_desc_addr_set64(struct fsl_qdma_comp_cmd_desc *ccdf, u64 addr)
 {
        ccdf->addr_hi = upper_32_bits(addr);
        ccdf->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr));
 }
 
-static inline u64
-qdma_ccdf_get_queue(const struct fsl_qdma_format *ccdf)
+static inline void
+qdma_desc_sge_addr_set64(struct fsl_qdma_comp_sg_desc *sge, u64 addr)
 {
-       return ccdf->cfg8b_w1 & 0xff;
+       sge->addr_hi = upper_32_bits(addr);
+       sge->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr));
 }
 
 static inline int
-qdma_ccdf_get_offset(const struct fsl_qdma_format *ccdf)
+qdma_ccdf_get_queue(struct fsl_qdma_comp_cmd_desc *ccdf,
+       uint8_t *queue_idx)
 {
-       return (rte_le_to_cpu_32(ccdf->cfg) & QDMA_CCDF_MASK)
-               >> QDMA_CCDF_OFFSET;
-}
+       uint64_t addr = ((uint64_t)ccdf->addr_hi) << 32 | ccdf->addr_lo;
+
+       if (addr && queue_idx)
+               *queue_idx = ccdf->queue;
+       if (addr) {
+               ccdf->addr_hi = 0;
+               ccdf->addr_lo = 0;
+               return true;
+       }
 
-static inline void
-qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset)
-{
-       ccdf->cfg = rte_cpu_to_le_32(QDMA_CCDF_FOTMAT | offset);
+       return false;
 }
 
 static inline int
-qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf)
+ilog2(int x)
 {
-       return (rte_le_to_cpu_32(ccdf->status) & QDMA_CCDF_MASK)
-               >> QDMA_CCDF_STATUS;
-}
+       int log = 0;
 
-static inline void
-qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status)
-{
-       ccdf->status = rte_cpu_to_le_32(QDMA_CCDF_SER | status);
+       x >>= 1;
+
+       while (x) {
+               log++;
+               x >>= 1;
+       }
+       return log;
 }
 
-static inline void
-qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len)
+static inline int
+ilog2_qsize(uint32_t q_size)
 {
-       csgf->cfg = rte_cpu_to_le_32(len & QDMA_SG_LEN_MASK);
+       return (ilog2(q_size) - ilog2(64));
 }
 
-static inline void
-qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len)
+static inline int
+ilog2_qthld(uint32_t q_thld)
 {
-       csgf->cfg = rte_cpu_to_le_32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK));
+       return (ilog2(q_thld) - ilog2(16));
 }
 
 static inline int
-ilog2(int x)
+fsl_qdma_queue_bd_in_hw(struct fsl_qdma_queue *fsl_queue)
 {
-       int log = 0;
-
-       x >>= 1;
+       struct rte_dma_stats *stats = &fsl_queue->stats;
 
-       while (x) {
-               log++;
-               x >>= 1;
-       }
-       return log;
+       return (stats->submitted - stats->completed);
 }
 
 static u32
@@ -97,12 +99,12 @@ qdma_writel_be(u32 val, void *addr)
        QDMA_OUT_BE(addr, val);
 }
 
-static void
-*dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr)
+static void *
+dma_pool_alloc(char *nm, int size, int aligned, dma_addr_t *phy_addr)
 {
        void *virt_addr;
 
-       virt_addr = rte_malloc("dma pool alloc", size, aligned);
+       virt_addr = rte_zmalloc(nm, size, aligned);
        if (!virt_addr)
                return NULL;
 
@@ -111,268 +113,221 @@ static void
        return virt_addr;
 }
 
-static void
-dma_pool_free(void *addr)
-{
-       rte_free(addr);
-}
-
-static void
-fsl_qdma_free_chan_resources(struct fsl_qdma_chan *fsl_chan)
-{
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
-       struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma;
-       struct fsl_qdma_comp *comp_temp, *_comp_temp;
-       int id;
-
-       if (--fsl_queue->count)
-               goto finally;
-
-       id = (fsl_qdma->block_base - fsl_queue->block_base) /
-             fsl_qdma->block_offset;
-
-       while (rte_atomic32_read(&wait_task[id]) == 1)
-               rte_delay_us(QDMA_DELAY);
-
-       list_for_each_entry_safe(comp_temp, _comp_temp,
-                                &fsl_queue->comp_used, list) {
-               list_del(&comp_temp->list);
-               dma_pool_free(comp_temp->virt_addr);
-               dma_pool_free(comp_temp->desc_virt_addr);
-               rte_free(comp_temp);
-       }
-
-       list_for_each_entry_safe(comp_temp, _comp_temp,
-                                &fsl_queue->comp_free, list) {
-               list_del(&comp_temp->list);
-               dma_pool_free(comp_temp->virt_addr);
-               dma_pool_free(comp_temp->desc_virt_addr);
-               rte_free(comp_temp);
-       }
-
-finally:
-       fsl_qdma->desc_allocated--;
-}
-
-static void
-fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
-                                     dma_addr_t dst, dma_addr_t src, u32 len)
-{
-       struct fsl_qdma_format *csgf_src, *csgf_dest;
-
-       /* Note: command table (fsl_comp->virt_addr) is getting filled
-        * directly in cmd descriptors of queues while enqueuing the descriptor
-        * please refer fsl_qdma_enqueue_desc
-        * frame list table (virt_addr) + 1) and source,
-        * destination descriptor table
-        * (fsl_comp->desc_virt_addr and fsl_comp->desc_virt_addr+1) move to
-        * the control path to fsl_qdma_pre_request_enqueue_comp_sd_desc
-        */
-       csgf_src = (struct fsl_qdma_format *)fsl_comp->virt_addr + 2;
-       csgf_dest = (struct fsl_qdma_format *)fsl_comp->virt_addr + 3;
-
-       /* Status notification is enqueued to status queue. */
-       qdma_desc_addr_set64(csgf_src, src);
-       qdma_csgf_set_len(csgf_src, len);
-       qdma_desc_addr_set64(csgf_dest, dst);
-       qdma_csgf_set_len(csgf_dest, len);
-       /* This entry is the last entry. */
-       qdma_csgf_set_f(csgf_dest, len);
-}
-
 /*
  * Pre-request command descriptor and compound S/G for enqueue.
  */
 static int
-fsl_qdma_pre_request_enqueue_comp_sd_desc(
-                                       struct fsl_qdma_queue *queue,
-                                       int size, int aligned)
+fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue)
 {
-       struct fsl_qdma_comp *comp_temp, *_comp_temp;
        struct fsl_qdma_sdf *sdf;
        struct fsl_qdma_ddf *ddf;
-       struct fsl_qdma_format *csgf_desc;
-       int i;
-
-       for (i = 0; i < (int)(queue->n_cq + COMMAND_QUEUE_OVERFLOW); i++) {
-               comp_temp = rte_zmalloc("qdma: comp temp",
-                                       sizeof(*comp_temp), 0);
-               if (!comp_temp)
-                       return -ENOMEM;
-
-               comp_temp->virt_addr =
-               dma_pool_alloc(size, aligned, &comp_temp->bus_addr);
-               if (!comp_temp->virt_addr) {
-                       rte_free(comp_temp);
+       struct fsl_qdma_comp_cmd_desc *ccdf;
+       uint16_t i, j;
+       struct fsl_qdma_cmpd_ft *ft;
+
+       for (i = 0; i < queue->n_cq; i++) {
+               dma_addr_t phy_ft = 0;
+
+               queue->ft[i] = dma_pool_alloc(NULL,
+                       sizeof(struct fsl_qdma_cmpd_ft),
+                       RTE_CACHE_LINE_SIZE, &phy_ft);
+               if (!queue->ft[i])
+                       goto fail;
+               if (((uint64_t)queue->ft[i]) &
+                       (RTE_CACHE_LINE_SIZE - 1)) {
+                       DPAA_QDMA_ERR("FD[%d] addr(%p) not cache aligned",
+                               i, queue->ft[i]);
+                       rte_free(queue->ft[i]);
+                       queue->ft[i] = NULL;
                        goto fail;
                }
-
-               comp_temp->desc_virt_addr =
-               dma_pool_alloc(size, aligned, &comp_temp->desc_bus_addr);
-               if (!comp_temp->desc_virt_addr) {
-                       rte_free(comp_temp->virt_addr);
-                       rte_free(comp_temp);
+               if (((uint64_t)(&queue->ft[i]->desc_ssge[0])) &
+                       (RTE_CACHE_LINE_SIZE - 1)) {
+                       DPAA_QDMA_ERR("FD[%d] SGE addr(%p) not cache aligned",
+                               i, &queue->ft[i]->desc_ssge[0]);
+                       rte_free(queue->ft[i]);
+                       queue->ft[i] = NULL;
                        goto fail;
                }
-
-               memset(comp_temp->virt_addr, 0, FSL_QDMA_COMMAND_BUFFER_SIZE);
-               memset(comp_temp->desc_virt_addr, 0,
-                      FSL_QDMA_DESCRIPTOR_BUFFER_SIZE);
-
-               csgf_desc = (struct fsl_qdma_format *)comp_temp->virt_addr + 1;
-               sdf = (struct fsl_qdma_sdf *)comp_temp->desc_virt_addr;
-               ddf = (struct fsl_qdma_ddf *)comp_temp->desc_virt_addr + 1;
+               queue->ft[i]->phy_ssge = phy_ft +
+                       offsetof(struct fsl_qdma_cmpd_ft, desc_ssge);
+               queue->ft[i]->phy_dsge = phy_ft +
+                       offsetof(struct fsl_qdma_cmpd_ft, desc_dsge);
+               queue->ft[i]->phy_df = phy_ft +
+                       offsetof(struct fsl_qdma_cmpd_ft, df);
+
+               ft = queue->ft[i];
+               sdf = &ft->df.sdf;
+               ddf = &ft->df.ddf;
                /* Compound Command Descriptor(Frame List Table) */
-               qdma_desc_addr_set64(csgf_desc, comp_temp->desc_bus_addr);
+               qdma_desc_sge_addr_set64(&ft->desc_buf, ft->phy_df);
                /* It must be 32 as Compound S/G Descriptor */
-               qdma_csgf_set_len(csgf_desc, 32);
+               ft->desc_buf.length = sizeof(struct fsl_qdma_df);
+
                /* Descriptor Buffer */
-               sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE <<
-                              FSL_QDMA_CMD_RWTTYPE_OFFSET);
-               ddf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE <<
-                              FSL_QDMA_CMD_RWTTYPE_OFFSET);
-               ddf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_LWC <<
-                               FSL_QDMA_CMD_LWC_OFFSET);
-
-               list_add_tail(&comp_temp->list, &queue->comp_free);
+               sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
+
+               ddf->dwttype = FSL_QDMA_CMD_RWTTYPE;
+               ddf->lwc = FSL_QDMA_CMD_LWC;
+
+               ccdf = &queue->cq[i];
+               qdma_desc_addr_set64(ccdf, phy_ft);
+               ccdf->format = FSL_QDMA_COMP_SG_FORMAT;
+
+               ccdf->queue = queue->queue_id;
        }
+       queue->ci = 0;
 
        return 0;
 
 fail:
-       list_for_each_entry_safe(comp_temp, _comp_temp,
-                                &queue->comp_free, list) {
-               list_del(&comp_temp->list);
-               rte_free(comp_temp->virt_addr);
-               rte_free(comp_temp->desc_virt_addr);
-               rte_free(comp_temp);
-       }
+       for (j = 0; j < i; j++)
+               rte_free(queue->ft[j]);
 
        return -ENOMEM;
 }
 
-/*
- * Request a command descriptor for enqueue.
- */
-static struct fsl_qdma_comp *
-fsl_qdma_request_enqueue_desc(struct fsl_qdma_chan *fsl_chan)
+static int
+fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma,
+       int queue_id, int block_id)
 {
-       struct fsl_qdma_queue *queue = fsl_chan->queue;
-       struct fsl_qdma_comp *comp_temp;
-
-       if (!list_empty(&queue->comp_free)) {
-               comp_temp = list_first_entry(&queue->comp_free,
-                                            struct fsl_qdma_comp,
-                                            list);
-               list_del(&comp_temp->list);
-               return comp_temp;
+       struct fsl_qdma_queue *cmd_queue;
+       uint32_t queue_size;
+       char nm[RTE_MEMZONE_NAMESIZE];
+
+       cmd_queue = &fsl_qdma->cmd_queues[block_id][queue_id];
+       cmd_queue->engine = fsl_qdma;
+
+       queue_size = sizeof(struct fsl_qdma_comp_cmd_desc) *
+               QDMA_QUEUE_SIZE;
+
+       sprintf(nm, "Command queue_%d_%d",
+               block_id, queue_id);
+       cmd_queue->cq = dma_pool_alloc(nm, queue_size,
+               queue_size, &cmd_queue->bus_addr);
+       if (!cmd_queue->cq) {
+               DPAA_QDMA_ERR("%s alloc failed!", nm);
+               return -ENOMEM;
        }
 
-       return NULL;
-}
-
-static struct fsl_qdma_queue
-*fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma)
-{
-       struct fsl_qdma_queue *queue_head, *queue_temp;
-       int len, i, j;
-       int queue_num;
-       int blocks;
-       unsigned int queue_size[FSL_QDMA_QUEUE_MAX];
-
-       queue_num = fsl_qdma->n_queues;
-       blocks = fsl_qdma->num_blocks;
-
-       len = sizeof(*queue_head) * queue_num * blocks;
-       queue_head = rte_zmalloc("qdma: queue head", len, 0);
-       if (!queue_head)
-               return NULL;
+       cmd_queue->block_vir = fsl_qdma->block_base +
+               FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, block_id);
+       cmd_queue->n_cq = QDMA_QUEUE_SIZE;
+       cmd_queue->queue_id = queue_id;
+       cmd_queue->block_id = block_id;
+       cmd_queue->pending_start = 0;
+       cmd_queue->pending_num = 0;
+       cmd_queue->complete_start = 0;
+
+       sprintf(nm, "Compound Table_%d_%d",
+               block_id, queue_id);
+       cmd_queue->ft = rte_zmalloc(nm,
+                       sizeof(void *) * QDMA_QUEUE_SIZE, 0);
+       if (!cmd_queue->ft) {
+               DPAA_QDMA_ERR("%s zmalloc failed!", nm);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
+       sprintf(nm, "Pending_desc_%d_%d",
+               block_id, queue_id);
+       cmd_queue->pending_desc = rte_zmalloc(nm,
+               sizeof(struct fsl_qdma_desc) * FSL_QDMA_MAX_DESC_NUM, 0);
+       if (!cmd_queue->pending_desc) {
+               DPAA_QDMA_ERR("%s zmalloc failed!", nm);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
+       sprintf(nm, "complete-burst_ring_%d_%d",
+               block_id, queue_id);
+       cmd_queue->complete_burst = rte_ring_create(nm,
+               QDMA_QUEUE_SIZE * 2, 0,
+               RING_F_SP_ENQ | RING_F_SC_DEQ);
+       if (!cmd_queue->complete_burst) {
+               DPAA_QDMA_ERR("%s create failed!", nm);
+               rte_free(cmd_queue->pending_desc);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
+       sprintf(nm, "complete-desc_ring_%d_%d",
+               block_id, queue_id);
+       cmd_queue->complete_desc = rte_ring_create(nm,
+               FSL_QDMA_MAX_DESC_NUM * 2, 0,
+               RING_F_SP_ENQ | RING_F_SC_DEQ);
+       if (!cmd_queue->complete_desc) {
+               DPAA_QDMA_ERR("%s create failed!", nm);
+               rte_ring_free(cmd_queue->complete_burst);
+               rte_free(cmd_queue->pending_desc);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
+       sprintf(nm, "complete-pool-desc_ring_%d_%d",
+               block_id, queue_id);
+       cmd_queue->complete_pool = rte_ring_create(nm,
+               FSL_QDMA_MAX_DESC_NUM * 2, 0,
+               RING_F_SP_ENQ | RING_F_SC_DEQ);
+       if (!cmd_queue->complete_pool) {
+               DPAA_QDMA_ERR("%s create failed!", nm);
+               rte_ring_free(cmd_queue->complete_desc);
+               rte_ring_free(cmd_queue->complete_burst);
+               rte_free(cmd_queue->pending_desc);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
 
-       for (i = 0; i < FSL_QDMA_QUEUE_MAX; i++)
-               queue_size[i] = QDMA_QUEUE_SIZE;
+       memset(&cmd_queue->stats, 0, sizeof(struct rte_dma_stats));
+       cmd_queue->pending_max = FSL_QDMA_MAX_DESC_NUM;
 
-       for (j = 0; j < blocks; j++) {
-               for (i = 0; i < queue_num; i++) {
-                       if (queue_size[i] > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX ||
-                           queue_size[i] < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) {
-                               DPAA_QDMA_ERR("Get wrong queue-sizes.");
-                               goto fail;
-                       }
-                       queue_temp = queue_head + i + (j * queue_num);
-
-                       queue_temp->cq =
-                       dma_pool_alloc(sizeof(struct fsl_qdma_format) *
-                                      queue_size[i],
-                                      sizeof(struct fsl_qdma_format) *
-                                      queue_size[i], &queue_temp->bus_addr);
-
-                       if (!queue_temp->cq)
-                               goto fail;
-
-                       memset(queue_temp->cq, 0x0, queue_size[i] *
-                              sizeof(struct fsl_qdma_format));
-
-                       queue_temp->block_base = fsl_qdma->block_base +
-                               FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
-                       queue_temp->n_cq = queue_size[i];
-                       queue_temp->id = i;
-                       queue_temp->count = 0;
-                       queue_temp->pending = 0;
-                       queue_temp->virt_head = queue_temp->cq;
-                       queue_temp->stats = (struct rte_dma_stats){0};
-               }
-       }
-       return queue_head;
+       return 0;
+}
 
-fail:
-       for (j = 0; j < blocks; j++) {
-               for (i = 0; i < queue_num; i++) {
-                       queue_temp = queue_head + i + (j * queue_num);
-                       dma_pool_free(queue_temp->cq);
-               }
-       }
-       rte_free(queue_head);
+static void
+fsl_qdma_free_cmdq_res(struct fsl_qdma_queue *queue)
+{
+       rte_free(queue->ft);
+       rte_free(queue->cq);
+       rte_free(queue->pending_desc);
+       rte_ring_free(queue->complete_burst);
+       rte_ring_free(queue->complete_desc);
+       rte_ring_free(queue->complete_pool);
+}
 
-       return NULL;
+static void
+fsl_qdma_free_stq_res(struct fsl_qdma_status_queue *queue)
+{
+       rte_free(queue->cq);
 }
 
-static struct
-fsl_qdma_queue *fsl_qdma_prep_status_queue(void)
+static int
+fsl_qdma_prep_status_queue(struct fsl_qdma_engine *fsl_qdma,
+       uint32_t block_id)
 {
-       struct fsl_qdma_queue *status_head;
-       unsigned int status_size;
+       struct fsl_qdma_status_queue *status;
+       uint32_t status_size;
 
-       status_size = QDMA_STATUS_SIZE;
-       if (status_size > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX ||
-           status_size < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) {
-               DPAA_QDMA_ERR("Get wrong status_size.");
-               return NULL;
-       }
+       status = &fsl_qdma->stat_queues[block_id];
+       status->engine = fsl_qdma;
 
-       status_head = rte_zmalloc("qdma: status head", sizeof(*status_head), 0);
-       if (!status_head)
-               return NULL;
+       status_size = QDMA_STATUS_SIZE *
+               sizeof(struct fsl_qdma_comp_cmd_desc);
 
-       /*
-        * Buffer for queue command
-        */
-       status_head->cq = dma_pool_alloc(sizeof(struct fsl_qdma_format) *
-                                        status_size,
-                                        sizeof(struct fsl_qdma_format) *
-                                        status_size,
-                                        &status_head->bus_addr);
-
-       if (!status_head->cq) {
-               rte_free(status_head);
-               return NULL;
-       }
+       status->cq = dma_pool_alloc(NULL, status_size,
+               status_size, &status->bus_addr);
+
+       if (!status->cq)
+               return -ENOMEM;
 
-       memset(status_head->cq, 0x0, status_size *
-              sizeof(struct fsl_qdma_format));
-       status_head->n_cq = status_size;
-       status_head->virt_head = status_head->cq;
+       memset(status->cq, 0x0, status_size);
+       status->n_cq = QDMA_STATUS_SIZE;
+       status->complete = 0;
+       status->block_id = block_id;
+       status->block_vir = fsl_qdma->block_base +
+               FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, block_id);
 
-       return status_head;
+       return 0;
 }
 
 static int
@@ -420,59 +375,41 @@ fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma)
        return 0;
 }
 
-static int
-fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma,
-                                void *block, int id, const uint16_t nb_cpls,
-                                uint16_t *last_idx,
-                                enum rte_dma_status_code *status)
+static void
+fsl_qdma_data_validation(struct fsl_qdma_desc *desc[],
+       uint8_t num, struct fsl_qdma_queue *fsl_queue)
 {
-       struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue;
-       struct fsl_qdma_queue *fsl_status = fsl_qdma->status[id];
-       struct fsl_qdma_queue *temp_queue;
-       struct fsl_qdma_format *status_addr;
-       struct fsl_qdma_comp *fsl_comp = NULL;
-       u32 reg, i;
-       int count = 0;
-
-       while (count < nb_cpls) {
-               reg = qdma_readl_be(block + FSL_QDMA_BSQSR);
-               if (reg & FSL_QDMA_BSQSR_QE_BE)
-                       return count;
-
-               status_addr = fsl_status->virt_head;
-
-               i = qdma_ccdf_get_queue(status_addr) +
-                       id * fsl_qdma->n_queues;
-               temp_queue = fsl_queue + i;
-               fsl_comp = list_first_entry(&temp_queue->comp_used,
-                                           struct fsl_qdma_comp,
-                                           list);
-               list_del(&fsl_comp->list);
-
-               reg = qdma_readl_be(block + FSL_QDMA_BSQMR);
-               reg |= FSL_QDMA_BSQMR_DI_BE;
-
-               qdma_desc_addr_set64(status_addr, 0x0);
-               fsl_status->virt_head++;
-               if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq)
-                       fsl_status->virt_head = fsl_status->cq;
-               qdma_writel_be(reg, block + FSL_QDMA_BSQMR);
-               *last_idx = fsl_comp->index;
-               if (status != NULL)
-                       status[count] = RTE_DMA_STATUS_SUCCESSFUL;
-
-               list_add_tail(&fsl_comp->list, &temp_queue->comp_free);
-               count++;
-
+       uint32_t i, j;
+       uint8_t *v_src, *v_dst;
+       char err_msg[512];
+       int offset;
+
+
+       offset = sprintf(err_msg, "Fatal TC%d/queue%d: ",
+               fsl_queue->block_id,
+               fsl_queue->queue_id);
+       for (i = 0; i < num; i++) {
+               v_src = rte_mem_iova2virt(desc[i]->src);
+               v_dst = rte_mem_iova2virt(desc[i]->dst);
+               for (j = 0; j < desc[i]->len; j++) {
+                       if (v_src[j] != v_dst[j]) {
+                               sprintf(&err_msg[offset],
+                                       "job[%"PRIu64"]:src(%p)[%d](%d)!=dst(%p)[%d](%d)",
+                                       desc[i]->flag, v_src, j, v_src[j],
+                                       v_dst, j, v_dst[j]);
+                               DPAA_QDMA_ERR("%s, stop validating!",
+                                       err_msg);
+                               return;
+                       }
+               }
        }
-       return count;
 }
 
 static int
 fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
 {
-       struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue;
        struct fsl_qdma_queue *temp;
+       struct fsl_qdma_status_queue *temp_stat;
        void *ctrl = fsl_qdma->ctrl_base;
        void *block;
        u32 i, j;
@@ -489,8 +426,8 @@ fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
        for (j = 0; j < fsl_qdma->num_blocks; j++) {
                block = fsl_qdma->block_base +
                        FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
-               for (i = 0; i < fsl_qdma->n_queues; i++) {
-                       temp = fsl_queue + i + (j * fsl_qdma->n_queues);
+               for (i = 0; i < QDMA_QUEUES; i++) {
+                       temp = &fsl_qdma->cmd_queues[j][i];
                        /*
                         * Initialize Command Queue registers to
                         * point to the first
@@ -531,18 +468,15 @@ fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
                 * Enqueue Pointer Address Registers
                 */
 
-               qdma_writel(
-                           upper_32_bits(fsl_qdma->status[j]->bus_addr),
-                           block + FSL_QDMA_SQEEPAR);
-               qdma_writel(
-                           lower_32_bits(fsl_qdma->status[j]->bus_addr),
-                           block + FSL_QDMA_SQEPAR);
-               qdma_writel(
-                           upper_32_bits(fsl_qdma->status[j]->bus_addr),
-                           block + FSL_QDMA_SQEDPAR);
-               qdma_writel(
-                           lower_32_bits(fsl_qdma->status[j]->bus_addr),
-                           block + FSL_QDMA_SQDPAR);
+               temp_stat = &fsl_qdma->stat_queues[j];
+               qdma_writel(upper_32_bits(temp_stat->bus_addr),
+                       block + FSL_QDMA_SQEEPAR);
+               qdma_writel(lower_32_bits(temp_stat->bus_addr),
+                       block + FSL_QDMA_SQEPAR);
+               qdma_writel(upper_32_bits(temp_stat->bus_addr),
+                       block + FSL_QDMA_SQEDPAR);
+               qdma_writel(lower_32_bits(temp_stat->bus_addr),
+                       block + FSL_QDMA_SQDPAR);
                /* Desiable status queue interrupt. */
 
                qdma_writel(0x0, block + FSL_QDMA_BCQIER(0));
@@ -551,7 +485,7 @@ fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
 
                /* Initialize the status queue mode. */
                reg = FSL_QDMA_BSQMR_EN;
-               val = ilog2(fsl_qdma->status[j]->n_cq) - 6;
+               val = ilog2_qsize(temp_stat->n_cq);
                reg |= FSL_QDMA_BSQMR_CQ_SIZE(val);
                qdma_writel(reg, block + FSL_QDMA_BSQMR);
        }
@@ -563,158 +497,393 @@ fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
        return 0;
 }
 
-static void *
-fsl_qdma_prep_memcpy(void *fsl_chan, dma_addr_t dst,
-                          dma_addr_t src, size_t len,
-                          void *call_back,
-                          void *param)
+static uint16_t
+dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma,
+       uint8_t block_id)
 {
-       struct fsl_qdma_comp *fsl_comp;
+       struct fsl_qdma_status_queue *stat_queue;
+       struct fsl_qdma_queue *cmd_queue;
+       struct fsl_qdma_comp_cmd_desc *cq;
+       uint16_t start, count = 0;
+       uint8_t qid = 0;
+       uint32_t reg;
+       int ret;
+       uint8_t *block;
+       uint16_t *dq_complete;
+       struct fsl_qdma_desc *desc[FSL_QDMA_SG_MAX_ENTRY];
 
-       fsl_comp =
-       fsl_qdma_request_enqueue_desc((struct fsl_qdma_chan *)fsl_chan);
-       if (!fsl_comp)
-               return NULL;
+       stat_queue = &fsl_qdma->stat_queues[block_id];
+       cq = stat_queue->cq;
+       start = stat_queue->complete;
+
+       block = fsl_qdma->block_base +
+               FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, block_id);
 
-       fsl_comp->qchan = fsl_chan;
-       fsl_comp->call_back_func = call_back;
-       fsl_comp->params = param;
+       do {
+               reg = qdma_readl_be(block + FSL_QDMA_BSQSR);
+               if (reg & FSL_QDMA_BSQSR_QE_BE)
+                       break;
 
-       fsl_qdma_comp_fill_memcpy(fsl_comp, dst, src, len);
-       return (void *)fsl_comp;
+               qdma_writel_be(FSL_QDMA_BSQMR_DI, block + FSL_QDMA_BSQMR);
+               ret = qdma_ccdf_get_queue(&cq[start], &qid);
+               if (ret == true) {
+                       cmd_queue = &fsl_qdma->cmd_queues[block_id][qid];
+
+                       ret = rte_ring_dequeue(cmd_queue->complete_burst,
+                               (void **)&dq_complete);
+                       if (ret) {
+                               DPAA_QDMA_ERR("DQ desc number failed!");
+                               break;
+                       }
+
+                       ret = rte_ring_dequeue_bulk(cmd_queue->complete_desc,
+                               (void **)desc, *dq_complete, NULL);
+                       if (ret != (*dq_complete)) {
+                               DPAA_QDMA_ERR("DQ %d descs failed!(%d)",
+                                       *dq_complete, ret);
+                               break;
+                       }
+
+                       fsl_qdma_data_validation(desc, *dq_complete, cmd_queue);
+
+                       ret = rte_ring_enqueue_bulk(cmd_queue->complete_pool,
+                               (void **)desc, (*dq_complete), NULL);
+                       if (ret != (*dq_complete)) {
+                               DPAA_QDMA_ERR("Failed desc eq %d!=%d to %s",
+                                       ret, *dq_complete,
+                                       cmd_queue->complete_pool->name);
+                               break;
+                       }
+
+                       cmd_queue->complete_start =
+                               (cmd_queue->complete_start + (*dq_complete)) &
+                               (cmd_queue->pending_max - 1);
+                       cmd_queue->stats.completed++;
+
+                       start++;
+                       if (unlikely(start == stat_queue->n_cq))
+                               start = 0;
+                       count++;
+               } else {
+                       DPAA_QDMA_ERR("Block%d not empty but dq-queue failed!",
+                               block_id);
+                       break;
+               }
+       } while (1);
+       stat_queue->complete = start;
+
+       return count;
 }
 
 static int
-fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan,
-                                 struct fsl_qdma_comp *fsl_comp,
-                                 uint64_t flags)
+fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue,
+       uint16_t num)
 {
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
-       void *block = fsl_queue->block_base;
-       struct fsl_qdma_format *ccdf;
-       u32 reg;
+       uint16_t i, idx, start, dq;
+       int ret, dq_cnt;
+
 
-       /* retrieve and store the register value in big endian
-        * to avoid bits swap
-        */
-       reg = qdma_readl_be(block +
-                        FSL_QDMA_BCQSR(fsl_queue->id));
-       if (reg & (FSL_QDMA_BCQSR_QF_XOFF_BE))
-               return -1;
-
-       /* filling descriptor  command table */
-       ccdf = (struct fsl_qdma_format *)fsl_queue->virt_head;
-       qdma_desc_addr_set64(ccdf, fsl_comp->bus_addr + 16);
-       qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(fsl_comp->virt_addr));
-       qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(fsl_comp->virt_addr));
-       fsl_comp->index = fsl_queue->virt_head - fsl_queue->cq;
-       fsl_queue->virt_head++;
-
-       if (fsl_queue->virt_head == fsl_queue->cq + fsl_queue->n_cq)
-               fsl_queue->virt_head = fsl_queue->cq;
-
-       list_add_tail(&fsl_comp->list, &fsl_queue->comp_used);
-
-       if (flags == RTE_DMA_OP_FLAG_SUBMIT) {
-               reg = qdma_readl_be(block + FSL_QDMA_BCQMR(fsl_queue->id));
-               reg |= FSL_QDMA_BCQMR_EI_BE;
-               qdma_writel_be(reg, block + FSL_QDMA_BCQMR(fsl_queue->id));
-               fsl_queue->stats.submitted++;
-       } else {
-               fsl_queue->pending++;
+       fsl_queue->desc_in_hw[fsl_queue->ci] = num;
+eq_again:
+       ret = rte_ring_enqueue(fsl_queue->complete_burst,
+                       &fsl_queue->desc_in_hw[fsl_queue->ci]);
+       if (ret) {
+               DPAA_QDMA_DP_DEBUG("%s: Queue is full, try dequeue first",
+                       __func__);
+               DPAA_QDMA_DP_DEBUG("%s: submitted:%"PRIu64", completed:%"PRIu64"",
+                       __func__, fsl_queue->stats.submitted,
+                       fsl_queue->stats.completed);
+               dq_cnt = 0;
+dq_again:
+               dq = dpaa_qdma_block_dequeue(fsl_queue->engine,
+                       fsl_queue->block_id);
+               dq_cnt++;
+               if (dq > 0) {
+                       goto eq_again;
+               } else {
+                       if (dq_cnt < 100)
+                               goto dq_again;
+                       DPAA_QDMA_ERR("%s: Dq block%d failed!",
+                               __func__, fsl_queue->block_id);
+               }
+               return ret;
+       }
+       start = fsl_queue->pending_start;
+       for (i = 0; i < num; i++) {
+               idx = (start + i) & (fsl_queue->pending_max - 1);
+               ret = rte_ring_enqueue(fsl_queue->complete_desc,
+                               &fsl_queue->pending_desc[idx]);
+               if (ret) {
+                       DPAA_QDMA_ERR("Descriptors eq failed!");
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int
+fsl_qdma_enqueue_overflow(struct fsl_qdma_queue *fsl_queue)
+{
+       int overflow = 0;
+       uint16_t blk_drain, check_num, drain_num;
+       const struct rte_dma_stats *st = &fsl_queue->stats;
+       struct fsl_qdma_engine *fsl_qdma = fsl_queue->engine;
+
+       check_num = 0;
+overflow_check:
+       overflow = (fsl_qdma_queue_bd_in_hw(fsl_queue) >=
+                       QDMA_QUEUE_CR_WM) ? 1 : 0;
+
+       if (likely(!overflow))
+               return 0;
+
+       DPAA_QDMA_DP_DEBUG("TC%d/Q%d submitted(%"PRIu64")-completed(%"PRIu64") >= %d",
+               fsl_queue->block_id, fsl_queue->queue_id,
+               st->submitted, st->completed, QDMA_QUEUE_CR_WM);
+       drain_num = 0;
+
+drain_again:
+       blk_drain = dpaa_qdma_block_dequeue(fsl_qdma,
+               fsl_queue->block_id);
+       if (!blk_drain) {
+               drain_num++;
+               if (drain_num >= 10000) {
+                       DPAA_QDMA_WARN("TC%d failed drain, Q%d's %"PRIu64" bd in HW.",
+                               fsl_queue->block_id, fsl_queue->queue_id,
+                               st->submitted - st->completed);
+                       drain_num = 0;
+               }
+               goto drain_again;
+       }
+       check_num++;
+       if (check_num >= 1000) {
+               DPAA_QDMA_WARN("TC%d failed check, Q%d's %"PRIu64" bd in HW.",
+                       fsl_queue->block_id, fsl_queue->queue_id,
+                       st->submitted - st->completed);
+               check_num = 0;
        }
-       return fsl_comp->index;
+       goto overflow_check;
+
+       return 0;
 }
 
 static int
-fsl_qdma_alloc_chan_resources(struct fsl_qdma_chan *fsl_chan)
+fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue,
+       dma_addr_t dst, dma_addr_t src, size_t len)
 {
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
-       struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma;
+       uint8_t *block = fsl_queue->block_vir;
+       struct fsl_qdma_comp_sg_desc *csgf_src, *csgf_dest;
+       struct fsl_qdma_cmpd_ft *ft;
        int ret;
 
-       if (fsl_queue->count++)
-               goto finally;
+       ret = fsl_qdma_enqueue_overflow(fsl_queue);
+       if (unlikely(ret))
+               return ret;
 
-       INIT_LIST_HEAD(&fsl_queue->comp_free);
-       INIT_LIST_HEAD(&fsl_queue->comp_used);
+       ft = fsl_queue->ft[fsl_queue->ci];
 
-       ret = fsl_qdma_pre_request_enqueue_comp_sd_desc(fsl_queue,
-                               FSL_QDMA_COMMAND_BUFFER_SIZE, 64);
-       if (ret) {
-               DPAA_QDMA_ERR(
-                       "failed to alloc dma buffer for comp descriptor");
-               goto exit;
-       }
+       csgf_src = &ft->desc_sbuf;
+       csgf_dest = &ft->desc_dbuf;
+       qdma_desc_sge_addr_set64(csgf_src, src);
+       csgf_src->length = len;
+       csgf_src->extion = 0;
+       qdma_desc_sge_addr_set64(csgf_dest, dst);
+       csgf_dest->length = len;
+       csgf_dest->extion = 0;
+       /* This entry is the last entry. */
+       csgf_dest->final = 1;
+
+       ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, 1);
+       if (ret)
+               return ret;
+       fsl_queue->ci = (fsl_queue->ci + 1) & (fsl_queue->n_cq - 1);
 
-finally:
-       return fsl_qdma->desc_allocated++;
+       qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI,
+               block + FSL_QDMA_BCQMR(fsl_queue->queue_id));
+       fsl_queue->stats.submitted++;
 
-exit:
-       return -ENOMEM;
+       return 0;
 }
 
 static int
-dpaa_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info,
-             uint32_t info_sz)
+fsl_qdma_enqueue_desc_sg(struct fsl_qdma_queue *fsl_queue)
 {
-#define DPAADMA_MAX_DESC        64
-#define DPAADMA_MIN_DESC        64
+       uint8_t *block = fsl_queue->block_vir;
+       struct fsl_qdma_comp_sg_desc *csgf_src, *csgf_dest;
+       struct fsl_qdma_cmpd_ft *ft;
+       uint32_t total_len;
+       uint16_t start, idx, num, i, next_idx;
+       int ret;
 
-       RTE_SET_USED(dev);
-       RTE_SET_USED(info_sz);
+eq_sg:
+       total_len = 0;
+       start = fsl_queue->pending_start;
+       if (fsl_queue->pending_desc[start].len > s_sg_max_entry_sz ||
+               fsl_queue->pending_num == 1) {
+               ret = fsl_qdma_enqueue_desc_single(fsl_queue,
+                       fsl_queue->pending_desc[start].dst,
+                       fsl_queue->pending_desc[start].src,
+                       fsl_queue->pending_desc[start].len);
+               if (!ret) {
+                       fsl_queue->pending_start =
+                               (start + 1) & (fsl_queue->pending_max - 1);
+                       fsl_queue->pending_num--;
+               }
+               if (fsl_queue->pending_num > 0)
+                       goto eq_sg;
 
-       dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
-                            RTE_DMA_CAPA_MEM_TO_DEV |
-                            RTE_DMA_CAPA_DEV_TO_DEV |
-                            RTE_DMA_CAPA_DEV_TO_MEM |
-                            RTE_DMA_CAPA_SILENT |
-                            RTE_DMA_CAPA_OPS_COPY;
-       dev_info->max_vchans = 1;
-       dev_info->max_desc = DPAADMA_MAX_DESC;
-       dev_info->min_desc = DPAADMA_MIN_DESC;
+               return ret;
+       }
+
+       ret = fsl_qdma_enqueue_overflow(fsl_queue);
+       if (unlikely(ret))
+               return ret;
+
+       if (fsl_queue->pending_num > FSL_QDMA_SG_MAX_ENTRY)
+               num = FSL_QDMA_SG_MAX_ENTRY;
+       else
+               num = fsl_queue->pending_num;
+
+       ft = fsl_queue->ft[fsl_queue->ci];
+       csgf_src = &ft->desc_sbuf;
+       csgf_dest = &ft->desc_dbuf;
+
+       qdma_desc_sge_addr_set64(csgf_src, ft->phy_ssge);
+       csgf_src->extion = 1;
+       qdma_desc_sge_addr_set64(csgf_dest, ft->phy_dsge);
+       csgf_dest->extion = 1;
+       /* This entry is the last entry. */
+       csgf_dest->final = 1;
+       for (i = 0; i < num; i++) {
+               idx = (start + i) & (fsl_queue->pending_max - 1);
+               qdma_desc_sge_addr_set64(&ft->desc_ssge[i],
+                       fsl_queue->pending_desc[idx].src);
+               ft->desc_ssge[i].length = fsl_queue->pending_desc[idx].len;
+               ft->desc_ssge[i].final = 0;
+               qdma_desc_sge_addr_set64(&ft->desc_dsge[i],
+                       fsl_queue->pending_desc[idx].dst);
+               ft->desc_dsge[i].length = fsl_queue->pending_desc[idx].len;
+               ft->desc_dsge[i].final = 0;
+               total_len += fsl_queue->pending_desc[idx].len;
+               if ((i + 1) != num) {
+                       next_idx = (idx + 1) & (fsl_queue->pending_max - 1);
+                       if (fsl_queue->pending_desc[next_idx].len >
+                               s_sg_max_entry_sz) {
+                               num = i + 1;
+                               break;
+                       }
+               }
+       }
+
+       ft->desc_ssge[num - 1].final = 1;
+       ft->desc_dsge[num - 1].final = 1;
+       csgf_src->length = total_len;
+       csgf_dest->length = total_len;
+       ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, num);
+       if (ret)
+               return ret;
+
+       fsl_queue->ci = (fsl_queue->ci + 1) & (fsl_queue->n_cq - 1);
+
+       qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI,
+               block + FSL_QDMA_BCQMR(fsl_queue->queue_id));
+       fsl_queue->stats.submitted++;
+
+       fsl_queue->pending_start =
+               (start + num) & (fsl_queue->pending_max - 1);
+       fsl_queue->pending_num -= num;
+       if (fsl_queue->pending_num > 0)
+               goto eq_sg;
 
        return 0;
 }
 
 static int
-dpaa_get_channel(struct fsl_qdma_engine *fsl_qdma,  uint16_t vchan)
+fsl_qdma_enqueue_desc(struct fsl_qdma_queue *fsl_queue)
 {
-       u32 i, start, end;
+       uint16_t start = fsl_queue->pending_start;
        int ret;
 
-       start = fsl_qdma->free_block_id * QDMA_QUEUES;
-       fsl_qdma->free_block_id++;
+       if (fsl_queue->pending_num == 1) {
+               ret = fsl_qdma_enqueue_desc_single(fsl_queue,
+                       fsl_queue->pending_desc[start].dst,
+                       fsl_queue->pending_desc[start].src,
+                       fsl_queue->pending_desc[start].len);
+               if (!ret) {
+                       fsl_queue->pending_start =
+                               (start + 1) & (fsl_queue->pending_max - 1);
+                       fsl_queue->pending_num = 0;
+               }
+               return ret;
+       }
+
+       return fsl_qdma_enqueue_desc_sg(fsl_queue);
+}
 
-       end = start + 1;
-       for (i = start; i < end; i++) {
-               struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[i];
+static int
+dpaa_qdma_info_get(const struct rte_dma_dev *dev,
+       struct rte_dma_info *dev_info, __rte_unused uint32_t info_sz)
+{
+       struct fsl_qdma_engine *fsl_qdma = dev->data->dev_private;
 
-               if (fsl_chan->free) {
-                       fsl_chan->free = false;
-                       ret = fsl_qdma_alloc_chan_resources(fsl_chan);
-                       if (ret)
-                               return ret;
+       dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+               RTE_DMA_CAPA_SILENT | RTE_DMA_CAPA_OPS_COPY |
+               RTE_DMA_CAPA_OPS_COPY_SG;
+       dev_info->dev_capa |= DPAA_QDMA_FLAGS_INDEX;
+       dev_info->max_vchans = fsl_qdma->n_queues;
+       dev_info->max_desc = FSL_QDMA_MAX_DESC_NUM;
+       dev_info->min_desc = QDMA_QUEUE_SIZE;
+       dev_info->max_sges = FSL_QDMA_SG_MAX_ENTRY;
 
-                       fsl_qdma->vchan_map[vchan] = i;
-                       return 0;
+       return 0;
+}
+
+static int
+dpaa_get_channel(struct fsl_qdma_engine *fsl_qdma,
+       uint16_t vchan)
+{
+       int ret, i, j, found = 0;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
+
+       if (fsl_queue) {
+               found = 1;
+               goto queue_found;
+       }
+
+       for (i = 0; i < QDMA_BLOCKS; i++) {
+               for (j = 0; j < QDMA_QUEUES; j++) {
+                       fsl_queue = &fsl_qdma->cmd_queues[i][j];
+
+                       if (fsl_queue->channel_id == vchan) {
+                               found = 1;
+                               fsl_qdma->chan[vchan] = fsl_queue;
+                               goto queue_found;
+                       }
                }
        }
 
-       return -1;
-}
+queue_found:
+       if (!found)
+               return -ENXIO;
 
-static void
-dma_release(void *fsl_chan)
-{
-       ((struct fsl_qdma_chan *)fsl_chan)->free = true;
-       fsl_qdma_free_chan_resources((struct fsl_qdma_chan *)fsl_chan);
+       if (fsl_queue->used)
+               return 0;
+
+       ret = fsl_qdma_pre_comp_sd_desc(fsl_queue);
+       if (ret)
+               return ret;
+
+       fsl_queue->used = 1;
+       fsl_qdma->block_queues[fsl_queue->block_id]++;
+
+       return 0;
 }
 
 static int
 dpaa_qdma_configure(__rte_unused struct rte_dma_dev *dmadev,
-                   __rte_unused const struct rte_dma_conf *dev_conf,
-                   __rte_unused uint32_t conf_sz)
+       __rte_unused const struct rte_dma_conf *dev_conf,
+       __rte_unused uint32_t conf_sz)
 {
        return 0;
 }
@@ -745,148 +914,112 @@ dpaa_qdma_queue_setup(struct rte_dma_dev *dmadev,
 static int
 dpaa_qdma_submit(void *dev_private, uint16_t vchan)
 {
-       struct fsl_qdma_engine *fsl_qdma = (struct fsl_qdma_engine *)dev_private;
-       struct fsl_qdma_chan *fsl_chan =
-               &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]];
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
-       void *block = fsl_queue->block_base;
-       u32 reg;
+       struct fsl_qdma_engine *fsl_qdma = dev_private;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
 
-       while (fsl_queue->pending) {
-               reg = qdma_readl_be(block + FSL_QDMA_BCQMR(fsl_queue->id));
-               reg |= FSL_QDMA_BCQMR_EI_BE;
-               qdma_writel_be(reg, block + FSL_QDMA_BCQMR(fsl_queue->id));
-               fsl_queue->pending--;
-               fsl_queue->stats.submitted++;
-       }
+       if (!fsl_queue->pending_num)
+               return 0;
 
-       return 0;
+       return fsl_qdma_enqueue_desc(fsl_queue);
 }
 
 static int
 dpaa_qdma_enqueue(void *dev_private, uint16_t vchan,
-                 rte_iova_t src, rte_iova_t dst,
-                 uint32_t length, uint64_t flags)
+       rte_iova_t src, rte_iova_t dst,
+       uint32_t length, uint64_t flags)
 {
-       struct fsl_qdma_engine *fsl_qdma = (struct fsl_qdma_engine *)dev_private;
-       struct fsl_qdma_chan *fsl_chan =
-               &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]];
+       struct fsl_qdma_engine *fsl_qdma = dev_private;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
+       uint16_t start = fsl_queue->pending_start;
+       uint8_t pending = fsl_queue->pending_num;
+       uint16_t idx;
        int ret;
 
-       void *fsl_comp = NULL;
-
-       fsl_comp = fsl_qdma_prep_memcpy(fsl_chan,
-                       (dma_addr_t)dst, (dma_addr_t)src,
-                       length, NULL, NULL);
-       if (!fsl_comp) {
-               DPAA_QDMA_DP_DEBUG("fsl_comp is NULL");
-               return -1;
+       if (pending >= fsl_queue->pending_max) {
+               DPAA_QDMA_ERR("Too many pending jobs(%d) on queue%d",
+                       pending, vchan);
+               return -ENOSPC;
        }
-       ret = fsl_qdma_enqueue_desc(fsl_chan, fsl_comp, flags);
+       idx = (start + pending) & (fsl_queue->pending_max - 1);
+
+       fsl_queue->pending_desc[idx].src = src;
+       fsl_queue->pending_desc[idx].dst = dst;
+       fsl_queue->pending_desc[idx].flag =
+               DPAA_QDMA_IDX_FROM_FLAG(flags);
+       fsl_queue->pending_desc[idx].len = length;
+       fsl_queue->pending_num++;
+
+       if (!(flags & RTE_DMA_OP_FLAG_SUBMIT))
+               return idx;
+
+       ret = fsl_qdma_enqueue_desc(fsl_queue);
+       if (!ret)
+               return fsl_queue->pending_start;
 
        return ret;
 }
 
+
 static uint16_t
 dpaa_qdma_dequeue_status(void *dev_private, uint16_t vchan,
-                        const uint16_t nb_cpls, uint16_t *last_idx,
-                        enum rte_dma_status_code *st)
+       const uint16_t nb_cpls, uint16_t *last_idx,
+       enum rte_dma_status_code *st)
 {
-       struct fsl_qdma_engine *fsl_qdma = (struct fsl_qdma_engine *)dev_private;
-       int id = (int)((fsl_qdma->vchan_map[vchan]) / QDMA_QUEUES);
-       void *block;
-       int intr;
-       void *status = fsl_qdma->status_base;
-       struct fsl_qdma_chan *fsl_chan =
-               &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]];
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
-
-       intr = qdma_readl_be(status + FSL_QDMA_DEDR);
-       if (intr) {
-               DPAA_QDMA_ERR("DMA transaction error! %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW0R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW0R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW1R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW1R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW2R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW2R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW3R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW3R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFQIDR);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFQIDR %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECBR);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECBR %x", intr);
-               qdma_writel(0xffffffff,
-                           status + FSL_QDMA_DEDR);
-               intr = qdma_readl(status + FSL_QDMA_DEDR);
-               fsl_queue->stats.errors++;
+       struct fsl_qdma_engine *fsl_qdma = dev_private;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
+       struct fsl_qdma_desc *desc_complete[nb_cpls];
+       uint16_t i, dq_num;
+
+       dq_num = dpaa_qdma_block_dequeue(fsl_qdma,
+                       fsl_queue->block_id);
+       DPAA_QDMA_DP_DEBUG("%s: block dq(%d)",
+               __func__, dq_num);
+
+       dq_num = rte_ring_dequeue_burst(fsl_queue->complete_pool,
+                       (void **)desc_complete, nb_cpls, NULL);
+       for (i = 0; i < dq_num; i++)
+               last_idx[i] = desc_complete[i]->flag;
+
+       if (st) {
+               for (i = 0; i < dq_num; i++)
+                       st[i] = RTE_DMA_STATUS_SUCCESSFUL;
        }
 
-       block = fsl_qdma->block_base +
-               FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id);
-
-       intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id, nb_cpls,
-                                               last_idx, st);
-       fsl_queue->stats.completed += intr;
 
-       return intr;
+       return dq_num;
 }
 
-
 static uint16_t
 dpaa_qdma_dequeue(void *dev_private,
-                 uint16_t vchan, const uint16_t nb_cpls,
-                 uint16_t *last_idx, bool *has_error)
+       uint16_t vchan, const uint16_t nb_cpls,
+       uint16_t *last_idx, bool *has_error)
 {
-       struct fsl_qdma_engine *fsl_qdma = (struct fsl_qdma_engine *)dev_private;
-       int id = (int)((fsl_qdma->vchan_map[vchan]) / QDMA_QUEUES);
-       void *block;
-       int intr;
-       void *status = fsl_qdma->status_base;
-       struct fsl_qdma_chan *fsl_chan =
-               &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]];
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
-
-       intr = qdma_readl_be(status + FSL_QDMA_DEDR);
-       if (intr) {
-               DPAA_QDMA_ERR("DMA transaction error! %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW0R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW0R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW1R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW1R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW2R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW2R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFDW3R);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW3R %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECFQIDR);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECFQIDR %x", intr);
-               intr = qdma_readl(status + FSL_QDMA_DECBR);
-               DPAA_QDMA_INFO("reg FSL_QDMA_DECBR %x", intr);
-               qdma_writel(0xffffffff,
-                           status + FSL_QDMA_DEDR);
-               intr = qdma_readl(status + FSL_QDMA_DEDR);
-               *has_error = true;
-               fsl_queue->stats.errors++;
-       }
+       struct fsl_qdma_engine *fsl_qdma = dev_private;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
+       struct fsl_qdma_desc *desc_complete[nb_cpls];
+       uint16_t i, dq_num;
 
-       block = fsl_qdma->block_base +
-               FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id);
 
-       intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id, nb_cpls,
-                                               last_idx, NULL);
-       fsl_queue->stats.completed += intr;
+       *has_error = false;
+       dq_num = dpaa_qdma_block_dequeue(fsl_qdma,
+               fsl_queue->block_id);
+       DPAA_QDMA_DP_DEBUG("%s: block dq(%d)",
+               __func__, dq_num);
 
-       return intr;
+       dq_num = rte_ring_dequeue_burst(fsl_queue->complete_pool,
+                       (void **)desc_complete, nb_cpls, NULL);
+       for (i = 0; i < dq_num; i++)
+               last_idx[i] = desc_complete[i]->flag;
+
+       return dq_num;
 }
 
 static int
-dpaa_qdma_stats_get(const struct rte_dma_dev *dmadev, uint16_t vchan,
-                   struct rte_dma_stats *rte_stats, uint32_t size)
+dpaa_qdma_stats_get(const struct rte_dma_dev *dmadev,
+       uint16_t vchan, struct rte_dma_stats *rte_stats, uint32_t size)
 {
        struct fsl_qdma_engine *fsl_qdma = dmadev->data->dev_private;
-       struct fsl_qdma_chan *fsl_chan =
-               &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]];
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
        struct rte_dma_stats *stats = &fsl_queue->stats;
 
        if (size < sizeof(rte_stats))
@@ -903,17 +1036,15 @@ static int
 dpaa_qdma_stats_reset(struct rte_dma_dev *dmadev, uint16_t vchan)
 {
        struct fsl_qdma_engine *fsl_qdma = dmadev->data->dev_private;
-       struct fsl_qdma_chan *fsl_chan =
-               &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]];
-       struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
 
-       fsl_queue->stats = (struct rte_dma_stats){0};
+       memset(&fsl_queue->stats, 0, sizeof(struct rte_dma_stats));
 
        return 0;
 }
 
 static struct rte_dma_dev_ops dpaa_qdma_ops = {
-       .dev_info_get             = dpaa_info_get,
+       .dev_info_get             = dpaa_qdma_info_get,
        .dev_configure            = dpaa_qdma_configure,
        .dev_start                = dpaa_qdma_start,
        .dev_close                = dpaa_qdma_close,
@@ -926,90 +1057,80 @@ static int
 dpaa_qdma_init(struct rte_dma_dev *dmadev)
 {
        struct fsl_qdma_engine *fsl_qdma = dmadev->data->dev_private;
-       struct fsl_qdma_chan *fsl_chan;
        uint64_t phys_addr;
-       unsigned int len;
        int ccsr_qdma_fd;
        int regs_size;
        int ret;
-       u32 i;
+       uint32_t i, j, k;
 
-       fsl_qdma->desc_allocated = 0;
-       fsl_qdma->n_chans = VIRT_CHANNELS;
-       fsl_qdma->n_queues = QDMA_QUEUES;
+       fsl_qdma->n_queues = QDMA_QUEUES * QDMA_BLOCKS;
        fsl_qdma->num_blocks = QDMA_BLOCKS;
        fsl_qdma->block_offset = QDMA_BLOCK_OFFSET;
 
-       len = sizeof(*fsl_chan) * fsl_qdma->n_chans;
-       fsl_qdma->chans = rte_zmalloc("qdma: fsl chans", len, 0);
-       if (!fsl_qdma->chans)
-               return -1;
-
-       len = sizeof(struct fsl_qdma_queue *) * fsl_qdma->num_blocks;
-       fsl_qdma->status = rte_zmalloc("qdma: fsl status", len, 0);
-       if (!fsl_qdma->status) {
-               rte_free(fsl_qdma->chans);
-               return -1;
-       }
-
-       for (i = 0; i < fsl_qdma->num_blocks; i++) {
-               rte_atomic32_init(&wait_task[i]);
-               fsl_qdma->status[i] = fsl_qdma_prep_status_queue();
-               if (!fsl_qdma->status[i])
-                       goto err;
-       }
-
        ccsr_qdma_fd = open("/dev/mem", O_RDWR);
        if (unlikely(ccsr_qdma_fd < 0)) {
                DPAA_QDMA_ERR("Can not open /dev/mem for qdma CCSR map");
-               goto err;
+               return ccsr_qdma_fd;
        }
 
-       regs_size = fsl_qdma->block_offset * (fsl_qdma->num_blocks + 2);
+       regs_size = fsl_qdma->block_offset * fsl_qdma->num_blocks;
+       regs_size += (QDMA_CTRL_REGION_SIZE + QDMA_STATUS_REGION_SIZE);
        phys_addr = QDMA_CCSR_BASE;
-       fsl_qdma->ctrl_base = mmap(NULL, regs_size, PROT_READ |
-                                        PROT_WRITE, MAP_SHARED,
-                                        ccsr_qdma_fd, phys_addr);
+       fsl_qdma->reg_base = mmap(NULL, regs_size,
+               PROT_READ | PROT_WRITE, MAP_SHARED,
+               ccsr_qdma_fd, phys_addr);
 
        close(ccsr_qdma_fd);
-       if (fsl_qdma->ctrl_base == MAP_FAILED) {
-               DPAA_QDMA_ERR("Can not map CCSR base qdma: Phys: %08" PRIx64
-                      "size %d", phys_addr, regs_size);
-               goto err;
+       if (fsl_qdma->reg_base == MAP_FAILED) {
+               DPAA_QDMA_ERR("Map qdma reg: Phys(0x%"PRIx64"), size(%d)",
+                       phys_addr, regs_size);
+               return -ENOMEM;
        }
 
-       fsl_qdma->status_base = fsl_qdma->ctrl_base + QDMA_BLOCK_OFFSET;
-       fsl_qdma->block_base = fsl_qdma->status_base + QDMA_BLOCK_OFFSET;
-
-       fsl_qdma->queue = fsl_qdma_alloc_queue_resources(fsl_qdma);
-       if (!fsl_qdma->queue) {
-               munmap(fsl_qdma->ctrl_base, regs_size);
-               goto err;
+       fsl_qdma->ctrl_base =
+               fsl_qdma->reg_base + QDMA_CTRL_REGION_OFFSET;
+       fsl_qdma->status_base =
+               fsl_qdma->reg_base + QDMA_STATUS_REGION_OFFSET;
+       fsl_qdma->block_base =
+               fsl_qdma->status_base + QDMA_STATUS_REGION_SIZE;
+
+       for (i = 0; i < QDMA_BLOCKS; i++) {
+               ret = fsl_qdma_prep_status_queue(fsl_qdma, i);
+               if (ret)
+                       goto mem_free;
        }
 
-       for (i = 0; i < fsl_qdma->n_chans; i++) {
-               struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[i];
-
-               fsl_chan->qdma = fsl_qdma;
-               fsl_chan->queue = fsl_qdma->queue + i % (fsl_qdma->n_queues *
-                                                       fsl_qdma->num_blocks);
-               fsl_chan->free = true;
+       k = 0;
+       for (i = 0; i < QDMA_QUEUES; i++) {
+               for (j = 0; j < QDMA_BLOCKS; j++) {
+                       ret = fsl_qdma_alloc_queue_resources(fsl_qdma, i, j);
+                       if (ret)
+                               goto mem_free;
+                       fsl_qdma->cmd_queues[j][i].channel_id = k;
+                       k++;
+               }
        }
 
        ret = fsl_qdma_reg_init(fsl_qdma);
        if (ret) {
                DPAA_QDMA_ERR("Can't Initialize the qDMA engine.");
-               munmap(fsl_qdma->ctrl_base, regs_size);
-               goto err;
+               goto mem_free;
        }
 
        return 0;
 
-err:
-       rte_free(fsl_qdma->chans);
-       rte_free(fsl_qdma->status);
+mem_free:
+       for (i = 0; i < fsl_qdma->num_blocks; i++)
+               fsl_qdma_free_stq_res(&fsl_qdma->stat_queues[i]);
+
+       for (i = 0; i < fsl_qdma->num_blocks; i++) {
+               for (j = 0; j < QDMA_QUEUES; j++)
+                       fsl_qdma_free_cmdq_res(&fsl_qdma->cmd_queues[i][j]);
+       }
 
-       return -1;
+       munmap(fsl_qdma->ctrl_base, regs_size);
+
+       return ret;
 }
 
 static int
@@ -1052,17 +1173,20 @@ dpaa_qdma_remove(struct rte_dpaa_device *dpaa_dev)
 {
        struct rte_dma_dev *dmadev = dpaa_dev->dmadev;
        struct fsl_qdma_engine *fsl_qdma = dmadev->data->dev_private;
-       int i = 0, max = QDMA_QUEUES * QDMA_BLOCKS;
+       uint32_t i, j, regs_size;
+
+       regs_size = fsl_qdma->block_offset * fsl_qdma->num_blocks;
+       regs_size += (QDMA_CTRL_REGION_SIZE + QDMA_STATUS_REGION_SIZE);
 
-       for (i = 0; i < max; i++) {
-               struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[i];
+       for (i = 0; i < QDMA_BLOCKS; i++)
+               fsl_qdma_free_stq_res(&fsl_qdma->stat_queues[i]);
 
-               if (fsl_chan->free == false)
-                       dma_release(fsl_chan);
+       for (i = 0; i < QDMA_BLOCKS; i++) {
+               for (j = 0; j < QDMA_QUEUES; j++)
+                       fsl_qdma_free_cmdq_res(&fsl_qdma->cmd_queues[i][j]);
        }
 
-       rte_free(fsl_qdma->status);
-       rte_free(fsl_qdma->chans);
+       munmap(fsl_qdma->ctrl_base, regs_size);
 
        (void)rte_dma_pmd_release(dpaa_dev->device.name);
 
diff --git a/drivers/dma/dpaa/dpaa_qdma.h b/drivers/dma/dpaa/dpaa_qdma.h
index 7e9e76e21a..75c014f32f 100644
--- a/drivers/dma/dpaa/dpaa_qdma.h
+++ b/drivers/dma/dpaa/dpaa_qdma.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2021 NXP
+ * Copyright 2021-2024 NXP
  */
 
 #ifndef _DPAA_QDMA_H_
@@ -11,7 +11,6 @@
 #define BIT(nr)                (1UL << (nr))
 #endif
 
-#define CORE_NUMBER 4
 #define RETRIES        5
 
 #ifndef GENMASK
@@ -20,6 +19,14 @@
                (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
 #endif
 
+#define QDMA_CTRL_REGION_OFFSET 0
+#define QDMA_CTRL_REGION_SIZE 0x10000
+#define QDMA_STATUS_REGION_OFFSET \
+       (QDMA_CTRL_REGION_OFFSET + QDMA_CTRL_REGION_SIZE)
+#define QDMA_STATUS_REGION_SIZE 0x10000
+#define DPAA_QDMA_COPY_IDX_OFFSET 8
+#define DPAA_QDMA_FLAGS_INDEX RTE_BIT64(63)
+
 #define FSL_QDMA_DMR                   0x0
 #define FSL_QDMA_DSR                   0x4
 #define FSL_QDMA_DEDR                  0xe04
@@ -54,15 +61,16 @@
 #define FSL_QDMA_QUEUE_MAX             8
 
 #define FSL_QDMA_BCQMR_EN              0x80000000
-#define FSL_QDMA_BCQMR_EI_BE           0x40
+#define FSL_QDMA_BCQMR_EI              0x40000000
+
 #define FSL_QDMA_BCQMR_CD_THLD(x)      ((x) << 20)
 #define FSL_QDMA_BCQMR_CQ_SIZE(x)      ((x) << 16)
 
 #define FSL_QDMA_BCQSR_QF_XOFF_BE      0x1000100
 
 #define FSL_QDMA_BSQMR_EN              0x80000000
-#define FSL_QDMA_BSQMR_DI_BE           0x40
 #define FSL_QDMA_BSQMR_CQ_SIZE(x)      ((x) << 16)
+#define FSL_QDMA_BSQMR_DI              0xc0
 
 #define FSL_QDMA_BSQSR_QE_BE           0x200
 
@@ -75,23 +83,14 @@
 #define FSL_QDMA_CIRCULAR_DESC_SIZE_MAX        16384
 #define FSL_QDMA_QUEUE_NUM_MAX         8
 
+#define FSL_QDMA_COMP_SG_FORMAT                0x1
+
 #define FSL_QDMA_CMD_RWTTYPE           0x4
 #define FSL_QDMA_CMD_LWC               0x2
 
 #define FSL_QDMA_CMD_RWTTYPE_OFFSET    28
 #define FSL_QDMA_CMD_LWC_OFFSET                16
 
-#define QDMA_CCDF_STATUS               20
-#define QDMA_CCDF_OFFSET               20
-#define QDMA_CCDF_MASK                 GENMASK(28, 20)
-#define QDMA_CCDF_FOTMAT               BIT(29)
-#define QDMA_CCDF_SER                  BIT(30)
-
-#define QDMA_SG_FIN                    BIT(30)
-#define QDMA_SG_LEN_MASK               GENMASK(29, 0)
-
-#define COMMAND_QUEUE_OVERFLOW         10
-
 /* qdma engine attribute */
 #define QDMA_QUEUE_SIZE                        64
 #define QDMA_STATUS_SIZE               64
@@ -101,6 +100,7 @@
 #define QDMA_BLOCKS                    4
 #define QDMA_QUEUES                    8
 #define QDMA_DELAY                     1000
+#define QDMA_QUEUE_CR_WM 32
 
 #define QDMA_BIG_ENDIAN                        1
 #ifdef QDMA_BIG_ENDIAN
@@ -118,89 +118,145 @@
 #define FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma_engine, x)                 \
        (((fsl_qdma_engine)->block_offset) * (x))
 
-typedef void (*dma_call_back)(void *params);
-
 /* qDMA Command Descriptor Formats */
-struct fsl_qdma_format {
-       __le32 status; /* ser, status */
-       __le32 cfg;     /* format, offset */
-       union {
-               struct {
-                       __le32 addr_lo; /* low 32-bits of 40-bit address */
-                       u8 addr_hi;     /* high 8-bits of 40-bit address */
-                       u8 __reserved1[2];
-                       u8 cfg8b_w1; /* dd, queue */
-               };
-               __le64 data;
-       };
-};
+struct fsl_qdma_comp_cmd_desc { /* 16-byte qDMA command/status queue entry (CCDF). NOTE(review): bit layout assumed to mirror the HW descriptor; the rsv1/offset/format run (21+9+3 bits) straddles a 32-bit boundary under __rte_packed -- confirm against the SoC reference manual */
+       uint8_t status; /* per-entry status byte */
+       uint32_t rsv0:22;
+       uint32_t ser:1; /* status-entry report enable (old QDMA_CCDF_SER bit) -- presumably; confirm */
+       uint32_t rsv1:21;
+       uint32_t offset:9;
+       uint32_t format:3;
+       uint32_t addr_lo; /* low 32 bits of 40-bit address, set by qdma_desc_addr_set64() */
+       uint8_t addr_hi; /* high 8 bits of 40-bit address */
+       uint16_t rsv3;
+       uint8_t queue:3; /* queue index, read back via qdma_ccdf_get_queue() */
+       uint8_t rsv4:3;
+       uint8_t dd:2;
+} __rte_packed;
+
+struct fsl_qdma_comp_sg_desc { /* 16-byte scatter-gather entry of a qDMA compound frame */
+       uint32_t offset:13;
+       uint32_t rsv0:19;
+       uint32_t length:30; /* data length in bytes -- presumably; confirm */
+       uint32_t final:1; /* marks the last SG entry (old QDMA_SG_FIN) -- presumably; confirm */
+       uint32_t extion:1; /* extension/format bit; HW field name kept as-is */
+       uint32_t addr_lo; /* low 32 bits of 40-bit address, set by qdma_desc_sge_addr_set64() */
+       uint8_t addr_hi; /* high 8 bits of 40-bit address */
+       uint32_t rsv1:24;
+} __rte_packed;
 
-/* qDMA Source Descriptor Format */
 struct fsl_qdma_sdf {
-       __le32 rev3;
-       __le32 cfg; /* rev4, bit[0-11] - ssd, bit[12-23] sss */
-       __le32 rev5;
-       __le32 cmd;
-};
+       uint32_t rsv0; /* 16-byte source descriptor; explicit bit-fields replace the old opaque cfg/cmd words */
+       uint32_t ssd:12; /* was cfg bits[0-11] per the removed comment */
+       uint32_t sss:12; /* was cfg bits[12-23] per the removed comment */
+       uint32_t rsv1:8;
+       uint32_t rsv2;
+
+       uint32_t rsv3:17;
+       uint32_t prefetch:1;
+       uint32_t rsv4:1;
+       uint32_t ssen:1; /* source stride enable -- presumably; confirm */
+       uint32_t rthrotl:4; /* read throttle -- presumably; confirm */
+       uint32_t sqos:3;
+       uint32_t ns:1;
+       uint32_t srttype:4; /* bits 28-31 of the command word; set from FSL_QDMA_CMD_RWTTYPE (offset 28) */
+} __rte_packed;
 
-/* qDMA Destination Descriptor Format */
 struct fsl_qdma_ddf {
-       __le32 rev1;
-       __le32 cfg; /* rev2, bit[0-11] - dsd, bit[12-23] - dss */
-       __le32 rev3;
-       __le32 cmd;
+       uint32_t rsv0; /* 16-byte destination descriptor; explicit bit-fields replace the old opaque cfg/cmd words */
+       uint32_t dsd:12; /* was cfg bits[0-11] per the removed comment */
+       uint32_t dss:12; /* was cfg bits[12-23] per the removed comment */
+       uint32_t rsv1:8;
+       uint32_t rsv2;
+
+       uint16_t rsv3;
+       uint32_t lwc:2; /* bits 16-17 of the command word; set from FSL_QDMA_CMD_LWC (offset 16) */
+       uint32_t rsv4:1;
+       uint32_t dsen:1; /* destination stride enable -- presumably; confirm */
+       uint32_t wthrotl:4; /* write throttle -- presumably; confirm */
+       uint32_t dqos:3;
+       uint32_t ns:1;
+       uint32_t dwttype:4; /* bits 28-31; write-side counterpart of fsl_qdma_sdf.srttype */
+} __rte_packed;
+
+struct fsl_qdma_df { /* source + destination descriptor pair carried in a compound frame */
+       struct fsl_qdma_sdf sdf;
+       struct fsl_qdma_ddf ddf;
 };
 
-struct fsl_qdma_chan {
-       struct fsl_qdma_engine  *qdma;
-       struct fsl_qdma_queue   *queue;
-       bool                    free;
-       struct list_head        list;
+#define FSL_QDMA_SG_MAX_ENTRY 64
+#define FSL_QDMA_MAX_DESC_NUM (FSL_QDMA_SG_MAX_ENTRY * QDMA_QUEUE_SIZE)
+struct fsl_qdma_cmpd_ft { /* per-job compound frame table living in DMA-able memory shared with HW */
+       struct fsl_qdma_comp_sg_desc desc_buf; /* frame-list head entry -- presumably; confirm */
+       struct fsl_qdma_comp_sg_desc desc_sbuf; /* source side entry -- presumably points at desc_ssge or a single buffer; confirm */
+       struct fsl_qdma_comp_sg_desc desc_dbuf; /* destination side entry -- presumably points at desc_dsge; confirm */
+       uint64_t cache_align[2]; /* pads the 3x16B headers to 64B so the SG tables below start aligned */
+       struct fsl_qdma_comp_sg_desc desc_ssge[FSL_QDMA_SG_MAX_ENTRY]; /* source scatter-gather entries */
+       struct fsl_qdma_comp_sg_desc desc_dsge[FSL_QDMA_SG_MAX_ENTRY]; /* destination scatter-gather entries */
+       struct fsl_qdma_df df; /* source/destination command descriptors for this frame */
+       uint64_t phy_ssge; /* IOVA of desc_ssge -- presumably cached at allocation; confirm */
+       uint64_t phy_dsge; /* IOVA of desc_dsge */
+       uint64_t phy_df; /* IOVA of df */
+} __rte_packed;
+
+#define DPAA_QDMA_IDX_FROM_FLAG(flag) \
+       ((flag) >> DPAA_QDMA_COPY_IDX_OFFSET)
+
+struct fsl_qdma_desc { /* software record of one queued copy job */
+       rte_iova_t src; /* source IOVA */
+       rte_iova_t dst; /* destination IOVA */
+       uint64_t flag; /* caller flags; copy index stored at bit DPAA_QDMA_COPY_IDX_OFFSET and up (DPAA_QDMA_IDX_FROM_FLAG) */
+       uint64_t len; /* copy length in bytes */
 };
 
 struct fsl_qdma_queue {
-       struct fsl_qdma_format  *virt_head;
-       struct list_head        comp_used;
-       struct list_head        comp_free;
-       dma_addr_t              bus_addr;
-       u32                     n_cq;
-       u32                     id;
-       u32                     count;
-       u32                     pending;
-       struct fsl_qdma_format  *cq;
-       void                    *block_base;
-       struct rte_dma_stats    stats;
+       int used; /* nonzero once the queue is claimed -- presumably; confirm */
+       struct fsl_qdma_cmpd_ft **ft; /* per-CQ-entry compound frame tables */
+       uint16_t ci; /* consumer index into cq -- presumably; confirm */
+       struct rte_ring *complete_burst;
+       struct rte_ring *complete_desc;
+       struct rte_ring *complete_pool; /* rte_rings used for completion bookkeeping */
+       uint16_t n_cq; /* number of command-queue entries */
+       uint8_t block_id; /* DMA block this queue belongs to */
+       uint8_t queue_id; /* queue index within the block */
+       uint8_t channel_id; /* global channel number assigned at probe (cmd_queues[j][i].channel_id = k) */
+       void *block_vir; /* virtual base of the owning block's registers -- presumably; confirm */
+       uint32_t le_cqmr; /* cached little-endian CQ mode register value -- presumably; confirm */
+       struct fsl_qdma_comp_cmd_desc *cq; /* HW command descriptor ring */
+       uint16_t desc_in_hw[QDMA_QUEUE_SIZE];
+       struct rte_dma_stats stats;
+       struct fsl_qdma_desc *pending_desc; /* software-queued jobs not yet submitted to HW */
+       uint16_t pending_max;
+       uint16_t pending_start;
+       uint16_t pending_num; /* pending_desc treated as a ring: start/num within pending_max */
+       uint16_t complete_start;
+       dma_addr_t bus_addr; /* IOVA of cq */
+       void *engine; /* back-pointer to struct fsl_qdma_engine -- presumably */
 };
 
-struct fsl_qdma_comp {
-       dma_addr_t              bus_addr;
-       dma_addr_t              desc_bus_addr;
-       void                    *virt_addr;
-       int                     index;
-       void                    *desc_virt_addr;
-       struct fsl_qdma_chan    *qchan;
-       dma_call_back           call_back_func;
-       void                    *params;
-       struct list_head        list;
+struct fsl_qdma_status_queue { /* per-block HW status (completion) queue; one per QDMA_BLOCKS, see fsl_qdma_prep_status_queue() */
+       uint16_t n_cq; /* number of status-queue entries */
+       uint16_t complete; /* next status entry to consume -- presumably; confirm */
+       uint8_t block_id; /* owning DMA block index */
+       void *block_vir; /* virtual base of the block's registers -- presumably; confirm */
+       struct fsl_qdma_comp_cmd_desc *cq; /* HW status descriptor ring */
+       struct rte_dma_stats stats;
+       dma_addr_t bus_addr; /* IOVA of cq */
+       void *engine; /* back-pointer to struct fsl_qdma_engine -- presumably */
 };
 
 struct fsl_qdma_engine {
-       int                     desc_allocated;
-       void                    *ctrl_base;
-       void                    *status_base;
-       void                    *block_base;
-       u32                     n_chans;
-       u32                     n_queues;
-       int                     error_irq;
-       struct fsl_qdma_queue   *queue;
-       struct fsl_qdma_queue   **status;
-       struct fsl_qdma_chan    *chans;
-       u32                     num_blocks;
-       u8                      free_block_id;
-       u32                     vchan_map[4];
-       int                     block_offset;
+       void *reg_base; /* mapped register window; the three bases below are derived from it at probe */
+       void *ctrl_base; /* = reg_base + QDMA_CTRL_REGION_OFFSET */
+       void *status_base; /* = reg_base + QDMA_STATUS_REGION_OFFSET */
+       void *block_base; /* = status_base + QDMA_STATUS_REGION_SIZE */
+       uint32_t n_queues;
+       uint8_t block_queues[QDMA_BLOCKS]; /* queues in use per block -- presumably; confirm */
+       struct fsl_qdma_queue cmd_queues[QDMA_BLOCKS][QDMA_QUEUES]; /* command queues indexed [block][queue] */
+       struct fsl_qdma_status_queue stat_queues[QDMA_BLOCKS]; /* one status queue per block */
+       struct fsl_qdma_queue *chan[QDMA_BLOCKS * QDMA_QUEUES]; /* vchan -> command queue mapping -- presumably; confirm */
+       uint32_t num_blocks;
+       int block_offset; /* register stride between blocks, used by FSL_QDMA_BLOCK_BASE_OFFSET() */
 };
 
-static rte_atomic32_t wait_task[CORE_NUMBER];
-
 #endif /* _DPAA_QDMA_H_ */
-- 
2.25.1

Reply via email to