Update the cnxk dmadev driver to align with the DPDK dmadev specification.

Fixes: 681851b347ad ("dma/cnxk: support CN10K DMA engine")
Cc: sta...@dpdk.org

Signed-off-by: Amit Prakash Shukla <amitpraka...@marvell.com>
---
v2:
- Fixed bugs observed in v1.
- Squashed a few commits.

v3:
- Addressed review suggestions.
- Code improvements.

 drivers/dma/cnxk/cnxk_dmadev.c | 464 ++++++++++++++++++++-------------
 drivers/dma/cnxk/cnxk_dmadev.h |  24 +-
 2 files changed, 294 insertions(+), 194 deletions(-)

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index a6f4a31e0e..a0152fc6df 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -7,68 +7,76 @@
 
 #include <bus_pci_driver.h>
 #include <rte_common.h>
+#include <rte_dmadev.h>
+#include <rte_dmadev_pmd.h>
 #include <rte_eal.h>
 #include <rte_lcore.h>
 #include <rte_mempool.h>
 #include <rte_pci.h>
-#include <rte_dmadev.h>
-#include <rte_dmadev_pmd.h>
 
-#include <roc_api.h>
 #include <cnxk_dmadev.h>
 
 static int
-cnxk_dmadev_info_get(const struct rte_dma_dev *dev,
-                    struct rte_dma_info *dev_info, uint32_t size)
+cnxk_dmadev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info 
*dev_info, uint32_t size)
 {
        RTE_SET_USED(dev);
        RTE_SET_USED(size);
 
        dev_info->max_vchans = 1;
        dev_info->nb_vchans = 1;
-       dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
-               RTE_DMA_CAPA_MEM_TO_DEV | RTE_DMA_CAPA_DEV_TO_MEM |
-               RTE_DMA_CAPA_DEV_TO_DEV | RTE_DMA_CAPA_OPS_COPY |
-               RTE_DMA_CAPA_OPS_COPY_SG;
+       dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_MEM_TO_DEV |
+                            RTE_DMA_CAPA_DEV_TO_MEM | RTE_DMA_CAPA_DEV_TO_DEV |
+                            RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_COPY_SG;
        dev_info->max_desc = DPI_MAX_DESC;
-       dev_info->min_desc = 1;
+       dev_info->min_desc = 2;
        dev_info->max_sges = DPI_MAX_POINTER;
 
        return 0;
 }
 
 static int
-cnxk_dmadev_configure(struct rte_dma_dev *dev,
-                     const struct rte_dma_conf *conf, uint32_t conf_sz)
+cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf 
*conf, uint32_t conf_sz)
 {
        struct cnxk_dpi_vf_s *dpivf = NULL;
        int rc = 0;
 
        RTE_SET_USED(conf);
-       RTE_SET_USED(conf);
-       RTE_SET_USED(conf_sz);
        RTE_SET_USED(conf_sz);
+
        dpivf = dev->fp_obj->dev_private;
+
+       if (dpivf->flag & CNXK_DPI_DEV_CONFIG)
+               return rc;
+
        rc = roc_dpi_configure(&dpivf->rdpi);
-       if (rc < 0)
+       if (rc < 0) {
                plt_err("DMA configure failed err = %d", rc);
+               goto done;
+       }
 
+       dpivf->flag |= CNXK_DPI_DEV_CONFIG;
+
+done:
        return rc;
 }
 
 static int
 cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
-                       const struct rte_dma_vchan_conf *conf,
-                       uint32_t conf_sz)
+                       const struct rte_dma_vchan_conf *conf, uint32_t conf_sz)
 {
        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
-       struct cnxk_dpi_compl_s *comp_data;
-       union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
+       struct cnxk_dpi_conf *dpi_conf = &dpivf->conf;
+       union dpi_instr_hdr_s *header = &dpi_conf->hdr;
+       uint16_t max_desc;
+       uint32_t size;
        int i;
 
        RTE_SET_USED(vchan);
        RTE_SET_USED(conf_sz);
 
+       if (dpivf->flag & CNXK_DPI_VCHAN_CONFIG)
+               return 0;
+
        header->cn9k.pt = DPI_HDR_PT_ZBW_CA;
 
        switch (conf->direction) {
@@ -96,35 +104,54 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t 
vchan,
                header->cn9k.fport = conf->dst_port.pcie.coreid;
        };
 
-       for (i = 0; i < conf->nb_desc; i++) {
-               comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
-               if (comp_data == NULL) {
-                       plt_err("Failed to allocate for comp_data");
-                       return -ENOMEM;
-               }
-               comp_data->cdata = DPI_REQ_CDATA;
-               dpivf->conf.c_desc.compl_ptr[i] = comp_data;
-       };
-       dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
-       dpivf->conf.c_desc.head = 0;
-       dpivf->conf.c_desc.tail = 0;
+       max_desc = conf->nb_desc;
+       if (!rte_is_power_of_2(max_desc))
+               max_desc = rte_align32pow2(max_desc);
+
+       if (max_desc > DPI_MAX_DESC)
+               max_desc = DPI_MAX_DESC;
+
+       size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+       dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
+
+       if (dpi_conf->c_desc.compl_ptr == NULL) {
+               plt_err("Failed to allocate for comp_data");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < max_desc; i++) {
+               dpi_conf->c_desc.compl_ptr[i] =
+                       rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
+               dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;
+       }
+
+       dpi_conf->c_desc.max_cnt = (max_desc - 1);
+       dpi_conf->c_desc.head = 0;
+       dpi_conf->c_desc.tail = 0;
+       dpivf->pnum_words = 0;
+       dpivf->pending = 0;
+       dpivf->flag |= CNXK_DPI_VCHAN_CONFIG;
 
        return 0;
 }
 
 static int
 cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
-                        const struct rte_dma_vchan_conf *conf,
-                        uint32_t conf_sz)
+                        const struct rte_dma_vchan_conf *conf, uint32_t 
conf_sz)
 {
        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
-       struct cnxk_dpi_compl_s *comp_data;
-       union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
+       struct cnxk_dpi_conf *dpi_conf = &dpivf->conf;
+       union dpi_instr_hdr_s *header = &dpi_conf->hdr;
+       uint16_t max_desc;
+       uint32_t size;
        int i;
 
        RTE_SET_USED(vchan);
        RTE_SET_USED(conf_sz);
 
+       if (dpivf->flag & CNXK_DPI_VCHAN_CONFIG)
+               return 0;
+
        header->cn10k.pt = DPI_HDR_PT_ZBW_CA;
 
        switch (conf->direction) {
@@ -152,18 +179,33 @@ cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, 
uint16_t vchan,
                header->cn10k.fport = conf->dst_port.pcie.coreid;
        };
 
-       for (i = 0; i < conf->nb_desc; i++) {
-               comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
-               if (comp_data == NULL) {
-                       plt_err("Failed to allocate for comp_data");
-                       return -ENOMEM;
-               }
-               comp_data->cdata = DPI_REQ_CDATA;
-               dpivf->conf.c_desc.compl_ptr[i] = comp_data;
-       };
-       dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
-       dpivf->conf.c_desc.head = 0;
-       dpivf->conf.c_desc.tail = 0;
+       max_desc = conf->nb_desc;
+       if (!rte_is_power_of_2(max_desc))
+               max_desc = rte_align32pow2(max_desc);
+
+       if (max_desc > DPI_MAX_DESC)
+               max_desc = DPI_MAX_DESC;
+
+       size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+       dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
+
+       if (dpi_conf->c_desc.compl_ptr == NULL) {
+               plt_err("Failed to allocate for comp_data");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < max_desc; i++) {
+               dpi_conf->c_desc.compl_ptr[i] =
+                       rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
+               dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;
+       }
+
+       dpi_conf->c_desc.max_cnt = (max_desc - 1);
+       dpi_conf->c_desc.head = 0;
+       dpi_conf->c_desc.tail = 0;
+       dpivf->pnum_words = 0;
+       dpivf->pending = 0;
+       dpivf->flag |= CNXK_DPI_VCHAN_CONFIG;
 
        return 0;
 }
@@ -173,10 +215,16 @@ cnxk_dmadev_start(struct rte_dma_dev *dev)
 {
        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
 
+       if (dpivf->flag & CNXK_DPI_DEV_START)
+               return 0;
+
        dpivf->desc_idx = 0;
-       dpivf->num_words = 0;
+       dpivf->pending = 0;
+       dpivf->pnum_words = 0;
        roc_dpi_enable(&dpivf->rdpi);
 
+       dpivf->flag |= CNXK_DPI_DEV_START;
+
        return 0;
 }
 
@@ -187,6 +235,8 @@ cnxk_dmadev_stop(struct rte_dma_dev *dev)
 
        roc_dpi_disable(&dpivf->rdpi);
 
+       dpivf->flag &= ~CNXK_DPI_DEV_START;
+
        return 0;
 }
 
@@ -198,6 +248,8 @@ cnxk_dmadev_close(struct rte_dma_dev *dev)
        roc_dpi_disable(&dpivf->rdpi);
        roc_dpi_dev_fini(&dpivf->rdpi);
 
+       dpivf->flag = 0;
+
        return 0;
 }
 
@@ -206,8 +258,7 @@ __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int 
cmd_count)
 {
        uint64_t *ptr = dpi->chunk_base;
 
-       if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) ||
-           cmds == NULL)
+       if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) || 
cmds == NULL)
                return -EINVAL;
 
        /*
@@ -223,11 +274,15 @@ __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, 
int cmd_count)
                int count;
                uint64_t *new_buff = dpi->chunk_next;
 
-               dpi->chunk_next =
-                       (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
+               dpi->chunk_next = (void 
*)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
                if (!dpi->chunk_next) {
-                       plt_err("Failed to alloc next buffer from NPA");
-                       return -ENOMEM;
+                       plt_dp_dbg("Failed to alloc next buffer from NPA");
+
+                       /* NPA failed to allocate a buffer. Restoring chunk_next
+                        * to its original address.
+                        */
+                       dpi->chunk_next = new_buff;
+                       return -ENOSPC;
                }
 
                /*
@@ -261,13 +316,17 @@ __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, 
int cmd_count)
                /* queue index may be greater than pool size */
                if (dpi->chunk_head >= dpi->pool_size_m1) {
                        new_buff = dpi->chunk_next;
-                       dpi->chunk_next =
-                               (void *)roc_npa_aura_op_alloc(dpi->aura_handle,
-                                                             0);
+                       dpi->chunk_next = (void 
*)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
                        if (!dpi->chunk_next) {
-                               plt_err("Failed to alloc next buffer from NPA");
-                               return -ENOMEM;
+                               plt_dp_dbg("Failed to alloc next buffer from 
NPA");
+
+                               /* NPA failed to allocate a buffer. Restoring 
chunk_next
+                                * to its original address.
+                                */
+                               dpi->chunk_next = new_buff;
+                               return -ENOSPC;
                        }
+
                        /* Write next buffer address */
                        *ptr = (uint64_t)new_buff;
                        dpi->chunk_base = new_buff;
@@ -279,12 +338,13 @@ __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, 
int cmd_count)
 }
 
 static int
-cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
-                rte_iova_t dst, uint32_t length, uint64_t flags)
+cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t 
dst, uint32_t length,
+                uint64_t flags)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
        union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
        struct cnxk_dpi_compl_s *comp_ptr;
+       uint64_t cmd[DPI_MAX_CMD_SIZE];
        rte_iova_t fptr, lptr;
        int num_words = 0;
        int rc;
@@ -292,9 +352,8 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, 
rte_iova_t src,
        RTE_SET_USED(vchan);
 
        comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-       comp_ptr->cdata = DPI_REQ_CDATA;
        header->cn9k.ptr = (uint64_t)comp_ptr;
-       STRM_INC(dpivf->conf.c_desc);
+       STRM_INC(dpivf->conf.c_desc, tail);
 
        header->cn9k.nfst = 1;
        header->cn9k.nlst = 1;
@@ -311,103 +370,110 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, 
rte_iova_t src,
                lptr = dst;
        }
 
-       dpivf->cmd[0] = header->u[0];
-       dpivf->cmd[1] = header->u[1];
-       dpivf->cmd[2] = header->u[2];
+       cmd[0] = header->u[0];
+       cmd[1] = header->u[1];
+       cmd[2] = header->u[2];
        /* word3 is always 0 */
        num_words += 4;
-       dpivf->cmd[num_words++] = length;
-       dpivf->cmd[num_words++] = fptr;
-       dpivf->cmd[num_words++] = length;
-       dpivf->cmd[num_words++] = lptr;
-
-       rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-       if (!rc) {
-               if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-                       rte_wmb();
-                       plt_write64(num_words,
-                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-                       dpivf->stats.submitted++;
-               }
-               dpivf->num_words += num_words;
+       cmd[num_words++] = length;
+       cmd[num_words++] = fptr;
+       cmd[num_words++] = length;
+       cmd[num_words++] = lptr;
+
+       rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+       if (unlikely(rc)) {
+               STRM_DEC(dpivf->conf.c_desc, tail);
+               return rc;
        }
 
-       return dpivf->desc_idx++;
+       rte_wmb();
+       if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+               plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+               dpivf->stats.submitted++;
+       } else {
+               dpivf->pnum_words += num_words;
+               dpivf->pending++;
+       }
+
+       return (dpivf->desc_idx++);
 }
 
 static int
-cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan,
-                   const struct rte_dma_sge *src,
-                   const struct rte_dma_sge *dst,
-                   uint16_t nb_src, uint16_t nb_dst, uint64_t flags)
+cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct 
rte_dma_sge *src,
+                   const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t 
nb_dst, uint64_t flags)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
        union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
        const struct rte_dma_sge *fptr, *lptr;
        struct cnxk_dpi_compl_s *comp_ptr;
+       uint64_t cmd[DPI_MAX_CMD_SIZE];
        int num_words = 0;
        int i, rc;
 
        RTE_SET_USED(vchan);
 
        comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-       comp_ptr->cdata = DPI_REQ_CDATA;
        header->cn9k.ptr = (uint64_t)comp_ptr;
-       STRM_INC(dpivf->conf.c_desc);
+       STRM_INC(dpivf->conf.c_desc, tail);
 
        /*
         * For inbound case, src pointers are last pointers.
         * For all other cases, src pointers are first pointers.
         */
        if (header->cn9k.xtype == DPI_XTYPE_INBOUND) {
-               header->cn9k.nfst = nb_dst & 0xf;
-               header->cn9k.nlst = nb_src & 0xf;
+               header->cn9k.nfst = nb_dst & DPI_MAX_POINTER;
+               header->cn9k.nlst = nb_src & DPI_MAX_POINTER;
                fptr = &dst[0];
                lptr = &src[0];
        } else {
-               header->cn9k.nfst = nb_src & 0xf;
-               header->cn9k.nlst = nb_dst & 0xf;
+               header->cn9k.nfst = nb_src & DPI_MAX_POINTER;
+               header->cn9k.nlst = nb_dst & DPI_MAX_POINTER;
                fptr = &src[0];
                lptr = &dst[0];
        }
 
-       dpivf->cmd[0] = header->u[0];
-       dpivf->cmd[1] = header->u[1];
-       dpivf->cmd[2] = header->u[2];
+       cmd[0] = header->u[0];
+       cmd[1] = header->u[1];
+       cmd[2] = header->u[2];
        num_words += 4;
        for (i = 0; i < header->cn9k.nfst; i++) {
-               dpivf->cmd[num_words++] = (uint64_t)fptr->length;
-               dpivf->cmd[num_words++] = fptr->addr;
+               cmd[num_words++] = (uint64_t)fptr->length;
+               cmd[num_words++] = fptr->addr;
                fptr++;
        }
 
        for (i = 0; i < header->cn9k.nlst; i++) {
-               dpivf->cmd[num_words++] = (uint64_t)lptr->length;
-               dpivf->cmd[num_words++] = lptr->addr;
+               cmd[num_words++] = (uint64_t)lptr->length;
+               cmd[num_words++] = lptr->addr;
                lptr++;
        }
 
-       rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-       if (!rc) {
-               if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-                       rte_wmb();
-                       plt_write64(num_words,
-                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-                       dpivf->stats.submitted += nb_src;
-               }
-               dpivf->num_words += num_words;
+       rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+       if (unlikely(rc)) {
+               STRM_DEC(dpivf->conf.c_desc, tail);
+               return rc;
        }
 
-       return (rc < 0) ? rc : dpivf->desc_idx++;
+       if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+               rte_wmb();
+               plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+               dpivf->stats.submitted += nb_src;
+       } else {
+               dpivf->pnum_words += num_words;
+               dpivf->pending++;
+       }
+
+       return (dpivf->desc_idx++);
 }
 
 static int
-cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
-                 rte_iova_t dst, uint32_t length, uint64_t flags)
+cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, 
rte_iova_t dst,
+                 uint32_t length, uint64_t flags)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
        union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
        struct cnxk_dpi_compl_s *comp_ptr;
+       uint64_t cmd[DPI_MAX_CMD_SIZE];
        rte_iova_t fptr, lptr;
        int num_words = 0;
        int rc;
@@ -415,9 +481,8 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, 
rte_iova_t src,
        RTE_SET_USED(vchan);
 
        comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-       comp_ptr->cdata = DPI_REQ_CDATA;
        header->cn10k.ptr = (uint64_t)comp_ptr;
-       STRM_INC(dpivf->conf.c_desc);
+       STRM_INC(dpivf->conf.c_desc, tail);
 
        header->cn10k.nfst = 1;
        header->cn10k.nlst = 1;
@@ -425,131 +490,140 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, 
rte_iova_t src,
        fptr = src;
        lptr = dst;
 
-       dpivf->cmd[0] = header->u[0];
-       dpivf->cmd[1] = header->u[1];
-       dpivf->cmd[2] = header->u[2];
+       cmd[0] = header->u[0];
+       cmd[1] = header->u[1];
+       cmd[2] = header->u[2];
        /* word3 is always 0 */
        num_words += 4;
-       dpivf->cmd[num_words++] = length;
-       dpivf->cmd[num_words++] = fptr;
-       dpivf->cmd[num_words++] = length;
-       dpivf->cmd[num_words++] = lptr;
-
-       rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-       if (!rc) {
-               if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-                       rte_wmb();
-                       plt_write64(num_words,
-                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-                       dpivf->stats.submitted++;
-               }
-               dpivf->num_words += num_words;
+       cmd[num_words++] = length;
+       cmd[num_words++] = fptr;
+       cmd[num_words++] = length;
+       cmd[num_words++] = lptr;
+
+       rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+       if (unlikely(rc)) {
+               STRM_DEC(dpivf->conf.c_desc, tail);
+               return rc;
+       }
+
+       if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+               rte_wmb();
+               plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+               dpivf->stats.submitted++;
+       } else {
+               dpivf->pnum_words += num_words;
+               dpivf->pending++;
        }
 
        return dpivf->desc_idx++;
 }
 
 static int
-cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan,
-                    const struct rte_dma_sge *src,
-                    const struct rte_dma_sge *dst, uint16_t nb_src,
-                    uint16_t nb_dst, uint64_t flags)
+cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct 
rte_dma_sge *src,
+                    const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t 
nb_dst,
+                    uint64_t flags)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
        union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
        const struct rte_dma_sge *fptr, *lptr;
        struct cnxk_dpi_compl_s *comp_ptr;
+       uint64_t cmd[DPI_MAX_CMD_SIZE];
        int num_words = 0;
        int i, rc;
 
        RTE_SET_USED(vchan);
 
        comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-       comp_ptr->cdata = DPI_REQ_CDATA;
        header->cn10k.ptr = (uint64_t)comp_ptr;
-       STRM_INC(dpivf->conf.c_desc);
+       STRM_INC(dpivf->conf.c_desc, tail);
 
-       header->cn10k.nfst = nb_src & 0xf;
-       header->cn10k.nlst = nb_dst & 0xf;
+       header->cn10k.nfst = nb_src & DPI_MAX_POINTER;
+       header->cn10k.nlst = nb_dst & DPI_MAX_POINTER;
        fptr = &src[0];
        lptr = &dst[0];
 
-       dpivf->cmd[0] = header->u[0];
-       dpivf->cmd[1] = header->u[1];
-       dpivf->cmd[2] = header->u[2];
+       cmd[0] = header->u[0];
+       cmd[1] = header->u[1];
+       cmd[2] = header->u[2];
        num_words += 4;
 
        for (i = 0; i < header->cn10k.nfst; i++) {
-               dpivf->cmd[num_words++] = (uint64_t)fptr->length;
-               dpivf->cmd[num_words++] = fptr->addr;
+               cmd[num_words++] = (uint64_t)fptr->length;
+               cmd[num_words++] = fptr->addr;
                fptr++;
        }
 
        for (i = 0; i < header->cn10k.nlst; i++) {
-               dpivf->cmd[num_words++] = (uint64_t)lptr->length;
-               dpivf->cmd[num_words++] = lptr->addr;
+               cmd[num_words++] = (uint64_t)lptr->length;
+               cmd[num_words++] = lptr->addr;
                lptr++;
        }
 
-       rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-       if (!rc) {
-               if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-                       rte_wmb();
-                       plt_write64(num_words,
-                                   dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-                       dpivf->stats.submitted += nb_src;
-               }
-               dpivf->num_words += num_words;
+       rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+       if (unlikely(rc)) {
+               STRM_DEC(dpivf->conf.c_desc, tail);
+               return rc;
+       }
+
+       if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+               rte_wmb();
+               plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+               dpivf->stats.submitted += nb_src;
+       } else {
+               dpivf->pnum_words += num_words;
+               dpivf->pending++;
        }
 
-       return (rc < 0) ? rc : dpivf->desc_idx++;
+       return (dpivf->desc_idx++);
 }
 
 static uint16_t
-cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t 
nb_cpls,
-                     uint16_t *last_idx, bool *has_error)
+cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t 
nb_cpls, uint16_t *last_idx,
+                     bool *has_error)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
+       struct cnxk_dpi_cdesc_data_s *c_desc = &dpivf->conf.c_desc;
+       struct cnxk_dpi_compl_s *comp_ptr;
        int cnt;
 
        RTE_SET_USED(vchan);
 
-       if (dpivf->stats.submitted == dpivf->stats.completed)
-               return 0;
-
        for (cnt = 0; cnt < nb_cpls; cnt++) {
-               struct cnxk_dpi_compl_s *comp_ptr =
-                       dpivf->conf.c_desc.compl_ptr[cnt];
+               comp_ptr = c_desc->compl_ptr[c_desc->head];
 
                if (comp_ptr->cdata) {
                        if (comp_ptr->cdata == DPI_REQ_CDATA)
                                break;
                        *has_error = 1;
                        dpivf->stats.errors++;
+                       STRM_INC(*c_desc, head);
                        break;
                }
+
+               comp_ptr->cdata = DPI_REQ_CDATA;
+               STRM_INC(*c_desc, head);
        }
 
-       *last_idx = cnt - 1;
-       dpivf->conf.c_desc.tail = cnt;
        dpivf->stats.completed += cnt;
+       *last_idx = dpivf->stats.completed - 1;
 
        return cnt;
 }
 
 static uint16_t
-cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan,
-                            const uint16_t nb_cpls, uint16_t *last_idx,
-                            enum rte_dma_status_code *status)
+cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t 
nb_cpls,
+                            uint16_t *last_idx, enum rte_dma_status_code 
*status)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
+       struct cnxk_dpi_cdesc_data_s *c_desc = &dpivf->conf.c_desc;
+       struct cnxk_dpi_compl_s *comp_ptr;
        int cnt;
 
        RTE_SET_USED(vchan);
        RTE_SET_USED(last_idx);
+
        for (cnt = 0; cnt < nb_cpls; cnt++) {
-               struct cnxk_dpi_compl_s *comp_ptr =
-                       dpivf->conf.c_desc.compl_ptr[cnt];
+               comp_ptr = c_desc->compl_ptr[c_desc->head];
                status[cnt] = comp_ptr->cdata;
                if (status[cnt]) {
                        if (status[cnt] == DPI_REQ_CDATA)
@@ -557,30 +631,52 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t 
vchan,
 
                        dpivf->stats.errors++;
                }
+               comp_ptr->cdata = DPI_REQ_CDATA;
+               STRM_INC(*c_desc, head);
        }
 
-       *last_idx = cnt - 1;
-       dpivf->conf.c_desc.tail = 0;
        dpivf->stats.completed += cnt;
+       *last_idx = dpivf->stats.completed - 1;
 
        return cnt;
 }
 
+static uint16_t
+cnxk_damdev_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+       const struct cnxk_dpi_vf_s *dpivf = (const struct cnxk_dpi_vf_s 
*)dev_private;
+       uint16_t burst_cap;
+
+       RTE_SET_USED(vchan);
+
+       burst_cap = dpivf->conf.c_desc.max_cnt -
+                   ((dpivf->stats.submitted - dpivf->stats.completed) + 
dpivf->pending) + 1;
+
+       return burst_cap;
+}
+
 static int
 cnxk_dmadev_submit(void *dev_private, uint16_t vchan __rte_unused)
 {
        struct cnxk_dpi_vf_s *dpivf = dev_private;
+       uint32_t num_words = dpivf->pnum_words;
+
+       if (!dpivf->pnum_words)
+               return 0;
 
        rte_wmb();
-       plt_write64(dpivf->num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-       dpivf->stats.submitted++;
+       plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+
+       dpivf->stats.submitted += dpivf->pending;
+       dpivf->pnum_words = 0;
+       dpivf->pending = 0;
 
        return 0;
 }
 
 static int
-cnxk_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
-              struct rte_dma_stats *rte_stats, uint32_t size)
+cnxk_stats_get(const struct rte_dma_dev *dev, uint16_t vchan, struct 
rte_dma_stats *rte_stats,
+              uint32_t size)
 {
        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
        struct rte_dma_stats *stats = &dpivf->stats;
@@ -628,8 +724,7 @@ static const struct rte_dma_dev_ops cnxk_dmadev_ops = {
 };
 
 static int
-cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
-                 struct rte_pci_device *pci_dev)
+cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct 
rte_pci_device *pci_dev)
 {
        struct cnxk_dpi_vf_s *dpivf = NULL;
        char name[RTE_DEV_NAME_MAX_LEN];
@@ -648,8 +743,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
        memset(name, 0, sizeof(name));
        rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
 
-       dmadev = rte_dma_pmd_allocate(name, pci_dev->device.numa_node,
-                                     sizeof(*dpivf));
+       dmadev = rte_dma_pmd_allocate(name, pci_dev->device.numa_node, 
sizeof(*dpivf));
        if (dmadev == NULL) {
                plt_err("dma device allocation failed for %s", name);
                return -ENOMEM;
@@ -666,6 +760,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
        dmadev->fp_obj->submit = cnxk_dmadev_submit;
        dmadev->fp_obj->completed = cnxk_dmadev_completed;
        dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status;
+       dmadev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity;
 
        if (pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KA ||
            pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KA ||
@@ -682,6 +777,8 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
        if (rc < 0)
                goto err_out_free;
 
+       dmadev->state = RTE_DMA_DEV_READY;
+
        return 0;
 
 err_out_free:
@@ -703,20 +800,17 @@ cnxk_dmadev_remove(struct rte_pci_device *pci_dev)
 }
 
 static const struct rte_pci_id cnxk_dma_pci_map[] = {
-       {
-               RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM,
-                              PCI_DEVID_CNXK_DPI_VF)
-       },
+       {RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNXK_DPI_VF)},
        {
                .vendor_id = 0,
        },
 };
 
 static struct rte_pci_driver cnxk_dmadev = {
-       .id_table  = cnxk_dma_pci_map,
+       .id_table = cnxk_dma_pci_map,
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
-       .probe     = cnxk_dmadev_probe,
-       .remove    = cnxk_dmadev_remove,
+       .probe = cnxk_dmadev_probe,
+       .remove = cnxk_dmadev_remove,
 };
 
 RTE_PMD_REGISTER_PCI(cnxk_dmadev_pci_driver, cnxk_dmadev);
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index e1f5694f50..9563295af0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -4,16 +4,21 @@
 #ifndef CNXK_DMADEV_H
 #define CNXK_DMADEV_H
 
-#define DPI_MAX_POINTER                15
-#define DPI_QUEUE_STOP         0x0
-#define DPI_QUEUE_START                0x1
-#define STRM_INC(s)            ((s).tail = ((s).tail + 1) % (s).max_cnt)
-#define DPI_MAX_DESC           1024
+#include <roc_api.h>
+
+#define DPI_MAX_POINTER         15
+#define STRM_INC(s, var) ((s).var = ((s).var + 1) & (s).max_cnt)
+#define STRM_DEC(s, var) ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : 
((s).var - 1))
+#define DPI_MAX_DESC    1024
 
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
-#define DPI_REQ_CDATA          0xFF
+#define DPI_REQ_CDATA 0xFF
+
+#define CNXK_DPI_DEV_CONFIG   (1ULL << 0)
+#define CNXK_DPI_VCHAN_CONFIG (1ULL << 1)
+#define CNXK_DPI_DEV_START    (1ULL << 2)
 
 struct cnxk_dpi_compl_s {
        uint64_t cdata;
@@ -21,7 +26,7 @@ struct cnxk_dpi_compl_s {
 };
 
 struct cnxk_dpi_cdesc_data_s {
-       struct cnxk_dpi_compl_s *compl_ptr[DPI_MAX_DESC];
+       struct cnxk_dpi_compl_s **compl_ptr;
        uint16_t max_cnt;
        uint16_t head;
        uint16_t tail;
@@ -36,9 +41,10 @@ struct cnxk_dpi_vf_s {
        struct roc_dpi rdpi;
        struct cnxk_dpi_conf conf;
        struct rte_dma_stats stats;
-       uint64_t cmd[DPI_MAX_CMD_SIZE];
-       uint32_t num_words;
+       uint16_t pending;
+       uint16_t pnum_words;
        uint16_t desc_idx;
+       uint16_t flag;
 };
 
 #endif
-- 
2.25.1

Reply via email to