From: Pavan Nikhilesh <pbhagavat...@marvell.com> Add support to configure a higher chunk size by using the new OPEN_V2 mailbox; this improves performance as the number of mempool allocs is reduced. Add a timeout when polling for the queue idle state.
Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com> Signed-off-by: Amit Prakash Shukla <amitpraka...@marvell.com> --- v2 Changes: - Update release notes. - Use timeout when polling for queue idle state. doc/guides/rel_notes/release_24_07.rst | 6 +++ drivers/common/cnxk/roc_dpi.c | 72 ++++++++++++++++++++++---- drivers/common/cnxk/roc_dpi.h | 3 ++ drivers/common/cnxk/roc_dpi_priv.h | 3 ++ drivers/common/cnxk/version.map | 2 + drivers/dma/cnxk/cnxk_dmadev.c | 37 ++++++++----- drivers/dma/cnxk/cnxk_dmadev.h | 1 + 7 files changed, 101 insertions(+), 23 deletions(-) diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst index a69f24cf99..60b92e4842 100644 --- a/doc/guides/rel_notes/release_24_07.rst +++ b/doc/guides/rel_notes/release_24_07.rst @@ -55,6 +55,12 @@ New Features Also, make sure to start the actual text at the margin. ======================================================= +* **Updated Marvell CNXK DMA driver.** + + * Updated DMA driver internal pool to use higher chunk size, effectively + reducing the number of mempool allocs needed, thereby increasing DMA + performance. 
+ Removed Items ------------- diff --git a/drivers/common/cnxk/roc_dpi.c b/drivers/common/cnxk/roc_dpi.c index 1ee777d779..892685d185 100644 --- a/drivers/common/cnxk/roc_dpi.c +++ b/drivers/common/cnxk/roc_dpi.c @@ -38,6 +38,24 @@ send_msg_to_pf(struct plt_pci_addr *pci_addr, const char *value, int size) return 0; } +int +roc_dpi_wait_queue_idle(struct roc_dpi *roc_dpi) +{ + const uint64_t cyc = (DPI_QUEUE_IDLE_TMO_MS * plt_tsc_hz()) / 1E3; + const uint64_t start = plt_tsc_cycles(); + uint64_t reg; + + /* Wait for SADDR to become idle */ + reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); + while (!(reg & BIT_ULL(63))) { + reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); + if (plt_tsc_cycles() - start == cyc) + return -ETIMEDOUT; + } + + return 0; +} + int roc_dpi_enable(struct roc_dpi *dpi) { @@ -57,7 +75,6 @@ roc_dpi_configure(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uin { struct plt_pci_device *pci_dev; dpi_mbox_msg_t mbox_msg; - uint64_t reg; int rc; if (!roc_dpi) { @@ -68,9 +85,9 @@ roc_dpi_configure(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uin pci_dev = roc_dpi->pci_dev; roc_dpi_disable(roc_dpi); - reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); - while (!(reg & BIT_ULL(63))) - reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); + rc = roc_dpi_wait_queue_idle(roc_dpi); + if (rc) + return rc; plt_write64(0x0, roc_dpi->rbase + DPI_VDMA_REQQ_CTL); plt_write64(chunk_base, roc_dpi->rbase + DPI_VDMA_SADDR); @@ -87,6 +104,45 @@ roc_dpi_configure(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uin if (mbox_msg.s.wqecsoff) mbox_msg.s.wqecs = 1; + rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg, sizeof(dpi_mbox_msg_t)); + if (rc < 0) + plt_err("Failed to send mbox message %d to DPI PF, err %d", mbox_msg.s.cmd, rc); + + return rc; +} + +int +roc_dpi_configure_v2(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uint64_t chunk_base) +{ + struct plt_pci_device *pci_dev; + dpi_mbox_msg_t mbox_msg; + 
int rc; + + if (!roc_dpi) { + plt_err("roc_dpi is NULL"); + return -EINVAL; + } + + pci_dev = roc_dpi->pci_dev; + + roc_dpi_disable(roc_dpi); + + rc = roc_dpi_wait_queue_idle(roc_dpi); + if (rc) + return rc; + + plt_write64(0x0, roc_dpi->rbase + DPI_VDMA_REQQ_CTL); + plt_write64(chunk_base, roc_dpi->rbase + DPI_VDMA_SADDR); + mbox_msg.u[0] = 0; + mbox_msg.u[1] = 0; + /* DPI PF driver expects vfid starts from index 0 */ + mbox_msg.s.vfid = roc_dpi->vfid; + mbox_msg.s.cmd = DPI_QUEUE_OPEN_V2; + mbox_msg.s.csize = chunk_sz / 8; + mbox_msg.s.aura = aura; + mbox_msg.s.sso_pf_func = idev_sso_pffunc_get(); + mbox_msg.s.npa_pf_func = idev_npa_pffunc_get(); + rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg, sizeof(dpi_mbox_msg_t)); if (rc < 0) @@ -116,13 +172,11 @@ roc_dpi_dev_fini(struct roc_dpi *roc_dpi) { struct plt_pci_device *pci_dev = roc_dpi->pci_dev; dpi_mbox_msg_t mbox_msg; - uint64_t reg; int rc; - /* Wait for SADDR to become idle */ - reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); - while (!(reg & BIT_ULL(63))) - reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); + rc = roc_dpi_wait_queue_idle(roc_dpi); + if (rc) + return rc; mbox_msg.u[0] = 0; mbox_msg.u[1] = 0; diff --git a/drivers/common/cnxk/roc_dpi.h b/drivers/common/cnxk/roc_dpi.h index 978e2badb2..7b4f9d4f4f 100644 --- a/drivers/common/cnxk/roc_dpi.h +++ b/drivers/common/cnxk/roc_dpi.h @@ -16,7 +16,10 @@ int __roc_api roc_dpi_dev_fini(struct roc_dpi *roc_dpi); int __roc_api roc_dpi_configure(struct roc_dpi *dpi, uint32_t chunk_sz, uint64_t aura, uint64_t chunk_base); +int __roc_api roc_dpi_configure_v2(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, + uint64_t chunk_base); int __roc_api roc_dpi_enable(struct roc_dpi *dpi); +int __roc_api roc_dpi_wait_queue_idle(struct roc_dpi *dpi); int __roc_api roc_dpi_disable(struct roc_dpi *dpi); #endif diff --git a/drivers/common/cnxk/roc_dpi_priv.h b/drivers/common/cnxk/roc_dpi_priv.h index 52962c8bc0..844e5f37ee 100644 --- 
a/drivers/common/cnxk/roc_dpi_priv.h +++ b/drivers/common/cnxk/roc_dpi_priv.h @@ -15,6 +15,9 @@ #define DPI_QUEUE_CLOSE 0x2 #define DPI_REG_DUMP 0x3 #define DPI_GET_REG_CFG 0x4 +#define DPI_QUEUE_OPEN_V2 0x5 + +#define DPI_QUEUE_IDLE_TMO_MS 1E3 typedef union dpi_mbox_msg_t { uint64_t u[2]; diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 424ad7f484..cc9f47e0ad 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -82,10 +82,12 @@ INTERNAL { roc_cpt_int_misc_cb_register; roc_cpt_int_misc_cb_unregister; roc_dpi_configure; + roc_dpi_configure_v2; roc_dpi_dev_fini; roc_dpi_dev_init; roc_dpi_disable; roc_dpi_enable; + roc_dpi_wait_queue_idle; roc_error_msg_get; roc_eswitch_nix_process_repte_notify_cb_register; roc_eswitch_nix_process_repte_notify_cb_unregister; diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2..2de0a0a3ce 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -291,6 +291,7 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; struct cnxk_dpi_conf *dpi_conf; uint32_t chunks, nb_desc = 0; + uint32_t queue_buf_sz; int i, j, rc = 0; void *chunk; @@ -310,34 +311,44 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->completed_offset = 0; } - chunks = CNXK_DPI_CHUNKS_FROM_DESC(CNXK_DPI_QUEUE_BUF_SIZE, nb_desc); - rc = cnxk_dmadev_chunk_pool_create(dev, chunks, CNXK_DPI_QUEUE_BUF_SIZE); + queue_buf_sz = CNXK_DPI_QUEUE_BUF_SIZE_V2; + /* Max block size allowed by cnxk mempool driver is (128 * 1024). + * Block size = elt_size + mp->header + mp->trailer. + * + * Note from cn9k mempool driver: + * In cn9k additional padding of 128 bytes is added to mempool->trailer to + * ensure that the element size always occupies odd number of cachelines + * to ensure even distribution of elements among L1D cache sets. 
+ */ + if (!roc_model_is_cn10k()) + queue_buf_sz = CNXK_DPI_QUEUE_BUF_SIZE_V2 - 128; + + chunks = CNXK_DPI_CHUNKS_FROM_DESC(queue_buf_sz, nb_desc); + rc = cnxk_dmadev_chunk_pool_create(dev, chunks, queue_buf_sz); if (rc < 0) { plt_err("DMA pool configure failed err = %d", rc); - goto done; + goto error; } rc = rte_mempool_get(dpivf->chunk_pool, &chunk); if (rc < 0) { plt_err("DMA failed to get chunk pointer err = %d", rc); rte_mempool_free(dpivf->chunk_pool); - goto done; + goto error; } - rc = roc_dpi_configure(&dpivf->rdpi, CNXK_DPI_QUEUE_BUF_SIZE, dpivf->aura, (uint64_t)chunk); + rc = roc_dpi_configure_v2(&dpivf->rdpi, queue_buf_sz, dpivf->aura, (uint64_t)chunk); if (rc < 0) { plt_err("DMA configure failed err = %d", rc); rte_mempool_free(dpivf->chunk_pool); - goto done; + goto error; } - dpivf->chunk_base = chunk; dpivf->chunk_head = 0; - dpivf->chunk_size_m1 = (CNXK_DPI_QUEUE_BUF_SIZE >> 3) - 2; + dpivf->chunk_size_m1 = (queue_buf_sz >> 3) - 2; roc_dpi_enable(&dpivf->rdpi); - -done: +error: return rc; } @@ -345,11 +356,9 @@ static int cnxk_dmadev_stop(struct rte_dma_dev *dev) { struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; - uint64_t reg; - reg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR); - while (!(reg & BIT_ULL(63))) - reg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR); + if (roc_dpi_wait_queue_idle(&dpivf->rdpi)) + return -EAGAIN; roc_dpi_disable(&dpivf->rdpi); rte_mempool_free(dpivf->chunk_pool); diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba2..3d8f875ada 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -30,6 +30,7 @@ #define CNXK_DPI_MIN_DESC 2 #define CNXK_DPI_MAX_VCHANS_PER_QUEUE 4 #define CNXK_DPI_QUEUE_BUF_SIZE 16256 +#define CNXK_DPI_QUEUE_BUF_SIZE_V2 130944 #define CNXK_DPI_POOL_MAX_CACHE_SZ (16) #define CNXK_DPI_DW_PER_SINGLE_CMD 8 #define CNXK_DPI_HDR_LEN 4 -- 2.25.1