From: Pavan Nikhilesh <pbhagavat...@marvell.com> Add support for configuring a larger chunk size by using the new OPEN_V2 mailbox command. This improves performance because the number of mempool allocations is reduced.
Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com> Signed-off-by: Amit Prakash Shukla <amitpraka...@marvell.com> --- drivers/common/cnxk/roc_dpi.c | 39 ++++++++++++++++++++++++++++++ drivers/common/cnxk/roc_dpi.h | 2 ++ drivers/common/cnxk/roc_dpi_priv.h | 1 + drivers/common/cnxk/version.map | 1 + drivers/dma/cnxk/cnxk_dmadev.c | 31 ++++++++++++++++-------- drivers/dma/cnxk/cnxk_dmadev.h | 1 + 6 files changed, 65 insertions(+), 10 deletions(-) diff --git a/drivers/common/cnxk/roc_dpi.c b/drivers/common/cnxk/roc_dpi.c index 1ee777d779..90138fbabd 100644 --- a/drivers/common/cnxk/roc_dpi.c +++ b/drivers/common/cnxk/roc_dpi.c @@ -87,6 +87,45 @@ roc_dpi_configure(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uin if (mbox_msg.s.wqecsoff) mbox_msg.s.wqecs = 1; + rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg, sizeof(dpi_mbox_msg_t)); + if (rc < 0) + plt_err("Failed to send mbox message %d to DPI PF, err %d", mbox_msg.s.cmd, rc); + + return rc; +} + +int +roc_dpi_configure_v2(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uint64_t chunk_base) +{ + struct plt_pci_device *pci_dev; + dpi_mbox_msg_t mbox_msg; + uint64_t reg; + int rc; + + if (!roc_dpi) { + plt_err("roc_dpi is NULL"); + return -EINVAL; + } + + pci_dev = roc_dpi->pci_dev; + + roc_dpi_disable(roc_dpi); + reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); + while (!(reg & BIT_ULL(63))) + reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR); + + plt_write64(0x0, roc_dpi->rbase + DPI_VDMA_REQQ_CTL); + plt_write64(chunk_base, roc_dpi->rbase + DPI_VDMA_SADDR); + mbox_msg.u[0] = 0; + mbox_msg.u[1] = 0; + /* DPI PF driver expects vfid starts from index 0 */ + mbox_msg.s.vfid = roc_dpi->vfid; + mbox_msg.s.cmd = DPI_QUEUE_OPEN_V2; + mbox_msg.s.csize = chunk_sz / 8; + mbox_msg.s.aura = aura; + mbox_msg.s.sso_pf_func = idev_sso_pffunc_get(); + mbox_msg.s.npa_pf_func = idev_npa_pffunc_get(); + rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg, sizeof(dpi_mbox_msg_t)); if 
(rc < 0) diff --git a/drivers/common/cnxk/roc_dpi.h b/drivers/common/cnxk/roc_dpi.h index 978e2badb2..628a71d1a2 100644 --- a/drivers/common/cnxk/roc_dpi.h +++ b/drivers/common/cnxk/roc_dpi.h @@ -16,6 +16,8 @@ int __roc_api roc_dpi_dev_fini(struct roc_dpi *roc_dpi); int __roc_api roc_dpi_configure(struct roc_dpi *dpi, uint32_t chunk_sz, uint64_t aura, uint64_t chunk_base); +int __roc_api roc_dpi_configure_v2(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, + uint64_t chunk_base); int __roc_api roc_dpi_enable(struct roc_dpi *dpi); int __roc_api roc_dpi_disable(struct roc_dpi *dpi); diff --git a/drivers/common/cnxk/roc_dpi_priv.h b/drivers/common/cnxk/roc_dpi_priv.h index 52962c8bc0..06b3b46267 100644 --- a/drivers/common/cnxk/roc_dpi_priv.h +++ b/drivers/common/cnxk/roc_dpi_priv.h @@ -15,6 +15,7 @@ #define DPI_QUEUE_CLOSE 0x2 #define DPI_REG_DUMP 0x3 #define DPI_GET_REG_CFG 0x4 +#define DPI_QUEUE_OPEN_V2 0x5 typedef union dpi_mbox_msg_t { uint64_t u[2]; diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 424ad7f484..606d865672 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -82,6 +82,7 @@ INTERNAL { roc_cpt_int_misc_cb_register; roc_cpt_int_misc_cb_unregister; roc_dpi_configure; + roc_dpi_configure_v2; roc_dpi_dev_fini; roc_dpi_dev_init; roc_dpi_disable; diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2..d3a8665020 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -291,6 +291,7 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; struct cnxk_dpi_conf *dpi_conf; uint32_t chunks, nb_desc = 0; + uint32_t queue_buf_sz; int i, j, rc = 0; void *chunk; @@ -310,34 +311,44 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->completed_offset = 0; } - chunks = CNXK_DPI_CHUNKS_FROM_DESC(CNXK_DPI_QUEUE_BUF_SIZE, nb_desc); - rc = cnxk_dmadev_chunk_pool_create(dev, chunks, 
CNXK_DPI_QUEUE_BUF_SIZE); + queue_buf_sz = CNXK_DPI_QUEUE_BUF_SIZE_V2; + /* Max block size allowed by cnxk mempool driver is (128 * 1024). + * Block size = elt_size + mp->header + mp->trailer. + * + * Note from cn9k mempool driver: + * In cn9k additional padding of 128 bytes is added to mempool->trailer to + * ensure that the element size always occupies odd number of cachelines + * to ensure even distribution of elements among L1D cache sets. + */ + if (!roc_model_is_cn10k()) + queue_buf_sz = CNXK_DPI_QUEUE_BUF_SIZE_V2 - 128; + + chunks = CNXK_DPI_CHUNKS_FROM_DESC(queue_buf_sz, nb_desc); + rc = cnxk_dmadev_chunk_pool_create(dev, chunks, queue_buf_sz); if (rc < 0) { plt_err("DMA pool configure failed err = %d", rc); - goto done; + goto error; } rc = rte_mempool_get(dpivf->chunk_pool, &chunk); if (rc < 0) { plt_err("DMA failed to get chunk pointer err = %d", rc); rte_mempool_free(dpivf->chunk_pool); - goto done; + goto error; } - rc = roc_dpi_configure(&dpivf->rdpi, CNXK_DPI_QUEUE_BUF_SIZE, dpivf->aura, (uint64_t)chunk); + rc = roc_dpi_configure_v2(&dpivf->rdpi, queue_buf_sz, dpivf->aura, (uint64_t)chunk); if (rc < 0) { plt_err("DMA configure failed err = %d", rc); rte_mempool_free(dpivf->chunk_pool); - goto done; + goto error; } - dpivf->chunk_base = chunk; dpivf->chunk_head = 0; - dpivf->chunk_size_m1 = (CNXK_DPI_QUEUE_BUF_SIZE >> 3) - 2; + dpivf->chunk_size_m1 = (queue_buf_sz >> 3) - 2; roc_dpi_enable(&dpivf->rdpi); - -done: +error: return rc; } diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba2..3d8f875ada 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -30,6 +30,7 @@ #define CNXK_DPI_MIN_DESC 2 #define CNXK_DPI_MAX_VCHANS_PER_QUEUE 4 #define CNXK_DPI_QUEUE_BUF_SIZE 16256 +#define CNXK_DPI_QUEUE_BUF_SIZE_V2 130944 #define CNXK_DPI_POOL_MAX_CACHE_SZ (16) #define CNXK_DPI_DW_PER_SINGLE_CMD 8 #define CNXK_DPI_HDR_LEN 4 -- 2.25.1