On Tue, Oct 26, 2021 at 1:49 AM Jerin Jacob <jerinjac...@gmail.com> wrote:
>
> On Tue, Oct 26, 2021 at 9:43 AM Radha Mohan Chintakuntla
> <rad...@marvell.com> wrote:
> >
> > Add functions for the dmadev vchan setup and DMA operations.
> >
> > Signed-off-by: Radha Mohan Chintakuntla <rad...@marvell.com>
> > ---
> >  drivers/dma/cnxk/cnxk_dmadev.c | 322 +++++++++++++++++++++++++++++++++
> >  drivers/dma/cnxk/cnxk_dmadev.h |  53 ++++++
> >  drivers/dma/cnxk/version.map   |   3 +
> >  3 files changed, 378 insertions(+)
> >  create mode 100644 drivers/dma/cnxk/version.map
> >
> > diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
> > index 620766743d..8434579aa2 100644
> > --- a/drivers/dma/cnxk/cnxk_dmadev.c
> > +++ b/drivers/dma/cnxk/cnxk_dmadev.c
> > @@ -18,6 +18,322 @@
> >  #include <roc_api.h>
> >  #include <cnxk_dmadev.h>
> >
> > +static int
> > +cnxk_dmadev_info_get(const struct rte_dma_dev *dev,
> > +                     struct rte_dma_info *dev_info, uint32_t size)
> > +{
> > +        RTE_SET_USED(dev);
> > +        RTE_SET_USED(size);
> > +
> > +        dev_info->max_vchans = 1;
> > +        dev_info->nb_vchans = 1;
> > +        dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
> > +                RTE_DMA_CAPA_MEM_TO_DEV | RTE_DMA_CAPA_DEV_TO_MEM |
> > +                RTE_DMA_CAPA_OPS_COPY;
> > +        dev_info->max_desc = DPI_MAX_DESC;
> > +        dev_info->min_desc = 1;
> > +        dev_info->max_sges = DPI_MAX_POINTER;
> > +
> > +        return 0;
> > +}
> > +
> > +static int
> > +cnxk_dmadev_configure(struct rte_dma_dev *dev,
> > +                      const struct rte_dma_conf *conf, uint32_t conf_sz)
> > +{
> > +        struct cnxk_dpi_vf_s *dpivf = NULL;
> > +        int rc = 0;
> > +
> > +        RTE_SET_USED(conf);
> > +        RTE_SET_USED(conf_sz);
> > +        dpivf = dev->fp_obj->dev_private;
> > +        rc = roc_dpi_queue_configure(&dpivf->rdpi);
> > +        if (rc < 0)
> > +                plt_err("DMA queue configure failed err = %d", rc);
> > +
> > +        return rc;
> > +}
> > +
> > +static int
> > +cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
> > +                        const struct rte_dma_vchan_conf *conf,
> > +                        uint32_t conf_sz)
> > +{
> > +        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> > +        struct cnxk_dpi_compl_s *comp_data;
> > +        int i;
> > +
> > +        RTE_SET_USED(vchan);
> > +        RTE_SET_USED(conf_sz);
> > +
> > +        switch (conf->direction) {
> > +        case RTE_DMA_DIR_DEV_TO_MEM:
> > +                dpivf->conf.direction = DPI_XTYPE_INBOUND;
> > +                dpivf->conf.src_port = conf->src_port.pcie.coreid;
> > +                dpivf->conf.dst_port = 0;
> > +                break;
> > +        case RTE_DMA_DIR_MEM_TO_DEV:
> > +                dpivf->conf.direction = DPI_XTYPE_OUTBOUND;
> > +                dpivf->conf.src_port = 0;
> > +                dpivf->conf.dst_port = conf->dst_port.pcie.coreid;
> > +                break;
> > +        case RTE_DMA_DIR_MEM_TO_MEM:
> > +                dpivf->conf.direction = DPI_XTYPE_INTERNAL_ONLY;
> > +                dpivf->conf.src_port = 0;
> > +                dpivf->conf.dst_port = 0;
> > +                break;
> > +        case RTE_DMA_DIR_DEV_TO_DEV:
> > +                dpivf->conf.direction = DPI_XTYPE_EXTERNAL_ONLY;
> > +                dpivf->conf.src_port = conf->src_port.pcie.coreid;
> > +                dpivf->conf.dst_port = conf->dst_port.pcie.coreid;
> > +                break;
> > +        }
> > +
> > +        for (i = 0; i < conf->nb_desc; i++) {
> > +                comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
> > +                dpivf->conf.c_desc.compl_ptr[i] = comp_data;
> > +        }
> > +        dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
> > +        dpivf->conf.c_desc.head = 0;
> > +        dpivf->conf.c_desc.tail = 0;
> > +
> > +        return 0;
> > +}
> > +
> > +static int
> > +cnxk_dmadev_start(struct rte_dma_dev *dev)
> > +{
> > +        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> > +
> > +        roc_dpi_queue_start(&dpivf->rdpi);
> > +
> > +        return 0;
> > +}
> > +
> > +static int
> > +cnxk_dmadev_stop(struct rte_dma_dev *dev)
> > +{
> > +        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> > +
> > +        roc_dpi_queue_stop(&dpivf->rdpi);
> > +
> > +        return 0;
> > +}
> > +
> > +static int
> > +cnxk_dmadev_close(struct rte_dma_dev *dev)
> > +{
> > +        struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> > +
> > +        roc_dpi_queue_stop(&dpivf->rdpi);
> > +        roc_dpi_dev_fini(&dpivf->rdpi);
> > +
> > +        return 0;
> > +}
> > +
> > +static inline int
> > +__dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
> > +{
> > +        uint64_t *ptr = dpi->chunk_base;
> > +
> > +        if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) ||
> > +            cmds == NULL)
> > +                return -EINVAL;
> > +
> > +        /*
> > +         * Normally there is plenty of room in the current buffer for the
> > +         * command
> > +         */
> > +        if (dpi->chunk_head + cmd_count < dpi->pool_size_m1) {
> > +                ptr += dpi->chunk_head;
> > +                dpi->chunk_head += cmd_count;
> > +                while (cmd_count--)
> > +                        *ptr++ = *cmds++;
> > +        } else {
> > +                int count;
> > +                uint64_t *new_buff = dpi->chunk_next;
> > +
> > +                dpi->chunk_next =
> > +                        (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
> > +                if (!dpi->chunk_next) {
> > +                        plt_err("Failed to alloc next buffer from NPA");
> > +                        return -ENOMEM;
> > +                }
> > +
> > +                /*
> > +                 * Figure out how many cmd words will fit in this buffer.
> > +                 * One location will be needed for the next buffer pointer.
> > +                 */
> > +                count = dpi->pool_size_m1 - dpi->chunk_head;
> > +                ptr += dpi->chunk_head;
> > +                cmd_count -= count;
> > +                while (count--)
> > +                        *ptr++ = *cmds++;
> > +
> > +                /*
> > +                 * chunk next ptr is 2 DWORDS
> > +                 * second DWORD is reserved.
> > +                 */
> > +                *ptr++ = (uint64_t)new_buff;
> > +                *ptr = 0;
> > +
> > +                /*
> > +                 * The current buffer is full and has a link to the next
> > +                 * buffer. Time to write the rest of the commands into the new
> > +                 * buffer.
> > +                 */
> > +                dpi->chunk_base = new_buff;
> > +                dpi->chunk_head = cmd_count;
> > +                ptr = new_buff;
> > +                while (cmd_count--)
> > +                        *ptr++ = *cmds++;
> > +
> > +                /* queue index may be greater than pool size */
> > +                if (dpi->chunk_head >= dpi->pool_size_m1) {
> > +                        new_buff = dpi->chunk_next;
> > +                        dpi->chunk_next = (void *)
> > +                                roc_npa_aura_op_alloc(dpi->aura_handle, 0);
> > +                        if (!dpi->chunk_next) {
> > +                                plt_err("Failed to alloc next buffer from NPA");
> > +                                return -ENOMEM;
> > +                        }
> > +                        /* Write next buffer address */
> > +                        *ptr = (uint64_t)new_buff;
> > +                        dpi->chunk_base = new_buff;
> > +                        dpi->chunk_head = 0;
> > +                }
> > +        }
> > +
> > +        return 0;
> > +}
> > +
> > +static int
> > +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
> > +                 rte_iova_t dst, uint32_t length, uint64_t flags)
> > +{
> > +        uint64_t cmd[DPI_MAX_CMD_SIZE] = {0};
> > +        union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];
> > +        rte_iova_t fptr, lptr;
> > +        struct cnxk_dpi_vf_s *dpivf = dev_private;
> > +        struct cnxk_dpi_compl_s *comp_ptr;
> > +        int num_words = 0;
> > +        int rc;
> > +
> > +        RTE_SET_USED(vchan);
> > +
> > +        header->s.xtype = dpivf->conf.direction;
> > +        header->s.pt = DPI_HDR_PT_ZBW_CA;
> > +        comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
> > +        comp_ptr->cdata = DPI_REQ_CDATA;
> > +        header->s.ptr = (uint64_t)comp_ptr;
> > +        STRM_INC(dpivf->conf.c_desc);
> > +
> > +        /* pvfe should be set for inbound and outbound only */
> > +        if (header->s.xtype <= 1)
> > +                header->s.pvfe = 1;
> > +        num_words += 4;
> > +
> > +        header->s.nfst = 1;
> > +        header->s.nlst = 1;
>
> Including the zeroing of cmd[], the rest of this filling can be moved
> to the slow path.
>
> Please change the logic to populate the static items in the slow path,
> at configure/vchan setup time, and update only the per-transfer items
> here, for better performance.

These are instruction header values that we are filling. If you look at
it, there is really only one 64-bit field that can be filled beforehand,
i.e. in the slow path in vchan_setup(). The rest of the header can only
be filled here: nfst and nlst (the numbers of pointers to be DMA'ed) and
the completion pointer. So just for that I do not see value in moving
the code around.
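For illustration, the most that could be cached is roughly this (an
untested sketch; the cached_hdr member on struct cnxk_dpi_conf is
hypothetical, not something this patch adds):

        /* Slow path, in cnxk_dmadev_vchan_setup(): precompute the static
         * header fields once per vchan into a hypothetical cached_hdr.
         */
        union dpi_instr_hdr_s *hdr = &dpivf->conf.cached_hdr;

        memset(hdr, 0, sizeof(*hdr));
        hdr->s.xtype = dpivf->conf.direction;
        hdr->s.pt = DPI_HDR_PT_ZBW_CA;
        /* pvfe applies to inbound/outbound transfers only */
        if (hdr->s.xtype <= 1)
                hdr->s.pvfe = 1;

        /* Fast path, in cnxk_dmadev_copy(): start from the cached header
         * and fill only the per-transfer fields.
         */
        *header = dpivf->conf.cached_hdr;
        comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
        comp_ptr->cdata = DPI_REQ_CDATA;
        header->s.ptr = (uint64_t)comp_ptr;
        header->s.nfst = 1;
        header->s.nlst = 1;

That would save the two constant stores per operation, but the completion
pointer and the pointer counts still have to be written per transfer,
which is the point above.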
<snip>