add Tx data path burst API for the device.

Signed-off-by: Aman Kumar <aman.ku...@vvdntech.in>
---
 drivers/net/qdma/meson.build   |   2 +
 drivers/net/qdma/qdma_devops.c |  10 -
 drivers/net/qdma/qdma_rxtx.c   | 422 +++++++++++++++++++++++++++++++++
 drivers/net/qdma/qdma_rxtx.h   |  10 +
 drivers/net/qdma/qdma_user.c   |  75 ++++++
 5 files changed, 509 insertions(+), 10 deletions(-)
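Usage note (not part of the patch): qdma_xmit_pkts() is the PMD's Tx burst
handler, so applications reach this data path through the generic
rte_eth_tx_burst() API. A minimal sketch is below; the port id, queue id and
the drop-on-failure handling are illustrative assumptions, not something this
patch defines.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Transmit a burst on port 0 / queue 0 (illustrative ids). rte_eth_tx_burst()
 * dispatches to qdma_xmit_pkts(), which calls qdma_xmit_pkts_st() or
 * qdma_xmit_pkts_mm() depending on the queue's streaming/memory-mapped mode.
 */
static uint16_t
send_burst(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = rte_eth_tx_burst(0, 0, pkts, nb_pkts);

	/* mbufs the driver did not accept stay owned by the caller;
	 * free them here instead of retrying, to keep the sketch short.
	 */
	for (uint16_t i = sent; i < nb_pkts; i++)
		rte_pktmbuf_free(pkts[i]);

	return sent;
}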
diff --git a/drivers/net/qdma/meson.build b/drivers/net/qdma/meson.build
index e2da7f25ec..8c86412b83 100644
--- a/drivers/net/qdma/meson.build
+++ b/drivers/net/qdma/meson.build
@@ -19,6 +19,8 @@ includes += include_directories('qdma_access/qdma_s80_hard_access')
 
 headers += files('rte_pmd_qdma.h')
 
+deps += ['mempool_ring']
+
 sources = files(
         'qdma_common.c',
         'qdma_devops.c',
diff --git a/drivers/net/qdma/qdma_devops.c b/drivers/net/qdma/qdma_devops.c
index 28de783207..10d7d67b87 100644
--- a/drivers/net/qdma/qdma_devops.c
+++ b/drivers/net/qdma/qdma_devops.c
@@ -1760,16 +1760,6 @@ uint16_t qdma_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return 0;
 }
 
-uint16_t qdma_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-			uint16_t nb_pkts)
-{
-	(void)tx_queue;
-	(void)tx_pkts;
-	(void)nb_pkts;
-
-	return 0;
-}
-
 void qdma_dev_ops_init(struct rte_eth_dev *dev)
 {
 	dev->dev_ops = &qdma_eth_dev_ops;
diff --git a/drivers/net/qdma/qdma_rxtx.c b/drivers/net/qdma/qdma_rxtx.c
index 102671e16f..1605c9973c 100644
--- a/drivers/net/qdma/qdma_rxtx.c
+++ b/drivers/net/qdma/qdma_rxtx.c
@@ -47,7 +47,166 @@ static void qdma_ul_get_cmpt_pkt_len_v(void *ul_cmpt_entry, __m128i *data)
 }
 #endif /* QDMA_RX_VEC_X86_64 */
 
+#ifdef QDMA_TX_VEC_X86_64
+/* Vector implementation to update H2C descriptor */
+static int qdma_ul_update_st_h2c_desc_v(void *qhndl, uint64_t q_offloads,
+				struct rte_mbuf *mb)
+{
+	(void)q_offloads;
+	int nsegs = mb->nb_segs;
+	uint16_t flags = S_H2C_DESC_F_SOP | S_H2C_DESC_F_EOP;
+	uint16_t id;
+	struct qdma_ul_st_h2c_desc *tx_ring_st;
+	struct qdma_tx_queue *txq = (struct qdma_tx_queue *)qhndl;
+
+	tx_ring_st = (struct qdma_ul_st_h2c_desc *)txq->tx_ring;
+	id = txq->q_pidx_info.pidx;
+
+	if (nsegs == 1) {
+		__m128i descriptor;
+		uint16_t datalen = mb->data_len;
+
+		descriptor = _mm_set_epi64x(mb->buf_iova + mb->data_off,
+				(uint64_t)datalen << 16 |
+				(uint64_t)datalen << 32 |
+				(uint64_t)flags << 48);
+		_mm_store_si128((__m128i *)&tx_ring_st[id], descriptor);
+
+		id++;
+		if (unlikely(id >= (txq->nb_tx_desc - 1)))
+			id -= (txq->nb_tx_desc - 1);
+	} else {
+		int pkt_segs = nsegs;
+		while (nsegs && mb) {
+			__m128i descriptor;
+			uint16_t datalen = mb->data_len;
+
+			flags = 0;
+			if (nsegs == pkt_segs)
+				flags |= S_H2C_DESC_F_SOP;
+			if (nsegs == 1)
+				flags |= S_H2C_DESC_F_EOP;
+
+			descriptor = _mm_set_epi64x(mb->buf_iova + mb->data_off,
+					(uint64_t)datalen << 16 |
+					(uint64_t)datalen << 32 |
+					(uint64_t)flags << 48);
+			_mm_store_si128((__m128i *)&tx_ring_st[id], descriptor);
+
+			nsegs--;
+			mb = mb->next;
+			id++;
+			if (unlikely(id >= (txq->nb_tx_desc - 1)))
+				id -= (txq->nb_tx_desc - 1);
+		}
+	}
+
+	txq->q_pidx_info.pidx = id;
+
+	return 0;
+}
+#endif /* QDMA_TX_VEC_X86_64 */
+
 /******** User logic dependent functions end **********/
+static int reclaim_tx_mbuf(struct qdma_tx_queue *txq,
+			uint16_t cidx, uint16_t free_cnt)
+{
+	int fl_desc = 0;
+	uint16_t count;
+	int id;
+
+	id = txq->tx_fl_tail;
+	fl_desc = (int)cidx - id;
+	if (fl_desc < 0)
+		fl_desc += (txq->nb_tx_desc - 1);
+
+	if (free_cnt && fl_desc > free_cnt)
+		fl_desc = free_cnt;
+
+	if ((id + fl_desc) < (txq->nb_tx_desc - 1)) {
+		for (count = 0; count < ((uint16_t)fl_desc & 0xFFFF);
+			count++) {
+			rte_pktmbuf_free(txq->sw_ring[id]);
+			txq->sw_ring[id++] = NULL;
+		}
+	} else {
+		fl_desc -= (txq->nb_tx_desc - 1 - id);
+		for (; id < (txq->nb_tx_desc - 1); id++) {
+			rte_pktmbuf_free(txq->sw_ring[id]);
+			txq->sw_ring[id] = NULL;
+		}
+
+		id -= (txq->nb_tx_desc - 1);
+		for (count = 0; count < ((uint16_t)fl_desc & 0xFFFF);
+			count++) {
+			rte_pktmbuf_free(txq->sw_ring[id]);
+			txq->sw_ring[id++] = NULL;
+		}
+	}
+	txq->tx_fl_tail = id;
+
+	return fl_desc;
+}
+
+#ifdef TEST_64B_DESC_BYPASS
+static uint16_t qdma_xmit_64B_desc_bypass(struct qdma_tx_queue *txq,
+			struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	uint16_t count, id;
+	uint8_t *tx_ring_st_bypass = NULL;
+	int ofd = -1, ret = 0;
+	char fln[50];
+	struct qdma_pci_dev *qdma_dev = txq->dev->data->dev_private;
+
+	id = txq->q_pidx_info.pidx;
+
+	for (count = 0; count < nb_pkts; count++) {
+		tx_ring_st_bypass = (uint8_t *)txq->tx_ring;
+		memset(&tx_ring_st_bypass[id * (txq->bypass_desc_sz)],
+				((id % 255) + 1), txq->bypass_desc_sz);
+
+		snprintf(fln, sizeof(fln), "q_%u_%s", txq->queue_id,
+				"h2c_desc_data.txt");
+		ofd = open(fln, O_RDWR | O_CREAT | O_APPEND | O_SYNC,
+				0666);
+		if (ofd < 0) {
+			PMD_DRV_LOG(INFO, " txq[%d] unable to create "
+					"outfile to dump descriptor"
+					" data", txq->queue_id);
+			return 0;
+		}
+		ret = write(ofd, &(tx_ring_st_bypass[id *
+					(txq->bypass_desc_sz)]),
+					txq->bypass_desc_sz);
+		if (ret < txq->bypass_desc_sz)
+			PMD_DRV_LOG(DEBUG, "Txq[%d] descriptor data "
+					"len: %d, written to inputfile"
+					" :%d bytes", txq->queue_id,
+					txq->bypass_desc_sz, ret);
+		close(ofd);
+
+		rte_pktmbuf_free(tx_pkts[count]);
+
+		id++;
+		if (unlikely(id >= (txq->nb_tx_desc - 1)))
+			id -= (txq->nb_tx_desc - 1);
+	}
+
+	/* Make sure writes to the H2C descriptors are synchronized
+	 * before updating PIDX
+	 */
+	rte_wmb();
+
+	txq->q_pidx_info.pidx = id;
+	qdma_dev->hw_access->qdma_queue_pidx_update(txq->dev, qdma_dev->is_vf,
+			txq->queue_id, 0, &txq->q_pidx_info);
+
+	PMD_DRV_LOG(DEBUG, " xmit completed with count:%d\n", count);
+
+	return count;
+}
+#endif
+
 uint16_t qdma_get_rx_queue_id(void *queue_hndl)
 {
 	struct qdma_rx_queue *rxq = (struct qdma_rx_queue *)queue_hndl;
@@ -80,6 +239,50 @@ uint32_t get_mm_buff_size(void *queue_hndl)
 	return rxq->rx_buff_size;
 }
 
+struct qdma_ul_st_h2c_desc *get_st_h2c_desc(void *queue_hndl)
+{
+	volatile uint16_t id;
+	struct qdma_ul_st_h2c_desc *tx_ring_st;
+	struct qdma_ul_st_h2c_desc *desc;
+	struct qdma_tx_queue *txq = (struct qdma_tx_queue *)queue_hndl;
+
+	id = txq->q_pidx_info.pidx;
+	tx_ring_st = (struct qdma_ul_st_h2c_desc *)txq->tx_ring;
+	desc = (struct qdma_ul_st_h2c_desc *)&tx_ring_st[id];
+
+	id++;
+	if (unlikely(id >= (txq->nb_tx_desc - 1)))
+		id -= (txq->nb_tx_desc - 1);
+
+	txq->q_pidx_info.pidx = id;
+
+	return desc;
+}
+
+struct qdma_ul_mm_desc *get_mm_h2c_desc(void *queue_hndl)
+{
+	struct qdma_ul_mm_desc *desc;
+	struct qdma_tx_queue *txq = (struct qdma_tx_queue *)queue_hndl;
+	struct qdma_ul_mm_desc *tx_ring =
+			(struct qdma_ul_mm_desc *)txq->tx_ring;
+	uint32_t id;
+
+	id = txq->q_pidx_info.pidx;
+	desc = (struct qdma_ul_mm_desc *)&tx_ring[id];
+
+	id = (id + 1) % (txq->nb_tx_desc - 1);
+	txq->q_pidx_info.pidx = id;
+
+	return desc;
+}
+
+uint64_t get_mm_h2c_ep_addr(void *queue_hndl)
+{
+	struct qdma_tx_queue *txq = (struct qdma_tx_queue *)queue_hndl;
+
+	return txq->ep_addr;
+}
+
 #ifdef QDMA_LATENCY_OPTIMIZED
 static void adjust_c2h_cntr_avgs(struct qdma_rx_queue *rxq)
 {
@@ -276,6 +479,7 @@ qdma_dev_rx_queue_count(void *rxq)
 {
 	return rx_queue_count(rxq);
 }
+
 /**
  * DPDK callback to check the status of a Rx descriptor in the queue.
  *
@@ -326,3 +530,221 @@ qdma_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 
 	return RTE_ETH_RX_DESC_AVAIL;
 }
+
+/* Transmit API for Streaming mode */
+uint16_t qdma_xmit_pkts_st(struct qdma_tx_queue *txq, struct rte_mbuf **tx_pkts,
+			uint16_t nb_pkts)
+{
+	struct rte_mbuf *mb;
+	uint64_t pkt_len = 0;
+	int avail, in_use, ret, nsegs;
+	uint16_t cidx = 0;
+	uint16_t count = 0, id;
+	struct qdma_pci_dev *qdma_dev = txq->dev->data->dev_private;
+#ifdef TEST_64B_DESC_BYPASS
+	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(txq->bypass_desc_sz);
+
+	if (unlikely(txq->en_bypass &&
+			bypass_desc_sz_idx == SW_DESC_CNTXT_64B_BYPASS_DMA)) {
+		return qdma_xmit_64B_desc_bypass(txq, tx_pkts, nb_pkts);
+	}
+#endif
+
+	id = txq->q_pidx_info.pidx;
+	cidx = txq->wb_status->cidx;
+	PMD_DRV_LOG(DEBUG, "Xmit start on tx queue-id:%d, tail index:%d\n",
+			txq->queue_id, id);
+
+	/* Free transmitted mbufs back to pool */
+	reclaim_tx_mbuf(txq, cidx, 0);
+
+	in_use = (int)id - cidx;
+	if (in_use < 0)
+		in_use += (txq->nb_tx_desc - 1);
+
+	/* Make 1 less available, otherwise if we allow all descriptors
+	 * to be filled, when nb_pkts = nb_tx_desc - 1, pidx will be same
+	 * as old pidx and HW will treat this as no new descriptors were added.
+	 * Hence, DMA won't happen with new descriptors.
+	 */
+	avail = txq->nb_tx_desc - 2 - in_use;
+	if (!avail) {
+		PMD_DRV_LOG(DEBUG, "Tx queue full, in_use = %d", in_use);
+		return 0;
+	}
+
+	for (count = 0; count < nb_pkts; count++) {
+		mb = tx_pkts[count];
+		nsegs = mb->nb_segs;
+		if (nsegs > avail) {
+			/* Number of segments in current mbuf are greater
+			 * than number of descriptors available,
+			 * hence update PIDX and return
+			 */
+			break;
+		}
+		avail -= nsegs;
+		id = txq->q_pidx_info.pidx;
+		txq->sw_ring[id] = mb;
+		pkt_len += rte_pktmbuf_pkt_len(mb);
+
+#ifdef QDMA_TX_VEC_X86_64
+		ret = qdma_ul_update_st_h2c_desc_v(txq, txq->offloads, mb);
+#else
+		ret = qdma_ul_update_st_h2c_desc(txq, txq->offloads, mb);
+#endif /* QDMA_TX_VEC_X86_64 */
+		if (ret < 0)
+			break;
+	}
+
+	txq->stats.pkts += count;
+	txq->stats.bytes += pkt_len;
+
+	/* Make sure writes to the H2C descriptors are synchronized
+	 * before updating PIDX
+	 */
+	rte_wmb();
+
+#if (MIN_TX_PIDX_UPDATE_THRESHOLD > 1)
+	rte_spinlock_lock(&txq->pidx_update_lock);
+#endif
+	txq->tx_desc_pend += count;
+
+	/* Send PIDX update only if pending desc is more than threshold
+	 * Saves frequent Hardware transactions
+	 */
+	if (txq->tx_desc_pend >= MIN_TX_PIDX_UPDATE_THRESHOLD) {
+		qdma_dev->hw_access->qdma_queue_pidx_update(txq->dev,
+				qdma_dev->is_vf,
+				txq->queue_id, 0, &txq->q_pidx_info);
+
+		txq->tx_desc_pend = 0;
+	}
+#if (MIN_TX_PIDX_UPDATE_THRESHOLD > 1)
+	rte_spinlock_unlock(&txq->pidx_update_lock);
+#endif
+	PMD_DRV_LOG(DEBUG, " xmit completed with count:%d\n", count);
+
+	return count;
+}
+
+/* Transmit API for Memory mapped mode */
+uint16_t qdma_xmit_pkts_mm(struct qdma_tx_queue *txq, struct rte_mbuf **tx_pkts,
+			uint16_t nb_pkts)
+{
+	struct rte_mbuf *mb;
+	uint32_t count, id;
+	uint64_t len = 0;
+	int avail, in_use;
+	struct qdma_pci_dev *qdma_dev = txq->dev->data->dev_private;
+	uint16_t cidx = 0;
+	int nsegs = 0;
+
+#ifdef TEST_64B_DESC_BYPASS
+	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(txq->bypass_desc_sz);
+#endif
+
+	id = txq->q_pidx_info.pidx;
+	PMD_DRV_LOG(DEBUG, "Xmit start on tx queue-id:%d, tail index:%d\n",
+			txq->queue_id, id);
+
+#ifdef TEST_64B_DESC_BYPASS
+	if (unlikely(txq->en_bypass &&
+			bypass_desc_sz_idx == SW_DESC_CNTXT_64B_BYPASS_DMA)) {
+		PMD_DRV_LOG(DEBUG, "For MM mode, example design doesn't "
+				"support 64B bypass testing\n");
+		return 0;
+	}
+#endif
+	cidx = txq->wb_status->cidx;
+	/* Free transmitted mbufs back to pool */
+	reclaim_tx_mbuf(txq, cidx, 0);
+	in_use = (int)id - cidx;
+	if (in_use < 0)
+		in_use += (txq->nb_tx_desc - 1);
+
+	/* Make 1 less available, otherwise if we allow all descriptors to be
+	 * filled, when nb_pkts = nb_tx_desc - 1, pidx will be same as old pidx
+	 * and HW will treat this as no new descriptors were added.
+	 * Hence, DMA won't happen with new descriptors.
+	 */
+	avail = txq->nb_tx_desc - 2 - in_use;
+	if (!avail) {
+		PMD_DRV_LOG(ERR, "Tx queue full, in_use = %d", in_use);
+		return 0;
+	}
+
+	if (nb_pkts > avail)
+		nb_pkts = avail;
+
+	/* Set the xmit descriptors and control bits */
+	for (count = 0; count < nb_pkts; count++) {
+		mb = tx_pkts[count];
+		txq->sw_ring[id] = mb;
+		nsegs = mb->nb_segs;
+		if (nsegs > avail) {
+			/* Number of segments in current mbuf are greater
+			 * than number of descriptors available,
+			 * hence update PIDX and return
+			 */
+			break;
+		}
+
+		txq->ep_addr = mb->dynfield1[1];
+		txq->ep_addr = (txq->ep_addr << 32) | mb->dynfield1[0];
+
+		while (nsegs && mb) {
+			/* Update the descriptor control fields */
+			qdma_ul_update_mm_h2c_desc(txq, mb);
+
+			len = rte_pktmbuf_data_len(mb);
+			txq->ep_addr = txq->ep_addr + len;
+			id = txq->q_pidx_info.pidx;
+			mb = mb->next;
+		}
+	}
+
+	/* Make sure writes to the H2C descriptors are synchronized before
+	 * updating PIDX
+	 */
+	rte_wmb();
+
+	/* update pidx pointer */
+	if (count > 0) {
+		qdma_dev->hw_access->qdma_queue_pidx_update(txq->dev,
+				qdma_dev->is_vf,
+				txq->queue_id, 0, &txq->q_pidx_info);
+	}
+
+	PMD_DRV_LOG(DEBUG, " xmit completed with count:%d", count);
+	return count;
+}
+/**
+ * DPDK callback for transmitting packets in burst.
+ *
+ * @param tx_queue
+ *   Generic pointer to TX queue structure.
+ * @param[in] tx_pkts
+ *   Packets to transmit.
+ * @param nb_pkts
+ *   Number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully transmitted (<= nb_pkts).
+ */
+uint16_t qdma_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			uint16_t nb_pkts)
+{
+	struct qdma_tx_queue *txq = tx_queue;
+	uint16_t count;
+
+	if (txq->status != RTE_ETH_QUEUE_STATE_STARTED)
+		return 0;
+
+	if (txq->st_mode)
+		count = qdma_xmit_pkts_st(txq, tx_pkts, nb_pkts);
+	else
+		count = qdma_xmit_pkts_mm(txq, tx_pkts, nb_pkts);
+
+	return count;
+}
diff --git a/drivers/net/qdma/qdma_rxtx.h b/drivers/net/qdma/qdma_rxtx.h
index 5f902df695..397740abc0 100644
--- a/drivers/net/qdma/qdma_rxtx.h
+++ b/drivers/net/qdma/qdma_rxtx.h
@@ -7,6 +7,9 @@
 
 #include "qdma_access_export.h"
 
+/* forward declaration */
+struct qdma_tx_queue;
+
 /* Supporting functions for user logic pluggability */
 uint16_t qdma_get_rx_queue_id(void *queue_hndl);
 void qdma_get_device_info(void *queue_hndl,
@@ -15,6 +18,13 @@ void qdma_get_device_info(void *queue_hndl,
 struct qdma_ul_st_h2c_desc *get_st_h2c_desc(void *queue_hndl);
 struct qdma_ul_mm_desc *get_mm_h2c_desc(void *queue_hndl);
 uint32_t get_mm_c2h_ep_addr(void *queue_hndl);
+uint64_t get_mm_h2c_ep_addr(void *queue_hndl);
 uint32_t get_mm_buff_size(void *queue_hndl);
+uint16_t qdma_xmit_pkts_st(struct qdma_tx_queue *txq,
+			struct rte_mbuf **tx_pkts,
+			uint16_t nb_pkts);
+uint16_t qdma_xmit_pkts_mm(struct qdma_tx_queue *txq,
+			struct rte_mbuf **tx_pkts,
+			uint16_t nb_pkts);
 
 #endif /* QDMA_DPDK_RXTX_H_ */
diff --git a/drivers/net/qdma/qdma_user.c b/drivers/net/qdma/qdma_user.c
index 312bb86670..82f6750616 100644
--- a/drivers/net/qdma/qdma_user.c
+++ b/drivers/net/qdma/qdma_user.c
@@ -125,6 +125,55 @@ int qdma_ul_process_immediate_data_st(void *qhndl, void *cmpt_entry,
 	return 0;
 }
 
+/**
+ * Updates the ST H2C descriptor.
+ *
+ * @param qhndl
+ *   Pointer to TX queue handle.
+ * @param q_offloads
+ *   Offloads supported for the queue.
+ * @param mb
+ *   Pointer to memory buffer.
+ *
+ * @return
+ *   0 on success.
+ */
+int qdma_ul_update_st_h2c_desc(void *qhndl, uint64_t q_offloads,
+				struct rte_mbuf *mb)
+{
+	(void)q_offloads;
+	struct qdma_ul_st_h2c_desc *desc_info;
+	int nsegs = mb->nb_segs;
+	int pkt_segs = nsegs;
+
+	if (nsegs == 1) {
+		desc_info = get_st_h2c_desc(qhndl);
+		desc_info->len = rte_pktmbuf_data_len(mb);
+		desc_info->pld_len = desc_info->len;
+		desc_info->src_addr = mb->buf_iova + mb->data_off;
+		desc_info->flags = (S_H2C_DESC_F_SOP | S_H2C_DESC_F_EOP);
+		desc_info->cdh_flags = 0;
+	} else {
+		while (nsegs && mb) {
+			desc_info = get_st_h2c_desc(qhndl);
+
+			desc_info->len = rte_pktmbuf_data_len(mb);
+			desc_info->pld_len = desc_info->len;
+			desc_info->src_addr = mb->buf_iova + mb->data_off;
+			desc_info->flags = 0;
+			if (nsegs == pkt_segs)
+				desc_info->flags |= S_H2C_DESC_F_SOP;
+			if (nsegs == 1)
+				desc_info->flags |= S_H2C_DESC_F_EOP;
+			desc_info->cdh_flags = 0;
+
+			nsegs--;
+			mb = mb->next;
+		}
+	}
+	return 0;
+}
+
 /**
  * updates the MM c2h descriptor.
  *
@@ -155,6 +204,32 @@ int qdma_ul_update_mm_c2h_desc(void *qhndl, struct rte_mbuf *mb, void *desc)
 	return 0;
 }
 
+/**
+ * Updates the MM H2C descriptor.
+ *
+ * @param qhndl
+ *   Pointer to TX queue handle.
+ * @param mb
+ *   Pointer to memory buffer.
+ *
+ * @return
+ *   0 on success.
+ */
+int qdma_ul_update_mm_h2c_desc(void *qhndl, struct rte_mbuf *mb)
+{
+	struct qdma_ul_mm_desc *desc_info;
+
+	desc_info = (struct qdma_ul_mm_desc *)get_mm_h2c_desc(qhndl);
+	desc_info->src_addr = mb->buf_iova + mb->data_off;
+	desc_info->dst_addr = get_mm_h2c_ep_addr(qhndl);
+	desc_info->dv = 1;
+	desc_info->eop = 1;
+	desc_info->sop = 1;
+	desc_info->len = rte_pktmbuf_data_len(mb);
+
+	return 0;
+}
+
 /**
  * Processes the completion data from the given completion entry.
  *
-- 
2.36.1