Implement xsc PMD transmit function.

Signed-off-by: WanRenyong <wa...@yunsilicon.com>
Signed-off-by: Rong Qian <qi...@yunsilicon.com>
---
 doc/guides/nics/features/xsc.ini |   4 +
 drivers/net/xsc/xsc_rxtx.c       | 231 ++++++++++++++++++++++++++++++-
 drivers/net/xsc/xsc_rxtx.h       |   9 ++
 3 files changed, 242 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/xsc.ini b/doc/guides/nics/features/xsc.ini
index bdeb7a984b..772c6418c4 100644
--- a/doc/guides/nics/features/xsc.ini
+++ b/doc/guides/nics/features/xsc.ini
@@ -7,6 +7,10 @@
 RSS hash             = Y
 RSS key update       = Y
 RSS reta update      = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
+Inner L3 checksum    = Y
+Inner L4 checksum    = Y
 Linux                = Y
 ARMv8                = Y
 x86-64               = Y
diff --git a/drivers/net/xsc/xsc_rxtx.c b/drivers/net/xsc/xsc_rxtx.c
index 28360e62ff..7a31cd428c 100644
--- a/drivers/net/xsc/xsc_rxtx.c
+++ b/drivers/net/xsc/xsc_rxtx.c
@@ -14,6 +14,8 @@
 #define XSC_CQE_OWNER_HW   0x2
 #define XSC_CQE_OWNER_SW   0x4
 #define XSC_CQE_OWNER_ERR  0x8
+#define XSC_OPCODE_RAW 0x7 /* msg_opcode value written to every TX WQE control segment */
+#define XSC_TX_COMP_CQE_HANDLE_MAX 2 /* Max CQEs consumed per xsc_tx_cqes_handle() call */
 
 #define XSC_MAX_RX_BURST_MBUFS 64
 
@@ -201,9 +203,234 @@ xsc_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        return nb_pkts;
 }
 
+/*
+ * Free the mbufs stored in the elts ring from txq->elts_tail up to (but not
+ * including) tail, and advance txq->elts_tail accordingly.
+ *
+ * The ring is released in contiguous chunks so that a range wrapping past
+ * the end of the elts array is handled with one bulk free per chunk.
+ */
+static __rte_always_inline void
+xsc_tx_elts_free(struct xsc_txq_data *__rte_restrict txq, uint16_t tail)
+{
+       uint16_t pending = tail - txq->elts_tail;
+       uint32_t start;
+       uint32_t chunk;
+
+       do {
+               /* Entries available before the end of the array is reached */
+               start = txq->elts_tail & txq->elts_m;
+               chunk = txq->elts_s - start;
+               chunk = RTE_MIN(chunk, pending);
+
+               rte_pktmbuf_free_bulk(&txq->elts[start], chunk);
+               txq->elts_tail += chunk;
+               pending -= chunk;
+       } while (pending > 0);
+}
+
+/*
+ * Poll the TX completion queue and reclaim finished descriptors.
+ *
+ * At most XSC_TX_COMP_CQE_HANDLE_MAX CQEs are consumed per call. If at
+ * least one CQE was consumed, the CQ doorbell is written with the new
+ * consumer index. If the newest consumed CQE is a valid one, txq->wqe_pi is
+ * refreshed from its wqe_id and the mbufs up to the elts position recorded
+ * for that CQE are freed.
+ */
+static void
+xsc_tx_cqes_handle(struct xsc_txq_data *__rte_restrict txq)
+{
+       volatile struct xsc_cqe *newest = NULL;
+       volatile struct xsc_cqe *cur;
+       union xsc_cq_doorbell cq_db = {
+               .cq_data = 0
+       };
+       bool consumed = false;
+       uint32_t budget;
+       uint16_t tail;
+       int owner;
+
+       for (budget = XSC_TX_COMP_CQE_HANDLE_MAX; budget > 0; --budget) {
+               cur = &txq->cqes[txq->cq_ci & txq->cqe_m];
+               owner = check_cqe_own(cur, txq->cqe_n, txq->cq_ci);
+
+               if (likely(owner == XSC_CQE_OWNER_SW)) {
+                       consumed = true;
+                       ++txq->cq_ci;
+                       newest = cur;
+               } else if (unlikely(owner == XSC_CQE_OWNER_ERR)) {
+                       /*
+                        * Error CQE: skip it, resynchronize cq_pi with cq_ci
+                        * and forget any earlier valid CQE of this call.
+                        */
+                       consumed = true;
+                       ++txq->cq_ci;
+                       txq->cq_pi = txq->cq_ci;
+                       newest = NULL;
+               } else {
+                       /* No new CQEs in completion queue. */
+                       break;
+               }
+       }
+
+       if (unlikely(!consumed))
+               return;
+
+       /* Ring doorbell */
+       cq_db.next_cid = txq->cq_ci;
+       cq_db.cq_num = txq->cqn;
+       rte_compiler_barrier();
+       *txq->cq_db = rte_cpu_to_le_32(cq_db.cq_data);
+
+       if (unlikely(newest == NULL))
+               return;
+
+       /* Release completed elts */
+       txq->wqe_pi = rte_le_to_cpu_16(newest->wqe_id) >> txq->wqe_ds_n;
+       tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
+       if (likely(tail != txq->elts_tail))
+               xsc_tx_elts_free(txq, tail);
+}
+
+/*
+ * Fill the control segment of the send WQE used for one packet.
+ *
+ * WQE slots are reused, so state left behind by the previous packet in the
+ * same slot (data segment lengths, checksum and segmentation settings) is
+ * reset here before the new values are written.
+ *
+ * @param txq
+ *   TX queue; wqe_ci, wqe_ds_n and tso_en are read.
+ * @param mbuf
+ *   Packet to send; nb_segs, ol_flags, header lengths and tso_segsz are read.
+ * @param wqe
+ *   WQE slot to initialize.
+ */
+static __rte_always_inline void
+xsc_tx_wqe_ctrl_seg_init(struct xsc_txq_data *__rte_restrict txq,
+                        struct rte_mbuf *__rte_restrict mbuf,
+                        struct xsc_wqe *__rte_restrict wqe)
+{
+       struct xsc_send_wqe_ctrl_seg *cs = &wqe->cseg;
+       int ds_max = (1 << txq->wqe_ds_n) - 1;
+       int i;
+
+       cs->msg_opcode = XSC_OPCODE_RAW;
+       cs->wqe_id = rte_cpu_to_le_16(txq->wqe_ci << txq->wqe_ds_n);
+       cs->has_pph = 0;
+
+       /* Clear the seg len of data segments used by the slot's previous packet */
+       if (cs->ds_data_num > 1 && cs->ds_data_num <= ds_max) {
+               for (i = 1; i < cs->ds_data_num; i++)
+                       wqe->dseg[i].seg_len = 0;
+       }
+
+       cs->ds_data_num = mbuf->nb_segs;
+       if (mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+               cs->csum_en = 0x2;
+       else
+               cs->csum_en = 0;
+
+       if (txq->tso_en == 1 && (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+               cs->so_type = 1;
+               cs->so_hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
+               cs->so_data_size = rte_cpu_to_le_16(mbuf->tso_segsz);
+       } else {
+               /*
+                * Do not inherit a segmentation request from a TSO packet
+                * that previously occupied this WQE slot.
+                */
+               cs->so_type = 0;
+       }
+
+       /* Fall back to the first segment length if pkt_len was left unset */
+       cs->msg_len = rte_cpu_to_le_32(rte_pktmbuf_pkt_len(mbuf));
+       if (unlikely(cs->msg_len == 0))
+               cs->msg_len = rte_cpu_to_le_32(rte_pktmbuf_data_len(mbuf));
+
+       /* Do not generate a CQE for every packet; the caller sets ce per batch */
+       cs->ce = 0;
+}
+
+/*
+ * Fill one WQE data segment per mbuf segment of the packet.
+ *
+ * Each data segment receives the IOVA and data length of the matching mbuf
+ * in the chain; in_line and lkey are set to 0.
+ *
+ * NOTE(review): nb_segs is not checked against the number of data segments
+ * a WQE can hold - presumably the caller guarantees it; confirm.
+ */
+static __rte_always_inline void
+xsc_tx_wqe_data_seg_init(struct rte_mbuf *mbuf, struct xsc_wqe *wqe)
+{
+       struct xsc_wqe_data_seg *seg = &wqe->dseg[0];
+       struct rte_mbuf *m = mbuf;
+       rte_iova_t addr;
+       uint32_t len;
+       uint16_t left;
+
+       for (left = mbuf->nb_segs; left > 0; --left, ++seg, m = m->next) {
+               addr = rte_pktmbuf_iova(m);
+               len = rte_pktmbuf_data_len(m);
+
+               seg->in_line = 0;
+               seg->seg_len = rte_cpu_to_le_32(len);
+               seg->lkey = 0;
+               seg->va = rte_cpu_to_le_64(addr);
+       }
+}
+
+/*
+ * Build one WQE per packet, starting at the current txq->wqe_ci.
+ *
+ * txq->wqe_ci is advanced by one for every packet. A WQE slot spans
+ * (1 << wqe_ds_n) control-segment-sized units inside txq->wqes.
+ *
+ * @return
+ *   Pointer to the last WQE written, or NULL when pkts_n is 0.
+ */
+static __rte_always_inline struct xsc_wqe *
+xsc_tx_wqes_fill(struct xsc_txq_data *__rte_restrict txq,
+                struct rte_mbuf **__rte_restrict pkts,
+                uint32_t pkts_n)
+{
+       struct xsc_send_wqe_ctrl_seg *slot;
+       struct xsc_wqe *cur_wqe = NULL;
+       struct rte_mbuf *pkt;
+       uint32_t n;
+
+       for (n = 0; n != pkts_n; ++n) {
+               pkt = pkts[n];
+               rte_prefetch0(pkt);
+
+               /* Locate the ring slot for the current producer index */
+               slot = (struct xsc_send_wqe_ctrl_seg *)txq->wqes;
+               slot += (txq->wqe_ci & txq->wqe_m) * (1 << txq->wqe_ds_n);
+               cur_wqe = (struct xsc_wqe *)slot;
+
+               /* Control segment first, then the data segments */
+               xsc_tx_wqe_ctrl_seg_init(txq, pkt, cur_wqe);
+               xsc_tx_wqe_data_seg_init(pkt, cur_wqe);
+               ++txq->wqe_ci;
+       }
+
+       return cur_wqe;
+}
+
+/*
+ * Ring the send doorbell.
+ *
+ * @param db
+ *   Doorbell location to write.
+ * @param index
+ *   WQE producer index; it is scaled by (1 << ds_n) before being written.
+ * @param qpn
+ *   Queue pair number placed in the doorbell word.
+ * @param ds_n
+ *   Shift applied to index.
+ */
+static __rte_always_inline void
+xsc_tx_doorbell_ring(volatile uint32_t *db, uint32_t index,
+                    uint32_t qpn, uint16_t ds_n)
+{
+       /* Start from an all-zero word so bits outside the two fields are defined */
+       union xsc_send_doorbell tx_db = {
+               .send_data = 0
+       };
+
+       /* Order the preceding WQE stores before the doorbell write */
+       rte_io_wmb();
+       tx_db.next_pid = index << ds_n;
+       tx_db.qp_num = qpn;
+       *db  = rte_cpu_to_le_32(tx_db.send_data);
+
+       rte_wmb();
+}
+
+/*
+ * Copy the mbuf pointers of a sent batch into the elts ring, starting at
+ * txq->elts_head and wrapping to the start of the array when needed.
+ *
+ * txq->elts_head itself is not modified here.
+ */
+static __rte_always_inline void
+xsc_tx_elts_store(struct xsc_txq_data *__rte_restrict txq,
+                 struct rte_mbuf **__rte_restrict pkts,
+                 uint32_t pkts_n)
+{
+       struct rte_mbuf **ring = (struct rte_mbuf **)txq->elts;
+       uint32_t pos = txq->elts_head & txq->elts_m;
+       uint32_t room = txq->elts_s - pos;
+       uint32_t first = RTE_MIN(room, pkts_n);
+
+       /* Part that fits before the end of the array */
+       rte_memcpy((void *)&ring[pos], (void *)pkts,
+                  first * sizeof(struct rte_mbuf *));
+
+       /* Remainder, stored from the start of the array */
+       if (unlikely(first < pkts_n))
+               rte_memcpy((void *)ring, (void *)&pkts[first],
+                          (pkts_n - first) * sizeof(struct rte_mbuf *));
+}
+
 uint16_t
 xsc_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-       return 0;
-}
+       struct xsc_txq_data *txq = dpdk_txq;
+       uint32_t tx_n, remain_n = pkts_n;
+       uint16_t idx, elts_free, wqe_free;
+       uint16_t elts_head;
+       struct xsc_wqe *last_wqe;
 
+       if (unlikely(!pkts_n))
+               return 0;
+loop:
+       xsc_tx_cqes_handle(txq);
+
+       elts_free = txq->elts_s - (uint16_t)(txq->elts_head - txq->elts_tail);
+       wqe_free = txq->wqe_s - (((txq->wqe_ci << txq->wqe_ds_n) -
+                  (txq->wqe_pi << txq->wqe_ds_n)) >> txq->wqe_ds_n);
+       if (unlikely(elts_free == 0 || wqe_free == 0))
+               goto exit;
+
+       /* Fill in WQEs */
+       tx_n = RTE_MIN(remain_n, wqe_free);
+       idx = pkts_n - remain_n;
+       last_wqe = xsc_tx_wqes_fill(txq, &pkts[idx], tx_n);
+       remain_n -= tx_n;
+       last_wqe->cseg.ce = 1;
+
+       /* Update free-cqs, elts_comp */
+       elts_head = txq->elts_head;
+       elts_head += tx_n;
+       if ((uint16_t)(elts_head - txq->elts_comp) > 0) {
+               txq->elts_comp = elts_head;
+               txq->fcqs[txq->cq_pi++ & txq->cqe_m] = elts_head;
+       }
+
+       /* Ring tx doorbell */
+       xsc_tx_doorbell_ring(txq->qp_db, txq->wqe_ci,
+                       txq->qpn, txq->wqe_ds_n);
+
+       xsc_tx_elts_store(txq, &pkts[idx], tx_n);
+       txq->elts_head += tx_n;
+
+       if (remain_n > 0)
+               /* Try to process cqe, if wqe free is still 0, exit */
+               goto loop;
+
+exit:
+       return pkts_n - remain_n;
+}
diff --git a/drivers/net/xsc/xsc_rxtx.h b/drivers/net/xsc/xsc_rxtx.h
index 454d5c0378..ce6e47ad4c 100644
--- a/drivers/net/xsc/xsc_rxtx.h
+++ b/drivers/net/xsc/xsc_rxtx.h
@@ -166,6 +166,15 @@ struct __rte_cache_aligned xsc_rxq_data {
        uint32_t rss_hash:1; /* RSS hash enabled */
 };
 
+union xsc_cq_doorbell { /* Layout of the 32-bit CQ doorbell word */
+       struct {
+               uint32_t next_cid : 16; /* Next CQ consumer index */
+               uint32_t cq_num : 15; /* CQ number */
+               uint32_t cq_sta : 1; /* NOTE(review): meaning not visible here; TX path leaves it 0 */
+       };
+       uint32_t cq_data; /* Whole word, as written to the CQ doorbell */
+};
+
 union xsc_recv_doorbell {
        struct {
                uint32_t next_pid : 13;
-- 
2.25.1

Reply via email to