Add loopback functionality, used when the chip is a VF, in order to
enable packet transmission between VFs and between VFs and the PF.

Signed-off-by: Moti Haimovsky <mo...@mellanox.com>
---
 drivers/net/mlx4/mlx4_prm.h  |  2 +-
 drivers/net/mlx4/mlx4_rxtx.c | 28 ++++++++++++++++++++++------
 drivers/net/mlx4/mlx4_rxtx.h |  2 ++
 drivers/net/mlx4/mlx4_txq.c  |  2 ++
 4 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index 38e9a45..e328cff 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -168,7 +168,7 @@
                  uint32_t srcrb_flags, uint32_t imm)
 {
        seg->fence_size = fence_size;
-       seg->srcrb_flags = rte_cpu_to_be_32(srcrb_flags);
+       seg->srcrb_flags = srcrb_flags;
        /*
         * The caller should prepare "imm" in advance based on WR opcode.
         * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM,
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 3415f63..ed19c72 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -426,7 +426,11 @@
        struct mlx4_wqe_data_seg *dseg;
        struct mlx4_sq *sq = &txq->msq;
        struct ibv_sge sge[wr->num_sge];
-       uint32_t srcrb_flags;
+       union {
+               uint32_t flags;
+               uint16_t flags16[2];
+       } srcrb;
+       uint32_t imm = 0;
        uint8_t fence_size;
        uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
        uint32_t owner_opcode;
@@ -466,10 +470,10 @@
        /*  Request Tx completion. */
        txq->elts_comp_cd -= nr_txbbs;
        if (unlikely(txq->elts_comp_cd <= 0)) {
-               srcrb_flags = MLX4_WQE_CTRL_SOLICIT | MLX4_WQE_CTRL_CQ_UPDATE;
+               srcrb.flags = MLX4_WQE_CTRL_SOLICIT | MLX4_WQE_CTRL_CQ_UPDATE;
                txq->elts_comp_cd = txq->elts_comp_cd_init;
        } else {
-               srcrb_flags = MLX4_WQE_CTRL_SOLICIT;
+               srcrb.flags = MLX4_WQE_CTRL_SOLICIT;
        }
        fence_size = (wr->send_flags & IBV_SEND_FENCE ?
                MLX4_WQE_CTRL_FENCE : 0) | ((wqe_real_size / 16) & 0x3f);
@@ -487,14 +491,26 @@
                        owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM |
                                        MLX4_WQE_CTRL_IL4_HDR_CSUM;
                        if (pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-                               srcrb_flags |= MLX4_WQE_CTRL_IP_HDR_CSUM;
+                               srcrb.flags |= MLX4_WQE_CTRL_IP_HDR_CSUM;
                } else {
-                       srcrb_flags |= MLX4_WQE_CTRL_IP_HDR_CSUM |
+                       srcrb.flags |= MLX4_WQE_CTRL_IP_HDR_CSUM |
                                      MLX4_WQE_CTRL_TCP_UDP_CSUM;
                }
        }
+       /* convert flags to BE before adding the mac address (if at all)
+        * to it
+        */
+       srcrb.flags = rte_cpu_to_be_32(srcrb.flags);
+       /* Copy dst mac address to wqe. This allows loopback in eSwitch,
+        * so that VFs and PF can communicate with each other
+        */
+       if (txq->lb) {
+               srcrb.flags16[0] = *(rte_pktmbuf_mtod(pkt, uint16_t *));
+               imm = *(rte_pktmbuf_mtod_offset(pkt, uint32_t *,
+                                               sizeof(uint16_t)));
+       }
        /* fill in ctrl info but ownership */
-       mlx4_set_ctrl_seg(ctrl, fence_size, srcrb_flags, 0);
+       mlx4_set_ctrl_seg(ctrl, fence_size, srcrb.flags, imm);
        /* If we used a bounce buffer then copy wqe back into sq */
        if (unlikely(bounce))
                ctrl = mlx4_bounce_to_desc(txq, head_idx, wqe_size);
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index b4675b7..8e407f5 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -148,6 +148,8 @@ struct txq {
        struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
        uint16_t tunnel_en:1;
        /* When set TX offload for tunneled packets are supported. */
+       uint16_t lb:1;
+       /* Whether pkts should be looped-back by eswitch or not */
        char *bounce_buf; /**< Side memory to be used when wqe wraps around */
 };
 
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index cecd5e8..296d72d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -410,6 +410,8 @@ struct txq_mp2mr_mbuf_check_data {
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
+       /* If a VF device - need to loopback xmitted packets */
+       tmpl.lb = !!(priv->vf);
        /* Clean up txq in case we're reinitializing it. */
        DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
        mlx4_txq_cleanup(txq);
-- 
1.8.3.1

Reply via email to