Add loopback functionality, used when the chip is a VF, in order to enable packet transmission between VFs and between VFs and the PF.
Signed-off-by: Moti Haimovsky <mo...@mellanox.com> --- drivers/net/mlx4/mlx4_prm.h | 2 +- drivers/net/mlx4/mlx4_rxtx.c | 28 ++++++++++++++++++++++------ drivers/net/mlx4/mlx4_rxtx.h | 2 ++ drivers/net/mlx4/mlx4_txq.c | 2 ++ 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h index 38e9a45..e328cff 100644 --- a/drivers/net/mlx4/mlx4_prm.h +++ b/drivers/net/mlx4/mlx4_prm.h @@ -168,7 +168,7 @@ uint32_t srcrb_flags, uint32_t imm) { seg->fence_size = fence_size; - seg->srcrb_flags = rte_cpu_to_be_32(srcrb_flags); + seg->srcrb_flags = srcrb_flags; /* * The caller should prepare "imm" in advance based on WR opcode. * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM, diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c index 3415f63..ed19c72 100644 --- a/drivers/net/mlx4/mlx4_rxtx.c +++ b/drivers/net/mlx4/mlx4_rxtx.c @@ -426,7 +426,11 @@ struct mlx4_wqe_data_seg *dseg; struct mlx4_sq *sq = &txq->msq; struct ibv_sge sge[wr->num_sge]; - uint32_t srcrb_flags; + union { + uint32_t flags; + uint16_t flags16[2]; + } srcrb; + uint32_t imm = 0; uint8_t fence_size; uint32_t head_idx = sq->head & sq->txbb_cnt_mask; uint32_t owner_opcode; @@ -466,10 +470,10 @@ /* Request Tx completion. */ txq->elts_comp_cd -= nr_txbbs; if (unlikely(txq->elts_comp_cd <= 0)) { - srcrb_flags = MLX4_WQE_CTRL_SOLICIT | MLX4_WQE_CTRL_CQ_UPDATE; + srcrb.flags = MLX4_WQE_CTRL_SOLICIT | MLX4_WQE_CTRL_CQ_UPDATE; txq->elts_comp_cd = txq->elts_comp_cd_init; } else { - srcrb_flags = MLX4_WQE_CTRL_SOLICIT; + srcrb.flags = MLX4_WQE_CTRL_SOLICIT; } fence_size = (wr->send_flags & IBV_SEND_FENCE ? 
MLX4_WQE_CTRL_FENCE : 0) | ((wqe_real_size / 16) & 0x3f); @@ -487,14 +491,26 @@ owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM | MLX4_WQE_CTRL_IL4_HDR_CSUM; if (pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM) - srcrb_flags |= MLX4_WQE_CTRL_IP_HDR_CSUM; + srcrb.flags |= MLX4_WQE_CTRL_IP_HDR_CSUM; } else { - srcrb_flags |= MLX4_WQE_CTRL_IP_HDR_CSUM | + srcrb.flags |= MLX4_WQE_CTRL_IP_HDR_CSUM | MLX4_WQE_CTRL_TCP_UDP_CSUM; } } + /* convert flags to BE before adding the mac address (if at all) + * to it + */ + srcrb.flags = rte_cpu_to_be_32(srcrb.flags); + /* Copy dst mac address to wqe. This allows loopback in eSwitch, + * so that VFs and PF can communicate with each other + */ + if (txq->lb) { + srcrb.flags16[0] = *(rte_pktmbuf_mtod(pkt, uint16_t *)); + imm = *(rte_pktmbuf_mtod_offset(pkt, uint32_t *, + sizeof(uint16_t))); + } /* fill in ctrl info but ownership */ - mlx4_set_ctrl_seg(ctrl, fence_size, srcrb_flags, 0); + mlx4_set_ctrl_seg(ctrl, fence_size, srcrb.flags, imm); /* If we used a bounce buffer then copy wqe back into sq */ if (unlikely(bounce)) ctrl = mlx4_bounce_to_desc(txq, head_idx, wqe_size); diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h index b4675b7..8e407f5 100644 --- a/drivers/net/mlx4/mlx4_rxtx.h +++ b/drivers/net/mlx4/mlx4_rxtx.h @@ -148,6 +148,8 @@ struct txq { struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */ uint16_t tunnel_en:1; /* When set TX offload for tunneled packets are supported. 
*/ + uint16_t lb:1; + /* Whether pkts should be looped-back by eswitch or not */ char *bounce_buf; /**< Side memory to be used when wqe wraps around */ }; diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c index cecd5e8..296d72d 100644 --- a/drivers/net/mlx4/mlx4_txq.c +++ b/drivers/net/mlx4/mlx4_txq.c @@ -410,6 +410,8 @@ struct txq_mp2mr_mbuf_check_data { (void *)dev, strerror(rte_errno)); goto error; } + /* If a VF device - need to loopback xmitted packets */ + tmpl.lb = !!(priv->vf); /* Clean up txq in case we're reinitializing it. */ DEBUG("%p: cleaning-up old txq just in case", (void *)txq); mlx4_txq_cleanup(txq); -- 1.8.3.1