From: Yajun Wu <yaj...@nvidia.com> To speed up queue create time, event qp and cq will create only once. Each virtq creation will reuse same event qp and cq.
Because FW will set event qp to error state during virtq destroy, need modify event qp to RESET state, then modify qp to RTS state as usual. This can save about 1.5ms for each virtq creation. After SW qp reset, qp pi/ci all become 0 while cq pi/ci keep as previous. Add new variable qp_ci to save SW qp ci. Move qp pi independently with cq ci. Add new function mlx5_vdpa_drain_cq to drain cq CQE after virtq release. Signed-off-by: Yajun Wu <yaj...@nvidia.com> --- drivers/vdpa/mlx5/mlx5_vdpa.c | 8 ++++ drivers/vdpa/mlx5/mlx5_vdpa.h | 12 +++++- drivers/vdpa/mlx5/mlx5_vdpa_event.c | 60 +++++++++++++++++++++++++++-- drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 6 +-- 4 files changed, 78 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c index faf833ee2f..ee99952e11 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c @@ -269,6 +269,7 @@ mlx5_vdpa_dev_close(int vid) } mlx5_vdpa_steer_unset(priv); mlx5_vdpa_virtqs_release(priv); + mlx5_vdpa_drain_cq(priv); if (priv->lm_mr.addr) mlx5_os_wrapped_mkey_destroy(&priv->lm_mr); priv->state = MLX5_VDPA_STATE_PROBED; @@ -555,7 +556,14 @@ mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv) return 0; for (index = 0; index < (priv->queues * 2); ++index) { struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index]; + int ret = mlx5_vdpa_event_qp_prepare(priv, priv->queue_size, + -1, &virtq->eqp); + if (ret) { + DRV_LOG(ERR, "Failed to create event QPs for virtq %d.", + index); + return -1; + } if (priv->caps.queue_counters_valid) { if (!virtq->counters) virtq->counters = diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h index f6719a3c60..bf82026e37 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h @@ -55,6 +55,7 @@ struct mlx5_vdpa_event_qp { struct mlx5_vdpa_cq cq; struct mlx5_devx_obj *fw_qp; struct mlx5_devx_qp sw_qp; + uint16_t qp_pi; }; struct mlx5_vdpa_query_mr { @@ -226,7 +227,7 @@ int mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv); * @return * 0 on success, -1 otherwise and rte_errno is set. */ -int mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n, +int mlx5_vdpa_event_qp_prepare(struct mlx5_vdpa_priv *priv, uint16_t desc_n, int callfd, struct mlx5_vdpa_event_qp *eqp); /** @@ -479,4 +480,13 @@ mlx5_vdpa_virtq_stats_get(struct mlx5_vdpa_priv *priv, int qid, */ int mlx5_vdpa_virtq_stats_reset(struct mlx5_vdpa_priv *priv, int qid); + +/** + * Drain virtq CQ CQE. + * + * @param[in] priv + * The vdpa driver private structure. + */ +void +mlx5_vdpa_drain_cq(struct mlx5_vdpa_priv *priv); #endif /* RTE_PMD_MLX5_VDPA_H_ */ diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c index 7167a98db0..b43dca9255 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c @@ -137,7 +137,7 @@ mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq) }; uint32_t word; } last_word; - uint16_t next_wqe_counter = cq->cq_ci; + uint16_t next_wqe_counter = eqp->qp_pi; uint16_t cur_wqe_counter; uint16_t comp; @@ -156,9 +156,10 @@ mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq) rte_io_wmb(); /* Ring CQ doorbell record. */ cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); + eqp->qp_pi += comp; rte_io_wmb(); /* Ring SW QP doorbell record. */ - eqp->sw_qp.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size); + eqp->sw_qp.db_rec[0] = rte_cpu_to_be_32(eqp->qp_pi + cq_size); } return comp; } @@ -232,6 +233,25 @@ mlx5_vdpa_queues_complete(struct mlx5_vdpa_priv *priv) return max; } +void +mlx5_vdpa_drain_cq(struct mlx5_vdpa_priv *priv) +{ + unsigned int i; + + for (i = 0; i < priv->caps.max_num_virtio_queues * 2; i++) { + struct mlx5_vdpa_cq *cq = &priv->virtqs[i].eqp.cq; + + mlx5_vdpa_queue_complete(cq); + if (cq->cq_obj.cq) { + cq->cq_obj.cqes[0].wqe_counter = + rte_cpu_to_be_16(UINT16_MAX); + priv->virtqs[i].eqp.qp_pi = 0; + if (!cq->armed) + mlx5_vdpa_cq_arm(priv, cq); + } + } +} + /* Wait on all CQs channel for completion event. */ static struct mlx5_vdpa_cq * mlx5_vdpa_event_wait(struct mlx5_vdpa_priv *priv __rte_unused) @@ -574,14 +594,44 @@ mlx5_vdpa_qps2rts(struct mlx5_vdpa_event_qp *eqp) return 0; } +static int +mlx5_vdpa_qps2rst2rts(struct mlx5_vdpa_event_qp *eqp) +{ + if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_QP_2RST, + eqp->sw_qp.qp->id)) { + DRV_LOG(ERR, "Failed to modify FW QP to RST state(%u).", + rte_errno); + return -1; + } + if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp, + MLX5_CMD_OP_QP_2RST, eqp->fw_qp->id)) { + DRV_LOG(ERR, "Failed to modify SW QP to RST state(%u).", + rte_errno); + return -1; + } + return mlx5_vdpa_qps2rts(eqp); +} + int -mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n, +mlx5_vdpa_event_qp_prepare(struct mlx5_vdpa_priv *priv, uint16_t desc_n, int callfd, struct mlx5_vdpa_event_qp *eqp) { struct mlx5_devx_qp_attr attr = {0}; uint16_t log_desc_n = rte_log2_u32(desc_n); uint32_t ret; + if (eqp->cq.cq_obj.cq != NULL && log_desc_n == eqp->cq.log_desc_n) { + /* Reuse existing resources. */ + eqp->cq.callfd = callfd; + /* FW will set event qp to error state in q destroy. */ + if (!mlx5_vdpa_qps2rst2rts(eqp)) { + rte_write32(rte_cpu_to_be_32(RTE_BIT32(log_desc_n)), + &eqp->sw_qp.db_rec[0]); + return 0; + } + } + if (eqp->fw_qp) + mlx5_vdpa_event_qp_destroy(eqp); if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq)) return -1; attr.pd = priv->cdev->pdn; @@ -608,8 +658,10 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n, } if (mlx5_vdpa_qps2rts(eqp)) goto error; + eqp->qp_pi = 0; /* First ringing. */ - rte_write32(rte_cpu_to_be_32(RTE_BIT32(log_desc_n)), + if (eqp->sw_qp.db_rec) + rte_write32(rte_cpu_to_be_32(RTE_BIT32(log_desc_n)), &eqp->sw_qp.db_rec[0]); return 0; error: diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c index c258eb3024..6637ba1503 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c @@ -87,6 +87,8 @@ mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv) } virtq->umems[j].size = 0; } + if (virtq->eqp.fw_qp) + mlx5_vdpa_event_qp_destroy(&virtq->eqp); } } @@ -117,8 +119,6 @@ mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq) claim_zero(mlx5_devx_cmd_destroy(virtq->virtq)); } virtq->virtq = NULL; - if (virtq->eqp.fw_qp) - mlx5_vdpa_event_qp_destroy(&virtq->eqp); virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_DISABLED; return 0; } @@ -246,7 +246,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index) MLX5_VIRTQ_EVENT_MODE_QP : MLX5_VIRTQ_EVENT_MODE_NO_MSIX; if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) { - ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd, + ret = mlx5_vdpa_event_qp_prepare(priv, vq.size, vq.callfd, &virtq->eqp); if (ret) { DRV_LOG(ERR, "Failed to create event QPs for virtq %d.", -- 2.31.1