From: Abhimanyu Saini <absa...@amd.com> libvhost calls dev_conf() before processing the VHOST_USER_SET_VRING_CALL message for the last VQ. So this message is processed after dev_conf() returns.
However, the dev_conf() function spawns a thread to set rte_vhost_host_notifier_ctrl() before returning control to libvhost. This parallel thread in turn invokes get_notify_area(). To get the notify_area, the vdpa driver needs to query the HW and for this query it needs an enabled VQ. But at the same time libvhost is processing the last VHOST_USER_SET_VRING_CALL, and to do that it disables the last VQ. Hence there is a race between libvhost and the vdpa driver. To resolve this race condition, query the HW and cache notify_area inside dev_conf() instead of doing it in the parallel thread. Signed-off-by: Abhimanyu Saini <absa...@amd.com> --- drivers/vdpa/sfc/sfc_vdpa_ops.c | 36 ++++++++++++++++++------------------ drivers/vdpa/sfc/sfc_vdpa_ops.h | 1 + 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c index 63aa52d..b84699d 100644 --- a/drivers/vdpa/sfc/sfc_vdpa_ops.c +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c @@ -222,6 +222,7 @@ sfc_vdpa_virtq_start(struct sfc_vdpa_ops_data *ops_data, int vq_num) { int rc; + uint32_t doorbell; efx_virtio_vq_t *vq; struct sfc_vdpa_vring_info vring; efx_virtio_vq_cfg_t vq_cfg; @@ -270,22 +271,35 @@ /* Start virtqueue */ rc = efx_virtio_qstart(vq, &vq_cfg, &vq_dyncfg); if (rc != 0) { - /* destroy virtqueue */ sfc_vdpa_err(ops_data->dev_handle, "virtqueue start failed: %s", rte_strerror(rc)); - efx_virtio_qdestroy(vq); goto fail_virtio_qstart; } sfc_vdpa_info(ops_data->dev_handle, "virtqueue started successfully for vq_num %d", vq_num); + rc = efx_virtio_get_doorbell_offset(vq, &doorbell); + if (rc != 0) { + sfc_vdpa_err(ops_data->dev_handle, + "failed to get doorbell offset: %s", + rte_strerror(rc)); + goto fail_doorbell; + } + + /* + * Cache the bar_offset here for each VQ here, it will come + * in handy when sfc_vdpa_get_notify_area() is invoked. 
+ */ + ops_data->vq_cxt[vq_num].doorbell = (void *)(uintptr_t)doorbell; ops_data->vq_cxt[vq_num].enable = B_TRUE; return rc; +fail_doorbell: fail_virtio_qstart: + efx_virtio_qdestroy(vq); fail_vring_info: return rc; } @@ -792,8 +806,6 @@ int ret; efx_nic_t *nic; int vfio_dev_fd; - efx_rc_t rc; - unsigned int bar_offset; volatile void *doorbell; struct rte_pci_device *pci_dev; struct rte_vdpa_device *vdpa_dev; @@ -824,19 +836,6 @@ return -1; } - if (ops_data->vq_cxt[qid].enable != B_TRUE) { - sfc_vdpa_err(dev, "vq is not enabled"); - return -1; - } - - rc = efx_virtio_get_doorbell_offset(ops_data->vq_cxt[qid].vq, - &bar_offset); - if (rc != 0) { - sfc_vdpa_err(dev, "failed to get doorbell offset: %s", - rte_strerror(rc)); - return rc; - } - reg.index = sfc_vdpa_adapter_by_dev_handle(dev)->mem_bar.esb_rid; ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); if (ret != 0) { @@ -845,7 +844,8 @@ return ret; } - *offset = reg.offset + bar_offset; + /* Use bar_offset that was cached during sfc_vdpa_virtq_start() */ + *offset = reg.offset + (uint64_t)ops_data->vq_cxt[qid].doorbell; len = (1U << encp->enc_vi_window_shift) / 2; if (len >= sysconf(_SC_PAGESIZE)) { diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h b/drivers/vdpa/sfc/sfc_vdpa_ops.h index 6d790fd..9dbd5b8 100644 --- a/drivers/vdpa/sfc/sfc_vdpa_ops.h +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h @@ -35,6 +35,7 @@ struct sfc_vdpa_vring_info { }; typedef struct sfc_vdpa_vq_context_s { + volatile void *doorbell; uint8_t enable; uint32_t pidx; uint32_t cidx; -- 1.8.3.1