Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coque...@redhat.com> > Sent: Monday, August 3, 2020 5:51 PM > To: Xueming(Steven) Li <xuemi...@mellanox.com> > Cc: dev@dpdk.org; a...@dpdk.org; "Penso <asafp"@mellanox.com > Subject: Re: [dpdk-dev] [v2] vdpa/mlx5: fix queue update synchronization > > > > On 8/2/20 11:21 AM, Xueming Li wrote: > > The driver CQ event management is done by non vhost library thread, > > either the dpdk host thread or the internal vDPA driver thread. > > > > When a queue is updated the CQ may be destroyed and created by the > > vhost library thread via the queue state operation. > > > > When the queue update feature was added, it didn't synchronize the CQ > > management to the queue update what may cause invalid memory access. > > > > Add the aforementioned synchronization by a new per device > > configuration mutex. > > > > Fixes: c47d6e83334e ("vdpa/mlx5: support queue update") > > > > Signed-off-by: Xueming Li <xuemi...@mellanox.com> > > Acked-by: Matan Azrad <ma...@mellanox.com> > > --- > > drivers/vdpa/mlx5/mlx5_vdpa.c | 8 +++++++- > > drivers/vdpa/mlx5/mlx5_vdpa.h | 1 + > > drivers/vdpa/mlx5/mlx5_vdpa_event.c | 8 ++++++++ > > 3 files changed, 16 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c > > b/drivers/vdpa/mlx5/mlx5_vdpa.c index c0b87bcc01..a8f3e4b1de 100644 > > --- a/drivers/vdpa/mlx5/mlx5_vdpa.c > > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c > > @@ -133,6 +133,7 @@ mlx5_vdpa_set_vring_state(int vid, int vring, int state) > > struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid); > > struct mlx5_vdpa_priv *priv = > > mlx5_vdpa_find_priv_resource_by_vdev(vdev); > > + int ret; > > > > if (priv == NULL) { > > DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device- > >name); @@ > > -142,7 +143,10 @@ mlx5_vdpa_set_vring_state(int vid, int vring, int state) > > DRV_LOG(ERR, "Too big vring id: %d.", vring); > > return -E2BIG; > > } > > - return mlx5_vdpa_virtq_enable(priv, vring, 
state); > > + pthread_mutex_lock(&priv->vq_config_lock); > > + ret = mlx5_vdpa_virtq_enable(priv, vring, state); > > + pthread_mutex_unlock(&priv->vq_config_lock); > > + return ret; > > } > > > > static int > > @@ -742,6 +746,7 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv > __rte_unused, > > } > > mlx5_vdpa_config_get(pci_dev->device.devargs, priv); > > SLIST_INIT(&priv->mr_list); > > + pthread_mutex_init(&priv->vq_config_lock, NULL); > > pthread_mutex_lock(&priv_list_lock); > > TAILQ_INSERT_TAIL(&priv_list, priv, next); > > pthread_mutex_unlock(&priv_list_lock); > > @@ -793,6 +798,7 @@ mlx5_vdpa_pci_remove(struct rte_pci_device > *pci_dev) > > priv->var = NULL; > > } > > mlx5_glue->close_device(priv->ctx); > > + pthread_mutex_destroy(&priv->vq_config_lock); > > rte_free(priv); > > } > > return 0; > > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h > > b/drivers/vdpa/mlx5/mlx5_vdpa.h index 57044d9d33..462805a352 100644 > > --- a/drivers/vdpa/mlx5/mlx5_vdpa.h > > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h > > @@ -120,6 +120,7 @@ enum { > > struct mlx5_vdpa_priv { > > TAILQ_ENTRY(mlx5_vdpa_priv) next; > > uint8_t configured; > > + pthread_mutex_t vq_config_lock; > > uint64_t last_traffic_tic; > > pthread_t timer_tid; > > pthread_mutex_t timer_lock; > > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > b/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > index 7dc1ac0fa9..4a8b7b0bd9 100644 > > --- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > +++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > @@ -274,6 +274,7 @@ mlx5_vdpa_poll_handle(void *arg) > > priv- > >event_us; > > while (1) { > > max = 0; > > + pthread_mutex_lock(&priv->vq_config_lock); > > for (i = 0; i < priv->nr_virtqs; i++) { > > cq = &priv->virtqs[i].eqp.cq; > > if (cq->cq && !cq->armed) { > > @@ -298,6 +299,7 @@ mlx5_vdpa_poll_handle(void *arg) > > priv->vdev->device->name); > > mlx5_vdpa_arm_all_cqs(priv); > > pthread_mutex_lock(&priv->timer_lock); > > + pthread_mutex_unlock(&priv- > >vq_config_lock); > > Is it mandatory to 
hold timer_lock before releasing vq_config_lock? > If not, swapping them would maybe be safer.
Yes, could you please swap those two lines during integration? The order of these lock operations doesn't matter for the CQ handling. > > > priv->timer_on = 0; > > while (!priv->timer_on) > > pthread_cond_wait(&priv- > >timer_cond, > > @@ -312,6 +314,7 @@ mlx5_vdpa_poll_handle(void *arg) > > } else { > > priv->last_traffic_tic = current_tic; > > } > > + pthread_mutex_unlock(&priv->vq_config_lock); > > mlx5_vdpa_timer_sleep(priv, max); > > } > > return NULL; > > @@ -327,6 +330,7 @@ mlx5_vdpa_interrupt_handler(void *cb_arg) > > uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + > 128]; > > } out; > > > > + pthread_mutex_lock(&priv->vq_config_lock); > > while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp, > > sizeof(out.buf)) >= > > (ssize_t)sizeof(out.event_resp.cookie)) > { @@ -337,12 > > +341,15 @@ mlx5_vdpa_interrupt_handler(void *cb_arg) > > struct mlx5_vdpa_virtq *virtq = container_of(eqp, > > struct mlx5_vdpa_virtq, eqp); > > > > + if (!virtq->enable) > > + continue; > > mlx5_vdpa_cq_poll(cq); > > /* Notify guest for descs consuming. */ > > if (cq->callfd != -1) > > eventfd_write(cq->callfd, (eventfd_t)1); > > if (priv->event_mode == > MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) { > > mlx5_vdpa_cq_arm(priv, cq); > > + pthread_mutex_unlock(&priv->vq_config_lock); > > return; > > } > > /* Don't arm again - timer will take control. */ @@ -363,6 > +370,7 > > @@ mlx5_vdpa_interrupt_handler(void *cb_arg) > > pthread_cond_signal(&priv->timer_cond); > > } > > pthread_mutex_unlock(&priv->timer_lock); > > + pthread_mutex_unlock(&priv->vq_config_lock); > > } > > > > int > >