virtio_pmem_flush() waits for either a free virtqueue descriptor (-ENOSPC) or a host completion. If the request virtqueue becomes broken (e.g. virtqueue_kick() notify failure), those waiters may never make progress.
Track a device-level broken state and converge all error paths to -EIO. Fail fast for new requests, wake all -ENOSPC waiters, and drain/detach outstanding request tokens to complete them with an error. Signed-off-by: Li Chen <[email protected]> --- drivers/nvdimm/nd_virtio.c | 73 +++++++++++++++++++++++++++++++++--- drivers/nvdimm/virtio_pmem.c | 7 ++++ drivers/nvdimm/virtio_pmem.h | 4 ++ 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c index d0385d4646f2..de1e3dde85eb 100644 --- a/drivers/nvdimm/nd_virtio.c +++ b/drivers/nvdimm/nd_virtio.c @@ -17,6 +17,18 @@ static void virtio_pmem_req_release(struct kref *kref) kfree(req); } +static void virtio_pmem_signal_done(struct virtio_pmem_request *req) +{ + WRITE_ONCE(req->done, true); + wake_up(&req->host_acked); +} + +static void virtio_pmem_complete_err(struct virtio_pmem_request *req) +{ + req->resp.ret = cpu_to_le32(1); + virtio_pmem_signal_done(req); +} + static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem) { struct virtio_pmem_request *req_buf; @@ -31,6 +43,40 @@ static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem) wake_up(&req_buf->wq_buf); } +static void virtio_pmem_wake_all_waiters(struct virtio_pmem *vpmem) +{ + struct virtio_pmem_request *req, *tmp; + + list_for_each_entry_safe(req, tmp, &vpmem->req_list, list) { + WRITE_ONCE(req->wq_buf_avail, true); + wake_up(&req->wq_buf); + list_del_init(&req->list); + } +} + +void virtio_pmem_mark_broken_and_drain(struct virtio_pmem *vpmem) +{ + struct virtio_pmem_request *req; + unsigned int len; + + if (READ_ONCE(vpmem->broken)) + return; + + WRITE_ONCE(vpmem->broken, true); + dev_err_once(&vpmem->vdev->dev, "virtqueue is broken\n"); + virtio_pmem_wake_all_waiters(vpmem); + + while ((req = virtqueue_get_buf(vpmem->req_vq, &len)) != NULL) { + virtio_pmem_complete_err(req); + kref_put(&req->kref, virtio_pmem_req_release); + } + + while ((req = virtqueue_detach_unused_buf(vpmem->req_vq)) != NULL) { + virtio_pmem_complete_err(req); + kref_put(&req->kref, virtio_pmem_req_release); + } +} + /* The interrupt handler */ void virtio_pmem_host_ack(struct virtqueue *vq) { @@ -42,8 +88,7 @@ void virtio_pmem_host_ack(struct virtqueue *vq) spin_lock_irqsave(&vpmem->pmem_lock, flags); while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) { virtio_pmem_wake_one_waiter(vpmem); - WRITE_ONCE(req_data->done, true); - wake_up(&req_data->host_acked); + virtio_pmem_signal_done(req_data); kref_put(&req_data->kref, virtio_pmem_req_release); } spin_unlock_irqrestore(&vpmem->pmem_lock, flags); @@ -69,6 +114,9 @@ static int virtio_pmem_flush(struct nd_region *nd_region) return -EIO; } + if (READ_ONCE(vpmem->broken)) + return -EIO; + might_sleep(); req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); if (!req_data) @@ -114,22 +162,35 @@ static int virtio_pmem_flush(struct nd_region *nd_region) spin_unlock_irqrestore(&vpmem->pmem_lock, flags); /* A host response results in "host_ack" getting called */ - wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail)); + wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail) || + READ_ONCE(vpmem->broken)); spin_lock_irqsave(&vpmem->pmem_lock, flags); + + if (READ_ONCE(vpmem->broken)) + break; } - err1 = virtqueue_kick(vpmem->req_vq); + if (err == -EIO || virtqueue_is_broken(vpmem->req_vq)) + virtio_pmem_mark_broken_and_drain(vpmem); + + err1 = true; + if (!err && !READ_ONCE(vpmem->broken)) { + err1 = virtqueue_kick(vpmem->req_vq); + if (!err1) + virtio_pmem_mark_broken_and_drain(vpmem); + } spin_unlock_irqrestore(&vpmem->pmem_lock, flags); /* * virtqueue_add_sgs failed with error different than -ENOSPC, we can't * do anything about that. */ - if (err || !err1) { + if (READ_ONCE(vpmem->broken) || err || !err1) { dev_info(&vdev->dev, "failed to send command to virtio pmem device\n"); err = -EIO; } else { /* A host response results in "host_ack" getting called */ - wait_event(req_data->host_acked, READ_ONCE(req_data->done)); + wait_event(req_data->host_acked, READ_ONCE(req_data->done) || + READ_ONCE(vpmem->broken)); err = le32_to_cpu(req_data->resp.ret); } diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index 2396d19ce549..aa07328e3ff9 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -25,6 +25,7 @@ static int init_vq(struct virtio_pmem *vpmem) spin_lock_init(&vpmem->pmem_lock); INIT_LIST_HEAD(&vpmem->req_list); + WRITE_ONCE(vpmem->broken, false); return 0; }; @@ -137,6 +138,12 @@ static int virtio_pmem_probe(struct virtio_device *vdev) static void virtio_pmem_remove(struct virtio_device *vdev) { struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); + struct virtio_pmem *vpmem = vdev->priv; + unsigned long flags; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + virtio_pmem_mark_broken_and_drain(vpmem); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); nvdimm_bus_unregister(nvdimm_bus); vdev->config->del_vqs(vdev); diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h index fc8f613f8f28..49dd2e62d198 100644 --- a/drivers/nvdimm/virtio_pmem.h +++ b/drivers/nvdimm/virtio_pmem.h @@ -44,6 +44,9 @@ struct virtio_pmem { /* List to store deferred work if virtqueue is full */ struct list_head req_list; + /* Fail fast and wake waiters if the request virtqueue is broken. */ + bool broken; + /* Synchronize virtqueue data */ spinlock_t pmem_lock; @@ -53,5 +56,6 @@ struct virtio_pmem { }; void virtio_pmem_host_ack(struct virtqueue *vq); +void virtio_pmem_mark_broken_and_drain(struct virtio_pmem *vpmem); int async_pmem_flush(struct nd_region *nd_region, struct bio *bio); #endif -- 2.51.0

