From: Chao Leng <[email protected]>

[ Upstream commit 7674073b2ed35ac951a49c425dec6b39d5a57140 ]

A crash happens when request completion is delayed for a long time
(nearly 30s) via fault injection. Each namespace has its own request
queue, so when completions are delayed this long, several request queues
may have timed-out requests at the same time and nvme_rdma_timeout will
run concurrently. Because requests from different request queues may be
queued on the same rdma queue, multiple nvme_rdma_timeout invocations may
call nvme_rdma_stop_queue for the same rdma queue at the same time.
The first nvme_rdma_timeout clears NVME_RDMA_Q_LIVE and proceeds to stop
the rdma queue (drain the qp), but the others see that NVME_RDMA_Q_LIVE
is already cleared and directly complete the requests. Completing a
request before the qp is fully drained may lead to a use-after-free
condition.

Add a mutex lock to serialize nvme_rdma_stop_queue.
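
For illustration only, a minimal userspace analogue of the pattern this
patch applies (this is not driver code; the queue type, stop_queue() and
drain_qp() below are hypothetical): every caller takes the per-queue
mutex, and only the caller that actually clears the LIVE flag performs
the drain, so no caller can return and complete requests while another
thread is still draining.

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdbool.h>

  struct queue {
          pthread_mutex_t queue_lock;   /* serializes stop_queue() */
          atomic_bool live;             /* analogue of NVME_RDMA_Q_LIVE */
  };

  /* Hypothetical teardown that must not race with request completion. */
  static void drain_qp(struct queue *q)
  {
          (void)q;
          /* ... */
  }

  static void stop_queue(struct queue *q)
  {
          pthread_mutex_lock(&q->queue_lock);
          /*
           * Only the thread that actually clears "live" drains the qp,
           * but every caller waits on the mutex, so none of them can
           * proceed while another thread is still draining.
           */
          if (atomic_exchange(&q->live, false))
                  drain_qp(q);
          pthread_mutex_unlock(&q->queue_lock);
  }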

Signed-off-by: Chao Leng <[email protected]>
Tested-by: Israel Rukshin <[email protected]>
Reviewed-by: Israel Rukshin <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
---
 drivers/nvme/host/rdma.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 65e3d0ef36e1a..493ed7ba86ed2 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -97,6 +97,7 @@ struct nvme_rdma_queue {
        struct completion       cm_done;
        bool                    pi_support;
        int                     cq_size;
+       struct mutex            queue_lock;
 };
 
 struct nvme_rdma_ctrl {
@@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
        int ret;
 
        queue = &ctrl->queues[idx];
+       mutex_init(&queue->queue_lock);
        queue->ctrl = ctrl;
        if (idx && ctrl->ctrl.max_integrity_segments)
                queue->pi_support = true;
@@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
        if (IS_ERR(queue->cm_id)) {
                dev_info(ctrl->ctrl.device,
                        "failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
-               return PTR_ERR(queue->cm_id);
+               ret = PTR_ERR(queue->cm_id);
+               goto out_destroy_mutex;
        }
 
        if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
@@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 out_destroy_cm_id:
        rdma_destroy_id(queue->cm_id);
        nvme_rdma_destroy_queue_ib(queue);
+out_destroy_mutex:
+       mutex_destroy(&queue->queue_lock);
        return ret;
 }
 
@@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 
 static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 {
-       if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
-               return;
-       __nvme_rdma_stop_queue(queue);
+       mutex_lock(&queue->queue_lock);
+       if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
+               __nvme_rdma_stop_queue(queue);
+       mutex_unlock(&queue->queue_lock);
 }
 
 static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
 
        nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
+       mutex_destroy(&queue->queue_lock);
 }
 
 static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
-- 
2.27.0


