This function is introduced for dequeuing requests
from a sw queue one at a time, so that we can
dispatch them in the scheduler's way.

More importantly, some SCSI devices may set
q->queue_depth, which is a per-request_queue limit
applied to pending I/O across all hctxs. This
function helps avoid dequeuing too many requests
from the sw queue when ->dispatch isn't flushed
completely.

Tested-by: Oleksandr Natalenko <oleksa...@natalenko.name>
Tested-by: Tom Nguyen <tom81...@gmail.com>
Tested-by: Paolo Valente <paolo.vale...@linaro.org>
Reviewed-by: Bart Van Assche <bart.vanass...@wdc.com>
Signed-off-by: Ming Lei <ming....@redhat.com>
---
 block/blk-mq.c | 38 ++++++++++++++++++++++++++++++++++++++
 block/blk-mq.h |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d1b9fb539eba..8b49af1ade7f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -882,6 +882,44 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 }
 EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
 
+struct dispatch_rq_data {
+       struct blk_mq_hw_ctx *hctx;
+       struct request *rq;
+};
+
+static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
+{
+       struct dispatch_rq_data *dispatch_data = data;
+       struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
+       struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+
+       spin_lock(&ctx->lock);
+       if (!list_empty(&ctx->rq_list)) {
+               dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+               list_del_init(&dispatch_data->rq->queuelist);
+               if (list_empty(&ctx->rq_list))
+                       sbitmap_clear_bit(sb, bitnr);
+       }
+       spin_unlock(&ctx->lock);
+
+       return !dispatch_data->rq;
+}
+
+struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
+                                       struct blk_mq_ctx *start)
+{
+       unsigned off = start ? start->index_hw : 0;
+       struct dispatch_rq_data data = {
+               .hctx = hctx,
+               .rq   = NULL,
+       };
+
+       __sbitmap_for_each_set(&hctx->ctx_map, off,
+                              dispatch_rq_from_ctx, &data);
+
+       return data.rq;
+}
+
 static inline unsigned int queued_to_index(unsigned int queued)
 {
        if (!queued)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 61aecf398a4b..915de58572e7 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -35,6 +35,8 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
 bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
                                bool wait);
+struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
+                                       struct blk_mq_ctx *start);
 
 /*
  * Internal helpers for allocating/freeing the request map
-- 
2.9.5
