The blk_mq_tags and requests of a specific hardware queue are mostly used
by specific CPUs, which might not be in the same NUMA node as the disk.
For example, for an NVMe card in node 0, half of the hardware queues will
be used by node 0 and the other half by node 1.

Signed-off-by: Shaohua Li <s...@fb.com>
---
 block/blk-mq.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 48df5fd..888077c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1666,16 +1666,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
                                        unsigned int reserved_tags)
 {
        struct blk_mq_tags *tags;
+       int node;
+
+       node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+       if (node == NUMA_NO_NODE)
+               node = set->numa_node;
 
-       tags = blk_mq_init_tags(nr_tags, reserved_tags,
-                               set->numa_node,
+       tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
                                BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
        if (!tags)
                return NULL;
 
        tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
                                 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                                set->numa_node);
+                                node);
        if (!tags->rqs) {
                blk_mq_free_tags(tags);
                return NULL;
@@ -1683,7 +1687,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 
        tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
                                 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                                set->numa_node);
+                                node);
        if (!tags->static_rqs) {
                kfree(tags->rqs);
                blk_mq_free_tags(tags);
@@ -1703,6 +1707,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 {
        unsigned int i, j, entries_per_page, max_order = 4;
        size_t rq_size, left;
+       int node;
+
+       node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+       if (node == NUMA_NO_NODE)
+               node = set->numa_node;
 
        INIT_LIST_HEAD(&tags->page_list);
 
@@ -1724,7 +1733,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                        this_order--;
 
                do {
-                       page = alloc_pages_node(set->numa_node,
+                       page = alloc_pages_node(node,
                                GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
                                this_order);
                        if (page)
@@ -1757,7 +1766,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                        if (set->ops->init_request) {
                                if (set->ops->init_request(set->driver_data,
                                                rq, hctx_idx, i,
-                                               set->numa_node)) {
+                                               node)) {
                                        tags->static_rqs[i] = NULL;
                                        goto fail;
                                }
-- 
2.9.3

Reply via email to