Change the sli4 queue creation code to use NUMA-node-aware memory
allocation, choosing the node from the CPU each queue is associated with.

Signed-off-by: Dick Kennedy <dick.kenn...@broadcom.com>
Signed-off-by: James Smart <jsmart2...@gmail.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 72 ++++++++++++++++++++++++++-----------------
 drivers/scsi/lpfc/lpfc_sli.c  | 10 +++---
 drivers/scsi/lpfc/lpfc_sli4.h |  6 ++--
 3 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 212adc9d2c56..2a51146661aa 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -8631,10 +8631,12 @@ static int
 lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
 {
        struct lpfc_queue *qdesc;
+       int cpu;
 
+       cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
                                      phba->sli4_hba.cq_esize,
-                                     LPFC_CQE_EXP_COUNT);
+                                     LPFC_CQE_EXP_COUNT, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0508 Failed allocate fast-path NVME CQ (%d)\n",
@@ -8643,11 +8645,12 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
        }
        qdesc->qe_valid = 1;
        qdesc->hdwq = wqidx;
-       qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+       qdesc->chann = cpu;
        phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
 
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
-                                     LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT);
+                                     LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT,
+                                     cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0509 Failed allocate fast-path NVME WQ (%d)\n",
@@ -8666,18 +8669,20 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 {
        struct lpfc_queue *qdesc;
        uint32_t wqesize;
+       int cpu;
 
+       cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
        /* Create Fast Path FCP CQs */
        if (phba->enab_exp_wqcq_pages)
                /* Increase the CQ size when WQEs contain an embedded cdb */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
                                              phba->sli4_hba.cq_esize,
-                                             LPFC_CQE_EXP_COUNT);
+                                             LPFC_CQE_EXP_COUNT, cpu);
 
        else
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.cq_esize,
-                                             phba->sli4_hba.cq_ecount);
+                                             phba->sli4_hba.cq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                        "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx);
@@ -8685,7 +8690,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
        }
        qdesc->qe_valid = 1;
        qdesc->hdwq = wqidx;
-       qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+       qdesc->chann = cpu;
        phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
 
        /* Create Fast Path FCP WQs */
@@ -8695,11 +8700,11 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
                        LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
                                              wqesize,
-                                             LPFC_WQE_EXP_COUNT);
+                                             LPFC_WQE_EXP_COUNT, cpu);
        } else
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.wq_esize,
-                                             phba->sli4_hba.wq_ecount);
+                                             phba->sli4_hba.wq_ecount, cpu);
 
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -8732,7 +8737,7 @@ int
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
        struct lpfc_queue *qdesc;
-       int idx, eqidx;
+       int idx, eqidx, cpu;
        struct lpfc_sli4_hdw_queue *qp;
        struct lpfc_eq_intr_info *eqi;
 
@@ -8819,13 +8824,15 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 
        /* Create HBA Event Queues (EQs) */
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+               /* determine EQ affinity */
+               eqidx = lpfc_find_eq_handle(phba, idx);
+               cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ);
                /*
                 * If there are more Hardware Queues than available
-                * CQs, multiple Hardware Queues may share a common EQ.
+                * EQs, multiple Hardware Queues may share a common EQ.
                 */
                if (idx >= phba->cfg_irq_chann) {
                        /* Share an existing EQ */
-                       eqidx = lpfc_find_eq_handle(phba, idx);
                        phba->sli4_hba.hdwq[idx].hba_eq =
                                phba->sli4_hba.hdwq[eqidx].hba_eq;
                        continue;
@@ -8833,7 +8840,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                /* Create an EQ */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.eq_esize,
-                                             phba->sli4_hba.eq_ecount);
+                                             phba->sli4_hba.eq_ecount, cpu);
                if (!qdesc) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "0497 Failed allocate EQ (%d)\n", idx);
@@ -8843,9 +8850,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                qdesc->hdwq = idx;
 
                /* Save the CPU this EQ is affinitised to */
-               eqidx = lpfc_find_eq_handle(phba, idx);
-               qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
-                                                   LPFC_FIND_BY_EQ);
+               qdesc->chann = cpu;
                phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
                qdesc->last_cpu = qdesc->chann;
                eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
@@ -8868,11 +8873,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 
                if (phba->nvmet_support) {
                        for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+                               cpu = lpfc_find_cpu_handle(phba, idx,
+                                                          LPFC_FIND_BY_HDWQ);
                                qdesc = lpfc_sli4_queue_alloc(
                                                      phba,
                                                      LPFC_DEFAULT_PAGE_SIZE,
                                                      phba->sli4_hba.cq_esize,
-                                                     phba->sli4_hba.cq_ecount);
+                                                     phba->sli4_hba.cq_ecount,
+                                                     cpu);
                                if (!qdesc) {
                                        lpfc_printf_log(
                                                phba, KERN_ERR, LOG_INIT,
@@ -8882,7 +8890,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                }
                                qdesc->qe_valid = 1;
                                qdesc->hdwq = idx;
-                               qdesc->chann = idx;
+                               qdesc->chann = cpu;
                                phba->sli4_hba.nvmet_cqset[idx] = qdesc;
                        }
                }
@@ -8892,10 +8900,11 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
         * Create Slow Path Completion Queues (CQs)
         */
 
+       cpu = lpfc_find_cpu_handle(phba, 0, LPFC_FIND_BY_EQ);
        /* Create slow-path Mailbox Command Complete Queue */
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                      phba->sli4_hba.cq_esize,
-                                     phba->sli4_hba.cq_ecount);
+                                     phba->sli4_hba.cq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0500 Failed allocate slow-path mailbox CQ\n");
@@ -8907,7 +8916,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
        /* Create slow-path ELS Complete Queue */
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                      phba->sli4_hba.cq_esize,
-                                     phba->sli4_hba.cq_ecount);
+                                     phba->sli4_hba.cq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0501 Failed allocate slow-path ELS CQ\n");
@@ -8926,7 +8935,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                      phba->sli4_hba.mq_esize,
-                                     phba->sli4_hba.mq_ecount);
+                                     phba->sli4_hba.mq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0505 Failed allocate slow-path MQ\n");
@@ -8942,7 +8951,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
        /* Create slow-path ELS Work Queue */
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                      phba->sli4_hba.wq_esize,
-                                     phba->sli4_hba.wq_ecount);
+                                     phba->sli4_hba.wq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0504 Failed allocate slow-path ELS WQ\n");
@@ -8956,7 +8965,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                /* Create NVME LS Complete Queue */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.cq_esize,
-                                             phba->sli4_hba.cq_ecount);
+                                             phba->sli4_hba.cq_ecount, cpu);
                if (!qdesc) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "6079 Failed allocate NVME LS CQ\n");
@@ -8969,7 +8978,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                /* Create NVME LS Work Queue */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.wq_esize,
-                                             phba->sli4_hba.wq_ecount);
+                                             phba->sli4_hba.wq_ecount, cpu);
                if (!qdesc) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "6080 Failed allocate NVME LS WQ\n");
@@ -8987,7 +8996,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
        /* Create Receive Queue for header */
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                      phba->sli4_hba.rq_esize,
-                                     phba->sli4_hba.rq_ecount);
+                                     phba->sli4_hba.rq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0506 Failed allocate receive HRQ\n");
@@ -8998,7 +9007,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
        /* Create Receive Queue for data */
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                      phba->sli4_hba.rq_esize,
-                                     phba->sli4_hba.rq_ecount);
+                                     phba->sli4_hba.rq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "0507 Failed allocate receive DRQ\n");
@@ -9009,11 +9018,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
        if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
            phba->nvmet_support) {
                for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+                       cpu = lpfc_find_cpu_handle(phba, idx,
+                                                  LPFC_FIND_BY_HDWQ);
                        /* Create NVMET Receive Queue for header */
                        qdesc = lpfc_sli4_queue_alloc(phba,
                                                      LPFC_DEFAULT_PAGE_SIZE,
                                                      phba->sli4_hba.rq_esize,
-                                                     LPFC_NVMET_RQE_DEF_COUNT);
+                                                     LPFC_NVMET_RQE_DEF_COUNT,
+                                                     cpu);
                        if (!qdesc) {
                                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                                "3146 Failed allocate "
@@ -9024,8 +9036,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                        phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc;
 
                        /* Only needed for header of RQ pair */
-                       qdesc->rqbp = kzalloc(sizeof(struct lpfc_rqb),
-                                             GFP_KERNEL);
+                       qdesc->rqbp = kzalloc_node(sizeof(*qdesc->rqbp),
+                                                  GFP_KERNEL,
+                                                  cpu_to_node(cpu));
                        if (qdesc->rqbp == NULL) {
                                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                                "6131 Failed allocate "
@@ -9040,7 +9053,8 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                        qdesc = lpfc_sli4_queue_alloc(phba,
                                                      LPFC_DEFAULT_PAGE_SIZE,
                                                      phba->sli4_hba.rq_esize,
-                                                     LPFC_NVMET_RQE_DEF_COUNT);
+                                                     LPFC_NVMET_RQE_DEF_COUNT,
+                                                     cpu);
                        if (!qdesc) {
                                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                                "3156 Failed allocate "
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 5b630643d950..5f556f8bc6fb 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -14496,6 +14496,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
  * @page_size: The size of a queue page
  * @entry_size: The size of each queue entry for this queue.
  * @entry count: The number of entries that this queue will handle.
+ * @cpu: The cpu that will primarily utilize this queue.
  *
  * This function allocates a queue structure and the DMAable memory used for
  * the host resident queue. This function must be called before creating the
@@ -14503,7 +14504,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
  **/
 struct lpfc_queue *
 lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
-                     uint32_t entry_size, uint32_t entry_count)
+                     uint32_t entry_size, uint32_t entry_count, int cpu)
 {
        struct lpfc_queue *queue;
        struct lpfc_dmabuf *dmabuf;
@@ -14519,8 +14520,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
        if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
                pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
 
-       queue = kzalloc(sizeof(struct lpfc_queue) +
-                       (sizeof(void *) * pgcnt), GFP_KERNEL);
+       queue = kzalloc_node(sizeof(*queue) + (sizeof(void *) * pgcnt),
+                            GFP_KERNEL, cpu_to_node(cpu));
        if (!queue)
                return NULL;
 
@@ -14543,7 +14544,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
        queue->phba = phba;
 
        for (x = 0; x < queue->page_count; x++) {
-               dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+               dmabuf = kzalloc_node(sizeof(*dmabuf), GFP_KERNEL,
+                                     dev_to_node(&phba->pcidev->dev));
                if (!dmabuf)
                        goto out_fail;
                dmabuf->virt = dma_zalloc_coherent(&phba->pcidev->dev,
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index b86ac85b65d0..bd5b5c3de35e 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -989,8 +989,10 @@ int lpfc_sli4_mbx_read_fcf_rec(struct lpfc_hba *, struct lpfcMboxq *,
                               uint16_t);
 
 void lpfc_sli4_hba_reset(struct lpfc_hba *);
-struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t,
-                                        uint32_t, uint32_t);
+struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *phba,
+                                        uint32_t page_size,
+                                        uint32_t entry_size,
+                                        uint32_t entry_count, int cpu);
 void lpfc_sli4_queue_free(struct lpfc_queue *);
 int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t);
 void lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
-- 
2.13.7

Reply via email to