Currently the driver maintains a sideband structure which has a pointer
for each queue element. However, at 8 bytes per pointer, up to 4k
elements per queue, and hundreds of queues, this can take up a lot of memory.

Convert the driver to use an access routine that calculates the
element address based on its index rather than using the pointer table.

Signed-off-by: Dick Kennedy <dick.kenn...@broadcom.com>
Signed-off-by: James Smart <jsmart2...@gmail.com>
---
 drivers/scsi/lpfc/lpfc_debugfs.c |  4 +--
 drivers/scsi/lpfc/lpfc_debugfs.h |  2 +-
 drivers/scsi/lpfc/lpfc_sli.c     | 55 +++++++++++++++++++++-------------------
 drivers/scsi/lpfc/lpfc_sli4.h    | 19 +++-----------
 4 files changed, 35 insertions(+), 45 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 1215eaa530db..cdf26eb02225 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -4135,7 +4135,7 @@ lpfc_idiag_queacc_read_qe(char *pbuffer, int len, struct 
lpfc_queue *pque,
                        "QE-INDEX[%04d]:\n", index);
 
        offset = 0;
-       pentry = pque->qe[index].address;
+       pentry = lpfc_sli4_qe(pque, index);
        while (esize > 0) {
                len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len,
                                "%08x ", *pentry);
@@ -4485,7 +4485,7 @@ lpfc_idiag_queacc_write(struct file *file, const char 
__user *buf,
                pque = (struct lpfc_queue *)idiag.ptr_private;
                if (offset > pque->entry_size/sizeof(uint32_t) - 1)
                        goto error_out;
-               pentry = pque->qe[index].address;
+               pentry = lpfc_sli4_qe(pque, index);
                pentry += offset;
                if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR)
                        *pentry = value;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 93ab7dfb8ee0..e42c1fac72cf 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -345,7 +345,7 @@ lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx)
 
        esize = q->entry_size;
        qe_word_cnt = esize / sizeof(uint32_t);
-       pword = q->qe[idx].address;
+       pword = lpfc_sli4_qe(q, idx);
 
        len = 0;
        len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, "QE[%04d]: ", idx);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2b130f7bcc97..5b630643d950 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -151,7 +151,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 
*wqe)
        /* sanity check on queue memory */
        if (unlikely(!q))
                return -ENOMEM;
-       temp_wqe = q->qe[q->host_index].wqe;
+       temp_wqe = lpfc_sli4_qe(q, q->host_index);
 
        /* If the host has not yet processed the next entry then we are done */
        idx = ((q->host_index + 1) % q->entry_count);
@@ -271,7 +271,7 @@ lpfc_sli4_mq_put(struct lpfc_queue *q, struct lpfc_mqe *mqe)
        /* sanity check on queue memory */
        if (unlikely(!q))
                return -ENOMEM;
-       temp_mqe = q->qe[q->host_index].mqe;
+       temp_mqe = lpfc_sli4_qe(q, q->host_index);
 
        /* If the host has not yet processed the next entry then we are done */
        if (((q->host_index + 1) % q->entry_count) == q->hba_index)
@@ -331,7 +331,7 @@ lpfc_sli4_eq_get(struct lpfc_queue *q)
        /* sanity check on queue memory */
        if (unlikely(!q))
                return NULL;
-       eqe = q->qe[q->host_index].eqe;
+       eqe = lpfc_sli4_qe(q, q->host_index);
 
        /* If the next EQE is not valid then we are done */
        if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
@@ -545,7 +545,7 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
        /* sanity check on queue memory */
        if (unlikely(!q))
                return NULL;
-       cqe = q->qe[q->host_index].cqe;
+       cqe = lpfc_sli4_qe(q, q->host_index);
 
        /* If the next CQE is not valid then we are done */
        if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
@@ -667,8 +667,8 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue 
*dq,
                return -ENOMEM;
        hq_put_index = hq->host_index;
        dq_put_index = dq->host_index;
-       temp_hrqe = hq->qe[hq_put_index].rqe;
-       temp_drqe = dq->qe[dq_put_index].rqe;
+       temp_hrqe = lpfc_sli4_qe(hq, hq_put_index);
+       temp_drqe = lpfc_sli4_qe(dq, dq_put_index);
 
        if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
                return -EINVAL;
@@ -7879,8 +7879,9 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
        mcq = phba->sli4_hba.mbx_cq;
        idx = mcq->hba_index;
        qe_valid = mcq->qe_valid;
-       while (bf_get_le32(lpfc_cqe_valid, mcq->qe[idx].cqe) == qe_valid) {
-               mcqe = (struct lpfc_mcqe *)mcq->qe[idx].cqe;
+       while (bf_get_le32(lpfc_cqe_valid,
+              (struct lpfc_cqe *)lpfc_sli4_qe(mcq, idx)) == qe_valid) {
+               mcqe = (struct lpfc_mcqe *)(lpfc_sli4_qe(mcq, idx));
                if (bf_get_le32(lpfc_trailer_completed, mcqe) &&
                    (!bf_get_le32(lpfc_trailer_async, mcqe))) {
                        pending_completions = true;
@@ -14506,24 +14507,22 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t 
page_size,
 {
        struct lpfc_queue *queue;
        struct lpfc_dmabuf *dmabuf;
-       int x, total_qe_count;
-       void *dma_pointer;
        uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+       uint16_t x, pgcnt;
 
        if (!phba->sli4_hba.pc_sli4_params.supported)
                hw_page_size = page_size;
 
+       pgcnt = ALIGN(entry_size * entry_count, hw_page_size) / hw_page_size;
+
+       /* If needed, Adjust page count to match the max the adapter supports */
+       if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
+               pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
+
        queue = kzalloc(sizeof(struct lpfc_queue) +
-                       (sizeof(union sli4_qe) * entry_count), GFP_KERNEL);
+                       (sizeof(void *) * pgcnt), GFP_KERNEL);
        if (!queue)
                return NULL;
-       queue->page_count = (ALIGN(entry_size * entry_count,
-                       hw_page_size))/hw_page_size;
-
-       /* If needed, Adjust page count to match the max the adapter supports */
-       if (phba->sli4_hba.pc_sli4_params.wqpcnt &&
-           (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt))
-               queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt;
 
        INIT_LIST_HEAD(&queue->list);
        INIT_LIST_HEAD(&queue->wq_list);
@@ -14535,12 +14534,15 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t 
page_size,
        /* Set queue parameters now.  If the system cannot provide memory
         * resources, the free routine needs to know what was allocated.
         */
+       queue->page_count = pgcnt;
+       queue->q_pgs = (void **)&queue[1];
+       queue->entry_cnt_per_pg = hw_page_size / entry_size;
        queue->entry_size = entry_size;
        queue->entry_count = entry_count;
        queue->page_size = hw_page_size;
        queue->phba = phba;
 
-       for (x = 0, total_qe_count = 0; x < queue->page_count; x++) {
+       for (x = 0; x < queue->page_count; x++) {
                dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
                if (!dmabuf)
                        goto out_fail;
@@ -14553,13 +14555,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t 
page_size,
                }
                dmabuf->buffer_tag = x;
                list_add_tail(&dmabuf->list, &queue->page_list);
-               /* initialize queue's entry array */
-               dma_pointer = dmabuf->virt;
-               for (; total_qe_count < entry_count &&
-                    dma_pointer < (hw_page_size + dmabuf->virt);
-                    total_qe_count++, dma_pointer += entry_size) {
-                       queue->qe[total_qe_count].address = dma_pointer;
-               }
+               /* use lpfc_sli4_qe to index a particular entry in this page */
+               queue->q_pgs[x] = dmabuf->virt;
        }
        INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
        INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
@@ -14574,6 +14571,12 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t 
page_size,
        return NULL;
 }
 
+inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
+{ /* Address of queue entry idx: base of its page plus offset in that page */
+       return q->q_pgs[idx / q->entry_cnt_per_pg] + /* page holding idx */
+               (q->entry_size * (idx % q->entry_cnt_per_pg)); /* byte offset */
+}
+
 /**
  * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
  * @phba: HBA structure that indicates port to create a queue on.
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 325069abc087..b86ac85b65d0 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -117,21 +117,6 @@ enum lpfc_sli4_queue_subtype {
        LPFC_USOL
 };
 
-union sli4_qe {
-       void *address;
-       struct lpfc_eqe *eqe;
-       struct lpfc_cqe *cqe;
-       struct lpfc_mcqe *mcqe;
-       struct lpfc_wcqe_complete *wcqe_complete;
-       struct lpfc_wcqe_release *wcqe_release;
-       struct sli4_wcqe_xri_aborted *wcqe_xri_aborted;
-       struct lpfc_rcqe_complete *rcqe_complete;
-       struct lpfc_mqe *mqe;
-       union  lpfc_wqe *wqe;
-       union  lpfc_wqe128 *wqe128;
-       struct lpfc_rqe *rqe;
-};
-
 /* RQ buffer list */
 struct lpfc_rqb {
        uint16_t entry_count;     /* Current number of RQ slots */
@@ -157,6 +142,7 @@ struct lpfc_queue {
        struct list_head cpu_list;
        uint32_t entry_count;   /* Number of entries to support on the queue */
        uint32_t entry_size;    /* Size of each queue entry. */
+       uint32_t entry_cnt_per_pg;
        uint32_t notify_interval; /* Queue Notification Interval
                                   * For chip->host queues (EQ, CQ, RQ):
                                   *  specifies the interval (number of
@@ -254,7 +240,7 @@ struct lpfc_queue {
        uint16_t last_cpu;      /* most recent cpu */
        uint8_t qe_valid;
        struct lpfc_queue *assoc_qp;
-       union sli4_qe qe[1];    /* array to index entries (must be last) */
+       void **q_pgs;   /* array to index entries per page */
 };
 
 struct lpfc_sli4_link {
@@ -1092,3 +1078,4 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
+inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
-- 
2.13.7

Reply via email to