diff mbox series

[25/30] lpfc: Reduce memory footprint for lpfc_queue

Message ID 20190312233033.32670-26-jsmart2021@gmail.com (mailing list archive)
State Accepted
Headers show
Series lpfc updates for 12.2.0.1 | expand

Commit Message

James Smart March 12, 2019, 11:30 p.m. UTC
Currently the driver maintains a sideband structure which has a pointer
for each queue element. However, at 8 bytes per pointer, up to 4k
elements per queue, and hundreds of queues, this can take up a lot of memory.

Convert the driver to use an access routine that calculates the
element address based on its index rather than using the pointer table.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
---
 drivers/scsi/lpfc/lpfc_debugfs.c |  4 +--
 drivers/scsi/lpfc/lpfc_debugfs.h |  2 +-
 drivers/scsi/lpfc/lpfc_sli.c     | 55 +++++++++++++++++++++-------------------
 drivers/scsi/lpfc/lpfc_sli4.h    | 19 +++-----------
 4 files changed, 35 insertions(+), 45 deletions(-)

Comments

James Bottomley March 20, 2019, 1:02 a.m. UTC | #1
On Tue, 2019-03-12 at 16:30 -0700, James Smart wrote:
> Currently the driver maintains a sideband structure which has a
> pointer for each queue element. However, at 8bytes a pointer, and up
> to 4k elements per queue, and 100's of queues, this can take up a lot
> of memory.
> 
> Convert the driver to using an access routine that calculates the
> element address based on it's index rather than using the pointer
> table.

We're getting a failure from the ppc builds according to linux-next:

In file included from drivers/scsi/lpfc/lpfc_debugfs.c:46:
drivers/scsi/lpfc/lpfc_debugfs.c: In function 'lpfc_idiag_queacc_write':
drivers/scsi/lpfc/lpfc_sli4.h:1083:14: error: inlining failed in call to always_inline 'lpfc_sli4_qe': function body not available
 inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
              ^~~~~~~~~~~~
drivers/scsi/lpfc/lpfc_debugfs.c:4488:12: note: called from here
   pentry = lpfc_sli4_qe(pque, index);
            ^~~~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/scsi/lpfc/lpfc_debugfs.c:46:
drivers/scsi/lpfc/lpfc_sli4.h:1083:14: error: inlining failed in call to always_inline 'lpfc_sli4_qe': function body not available
 inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
              ^~~~~~~~~~~~
drivers/scsi/lpfc/lpfc_debugfs.c:4488:12: note: called from here
   pentry = lpfc_sli4_qe(pque, index);
            ^~~~~~~~~~~~~~~~~~~~~~~~~

You can't declare a function inline in a header if it doesn't have a
body available to the compiler.  So realistically you either don't
declare it inline or you make it a static inline in the header.  I
think the latter applies in this case, so this should be the fix:

James

---

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 6fc9ef888813..d6ea0c473ed7 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -14574,12 +14574,6 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	return NULL;
 }
 
-inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
-{
-	return q->q_pgs[idx / q->entry_cnt_per_pg] +
-		(q->entry_size * (idx % q->entry_cnt_per_pg));
-}
-
 /**
  * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
  * @phba: HBA structure that indicates port to create a queue on.
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index bd5b5c3de35e..20bc6d3d0653 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -1080,4 +1080,8 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
-inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
+static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
+{
+	return q->q_pgs[idx / q->entry_cnt_per_pg] +
+		(q->entry_size * (idx % q->entry_cnt_per_pg));
+}
James Smart March 20, 2019, 3:35 a.m. UTC | #2
> On Mar 19, 2019, at 6:02 PM, James Bottomley <James.Bottomley@HansenPartnership.com> wrote:
> 
> On Tue, 2019-03-12 at 16:30 -0700, James Smart wrote:
>> Currently the driver maintains a sideband structure which has a
>> pointer for each queue element. However, at 8bytes a pointer, and up
>> to 4k elements per queue, and 100's of queues, this can take up a lot
>> of memory.
>> 
>> Convert the driver to using an access routine that calculates the
>> element address based on it's index rather than using the pointer
>> table.
> 
> We're getting a failure from the ppc builds according to linux-next:
> 
> n file included from drivers/scsi/lpfc/lpfc_debugfs.c:46:
> drivers/scsi/lpfc/lpfc_debugfs.c: In function 'lpfc_idiag_queacc_write':
> drivers/scsi/lpfc/lpfc_sli4.h:1083:14: error: inlining failed in call to always_inline 'lpfc_sli4_qe': function body not available
> inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
>              ^~~~~~~~~~~~
> drivers/scsi/lpfc/lpfc_debugfs.c:4488:12: note: called from here
>   pentry = lpfc_sli4_qe(pque, index);
>            ^~~~~~~~~~~~~~~~~~~~~~~~~
> In file included from drivers/scsi/lpfc/lpfc_debugfs.c:46:
> drivers/scsi/lpfc/lpfc_sli4.h:1083:14: error: inlining failed in call to always_inline 'lpfc_sli4_qe': function body not available
> inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
>              ^~~~~~~~~~~~
> drivers/scsi/lpfc/lpfc_debugfs.c:4488:12: note: called from here
>   pentry = lpfc_sli4_qe(pque, index);
>            ^~~~~~~~~~~~~~~~~~~~~~~~~
> 
> You can't declare a function inline in a header if it doesn't have a
> body available to the compiler.  So realistically you either don't
> declare it inline or you make it a static inline in the header.  I
> think the latter applies in this case, so this should be the fix
> 
> James
> 
> ---
> 
> diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
> index 6fc9ef888813..d6ea0c473ed7 100644
> --- a/drivers/scsi/lpfc/lpfc_sli.c
> +++ b/drivers/scsi/lpfc/lpfc_sli.c
> @@ -14574,12 +14574,6 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
> 	return NULL;
> }
> 
> -inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
> -{
> -	return q->q_pgs[idx / q->entry_cnt_per_pg] +
> -		(q->entry_size * (idx % q->entry_cnt_per_pg));
> -}
> -
> /**
>  * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
>  * @phba: HBA structure that indicates port to create a queue on.
> diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
> index bd5b5c3de35e..20bc6d3d0653 100644
> --- a/drivers/scsi/lpfc/lpfc_sli4.h
> +++ b/drivers/scsi/lpfc/lpfc_sli4.h
> @@ -1080,4 +1080,8 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
> uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
> uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
> void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
> -inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
> +static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
> +{
> +	return q->q_pgs[idx / q->entry_cnt_per_pg] +
> +		(q->entry_size * (idx % q->entry_cnt_per_pg));
> +}


Agree. Thank You James.

— james

Reviewed-by:  James Smart  <james.smart@broadcom.com>
James Bottomley March 20, 2019, 3:16 p.m. UTC | #3
On Tue, 2019-03-19 at 20:35 -0700, James Smart wrote:
> > On Mar 19, 2019, at 6:02 PM, James Bottomley <James.Bottomley@Hanse
> > nPartnership.com> wrote:
> > 
> > On Tue, 2019-03-12 at 16:30 -0700, James Smart wrote:
> > > Currently the driver maintains a sideband structure which has a
> > > pointer for each queue element. However, at 8bytes a pointer, and
> > > up
> > > to 4k elements per queue, and 100's of queues, this can take up a
> > > lot
> > > of memory.
> > > 
> > > Convert the driver to using an access routine that calculates the
> > > element address based on it's index rather than using the pointer
> > > table.
> > 
> > We're getting a failure from the ppc builds according to linux-
> > next:
> > 
> > n file included from drivers/scsi/lpfc/lpfc_debugfs.c:46:
> > drivers/scsi/lpfc/lpfc_debugfs.c: In function
> > 'lpfc_idiag_queacc_write':
> > drivers/scsi/lpfc/lpfc_sli4.h:1083:14: error: inlining failed in
> > call to always_inline 'lpfc_sli4_qe': function body not available
> > inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
> >              ^~~~~~~~~~~~
> > drivers/scsi/lpfc/lpfc_debugfs.c:4488:12: note: called from here
> >   pentry = lpfc_sli4_qe(pque, index);
> >            ^~~~~~~~~~~~~~~~~~~~~~~~~
> > In file included from drivers/scsi/lpfc/lpfc_debugfs.c:46:
> > drivers/scsi/lpfc/lpfc_sli4.h:1083:14: error: inlining failed in
> > call to always_inline 'lpfc_sli4_qe': function body not available
> > inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
> >              ^~~~~~~~~~~~
> > drivers/scsi/lpfc/lpfc_debugfs.c:4488:12: note: called from here
> >   pentry = lpfc_sli4_qe(pque, index);
> >            ^~~~~~~~~~~~~~~~~~~~~~~~~
> > 
> > You can't declare a function inline in a header if it doesn't have
> > a
> > body available to the compiler.  So realistically you either don't
> > declare it inline or you make it a static inline in the header.  I
> > think the latter applies in this case, so this should be the fix
> > 
> > James
> > 
> > ---
> > 
> > diff --git a/drivers/scsi/lpfc/lpfc_sli.c
> > b/drivers/scsi/lpfc/lpfc_sli.c
> > index 6fc9ef888813..d6ea0c473ed7 100644
> > --- a/drivers/scsi/lpfc/lpfc_sli.c
> > +++ b/drivers/scsi/lpfc/lpfc_sli.c
> > @@ -14574,12 +14574,6 @@ lpfc_sli4_queue_alloc(struct lpfc_hba
> > *phba, uint32_t page_size,
> > 	return NULL;
> > }
> > 
> > -inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
> > -{
> > -	return q->q_pgs[idx / q->entry_cnt_per_pg] +
> > -		(q->entry_size * (idx % q->entry_cnt_per_pg));
> > -}
> > -
> > /**
> >  * lpfc_dual_chute_pci_bar_map - Map pci base address register to
> > host memory
> >  * @phba: HBA structure that indicates port to create a queue on.
> > diff --git a/drivers/scsi/lpfc/lpfc_sli4.h
> > b/drivers/scsi/lpfc/lpfc_sli4.h
> > index bd5b5c3de35e..20bc6d3d0653 100644
> > --- a/drivers/scsi/lpfc/lpfc_sli4.h
> > +++ b/drivers/scsi/lpfc/lpfc_sli4.h
> > @@ -1080,4 +1080,8 @@ int lpfc_sli4_post_status_check(struct
> > lpfc_hba *);
> > uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *,
> > LPFC_MBOXQ_t *);
> > uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *,
> > LPFC_MBOXQ_t *);
> > void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
> > -inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
> > +static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t
> > idx)
> > +{
> > +	return q->q_pgs[idx / q->entry_cnt_per_pg] +
> > +		(q->entry_size * (idx % q->entry_cnt_per_pg));
> > +}
> 
> 
> Agree. Thank You James.

So there are a couple of other instances you can fix at your leisure:
they're not causing immediate linux-next problems because they're
apparently only used within one file, so the body is available,
but if the use expands we'll get the same problem:

lpfc_sli4.h:inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *);
lpfc_sli4.h:inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q);

James
Martin K. Petersen March 20, 2019, 3:21 p.m. UTC | #4
James,

> So there are a couple of other instances you can fix at your leisure:
> they're not causing immediate linux-next problems because the body of
> they're only apparently used within one file so the body is available,
> but if the use expands we'll get the same problem:
>
> lpfc_sli4.h:inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *);
> lpfc_sli4.h:inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q);

Let's just get all these fixed up. James S: Please send me a patch ASAP.
James Smart March 20, 2019, 5:44 p.m. UTC | #5
On 3/20/2019 8:21 AM, Martin K. Petersen wrote:
> 
> James,
> 
>> So there are a couple of other instances you can fix at your leisure:
>> they're not causing immediate linux-next problems because the body of
>> they're only apparently used within one file so the body is available,
>> but if the use expands we'll get the same problem:
>>
>> lpfc_sli4.h:inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *);
>> lpfc_sli4.h:inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q);
> 
> Let's just get all these fixed up. James S: Please send me a patch ASAP.
> 

done

-- james
diff mbox series

Patch

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 1215eaa530db..cdf26eb02225 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -4135,7 +4135,7 @@  lpfc_idiag_queacc_read_qe(char *pbuffer, int len, struct lpfc_queue *pque,
 			"QE-INDEX[%04d]:\n", index);
 
 	offset = 0;
-	pentry = pque->qe[index].address;
+	pentry = lpfc_sli4_qe(pque, index);
 	while (esize > 0) {
 		len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len,
 				"%08x ", *pentry);
@@ -4485,7 +4485,7 @@  lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 		pque = (struct lpfc_queue *)idiag.ptr_private;
 		if (offset > pque->entry_size/sizeof(uint32_t) - 1)
 			goto error_out;
-		pentry = pque->qe[index].address;
+		pentry = lpfc_sli4_qe(pque, index);
 		pentry += offset;
 		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR)
 			*pentry = value;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 93ab7dfb8ee0..e42c1fac72cf 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -345,7 +345,7 @@  lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx)
 
 	esize = q->entry_size;
 	qe_word_cnt = esize / sizeof(uint32_t);
-	pword = q->qe[idx].address;
+	pword = lpfc_sli4_qe(q, idx);
 
 	len = 0;
 	len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, "QE[%04d]: ", idx);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2b130f7bcc97..5b630643d950 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -151,7 +151,7 @@  lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return -ENOMEM;
-	temp_wqe = q->qe[q->host_index].wqe;
+	temp_wqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the host has not yet processed the next entry then we are done */
 	idx = ((q->host_index + 1) % q->entry_count);
@@ -271,7 +271,7 @@  lpfc_sli4_mq_put(struct lpfc_queue *q, struct lpfc_mqe *mqe)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return -ENOMEM;
-	temp_mqe = q->qe[q->host_index].mqe;
+	temp_mqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the host has not yet processed the next entry then we are done */
 	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
@@ -331,7 +331,7 @@  lpfc_sli4_eq_get(struct lpfc_queue *q)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	eqe = q->qe[q->host_index].eqe;
+	eqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the next EQE is not valid then we are done */
 	if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
@@ -545,7 +545,7 @@  lpfc_sli4_cq_get(struct lpfc_queue *q)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	cqe = q->qe[q->host_index].cqe;
+	cqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the next CQE is not valid then we are done */
 	if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
@@ -667,8 +667,8 @@  lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		return -ENOMEM;
 	hq_put_index = hq->host_index;
 	dq_put_index = dq->host_index;
-	temp_hrqe = hq->qe[hq_put_index].rqe;
-	temp_drqe = dq->qe[dq_put_index].rqe;
+	temp_hrqe = lpfc_sli4_qe(hq, hq_put_index);
+	temp_drqe = lpfc_sli4_qe(dq, dq_put_index);
 
 	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
 		return -EINVAL;
@@ -7879,8 +7879,9 @@  lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
 	mcq = phba->sli4_hba.mbx_cq;
 	idx = mcq->hba_index;
 	qe_valid = mcq->qe_valid;
-	while (bf_get_le32(lpfc_cqe_valid, mcq->qe[idx].cqe) == qe_valid) {
-		mcqe = (struct lpfc_mcqe *)mcq->qe[idx].cqe;
+	while (bf_get_le32(lpfc_cqe_valid,
+	       (struct lpfc_cqe *)lpfc_sli4_qe(mcq, idx)) == qe_valid) {
+		mcqe = (struct lpfc_mcqe *)(lpfc_sli4_qe(mcq, idx));
 		if (bf_get_le32(lpfc_trailer_completed, mcqe) &&
 		    (!bf_get_le32(lpfc_trailer_async, mcqe))) {
 			pending_completions = true;
@@ -14506,24 +14507,22 @@  lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 {
 	struct lpfc_queue *queue;
 	struct lpfc_dmabuf *dmabuf;
-	int x, total_qe_count;
-	void *dma_pointer;
 	uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+	uint16_t x, pgcnt;
 
 	if (!phba->sli4_hba.pc_sli4_params.supported)
 		hw_page_size = page_size;
 
+	pgcnt = ALIGN(entry_size * entry_count, hw_page_size) / hw_page_size;
+
+	/* If needed, Adjust page count to match the max the adapter supports */
+	if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
+		pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
+
 	queue = kzalloc(sizeof(struct lpfc_queue) +
-			(sizeof(union sli4_qe) * entry_count), GFP_KERNEL);
+			(sizeof(void *) * pgcnt), GFP_KERNEL);
 	if (!queue)
 		return NULL;
-	queue->page_count = (ALIGN(entry_size * entry_count,
-			hw_page_size))/hw_page_size;
-
-	/* If needed, Adjust page count to match the max the adapter supports */
-	if (phba->sli4_hba.pc_sli4_params.wqpcnt &&
-	    (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt))
-		queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt;
 
 	INIT_LIST_HEAD(&queue->list);
 	INIT_LIST_HEAD(&queue->wq_list);
@@ -14535,12 +14534,15 @@  lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	/* Set queue parameters now.  If the system cannot provide memory
 	 * resources, the free routine needs to know what was allocated.
 	 */
+	queue->page_count = pgcnt;
+	queue->q_pgs = (void **)&queue[1];
+	queue->entry_cnt_per_pg = hw_page_size / entry_size;
 	queue->entry_size = entry_size;
 	queue->entry_count = entry_count;
 	queue->page_size = hw_page_size;
 	queue->phba = phba;
 
-	for (x = 0, total_qe_count = 0; x < queue->page_count; x++) {
+	for (x = 0; x < queue->page_count; x++) {
 		dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
 		if (!dmabuf)
 			goto out_fail;
@@ -14553,13 +14555,8 @@  lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 		}
 		dmabuf->buffer_tag = x;
 		list_add_tail(&dmabuf->list, &queue->page_list);
-		/* initialize queue's entry array */
-		dma_pointer = dmabuf->virt;
-		for (; total_qe_count < entry_count &&
-		     dma_pointer < (hw_page_size + dmabuf->virt);
-		     total_qe_count++, dma_pointer += entry_size) {
-			queue->qe[total_qe_count].address = dma_pointer;
-		}
+		/* use lpfc_sli4_qe to index a particular entry in this page */
+		queue->q_pgs[x] = dmabuf->virt;
 	}
 	INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
 	INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
@@ -14574,6 +14571,12 @@  lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	return NULL;
 }
 
+inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
+{
+	return q->q_pgs[idx / q->entry_cnt_per_pg] +
+		(q->entry_size * (idx % q->entry_cnt_per_pg));
+}
+
 /**
  * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
  * @phba: HBA structure that indicates port to create a queue on.
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 325069abc087..b86ac85b65d0 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -117,21 +117,6 @@  enum lpfc_sli4_queue_subtype {
 	LPFC_USOL
 };
 
-union sli4_qe {
-	void *address;
-	struct lpfc_eqe *eqe;
-	struct lpfc_cqe *cqe;
-	struct lpfc_mcqe *mcqe;
-	struct lpfc_wcqe_complete *wcqe_complete;
-	struct lpfc_wcqe_release *wcqe_release;
-	struct sli4_wcqe_xri_aborted *wcqe_xri_aborted;
-	struct lpfc_rcqe_complete *rcqe_complete;
-	struct lpfc_mqe *mqe;
-	union  lpfc_wqe *wqe;
-	union  lpfc_wqe128 *wqe128;
-	struct lpfc_rqe *rqe;
-};
-
 /* RQ buffer list */
 struct lpfc_rqb {
 	uint16_t entry_count;	  /* Current number of RQ slots */
@@ -157,6 +142,7 @@  struct lpfc_queue {
 	struct list_head cpu_list;
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
+	uint32_t entry_cnt_per_pg;
 	uint32_t notify_interval; /* Queue Notification Interval
 				   * For chip->host queues (EQ, CQ, RQ):
 				   *  specifies the interval (number of
@@ -254,7 +240,7 @@  struct lpfc_queue {
 	uint16_t last_cpu;	/* most recent cpu */
 	uint8_t	qe_valid;
 	struct lpfc_queue *assoc_qp;
-	union sli4_qe qe[1];	/* array to index entries (must be last) */
+	void **q_pgs;	/* array to index entries per page */
 };
 
 struct lpfc_sli4_link {
@@ -1092,3 +1078,4 @@  int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
+inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);