diff mbox

[v3,04/13] lpfc: Add push-to-adapter support to sli4

Message ID 20180213193457.21206-5-jsmart2021@gmail.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

James Smart Feb. 13, 2018, 7:34 p.m. UTC
New if_type=6 adapters support an additional BAR that provides
apertures to allow direct WQE to adapter push support - termed
Direct Packet Push (DPP). WQ creation differs slightly to ask for
a WQ to be DPP-ized. When submitting a WQE to a DPP WQ, it is
submitted to the host memory for the WQ normally, but is also
written by the host cpu directly to a BAR aperture.  Write buffer
coalescing in hardware is (hopefully) turned on, enabling single
pci write operation support. The doorbell is thing rung to indicate
the WQE is available and was pushed to the aperture.

This patch:
- Updates the WQ Create commands for the DPP options
- Adds the bar mapping for if_type=6 DPP bar
- Adds the WQE pushing to the DDP aperture received from WQ create
- Adds a new module parameter to disable DPP operation if desired.
  Default is enabled.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>

---
v3:
  remove unnecessary parens
  use ioremap_wc() instead of set_memory_wc(). the wc property
    is now set by default on the BAR. if direct push is disabled,
    the BAR won't be used so it won't matter what is set on it.
    Track cases where the ioremap_wc() may not succeed, leaving
    bar pointer NULL. In this case, disable direct push.
  As some platforms will honor ioremap_wc() but not truly enable
    wc, change default for direct push so enabled only on X86.
---
 drivers/scsi/lpfc/lpfc.h      |   3 +-
 drivers/scsi/lpfc/lpfc_attr.c |  14 +++
 drivers/scsi/lpfc/lpfc_hw4.h  |  31 ++++++
 drivers/scsi/lpfc/lpfc_init.c |  17 ++++
 drivers/scsi/lpfc/lpfc_sli.c  | 218 ++++++++++++++++++++++++++----------------
 drivers/scsi/lpfc/lpfc_sli4.h |  16 +++-
 6 files changed, 212 insertions(+), 87 deletions(-)

Comments

Johannes Thumshirn Feb. 14, 2018, 9:30 a.m. UTC | #1
On Tue, Feb 13, 2018 at 11:34:48AM -0800, James Smart wrote:
[...]
> diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
> index 3bff1f9c5df7..5e03b2c969e5 100644
> --- a/drivers/scsi/lpfc/lpfc_sli.c
> +++ b/drivers/scsi/lpfc/lpfc_sli.c
> @@ -35,6 +35,9 @@
>  #include <scsi/scsi_transport_fc.h>
>  #include <scsi/fc/fc_fs.h>
>  #include <linux/aer.h>
> +#ifdef CONFIG_X86
> +#include <asm/set_memory.h>
> +#endif

Not needed anymore now you've killed set_memory_wc(), isn't it?

[...]

> +	if (q->dpp_enable && q->phba->cfg_enable_dpp) {
> +		/* write to DPP aperture taking advatage of Combined Writes */
> +		tmp = (uint8_t *)wqe;
> +#ifdef CONFIG_64BIT
> +		for (i = 0; i < q->entry_size; i += sizeof(uint64_t))
> +			writeq(*((uint64_t *)(tmp + i)), q->dpp_regaddr + i);
> +#else
> +		for (i = 0; i < q->entry_size; i += sizeof(uint32_t))
> +			writel(*((uint32_t *)(tmp + i)), q->dpp_regaddr + i);
> +#endif
> +	}
> +	/* ensure WQE bcopy and DPP flushed before doorbell write */

Any reason you can't use writeq() on 32 Bit as well? There's a compat version
in linux/io-64-nonatomic-hi-lo.h.

Thanks,
	Johannes
James Smart Feb. 16, 2018, 4:53 p.m. UTC | #2
On 2/14/2018 1:30 AM, Johannes Thumshirn wrote:
> On Tue, Feb 13, 2018 at 11:34:48AM -0800, James Smart wrote:
> [...]
>> diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
>> index 3bff1f9c5df7..5e03b2c969e5 100644
>> --- a/drivers/scsi/lpfc/lpfc_sli.c
>> +++ b/drivers/scsi/lpfc/lpfc_sli.c
>> @@ -35,6 +35,9 @@
>>   #include <scsi/scsi_transport_fc.h>
>>   #include <scsi/fc/fc_fs.h>
>>   #include <linux/aer.h>
>> +#ifdef CONFIG_X86
>> +#include <asm/set_memory.h>
>> +#endif
> 
> Not needed anymore now you've killed set_memory_wc(), isn't it?

Agree... but, we've done more timing and it turns out the ioremap_wc() 
on X86 isn't behaving quite the same as set_memory_wc().  Works, but 
it's actually slower. I think ioremap_wc() is additionally making it 
cacheable, which seems to be delaying the postings to the io bus (even 
if wc) until the memory barrier. While the set_memory_wc() seems to 
flush as soon as the cacheline is filled.

Given everything we've seen so far - I'm going back to using 
set_memory_wc() as it's the fastest latency option we've measured.


> 
> [...]
> 
>> +	if (q->dpp_enable && q->phba->cfg_enable_dpp) {
>> +		/* write to DPP aperture taking advatage of Combined Writes */
>> +		tmp = (uint8_t *)wqe;
>> +#ifdef CONFIG_64BIT
>> +		for (i = 0; i < q->entry_size; i += sizeof(uint64_t))
>> +			writeq(*((uint64_t *)(tmp + i)), q->dpp_regaddr + i);
>> +#else
>> +		for (i = 0; i < q->entry_size; i += sizeof(uint32_t))
>> +			writel(*((uint32_t *)(tmp + i)), q->dpp_regaddr + i);
>> +#endif
>> +	}
>> +	/* ensure WQE bcopy and DPP flushed before doorbell write */
> 
> Any reason you can't use writeq() on 32 Bit as well? There's a compat version
> in linux/io-64-nonatomic-hi-lo.h.

We actually ran into issues on the existence of writeq() on a 32bit 
platform. Thus this code block.

-- james
Johannes Thumshirn Feb. 19, 2018, 8:14 a.m. UTC | #3
On Fri, Feb 16, 2018 at 08:53:44AM -0800, James Smart wrote:
> > Any reason you can't use writeq() on 32 Bit as well? There's a compat version
> > in linux/io-64-nonatomic-hi-lo.h.
> 
> We actually ran into issues on the existence of writeq() on a 32bit
> platform. Thus this code block.
 
Oh can you elaborate more on the issue? I bet if we merge it that way, someone
comes around with a patch chaning it to writeq() on 32Bit as well.

Wouldn't it be better to improve the 32Bit writeq() code?

Generally speaking (same for the WC issue), ifdefs (especially architecture
specific ones) in driver code should be avoided.

Thanks,
	Johannes
James Smart Feb. 20, 2018, 6:25 p.m. UTC | #4
On 2/19/2018 12:14 AM, Johannes Thumshirn wrote:
> On Fri, Feb 16, 2018 at 08:53:44AM -0800, James Smart wrote:
>>> Any reason you can't use writeq() on 32 Bit as well? There's a compat version
>>> in linux/io-64-nonatomic-hi-lo.h.
>>
>> We actually ran into issues on the existence of writeq() on a 32bit
>> platform. Thus this code block.
>   
> Oh can you elaborate more on the issue? I bet if we merge it that way, someone
> comes around with a patch chaning it to writeq() on 32Bit as well.
> 
> Wouldn't it be better to improve the 32Bit writeq() code?

Well, now that I'm asking for specific details internally, I'm finding 
that no one can find the failing machine any more.

I'm going to keep looking (and testing) for another day or two, and if 
nothing pops up, will repost removing the 64bit define.


> 
> Generally speaking (same for the WC issue), ifdefs (especially architecture
> specific ones) in driver code should be avoided.

Yes - I know. On the WC issue though, given how tightly bound the 
behavior is with the platform as well as whether it provides a real 
benefit vs a simple "it works", I don't believe this is one that I want 
to be "generic" on.

-- james
Johannes Thumshirn Feb. 21, 2018, 6:56 a.m. UTC | #5
On Tue, Feb 20, 2018 at 10:25:56AM -0800, James Smart wrote:
> > Wouldn't it be better to improve the 32Bit writeq() code?
> 
> Well, now that I'm asking for specific details internally, I'm finding that
> no one can find the failing machine any more.
> 
> I'm going to keep looking (and testing) for another day or two, and if
> nothing pops up, will repost removing the 64bit define.

Thanks :-)

> Yes - I know. On the WC issue though, given how tightly bound the behavior
> is with the platform as well as whether it provides a real benefit vs a
> simple "it works", I don't believe this is one that I want to be "generic"
> on.

Hmmm OK, but this won't make out ARM and PPC teams very happy.

Well let's see,
     Johannes
James Smart Feb. 21, 2018, 3:41 p.m. UTC | #6
On 2/20/2018 10:56 PM, Johannes Thumshirn wrote:
>> Yes - I know. On the WC issue though, given how tightly bound the behavior
>> is with the platform as well as whether it provides a real benefit vs a
>> simple "it works", I don't believe this is one that I want to be "generic"
>> on.
> 
> Hmmm OK, but this won't make out ARM and PPC teams very happy.

They are free to contact me, suggest something different, we'll 
benchmark and and will certainly change is there's a benefit.  That's
what we did on PPC at the start of this thread.

-- james
diff mbox

Patch

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 9698b9635058..86ffb9756e65 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -840,7 +840,8 @@  struct lpfc_hba {
 	uint32_t cfg_enable_SmartSAN;
 	uint32_t cfg_enable_mds_diags;
 	uint32_t cfg_enable_fc4_type;
-	uint32_t cfg_enable_bbcr;	/*Enable BB Credit Recovery*/
+	uint32_t cfg_enable_bbcr;	/* Enable BB Credit Recovery */
+	uint32_t cfg_enable_dpp;	/* Enable Direct Packet Push */
 	uint32_t cfg_xri_split;
 #define LPFC_ENABLE_FCP  1
 #define LPFC_ENABLE_NVME 2
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 7be4bdef4d42..e90d5066f66b 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5186,6 +5186,18 @@  LPFC_ATTR_R(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics");
  */
 LPFC_BBCR_ATTR_RW(enable_bbcr, 1, 0, 1, "Enable BBC Recovery");
 
+/*
+ * lpfc_enable_dpp: Enable DPP on G7
+ *       0  = DPP on G7 disabled
+ *       1  = DPP on G7 enabled (default)
+ * Value range is [0,1]. Default value is 1 on X86, 0 on other architectures.
+ */
+#ifdef CONFIG_X86
+LPFC_ATTR_RW(enable_dpp, 1, 0, 1, "Enable Direct Packet Push");
+#else
+LPFC_ATTR_RW(enable_dpp, 0, 0, 1, "Enable Direct Packet Push");
+#endif
+
 struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_nvme_info,
 	&dev_attr_bg_info,
@@ -5294,6 +5306,7 @@  struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_xlane_supported,
 	&dev_attr_lpfc_enable_mds_diags,
 	&dev_attr_lpfc_enable_bbcr,
+	&dev_attr_lpfc_enable_dpp,
 	NULL,
 };
 
@@ -6306,6 +6319,7 @@  lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel);
 	lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel);
 	lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr);
+	lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
 
 	if (phba->sli_rev != LPFC_SLI_REV4) {
 		/* NVME only supported on SLI4 */
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 93fd9fd10a0f..60ccff6fa8b0 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1372,6 +1372,15 @@  struct lpfc_mbx_wq_create {
 #define lpfc_mbx_wq_create_page_size_MASK	0x000000FF
 #define lpfc_mbx_wq_create_page_size_WORD	word1
 #define LPFC_WQ_PAGE_SIZE_4096	0x1
+#define lpfc_mbx_wq_create_dpp_req_SHIFT	15
+#define lpfc_mbx_wq_create_dpp_req_MASK		0x00000001
+#define lpfc_mbx_wq_create_dpp_req_WORD		word1
+#define lpfc_mbx_wq_create_doe_SHIFT		14
+#define lpfc_mbx_wq_create_doe_MASK		0x00000001
+#define lpfc_mbx_wq_create_doe_WORD		word1
+#define lpfc_mbx_wq_create_toe_SHIFT		13
+#define lpfc_mbx_wq_create_toe_MASK		0x00000001
+#define lpfc_mbx_wq_create_toe_WORD		word1
 #define lpfc_mbx_wq_create_wqe_size_SHIFT	8
 #define lpfc_mbx_wq_create_wqe_size_MASK	0x0000000F
 #define lpfc_mbx_wq_create_wqe_size_WORD	word1
@@ -1400,6 +1409,28 @@  struct lpfc_mbx_wq_create {
 #define lpfc_mbx_wq_create_db_format_MASK	0x0000FFFF
 #define lpfc_mbx_wq_create_db_format_WORD	word2
 		} response;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_create_dpp_rsp_SHIFT	31
+#define lpfc_mbx_wq_create_dpp_rsp_MASK		0x00000001
+#define lpfc_mbx_wq_create_dpp_rsp_WORD		word0
+#define lpfc_mbx_wq_create_v1_q_id_SHIFT	0
+#define lpfc_mbx_wq_create_v1_q_id_MASK		0x0000FFFF
+#define lpfc_mbx_wq_create_v1_q_id_WORD		word0
+			uint32_t word1;
+#define lpfc_mbx_wq_create_v1_bar_set_SHIFT	0
+#define lpfc_mbx_wq_create_v1_bar_set_MASK	0x0000000F
+#define lpfc_mbx_wq_create_v1_bar_set_WORD	word1
+			uint32_t doorbell_offset;
+			uint32_t word3;
+#define lpfc_mbx_wq_create_dpp_id_SHIFT		16
+#define lpfc_mbx_wq_create_dpp_id_MASK		0x0000001F
+#define lpfc_mbx_wq_create_dpp_id_WORD		word3
+#define lpfc_mbx_wq_create_dpp_bar_SHIFT	0
+#define lpfc_mbx_wq_create_dpp_bar_MASK		0x0000000F
+#define lpfc_mbx_wq_create_dpp_bar_WORD		word3
+			uint32_t dpp_offset;
+		} response_1;
 	} u;
 };
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 0d51ecb7317f..6db631ace750 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9611,6 +9611,23 @@  lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 		}
 	}
 
+	if (if_type == LPFC_SLI_INTF_IF_TYPE_6 &&
+	    pci_resource_start(pdev, PCI_64BIT_BAR4)) {
+		/*
+		 * Map SLI4 if type 6 HBA DPP Register base to a kernel
+		 * virtual address and setup the registers.
+		 */
+		phba->pci_bar2_map = pci_resource_start(pdev, PCI_64BIT_BAR4);
+		bar2map_len = pci_resource_len(pdev, PCI_64BIT_BAR4);
+		phba->sli4_hba.dpp_regs_memmap_p =
+				ioremap_wc(phba->pci_bar2_map, bar2map_len);
+		if (!phba->sli4_hba.dpp_regs_memmap_p)
+			/* not a failure, we simply won't use dpp */
+			dev_err(&pdev->dev,
+			   "ioremap failed for SLI4 HBA dpp registers.\n");
+		phba->pci_bar4_memmap_p = phba->sli4_hba.dpp_regs_memmap_p;
+	}
+
 	/* Set up the EQ/CQ register handeling functions now */
 	switch (if_type) {
 	case LPFC_SLI_INTF_IF_TYPE_0:
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 3bff1f9c5df7..5e03b2c969e5 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -35,6 +35,9 @@ 
 #include <scsi/scsi_transport_fc.h>
 #include <scsi/fc/fc_fs.h>
 #include <linux/aer.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
 
 #include <linux/nvme-fc-driver.h>
 
@@ -112,6 +115,8 @@  lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
 	struct lpfc_register doorbell;
 	uint32_t host_index;
 	uint32_t idx;
+	uint32_t i = 0;
+	uint8_t *tmp;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
@@ -133,7 +138,18 @@  lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
 	if (q->phba->sli3_options & LPFC_SLI4_PHWQ_ENABLED)
 		bf_set(wqe_wqid, &wqe->generic.wqe_com, q->queue_id);
 	lpfc_sli_pcimem_bcopy(wqe, temp_wqe, q->entry_size);
-	/* ensure WQE bcopy flushed before doorbell write */
+	if (q->dpp_enable && q->phba->cfg_enable_dpp) {
+		/* write to DPP aperture taking advatage of Combined Writes */
+		tmp = (uint8_t *)wqe;
+#ifdef CONFIG_64BIT
+		for (i = 0; i < q->entry_size; i += sizeof(uint64_t))
+			writeq(*((uint64_t *)(tmp + i)), q->dpp_regaddr + i);
+#else
+		for (i = 0; i < q->entry_size; i += sizeof(uint32_t))
+			writel(*((uint32_t *)(tmp + i)), q->dpp_regaddr + i);
+#endif
+	}
+	/* ensure WQE bcopy and DPP flushed before doorbell write */
 	wmb();
 
 	/* Update the host index before invoking device */
@@ -144,9 +160,18 @@  lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
 	/* Ring Doorbell */
 	doorbell.word0 = 0;
 	if (q->db_format == LPFC_DB_LIST_FORMAT) {
-		bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1);
-		bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index);
-		bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id);
+		if (q->dpp_enable && q->phba->cfg_enable_dpp) {
+			bf_set(lpfc_if6_wq_db_list_fm_num_posted, &doorbell, 1);
+			bf_set(lpfc_if6_wq_db_list_fm_dpp, &doorbell, 1);
+			bf_set(lpfc_if6_wq_db_list_fm_dpp_id, &doorbell,
+			    q->dpp_id);
+			bf_set(lpfc_if6_wq_db_list_fm_id, &doorbell,
+			    q->queue_id);
+		} else {
+			bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1);
+			bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index);
+			bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id);
+		}
 	} else if (q->db_format == LPFC_DB_RING_FORMAT) {
 		bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1);
 		bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id);
@@ -15023,6 +15048,8 @@  lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	void __iomem *bar_memmap_p;
 	uint32_t db_offset;
 	uint16_t pci_barset;
+	uint8_t dpp_barset;
+	uint32_t dpp_offset;
 	uint8_t wq_create_version;
 
 	/* sanity check on queue memory */
@@ -15056,38 +15083,13 @@  lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	else
 		wq_create_version = LPFC_Q_CREATE_VERSION_0;
 
-	switch (wq_create_version) {
-	case LPFC_Q_CREATE_VERSION_0:
-		switch (wq->entry_size) {
-		default:
-		case 64:
-			/* Nothing to do, version 0 ONLY supports 64 byte */
-			page = wq_create->u.request.page;
-			break;
-		case 128:
-			if (!(phba->sli4_hba.pc_sli4_params.wqsize &
-			    LPFC_WQ_SZ128_SUPPORT)) {
-				status = -ERANGE;
-				goto out;
-			}
-			/* If we get here the HBA MUST also support V1 and
-			 * we MUST use it
-			 */
-			bf_set(lpfc_mbox_hdr_version, &shdr->request,
-			       LPFC_Q_CREATE_VERSION_1);
 
-			bf_set(lpfc_mbx_wq_create_wqe_count,
-			       &wq_create->u.request_1, wq->entry_count);
-			bf_set(lpfc_mbx_wq_create_wqe_size,
-			       &wq_create->u.request_1,
-			       LPFC_WQ_WQE_SIZE_128);
-			bf_set(lpfc_mbx_wq_create_page_size,
-			       &wq_create->u.request_1,
-			       LPFC_WQ_PAGE_SIZE_4096);
-			page = wq_create->u.request_1.page;
-			break;
-		}
-		break;
+	if (phba->sli4_hba.pc_sli4_params.wqsize & LPFC_WQ_SZ128_SUPPORT)
+		wq_create_version = LPFC_Q_CREATE_VERSION_1;
+	else
+		wq_create_version = LPFC_Q_CREATE_VERSION_0;
+
+	switch (wq_create_version) {
 	case LPFC_Q_CREATE_VERSION_1:
 		bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1,
 		       wq->entry_count);
@@ -15102,24 +15104,21 @@  lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 			       LPFC_WQ_WQE_SIZE_64);
 			break;
 		case 128:
-			if (!(phba->sli4_hba.pc_sli4_params.wqsize &
-				LPFC_WQ_SZ128_SUPPORT)) {
-				status = -ERANGE;
-				goto out;
-			}
 			bf_set(lpfc_mbx_wq_create_wqe_size,
 			       &wq_create->u.request_1,
 			       LPFC_WQ_WQE_SIZE_128);
 			break;
 		}
+		/* Request DPP by default */
+		bf_set(lpfc_mbx_wq_create_dpp_req, &wq_create->u.request_1, 1);
 		bf_set(lpfc_mbx_wq_create_page_size,
 		       &wq_create->u.request_1,
 		       (wq->page_size / SLI4_PAGE_SIZE));
 		page = wq_create->u.request_1.page;
 		break;
 	default:
-		status = -ERANGE;
-		goto out;
+		page = wq_create->u.request.page;
+		break;
 	}
 
 	list_for_each_entry(dmabuf, &wq->page_list, list) {
@@ -15143,52 +15142,105 @@  lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 		status = -ENXIO;
 		goto out;
 	}
-	wq->queue_id = bf_get(lpfc_mbx_wq_create_q_id, &wq_create->u.response);
+
+	if (wq_create_version == LPFC_Q_CREATE_VERSION_0)
+		wq->queue_id = bf_get(lpfc_mbx_wq_create_q_id,
+					&wq_create->u.response);
+	else
+		wq->queue_id = bf_get(lpfc_mbx_wq_create_v1_q_id,
+					&wq_create->u.response_1);
+
 	if (wq->queue_id == 0xFFFF) {
 		status = -ENXIO;
 		goto out;
 	}
-	if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) {
-		wq->db_format = bf_get(lpfc_mbx_wq_create_db_format,
-				       &wq_create->u.response);
-		if ((wq->db_format != LPFC_DB_LIST_FORMAT) &&
-		    (wq->db_format != LPFC_DB_RING_FORMAT)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"3265 WQ[%d] doorbell format not "
-					"supported: x%x\n", wq->queue_id,
-					wq->db_format);
-			status = -EINVAL;
-			goto out;
-		}
-		pci_barset = bf_get(lpfc_mbx_wq_create_bar_set,
-				    &wq_create->u.response);
-		bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset);
-		if (!bar_memmap_p) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"3263 WQ[%d] failed to memmap pci "
-					"barset:x%x\n", wq->queue_id,
-					pci_barset);
-			status = -ENOMEM;
-			goto out;
-		}
-		db_offset = wq_create->u.response.doorbell_offset;
-		if ((db_offset != LPFC_ULP0_WQ_DOORBELL) &&
-		    (db_offset != LPFC_ULP1_WQ_DOORBELL)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"3252 WQ[%d] doorbell offset not "
-					"supported: x%x\n", wq->queue_id,
-					db_offset);
-			status = -EINVAL;
-			goto out;
-		}
-		wq->db_regaddr = bar_memmap_p + db_offset;
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"3264 WQ[%d]: barset:x%x, offset:x%x, "
-				"format:x%x\n", wq->queue_id, pci_barset,
-				db_offset, wq->db_format);
+
+	wq->db_format = LPFC_DB_LIST_FORMAT;
+	if (wq_create_version == LPFC_Q_CREATE_VERSION_0) {
+		if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) {
+			wq->db_format = bf_get(lpfc_mbx_wq_create_db_format,
+					       &wq_create->u.response);
+			if ((wq->db_format != LPFC_DB_LIST_FORMAT) &&
+			    (wq->db_format != LPFC_DB_RING_FORMAT)) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"3265 WQ[%d] doorbell format "
+						"not supported: x%x\n",
+						wq->queue_id, wq->db_format);
+				status = -EINVAL;
+				goto out;
+			}
+			pci_barset = bf_get(lpfc_mbx_wq_create_bar_set,
+					    &wq_create->u.response);
+			bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba,
+								   pci_barset);
+			if (!bar_memmap_p) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"3263 WQ[%d] failed to memmap "
+						"pci barset:x%x\n",
+						wq->queue_id, pci_barset);
+				status = -ENOMEM;
+				goto out;
+			}
+			db_offset = wq_create->u.response.doorbell_offset;
+			if ((db_offset != LPFC_ULP0_WQ_DOORBELL) &&
+			    (db_offset != LPFC_ULP1_WQ_DOORBELL)) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"3252 WQ[%d] doorbell offset "
+						"not supported: x%x\n",
+						wq->queue_id, db_offset);
+				status = -EINVAL;
+				goto out;
+			}
+			wq->db_regaddr = bar_memmap_p + db_offset;
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+					"3264 WQ[%d]: barset:x%x, offset:x%x, "
+					"format:x%x\n", wq->queue_id,
+					pci_barset, db_offset, wq->db_format);
+		} else
+			wq->db_regaddr = phba->sli4_hba.WQDBregaddr;
 	} else {
-		wq->db_format = LPFC_DB_LIST_FORMAT;
-		wq->db_regaddr = phba->sli4_hba.WQDBregaddr;
+		/* Check if DPP was honored by the firmware */
+		wq->dpp_enable = bf_get(lpfc_mbx_wq_create_dpp_rsp,
+				    &wq_create->u.response_1);
+		if (wq->dpp_enable) {
+			pci_barset = bf_get(lpfc_mbx_wq_create_v1_bar_set,
+					    &wq_create->u.response_1);
+			bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba,
+								   pci_barset);
+			if (!bar_memmap_p) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"3267 WQ[%d] failed to memmap "
+						"pci barset:x%x\n",
+						wq->queue_id, pci_barset);
+				status = -ENOMEM;
+				goto out;
+			}
+			db_offset = wq_create->u.response_1.doorbell_offset;
+			wq->db_regaddr = bar_memmap_p + db_offset;
+			wq->dpp_id = bf_get(lpfc_mbx_wq_create_dpp_id,
+					    &wq_create->u.response_1);
+			dpp_barset = bf_get(lpfc_mbx_wq_create_dpp_bar,
+					    &wq_create->u.response_1);
+			bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba,
+								   dpp_barset);
+			if (bar_memmap_p) {
+				dpp_offset = wq_create->u.response_1.dpp_offset;
+				wq->dpp_regaddr = bar_memmap_p + dpp_offset;
+				lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+					"3271 WQ[%d]: barset:x%x, offset:x%x, "
+					"dpp_id:x%x dpp_barset:x%x "
+					"dpp_offset:x%x\n",
+					wq->queue_id, pci_barset, db_offset,
+					wq->dpp_id, dpp_barset, dpp_offset);
+			} else {
+				phba->cfg_enable_dpp = 0;
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"3268 WQ[%d] dpp memmap "
+						"unavailable pci barset:x%x\n",
+						wq->queue_id, dpp_barset);
+			}
+		} else
+			wq->db_regaddr = phba->sli4_hba.WQDBregaddr;
 	}
 	wq->pring = kzalloc(sizeof(struct lpfc_sli_ring), GFP_KERNEL);
 	if (wq->pring == NULL) {
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 33838b4b28d9..708167b309bd 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -180,6 +180,10 @@  struct lpfc_queue {
 	uint8_t q_flag;
 #define HBA_NVMET_WQFULL	0x1 /* We hit WQ Full condition for NVMET */
 	void __iomem *db_regaddr;
+	uint16_t dpp_enable;
+	uint16_t dpp_id;
+	void __iomem *dpp_regaddr;
+
 	/* For q stats */
 	uint32_t q_cnt_1;
 	uint32_t q_cnt_2;
@@ -524,11 +528,17 @@  struct lpfc_vector_map_info {
 /* SLI4 HBA data structure entries */
 struct lpfc_sli4_hba {
 	void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
-					     PCI BAR0, config space registers */
+					   * config space registers
+					   */
 	void __iomem *ctrl_regs_memmap_p; /* Kernel memory mapped address for
-					     PCI BAR1, control registers */
+					   * control registers
+					   */
 	void __iomem *drbl_regs_memmap_p; /* Kernel memory mapped address for
-					     PCI BAR2, doorbell registers */
+					   * doorbell registers
+					   */
+	void __iomem *dpp_regs_memmap_p;  /* Kernel memory mapped address for
+					   * dpp registers
+					   */
 	union {
 		struct {
 			/* IF Type 0, BAR 0 PCI cfg space reg mem map */