diff mbox

scsi: csiostor: update ingress pack and pad boundary value

Message ID 1531837526-7066-1-git-send-email-varun@chelsio.com (mailing list archive)
State Accepted
Headers show

Commit Message

Varun Prakash July 17, 2018, 2:25 p.m. UTC
T5/T6 can have different pack and pad boundary value.
This patch sets packing boundary based on cache
line size and PCI-E maximum payload size and sets
smallest padding boundary value.

Signed-off-by: Varun Prakash <varun@chelsio.com>
---
 drivers/scsi/csiostor/csio_wr.c | 84 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 79 insertions(+), 5 deletions(-)

Comments

Martin K. Petersen July 20, 2018, 2:04 a.m. UTC | #1
Varun,

> T5/T6 can have different pack and pad boundary value.  This patch sets
> packing boundary based on cache line size and PCI-E maximum payload
> size and sets smallest padding boundary value.

Applied to 4.19/scsi-queue, thanks!
diff mbox

Patch

diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
index faa357b..5022e82 100644
--- a/drivers/scsi/csiostor/csio_wr.c
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -39,6 +39,7 @@ 
 #include <asm/page.h>
 #include <linux/cache.h>
 
+#include "t4_values.h"
 #include "csio_hw.h"
 #include "csio_wr.h"
 #include "csio_mb.h"
@@ -1309,8 +1310,11 @@  csio_wr_fixup_host_params(struct csio_hw *hw)
 	struct csio_sge *sge = &wrm->sge;
 	uint32_t clsz = L1_CACHE_BYTES;
 	uint32_t s_hps = PAGE_SHIFT - 10;
-	uint32_t ingpad = 0;
 	uint32_t stat_len = clsz > 64 ? 128 : 64;
+	u32 fl_align = clsz < 32 ? 32 : clsz;
+	u32 pack_align;
+	u32 ingpad, ingpack;
+	int pcie_cap;
 
 	csio_wr_reg32(hw, HOSTPAGESIZEPF0_V(s_hps) | HOSTPAGESIZEPF1_V(s_hps) |
 		      HOSTPAGESIZEPF2_V(s_hps) | HOSTPAGESIZEPF3_V(s_hps) |
@@ -1318,14 +1322,82 @@  csio_wr_fixup_host_params(struct csio_hw *hw)
 		      HOSTPAGESIZEPF6_V(s_hps) | HOSTPAGESIZEPF7_V(s_hps),
 		      SGE_HOST_PAGE_SIZE_A);
 
-	sge->csio_fl_align = clsz < 32 ? 32 : clsz;
-	ingpad = ilog2(sge->csio_fl_align) - 5;
+	/* T5 introduced the separation of the Free List Padding and
+	 * Packing Boundaries.  Thus, we can select a smaller Padding
+	 * Boundary to avoid uselessly chewing up PCIe Link and Memory
+	 * Bandwidth, and use a Packing Boundary which is large enough
+	 * to avoid false sharing between CPUs, etc.
+	 *
+	 * For the PCI Link, the smaller the Padding Boundary the
+	 * better.  For the Memory Controller, a smaller Padding
+	 * Boundary is better until we cross under the Memory Line
+	 * Size (the minimum unit of transfer to/from Memory).  If we
+	 * have a Padding Boundary which is smaller than the Memory
+	 * Line Size, that'll involve a Read-Modify-Write cycle on the
+	 * Memory Controller which is never good.
+	 */
+
+	/* We want the Packing Boundary to be based on the Cache Line
+	 * Size in order to help avoid False Sharing performance
+	 * issues between CPUs, etc.  We also want the Packing
+	 * Boundary to incorporate the PCI-E Maximum Payload Size.  We
+	 * get best performance when the Packing Boundary is a
+	 * multiple of the Maximum Payload Size.
+	 */
+	pack_align = fl_align;
+	pcie_cap = pci_find_capability(hw->pdev, PCI_CAP_ID_EXP);
+	if (pcie_cap) {
+		u32 mps, mps_log;
+		u16 devctl;
+
+		/* The PCIe Device Control Maximum Payload Size field
+		 * [bits 7:5] encodes sizes as powers of 2 starting at
+		 * 128 bytes.
+		 */
+		pci_read_config_word(hw->pdev,
+				     pcie_cap + PCI_EXP_DEVCTL,
+				     &devctl);
+		mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
+		mps = 1 << mps_log;
+		if (mps > pack_align)
+			pack_align = mps;
+	}
+
+	/* T5/T6 have a special interpretation of the "0"
+	 * value for the Packing Boundary.  This corresponds to 16
+	 * bytes instead of the expected 32 bytes.
+	 */
+	if (pack_align <= 16) {
+		ingpack = INGPACKBOUNDARY_16B_X;
+		fl_align = 16;
+	} else if (pack_align == 32) {
+		ingpack = INGPACKBOUNDARY_64B_X;
+		fl_align = 64;
+	} else {
+		u32 pack_align_log = fls(pack_align) - 1;
+
+		ingpack = pack_align_log - INGPACKBOUNDARY_SHIFT_X;
+		fl_align = pack_align;
+	}
+
+	/* Use the smallest Ingress Padding which isn't smaller than
+	 * the Memory Controller Read/Write Size.  We'll take that as
+	 * being 8 bytes since we don't know of any system with a
+	 * wider Memory Controller Bus Width.
+	 */
+	if (csio_is_t5(hw->pdev->device & CSIO_HW_CHIP_MASK))
+		ingpad = INGPADBOUNDARY_32B_X;
+	else
+		ingpad = T6_INGPADBOUNDARY_8B_X;
 
 	csio_set_reg_field(hw, SGE_CONTROL_A,
 			   INGPADBOUNDARY_V(INGPADBOUNDARY_M) |
 			   EGRSTATUSPAGESIZE_F,
 			   INGPADBOUNDARY_V(ingpad) |
 			   EGRSTATUSPAGESIZE_V(stat_len != 64));
+	csio_set_reg_field(hw, SGE_CONTROL2_A,
+			   INGPACKBOUNDARY_V(INGPACKBOUNDARY_M),
+			   INGPACKBOUNDARY_V(ingpack));
 
 	/* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */
 	csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0_A);
@@ -1337,14 +1409,16 @@  csio_wr_fixup_host_params(struct csio_hw *hw)
 	if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS) {
 		csio_wr_reg32(hw,
 			(csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2_A) +
-			sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+			fl_align - 1) & ~(fl_align - 1),
 			SGE_FL_BUFFER_SIZE2_A);
 		csio_wr_reg32(hw,
 			(csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3_A) +
-			sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+			fl_align - 1) & ~(fl_align - 1),
 			SGE_FL_BUFFER_SIZE3_A);
 	}
 
+	sge->csio_fl_align = fl_align;
+
 	csio_wr_reg32(hw, HPZ0_V(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ_A);
 
 	/* default value of rx_dma_offset of the NIC driver */