diff mbox series

[v1,for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors

Message ID 20250224125153.13728-1-rengarajan.s@microchip.com (mailing list archive)
State New
Headers show
Series [v1,for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors | expand

Commit Message

Rengarajan S Feb. 24, 2025, 12:51 p.m. UTC
In Raspberry-pi CM4 devices with BCM2711 processor, the documentation
points to a limitation with 64-bit accesses. Using memcpy_fromio and
memcpy_toio for each 64-bit SPI read/write causes the first 4 bytes to be
repeated. To address the limitation, each read/write is limited to 4
bytes in case of BCM2711 processors.

On x64 systems, using memcpy_toio and memcpy_fromio results in 4-byte TLP
writes instead of 8-byte. Add the custom IO write and read for enabling
64-bit access by default.

Tested and verified performance improvement on x64 devices while
transferring 1024 bytes for 20000 iterations at 25 MHz clock frequency:

Test 1: With memcpy_fromio and memcpy_toio
spi mode: 0x0
bits per word: 8
max speed: 25000000 Hz (25000 kHz)
rate: tx 6232.5kbps, rx 6232.5kbps
rate: tx 6889.5kbps, rx 6889.5kbps
rate: tx 6765.0kbps, rx 6765.0kbps
rate: tx 6873.1kbps, rx 6873.1kbps
total: tx 20000.0KB, rx 20000.0KB

Test 2: With the custom IO write and read
spi mode: 0x0
bits per word: 8
max speed: 25000000 Hz (25000 kHz)
rate: tx 9774.7kbps, rx 9774.7kbps
rate: tx 10985.5kbps, rx 10985.5kbps
rate: tx 10749.5kbps, rx 10749.5kbps
total: tx 20000.0KB, rx 20000.0KB

Signed-off-by: Rengarajan S <rengarajan.s@microchip.com>
---
 drivers/spi/spi-pci1xxxx.c | 95 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 4 deletions(-)

Comments

Mark Brown Feb. 24, 2025, 2:30 p.m. UTC | #1
On Mon, Feb 24, 2025 at 06:21:53PM +0530, Rengarajan S wrote:
> In Raspberry-pi CM4 devices with BCM2711 processor, the documentation
> points to a limitation with 64-bit accesses. Using memcpy_fromio and
> memcpy_toio for each 64-bit SPI read/write causes the first 4 bytes to be
> repeated. To address the limitation, each read/write is limited to 4
> bytes in case of BCM2711 processors.

This feels like something we ought to be able to figure out from the PCI
subsystem rather than requiring us to enumerate specific SoCs, or at
least have PCI drivers be able to enumerate the system PCI quirk from
the PCI core.  What's the story with making this a per driver per SoC
thing - is there some reason it won't come up elsewhere?
kernel test robot Feb. 27, 2025, 2 p.m. UTC | #2
Hi Rengarajan,

kernel test robot noticed the following build errors:

[auto build test ERROR on broonie-spi/for-next]
[also build test ERROR on linus/master v6.14-rc4 next-20250227]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Rengarajan-S/spi-mchp-pci1xxxx-Updated-memcpy-implementation-for-x64-and-bcm2711-processors/20250224-205745
base:   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next
patch link:    https://lore.kernel.org/r/20250224125153.13728-1-rengarajan.s%40microchip.com
patch subject: [PATCH v1 for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors
config: sparc-randconfig-001-20250227 (https://download.01.org/0day-ci/archive/20250227/202502272153.zJWKuv3R-lkp@intel.com/config)
compiler: sparc-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250227/202502272153.zJWKuv3R-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202502272153.zJWKuv3R-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/spi/spi-pci1xxxx.c: In function 'pci1xxxx_spi_write_to_io':
>> drivers/spi/spi-pci1xxxx.c:417:25: error: implicit declaration of function '__raw_writeq'; did you mean '__raw_writel'? [-Wimplicit-function-declaration]
     417 |                         __raw_writeq(*(u64 *)from, to);
         |                         ^~~~~~~~~~~~
         |                         __raw_writel
   drivers/spi/spi-pci1xxxx.c: In function 'pci1xxxx_spi_read_from_io':
>> drivers/spi/spi-pci1xxxx.c:448:38: error: implicit declaration of function '__raw_readq'; did you mean '__raw_readl'? [-Wimplicit-function-declaration]
     448 |                         *(u64 *)to = __raw_readq(from);
         |                                      ^~~~~~~~~~~
         |                                      __raw_readl


vim +417 drivers/spi/spi-pci1xxxx.c

   410	
   411	static void pci1xxxx_spi_write_to_io(void __iomem *to, const void *from,
   412					     size_t count, size_t size)
   413	{
   414		while (count) {
   415			if (size == 8 && (IS_ALIGNED((unsigned long)to, 8)) &&
   416			    count >= 8) {
 > 417				__raw_writeq(*(u64 *)from, to);
   418				from += 8;
   419				to += 8;
   420				count -= 8;
   421			} else if (size >= 4 && (IS_ALIGNED((unsigned long)to, 4)) &&
   422				   count >= 4) {
   423				__raw_writel(*(u32 *)from, to);
   424				from += 4;
   425				to += 4;
   426				count -= 4;
   427			} else if (size >= 2 && (IS_ALIGNED((unsigned long)to, 2)) &&
   428				   count >= 2) {
   429				__raw_writew(*(u16 *)from, to);
   430				from += 2;
   431				to += 2;
   432				count -= 2;
   433			} else {
   434				__raw_writeb(*(u8 *)from, to);
   435				from += 1;
   436				to += 1;
   437				count -= 1;
   438			}
   439		}
   440	}
   441	
   442	static void pci1xxxx_spi_read_from_io(void *to, const void __iomem *from,
   443					      size_t count, size_t size)
   444	{
   445		while (count) {
   446			if (size == 8 && (IS_ALIGNED((unsigned long)from, 8)) &&
   447			    count >= 8) {
 > 448				*(u64 *)to = __raw_readq(from);
   449				from += 8;
   450				to += 8;
   451				count -= 8;
   452			} else if (size >= 4 && (IS_ALIGNED((unsigned long)from, 4)) &&
   453				   count >= 4) {
   454				*(u32 *)to = __raw_readl(from);
   455				from += 4;
   456				to += 4;
   457				count -= 4;
   458			} else if (size >= 2 && (IS_ALIGNED((unsigned long)from, 2)) &&
   459				   count >= 2) {
   460				*(u16 *)to = __raw_readw(from);
   461				from += 2;
   462				to += 2;
   463				count -= 2;
   464			} else {
   465				*(u8 *)to = __raw_readb(from);
   466				from += 1;
   467				to += 1;
   468				count -= 1;
   469			}
   470		}
   471	}
   472
kernel test robot Feb. 27, 2025, 7:50 p.m. UTC | #3
Hi Rengarajan,

kernel test robot noticed the following build errors:

[auto build test ERROR on broonie-spi/for-next]
[also build test ERROR on linus/master v6.14-rc4 next-20250227]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Rengarajan-S/spi-mchp-pci1xxxx-Updated-memcpy-implementation-for-x64-and-bcm2711-processors/20250224-205745
base:   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next
patch link:    https://lore.kernel.org/r/20250224125153.13728-1-rengarajan.s%40microchip.com
patch subject: [PATCH v1 for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors
config: arm-randconfig-002-20250227 (https://download.01.org/0day-ci/archive/20250228/202502280356.AulpRaPU-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250228/202502280356.AulpRaPU-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202502280356.AulpRaPU-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/spi/spi-pci1xxxx.c:417:4: error: call to undeclared function '__raw_writeq'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     417 |                         __raw_writeq(*(u64 *)from, to);
         |                         ^
>> drivers/spi/spi-pci1xxxx.c:448:17: error: call to undeclared function '__raw_readq'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     448 |                         *(u64 *)to = __raw_readq(from);
         |                                      ^
   2 errors generated.


vim +/__raw_writeq +417 drivers/spi/spi-pci1xxxx.c

   410	
   411	static void pci1xxxx_spi_write_to_io(void __iomem *to, const void *from,
   412					     size_t count, size_t size)
   413	{
   414		while (count) {
   415			if (size == 8 && (IS_ALIGNED((unsigned long)to, 8)) &&
   416			    count >= 8) {
 > 417				__raw_writeq(*(u64 *)from, to);
   418				from += 8;
   419				to += 8;
   420				count -= 8;
   421			} else if (size >= 4 && (IS_ALIGNED((unsigned long)to, 4)) &&
   422				   count >= 4) {
   423				__raw_writel(*(u32 *)from, to);
   424				from += 4;
   425				to += 4;
   426				count -= 4;
   427			} else if (size >= 2 && (IS_ALIGNED((unsigned long)to, 2)) &&
   428				   count >= 2) {
   429				__raw_writew(*(u16 *)from, to);
   430				from += 2;
   431				to += 2;
   432				count -= 2;
   433			} else {
   434				__raw_writeb(*(u8 *)from, to);
   435				from += 1;
   436				to += 1;
   437				count -= 1;
   438			}
   439		}
   440	}
   441	
   442	static void pci1xxxx_spi_read_from_io(void *to, const void __iomem *from,
   443					      size_t count, size_t size)
   444	{
   445		while (count) {
   446			if (size == 8 && (IS_ALIGNED((unsigned long)from, 8)) &&
   447			    count >= 8) {
 > 448				*(u64 *)to = __raw_readq(from);
   449				from += 8;
   450				to += 8;
   451				count -= 8;
   452			} else if (size >= 4 && (IS_ALIGNED((unsigned long)from, 4)) &&
   453				   count >= 4) {
   454				*(u32 *)to = __raw_readl(from);
   455				from += 4;
   456				to += 4;
   457				count -= 4;
   458			} else if (size >= 2 && (IS_ALIGNED((unsigned long)from, 2)) &&
   459				   count >= 2) {
   460				*(u16 *)to = __raw_readw(from);
   461				from += 2;
   462				to += 2;
   463				count -= 2;
   464			} else {
   465				*(u8 *)to = __raw_readb(from);
   466				from += 1;
   467				to += 1;
   468				count -= 1;
   469			}
   470		}
   471	}
   472
diff mbox series

Patch

diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index fc98979eba48..ae1d76f03268 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -12,6 +12,7 @@ 
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/msi.h>
+#include <linux/of.h>
 #include <linux/pci_regs.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
@@ -407,6 +408,68 @@  static void pci1xxxx_start_spi_xfer(struct pci1xxxx_spi_internal *p, u8 hw_inst)
 	writel(regval, p->parent->reg_base + SPI_MST_CTL_REG_OFFSET(hw_inst));
 }
 
+static void pci1xxxx_spi_write_to_io(void __iomem *to, const void *from,
+				     size_t count, size_t size)
+{
+	while (count) {
+		if (size == 8 && (IS_ALIGNED((unsigned long)to, 8)) &&
+		    count >= 8) {
+			__raw_writeq(*(u64 *)from, to);
+			from += 8;
+			to += 8;
+			count -= 8;
+		} else if (size >= 4 && (IS_ALIGNED((unsigned long)to, 4)) &&
+			   count >= 4) {
+			__raw_writel(*(u32 *)from, to);
+			from += 4;
+			to += 4;
+			count -= 4;
+		} else if (size >= 2 && (IS_ALIGNED((unsigned long)to, 2)) &&
+			   count >= 2) {
+			__raw_writew(*(u16 *)from, to);
+			from += 2;
+			to += 2;
+			count -= 2;
+		} else {
+			__raw_writeb(*(u8 *)from, to);
+			from += 1;
+			to += 1;
+			count -= 1;
+		}
+	}
+}
+
+static void pci1xxxx_spi_read_from_io(void *to, const void __iomem *from,
+				      size_t count, size_t size)
+{
+	while (count) {
+		if (size == 8 && (IS_ALIGNED((unsigned long)from, 8)) &&
+		    count >= 8) {
+			*(u64 *)to = __raw_readq(from);
+			from += 8;
+			to += 8;
+			count -= 8;
+		} else if (size >= 4 && (IS_ALIGNED((unsigned long)from, 4)) &&
+			   count >= 4) {
+			*(u32 *)to = __raw_readl(from);
+			from += 4;
+			to += 4;
+			count -= 4;
+		} else if (size >= 2 && (IS_ALIGNED((unsigned long)from, 2)) &&
+			   count >= 2) {
+			*(u16 *)to = __raw_readw(from);
+			from += 2;
+			to += 2;
+			count -= 2;
+		} else {
+			*(u8 *)to = __raw_readb(from);
+			from += 1;
+			to += 1;
+			count -= 1;
+		}
+	}
+}
+
 static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 					 struct spi_device *spi, struct spi_transfer *xfer)
 {
@@ -444,8 +507,23 @@  static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 				len = transfer_len % SPI_MAX_DATA_LEN;
 
 			reinit_completion(&p->spi_xfer_done);
-			memcpy_toio(par->reg_base + SPI_MST_CMD_BUF_OFFSET(p->hw_inst),
-				    &tx_buf[bytes_transfered], len);
+			/*
+			 * Raspberry Pi CM4 BCM2711 doesn't support 64-bit
+			 * accesses.
+			 */
+			if (of_machine_is_compatible("brcm,bcm2711")) {
+				pci1xxxx_spi_write_to_io(par->reg_base +
+							 SPI_MST_CMD_BUF_OFFSET
+							 (p->hw_inst),
+							 &tx_buf[bytes_transfered],
+							 len, 4);
+			} else {
+				pci1xxxx_spi_write_to_io(par->reg_base +
+							 SPI_MST_CMD_BUF_OFFSET
+							 (p->hw_inst),
+							 &tx_buf[bytes_transfered],
+							 len, 8);
+			}
 			bytes_transfered += len;
 			pci1xxxx_spi_setup(par, p->hw_inst, spi->mode, clkdiv, len);
 			pci1xxxx_start_spi_xfer(p, p->hw_inst);
@@ -457,8 +535,17 @@  static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 				return -ETIMEDOUT;
 
 			if (rx_buf) {
-				memcpy_fromio(&rx_buf[bytes_recvd], par->reg_base +
-					      SPI_MST_RSP_BUF_OFFSET(p->hw_inst), len);
+				if (of_machine_is_compatible("brcm,bcm2711")) {
+					pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd],
+								  par->reg_base +
+								  SPI_MST_RSP_BUF_OFFSET
+								  (p->hw_inst), len, 4);
+				} else {
+					pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd],
+								  par->reg_base +
+								  SPI_MST_RSP_BUF_OFFSET
+								  (p->hw_inst), len, 8);
+				}
 				bytes_recvd += len;
 			}
 		}