diff mbox

[2/2] spi: imx: dynamic burst length adjust for DMA mode

Message ID 20170208062028.22313-3-jiada_wang@mentor.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wang, Jiada Feb. 8, 2017, 6:20 a.m. UTC
Previously, the burst length (BURST_LENGTH) was always set equal
to bits_per_word, which causes a 10us gap between each word in a
transfer and significantly affects performance.

This patch uses 32-bit transfers to simulate transfers with a smaller
word size, and adjusts the burst length to reduce the number of gaps
in a DMA transfer.

Signed-off-by: Jiada Wang <jiada_wang@mentor.com>
---
 drivers/spi/spi-imx.c | 154 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 130 insertions(+), 24 deletions(-)

Comments

kernel test robot Feb. 8, 2017, 8:46 a.m. UTC | #1
Hi Jiada,

[auto build test ERROR on next-20170207]

url:    https://github.com/0day-ci/linux/commits/Jiada-Wang/improve-imx-spi-performance/20170208-142524
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/spi/spi-imx.c: In function 'spi_imx_dma_transfer':
>> drivers/spi/spi-imx.c:1298:3: error: implicit declaration of function 'dmac_flush_range' [-Werror=implicit-function-declaration]
      dmac_flush_range(transfer->rx_buf,
      ^~~~~~~~~~~~~~~~
>> drivers/spi/spi-imx.c:1300:3: error: implicit declaration of function 'outer_flush_range' [-Werror=implicit-function-declaration]
      outer_flush_range(virt_to_phys(transfer->rx_buf),
      ^~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/dmac_flush_range +1298 drivers/spi/spi-imx.c

  1292	
  1293			if (spi_imx->bpw_w == 1)
  1294				spi_imx_u32_swap_u8(transfer, (u8 *)transfer->rx_buf);
  1295			if (spi_imx->bpw_w == 2)
  1296				spi_imx_u32_swap_u16(transfer,
  1297						(u16 *)transfer->rx_buf);
> 1298			dmac_flush_range(transfer->rx_buf,
  1299					transfer->rx_buf + transfer->len);
> 1300			outer_flush_range(virt_to_phys(transfer->rx_buf),
  1301					virt_to_phys(transfer->rx_buf) +
  1302					transfer->len);
  1303		}

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Feb. 8, 2017, 10:59 a.m. UTC | #2
Hi Jiada,

[auto build test ERROR on next-20170207]

url:    https://github.com/0day-ci/linux/commits/Jiada-Wang/improve-imx-spi-performance/20170208-142524
config: arm-arm5 (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm 

All errors (new ones prefixed by >>):

>> ERROR: "arm926_dma_flush_range" [drivers/spi/spi-imx.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 04b4ea8..68ff781 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -39,6 +39,8 @@ 
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
 
+#include <asm/cacheflush.h>
+
 #include <linux/platform_data/dma-imx.h>
 #include <linux/platform_data/spi-imx.h>
 
@@ -216,6 +218,7 @@  static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 	unsigned int bpw, i;
+	u32 length, div;
 
 	if (!master->dma_rx)
 		return false;
@@ -232,8 +235,18 @@  static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
 	if (bpw != 1 && bpw != 2 && bpw != 4)
 		return false;
 
+	length = transfer->len;
+
+	if (spi_imx->dynamic_burst) {
+		bpw = sizeof(u32);
+		length = transfer->len - transfer->len % sizeof(u32);
+		div = length / MX51_ECSPI_CTRL_MAX_BURST  + 1;
+		length = (length / div) - (length / div) % sizeof(u32);
+		spi_imx->count_index = transfer->len - length * div;
+	}
+
 	for (i = spi_imx_get_fifosize(spi_imx) / 2; i > 0; i--) {
-		if (!(transfer->len % (i * bpw)))
+		if (!(length % (i * bpw)))
 			break;
 	}
 
@@ -423,6 +436,7 @@  static int mx51_ecspi_config(struct spi_device *spi,
 	u32 ctrl = MX51_ECSPI_CTRL_ENABLE;
 	u32 clk = config->speed_hz, delay, reg;
 	u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
+	u32 div, length;
 
 	/*
 	 * The hardware seems to have a race condition when changing modes. The
@@ -441,9 +455,18 @@  static int mx51_ecspi_config(struct spi_device *spi,
 	ctrl |= MX51_ECSPI_CTRL_CS(spi->chip_select);
 
 	if (spi_imx->dynamic_burst) {
-		if (config->len > MX51_ECSPI_CTRL_MAX_BURST)
-			ctrl |= MX51_ECSPI_CTRL_BL_MASK;
-		else
+		if (config->len > MX51_ECSPI_CTRL_MAX_BURST) {
+			if (spi_imx->usedma) {
+				length = config->len -
+					 config->len % sizeof(u32);
+				div = length / MX51_ECSPI_CTRL_MAX_BURST  + 1;
+				length = (length / div) -
+					 (length / div) % sizeof(u32);
+				ctrl |= ((length * 8 - 1) <<
+					MX51_ECSPI_CTRL_BL_OFFSET);
+			} else
+				ctrl |= MX51_ECSPI_CTRL_BL_MASK;
+		} else
 			ctrl |= (((config->len - config->len % 4) * 8 - 1) <<
 				MX51_ECSPI_CTRL_BL_OFFSET);
 	} else
@@ -933,10 +956,16 @@  static int spi_imx_dma_configure(struct spi_master *master,
 		buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
 		break;
 	case 2:
-		buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		if (spi_imx->dynamic_burst)
+			buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		else
+			buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES;
 		break;
 	case 1:
-		buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		if (spi_imx->dynamic_burst)
+			buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		else
+			buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE;
 		break;
 	default:
 		return -EINVAL;
@@ -1122,6 +1151,32 @@  static int spi_imx_calculate_timeout(struct spi_imx_data *spi_imx, int size)
 	return msecs_to_jiffies(2 * timeout * MSEC_PER_SEC);
 }
 
+static int spi_imx_pio_txrx(struct spi_imx_data *spi_imx)
+{
+	unsigned long transfer_timeout;
+	unsigned long timeout;
+
+	spi_imx->txfifo = 0;
+
+	reinit_completion(&spi_imx->xfer_done);
+
+	spi_imx_push(spi_imx);
+
+	spi_imx->devtype_data->intctrl(spi_imx, MXC_INT_TE);
+
+	transfer_timeout = spi_imx_calculate_timeout(spi_imx, spi_imx->count);
+
+	timeout = wait_for_completion_timeout(&spi_imx->xfer_done,
+					      transfer_timeout);
+	if (!timeout) {
+		dev_err(spi_imx->dev, "I/O Error in PIO\n");
+		spi_imx->devtype_data->reset(spi_imx);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
 static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 				struct spi_transfer *transfer)
 {
@@ -1130,6 +1185,20 @@  static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 	unsigned long timeout;
 	struct spi_master *master = spi_imx->bitbang.master;
 	struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
+	unsigned int old_nents = 0;
+	int ret;
+
+	spi_imx->count = transfer->len - spi_imx->count_index;
+	if (spi_imx->dynamic_burst && spi_imx->count_index) {
+		/* Cut RX data tail */
+		old_nents = rx->nents;
+		WARN_ON(sg_dma_len(&rx->sgl[rx->nents - 1]) <
+			spi_imx->count_index);
+		sg_dma_len(&rx->sgl[rx->nents - 1]) -=
+			spi_imx->count_index;
+		if (sg_dma_len(&rx->sgl[rx->nents - 1]) == 0)
+			--rx->nents;
+	}
 
 	/*
 	 * The TX DMA setup starts the transfer, so make sure RX is configured
@@ -1147,6 +1216,30 @@  static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 	reinit_completion(&spi_imx->dma_rx_completion);
 	dma_async_issue_pending(master->dma_rx);
 
+	if (spi_imx->dynamic_burst) {
+		dma_sync_sg_for_cpu(master->dma_tx->device->dev,
+				    tx->sgl, tx->nents, DMA_TO_DEVICE);
+		if (spi_imx->bpw_w == 1)
+			spi_imx_u32_swap_u8(transfer, (u8 *)transfer->tx_buf);
+		if (spi_imx->bpw_w == 2)
+			spi_imx_u32_swap_u16(transfer,
+				(u16 *)transfer->tx_buf);
+
+		if (spi_imx->count_index) {
+			/* Cut TX data tail */
+			old_nents = tx->nents;
+			WARN_ON(sg_dma_len(&tx->sgl[tx->nents - 1]) <
+				spi_imx->count_index);
+			sg_dma_len(&tx->sgl[tx->nents - 1]) -=
+				spi_imx->count_index;
+			if (sg_dma_len(&tx->sgl[tx->nents - 1]) == 0)
+				--tx->nents;
+		}
+
+		dma_sync_sg_for_device(master->dma_tx->device->dev,
+				       tx->sgl, tx->nents, DMA_TO_DEVICE);
+	}
+
 	desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
 				tx->sgl, tx->nents, DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -1161,6 +1254,12 @@  static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 	reinit_completion(&spi_imx->dma_tx_completion);
 	dma_async_issue_pending(master->dma_tx);
 
+	if (spi_imx->dynamic_burst && spi_imx->count_index) {
+		spi_imx->tx_buf = transfer->tx_buf + spi_imx->count;
+		spi_imx->rx_buf = transfer->rx_buf + spi_imx->count;
+		spi_imx->count = spi_imx->count_index;
+	}
+
 	transfer_timeout = spi_imx_calculate_timeout(spi_imx, transfer->len);
 
 	/* Wait SDMA to finish the data transfer.*/
@@ -1182,6 +1281,27 @@  static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 		return -ETIMEDOUT;
 	}
 
+	if (spi_imx->dynamic_burst) {
+		spi_imx->dynamic_burst = 0;
+
+		if (spi_imx->count_index) {
+			ret = spi_imx_pio_txrx(spi_imx);
+			if (ret < 0)
+				return ret;
+		}
+
+		if (spi_imx->bpw_w == 1)
+			spi_imx_u32_swap_u8(transfer, (u8 *)transfer->rx_buf);
+		if (spi_imx->bpw_w == 2)
+			spi_imx_u32_swap_u16(transfer,
+					(u16 *)transfer->rx_buf);
+		dmac_flush_range(transfer->rx_buf,
+				transfer->rx_buf + transfer->len);
+		outer_flush_range(virt_to_phys(transfer->rx_buf),
+				virt_to_phys(transfer->rx_buf) +
+				transfer->len);
+	}
+
 	return transfer->len;
 }
 
@@ -1189,13 +1309,11 @@  static int spi_imx_pio_transfer(struct spi_device *spi,
 				struct spi_transfer *transfer)
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
-	unsigned long transfer_timeout;
-	unsigned long timeout;
+	int ret;
 
 	spi_imx->tx_buf = transfer->tx_buf;
 	spi_imx->rx_buf = transfer->rx_buf;
 	spi_imx->count = transfer->len;
-	spi_imx->txfifo = 0;
 
 	if (spi_imx->dynamic_burst) {
 		spi_imx->count_index =
@@ -1211,21 +1329,9 @@  static int spi_imx_pio_transfer(struct spi_device *spi,
 					(u16 *)transfer->tx_buf);
 	}
 
-	reinit_completion(&spi_imx->xfer_done);
-
-	spi_imx_push(spi_imx);
-
-	spi_imx->devtype_data->intctrl(spi_imx, MXC_INT_TE);
-
-	transfer_timeout = spi_imx_calculate_timeout(spi_imx, transfer->len);
-
-	timeout = wait_for_completion_timeout(&spi_imx->xfer_done,
-					      transfer_timeout);
-	if (!timeout) {
-		dev_err(&spi->dev, "I/O Error in PIO\n");
-		spi_imx->devtype_data->reset(spi_imx);
-		return -ETIMEDOUT;
-	}
+	ret = spi_imx_pio_txrx(spi_imx);
+	if (ret < 0)
+		return ret;
 
 	if (spi_imx->dynamic_burst) {
 		if (spi_imx->bpw_w == 1)