diff mbox

[08/10,RESEND] spi: Add DMA support into SPI driver

Message ID 1343076052-27312-9-git-send-email-marex@denx.de (mailing list archive)
State New, archived
Headers show

Commit Message

Marek Vasut July 23, 2012, 8:40 p.m. UTC
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Attila Kinali <attila@kinali.ch>
Cc: Chris Ball <cjb@laptop.org>
CC: Dong Aisheng <b29396@freescale.com>
Cc: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Linux ARM kernel <linux-arm-kernel@lists.infradead.org>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
CC: Shawn Guo <shawn.guo@linaro.org>
---
 drivers/spi/spi-mxs.c |  229 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 214 insertions(+), 15 deletions(-)

Comments

Mark Brown Aug. 1, 2012, 8:34 p.m. UTC | #1
On Mon, Jul 23, 2012 at 10:40:50PM +0200, Marek Vasut wrote:

> +		/*
> +		 * Small blocks can be transfered via PIO.
> +		 * Measured by empiric means:
> +		 *
> +		 * dd if=/dev/mtdblock0 of=/dev/null bs=1024k count=1
> +		 *
> +		 * DMA only: 2.164808 seconds, 473.0KB/s
> +		 * Combined: 1.676276 seconds, 610.9KB/s
> +		 */

I've seen other devices end up using the FIFO length as the decision
point here which makes intuitive sense to me unless the FIFO is
extremely deep.

Not reviewed this properly yet but it looks OK from a first read
through.
Marek Vasut Aug. 2, 2012, 3 p.m. UTC | #2
Dear Mark Brown,

> On Mon, Jul 23, 2012 at 10:40:50PM +0200, Marek Vasut wrote:
> > +		/*
> > +		 * Small blocks can be transfered via PIO.
> > +		 * Measured by empiric means:
> > +		 *
> > +		 * dd if=/dev/mtdblock0 of=/dev/null bs=1024k count=1
> > +		 *
> > +		 * DMA only: 2.164808 seconds, 473.0KB/s
> > +		 * Combined: 1.676276 seconds, 610.9KB/s
> > +		 */
> 
> I've seen other devices end up using the FIFO length as the decision
> point here which makes intuitive sense to me unless the FIFO is
> extremely deep.

Yep, that's all good. This device's FIFO is not too deep, but the reason I chose
not to go that way is that the DMA allocations have some overhead too, and
this value seemed to be the best.

> Not reviewed this properly yet but it looks OK from a first read
> through.

Thanks!

Best regards,
Marek Vasut
diff mbox

Patch

diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index bd2f2fd..d6a80a1 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -55,8 +55,12 @@ 
 
 #define SSP_TIMEOUT		1000	/* 1000 ms */
 
+#define SG_NUM			4
+#define SG_MAXLEN		0xff00
+
 struct mxs_spi {
 	struct mxs_ssp		ssp;
+	struct completion	c;
 };
 
 static int mxs_spi_setup_transfer(struct spi_device *dev,
@@ -192,6 +196,115 @@  static int mxs_ssp_wait(struct mxs_spi *spi, int offset, int mask, bool set)
 	return 0;
 }
 
+/* DMA descriptor callback: signal the completion embedded in the mxs_spi. */
+static void mxs_ssp_dma_irq_callback(void *param)
+{
+	complete(&((struct mxs_spi *)param)->c);
+}
+
+/* SSP error IRQ handler: log CTRL1 and STATUS at error level, nothing else. */
+static irqreturn_t mxs_ssp_irq_handler(int irq, void *dev_id)
+{
+	struct mxs_ssp *ssp = dev_id;
+	dev_err(ssp->dev, "%s[%i] CTRL1=%08x STATUS=%08x\n", __func__, __LINE__,
+		readl(ssp->base + HW_SSP_CTRL1(ssp)),
+		readl(ssp->base + HW_SSP_STATUS(ssp)));
+	return IRQ_HANDLED;
+}
+
+/*
+ * Transfer len bytes of buf by DMA, sleeping until the DMA callback completes
+ * spi->c or SSP_TIMEOUT ms pass. Nonzero *first adds LOCK_CS, nonzero *last
+ * adds IGNORE_CRC, write picks TX over RX. Returns 0 or a negative errno.
+ */
+static int mxs_spi_txrx_dma(struct mxs_spi *spi, int cs,
+			    unsigned char *buf, int len,
+			    int *first, int *last, int write)
+{
+	struct mxs_ssp *ssp = &spi->ssp;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist sg[SG_NUM];
+	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	uint32_t pio = BM_SSP_CTRL0_DATA_XFER | mxs_spi_cs_to_reg(cs);
+	int nents, chunk, i, ret = 0;
+
+	if (len <= 0)
+		return -EINVAL;
+	if (len > SG_NUM * SG_MAXLEN) {
+		dev_err(ssp->dev, "Data chunk too big for DMA\n");
+		return -EINVAL;
+	}
+
+	init_completion(&spi->c);
+
+	if (*first)
+		pio |= BM_SSP_CTRL0_LOCK_CS;
+	if (*last)
+		pio |= BM_SSP_CTRL0_IGNORE_CRC;
+	if (!write)
+		pio |= BM_SSP_CTRL0_READ;
+
+	if (ssp->devid == IMX23_SSP)
+		pio |= len;
+	else
+		writel(len, ssp->base + HW_SSP_XFER_SIZE);
+
+	/* Queue the PIO register write transfer. */
+	desc = dmaengine_prep_slave_sg(ssp->dmach, (struct scatterlist *)&pio,
+			1, DMA_TRANS_NONE, 0);
+	if (!desc) {
+		dev_err(ssp->dev, "Failed to get PIO reg. write descriptor.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Chunks of at most SG_MAXLEN bytes; round the count up so a len that
+	 * is a multiple of SG_MAXLEN claims no extra entry (sg[] overrun).
+	 */
+	nents = (len + SG_MAXLEN - 1) / SG_MAXLEN;
+	sg_init_table(sg, nents);
+	for (i = 0; i < nents; i++) {
+		chunk = min(len, SG_MAXLEN);
+		sg_set_buf(&sg[i], buf, chunk);
+		buf += chunk;
+		len -= chunk;
+	}
+
+	if (!dma_map_sg(ssp->dev, sg, nents, dir)) {
+		dev_err(ssp->dev, "Failed to map DMA buffer\n");
+		return -ENOMEM;
+	}
+
+	desc = dmaengine_prep_slave_sg(ssp->dmach, sg, nents,
+			write ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
+			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		dev_err(ssp->dev, "Failed to get DMA data write descriptor.\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* The last descriptor carries the callback that ends the wait below. */
+	desc->callback = mxs_ssp_dma_irq_callback;
+	desc->callback_param = spi;
+
+	/* Start the transfer. */
+	dmaengine_submit(desc);
+	dma_async_issue_pending(ssp->dmach);
+
+	if (!wait_for_completion_timeout(&spi->c,
+				msecs_to_jiffies(SSP_TIMEOUT))) {
+		dev_err(ssp->dev, "DMA transfer timeout\n");
+		/* Stop the channel before its buffers are unmapped. */
+		dmaengine_terminate_all(ssp->dmach);
+		ret = -ETIMEDOUT;
+	}
+
+err:
+	dma_unmap_sg(ssp->dev, sg, nents, dir);
+	return ret;
+}
+
 static int mxs_spi_txrx_pio(struct mxs_spi *spi, int cs,
 			    unsigned char *buf, int len,
 			    int *first, int *last, int write)
@@ -277,18 +390,48 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 			first = 1;
 		if (&t->transfer_list == m->transfers.prev)
 			last = 1;
-		if (t->rx_buf && t->tx_buf) {
+		if ((t->rx_buf && t->tx_buf) || (t->rx_dma && t->tx_dma)) {
 			dev_err(ssp->dev,
 				"Cannot send and receive simultaneously\n");
 			return -EINVAL;
 		}
 
-		if (t->tx_buf)
-			status = mxs_spi_txrx_pio(spi, cs, (void *)t->tx_buf,
-					     t->len, &first, &last, 1);
-		if (t->rx_buf)
-			status = mxs_spi_txrx_pio(spi, cs, t->rx_buf,
-					     t->len, &first, &last, 0);
+		/*
+		 * Small blocks can be transfered via PIO.
+		 * Measured by empiric means:
+		 *
+		 * dd if=/dev/mtdblock0 of=/dev/null bs=1024k count=1
+		 *
+		 * DMA only: 2.164808 seconds, 473.0KB/s
+		 * Combined: 1.676276 seconds, 610.9KB/s
+		 */
+		if (t->len <= 256) {
+			writel(BM_SSP_CTRL1_DMA_ENABLE,
+				ssp->base + HW_SSP_CTRL1(ssp) +
+				STMP_OFFSET_REG_CLR);
+
+			if (t->tx_buf)
+				status = mxs_spi_txrx_pio(spi, cs,
+						(void *)t->tx_buf,
+						t->len, &first, &last, 1);
+			if (t->rx_buf)
+				status = mxs_spi_txrx_pio(spi, cs,
+						t->rx_buf, t->len,
+						&first, &last, 0);
+		} else {
+			writel(BM_SSP_CTRL1_DMA_ENABLE,
+				ssp->base + HW_SSP_CTRL1(ssp) +
+				STMP_OFFSET_REG_SET);
+
+			if (t->tx_buf)
+				status = mxs_spi_txrx_dma(spi, cs,
+						(void *)t->tx_buf, t->len,
+						&first, &last, 1);
+			if (t->rx_buf)
+				status = mxs_spi_txrx_dma(spi, cs,
+						t->rx_buf, t->len,
+						&first, &last, 0);
+		}
 
 		m->actual_length += t->len;
 		if (status)
@@ -303,6 +446,21 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 	return status;
 }
 
+/*
+ * Channel filter handed to dma_request_channel(): accept only a channel for
+ * which mxs_dma_is_apbh() holds and whose chan_id equals ssp->dma_channel,
+ * and attach the SSP's dma_data to it through chan->private.
+ */
+static bool mxs_ssp_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct mxs_ssp *wanted = param;
+
+	if (!mxs_dma_is_apbh(chan) || chan->chan_id != wanted->dma_channel)
+		return false;
+	chan->private = &wanted->dma_data;
+	return true;
+}
+
 static const struct of_device_id mxs_spi_dt_ids[] = {
 	{ .compatible = "fsl,imx23-spi", .data = (void *) IMX23_SSP, },
 	{ .compatible = "fsl,imx28-spi", .data = (void *) IMX28_SSP, },
@@ -318,15 +476,18 @@  static int __devinit mxs_spi_probe(struct platform_device *pdev)
 	struct spi_master *master;
 	struct mxs_spi *spi;
 	struct mxs_ssp *ssp;
-	struct resource *iores;
+	struct resource *iores, *dmares;
 	struct pinctrl *pinctrl;
 	struct clk *clk;
 	void __iomem *base;
-	int devid;
-	int ret = 0;
+	int devid, dma_channel;
+	int ret = 0, irq_err, irq_dma;
+	dma_cap_mask_t mask;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!iores)
+	irq_err = platform_get_irq(pdev, 0);
+	irq_dma = platform_get_irq(pdev, 1);
+	if (!iores || irq_err < 0 || irq_dma < 0)
 		return -EINVAL;
 
 	base = devm_request_and_ioremap(&pdev->dev, iores);
@@ -341,10 +502,26 @@  static int __devinit mxs_spi_probe(struct platform_device *pdev)
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	if (np)
+	if (np) {
 		devid = (enum mxs_ssp_id) of_id->data;
-	else
+		/*
+		 * TODO: This is a temporary solution and should be changed
+		 * to use generic DMA binding later when the helpers get in.
+		 */
+		ret = of_property_read_u32(np, "fsl,ssp-dma-channel",
+					   &dma_channel);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Failed to get DMA channel\n");
+			return -EINVAL;
+		}
+	} else {
+		dmares = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+		if (!dmares)
+			return -EINVAL;
 		devid = pdev->id_entry->driver_data;
+		dma_channel = dmares->start;
+	}
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*spi));
 	if (!master)
@@ -364,8 +541,28 @@  static int __devinit mxs_spi_probe(struct platform_device *pdev)
 	ssp->clk = clk;
 	ssp->base = base;
 	ssp->devid = devid;
+	ssp->dma_channel = dma_channel;
+
+	ret = devm_request_irq(&pdev->dev, irq_err, mxs_ssp_irq_handler, 0,
+			       DRIVER_NAME, ssp);
+	if (ret)
+		goto out_master_free;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	ssp->dma_data.chan_irq = irq_dma;
+	ssp->dmach = dma_request_channel(mask, mxs_ssp_dma_filter, ssp);
+	if (!ssp->dmach) {
+		dev_err(ssp->dev, "Failed to request DMA\n");
+		goto out_master_free;
+	}
 
+	/*
+	 * Crank up the clock to 120MHz, this will be further divided onto a
+	 * proper speed.
+	 */
 	clk_prepare_enable(ssp->clk);
+	clk_set_rate(ssp->clk, 120 * 1000 * 1000);
 	ssp->clk_rate = clk_get_rate(ssp->clk) / 1000;
 
 	stmp_reset_block(ssp->base);
@@ -375,13 +572,15 @@  static int __devinit mxs_spi_probe(struct platform_device *pdev)
 	ret = spi_register_master(master);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot register SPI master, %d\n", ret);
-		goto out_master_free;
+		goto out_free_dma;
 	}
 
 	return 0;
 
-out_master_free:
+out_free_dma:
+	dma_release_channel(ssp->dmach);
 	clk_disable_unprepare(ssp->clk);
+out_master_free:
 	spi_master_put(master);
 	kfree(master);
 	return ret;