diff mbox series

spi: spi-sun6i: implement DMA-based transfer mode

Message ID 20201015154740.20825-1-al.kochet@gmail.com (mailing list archive)
State New, archived
Headers show
Series spi: spi-sun6i: implement DMA-based transfer mode | expand

Commit Message

Alexander Kochetkov Oct. 15, 2020, 3:47 p.m. UTC
DMA-based transfer will be enabled if data length is larger than FIFO size
(64 bytes for A64). This greatly reduce number of interrupts for
transferring data.

For smaller data size PIO mode will be used. In PIO mode whole buffer will
be loaded into FIFO.

If driver failed to request DMA channels then it fallback for PIO mode.

Tested on SOPINE (https://www.pine64.org/sopine/)

Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
---
 drivers/spi/spi-sun6i.c | 171 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 159 insertions(+), 12 deletions(-)

Comments

Maxime Ripard Oct. 19, 2020, 8:21 a.m. UTC | #1
Hi!

On Thu, Oct 15, 2020 at 06:47:40PM +0300, Alexander Kochetkov wrote:
> DMA-based transfer will be enabled if data length is larger than FIFO size
> (64 bytes for A64). This greatly reduce number of interrupts for
> transferring data.
> 
> For smaller data size PIO mode will be used. In PIO mode whole buffer will
> be loaded into FIFO.
> 
> If driver failed to request DMA channels then it fallback for PIO mode.
> 
> Tested on SOPINE (https://www.pine64.org/sopine/)
> 
> Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>

Thanks for working on this, it's been a bit overdue

> ---
>  drivers/spi/spi-sun6i.c | 171 +++++++++++++++++++++++++++++++++++++---
>  1 file changed, 159 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
> index 19238e1b76b4..38e5b8af7da6 100644
> --- a/drivers/spi/spi-sun6i.c
> +++ b/drivers/spi/spi-sun6i.c
> @@ -18,6 +18,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/reset.h>
> +#include <linux/dmaengine.h>
>  
>  #include <linux/spi/spi.h>
>  
> @@ -52,10 +53,12 @@
>  
>  #define SUN6I_FIFO_CTL_REG		0x18
>  #define SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_MASK	0xff
> +#define SUN6I_FIFO_CTL_RF_DRQ_EN		BIT(8)
>  #define SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_BITS	0
>  #define SUN6I_FIFO_CTL_RF_RST			BIT(15)
>  #define SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_MASK	0xff
>  #define SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_BITS	16
> +#define SUN6I_FIFO_CTL_TF_DRQ_EN		BIT(24)
>  #define SUN6I_FIFO_CTL_TF_RST			BIT(31)
>  
>  #define SUN6I_FIFO_STA_REG		0x1c
> @@ -83,6 +86,8 @@
>  struct sun6i_spi {
>  	struct spi_master	*master;
>  	void __iomem		*base_addr;
> +	dma_addr_t		dma_addr_rx;
> +	dma_addr_t		dma_addr_tx;
>  	struct clk		*hclk;
>  	struct clk		*mclk;
>  	struct reset_control	*rstc;
> @@ -92,6 +97,7 @@ struct sun6i_spi {
>  	const u8		*tx_buf;
>  	u8			*rx_buf;
>  	int			len;
> +	bool			use_dma;
>  	unsigned long		fifo_depth;
>  };
>  
> @@ -182,6 +188,66 @@ static size_t sun6i_spi_max_transfer_size(struct spi_device *spi)
>  	return SUN6I_MAX_XFER_SIZE - 1;
>  }
>  
> +static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
> +				 struct spi_transfer *tfr)
> +{
> +	struct dma_async_tx_descriptor *rxdesc, *txdesc;
> +	struct spi_master *master = sspi->master;
> +
> +	rxdesc = NULL;
> +	if (tfr->rx_buf) {
> +		struct dma_slave_config rxconf = {
> +			.direction = DMA_DEV_TO_MEM,
> +			.src_addr = sspi->dma_addr_rx,
> +			.src_addr_width = 1,
> +			.src_maxburst = 1,
> +		};

That doesn't really look optimal, the controller seems to be able to
read / write 32 bits at a time from its FIFO and we probably can
increase the burst length too?

> +
> +		dmaengine_slave_config(master->dma_rx, &rxconf);
> +
> +		rxdesc = dmaengine_prep_slave_sg(
> +				master->dma_rx,
> +				tfr->rx_sg.sgl, tfr->rx_sg.nents,
> +				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);

checkpatch --strict complains about this one (your line shouldn't end
with a parenthesis).

> +		if (!rxdesc)
> +			return -EINVAL;
> +	}
> +
> +	txdesc = NULL;
> +	if (tfr->tx_buf) {
> +		struct dma_slave_config txconf = {
> +			.direction = DMA_MEM_TO_DEV,
> +			.dst_addr = sspi->dma_addr_tx,
> +			.dst_addr_width = 1,
> +			.dst_maxburst = 1,
> +		};
> +
> +		dmaengine_slave_config(master->dma_tx, &txconf);
> +
> +		txdesc = dmaengine_prep_slave_sg(
> +				master->dma_tx,
> +				tfr->tx_sg.sgl, tfr->tx_sg.nents,
> +				DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
> +		if (!txdesc) {
> +			if (rxdesc)
> +				dmaengine_terminate_sync(master->dma_rx);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (tfr->rx_buf) {
> +		dmaengine_submit(rxdesc);
> +		dma_async_issue_pending(master->dma_rx);
> +	}
> +
> +	if (tfr->tx_buf) {
> +		dmaengine_submit(txdesc);
> +		dma_async_issue_pending(master->dma_tx);
> +	}
> +
> +	return 0;
> +}
> +
>  static int sun6i_spi_transfer_one(struct spi_master *master,
>  				  struct spi_device *spi,
>  				  struct spi_transfer *tfr)
> @@ -201,6 +267,8 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
>  	sspi->tx_buf = tfr->tx_buf;
>  	sspi->rx_buf = tfr->rx_buf;
>  	sspi->len = tfr->len;
> +	sspi->use_dma = master->can_dma ?
> +			master->can_dma(master, spi, tfr) : false;
>  
>  	/* Clear pending interrupts */
>  	sun6i_spi_write(sspi, SUN6I_INT_STA_REG, ~0);
> @@ -216,9 +284,17 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
>  	 * (See spi-sun4i.c)
>  	 */
>  	trig_level = sspi->fifo_depth / 4 * 3;
> -	sun6i_spi_write(sspi, SUN6I_FIFO_CTL_REG,
> -			(trig_level << SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_BITS) |
> -			(trig_level << SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_BITS));
> +	reg = (trig_level << SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_BITS) |
> +	      (trig_level << SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_BITS);
> +
> +	if (sspi->use_dma) {
> +		if (tfr->tx_buf)
> +			reg |= SUN6I_FIFO_CTL_TF_DRQ_EN;
> +		if (tfr->rx_buf)
> +			reg |= SUN6I_FIFO_CTL_RF_DRQ_EN;
> +	}
> +
> +	sun6i_spi_write(sspi, SUN6I_FIFO_CTL_REG, reg);
>  
>  	/*
>  	 * Setup the transfer control register: Chip Select,
> @@ -300,16 +376,28 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
>  	sun6i_spi_write(sspi, SUN6I_XMIT_CNT_REG, tx_len);
>  	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, tx_len);
>  
> -	/* Fill the TX FIFO */
> -	sun6i_spi_fill_fifo(sspi);
> +	if (!sspi->use_dma) {
> +		/* Fill the TX FIFO */
> +		sun6i_spi_fill_fifo(sspi);
> +	} else {
> +		ret = sun6i_spi_prepare_dma(sspi, tfr);
> +		if (ret) {
> +			dev_warn(&master->dev,
> +				 "%s: prepare DMA failed, ret=%d",
> +				 dev_name(&spi->dev), ret);
> +			return ret;
> +		}
> +	}
>  
>  	/* Enable the interrupts */
>  	reg = SUN6I_INT_CTL_TC;
>  
> -	if (rx_len > sspi->fifo_depth)
> -		reg |= SUN6I_INT_CTL_RF_RDY;
> -	if (tx_len > sspi->fifo_depth)
> -		reg |= SUN6I_INT_CTL_TF_ERQ;
> +	if (!sspi->use_dma) {
> +		if (rx_len > sspi->fifo_depth)
> +			reg |= SUN6I_INT_CTL_RF_RDY;
> +		if (tx_len > sspi->fifo_depth)
> +			reg |= SUN6I_INT_CTL_TF_ERQ;
> +	}
>  
>  	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, reg);
>  
> @@ -332,6 +420,11 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
>  
>  	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, 0);
>  
> +	if (ret && sspi->use_dma) {
> +		dmaengine_terminate_sync(master->dma_rx);
> +		dmaengine_terminate_sync(master->dma_tx);
> +	}
> +
>  	return ret;
>  }
>  
> @@ -343,7 +436,8 @@ static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
>  	/* Transfer complete */
>  	if (status & SUN6I_INT_CTL_TC) {
>  		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
> -		sun6i_spi_drain_fifo(sspi);
> +		if (!sspi->use_dma)
> +			sun6i_spi_drain_fifo(sspi);

Is it causing any issue? The FIFO will be drained only if there's
something remaining in the FIFO, which shouldn't happen with DMA?

Maxime
Alexander Kochetkov Oct. 19, 2020, 1:17 p.m. UTC | #2
Hi, Maxime! Thanks for reviewing patches!


>> 
>> +static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
>> +				 struct spi_transfer *tfr)
>> +{
>> +	struct dma_async_tx_descriptor *rxdesc, *txdesc;
>> +	struct spi_master *master = sspi->master;
>> +
>> +	rxdesc = NULL;
>> +	if (tfr->rx_buf) {
>> +		struct dma_slave_config rxconf = {
>> +			.direction = DMA_DEV_TO_MEM,
>> +			.src_addr = sspi->dma_addr_rx,
>> +			.src_addr_width = 1,
>> +			.src_maxburst = 1,
>> +		};
> 
> That doesn't really look optimal, the controller seems to be able to
> read / write 32 bits at a time from its FIFO and we probably can
> increase the burst length too?


I had doubts if it would work. I didn’t know will DMA work for transfers with lengths not
aligned to 32 bits. For example, if we init DMA with src_addr_width = 1 and
.src_maxburst = 8 will DMA work for transfer with length 11? I expect that DMA fill FIFO
with 16 bytes and SPI transfer only 11 bytes and 5 bytes will leave in TX fifo.  I did the test
and there is no additional data left in the fifo buffer. Also at reception the is no memory
overwrites.

I made test with src_addr_width = 4, src_maxburst = 1 and transfer length 3. Looks
like in that case DMA doesn’t issue 4 bytes transfer.

For test with src_addr_width = 4, src_maxburst = 8 I had to adjust RF_RDY_TRIG_LEVEL_BITS
and TF_ERQ_TRIG_LEVEL_BITS of FIFO_CTL_REG to half of FIFO (32 bytes). With the config
DMA will transfer burst of half of FIFO length during transfer and remaining bytes at the end of
transfer.


>> 
>> @@ -343,7 +436,8 @@ static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
>> 	/* Transfer complete */
>> 	if (status & SUN6I_INT_CTL_TC) {
>> 		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
>> -		sun6i_spi_drain_fifo(sspi);
>> +		if (!sspi->use_dma)
>> +			sun6i_spi_drain_fifo(sspi);
> 
> Is it causing any issue? The FIFO will be drained only if there's
> something remaining in the FIFO, which shouldn't happen with DMA?
> 

No. It’s for make code patch explicit.
Remove the change?

Alexander.
Alexander Kochetkov Oct. 19, 2020, 5:43 p.m. UTC | #3
> 19 окт. 2020 г., в 11:21, Maxime Ripard <maxime@cerno.tech> написал(а):
> 
> Hi!
> 
> On Thu, Oct 15, 2020 at 06:47:40PM +0300, Alexander Kochetkov wrote:
>> DMA-based transfer will be enabled if data length is larger than FIFO size
>> (64 bytes for A64). This greatly reduce number of interrupts for
>> transferring data.
>> 
>> For smaller data size PIO mode will be used. In PIO mode whole buffer will
>> be loaded into FIFO.
>> 
>> If driver failed to request DMA channels then it fallback for PIO mode.
>> 
>> Tested on SOPINE (https://www.pine64.org/sopine/)
>> 
>> Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
> 
> Thanks for working on this, it's been a bit overdue

Hi, Maxime!

We did custom A64 based computation module for our product.
Do you mean that A64 is obsolete or EOL product?
If so, can you recommend active replacement for A64 from Allwinner same price?

Alexander
Chen-Yu Tsai Oct. 20, 2020, 3:52 a.m. UTC | #4
On Tue, Oct 20, 2020 at 1:43 AM Alexander Kochetkov <al.kochet@gmail.com> wrote:
>
>
>
> > 19 окт. 2020 г., в 11:21, Maxime Ripard <maxime@cerno.tech> написал(а):
> >
> > Hi!
> >
> > On Thu, Oct 15, 2020 at 06:47:40PM +0300, Alexander Kochetkov wrote:
> >> DMA-based transfer will be enabled if data length is larger than FIFO size
> >> (64 bytes for A64). This greatly reduce number of interrupts for
> >> transferring data.
> >>
> >> For smaller data size PIO mode will be used. In PIO mode whole buffer will
> >> be loaded into FIFO.
> >>
> >> If driver failed to request DMA channels then it fallback for PIO mode.
> >>
> >> Tested on SOPINE (https://www.pine64.org/sopine/)
> >>
> >> Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
> >
> > Thanks for working on this, it's been a bit overdue
>
> Hi, Maxime!
>
> We did custom A64 based computation module for our product.
> Do you mean that A64 is obsolete or EOL product?
> If so, can you recommend active replacement for A64 from Allwinner same price?

I believe what Maxime meant was that DMA transfer for SPI is a long
sought-after feature, but no one had finished it.

ChenYu
Maxime Ripard Oct. 21, 2020, 3:03 p.m. UTC | #5
On Tue, Oct 20, 2020 at 11:52:34AM +0800, Chen-Yu Tsai wrote:
> On Tue, Oct 20, 2020 at 1:43 AM Alexander Kochetkov <al.kochet@gmail.com> wrote:
> >
> >
> >
> > > 19 окт. 2020 г., в 11:21, Maxime Ripard <maxime@cerno.tech> написал(а):
> > >
> > > Hi!
> > >
> > > On Thu, Oct 15, 2020 at 06:47:40PM +0300, Alexander Kochetkov wrote:
> > >> DMA-based transfer will be enabled if data length is larger than FIFO size
> > >> (64 bytes for A64). This greatly reduce number of interrupts for
> > >> transferring data.
> > >>
> > >> For smaller data size PIO mode will be used. In PIO mode whole buffer will
> > >> be loaded into FIFO.
> > >>
> > >> If driver failed to request DMA channels then it fallback for PIO mode.
> > >>
> > >> Tested on SOPINE (https://www.pine64.org/sopine/)
> > >>
> > >> Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
> > >
> > > Thanks for working on this, it's been a bit overdue
> >
> > Hi, Maxime!
> >
> > We did custom A64 based computation module for our product.
> > Do you mean that A64 is obsolete or EOL product?
> > If so, can you recommend active replacement for A64 from Allwinner same price?
> 
> I believe what Maxime meant was that DMA transfer for SPI is a long
> sought-after feature, but no one had finished it.

Yeah, that's what I meant :)

Maxime
Maxime Ripard Oct. 21, 2020, 4:39 p.m. UTC | #6
Hi,

On Mon, Oct 19, 2020 at 04:17:18PM +0300, Alexander Kochetkov wrote:
> >> +static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
> >> +				 struct spi_transfer *tfr)
> >> +{
> >> +	struct dma_async_tx_descriptor *rxdesc, *txdesc;
> >> +	struct spi_master *master = sspi->master;
> >> +
> >> +	rxdesc = NULL;
> >> +	if (tfr->rx_buf) {
> >> +		struct dma_slave_config rxconf = {
> >> +			.direction = DMA_DEV_TO_MEM,
> >> +			.src_addr = sspi->dma_addr_rx,
> >> +			.src_addr_width = 1,
> >> +			.src_maxburst = 1,
> >> +		};
> > 
> > That doesn't really look optimal, the controller seems to be able to
> > read / write 32 bits at a time from its FIFO and we probably can
> > increase the burst length too?
> 
> 
> I had doubts if it would work. I didn’t know will DMA work for
> transfers with lengths not aligned to 32 bits. For example, if we init
> DMA with src_addr_width = 1 and .src_maxburst = 8 will DMA work for
> transfer with length 11?

Bursts are usually defined by how many transfers the controller is
allowed to do at once, so it shouldn't cause any harm if the length
isn't aligned, it will just do less than the maximum number allowed.

Whether or not the hardware agrees to that definition is something else
though, but from experience it should

> I expect that DMA fill FIFO with 16 bytes and
> SPI transfer only 11 bytes and 5 bytes will leave in TX fifo. I did
> the test and there is no additional data left in the fifo buffer. Also
> at reception the is no memory overwrites.
> 
> I made test with src_addr_width = 4, src_maxburst = 1 and transfer
> length 3. Looks like in that case DMA doesn’t issue 4 bytes transfer.
> 
> For test with src_addr_width = 4, src_maxburst = 8 I had to adjust
> RF_RDY_TRIG_LEVEL_BITS and TF_ERQ_TRIG_LEVEL_BITS of FIFO_CTL_REG to
> half of FIFO (32 bytes). With the config DMA will transfer burst of
> half of FIFO length during transfer and remaining bytes at the end of
> transfer.

Yeah, that might need some tuning. With the width, I guess we should pay
attention to the order the bytes are sent in, but it can be done later.

> >> 
> >> @@ -343,7 +436,8 @@ static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
> >> 	/* Transfer complete */
> >> 	if (status & SUN6I_INT_CTL_TC) {
> >> 		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
> >> -		sun6i_spi_drain_fifo(sspi);
> >> +		if (!sspi->use_dma)
> >> +			sun6i_spi_drain_fifo(sspi);
> > 
> > Is it causing any issue? The FIFO will be drained only if there's
> > something remaining in the FIFO, which shouldn't happen with DMA?
> > 
> 
> No. It’s for make code patch explicit.
> Remove the change?

Yes, that also simplifies the driver since we don't have to rely on the
boolean in the main structure anymore

Maxime
diff mbox series

Patch

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 19238e1b76b4..38e5b8af7da6 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -18,6 +18,7 @@ 
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
+#include <linux/dmaengine.h>
 
 #include <linux/spi/spi.h>
 
@@ -52,10 +53,12 @@ 
 
 #define SUN6I_FIFO_CTL_REG		0x18
 #define SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_MASK	0xff
+#define SUN6I_FIFO_CTL_RF_DRQ_EN		BIT(8)
 #define SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_BITS	0
 #define SUN6I_FIFO_CTL_RF_RST			BIT(15)
 #define SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_MASK	0xff
 #define SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_BITS	16
+#define SUN6I_FIFO_CTL_TF_DRQ_EN		BIT(24)
 #define SUN6I_FIFO_CTL_TF_RST			BIT(31)
 
 #define SUN6I_FIFO_STA_REG		0x1c
@@ -83,6 +86,8 @@ 
 struct sun6i_spi {
 	struct spi_master	*master;
 	void __iomem		*base_addr;
+	dma_addr_t		dma_addr_rx;
+	dma_addr_t		dma_addr_tx;
 	struct clk		*hclk;
 	struct clk		*mclk;
 	struct reset_control	*rstc;
@@ -92,6 +97,7 @@  struct sun6i_spi {
 	const u8		*tx_buf;
 	u8			*rx_buf;
 	int			len;
+	bool			use_dma;
 	unsigned long		fifo_depth;
 };
 
@@ -182,6 +188,66 @@  static size_t sun6i_spi_max_transfer_size(struct spi_device *spi)
 	return SUN6I_MAX_XFER_SIZE - 1;
 }
 
+static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
+				 struct spi_transfer *tfr)
+{
+	struct dma_async_tx_descriptor *rxdesc, *txdesc;
+	struct spi_master *master = sspi->master;
+
+	rxdesc = NULL;
+	if (tfr->rx_buf) {
+		struct dma_slave_config rxconf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = sspi->dma_addr_rx,
+			.src_addr_width = 1,
+			.src_maxburst = 1,
+		};
+
+		dmaengine_slave_config(master->dma_rx, &rxconf);
+
+		rxdesc = dmaengine_prep_slave_sg(
+				master->dma_rx,
+				tfr->rx_sg.sgl, tfr->rx_sg.nents,
+				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+		if (!rxdesc)
+			return -EINVAL;
+	}
+
+	txdesc = NULL;
+	if (tfr->tx_buf) {
+		struct dma_slave_config txconf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = sspi->dma_addr_tx,
+			.dst_addr_width = 1,
+			.dst_maxburst = 1,
+		};
+
+		dmaengine_slave_config(master->dma_tx, &txconf);
+
+		txdesc = dmaengine_prep_slave_sg(
+				master->dma_tx,
+				tfr->tx_sg.sgl, tfr->tx_sg.nents,
+				DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
+		if (!txdesc) {
+			if (rxdesc)
+				dmaengine_terminate_sync(master->dma_rx);
+			return -EINVAL;
+		}
+	}
+
+	if (tfr->rx_buf) {
+		dmaengine_submit(rxdesc);
+		dma_async_issue_pending(master->dma_rx);
+	}
+
+	if (tfr->tx_buf) {
+		dmaengine_submit(txdesc);
+		dma_async_issue_pending(master->dma_tx);
+	}
+
+	return 0;
+}
+
 static int sun6i_spi_transfer_one(struct spi_master *master,
 				  struct spi_device *spi,
 				  struct spi_transfer *tfr)
@@ -201,6 +267,8 @@  static int sun6i_spi_transfer_one(struct spi_master *master,
 	sspi->tx_buf = tfr->tx_buf;
 	sspi->rx_buf = tfr->rx_buf;
 	sspi->len = tfr->len;
+	sspi->use_dma = master->can_dma ?
+			master->can_dma(master, spi, tfr) : false;
 
 	/* Clear pending interrupts */
 	sun6i_spi_write(sspi, SUN6I_INT_STA_REG, ~0);
@@ -216,9 +284,17 @@  static int sun6i_spi_transfer_one(struct spi_master *master,
 	 * (See spi-sun4i.c)
 	 */
 	trig_level = sspi->fifo_depth / 4 * 3;
-	sun6i_spi_write(sspi, SUN6I_FIFO_CTL_REG,
-			(trig_level << SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_BITS) |
-			(trig_level << SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_BITS));
+	reg = (trig_level << SUN6I_FIFO_CTL_RF_RDY_TRIG_LEVEL_BITS) |
+	      (trig_level << SUN6I_FIFO_CTL_TF_ERQ_TRIG_LEVEL_BITS);
+
+	if (sspi->use_dma) {
+		if (tfr->tx_buf)
+			reg |= SUN6I_FIFO_CTL_TF_DRQ_EN;
+		if (tfr->rx_buf)
+			reg |= SUN6I_FIFO_CTL_RF_DRQ_EN;
+	}
+
+	sun6i_spi_write(sspi, SUN6I_FIFO_CTL_REG, reg);
 
 	/*
 	 * Setup the transfer control register: Chip Select,
@@ -300,16 +376,28 @@  static int sun6i_spi_transfer_one(struct spi_master *master,
 	sun6i_spi_write(sspi, SUN6I_XMIT_CNT_REG, tx_len);
 	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, tx_len);
 
-	/* Fill the TX FIFO */
-	sun6i_spi_fill_fifo(sspi);
+	if (!sspi->use_dma) {
+		/* Fill the TX FIFO */
+		sun6i_spi_fill_fifo(sspi);
+	} else {
+		ret = sun6i_spi_prepare_dma(sspi, tfr);
+		if (ret) {
+			dev_warn(&master->dev,
+				 "%s: prepare DMA failed, ret=%d",
+				 dev_name(&spi->dev), ret);
+			return ret;
+		}
+	}
 
 	/* Enable the interrupts */
 	reg = SUN6I_INT_CTL_TC;
 
-	if (rx_len > sspi->fifo_depth)
-		reg |= SUN6I_INT_CTL_RF_RDY;
-	if (tx_len > sspi->fifo_depth)
-		reg |= SUN6I_INT_CTL_TF_ERQ;
+	if (!sspi->use_dma) {
+		if (rx_len > sspi->fifo_depth)
+			reg |= SUN6I_INT_CTL_RF_RDY;
+		if (tx_len > sspi->fifo_depth)
+			reg |= SUN6I_INT_CTL_TF_ERQ;
+	}
 
 	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, reg);
 
@@ -332,6 +420,11 @@  static int sun6i_spi_transfer_one(struct spi_master *master,
 
 	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, 0);
 
+	if (ret && sspi->use_dma) {
+		dmaengine_terminate_sync(master->dma_rx);
+		dmaengine_terminate_sync(master->dma_tx);
+	}
+
 	return ret;
 }
 
@@ -343,7 +436,8 @@  static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
 	/* Transfer complete */
 	if (status & SUN6I_INT_CTL_TC) {
 		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
-		sun6i_spi_drain_fifo(sspi);
+		if (!sspi->use_dma)
+			sun6i_spi_drain_fifo(sspi);
 		complete(&sspi->done);
 		return IRQ_HANDLED;
 	}
@@ -422,10 +516,24 @@  static int sun6i_spi_runtime_suspend(struct device *dev)
 	return 0;
 }
 
+static bool sun6i_spi_can_dma(struct spi_master *master,
+				 struct spi_device *spi,
+				 struct spi_transfer *xfer)
+{
+	struct sun6i_spi *sspi = spi_master_get_devdata(master);
+
+	/* if the number of spi words to transfer is less or equal than
+	 * the fifo length we can just fill the fifo and wait for a single
+	 * irq, so don't bother setting up dma
+	 */
+	return xfer->len > sspi->fifo_depth;
+}
+
 static int sun6i_spi_probe(struct platform_device *pdev)
 {
 	struct spi_master *master;
 	struct sun6i_spi *sspi;
+	struct resource *mem;
 	int ret = 0, irq;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct sun6i_spi));
@@ -437,7 +545,7 @@  static int sun6i_spi_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, master);
 	sspi = spi_master_get_devdata(master);
 
-	sspi->base_addr = devm_platform_ioremap_resource(pdev, 0);
+	sspi->base_addr = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
 	if (IS_ERR(sspi->base_addr)) {
 		ret = PTR_ERR(sspi->base_addr);
 		goto err_free_master;
@@ -494,6 +602,33 @@  static int sun6i_spi_probe(struct platform_device *pdev)
 		goto err_free_master;
 	}
 
+	master->dma_tx = dma_request_chan(&pdev->dev, "tx");
+	if (IS_ERR(master->dma_tx)) {
+		/* Check tx to see if we need defer probing driver */
+		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto err_free_master;
+		}
+		dev_warn(&pdev->dev, "Failed to request TX DMA channel\n");
+		master->dma_tx = NULL;
+	}
+
+	master->dma_rx = dma_request_chan(&pdev->dev, "rx");
+	if (IS_ERR(master->dma_rx)) {
+		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto err_free_dma_tx;
+		}
+		dev_warn(&pdev->dev, "Failed to request RX DMA channel\n");
+		master->dma_rx = NULL;
+	}
+
+	if (master->dma_tx && master->dma_rx) {
+		sspi->dma_addr_tx = mem->start + SUN6I_TXDATA_REG;
+		sspi->dma_addr_rx = mem->start + SUN6I_RXDATA_REG;
+		master->can_dma = sun6i_spi_can_dma;
+	}
+
 	/*
 	 * This wake-up/shutdown pattern is to be able to have the
 	 * device woken up, even if runtime_pm is disabled
@@ -501,7 +636,7 @@  static int sun6i_spi_probe(struct platform_device *pdev)
 	ret = sun6i_spi_runtime_resume(&pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Couldn't resume the device\n");
-		goto err_free_master;
+		goto err_free_dma_rx;
 	}
 
 	pm_runtime_set_active(&pdev->dev);
@@ -519,6 +654,12 @@  static int sun6i_spi_probe(struct platform_device *pdev)
 err_pm_disable:
 	pm_runtime_disable(&pdev->dev);
 	sun6i_spi_runtime_suspend(&pdev->dev);
+err_free_dma_rx:
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
+err_free_dma_tx:
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
 err_free_master:
 	spi_master_put(master);
 	return ret;
@@ -526,8 +667,14 @@  static int sun6i_spi_probe(struct platform_device *pdev)
 
 static int sun6i_spi_remove(struct platform_device *pdev)
 {
+	struct spi_master *master = platform_get_drvdata(pdev);
+
 	pm_runtime_force_suspend(&pdev->dev);
 
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 	return 0;
 }