[4/7] dmaengine: rcar-dmac: Add Renesas R-Car Gen2 DMA Controller (DMAC) driver

Message ID 1405455522-20676-5-git-send-email-laurent.pinchart+renesas@ideasonboard.com (mailing list archive)
State Superseded
Delegated to: Vinod Koul
Headers show

Commit Message

Laurent Pinchart July 15, 2014, 8:18 p.m. UTC
The DMAC is a general purpose multi-channel DMA controller that supports
both slave and memcpy transfers.

The driver currently supports the DMAC found in the r8a7790 and r8a7791
SoCs. Support for compatible DMA controllers (such as the audio DMAC)
will be added later.

Feature-wise, automatic hardware handling of descriptor chains isn't
supported yet. LPAE support is implemented.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
---
 drivers/dma/sh/Kconfig     |    8 +
 drivers/dma/sh/Makefile    |    1 +
 drivers/dma/sh/rcar-dmac.c | 1514 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1523 insertions(+)
 create mode 100644 drivers/dma/sh/rcar-dmac.c

Comments

Kuninori Morimoto July 16, 2014, 1:33 a.m. UTC | #1
Hi Laurent

Thank you for your patch

> The DMAC is a general purpose multi-channel DMA controller that supports
> both slave and memcpy transfers.
> 
> The driver currently supports the DMAC found in the r8a7790 and r8a7791
> SoCs. Support for compatible DMA controllers (such as the audio DMAC)
> will be added later.
> 
> Feature-wise, automatic hardware handling of descriptor chains isn't
> supported yet. LPAE support is implemented.
> 
> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> ---
(snip)
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> new file mode 100644
> index 0000000..9cb1dd3
> --- /dev/null
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -0,0 +1,1514 @@
> +/*
> + * Renesas R-Car Gen2 DMA Controller Driver

Can we reuse this driver on Gen3 too ?
"Renesas R-Car" alone would be enough.

> +static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
> +{
> +	struct rcar_dmac_desc *desc = chan->desc.running;
> +	struct rcar_dmac_hw_desc *hwdesc = desc->running;
> +
> +	dev_dbg(chan->chan.device->dev,
> +		"chan%u: queue hwdesc %p: %u@%pad -> %pad\n",
> +		chan->index, hwdesc, hwdesc->size, &hwdesc->src_addr,
> +		&hwdesc->dst_addr);
> +
> +	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));
> +
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +	rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, hwdesc->src_addr >> 32);
> +	rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, hwdesc->dst_addr >> 32);
> +#endif
> +	rcar_dmac_chan_write(chan, RCAR_DMASAR, hwdesc->src_addr & 0xffffffff);
> +	rcar_dmac_chan_write(chan, RCAR_DMADAR, hwdesc->dst_addr & 0xffffffff);
> +
> +	if (chan->mid_rid >= 0)
> +		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);

I guess the (chan->mid_rid < 0) case should be an error

> +
> +	rcar_dmac_chan_write(chan, RCAR_DMATCR,
> +			     hwdesc->size >> desc->xfer_shift);
> +
> +	rcar_dmac_chan_write(chan, RCAR_DMACHCR, desc->chcr | RCAR_DMACHCR_DE |
> +			     RCAR_DMACHCR_IE);
> +}
(snip)
> +static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> +	struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan);
> +	struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx);
> +	unsigned long flags;
> +	dma_cookie_t cookie;
> +	bool resume;
> +
> +	mutex_lock(&chan->power_lock);
> +
> +	spin_lock_irqsave(&chan->lock, flags);
> +
> +	cookie = dma_cookie_assign(tx);
> +
> +	dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n",
> +		chan->index, tx->cookie, desc);
> +
> +	list_add_tail(&desc->node, &chan->desc.pending);
> +	desc->running = list_first_entry(&desc->hwdescs,
> +					 struct rcar_dmac_hw_desc, node);
> +
> +	/* Resume the device when submitting the first descriptor. */
> +	resume = chan->desc.submitted++ == 0;
> +
> +	spin_unlock_irqrestore(&chan->lock, flags);
> +
> +	if (resume)
> +		pm_runtime_get_sync(chan->chan.device->dev);

pm_runtime_xxx itself has an internal counter ?

> +
> +	mutex_unlock(&chan->power_lock);
> +
> +	return cookie;
> +}
(snip)
> +static int rcar_dmac_hw_desc_alloc(struct rcar_dmac_chan *chan)
> +{
> +	struct rcar_dmac_desc_page *page;
> +	LIST_HEAD(list);
> +	unsigned int i;
> +
> +	page = (void *)get_zeroed_page(GFP_KERNEL);
> +	if (page == NULL)
> +		return -ENOMEM;

I like "if (!page)"

> +
> +	for (i = 0; i < RCAR_DMAC_HWDESCS_PER_PAGE; ++i) {
> +		struct rcar_dmac_hw_desc *hwdesc = &page->hwdescs[i];
> +
> +		list_add_tail(&hwdesc->node, &list);
> +	}
> +
> +	spin_lock_irq(&chan->lock);
> +	list_splice_tail(&list, &chan->desc.hw_free);
> +	list_add_tail(&page->node, &chan->desc.pages);
> +	spin_unlock_irq(&chan->lock);
> +
> +	return 0;
> +}
(snip)
> +static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
> +					  struct rcar_dmac_desc *desc)
> +{
> +	static const u32 chcr_ts[] = {
> +		RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B,
> +		RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B,
> +		RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B,
> +		RCAR_DMACHCR_TS_64B,
> +	};
> +
> +	unsigned int xfer_size;
> +	u32 chcr;
> +
> +	switch (desc->direction) {
> +	case DMA_DEV_TO_MEM:
> +		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED
> +		     | RCAR_DMACHCR_RS_DMARS;
> +		xfer_size = chan->xfer_size;
> +		break;
> +
> +	case DMA_MEM_TO_DEV:
> +		chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC
> +		     | RCAR_DMACHCR_RS_DMARS;
> +		xfer_size = chan->xfer_size;
> +		break;
> +
> +	case DMA_MEM_TO_MEM:
> +	default:
> +		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC
> +		     | RCAR_DMACHCR_RS_AUTO;
> +		xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE;
> +		break;
> +	}
> +
> +	desc->xfer_shift = ilog2(xfer_size);
> +	desc->chcr = chcr | chcr_ts[desc->xfer_shift];
> +}

The chcr method here and in rcar_dmac_slave_config() is very nice for me !!

> +/*
> + * rcar_dmac_chan_prep_sg - prepare transfer descriptors from an SG list
> + *
> + * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
> + * converted to scatter-gather to guarantee consistent locking and a correct
> + * list manipulation. For slave DMA direction carries the usual meaning, and,
> + * logically, the SG list is RAM and the addr variable contains slave address,
> + * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
> + * and the SG list contains only one element and points at the source buffer.
> + */
> +static struct dma_async_tx_descriptor *
> +rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
> +		       unsigned int sg_len, dma_addr_t dev_addr,
> +		       enum dma_transfer_direction dir, unsigned long dma_flags,
> +		       bool cyclic)
> +{
> +	struct rcar_dmac_hw_desc *hwdesc;
> +	struct rcar_dmac_desc *desc;
> +	struct scatterlist *sg = sgl;
> +	size_t full_size = 0;
> +	unsigned int i;
> +
> +	desc = rcar_dmac_desc_get(chan);
> +	if (desc == NULL)
> +		return NULL;

ditto

> +static struct dma_async_tx_descriptor *
> +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> +			  size_t buf_len, size_t period_len,
> +			  enum dma_transfer_direction dir, unsigned long flags,
> +			  void *context)
> +{
> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +	struct dma_async_tx_descriptor *desc;
> +	struct scatterlist *sgl;
> +	unsigned int sg_len;
> +	unsigned int i;
> +
> +	/* Someone calling slave DMA on a generic channel? */
> +	if (rchan->mid_rid < 0 || buf_len < period_len) {
> +		dev_warn(chan->device->dev,
> +			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
> +			__func__, buf_len, period_len, rchan->mid_rid);
> +		return NULL;
> +	}
> +
> +	sg_len = buf_len / period_len;
> +	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
> +		dev_err(chan->device->dev,
> +			"chan%u: sg length %d exceeds limit %d",
> +			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
> +		return NULL;
> +	}
> +
> +	sgl = kcalloc(RCAR_DMAC_MAX_SG_LEN, sizeof(*sgl), GFP_NOWAIT);
> +	if (!sgl)
> +		return NULL;

This kcalloc() uses a fixed size.
Can't we use this instead ?
	struct scatterlist sgl[RCAR_DMAC_MAX_SG_LEN];

> +
> +	sg_init_table(sgl, sg_len);
> +
> +	for (i = 0; i < sg_len; ++i) {
> +		dma_addr_t src = buf_addr + (period_len * i);
> +
> +		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
> +			    offset_in_page(src));
> +		sg_dma_address(&sgl[i]) = src;
> +		sg_dma_len(&sgl[i]) = period_len;
> +	}
> +
> +	desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->slave_addr,
> +				      dir, flags, true);
> +
> +	kfree(sgl);
> +	return desc;
> +}
(snip)
> +static int rcar_dmac_probe(struct platform_device *pdev)
> +{
> +	struct dma_device *engine;
> +	struct rcar_dmac *dmac;
> +	struct resource *mem;
> +	unsigned int i;
> +	int irq;
> +	int ret;
> +
> +	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
> +	if (!dmac)
> +		return -ENOMEM;
> +
> +	dmac->dev = &pdev->dev;
> +	platform_set_drvdata(pdev, dmac);
> +
> +	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
> +	if (ret < 0)
> +		return ret;
> +
> +	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
> +				      sizeof(*dmac->channels), GFP_KERNEL);
> +	if (!dmac->channels)
> +		return -ENOMEM;
> +
> +	/* Request resources. */
> +	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
> +	if (IS_ERR(dmac->iomem))
> +		return PTR_ERR(dmac->iomem);
> +
> +	irq = platform_get_irq_byname(pdev, "error");
> +	if (irq < 0) {
> +		dev_err(&pdev->dev, "no error IRQ specified\n");
> +		return -ENODEV;
> +	}
> +
> +	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
> +			       "DMAC Address Error", dmac);
> +	if (ret) {
> +		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
> +			irq, ret);
> +		return ret;
> +	}
> +
> +	dmac->clk = clk_get(&pdev->dev, "fck");
> +	if (IS_ERR(dmac->clk)) {
> +		dev_err(&pdev->dev, "unable to get fck clock\n");
> +		return PTR_ERR(dmac->clk);
> +	}

Maybe this is my misunderstanding,
but which clock do you expect the "fck" clock to be ?
Is it not the same as pm_runtime_xxx()'s clock ?

> +	/* Initialize the channels. */
> +	INIT_LIST_HEAD(&dmac->engine.channels);
> +
> +	for (i = 0; i < dmac->n_channels; ++i) {
> +		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], i);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	/* Enable runtime PM and initialize the device. */
> +	pm_runtime_enable(&pdev->dev);
> +	ret = pm_runtime_get_sync(&pdev->dev);
> +	if (ret < 0) {
> +		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
> +		return ret;
> +	}
> +
> +	ret = rcar_dmac_init(dmac);
> +	pm_runtime_put(&pdev->dev);

power ON -> initialize -> power OFF here,
and shdmac.c has a similar method.
It works, but it is a little bit strange for me :P

Best regards
---
Kuninori Morimoto
Laurent Pinchart July 16, 2014, 9:30 a.m. UTC | #2
Hi Morimoto-san,

Thank you for the review.

On Tuesday 15 July 2014 18:33:50 Kuninori Morimoto wrote:
> > The DMAC is a general purpose multi-channel DMA controller that supports
> > both slave and memcpy transfers.
> > 
> > The driver currently supports the DMAC found in the r8a7790 and r8a7791
> > SoCs. Support for compatible DMA controllers (such as the audio DMAC)
> > will be added later.
> > 
> > Feature-wise, automatic hardware handling of descriptor chains isn't
> > supported yet. LPAE support is implemented.
> > 
> > Signed-off-by: Laurent Pinchart
> > <laurent.pinchart+renesas@ideasonboard.com>
> > ---
> 
> (snip)
> 
> > diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> > new file mode 100644
> > index 0000000..9cb1dd3
> > --- /dev/null
> > +++ b/drivers/dma/sh/rcar-dmac.c
> > @@ -0,0 +1,1514 @@
> > +/*
> > + * Renesas R-Car Gen2 DMA Controller Driver
> 
> Can we reuse this driver on Gen3 too ?
> "Renesas R-Car" alone would be enough.

Given that I don't have Gen3 hardware or documentation, I don't know :-) 
"Gen2" doesn't appear in the compatible string, so it would be trivial to 
change that later if the Gen3 DMAC is indeed compatible. Note that the R-Car 
Gen1 DMAC isn't compatible with this driver.

> > +static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
> > +{
> > +	struct rcar_dmac_desc *desc = chan->desc.running;
> > +	struct rcar_dmac_hw_desc *hwdesc = desc->running;
> > +
> > +	dev_dbg(chan->chan.device->dev,
> > +		"chan%u: queue hwdesc %p: %u@%pad -> %pad\n",
> > +		chan->index, hwdesc, hwdesc->size, &hwdesc->src_addr,
> > +		&hwdesc->dst_addr);
> > +
> > +	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));
> > +
> > +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> > +	rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, hwdesc->src_addr >> 32);
> > +	rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, hwdesc->dst_addr >> 32);
> > +#endif
> > +	rcar_dmac_chan_write(chan, RCAR_DMASAR, hwdesc->src_addr & 0xffffffff);
> > +	rcar_dmac_chan_write(chan, RCAR_DMADAR, hwdesc->dst_addr & 0xffffffff);
> > +
> > +	if (chan->mid_rid >= 0)
> > +		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
> 
> I guess the (chan->mid_rid < 0) case should be an error

No, chan->mid_rid can be 0 when using memcpy mode. It's an error in slave mode 
only.

> > +
> > +	rcar_dmac_chan_write(chan, RCAR_DMATCR,
> > +			     hwdesc->size >> desc->xfer_shift);
> > +
> > +	rcar_dmac_chan_write(chan, RCAR_DMACHCR, desc->chcr | RCAR_DMACHCR_DE |
> > +			     RCAR_DMACHCR_IE);
> > +}
> 
> (snip)
> 
> > +static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor
> > *tx) +{
> > +	struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan);
> > +	struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx);
> > +	unsigned long flags;
> > +	dma_cookie_t cookie;
> > +	bool resume;
> > +
> > +	mutex_lock(&chan->power_lock);
> > +
> > +	spin_lock_irqsave(&chan->lock, flags);
> > +
> > +	cookie = dma_cookie_assign(tx);
> > +
> > +	dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n",
> > +		chan->index, tx->cookie, desc);
> > +
> > +	list_add_tail(&desc->node, &chan->desc.pending);
> > +	desc->running = list_first_entry(&desc->hwdescs,
> > +					 struct rcar_dmac_hw_desc, node);
> > +
> > +	/* Resume the device when submitting the first descriptor. */
> > +	resume = chan->desc.submitted++ == 0;
> > +
> > +	spin_unlock_irqrestore(&chan->lock, flags);
> > +
> > +	if (resume)
> > +		pm_runtime_get_sync(chan->chan.device->dev);
> 
> pm_runtime_xxx itself has an internal counter ?

Yes, but the problem is that the DMA engine API doesn't provide a counterpart 
operation to tx_submit when completing the descriptor. I currently call 
pm_runtime_put in the completion interrupt handler when no descriptor is left 
to be processed, and in terminate_all if descriptors have been submitted. If I 
relied on the runtime PM counter I would need to call pm_runtime_put in a loop 
in terminate_all, which isn't very nice.
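
Just for illustration, that loop would look something like this (a sketch, not
part of this patch):

	/* In terminate_all: drop one runtime PM reference for every
	 * descriptor that was submitted but never completed.
	 */
	while (chan->desc.submitted) {
		chan->desc.submitted--;
		pm_runtime_put(chan->chan.device->dev);
	}

Counting submissions myself and using a single get/put pair per busy period
avoids that.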

> > +
> > +	mutex_unlock(&chan->power_lock);
> > +
> > +	return cookie;
> > +}
> 
> (snip)
> 
> > +static int rcar_dmac_hw_desc_alloc(struct rcar_dmac_chan *chan)
> > +{
> > +	struct rcar_dmac_desc_page *page;
> > +	LIST_HEAD(list);
> > +	unsigned int i;
> > +
> > +	page = (void *)get_zeroed_page(GFP_KERNEL);
> > +	if (page == NULL)
> > +		return -ENOMEM;
> 
> I like "if (!page)"

Given that I use the "!" style throughout the driver I'll change it here and in 
the couple of other locations where "== NULL" is used.

> > +
> > +	for (i = 0; i < RCAR_DMAC_HWDESCS_PER_PAGE; ++i) {
> > +		struct rcar_dmac_hw_desc *hwdesc = &page->hwdescs[i];
> > +
> > +		list_add_tail(&hwdesc->node, &list);
> > +	}
> > +
> > +	spin_lock_irq(&chan->lock);
> > +	list_splice_tail(&list, &chan->desc.hw_free);
> > +	list_add_tail(&page->node, &chan->desc.pages);
> > +	spin_unlock_irq(&chan->lock);
> > +
> > +	return 0;
> > +}
> 
> (snip)
> 
> > +static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
> > +					  struct rcar_dmac_desc *desc)
> > +{
> > +	static const u32 chcr_ts[] = {
> > +		RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B,
> > +		RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B,
> > +		RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B,
> > +		RCAR_DMACHCR_TS_64B,
> > +	};
> > +
> > +	unsigned int xfer_size;
> > +	u32 chcr;
> > +
> > +	switch (desc->direction) {
> > +	case DMA_DEV_TO_MEM:
> > +		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED
> > +		     | RCAR_DMACHCR_RS_DMARS;
> > +		xfer_size = chan->xfer_size;
> > +		break;
> > +
> > +	case DMA_MEM_TO_DEV:
> > +		chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC
> > +		     | RCAR_DMACHCR_RS_DMARS;
> > +		xfer_size = chan->xfer_size;
> > +		break;
> > +
> > +	case DMA_MEM_TO_MEM:
> > +	default:
> > +		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC
> > +		     | RCAR_DMACHCR_RS_AUTO;
> > +		xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE;
> > +		break;
> > +	}
> > +
> > +	desc->xfer_shift = ilog2(xfer_size);
> > +	desc->chcr = chcr | chcr_ts[desc->xfer_shift];
> > +}
> 
> The chcr method here and in rcar_dmac_slave_config() is very nice for me !!

[snip]

> > +static struct dma_async_tx_descriptor *
> > +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> > +			  size_t buf_len, size_t period_len,
> > +			  enum dma_transfer_direction dir, unsigned long flags,
> > +			  void *context)
> > +{
> > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +	struct dma_async_tx_descriptor *desc;
> > +	struct scatterlist *sgl;
> > +	unsigned int sg_len;
> > +	unsigned int i;
> > +
> > +	/* Someone calling slave DMA on a generic channel? */
> > +	if (rchan->mid_rid < 0 || buf_len < period_len) {
> > +		dev_warn(chan->device->dev,
> > +			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
> > +			__func__, buf_len, period_len, rchan->mid_rid);
> > +		return NULL;
> > +	}
> > +
> > +	sg_len = buf_len / period_len;
> > +	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
> > +		dev_err(chan->device->dev,
> > +			"chan%u: sg length %d exceeds limit %d",
> > +			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
> > +		return NULL;
> > +	}
> > +
> > +	sgl = kcalloc(RCAR_DMAC_MAX_SG_LEN, sizeof(*sgl), GFP_NOWAIT);
> > +	if (!sgl)
> > +		return NULL;
> 
> This kcalloc() uses a fixed size.
> Can't we use this instead ?
> 	struct scatterlist sgl[RCAR_DMAC_MAX_SG_LEN];

That's what the shdma-base driver does, and gcc warns that it consumes more
than 1024 bytes of stack space. Given that the kernel stack size is fixed we
should be conservative to avoid stack overflows. I think we should fix
shdma-base.

> > +
> > +	sg_init_table(sgl, sg_len);
> > +
> > +	for (i = 0; i < sg_len; ++i) {
> > +		dma_addr_t src = buf_addr + (period_len * i);
> > +
> > +		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
> > +			    offset_in_page(src));
> > +		sg_dma_address(&sgl[i]) = src;
> > +		sg_dma_len(&sgl[i]) = period_len;
> > +	}
> > +
> > +	desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->slave_addr,
> > +				      dir, flags, true);
> > +
> > +	kfree(sgl);
> > +	return desc;
> > +}
> 
> (snip)
> 
> > +static int rcar_dmac_probe(struct platform_device *pdev)
> > +{
> > +	struct dma_device *engine;
> > +	struct rcar_dmac *dmac;
> > +	struct resource *mem;
> > +	unsigned int i;
> > +	int irq;
> > +	int ret;
> > +
> > +	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
> > +	if (!dmac)
> > +		return -ENOMEM;
> > +
> > +	dmac->dev = &pdev->dev;
> > +	platform_set_drvdata(pdev, dmac);
> > +
> > +	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
> > +				      sizeof(*dmac->channels), GFP_KERNEL);
> > +	if (!dmac->channels)
> > +		return -ENOMEM;
> > +
> > +	/* Request resources. */
> > +	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
> > +	if (IS_ERR(dmac->iomem))
> > +		return PTR_ERR(dmac->iomem);
> > +
> > +	irq = platform_get_irq_byname(pdev, "error");
> > +	if (irq < 0) {
> > +		dev_err(&pdev->dev, "no error IRQ specified\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
> > +			       "DMAC Address Error", dmac);
> > +	if (ret) {
> > +		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
> > +			irq, ret);
> > +		return ret;
> > +	}
> > +
> > +	dmac->clk = clk_get(&pdev->dev, "fck");
> > +	if (IS_ERR(dmac->clk)) {
> > +		dev_err(&pdev->dev, "unable to get fck clock\n");
> > +		return PTR_ERR(dmac->clk);
> > +	}
> 
> Maybe this is my misunderstanding,
> but which clock do you expect the "fck" clock to be ?
> Is it not the same as pm_runtime_xxx()'s clock ?

It's the MSTP clock for the DMAC.

> > +	/* Initialize the channels. */
> > +	INIT_LIST_HEAD(&dmac->engine.channels);
> > +
> > +	for (i = 0; i < dmac->n_channels; ++i) {
> > +		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], i);
> > +		if (ret < 0)
> > +			return ret;
> > +	}
> > +
> > +	/* Enable runtime PM and initialize the device. */
> > +	pm_runtime_enable(&pdev->dev);
> > +	ret = pm_runtime_get_sync(&pdev->dev);
> > +	if (ret < 0) {
> > +		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	ret = rcar_dmac_init(dmac);
> > +	pm_runtime_put(&pdev->dev);
> 
> power ON -> initialize -> power OFF here,
> and shdmac.c has a similar method.
> It works, but it is a little bit strange for me :P

Don't forget that runtime PM can be disabled in the kernel configuration. In 
that case this is the only place where the DMAC hardware will be initialized.
Kuninori Morimoto July 17, 2014, 12:59 a.m. UTC | #3
Hi Laurent

> > > +	if (chan->mid_rid >= 0)
> > > +		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
> > 
> > I guess the (chan->mid_rid < 0) case should be an error
> 
> No, chan->mid_rid can be 0 when using memcpy mode. It's an error in slave mode 
> only.

?
mid_rid > 0 : slave mode
mid_rid = 0 : memcpy mode
mid_rid < 0 : error

Can't we do this ?

static void rcar_dmac_chan_start_xfer(xxx)
{
   if (chan->mid_rid < 0)
          return err;
   ...
   rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
   ...
}


> > This kcalloc() uses a fixed size.
> > Can't we use this instead ?
> > 	struct scatterlist sgl[RCAR_DMAC_MAX_SG_LEN];
> 
> That's what the shdma-base driver does, and gcc warns that it consumes more
> than 1024 bytes of stack space. Given that the kernel stack size is fixed we
> should be conservative to avoid stack overflows. I think we should fix
> shdma-base.

Ahh... I understand.
Is it possible to put this in a comment ?

> > > +	dmac->clk = clk_get(&pdev->dev, "fck");
> > > +	if (IS_ERR(dmac->clk)) {
> > > +		dev_err(&pdev->dev, "unable to get fck clock\n");
> > > +		return PTR_ERR(dmac->clk);
> > > +	}
> > 
> > Maybe this is my misunderstanding,
> > but which clock do you expect the "fck" clock to be ?
> > Is it not the same as pm_runtime_xxx()'s clock ?
> 
> It's the MSTP clock for the DMAC.

Hmm...
So why do you use clk_xxx() instead of pm_runtime_xxx() on suspend/resume ?
I forget the details, but I ran into trouble when using both clk_xxx() and
pm_runtime_xxx() for the same clock,
because they have different counters.

> > > +	/* Enable runtime PM and initialize the device. */
> > > +	pm_runtime_enable(&pdev->dev);
> > > +	ret = pm_runtime_get_sync(&pdev->dev);
> > > +	if (ret < 0) {
> > > +		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
> > > +		return ret;
> > > +	}
> > > +
> > > +	ret = rcar_dmac_init(dmac);
> > > +	pm_runtime_put(&pdev->dev);
> > 
> > power ON -> initialize -> power OFF here,
> > and shdmac.c has a similar method.
> > It works, but it is a little bit strange for me :P
> 
> Don't forget that runtime PM can be disabled in the kernel configuration. In 
> that case this is the only place where the DMAC hardware will be initialized.

This driver counts its users.
So, I thought that we could initialize the device for the 1st user.

Best regards
---
Kuninori Morimoto
Laurent Pinchart July 17, 2014, 1:08 a.m. UTC | #4
Hi Morimoto-san,

On Wednesday 16 July 2014 17:59:39 Kuninori Morimoto wrote:
> Hi Laurent
> 
> > > > +	if (chan->mid_rid >= 0)
> > > > +		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
> > > 
> > > I guess the (chan->mid_rid < 0) case should be an error
> > 
> > No, chan->mid_rid can be 0 when using memcpy mode. It's an error in slave
> > mode only.
> 
> ?
> mid_rid > 0 : slave mode
> mid_rid = 0 : memcpy mode
> mid_rid < 0 : error

Sorry, I spoke too fast. mid_rid == 0 is a valid hardware value in slave mode. 
mid_rid < 0 denotes memcpy mode.
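
So the convention is:

	mid_rid >= 0 : slave mode (0 is a valid MID/RID value)
	mid_rid <  0 : memcpy mode (DMARS is left unprogrammed)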

> Can't we do this ?
> 
> static void rcar_dmac_chan_start_xfer(xxx)
> {
>    if (chan->mid_rid < 0)
>           return err;
>    ...
>    rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
>    ...
> }
> 
> > > This kcalloc() uses a fixed size.
> > > Can't we use this instead ?
> > > 
> > > 	struct scatterlist sgl[RCAR_DMAC_MAX_SG_LEN];
> > 
> > That's what the shdma-base driver does, and gcc warns that it consumes
> > more than 1024 bytes of stack space. Given that the kernel stack size is
> > fixed we should be conservative to avoid stack overflows. I think we
> > should fix shdma-base.
> 
> Ahh... I understand.
> Is it possible to put this in a comment ?

Sure.
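
Something along these lines (a sketch of what the comment in v2 could say):

	/*
	 * Allocate the sg list dynamically as it would consume too much stack
	 * space. A stack-allocated sgl[RCAR_DMAC_MAX_SG_LEN] array makes gcc
	 * warn about a frame larger than 1024 bytes.
	 */
	sgl = kcalloc(RCAR_DMAC_MAX_SG_LEN, sizeof(*sgl), GFP_NOWAIT);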

> > > > +	dmac->clk = clk_get(&pdev->dev, "fck");
> > > > +	if (IS_ERR(dmac->clk)) {
> > > > +		dev_err(&pdev->dev, "unable to get fck clock\n");
> > > > +		return PTR_ERR(dmac->clk);
> > > > +	}
> > > 
> > > Maybe this is my misunderstanding,
> > > but which clock do you expect the "fck" clock to be ?
> > > Is it not the same as pm_runtime_xxx()'s clock ?
> > 
> > It's the MSTP clock for the DMAC.
> 
> Hmm...
> So why do you use clk_xxx() instead of pm_runtime_xxx() on suspend/resume ?
> I forget the details, but I ran into trouble when using both clk_xxx() and
> pm_runtime_xxx() for the same clock,
> because they have different counters.

Are you talking about letting the drivers/sh/pm_runtime.c code handle the 
clock automatically ? Yes, I think that should work. I'll test it.

> > > > +	/* Enable runtime PM and initialize the device. */
> > > > +	pm_runtime_enable(&pdev->dev);
> > > > +	ret = pm_runtime_get_sync(&pdev->dev);
> > > > +	if (ret < 0) {
> > > > +		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	ret = rcar_dmac_init(dmac);
> > > > +	pm_runtime_put(&pdev->dev);
> > > 
> > > power ON -> initialize -> power OFF here,
> > > and shdmac.c has a similar method.
> > > It works, but it is a little bit strange for me :P
> > 
> > Don't forget that runtime PM can be disabled in the kernel configuration.
> > In that case this is the only place where the DMAC hardware will be
> > initialized.
>
> This driver counts its users.
> So, I thought that we could initialize the device for the 1st user.

It could be done, but that would be more complex as the DMAC would be 
reinitialized every time the use count increases from 0 to 1.
Kuninori Morimoto July 17, 2014, 3:02 a.m. UTC | #5
Hi Laurent again

I found a bug and some trouble

> +/* -----------------------------------------------------------------------------
> + * OF xlate and channel filter
> + */
> +
> +static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
> +{
> +	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
> +	unsigned int id = (unsigned int)arg;
> +
> +	return !test_and_set_bit(id, dmac->modules);
> +}

I ran into 2 troubles with this filter.

You know, the sound driver needs both "Audio DMAC" and "Audio DMAC peri peri".
Now, the DMAC drivers are
 - Audio DMAC           : rcar-dma
 - Audio DMAC peri peri : rcar-audma (= shdma-base)

If I enable both DMACs, this filter will be called
with the Audio DMAC peri peri side's (= shdma-base) chan.
This filter needs to check the "chan" itself.
(I used "chan->private" for checking, as a local hack.)
This is the 1st trouble.

The 2nd trouble is chan/filter again :P
Now, your [6/7] patch adds 2 SYS-DMACs to the DTSI file,
and I need to add Audio DMAC / Audio DMAC peri peri entries.
So, the total entries are...

	dmac0: dma-controller@e6700000 {
	..
	};
	dmac1: dma-controller@e6720000 {
	...
	};
	audma0: dma-controller@ec700000 {
	...
	};
	audma1: dma-controller@ec720000 {
	...
	};
	audmapp: audio-dma-pp@0xec740000 {
	...
	};

But, this filter doesn't check the DMAC itself.
So, my driver requests "audma0's 0x01 ID", but
this filter accepts it as "dmac0's 0x01 ID".


> +/* -----------------------------------------------------------------------------
> + * Probe and remove
> + */
> +
> +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
> +				struct rcar_dmac_chan *rchan,
> +				unsigned int index)
> +{
> +	struct platform_device *pdev = to_platform_device(dmac->dev);
> +	struct dma_chan *chan = &rchan->chan;
> +	char irqname[16];
> +	int irq;
> +	int ret;
> +
> +	rchan->index = index;
> +	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
> +	rchan->mid_rid = -EINVAL;
> +
> +	spin_lock_init(&rchan->lock);
> +	mutex_init(&rchan->power_lock);
> +
> +	/* Request the channel interrupt. */
> +	sprintf(irqname, "ch%u", index);
> +	irq = platform_get_irq_byname(pdev, irqname);
> +	if (irq < 0) {
> +		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
> +		return -ENODEV;
> +	}
> +
> +	sprintf(irqname, "DMAC channel %u", index);
> +	ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
> +					rcar_dmac_isr_channel_thread, 0,
> +					irqname, rchan);
> +	if (ret) {
> +		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", irq, ret);
> +		return ret;
> +	}

Here, you used "irqname", which lives on the stack, for devm_request_threaded_irq().
It breaks the interrupt names in /proc/interrupts.

Best regards
---
Kuninori Morimoto
Kuninori Morimoto July 17, 2014, 3:25 a.m. UTC | #6
Hi Laurent

> > mid_rid > 0 : slave mode
> > mid_rid = 0 : memcpy mode
> > mid_rid < 0 : error
> 
> Sorry, I spoke too fast. mid_rid == 0 is a valid hardware value in slave mode. 
> mid_rid < 0 denotes memcpy mode.

Ahh, OK, I understand.
Thank you for explaining.

> This driver counts its users.
> So, I thought that we could initialize the device for the 1st user.
> 
> It could be done, but that would be more complex as the DMAC would be 
> reinitialized every time the use count increases from 0 to 1.

I don't have a strong objection to the current method,
so you don't need to change it in v2.

But, the current code does
   MSTP ON -> device init -> MSTP OFF,
and we will use this driver like
   MSTP ON -> transfer -> MSTP OFF.
My concern is whether this initialization really
survives an MSTP OFF/ON cycle.
Maybe this is OK on the current DMAC,
but we don't know about future DMACs...

Laurent Pinchart July 18, 2014, 12:52 p.m. UTC | #7
Hi Morimoto-san,

On Wednesday 16 July 2014 20:25:50 Kuninori Morimoto wrote:
> Hi Laurent
> 
> > > mid_rid > 0 : slave mode
> > > mid_rid = 0 : memcpy mode
> > > mid_rid < 0 : error
> > 
> > Sorry, I spoke too fast. mid_rid == 0 is a valid hardware value in slave
> > mode. mid_rid < 0 denotes memcpy mode.
> 
> Ahh, OK, I understand.
> Thank you for explaining.
> 
> > This driver counts its users.
> > So, I thought that we could initialize the device for the 1st user.
> > 
> > It could be done, but that would be more complex as the DMAC would be
> > reinitialized every time the use count increases from 0 to 1.
> 
> I don't have a strong objection to the current method,
> so you don't need to change it in v2.
> 
> But, the current code does
>    MSTP ON -> device init -> MSTP OFF,
> and we will use this driver like
>    MSTP ON -> transfer -> MSTP OFF.

If runtime PM is enabled, the pm_runtime_get_sync() call will result in the 
runtime_resume operation (implemented in rcar_dmac_resume) being called. 
There's a call to rcar_dmac_init() there, so the DMAC will be reinitialized. 
The order of operations is thus

	MSTP ON -> device init -> transfer -> MSTP OFF

If runtime PM is disabled, pm_runtime_get_sync() will be a no-op. The 
rcar_dmac_resume() function will never be called, and the MSTP clock gate will 
always be on. The device will be initialized once only, at probe time. 
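
For illustration, the resume side then boils down to something like this (a
sketch; the body of rcar_dmac_resume() isn't shown in this excerpt):

	static int rcar_dmac_resume(struct device *dev)
	{
		struct rcar_dmac *dmac = dev_get_drvdata(dev);

		/* Reprogram DMAOR and clear all channels every time the
		 * MSTP clock is turned back on.
		 */
		return rcar_dmac_init(dmac);
	}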

> My concern is whether this initialization really
> survives an MSTP OFF/ON cycle.
> Maybe this is OK on the current DMAC,
> but we don't know about future DMACs...

If we don't know, we can't really decide now, but we can always fix the driver 
if this becomes a problem in the future :-)
Laurent Pinchart July 18, 2014, 1:06 p.m. UTC | #8
Hi Morimoto-san,

On Wednesday 16 July 2014 20:02:48 Kuninori Morimoto wrote:
> Hi Laurent again
> 
> I found a bug and some trouble
> 
> > +/* -----------------------------------------------------------------------------
> > + * OF xlate and channel filter
> > + */
> > +
> > +static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
> > +{
> > +	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
> > +	unsigned int id = (unsigned int)arg;
> > +
> > +	return !test_and_set_bit(id, dmac->modules);
> > +}
> 
> I ran into 2 troubles with this filter.
> 
> You know, the sound driver needs both "Audio DMAC" and "Audio DMAC peri peri".
> Now, the DMAC drivers are
>  - Audio DMAC           : rcar-dma
>  - Audio DMAC peri peri : rcar-audma (= shdma-base)
> 
> If I enable both DMACs, this filter will be called
> with the Audio DMAC peri peri side's (= shdma-base) chan.
> This filter needs to check the "chan" itself.
> (I used "chan->private" for checking, as a local hack.)
> This is the 1st trouble.

Right. I'll fix that in the driver, but this is really hackish. The 
rcar_dmac_of_xlate() function, called by of_dma_request_slave_channel(), 
currently calls dma_request_channel() with a private filter function. This 
will iterate over all channels from all devices, even though we have a pointer 
to the struct dma_device at that time, and we know we want to allocate a 
channel from that particular dma_device.

Wouldn't it make more sense, to support OF platforms, to extend the dmaengine 
core with a function that requests a specific channel from a specific 
dma_device, without using a filter function ?
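
In the meantime the filter can at least compare the channel's dma_device
against the instance the xlate resolved. A sketch, assuming the xlate passes
both the instance and the requested MID/RID through a small (hypothetical)
helper structure:

	struct rcar_dmac_filter_data {
		struct rcar_dmac *dmac;
		unsigned int id;
	};

	static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
	{
		struct rcar_dmac_filter_data *data = arg;

		/* Reject channels that don't belong to the DMAC instance the
		 * DT phandle resolved to; this also rejects channels exposed
		 * by other drivers such as shdma-base.
		 */
		if (chan->device != &data->dmac->engine)
			return false;

		return !test_and_set_bit(data->id, data->dmac->modules);
	}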

> The 2nd trouble is chan/filter again :P
> Now, your [6/7] patch adds 2 SYS-DMACs to the DTSI file,
> and I need to add Audio DMAC / Audio DMAC peri peri entries.
> So, the total entries are...
> 
> 	dmac0: dma-controller@e6700000 {
> 	..
> 	};
> 	dmac1: dma-controller@e6720000 {
> 	...
> 	};
> 	audma0: dma-controller@ec700000 {
> 	...
> 	};
> 	audma1: dma-controller@ec720000 {
> 	...
> 	};
> 	audmapp: audio-dma-pp@0xec740000 {
> 	...
> 	};
> 
> But, this filter doesn't check the DMAC itself.
> So, my driver requests "audma0's 0x01 ID", but
> this filter accepts it as "dmac0's 0x01 ID".

I'll fix that at the same time as the first problem.

> > +/* ----------------------------------------------------------------------
> > + * Probe and remove
> > + */
> > +
> > +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
> > +				struct rcar_dmac_chan *rchan,
> > +				unsigned int index)
> > +{
> > +	struct platform_device *pdev = to_platform_device(dmac->dev);
> > +	struct dma_chan *chan = &rchan->chan;
> > +	char irqname[16];
> > +	int irq;
> > +	int ret;
> > +
> > +	rchan->index = index;
> > +	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
> > +	rchan->mid_rid = -EINVAL;
> > +
> > +	spin_lock_init(&rchan->lock);
> > +	mutex_init(&rchan->power_lock);
> > +
> > +	/* Request the channel interrupt. */
> > +	sprintf(irqname, "ch%u", index);
> > +	irq = platform_get_irq_byname(pdev, irqname);
> > +	if (irq < 0) {
> > +		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
> > +		return -ENODEV;
> > +	}
> > +
> > +	sprintf(irqname, "DMAC channel %u", index);
> > +	ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
> > +					rcar_dmac_isr_channel_thread, 0,
> > +					irqname, rchan);
> > +	if (ret) {
> > +		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", irq, ret);
> > +		return ret;
> > +	}
> 
> Here, you used "irqname", which lives on the stack, for
> devm_request_threaded_irq(). It breaks the interrupt names in /proc/interrupts.

I've noticed that too :-) I have already fixed the problem in my tree, the fix 
will be integrated in v2.
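
One possible shape for that fix (a sketch, the actual change in v2 may
differ): give the name the same lifetime as the device instead of keeping it
on the stack.

	/* devm-managed, so the string stays valid for /proc/interrupts */
	irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
				 dev_name(dmac->dev), index);
	if (!irqname)
		return -ENOMEM;

	ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
					rcar_dmac_isr_channel_thread, 0,
					irqname, rchan);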

Patch

diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 383b21f..fff6baf 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -52,3 +52,11 @@  config RCAR_AUDMAC_PP
 	depends on SH_DMAE_BASE
 	help
 	  Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers.
+
+config RCAR_DMAC
+	tristate "Renesas R-Car Gen2 DMA Controller"
+	depends on ARCH_SHMOBILE || COMPILE_TEST
+	select DMA_ENGINE
+	help
+	  This driver supports the general purpose DMA controller found in the
+	  Renesas R-Car second generation SoCs.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index 0b13b8e..cfc8268 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -16,3 +16,4 @@  obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_SUDMAC) += sudmac.o
 obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
 obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o
+obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
new file mode 100644
index 0000000..9cb1dd3
--- /dev/null
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -0,0 +1,1514 @@ 
+/*
+ * Renesas R-Car Gen2 DMA Controller Driver
+ *
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ *
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/*
+ * struct rcar_dmac_hw_desc - Descriptor for a single hardware transfer
+ * @node: entry in the parent's hwdesc list
+ * @src_addr: device source address
+ * @dst_addr: device destination address
+ * @size: transfer size in bytes
+ */
+struct rcar_dmac_hw_desc {
+	struct list_head node;
+
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	u32 size;
+};
+
+/*
+ * struct rcar_dmac_desc - R-Car Gen2 DMA Transfer Descriptor
+ * @async_tx: base DMA asynchronous transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @xfer_shift: log2 of the transfer size
+ * @chcr: value of the channel configuration register for this transfer
+ * @node: entry in the channel's descriptors lists
+ * @hwdescs: list of hardware descriptors for this transfer
+ * @running: the hardware transfer being currently processed
+ * @size: transfer size in bytes
+ * @cyclic: when set indicates that the DMA transfer is cyclic
+ */
+struct rcar_dmac_desc {
+	struct dma_async_tx_descriptor async_tx;
+	enum dma_transfer_direction direction;
+	unsigned int xfer_shift;
+	u32 chcr;
+
+	struct list_head node;
+	struct list_head hwdescs;
+	struct rcar_dmac_hw_desc *running;
+
+	size_t size;
+	bool cyclic;
+};
+
+#define to_rcar_dmac_desc(d)	container_of(d, struct rcar_dmac_desc, async_tx)
+
+/*
+ * struct rcar_dmac_desc_page - One page worth of descriptors
+ * @node: entry in the channel's pages list
+ * @descs: array of DMA descriptors
+ * @hwdescs: array of hardware descriptors
+ */
+struct rcar_dmac_desc_page {
+	struct list_head node;
+
+	union {
+		struct rcar_dmac_desc descs[0];
+		struct rcar_dmac_hw_desc hwdescs[0];
+	};
+};
+
+#define RCAR_DMAC_DESCS_PER_PAGE					\
+	((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, descs)) /	\
+	sizeof(struct rcar_dmac_desc))
+#define RCAR_DMAC_HWDESCS_PER_PAGE					\
+	((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, hwdescs)) /	\
+	sizeof(struct rcar_dmac_hw_desc))
+
+/*
+ * struct rcar_dmac_chan - R-Car Gen2 DMA Controller Channel
+ * @chan: base DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @xfer_size: size (in bytes) of individual hardware transfers
+ * @slave_addr: slave source or destination memory address
+ * @mid_rid: hardware MID/RID for the DMA client using this channel
+ * @lock: protects the channel CHCR register and the desc members
+ * @power_lock: protects the submitted counter for pm_runtime_get_sync
+ * @desc.free: list of free descriptors
+ * @desc.pending: list of pending descriptors (submitted with tx_submit)
+ * @desc.active: list of active descriptors (activated with issue_pending)
+ * @desc.done: list of completed descriptors
+ * @desc.wait: list of descriptors waiting for an ack
+ * @desc.running: the descriptor being processed (a member of the active list)
+ * @desc.submitted: number of descriptors submitted and not complete yet
+ * @desc.hw_free: list of free hardware descriptors
+ * @desc.pages: list of pages used by allocated descriptors
+ */
+struct rcar_dmac_chan {
+	struct dma_chan chan;
+	void __iomem *iomem;
+	unsigned int index;
+
+	unsigned int xfer_size;
+	dma_addr_t slave_addr;
+	int mid_rid;
+
+	spinlock_t lock;
+	struct mutex power_lock;
+
+	struct {
+		struct list_head free;
+		struct list_head pending;
+		struct list_head active;
+		struct list_head done;
+		struct list_head wait;
+		struct rcar_dmac_desc *running;
+		unsigned int submitted;
+
+		struct list_head hw_free;
+
+		struct list_head pages;
+	} desc;
+};
+
+#define to_rcar_dmac_chan(c)	container_of(c, struct rcar_dmac_chan, chan)
+
+/*
+ * struct rcar_dmac - R-Car Gen2 DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @clk: functional clock
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ * @modules: bitmask of client modules in use
+ */
+struct rcar_dmac {
+	struct dma_device engine;
+	struct device *dev;
+	void __iomem *iomem;
+	struct clk *clk;
+
+	unsigned int n_channels;
+	struct rcar_dmac_chan *channels;
+
+	unsigned long modules[256 / BITS_PER_LONG];
+};
+
+#define to_rcar_dmac(d)		container_of(d, struct rcar_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define RCAR_DMAC_CHAN_OFFSET(i)	(0x8000 + 0x80 * (i))
+
+#define RCAR_DMAISTA			0x0020
+#define RCAR_DMASEC			0x0030
+#define RCAR_DMAOR			0x0060
+#define RCAR_DMAOR_PRI_FIXED		(0 << 8)
+#define RCAR_DMAOR_PRI_ROUND_ROBIN	(3 << 8)
+#define RCAR_DMAOR_AE			(1 << 2)
+#define RCAR_DMAOR_DME			(1 << 0)
+#define RCAR_DMACHCLR			0x0080
+#define RCAR_DMADPSEC			0x00a0
+
+#define RCAR_DMASAR			0x0000
+#define RCAR_DMADAR			0x0004
+#define RCAR_DMATCR			0x0008
+#define RCAR_DMATCR_MASK		0x00ffffff
+#define RCAR_DMATSR			0x0028
+#define RCAR_DMACHCR			0x000c
+#define RCAR_DMACHCR_CAE		(1 << 31)
+#define RCAR_DMACHCR_CAIE		(1 << 30)
+#define RCAR_DMACHCR_DPM_DISABLED	(0 << 28)
+#define RCAR_DMACHCR_DPM_ENABLED	(1 << 28)
+#define RCAR_DMACHCR_DPM_REPEAT		(2 << 28)
+#define RCAR_DMACHCR_DPM_INFINIE	(3 << 28)
+#define RCAR_DMACHCR_RPT_SAR		(1 << 27)
+#define RCAR_DMACHCR_RPT_DAR		(1 << 26)
+#define RCAR_DMACHCR_RPT_TCR		(1 << 25)
+#define RCAR_DMACHCR_DPB		(1 << 22)
+#define RCAR_DMACHCR_DSE		(1 << 19)
+#define RCAR_DMACHCR_DSIE		(1 << 18)
+#define RCAR_DMACHCR_TS_1B		((0 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_2B		((0 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_4B		((0 << 20) | (2 << 3))
+#define RCAR_DMACHCR_TS_16B		((0 << 20) | (3 << 3))
+#define RCAR_DMACHCR_TS_32B		((1 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_64B		((1 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_8B		((1 << 20) | (3 << 3))
+#define RCAR_DMACHCR_DM_FIXED		(0 << 14)
+#define RCAR_DMACHCR_DM_INC		(1 << 14)
+#define RCAR_DMACHCR_DM_DEC		(2 << 14)
+#define RCAR_DMACHCR_SM_FIXED		(0 << 12)
+#define RCAR_DMACHCR_SM_INC		(1 << 12)
+#define RCAR_DMACHCR_SM_DEC		(2 << 12)
+#define RCAR_DMACHCR_RS_AUTO		(4 << 8)
+#define RCAR_DMACHCR_RS_DMARS		(8 << 8)
+#define RCAR_DMACHCR_IE			(1 << 2)
+#define RCAR_DMACHCR_TE			(1 << 1)
+#define RCAR_DMACHCR_DE			(1 << 0)
+#define RCAR_DMATCRB			0x0018
+#define RCAR_DMATSRB			0x0038
+#define RCAR_DMACHCRB			0x001c
+#define RCAR_DMACHCRB_DCNT(n)		((n) << 24)
+#define RCAR_DMACHCRB_DPTR(n)		((n) << 16)
+#define RCAR_DMACHCRB_DRST		(1 << 15)
+#define RCAR_DMACHCRB_DTS		(1 << 8)
+#define RCAR_DMACHCRB_SLM_NORMAL	(0 << 4)
+#define RCAR_DMACHCRB_SLM_CLK(n)	((8 | (n)) << 4)
+#define RCAR_DMACHCRB_PRI(n)		((n) << 0)
+#define RCAR_DMARS			0x0040
+#define RCAR_DMABUFCR			0x0048
+#define RCAR_DMABUFCR_MBU(n)		((n) << 16)
+#define RCAR_DMABUFCR_ULB(n)		((n) << 0)
+#define RCAR_DMADPBASE			0x0050
+#define RCAR_DMADPBASE_MASK		0xfffffff0
+#define RCAR_DMADPBASE_SEL		(1 << 0)
+#define RCAR_DMADPCR			0x0054
+#define RCAR_DMADPCR_DIPT(n)		((n) << 24)
+#define RCAR_DMAFIXSAR			0x0010
+#define RCAR_DMAFIXDAR			0x0014
+#define RCAR_DMAFIXDPBASE		0x0060
+
+/* Hardcode the MEMCPY transfer size to 4 bytes. */
+#define RCAR_DMAC_MEMCPY_XFER_SIZE	4
+#define RCAR_DMAC_MAX_XFER_LEN		(RCAR_DMATCR_MASK + 1)
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void rcar_dmac_write(struct rcar_dmac *dmac, u32 reg, u32 data)
+{
+	if (reg == RCAR_DMAOR)
+		writew(data, dmac->iomem + reg);
+	else
+		writel(data, dmac->iomem + reg);
+}
+
+static u32 rcar_dmac_read(struct rcar_dmac *dmac, u32 reg)
+{
+	if (reg == RCAR_DMAOR)
+		return readw(dmac->iomem + reg);
+	else
+		return readl(dmac->iomem + reg);
+}
+
+static u32 rcar_dmac_chan_read(struct rcar_dmac_chan *chan, u32 reg)
+{
+	if (reg == RCAR_DMARS)
+		return readw(chan->iomem + reg);
+	else
+		return readl(chan->iomem + reg);
+}
+
+static void rcar_dmac_chan_write(struct rcar_dmac_chan *chan, u32 reg, u32 data)
+{
+	if (reg == RCAR_DMARS)
+		writew(data, chan->iomem + reg);
+	else
+		writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool rcar_dmac_chan_is_busy(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	return (chcr & (RCAR_DMACHCR_DE | RCAR_DMACHCR_TE)) == RCAR_DMACHCR_DE;
+}
+
+static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_hw_desc *hwdesc = desc->running;
+
+	dev_dbg(chan->chan.device->dev,
+		"chan%u: queue hwdesc %p: %u@%pad -> %pad\n",
+		chan->index, hwdesc, hwdesc->size, &hwdesc->src_addr,
+		&hwdesc->dst_addr);
+
+	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, hwdesc->src_addr >> 32);
+	rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, hwdesc->dst_addr >> 32);
+#endif
+	rcar_dmac_chan_write(chan, RCAR_DMASAR, hwdesc->src_addr & 0xffffffff);
+	rcar_dmac_chan_write(chan, RCAR_DMADAR, hwdesc->dst_addr & 0xffffffff);
+
+	if (chan->mid_rid >= 0)
+		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
+
+	rcar_dmac_chan_write(chan, RCAR_DMATCR,
+			     hwdesc->size >> desc->xfer_shift);
+
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, desc->chcr | RCAR_DMACHCR_DE |
+			     RCAR_DMACHCR_IE);
+}
+
+static int rcar_dmac_init(struct rcar_dmac *dmac)
+{
+	u16 dmaor;
+
+	/* Clear all channels and enable the DMAC globally. */
+	rcar_dmac_write(dmac, RCAR_DMACHCLR, 0x7fff);
+	rcar_dmac_write(dmac, RCAR_DMAOR,
+			RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME);
+
+	dmaor = rcar_dmac_read(dmac, RCAR_DMAOR);
+	if ((dmaor & (RCAR_DMAOR_AE | RCAR_DMAOR_DME)) != RCAR_DMAOR_DME) {
+		dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors submission
+ */
+
+static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan);
+	struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx);
+	unsigned long flags;
+	dma_cookie_t cookie;
+	bool resume;
+
+	mutex_lock(&chan->power_lock);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n",
+		chan->index, tx->cookie, desc);
+
+	list_add_tail(&desc->node, &chan->desc.pending);
+	desc->running = list_first_entry(&desc->hwdescs,
+					 struct rcar_dmac_hw_desc, node);
+
+	/* Resume the device when submitting the first descriptor. */
+	resume = chan->desc.submitted++ == 0;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	if (resume)
+		pm_runtime_get_sync(chan->chan.device->dev);
+
+	mutex_unlock(&chan->power_lock);
+
+	return cookie;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+
+/*
+ * rcar_dmac_desc_alloc - Allocate a page worth of DMA descriptors
+ * @chan: the DMA channel
+ */
+static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc_page *page;
+	LIST_HEAD(list);
+	unsigned int i;
+
+	page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < RCAR_DMAC_DESCS_PER_PAGE; ++i) {
+		struct rcar_dmac_desc *desc = &page->descs[i];
+
+		dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+		desc->async_tx.tx_submit = rcar_dmac_tx_submit;
+		INIT_LIST_HEAD(&desc->hwdescs);
+
+		list_add_tail(&desc->node, &list);
+	}
+
+	spin_lock_irq(&chan->lock);
+	list_splice_tail(&list, &chan->desc.free);
+	list_add_tail(&page->node, &chan->desc.pages);
+	spin_unlock_irq(&chan->lock);
+
+	return 0;
+}
+
+/*
+ * rcar_dmac_desc_put - Release a DMA transfer descriptor
+ * @chan: the DMA channel
+ * @desc: the descriptor
+ *
+ * Put the descriptor and its hardware descriptors back in the channel's free
+ * descriptors lists. The descriptor's hwdesc will be reinitialized to an empty
+ * list as a result.
+ *
+ * The descriptor must have been removed from the channel's done list before
+ * calling this function.
+ *
+ * Locking: Must be called with the channel lock held.
+ */
+static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan,
+			       struct rcar_dmac_desc *desc)
+{
+	list_splice_tail_init(&desc->hwdescs, &chan->desc.hw_free);
+	list_add_tail(&desc->node, &chan->desc.free);
+}
+
+static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, &chan->desc.wait, node) {
+		if (async_tx_test_ack(&desc->async_tx)) {
+			list_del(&desc->node);
+			rcar_dmac_desc_put(chan, desc);
+		}
+	}
+}
+
+/*
+ * rcar_dmac_desc_get - Allocate a descriptor for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated descriptor or NULL if no descriptor can
+ * be allocated.
+ */
+static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc;
+	int ret;
+
+	spin_lock_irq(&chan->lock);
+
+	/* Recycle acked descriptors before attempting allocation. */
+	rcar_dmac_desc_recycle_acked(chan);
+
+	do {
+		if (list_empty(&chan->desc.free)) {
+			/*
+			 * No free descriptors, allocate a page worth of them
+			 * and try again, as someone else could race us to get
+			 * the newly allocated descriptors. If the allocation
+			 * fails return an error.
+			 */
+			spin_unlock_irq(&chan->lock);
+			ret = rcar_dmac_desc_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			continue;
+		}
+
+		desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc,
+					node);
+		list_del(&desc->node);
+	} while (!desc);
+
+	spin_unlock_irq(&chan->lock);
+
+	return desc;
+}
+
+/*
+ * rcar_dmac_hw_desc_alloc - Allocate a page worth of hardware descriptors
+ * @chan: the DMA channel
+ */
+static int rcar_dmac_hw_desc_alloc(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc_page *page;
+	LIST_HEAD(list);
+	unsigned int i;
+
+	page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < RCAR_DMAC_HWDESCS_PER_PAGE; ++i) {
+		struct rcar_dmac_hw_desc *hwdesc = &page->hwdescs[i];
+
+		list_add_tail(&hwdesc->node, &list);
+	}
+
+	spin_lock_irq(&chan->lock);
+	list_splice_tail(&list, &chan->desc.hw_free);
+	list_add_tail(&page->node, &chan->desc.pages);
+	spin_unlock_irq(&chan->lock);
+
+	return 0;
+}
+
+/*
+ * rcar_dmac_hw_desc_get - Allocate a hardware descriptor for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated hardware descriptor or NULL if no
+ * descriptor can be allocated.
+ */
+static struct rcar_dmac_hw_desc *
+rcar_dmac_hw_desc_get(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_hw_desc *hwdesc;
+	int ret;
+
+	spin_lock_irq(&chan->lock);
+
+	do {
+		if (list_empty(&chan->desc.hw_free)) {
+			/*
+			 * No free descriptors, allocate a page worth of them
+			 * and try again, as someone else could race us to get
+			 * the newly allocated descriptors. If the allocation
+			 * fails return an error.
+			 */
+			spin_unlock_irq(&chan->lock);
+			ret = rcar_dmac_hw_desc_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			continue;
+		}
+
+		hwdesc = list_first_entry(&chan->desc.hw_free,
+					  struct rcar_dmac_hw_desc, node);
+		list_del(&hwdesc->node);
+	} while (!hwdesc);
+
+	spin_unlock_irq(&chan->lock);
+
+	return hwdesc;
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+
+static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	chcr &= ~(RCAR_DMACHCR_IE | RCAR_DMACHCR_TE | RCAR_DMACHCR_DE);
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
+}
+
+static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc, *_desc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* Move all non-free descriptors to the local lists. */
+	list_splice_init(&chan->desc.pending, &descs);
+	list_splice_init(&chan->desc.active, &descs);
+	list_splice_init(&chan->desc.done, &descs);
+	list_splice_init(&chan->desc.wait, &descs);
+
+	/* Suspend the device if it was running. */
+	if (chan->desc.submitted)
+		pm_runtime_put(chan->chan.device->dev);
+
+	chan->desc.running = NULL;
+	chan->desc.submitted = 0;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &descs, node) {
+		list_del(&desc->node);
+		rcar_dmac_desc_put(chan, desc);
+	}
+}
+
+static void rcar_dmac_chan_terminate_all(struct rcar_dmac_chan *chan)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	rcar_dmac_chan_halt(chan);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	/*
+	 * FIXME: No new interrupt can occur now, but the IRQ thread might still
+	 * be running.
+	 */
+
+	rcar_dmac_chan_reinit(chan);
+}
+
+static void rcar_dmac_stop(struct rcar_dmac *dmac)
+{
+	rcar_dmac_write(dmac, RCAR_DMAOR, 0);
+}
+
+static void rcar_dmac_abort(struct rcar_dmac *dmac)
+{
+	unsigned int i;
+
+	/* Stop all channels. */
+	for (i = 0; i < dmac->n_channels; ++i) {
+		struct rcar_dmac_chan *chan = &dmac->channels[i];
+
+		/* Stop and reinitialize the channel. */
+		spin_lock(&chan->lock);
+		rcar_dmac_chan_halt(chan);
+		spin_unlock(&chan->lock);
+
+		rcar_dmac_chan_reinit(chan);
+	}
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
+					  struct rcar_dmac_desc *desc)
+{
+	static const u32 chcr_ts[] = {
+		RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B,
+		RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B,
+		RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B,
+		RCAR_DMACHCR_TS_64B,
+	};
+
+	unsigned int xfer_size;
+	u32 chcr;
+
+	switch (desc->direction) {
+	case DMA_DEV_TO_MEM:
+		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED
+		     | RCAR_DMACHCR_RS_DMARS;
+		xfer_size = chan->xfer_size;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC
+		     | RCAR_DMACHCR_RS_DMARS;
+		xfer_size = chan->xfer_size;
+		break;
+
+	case DMA_MEM_TO_MEM:
+	default:
+		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC
+		     | RCAR_DMACHCR_RS_AUTO;
+		xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE;
+		break;
+	}
+
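+	/*
+	 * The transfer size is a power of two between 1 and 64 bytes, so its
+	 * base-2 logarithm doubles as an index into the chcr_ts[] table.
+	 */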
+	desc->xfer_shift = ilog2(xfer_size);
+	desc->chcr = chcr | chcr_ts[desc->xfer_shift];
+}
+
+/*
+ * rcar_dmac_chan_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and correct
+ * list manipulation. For slave DMA the direction carries its usual meaning,
+ * and, logically, the SG list describes RAM while dev_addr holds the slave
+ * address, e.g. the FIFO I/O register. For MEMCPY the direction equals
+ * DMA_MEM_TO_MEM and the SG list contains a single element that points at
+ * the source buffer.
+ */
+static struct dma_async_tx_descriptor *
+rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, dma_addr_t dev_addr,
+		       enum dma_transfer_direction dir, unsigned long dma_flags,
+		       bool cyclic)
+{
+	struct rcar_dmac_hw_desc *hwdesc;
+	struct rcar_dmac_desc *desc;
+	struct scatterlist *sg = sgl;
+	size_t full_size = 0;
+	unsigned int i;
+
+	desc = rcar_dmac_desc_get(chan);
+	if (desc == NULL)
+		return NULL;
+
+	desc->async_tx.flags = dma_flags;
+	desc->async_tx.cookie = -EBUSY;
+
+	desc->cyclic = cyclic;
+	desc->direction = dir;
+
+	rcar_dmac_chan_configure_desc(chan, desc);
+
+	/*
+	 * Allocate and fill the hardware descriptors. We own the only reference
+	 * to the DMA descriptor, there's no need for locking.
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t mem_addr = sg_dma_address(sg);
+		size_t len = sg_dma_len(sg);
+
+		full_size += len;
+
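+		/*
+		 * Split the SG entry into hardware descriptors of at most
+		 * RCAR_DMAC_MAX_XFER_LEN bytes each.
+		 */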
+		while (len) {
+			size_t size = min_t(size_t, len, RCAR_DMAC_MAX_XFER_LEN);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			/*
+			 * Prevent individual transfers from crossing 4GB
+			 * boundaries.
+			 */
+			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
+				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
+			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
+				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
+#endif
+
+			hwdesc = rcar_dmac_hw_desc_get(chan);
+			if (!hwdesc) {
+				rcar_dmac_desc_put(chan, desc);
+				return NULL;
+			}
+
+			if (dir == DMA_DEV_TO_MEM) {
+				hwdesc->src_addr = dev_addr;
+				hwdesc->dst_addr = mem_addr;
+			} else {
+				hwdesc->src_addr = mem_addr;
+				hwdesc->dst_addr = dev_addr;
+			}
+
+			hwdesc->size = size;
+
+			dev_dbg(chan->chan.device->dev,
+				"chan%u: hwdesc %p/%p sgl %u@%p, %u/%u %pad -> %pad\n",
+				chan->index, hwdesc, desc, i, sg, size, len,
+				&hwdesc->src_addr, &hwdesc->dst_addr);
+
+			mem_addr += size;
+			if (dir == DMA_MEM_TO_MEM)
+				dev_addr += size;
+
+			len -= size;
+
+			list_add_tail(&hwdesc->node, &desc->hwdescs);
+		}
+	}
+
+	desc->size = full_size;
+
+	return &desc->async_tx;
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	int ret;
+
+	INIT_LIST_HEAD(&rchan->desc.free);
+	INIT_LIST_HEAD(&rchan->desc.pending);
+	INIT_LIST_HEAD(&rchan->desc.active);
+	INIT_LIST_HEAD(&rchan->desc.done);
+	INIT_LIST_HEAD(&rchan->desc.wait);
+	INIT_LIST_HEAD(&rchan->desc.hw_free);
+	INIT_LIST_HEAD(&rchan->desc.pages);
+
+	/* Preallocate descriptors. */
+	ret = rcar_dmac_hw_desc_alloc(rchan);
+	if (ret < 0)
+		return ret;
+
+	return rcar_dmac_desc_alloc(rchan);
+}
+
+static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	struct rcar_dmac_desc_page *page, *_page;
+
+	/* Protect against ISR */
+	spin_lock_irq(&rchan->lock);
+	rcar_dmac_chan_halt(rchan);
+	spin_unlock_irq(&rchan->lock);
+
+	/* Now no new interrupts will occur */
+
+	if (rchan->mid_rid >= 0) {
+		/* The caller is holding dma_list_mutex */
+		clear_bit(rchan->mid_rid, dmac->modules);
+		rchan->mid_rid = -EINVAL;
+	}
+
+	list_for_each_entry_safe(page, _page, &rchan->desc.pages, node) {
+		list_del(&page->node);
+		free_page((unsigned long)page);
+	}
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			  dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct scatterlist sgl;
+
+	if (!len)
+		return NULL;
+
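+	/*
+	 * Wrap the copy in a single-entry scatterlist so it can be handled by
+	 * the common rcar_dmac_chan_prep_sg() path.
+	 */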
+	sg_init_table(&sgl, 1);
+	sg_set_page(&sgl, pfn_to_page(PFN_DOWN(dma_src)), len,
+		    offset_in_page(dma_src));
+	sg_dma_address(&sgl) = dma_src;
+	sg_dma_len(&sgl) = len;
+
+	return rcar_dmac_chan_prep_sg(rchan, &sgl, 1, dma_dest,
+				      DMA_MEM_TO_MEM, flags, false);
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction dir,
+			unsigned long flags, void *context)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (rchan->mid_rid < 0 || !sg_len) {
+		dev_warn(chan->device->dev,
+			 "%s: bad parameter: len=%d, id=%d\n",
+			 __func__, sg_len, rchan->mid_rid);
+		return NULL;
+	}
+
+	return rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->slave_addr,
+				      dir, flags, false);
+}
+
+#define RCAR_DMAC_MAX_SG_LEN	32
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+			  size_t buf_len, size_t period_len,
+			  enum dma_transfer_direction dir, unsigned long flags,
+			  void *context)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sgl;
+	unsigned int sg_len;
+	unsigned int i;
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (rchan->mid_rid < 0 || buf_len < period_len) {
+		dev_warn(chan->device->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, rchan->mid_rid);
+		return NULL;
+	}
+
+	sg_len = buf_len / period_len;
+	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
+		dev_err(chan->device->dev,
+			"chan%u: sg length %d exceds limit %d",
+			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
+		return NULL;
+	}
+
+	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_NOWAIT);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, sg_len);
+
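+	/* Describe each period of the cyclic buffer with its own SG entry. */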
+	for (i = 0; i < sg_len; ++i) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->slave_addr,
+				      dir, flags, true);
+
+	kfree(sgl);
+	return desc;
+}
+
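+/*
+ * Called for the DMA_SLAVE_CONFIG control command. Consumers typically fill
+ * the dma_slave_config structure through dmaengine_slave_config() with the
+ * device FIFO address and access width for the direction they will use.
+ */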
+static void rcar_dmac_slave_config(struct rcar_dmac_chan *chan,
+				  struct dma_slave_config *cfg)
+{
+	/*
+	 * We could lock this, but you shouldn't be configuring the channel
+	 * while using it.
+	 */
+
+	if (cfg->direction == DMA_DEV_TO_MEM) {
+		chan->slave_addr = cfg->src_addr;
+		chan->xfer_size = cfg->src_addr_width;
+	} else {
+		chan->slave_addr = cfg->dst_addr;
+		chan->xfer_size = cfg->dst_addr_width;
+	}
+}
+
+static int rcar_dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			     unsigned long arg)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		rcar_dmac_chan_terminate_all(rchan);
+		break;
+
+	case DMA_SLAVE_CONFIG:
+		rcar_dmac_slave_config(rchan, (struct dma_slave_config *)arg);
+		break;
+
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
+					 dma_cookie_t cookie)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_hw_desc *hwdesc;
+	size_t residue = 0;
+
+	if (!desc)
+		return 0;
+
+	/*
+	 * If the cookie doesn't correspond to the currently running transfer
+	 * then the descriptor hasn't been processed yet, and the residue is
+	 * equal to the full descriptor size.
+	 */
+	if (cookie != desc->async_tx.cookie)
+		return desc->size;
+
+	/*
+	 * Compute the size of all chunks still to be transferred by walking
+	 * the list backwards from the last chunk until the currently running
+	 * one.
+	 */
+	list_for_each_entry_reverse(hwdesc, &desc->hwdescs, node) {
+		if (hwdesc == desc->running)
+			break;
+
+		residue += hwdesc->size;
+	}
+
+	/*
+	 * Add the residue for the current chunk. The TCR register holds the
+	 * number of transfer units left for the chunk; shift by xfer_shift to
+	 * convert it to bytes.
+	 */
+	residue += rcar_dmac_chan_read(chan, RCAR_DMATCR) << desc->xfer_shift;
+
+	return residue;
+}
+
+static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	enum dma_status status;
+	unsigned long flags;
+	size_t residue;
+
+	status = dma_cookie_status(chan, cookie, txstate);
+	if (status == DMA_COMPLETE || !txstate)
+		return status;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+	residue = rcar_dmac_chan_get_residue(rchan, cookie);
+	spin_unlock_irqrestore(&rchan->lock, flags);
+
+	dma_set_residue(txstate, residue);
+
+	return status;
+}
+
+static void rcar_dmac_issue_pending(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+
+	if (list_empty(&rchan->desc.pending))
+		goto done;
+
+	/* Append the pending list to the active list. */
+	list_splice_tail_init(&rchan->desc.pending, &rchan->desc.active);
+
+	/*
+	 * If no transfer is running pick the first descriptor from the active
+	 * list and start the transfer.
+	 */
+	if (!rchan->desc.running) {
+		struct rcar_dmac_desc *desc;
+
+		desc = list_first_entry(&rchan->desc.active,
+					struct rcar_dmac_desc, node);
+		rchan->desc.running = desc;
+
+		rcar_dmac_chan_start_xfer(rchan);
+	}
+
+done:
+	spin_unlock_irqrestore(&rchan->lock, flags);
+}
+
+static int rcar_dmac_slave_caps(struct dma_chan *chan,
+				struct dma_slave_caps *caps)
+{
+	memset(caps, 0, sizeof(*caps));
+
+	/*
+	 * The device supports all widths from 1 to 64 bytes. As the
+	 * dma_slave_buswidth enumeration is limited to 8 bytes, set the
+	 * numerical value directly.
+	 */
+	caps->src_addr_widths = 0x7f;
+	caps->dstn_addr_widths = 0x7f;
+	caps->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	caps->cmd_terminate = true;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_hw_desc *hwdesc;
+	irqreturn_t ret = IRQ_WAKE_THREAD;
+
+	if (WARN_ON(!desc)) {
+		/*
+		 * This should never happen, there should always be
+		 * a running descriptor when a transfer ends. Warn and
+		 * return.
+		 */
+		return IRQ_NONE;
+	}
+
+	/*
+	 * If we haven't completed the last hardware descriptor simply move to
+	 * the next one. Only wake the IRQ thread if the transfer is cyclic.
+	 */
+	hwdesc = desc->running;
+	if (!list_is_last(&hwdesc->node, &desc->hwdescs)) {
+		desc->running = list_next_entry(hwdesc, node);
+		if (!desc->cyclic)
+			ret = IRQ_HANDLED;
+		goto done;
+	}
+
+	/*
+	 * We've completed the last hardware descriptor. If the transfer is
+	 * cyclic, move back to the first one.
+	 * back to the first one.
+	 */
+	if (desc->cyclic) {
+		desc->running = list_first_entry(&desc->hwdescs,
+						 struct rcar_dmac_hw_desc,
+						 node);
+		goto done;
+	}
+
+	/* The descriptor is complete, move it to the done list. */
+	list_move_tail(&desc->node, &chan->desc.done);
+	chan->desc.submitted--;
+
+	/* Queue the next descriptor, if any. */
+	if (!list_empty(&chan->desc.active))
+		chan->desc.running = list_first_entry(&chan->desc.active,
+						      struct rcar_dmac_desc,
+						      node);
+	else
+		chan->desc.running = NULL;
+
+	/* Suspend the device if no descriptor is pending. */
+	if (!chan->desc.submitted)
+		pm_runtime_put(chan->chan.device->dev);
+
+done:
+	if (chan->desc.running)
+		rcar_dmac_chan_start_xfer(chan);
+
+	return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
+{
+	struct rcar_dmac_chan *chan = dev;
+	irqreturn_t ret = IRQ_NONE;
+	u32 chcr;
+
+	spin_lock(&chan->lock);
+
+	chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR,
+			     chcr & ~(RCAR_DMACHCR_TE | RCAR_DMACHCR_DE));
+
+	if (chcr & RCAR_DMACHCR_TE)
+		ret |= rcar_dmac_isr_transfer_end(chan);
+
+	spin_unlock(&chan->lock);
+
+	return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev)
+{
+	struct rcar_dmac_chan *chan = dev;
+	struct rcar_dmac_desc *desc;
+
+	spin_lock_irq(&chan->lock);
+
+	/* For cyclic transfers notify the user after every chunk. */
+	if (chan->desc.running && chan->desc.running->cyclic) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		desc = chan->desc.running;
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+
+		if (callback) {
+			spin_unlock_irq(&chan->lock);
+			callback(callback_param);
+			spin_lock_irq(&chan->lock);
+		}
+	}
+
+	/*
+	 * Call the callback function for all descriptors on the done list and
+	 * move them to the ack wait list.
+	 */
+	while (!list_empty(&chan->desc.done)) {
+		desc = list_first_entry(&chan->desc.done, struct rcar_dmac_desc,
+					node);
+		dma_cookie_complete(&desc->async_tx);
+		list_del(&desc->node);
+
+		spin_unlock_irq(&chan->lock);
+
+		/*
+		 * We own the only reference to this descriptor, we can safely
+		 * dereference it. The callback is optional and may be NULL.
+		 */
+		if (desc->async_tx.callback)
+			desc->async_tx.callback(desc->async_tx.callback_param);
+
+		spin_lock_irq(&chan->lock);
+
+		list_add_tail(&desc->node, &chan->desc.wait);
+	}
+
+	/* Recycle all acked descriptors. */
+	rcar_dmac_desc_recycle_acked(chan);
+
+	spin_unlock_irq(&chan->lock);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rcar_dmac_isr_error(int irq, void *data)
+{
+	struct rcar_dmac *dmac = data;
+
+	if (!(rcar_dmac_read(dmac, RCAR_DMAOR) & RCAR_DMAOR_AE))
+		return IRQ_NONE;
+
+	/*
+	 * An unrecoverable error occurred on an unknown channel. Halt the DMAC,
+	 * abort transfers on all channels, and reinitialize the DMAC.
+	 */
+	rcar_dmac_stop(dmac);
+	rcar_dmac_abort(dmac);
+	rcar_dmac_init(dmac);
+
+	return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
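+/*
+ * Slave channels are referenced from DT with a single cell that carries the
+ * MID/RID value. An illustrative consumer node (the exact binding is defined
+ * separately) could look like:
+ *
+ *	dmas = <&dmac0 0x11>, <&dmac0 0x12>;
+ *	dma-names = "tx", "rx";
+ */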
+static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	unsigned int id = (unsigned int)arg;
+
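+	/*
+	 * test_and_set_bit() both checks availability and reserves the MID/RID
+	 * atomically, so concurrent requests can't grab the same module.
+	 */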
+	return !test_and_set_bit(id, dmac->modules);
+}
+
+static struct dma_chan *rcar_dmac_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct rcar_dmac_chan *rchan;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	/* Only slave DMA channels can be allocated via DT */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, rcar_dmac_chan_filter,
+				   (void *)dma_spec->args[0]);
+	if (!chan)
+		return NULL;
+
+	rchan = to_rcar_dmac_chan(chan);
+	rchan->mid_rid = dma_spec->args[0];
+
+	return chan;
+}
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
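+/*
+ * The same callbacks serve both system sleep and runtime PM: they gate the
+ * functional clock on suspend and reinitialize the DMAC on resume.
+ */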
+#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
+static int rcar_dmac_suspend(struct device *dev)
+{
+	struct rcar_dmac *dmac = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(dmac->clk);
+
+	return 0;
+}
+
+static int rcar_dmac_resume(struct device *dev)
+{
+	struct rcar_dmac *dmac = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(dmac->clk);
+	if (ret < 0)
+		return ret;
+
+	ret = rcar_dmac_init(dmac);
+	if (ret < 0)
+		clk_disable_unprepare(dmac->clk);
+
+	return ret;
+}
+#endif
+
+static const struct dev_pm_ops rcar_dmac_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(rcar_dmac_suspend, rcar_dmac_resume)
+	SET_RUNTIME_PM_OPS(rcar_dmac_suspend, rcar_dmac_resume, NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
+				struct rcar_dmac_chan *rchan,
+				unsigned int index)
+{
+	struct platform_device *pdev = to_platform_device(dmac->dev);
+	struct dma_chan *chan = &rchan->chan;
+	char pdev_irqname[16];
+	char *irqname;
+	int irq;
+	int ret;
+
+	rchan->index = index;
+	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
+	rchan->mid_rid = -EINVAL;
+
+	spin_lock_init(&rchan->lock);
+	mutex_init(&rchan->power_lock);
+
+	/* Request the channel interrupt. */
+	sprintf(irqname, "ch%u", index);
+	irq = platform_get_irq_byname(pdev, irqname);
+	if (irq < 0) {
+		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
+		return -ENODEV;
+	}
+
+	sprintf(irqname, "DMAC channel %u", index);
+	ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
+					rcar_dmac_isr_channel_thread, 0,
+					irqname, rchan);
+	if (ret) {
+		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", irq, ret);
+		return ret;
+	}
+
+	/*
+	 * Initialize the DMA engine channel and add it to the DMA engine
+	 * channels list.
+	 */
+	chan->device = &dmac->engine;
+	dma_cookie_init(chan);
+
+	list_add_tail(&chan->device_node, &dmac->engine.channels);
+
+	return 0;
+}
+
+static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+	if (ret < 0) {
+		dev_err(dev, "unable to read dma-channels property\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rcar_dmac_probe(struct platform_device *pdev)
+{
+	struct dma_device *engine;
+	struct rcar_dmac *dmac;
+	struct resource *mem;
+	unsigned int i;
+	int irq;
+	int ret;
+
+	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	dmac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dmac);
+
+	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
+	if (ret < 0)
+		return ret;
+
+	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+				      sizeof(*dmac->channels), GFP_KERNEL);
+	if (!dmac->channels)
+		return -ENOMEM;
+
+	/* Request resources. */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(dmac->iomem))
+		return PTR_ERR(dmac->iomem);
+
+	irq = platform_get_irq_byname(pdev, "error");
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no error IRQ specified\n");
+		return -ENODEV;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
+			       "DMAC Address Error", dmac);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
+			irq, ret);
+		return ret;
+	}
+
+	dmac->clk = clk_get(&pdev->dev, "fck");
+	if (IS_ERR(dmac->clk)) {
+		dev_err(&pdev->dev, "unable to get fck clock\n");
+		return PTR_ERR(dmac->clk);
+	}
+
+	/* Initialize the channels. */
+	INIT_LIST_HEAD(&dmac->engine.channels);
+
+	for (i = 0; i < dmac->n_channels; ++i) {
+		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], i);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Enable runtime PM and initialize the device. */
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+		pm_runtime_disable(&pdev->dev);
+		return ret;
+	}
+
+	ret = rcar_dmac_init(dmac);
+	pm_runtime_put(&pdev->dev);
+
+	if (ret) {
+		dev_err(&pdev->dev, "failed to reset device\n");
+		goto error;
+	}
+
+	/* Register the DMAC as a DMA provider for DT. */
+	ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate,
+					 NULL);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * Register the DMA engine device.
+	 *
+	 * Default transfer size of 32 bytes requires 32-byte alignment.
+	 */
+	engine = &dmac->engine;
+	dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+	dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+	engine->dev = &pdev->dev;
+	engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE);
+
+	engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources;
+	engine->device_free_chan_resources = rcar_dmac_free_chan_resources;
+	engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy;
+	engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg;
+	engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic;
+	engine->device_control = rcar_dmac_control;
+	engine->device_tx_status = rcar_dmac_tx_status;
+	engine->device_issue_pending = rcar_dmac_issue_pending;
+	engine->device_slave_caps = rcar_dmac_slave_caps;
+
+	ret = dma_async_device_register(engine);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+
+error:
+	of_dma_controller_free(pdev->dev.of_node);
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int rcar_dmac_remove(struct platform_device *pdev)
+{
+	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&dmac->engine);
+
+	pm_runtime_disable(&pdev->dev);
+
+	for (i = 0; i < dmac->n_channels; ++i)
+		mutex_destroy(&dmac->channels[i].power_lock);
+
+	return 0;
+}
+
+static void rcar_dmac_shutdown(struct platform_device *pdev)
+{
+	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+
+	rcar_dmac_stop(dmac);
+}
+
+static const struct of_device_id rcar_dmac_of_ids[] = {
+	{ .compatible = "renesas,rcar-dmac", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rcar_dmac_of_ids);
+
+static struct platform_driver rcar_dmac_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.pm	= &rcar_dmac_pm,
+		.name	= "rcar-dmac",
+		.of_match_table = rcar_dmac_of_ids,
+	},
+	.probe		= rcar_dmac_probe,
+	.remove		= rcar_dmac_remove,
+	.shutdown	= rcar_dmac_shutdown,
+};
+
+module_platform_driver(rcar_dmac_driver);
+
+MODULE_DESCRIPTION("R-Car Gen2 DMA Controller Driver");
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_LICENSE("GPL v2");