diff mbox

dmaengine: Add support for BCM2835.

Message ID 527D1DDA.2040004@koalo.de (mailing list archive)
State New, archived
Headers show

Commit Message

Florian Meier Nov. 8, 2013, 5:22 p.m. UTC
Add support for DMA controller of BCM2835 as used in the Raspberry Pi.
Currently it only supports cyclic DMA for serving the I2S driver.

Signed-off-by: Florian Meier <florian.meier@koalo.de>
---
 arch/arm/boot/dts/bcm2835.dtsi |   22 +
 drivers/dma/Kconfig            |    6 +
 drivers/dma/Makefile           |    1 +
 drivers/dma/bcm2835-dma.c      |  880 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 909 insertions(+)
 create mode 100644 drivers/dma/bcm2835-dma.c

Comments

Mark Brown Nov. 8, 2013, 6:17 p.m. UTC | #1
On Fri, Nov 08, 2013 at 06:22:34PM +0100, Florian Meier wrote:
> Add support for DMA controller of BCM2835 as used in the Raspberry Pi.
> Currently it only supports cyclic DMA for serving the I2S driver.

Adding in Martin Sperl who's been looking at DMA with regard to the SPI
controller (which will want non-cyclic mode but I guess there's a lot of
shared code).

> 
> Signed-off-by: Florian Meier <florian.meier@koalo.de>
> ---
>  arch/arm/boot/dts/bcm2835.dtsi |   22 +
>  drivers/dma/Kconfig            |    6 +
>  drivers/dma/Makefile           |    1 +
>  drivers/dma/bcm2835-dma.c      |  880 ++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 909 insertions(+)
>  create mode 100644 drivers/dma/bcm2835-dma.c
> 
> diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi
> index 1e12aef..1514198 100644
> --- a/arch/arm/boot/dts/bcm2835.dtsi
> +++ b/arch/arm/boot/dts/bcm2835.dtsi
> @@ -103,6 +103,28 @@
>  			clocks = <&clk_mmc>;
>  			status = "disabled";
>  		};
> +
> +		dma: dma@7e007000 {
> +			compatible = "brcm,bcm2835-dma";
> +			reg = <0x7e007000 0xf00>;
> +			interrupts = <1 16
> +				      1 17
> +				      1 18
> +				      1 19
> +				      1 20
> +				      1 21
> +				      1 22
> +				      1 23
> +				      1 24
> +				      1 25
> +				      1 26
> +				      1 27
> +				      1 28>;
> +
> +			#dma-cells = <1>;
> +			dma-channels = <15>;   /* DMA channel 15 is not handled yet */
> +			dma-requests = <32>;
> +		};
>  	};
>  
>  	clocks {
> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index f238cfd..f2d253b 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
> @@ -288,6 +288,12 @@ config DMA_OMAP
>  	select DMA_ENGINE
>  	select DMA_VIRTUAL_CHANNELS
>  
> +config DMA_BCM2835
> +	tristate "BCM2835 DMA engine support"
> +	depends on (ARCH_BCM2835 || MACH_BCM2708)
> +	select DMA_ENGINE
> +	select DMA_VIRTUAL_CHANNELS
> +
>  config TI_CPPI41
>  	tristate "AM33xx CPPI41 DMA support"
>  	depends on ARCH_OMAP
> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index db89035..6348157 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
> @@ -37,6 +37,7 @@ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
>  obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
>  obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
>  obj-$(CONFIG_DMA_OMAP) += omap-dma.o
> +obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
>  obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
>  obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
>  obj-$(CONFIG_TI_CPPI41) += cppi41.o
> diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
> new file mode 100644
> index 0000000..c3e53b3
> --- /dev/null
> +++ b/drivers/dma/bcm2835-dma.c
> @@ -0,0 +1,880 @@
> +/*
> + * BCM2835 DMA engine support
> + *
> + * This driver only supports cyclic DMA transfers
> + * as needed for the I2S module.
> + *
> + * Author:      Florian Meier, <florian.meier@koalo.de>
> + *              Copyright 2013
> + *
> + * based on
> + *	OMAP DMAengine support by Russell King
> + *
> + *	BCM2708 DMA Driver
> + *	Copyright (C) 2010 Broadcom
> + *
> + *	Raspberry Pi PCM I2S ALSA Driver
> + *	Copyright (c) by Phil Poole 2013
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +#include <linux/dmaengine.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/err.h>
> +#include <linux/init.h>
> +#include <linux/interrupt.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/io.h>
> +#include <linux/spinlock.h>
> +#include <linux/irq.h>
> +#include <linux/of.h>
> +#include <linux/of_dma.h>
> +
> +#include "virt-dma.h"
> +
> +struct bcm2835_dmadev {
> +	struct dma_device ddev;
> +	spinlock_t lock;
> +	struct tasklet_struct task;
> +	struct list_head pending;
> +	uint32_t chans_available;
> +	void __iomem *dma_base;
> +	int *dma_irq_numbers;
> +};
> +
> +struct bcm2835_dma_cb {
> +	unsigned long info;
> +	unsigned long src;
> +	unsigned long dst;
> +	unsigned long length;
> +	unsigned long stride;
> +	unsigned long next;
> +	unsigned long pad[2];
> +};
> +
> +struct bcm2835_chan {
> +	struct virt_dma_chan vc;
> +	struct list_head node;
> +
> +	struct dma_slave_config	cfg;
> +	unsigned dma_sig;
> +	bool cyclic;
> +
> +	int dma_ch;
> +	struct bcm2835_desc *desc;
> +	unsigned sgidx;
> +
> +	void __iomem *dma_chan_base;
> +	int dma_irq_number;
> +	struct irqaction dma_irq_handler;
> +};
> +
> +struct bcm2835_sg {
> +	dma_addr_t addr;
> +	uint32_t en;		/* number of elements (24-bit) */
> +	uint32_t fn;		/* number of frames (16-bit) */
> +};
> +
> +struct bcm2835_desc {
> +	struct virt_dma_desc vd;
> +	enum dma_transfer_direction dir;
> +	dma_addr_t dev_addr;
> +
> +	uint8_t es;
> +	unsigned int sync_type;
> +	unsigned int sync_dreq;
> +
> +	unsigned int control_block_size;
> +	struct bcm2835_dma_cb *control_block_base;
> +	dma_addr_t control_block_base_phys;
> +
> +	unsigned sglen;
> +	struct bcm2835_sg sg[0];
> +};
> +
> +#define BCM2835_DMA_CS		0x00
> +#define BCM2835_DMA_ADDR	0x04
> +#define BCM2835_DMA_SOURCE_AD	0x0c
> +#define BCM2835_DMA_DEST_AD	0x10
> +#define BCM2835_DMA_NEXTCB	0x1C
> +
> +/* DMA CS Control and Status bits */
> +#define BCM2835_DMA_ACTIVE	(1 << 0)
> +#define BCM2835_DMA_INT		(1 << 2)
> +#define BCM2835_DMA_ISPAUSED	(1 << 4)  /* Pause requested or not active */
> +#define BCM2835_DMA_ISHELD	(1 << 5)  /* Is held by DREQ flow control */
> +#define BCM2835_DMA_ERR		(1 << 8)
> +#define BCM2835_DMA_ABORT	(1 << 30) /* stop current CB, go to next, WO */
> +#define BCM2835_DMA_RESET	(1 << 31) /* WO, self clearing */
> +
> +#define BCM2835_DMA_INT_EN	(1 << 0)
> +#define BCM2835_DMA_D_INC	(1 << 4)
> +#define BCM2835_DMA_D_DREQ	(1 << 6)
> +#define BCM2835_DMA_S_INC	(1 << 8)
> +#define BCM2835_DMA_S_DREQ	(1 << 6)
> +
> +#define	BCM2835_DMA_PER_MAP(x)	((x) << 16)
> +
> +#define BCM2835_DMA_DATA_TYPE_S8 1
> +#define BCM2835_DMA_DATA_TYPE_S16 2
> +#define BCM2835_DMA_DATA_TYPE_S32 4
> +#define BCM2835_DMA_DATA_TYPE_S128 16
> +
> +#define BCM2835_DMA_CHANNEL_MASK 32565
> +
> +/* valid only for channels 0 - 14, 15 has its own base address */
> +#define BCM2835_DMA_CHAN(n)	((n)<<8) /* base address */
> +#define BCM2835_DMA_CHANIO(dma_base, n) ((dma_base)+BCM2835_DMA_CHAN(n))
> +
> +static const unsigned es_bytes[] = {
> +	[BCM2835_DMA_DATA_TYPE_S8] = 1,
> +	[BCM2835_DMA_DATA_TYPE_S16] = 2,
> +	[BCM2835_DMA_DATA_TYPE_S32] = 4,
> +	[BCM2835_DMA_DATA_TYPE_S128] = 16
> +};
> +
> +static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d)
> +{
> +	return container_of(d, struct bcm2835_dmadev, ddev);
> +}
> +
> +static inline struct bcm2835_chan *to_bcm2835_dma_chan(struct dma_chan *c)
> +{
> +	return container_of(c, struct bcm2835_chan, vc.chan);
> +}
> +
> +static inline struct bcm2835_desc *to_bcm2835_dma_desc(
> +		struct dma_async_tx_descriptor *t)
> +{
> +	return container_of(t, struct bcm2835_desc, vd.tx);
> +}
> +
> +static bool bcm2835_dma_filter_fn(struct dma_chan *chan, void *param)
> +{
> +	return true;
> +}
> +
> +static struct of_dma_filter_info bcm2835_dma_info = {
> +	.filter_fn = bcm2835_dma_filter_fn,
> +};
> +
> +static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
> +{
> +	struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd);
> +	dma_free_coherent(desc->vd.tx.chan->device->dev,
> +			desc->control_block_size,
> +			desc->control_block_base,
> +			desc->control_block_base_phys);
> +	kfree(desc);
> +}
> +
> +static void bcm2835_dma_start(void __iomem *dma_chan_base,
> +		dma_addr_t control_block)
> +{
> +	dsb();	/* ARM data synchronization (push) operation */
> +
> +	writel(control_block, dma_chan_base+BCM2835_DMA_ADDR);
> +	writel(BCM2835_DMA_ACTIVE, dma_chan_base+BCM2835_DMA_CS);
> +}
> +
> +static int bcm2835_dma_abort(void __iomem *dma_chan_base)
> +{
> +	unsigned long int cs;
> +	int rc = 0;
> +
> +	cs = readl(dma_chan_base + BCM2835_DMA_CS);
> +
> +	if (BCM2835_DMA_ACTIVE & cs) {
> +		long int timeout = 10000;
> +
> +		/* write 0 to the active bit - pause the DMA */
> +		writel(0, dma_chan_base + BCM2835_DMA_CS);
> +
> +		/* wait for any current AXI transfer to complete */
> +		while (0 != (cs & BCM2835_DMA_ISPAUSED) && --timeout >= 0)
> +			cs = readl(dma_chan_base + BCM2835_DMA_CS);
> +
> +		if (0 != (cs & BCM2835_DMA_ISPAUSED)) {
> +			/* we'll un-pause when we set of our next DMA */
> +			rc = -ETIMEDOUT;
> +
> +		} else if (BCM2835_DMA_ACTIVE & cs) {
> +			/* terminate the control block chain */
> +			writel(0, dma_chan_base + BCM2835_DMA_NEXTCB);
> +
> +			/* abort the whole DMA */
> +			writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE,
> +			       dma_chan_base + BCM2835_DMA_CS);
> +		}
> +	}
> +
> +	return rc;
> +}
> +
> +static void bcm2835_dma_start_sg(struct bcm2835_chan *c, struct bcm2835_desc *d,
> +		unsigned idx)
> +{
> +	struct bcm2835_sg *sg = d->sg + idx;
> +	int frame;
> +	int frames = sg->fn;
> +
> +	/*
> +	 * Iterate over all frames and create a control block
> +	 * for each frame and link them together.
> +	 */
> +	for (frame = 0; frame < frames; frame++) {
> +		struct bcm2835_dma_cb *control_block =
> +			&d->control_block_base[frame];
> +
> +		/* Setup adresses */
> +		if (d->dir == DMA_DEV_TO_MEM) {
> +			control_block->info = BCM2835_DMA_D_INC;
> +			control_block->src = d->dev_addr;
> +			control_block->dst = sg->addr+frame*sg->en;
> +		} else {
> +			control_block->info = BCM2835_DMA_S_INC;
> +			control_block->src = sg->addr+frame*sg->en;
> +			control_block->dst = d->dev_addr;
> +		}
> +
> +		/* Enable interrupt */
> +		control_block->info |= BCM2835_DMA_INT_EN;
> +
> +		/* Setup synchronization */
> +		if (d->sync_type != 0)
> +			control_block->info |= d->sync_type;
> +
> +		/* Setup DREQ channel */
> +		if (d->sync_dreq != 0)
> +			control_block->info |=
> +				BCM2835_DMA_PER_MAP(d->sync_dreq);
> +
> +		/* Length of a frame */
> +		control_block->length = sg->en;
> +
> +		/*
> +		 * Next block is the next frame.
> +		 * This DMA engine driver currently only supports cyclic DMA.
> +		 * Therefore, wrap around at number of frames.
> +		 */
> +		control_block->next = d->control_block_base_phys +
> +			sizeof(struct bcm2835_dma_cb)*((frame+1)%(frames));
> +
> +		/* The following fields are not used here */
> +		control_block->stride = 0;
> +		control_block->pad[0] = 0;
> +		control_block->pad[1] = 0;
> +	}
> +
> +	/* Start the DMA transfer */
> +	bcm2835_dma_start(c->dma_chan_base, d->control_block_base_phys);
> +}
> +
> +static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
> +{
> +	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
> +	struct bcm2835_desc *d;
> +
> +	if (!vd) {
> +		c->desc = NULL;
> +		return;
> +	}
> +
> +	list_del(&vd->node);
> +
> +	c->desc = d = to_bcm2835_dma_desc(&vd->tx);
> +	c->sgidx = 0;
> +
> +	bcm2835_dma_start_sg(c, d, 0);
> +}
> +
> +static irqreturn_t bcm2835_dma_callback(int irq, void *data)
> +{
> +	struct bcm2835_chan *c = data;
> +	struct bcm2835_desc *d;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&c->vc.lock, flags);
> +
> +	/* acknowledge interrupt */
> +	writel(BCM2835_DMA_INT, c->dma_chan_base + BCM2835_DMA_CS);
> +
> +	d = c->desc;
> +
> +	if (d) {
> +		if (!c->cyclic) {
> +			if (++c->sgidx < d->sglen) {
> +				bcm2835_dma_start_sg(c, d, c->sgidx);
> +			} else {
> +				bcm2835_dma_start_desc(c);
> +				vchan_cookie_complete(&d->vd);
> +			}
> +		} else {
> +			vchan_cyclic_callback(&d->vd);
> +		}
> +	}
> +
> +	/* keep the DMA engine running */
> +	dsb(); /* ARM synchronization barrier */
> +	writel(BCM2835_DMA_ACTIVE, c->dma_chan_base + BCM2835_DMA_CS);
> +
> +	spin_unlock_irqrestore(&c->vc.lock, flags);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +/*
> + * This callback schedules all pending channels.  We could be more
> + * clever here by postponing allocation of the real DMA channels to
> + * this point, and freeing them when our virtual channel becomes idle.
> + *
> + * We would then need to deal with 'all channels in-use'
> + */
> +static void bcm2835_dma_sched(unsigned long data)
> +{
> +	struct bcm2835_dmadev *d = (struct bcm2835_dmadev *)data;
> +	LIST_HEAD(head);
> +
> +	spin_lock_irq(&d->lock);
> +	list_splice_tail_init(&d->pending, &head);
> +	spin_unlock_irq(&d->lock);
> +
> +	while (!list_empty(&head)) {
> +		struct bcm2835_chan *c = list_first_entry(&head,
> +			struct bcm2835_chan, node);
> +
> +		spin_lock_irq(&c->vc.lock);
> +		list_del_init(&c->node);
> +		bcm2835_dma_start_desc(c);
> +		spin_unlock_irq(&c->vc.lock);
> +	}
> +}
> +
> +static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	int ret;
> +	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
> +	uint32_t chans = d->chans_available;
> +	int chanID = 0;
> +
> +	dev_dbg(c->vc.chan.device->dev,
> +			"allocating channel for %u\n", c->dma_sig);
> +
> +	/* do not use the FIQ and BULK channels */
> +	chans &= ~0xD;
> +
> +	if (chans) {
> +		/* return the ordinal of the first channel in the bitmap */
> +		while (chans != 0 && (chans & 1) == 0) {
> +			chans >>= 1;
> +			chanID++;
> +		}
> +
> +		/* claim the channel */
> +		d->chans_available &= ~(1 << chanID);
> +
> +		c->dma_chan_base = BCM2835_DMA_CHANIO(d->dma_base, chanID);
> +
> +		c->dma_irq_number = d->dma_irq_numbers[chanID];
> +
> +		c->dma_ch = chanID;
> +	} else {
> +		return -ENOMEM;
> +	}
> +
> +	c->dma_irq_handler.name = "DMA engine IRQ handler";
> +	c->dma_irq_handler.flags = 0;
> +	c->dma_irq_handler.handler = bcm2835_dma_callback;
> +
> +	ret = request_any_context_irq(c->dma_irq_number,
> +			bcm2835_dma_callback, 0, "DMA IRQ", c);
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static void bcm2835_dma_free_chan_resources(struct dma_chan *chan)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
> +
> +	vchan_free_chan_resources(&c->vc);
> +	d->chans_available |= (1 << c->dma_ch);
> +	free_irq(c->dma_irq_number, c);
> +
> +	dev_dbg(c->vc.chan.device->dev, "freeing channel for %u\n", c->dma_sig);
> +}
> +
> +static size_t bcm2835_dma_sg_size(struct bcm2835_sg *sg)
> +{
> +	return sg->en * sg->fn;
> +}
> +
> +static size_t bcm2835_dma_desc_size(struct bcm2835_desc *d)
> +{
> +	unsigned i;
> +	size_t size;
> +
> +	for (size = i = 0; i < d->sglen; i++)
> +		size += bcm2835_dma_sg_size(&d->sg[i]);
> +
> +	return size * es_bytes[d->es];
> +}
> +
> +static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr)
> +{
> +	unsigned i;
> +	size_t size;
> +
> +	for (size = i = 0; i < d->sglen; i++) {
> +		size_t this_size = bcm2835_dma_sg_size(&d->sg[i]);
> +
> +		if (size)
> +			size += this_size;
> +		else if (addr >= d->sg[i].addr &&
> +			 addr < d->sg[i].addr + this_size)
> +			size += d->sg[i].addr + this_size - addr;
> +	}
> +	return size;
> +}
> +
> +
> +/*
> + * Returns current physical source address for the given DMA channel.
> + * If the channel is running the caller must disable interrupts prior calling
> + * this function and process the returned value before re-enabling interrupt to
> + * prevent races with the interrupt handler.
> + */
> +static dma_addr_t bcm2835_get_dma_src_pos(struct bcm2835_chan *c)
> +{
> +	return readl(c->dma_chan_base + BCM2835_DMA_SOURCE_AD);
> +}
> +
> +/*
> + * Returns current physical destination address for the given DMA channel.
> + * If the channel is running the caller must disable interrupts prior calling
> + * this function and process the returned value before re-enabling interrupt to
> + * prevent races with the interrupt handler.
> + */
> +static dma_addr_t bcm2835_get_dma_dst_pos(struct bcm2835_chan *c)
> +{
> +	return readl(c->dma_chan_base + BCM2835_DMA_DEST_AD);
> +}
> +
> +static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan,
> +	dma_cookie_t cookie, struct dma_tx_state *txstate)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	struct virt_dma_desc *vd;
> +	enum dma_status ret;
> +	unsigned long flags;
> +
> +	ret = dma_cookie_status(chan, cookie, txstate);
> +	if (ret == DMA_SUCCESS || !txstate)
> +		return ret;
> +
> +	spin_lock_irqsave(&c->vc.lock, flags);
> +	vd = vchan_find_desc(&c->vc, cookie);
> +	if (vd) {
> +		txstate->residue =
> +			bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx));
> +	} else if (c->desc && c->desc->vd.tx.cookie == cookie) {
> +		struct bcm2835_desc *d = c->desc;
> +		dma_addr_t pos;
> +
> +		if (d->dir == DMA_MEM_TO_DEV)
> +			pos = bcm2835_get_dma_src_pos(c);
> +		else if (d->dir == DMA_DEV_TO_MEM)
> +			pos = bcm2835_get_dma_dst_pos(c);
> +		else
> +			pos = 0;
> +
> +		txstate->residue = bcm2835_dma_desc_size_pos(d, pos);
> +	} else {
> +		txstate->residue = 0;
> +	}
> +
> +	spin_unlock_irqrestore(&c->vc.lock, flags);
> +
> +	return ret;
> +}
> +
> +static void bcm2835_dma_issue_pending(struct dma_chan *chan)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&c->vc.lock, flags);
> +	if (vchan_issue_pending(&c->vc) && !c->desc) {
> +		struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
> +		spin_lock(&d->lock);
> +		if (list_empty(&c->node))
> +			list_add_tail(&c->node, &d->pending);
> +		spin_unlock(&d->lock);
> +		tasklet_schedule(&d->task);
> +	}
> +	spin_unlock_irqrestore(&c->vc.lock, flags);
> +}
> +
> +
> +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
> +	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
> +	size_t period_len, enum dma_transfer_direction direction,
> +	unsigned long flags, void *context)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	enum dma_slave_buswidth dev_width;
> +	struct bcm2835_desc *d;
> +	dma_addr_t dev_addr;
> +	unsigned int es, sync_type, sync_dreq;
> +
> +	/* Grab configuration */
> +	if (direction == DMA_DEV_TO_MEM) {
> +		dev_addr = c->cfg.src_addr;
> +		dev_width = c->cfg.src_addr_width;
> +		sync_type = BCM2835_DMA_S_DREQ;
> +		sync_dreq = c->cfg.slave_id;
> +	} else if (direction == DMA_MEM_TO_DEV) {
> +		dev_addr = c->cfg.dst_addr;
> +		dev_width = c->cfg.dst_addr_width;
> +		sync_type = BCM2835_DMA_D_DREQ;
> +		sync_dreq = c->cfg.slave_id;
> +	} else {
> +		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
> +		return NULL;
> +	}
> +
> +	/* Bus width translates to the element size (ES) */
> +	switch (dev_width) {
> +	case DMA_SLAVE_BUSWIDTH_4_BYTES:
> +		es = BCM2835_DMA_DATA_TYPE_S32;
> +		break;
> +	default:
> +		return NULL;
> +	}
> +
> +	/* Now allocate and setup the descriptor. */
> +	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
> +	if (!d)
> +		return NULL;
> +
> +	d->dir = direction;
> +	d->dev_addr = dev_addr;
> +	d->es = es;
> +	d->sync_type = sync_type;
> +	d->sync_dreq = sync_dreq;
> +	d->sg[0].addr = buf_addr;
> +	d->sg[0].en = period_len;
> +	d->sg[0].fn = buf_len / period_len;
> +	d->sglen = 1;
> +
> +	/* Allocate memory for control blocks */
> +	d->control_block_size = d->sg[0].fn*sizeof(struct bcm2835_dma_cb);
> +	d->control_block_base = dma_alloc_coherent(chan->device->dev,
> +			d->control_block_size, &d->control_block_base_phys,
> +			GFP_KERNEL);
> +
> +	if (!d->control_block_base) {
> +		dev_err(chan->device->dev,
> +				"%s: Memory allocation error\n", __func__);
> +		return NULL;
> +	}
> +
> +	memset(d->control_block_base, 0, d->control_block_size);
> +
> +	if (!c->cyclic) {
> +		c->cyclic = true;
> +		/* nothing else is implemented */
> +	}
> +
> +	return vchan_tx_prep(&c->vc, &d->vd, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +}
> +
> +static int bcm2835_dma_slave_config(struct bcm2835_chan *c,
> +		struct dma_slave_config *cfg)
> +{
> +	if ((cfg->direction == DMA_DEV_TO_MEM
> +			&& cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
> +	    (cfg->direction == DMA_MEM_TO_DEV
> +			&& cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)) {
> +		return -EINVAL;
> +	}
> +
> +	memcpy(&c->cfg, cfg, sizeof(c->cfg));
> +
> +	return 0;
> +}
> +
> +static int bcm2835_dma_terminate_all(struct bcm2835_chan *c)
> +{
> +	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device);
> +	unsigned long flags;
> +	LIST_HEAD(head);
> +
> +	spin_lock_irqsave(&c->vc.lock, flags);
> +
> +	/* Prevent this channel being scheduled */
> +	spin_lock(&d->lock);
> +	list_del_init(&c->node);
> +	spin_unlock(&d->lock);
> +
> +	/*
> +	 * Stop DMA activity: we assume the callback will not be called
> +	 * after bcm_dma_abort() returns (even if it does, it will see
> +	 * c->desc is NULL and exit.)
> +	 */
> +	if (c->desc) {
> +		c->desc = NULL;
> +		bcm2835_dma_abort(c->dma_chan_base);
> +
> +		/* Wait for stopping */
> +		while (readl(c->dma_chan_base + BCM2835_DMA_CS)
> +			& BCM2835_DMA_ACTIVE)
> +			;
> +	}
> +
> +	vchan_get_all_descriptors(&c->vc, &head);
> +	spin_unlock_irqrestore(&c->vc.lock, flags);
> +	vchan_dma_desc_free_list(&c->vc, &head);
> +
> +	return 0;
> +}
> +
> +static int bcm2835_dma_pause(struct bcm2835_chan *c)
> +{
> +	/* FIXME: not supported by platform private API */
> +	return -EINVAL;
> +}
> +
> +static int bcm2835_dma_resume(struct bcm2835_chan *c)
> +{
> +	/* FIXME: not supported by platform private API */
> +	return -EINVAL;
> +}
> +
> +static int bcm2835_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> +	unsigned long arg)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	int ret;
> +
> +	switch (cmd) {
> +	case DMA_SLAVE_CONFIG:
> +		ret = bcm2835_dma_slave_config(c,
> +				(struct dma_slave_config *)arg);
> +		break;
> +
> +	case DMA_TERMINATE_ALL:
> +		ret = bcm2835_dma_terminate_all(c);
> +		break;
> +
> +	case DMA_PAUSE:
> +		ret = bcm2835_dma_pause(c);
> +		break;
> +
> +	case DMA_RESUME:
> +		ret = bcm2835_dma_resume(c);
> +		break;
> +
> +	default:
> +		ret = -ENXIO;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static int bcm2835_dma_chan_init(struct bcm2835_dmadev *od, int dma_sig)
> +{
> +	struct bcm2835_chan *c;
> +
> +	c = kzalloc(sizeof(*c), GFP_KERNEL);
> +	if (!c)
> +		return -ENOMEM;
> +
> +	c->dma_sig = dma_sig;
> +	c->vc.desc_free = bcm2835_dma_desc_free;
> +	vchan_init(&c->vc, &od->ddev);
> +	INIT_LIST_HEAD(&c->node);
> +
> +	od->ddev.chancnt++;
> +
> +	return 0;
> +}
> +
> +static void bcm2835_dma_free(struct bcm2835_dmadev *od)
> +{
> +	tasklet_kill(&od->task);
> +	while (!list_empty(&od->ddev.channels)) {
> +		struct bcm2835_chan *c = list_first_entry(&od->ddev.channels,
> +			struct bcm2835_chan, vc.chan.device_node);
> +
> +		list_del(&c->vc.chan.device_node);
> +		tasklet_kill(&c->vc.task);
> +		kfree(c);
> +	}
> +}
> +
> +#if defined(CONFIG_OF)
> +static const struct of_device_id bcm2835_dma_of_match[] = {
> +	{
> +		.compatible = "brcm,bcm2835-dma",
> +	}
> +};
> +MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
> +#endif
> +
> +static int bcm2835_dma_probe(struct platform_device *pdev)
> +{
> +	struct bcm2835_dmadev *od;
> +	struct resource *dma_res = NULL;
> +	void __iomem *dma_base = NULL;
> +	int rc = 0;
> +	int i;
> +	struct resource *irq;
> +	int irq_resources;
> +
> +	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
> +	if (!od)
> +		return -ENOMEM;
> +
> +	dma_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	dma_base = devm_ioremap_resource(&pdev->dev, dma_res);
> +	if (IS_ERR(dma_base))
> +		return PTR_ERR(dma_base);
> +
> +	od->dma_base = dma_base;
> +	od->chans_available = BCM2835_DMA_CHANNEL_MASK;
> +
> +	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
> +	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
> +	od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources;
> +	od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources;
> +	od->ddev.device_tx_status = bcm2835_dma_tx_status;
> +	od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
> +	od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
> +	od->ddev.device_control = bcm2835_dma_control;
> +	od->ddev.dev = &pdev->dev;
> +	INIT_LIST_HEAD(&od->ddev.channels);
> +	INIT_LIST_HEAD(&od->pending);
> +	spin_lock_init(&od->lock);
> +
> +	tasklet_init(&od->task, bcm2835_dma_sched, (unsigned long)od);
> +
> +	irq_resources = 0;
> +
> +	for (i = 0; i < pdev->num_resources; i++) {
> +		if (IORESOURCE_IRQ == resource_type(&pdev->resource[i]))
> +			irq_resources++;
> +	}
> +
> +	od->dma_irq_numbers = devm_kzalloc(&pdev->dev,
> +			irq_resources*sizeof(int), GFP_KERNEL);
> +	if (!od)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < irq_resources; i++) {
> +		rc = bcm2835_dma_chan_init(od, i);
> +		if (rc) {
> +			bcm2835_dma_free(od);
> +			return rc;
> +		}
> +
> +		irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);
> +		if (!irq) {
> +			dev_err(&pdev->dev,
> +					"No IRQ resource for channel %i\n", i);
> +			return -ENODEV;
> +		}
> +		od->dma_irq_numbers[i] = irq->start;
> +	}
> +
> +	rc = dma_async_device_register(&od->ddev);
> +	if (rc) {
> +		dev_err(&pdev->dev,
> +			"Failed to register slave DMA engine device: %d\n", rc);
> +		bcm2835_dma_free(od);
> +		return rc;
> +	}
> +
> +	platform_set_drvdata(pdev, od);
> +
> +	if (pdev->dev.of_node) {
> +		bcm2835_dma_info.dma_cap = od->ddev.cap_mask;
> +
> +		/* Device-tree DMA controller registration */
> +		rc = of_dma_controller_register(pdev->dev.of_node,
> +				of_dma_simple_xlate, &bcm2835_dma_info);
> +		if (rc) {
> +			dev_err(&pdev->dev, "Failed to register DMA controller\n");
> +			dma_async_device_unregister(&od->ddev);
> +			bcm2835_dma_free(od);
> +			return rc;
> +		}
> +	}
> +
> +	dev_dbg(&pdev->dev, "Load BCM2835 DMA engine driver\n");
> +
> +	return rc;
> +}
> +
> +static int bcm2835_dma_remove(struct platform_device *pdev)
> +{
> +	struct bcm2835_dmadev *od = platform_get_drvdata(pdev);
> +
> +	dma_async_device_unregister(&od->ddev);
> +	bcm2835_dma_free(od);
> +
> +	return 0;
> +}
> +
> +static struct platform_driver bcm2835_dma_driver = {
> +	.probe	= bcm2835_dma_probe,
> +	.remove	= bcm2835_dma_remove,
> +	.driver = {
> +		.name = "bcm2835-dma",
> +		.owner = THIS_MODULE,
> +		.of_match_table = of_match_ptr(bcm2835_dma_of_match),
> +	},
> +};
> +
> +static const struct platform_device_info bcm2835_dma_dev_info = {
> +	.name = "bcm2835-dma",
> +	.id = -1,
> +	.dma_mask = DMA_BIT_MASK(32),
> +};
> +
> +static int bcm2835_dma_init(void)
> +{
> +	int rc = platform_driver_register(&bcm2835_dma_driver);
> +	return rc;
> +}
> +subsys_initcall(bcm2835_dma_init);
> +
> +static void __exit bcm2835_dma_exit(void)
> +{
> +	platform_driver_unregister(&bcm2835_dma_driver);
> +}
> +module_exit(bcm2835_dma_exit);
> +
> +MODULE_AUTHOR("Florian Meier");
> +MODULE_DESCRIPTION("BCM2835 DMA engine driver");
> +MODULE_LICENSE("GPL");
> +
> -- 
> 1.7.9.5
> 
>
Russell King - ARM Linux Nov. 8, 2013, 7:11 p.m. UTC | #2
On Fri, Nov 08, 2013 at 06:22:34PM +0100, Florian Meier wrote:

Hi Florian, some initial comments.

> +#define BCM2835_DMA_DATA_TYPE_S8 1
> +#define BCM2835_DMA_DATA_TYPE_S16 2
> +#define BCM2835_DMA_DATA_TYPE_S32 4
> +#define BCM2835_DMA_DATA_TYPE_S128 16
...
> +
> +static const unsigned es_bytes[] = {
> +	[BCM2835_DMA_DATA_TYPE_S8] = 1,
> +	[BCM2835_DMA_DATA_TYPE_S16] = 2,
> +	[BCM2835_DMA_DATA_TYPE_S32] = 4,
> +	[BCM2835_DMA_DATA_TYPE_S128] = 16
> +};

This looks rather fun - and the only place d->es is used is to convey
this as an index into this table for bcm2835_dma_desc_size().  I can't
quite see the point of this table existing.

> +static void bcm2835_dma_start_sg(struct bcm2835_chan *c, struct bcm2835_desc *d,
> +		unsigned idx)
> +{
> +	struct bcm2835_sg *sg = d->sg + idx;
> +	int frame;
> +	int frames = sg->fn;
> +
> +	/*
> +	 * Iterate over all frames and create a control block
> +	 * for each frame and link them together.
> +	 */
> +	for (frame = 0; frame < frames; frame++) {
> +		struct bcm2835_dma_cb *control_block =
> +			&d->control_block_base[frame];
> +
> +		/* Setup adresses */
> +		if (d->dir == DMA_DEV_TO_MEM) {
> +			control_block->info = BCM2835_DMA_D_INC;
> +			control_block->src = d->dev_addr;
> +			control_block->dst = sg->addr+frame*sg->en;
> +		} else {
> +			control_block->info = BCM2835_DMA_S_INC;
> +			control_block->src = sg->addr+frame*sg->en;
> +			control_block->dst = d->dev_addr;
> +		}
> +
> +		/* Enable interrupt */
> +		control_block->info |= BCM2835_DMA_INT_EN;
> +
> +		/* Setup synchronization */
> +		if (d->sync_type != 0)
> +			control_block->info |= d->sync_type;
> +
> +		/* Setup DREQ channel */
> +		if (d->sync_dreq != 0)
> +			control_block->info |=
> +				BCM2835_DMA_PER_MAP(d->sync_dreq);
> +
> +		/* Length of a frame */
> +		control_block->length = sg->en;
> +
> +		/*
> +		 * Next block is the next frame.
> +		 * This DMA engine driver currently only supports cyclic DMA.
> +		 * Therefore, wrap around at number of frames.
> +		 */
> +		control_block->next = d->control_block_base_phys +
> +			sizeof(struct bcm2835_dma_cb)*((frame+1)%(frames));
> +
> +		/* The following fields are not used here */
> +		control_block->stride = 0;
> +		control_block->pad[0] = 0;
> +		control_block->pad[1] = 0;
> +	}

Why not move the initialisation of this control block to the preparation
function?  I think doing that would simplify this code somewhat, as
you won't be converting the information passed to the preparation function
multiple times.

> +static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	int ret;
> +	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
> +	uint32_t chans = d->chans_available;
> +	int chanID = 0;
> +
> +	dev_dbg(c->vc.chan.device->dev,
> +			"allocating channel for %u\n", c->dma_sig);
> +
> +	/* do not use the FIQ and BULK channels */
> +	chans &= ~0xD;
> +
> +	if (chans) {
> +		/* return the ordinal of the first channel in the bitmap */
> +		while (chans != 0 && (chans & 1) == 0) {
> +			chans >>= 1;
> +			chanID++;
> +		}
> +
> +		/* claim the channel */
> +		d->chans_available &= ~(1 << chanID);
> +
> +		c->dma_chan_base = BCM2835_DMA_CHANIO(d->dma_base, chanID);
> +
> +		c->dma_irq_number = d->dma_irq_numbers[chanID];
> +
> +		c->dma_ch = chanID;
> +	} else {
> +		return -ENOMEM;
> +	}
> +
> +	c->dma_irq_handler.name = "DMA engine IRQ handler";
> +	c->dma_irq_handler.flags = 0;
> +	c->dma_irq_handler.handler = bcm2835_dma_callback;
> +
> +	ret = request_any_context_irq(c->dma_irq_number,
> +			bcm2835_dma_callback, 0, "DMA IRQ", c);

Hmm.  Why "request_any_context_irq" ?  Looking at what your "dma callback"
is doing, it's operating entirely beneath a spinlock with IRQs disabled.
You might as well handle it in hard IRQ context.

> +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
> +	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
> +	size_t period_len, enum dma_transfer_direction direction,
> +	unsigned long flags, void *context)
> +{
> +	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
> +	enum dma_slave_buswidth dev_width;
> +	struct bcm2835_desc *d;
> +	dma_addr_t dev_addr;
> +	unsigned int es, sync_type, sync_dreq;
> +
> +	/* Grab configuration */
> +	if (direction == DMA_DEV_TO_MEM) {
> +		dev_addr = c->cfg.src_addr;
> +		dev_width = c->cfg.src_addr_width;
> +		sync_type = BCM2835_DMA_S_DREQ;
> +		sync_dreq = c->cfg.slave_id;
> +	} else if (direction == DMA_MEM_TO_DEV) {
> +		dev_addr = c->cfg.dst_addr;
> +		dev_width = c->cfg.dst_addr_width;
> +		sync_type = BCM2835_DMA_D_DREQ;
> +		sync_dreq = c->cfg.slave_id;
> +	} else {
> +		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
> +		return NULL;
> +	}

Please move sync_dreq out of the if() statements; it doesn't appear to
depend on the direction (there's only one of them in that structure too.)
While there, you might as well assign it directly to d->sync_dreq below.

Even better, with the code in bcm2835_dma_start_sg() moved into this
function to generate the control block, you don't need to save a lot
of the information in your descriptor.

> +
> +	/* Bus width translates to the element size (ES) */
> +	switch (dev_width) {
> +	case DMA_SLAVE_BUSWIDTH_4_BYTES:
> +		es = BCM2835_DMA_DATA_TYPE_S32;
> +		break;
> +	default:
> +		return NULL;
> +	}
> +
> +	/* Now allocate and setup the descriptor. */
> +	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
> +	if (!d)
> +		return NULL;
> +
> +	d->dir = direction;
> +	d->dev_addr = dev_addr;
> +	d->es = es;
> +	d->sync_type = sync_type;
> +	d->sync_dreq = sync_dreq;
> +	d->sg[0].addr = buf_addr;
> +	d->sg[0].en = period_len;
> +	d->sg[0].fn = buf_len / period_len;
> +	d->sglen = 1;
> +
> +	/* Allocate memory for control blocks */
> +	d->control_block_size = d->sg[0].fn*sizeof(struct bcm2835_dma_cb);
> +	d->control_block_base = dma_alloc_coherent(chan->device->dev,
> +			d->control_block_size, &d->control_block_base_phys,
> +			GFP_KERNEL);
> +
> +	if (!d->control_block_base) {
> +		dev_err(chan->device->dev,
> +				"%s: Memory allocation error\n", __func__);
> +		return NULL;
> +	}
> +
> +	memset(d->control_block_base, 0, d->control_block_size);
> +
> +	if (!c->cyclic) {
> +		c->cyclic = true;
> +		/* nothing else is implemented */
> +	}

This is needlessly complex; please simplify this.

> +
> +	return vchan_tx_prep(&c->vc, &d->vd, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);

You should pass 'flags' as the 3rd argument here.
Florian Meier Nov. 11, 2013, 12:36 p.m. UTC | #3
Thank you for your helpful comments.
I have applied them to my code and will upload a new version soon
(hoping that I understand everything correctly).

2013/11/8 Russell King - ARM Linux <linux@arm.linux.org.uk>:
> On Fri, Nov 08, 2013 at 06:22:34PM +0100, Florian Meier wrote:
>
> Hi Florian, some initial comments.
>
>> +#define BCM2835_DMA_DATA_TYPE_S8 1
>> +#define BCM2835_DMA_DATA_TYPE_S16 2
>> +#define BCM2835_DMA_DATA_TYPE_S32 4
>> +#define BCM2835_DMA_DATA_TYPE_S128 16
> ...
>> +
>> +static const unsigned es_bytes[] = {
>> +     [BCM2835_DMA_DATA_TYPE_S8] = 1,
>> +     [BCM2835_DMA_DATA_TYPE_S16] = 2,
>> +     [BCM2835_DMA_DATA_TYPE_S32] = 4,
>> +     [BCM2835_DMA_DATA_TYPE_S128] = 16
>> +};
>
> This looks rather fun - and the only place d->es is used is to convey
> this as an index into this table for bcm2835_dma_desc_size().  I can't
> quite see the point of this table existing.
>
>> +static void bcm2835_dma_start_sg(struct bcm2835_chan *c, struct bcm2835_desc *d,
>> +             unsigned idx)
>> +{
>> +     struct bcm2835_sg *sg = d->sg + idx;
>> +     int frame;
>> +     int frames = sg->fn;
>> +
>> +     /*
>> +      * Iterate over all frames and create a control block
>> +      * for each frame and link them together.
>> +      */
>> +     for (frame = 0; frame < frames; frame++) {
>> +             struct bcm2835_dma_cb *control_block =
>> +                     &d->control_block_base[frame];
>> +
>> +             /* Setup adresses */
>> +             if (d->dir == DMA_DEV_TO_MEM) {
>> +                     control_block->info = BCM2835_DMA_D_INC;
>> +                     control_block->src = d->dev_addr;
>> +                     control_block->dst = sg->addr+frame*sg->en;
>> +             } else {
>> +                     control_block->info = BCM2835_DMA_S_INC;
>> +                     control_block->src = sg->addr+frame*sg->en;
>> +                     control_block->dst = d->dev_addr;
>> +             }
>> +
>> +             /* Enable interrupt */
>> +             control_block->info |= BCM2835_DMA_INT_EN;
>> +
>> +             /* Setup synchronization */
>> +             if (d->sync_type != 0)
>> +                     control_block->info |= d->sync_type;
>> +
>> +             /* Setup DREQ channel */
>> +             if (d->sync_dreq != 0)
>> +                     control_block->info |=
>> +                             BCM2835_DMA_PER_MAP(d->sync_dreq);
>> +
>> +             /* Length of a frame */
>> +             control_block->length = sg->en;
>> +
>> +             /*
>> +              * Next block is the next frame.
>> +              * This DMA engine driver currently only supports cyclic DMA.
>> +              * Therefore, wrap around at number of frames.
>> +              */
>> +             control_block->next = d->control_block_base_phys +
>> +                     sizeof(struct bcm2835_dma_cb)*((frame+1)%(frames));
>> +
>> +             /* The following fields are not used here */
>> +             control_block->stride = 0;
>> +             control_block->pad[0] = 0;
>> +             control_block->pad[1] = 0;
>> +     }
>
> Why not move the initialisation of this control block to the preparation
> function?  I think doing that would simplify this code somewhat, as
> you won't be converting the information passed to the preparation function
> multiple times.
>
>> +static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
>> +{
>> +     struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
>> +     int ret;
>> +     struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
>> +     uint32_t chans = d->chans_available;
>> +     int chanID = 0;
>> +
>> +     dev_dbg(c->vc.chan.device->dev,
>> +                     "allocating channel for %u\n", c->dma_sig);
>> +
>> +     /* do not use the FIQ and BULK channels */
>> +     chans &= ~0xD;
>> +
>> +     if (chans) {
>> +             /* return the ordinal of the first channel in the bitmap */
>> +             while (chans != 0 && (chans & 1) == 0) {
>> +                     chans >>= 1;
>> +                     chanID++;
>> +             }
>> +
>> +             /* claim the channel */
>> +             d->chans_available &= ~(1 << chanID);
>> +
>> +             c->dma_chan_base = BCM2835_DMA_CHANIO(d->dma_base, chanID);
>> +
>> +             c->dma_irq_number = d->dma_irq_numbers[chanID];
>> +
>> +             c->dma_ch = chanID;
>> +     } else {
>> +             return -ENOMEM;
>> +     }
>> +
>> +     c->dma_irq_handler.name = "DMA engine IRQ handler";
>> +     c->dma_irq_handler.flags = 0;
>> +     c->dma_irq_handler.handler = bcm2835_dma_callback;
>> +
>> +     ret = request_any_context_irq(c->dma_irq_number,
>> +                     bcm2835_dma_callback, 0, "DMA IRQ", c);
>
> Hmm.  Why "request_any_context_irq" ?  Looking at what your "dma callback"
> is doing, it's operating entirely beneath a spinlock with IRQs disabled.
> You might as well handle it in hard IRQ context.
>
>> +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
>> +     struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
>> +     size_t period_len, enum dma_transfer_direction direction,
>> +     unsigned long flags, void *context)
>> +{
>> +     struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
>> +     enum dma_slave_buswidth dev_width;
>> +     struct bcm2835_desc *d;
>> +     dma_addr_t dev_addr;
>> +     unsigned int es, sync_type, sync_dreq;
>> +
>> +     /* Grab configuration */
>> +     if (direction == DMA_DEV_TO_MEM) {
>> +             dev_addr = c->cfg.src_addr;
>> +             dev_width = c->cfg.src_addr_width;
>> +             sync_type = BCM2835_DMA_S_DREQ;
>> +             sync_dreq = c->cfg.slave_id;
>> +     } else if (direction == DMA_MEM_TO_DEV) {
>> +             dev_addr = c->cfg.dst_addr;
>> +             dev_width = c->cfg.dst_addr_width;
>> +             sync_type = BCM2835_DMA_D_DREQ;
>> +             sync_dreq = c->cfg.slave_id;
>> +     } else {
>> +             dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
>> +             return NULL;
>> +     }
>
> Please move sync_dreq out of the if() statements; it doesn't appear to
> depend on the direction (there's only one of them in that structure too.)
> While there, you might as well assign it directly to d->sync_dreq below.
>
> Even better, with the code in bcm2835_dma_start_sg() moved into this
> function to generate the control block, you don't need to save a lot
> of the information in your descriptor.
>
>> +
>> +     /* Bus width translates to the element size (ES) */
>> +     switch (dev_width) {
>> +     case DMA_SLAVE_BUSWIDTH_4_BYTES:
>> +             es = BCM2835_DMA_DATA_TYPE_S32;
>> +             break;
>> +     default:
>> +             return NULL;
>> +     }
>> +
>> +     /* Now allocate and setup the descriptor. */
>> +     d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
>> +     if (!d)
>> +             return NULL;
>> +
>> +     d->dir = direction;
>> +     d->dev_addr = dev_addr;
>> +     d->es = es;
>> +     d->sync_type = sync_type;
>> +     d->sync_dreq = sync_dreq;
>> +     d->sg[0].addr = buf_addr;
>> +     d->sg[0].en = period_len;
>> +     d->sg[0].fn = buf_len / period_len;
>> +     d->sglen = 1;
>> +
>> +     /* Allocate memory for control blocks */
>> +     d->control_block_size = d->sg[0].fn*sizeof(struct bcm2835_dma_cb);
>> +     d->control_block_base = dma_alloc_coherent(chan->device->dev,
>> +                     d->control_block_size, &d->control_block_base_phys,
>> +                     GFP_KERNEL);
>> +
>> +     if (!d->control_block_base) {
>> +             dev_err(chan->device->dev,
>> +                             "%s: Memory allocation error\n", __func__);
>> +             return NULL;
>> +     }
>> +
>> +     memset(d->control_block_base, 0, d->control_block_size);
>> +
>> +     if (!c->cyclic) {
>> +             c->cyclic = true;
>> +             /* nothing else is implemented */
>> +     }
>
> This is needlessly complex; please simplify this.
>
>> +
>> +     return vchan_tx_prep(&c->vc, &d->vd, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
>
> You should pass 'flags' as the 3rd argument here.
Vinod Koul Nov. 13, 2013, 5 a.m. UTC | #4
On Fri, Nov 08, 2013 at 06:17:43PM +0000, Mark Brown wrote:
> On Fri, Nov 08, 2013 at 06:22:34PM +0100, Florian Meier wrote:
> > Add support for DMA controller of BCM2835 as used in the Raspberry Pi.
> > Currently it only supports cyclic DMA for serving the I2S driver.
> 
> Adding in Martin Sperl who's been looking at DMA with regard to the SPI
> controller (which will want non-cyclic mode but I guess there's a lot of
> shared code).
Is there a plan to add a library in SPI for dma ops, on lines of what is done in
sound?

--
~Vinod
Mark Brown Nov. 13, 2013, 12:51 p.m. UTC | #5
On Wed, Nov 13, 2013 at 10:30:17AM +0530, Vinod Koul wrote:
> On Fri, Nov 08, 2013 at 06:17:43PM +0000, Mark Brown wrote:

> > Adding in Martin Sperl who's been looking at DMA with regard to the SPI
> > controller (which will want non-cyclic mode but I guess there's a lot of
> > shared code).

> Is there a plan to add a library in SPI for dma ops, on lines of what
> is done in sound?

I'm probably going to be looking at that at some point, though perhaps
Martin will get to it since he seems to be looking at this.
Vinod Koul Nov. 13, 2013, 1:35 p.m. UTC | #6
On Wed, Nov 13, 2013 at 12:51:11PM +0000, Mark Brown wrote:
> On Wed, Nov 13, 2013 at 10:30:17AM +0530, Vinod Koul wrote:
> > On Fri, Nov 08, 2013 at 06:17:43PM +0000, Mark Brown wrote:
> 
> > > Adding in Martin Sperl who's been looking at DMA with regard to the SPI
> > > controller (which will want non-cyclic mode but I guess there's a lot of
> > > shared code).
> 
> > Is there a plan to add a library in SPI for dma ops, on lines of what
> > is done in sound?
> 
> I'm probably going to be looking at that at some point, though perhaps
> Martin will get to it since he seems to be looking at this.
Okay sounds good. This will also help in streamline the implementations..

--
~Vinod
Vinod Koul Nov. 13, 2013, 2:31 p.m. UTC | #7
On Wed, Nov 13, 2013 at 02:54:41PM +0000, Mark Brown wrote:
> On Wed, Nov 13, 2013 at 07:05:36PM +0530, Vinod Koul wrote:
> > On Wed, Nov 13, 2013 at 12:51:11PM +0000, Mark Brown wrote:
> 
> > > I'm probably going to be looking at that at some point, though perhaps
> > > Martin will get to it since he seems to be looking at this.
> 
> > Okay sounds good. This will also help in streamline the implementations..
> 
> Yes, though I think some of the stuff Martin was talking about will need
> changes to the dmaengine APIs.  He was wanting to reuse DMA lists which
> currently dmaengine needs to have built up each time.
Can you elobrate a bit more on what you guys want to do here?

--
~Vinod
Mark Brown Nov. 13, 2013, 2:54 p.m. UTC | #8
On Wed, Nov 13, 2013 at 07:05:36PM +0530, Vinod Koul wrote:
> On Wed, Nov 13, 2013 at 12:51:11PM +0000, Mark Brown wrote:

> > I'm probably going to be looking at that at some point, though perhaps
> > Martin will get to it since he seems to be looking at this.

> Okay sounds good. This will also help in streamline the implementations..

Yes, though I think some of the stuff Martin was talking about will need
changes to the dmaengine APIs.  He was wanting to reuse DMA lists which
currently dmaengine needs to have built up each time.
Tomasz Figa Nov. 13, 2013, 3:02 p.m. UTC | #9
Hi Florian,

Most of technical issues have been already mentioned by Russell, but let
me also point those that I found. Please see my comments inline.

On Friday 08 of November 2013 18:22:34 Florian Meier wrote:
> Add support for DMA controller of BCM2835 as used in the Raspberry Pi.
> Currently it only supports cyclic DMA for serving the I2S driver.
> 
> Signed-off-by: Florian Meier <florian.meier@koalo.de>
> ---
>  arch/arm/boot/dts/bcm2835.dtsi |   22 +
>  drivers/dma/Kconfig            |    6 +
>  drivers/dma/Makefile           |    1 +
>  drivers/dma/bcm2835-dma.c      |  880 ++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 909 insertions(+)
>  create mode 100644 drivers/dma/bcm2835-dma.c
> 
> diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi
> index 1e12aef..1514198 100644
> --- a/arch/arm/boot/dts/bcm2835.dtsi
> +++ b/arch/arm/boot/dts/bcm2835.dtsi
> @@ -103,6 +103,28 @@
>  			clocks = <&clk_mmc>;
>  			status = "disabled";
>  		};
> +
> +		dma: dma@7e007000 {
> +			compatible = "brcm,bcm2835-dma";
> +			reg = <0x7e007000 0xf00>;
> +			interrupts = <1 16
> +				      1 17
> +				      1 18
> +				      1 19
> +				      1 20
> +				      1 21
> +				      1 22
> +				      1 23
> +				      1 24
> +				      1 25
> +				      1 26
> +				      1 27
> +				      1 28>;
> +
> +			#dma-cells = <1>;
> +			dma-channels = <15>;   /* DMA channel 15 is not handled yet */

This should represent the real configuration of the hardware, regardless
of what the driver supports.

> +			dma-requests = <32>;
> +		};
>  	};
>  
>  	clocks {

This should be a separate patch, following the one adding the driver.

In addition, you are introducing a new device tree binding with this
patch, so you should document it in appropriate location under
Documentation/devicetree/bindings.

> diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
> new file mode 100644
> index 0000000..c3e53b3
> --- /dev/null
> +++ b/drivers/dma/bcm2835-dma.c
> @@ -0,0 +1,880 @@
[snip]
> +struct bcm2835_dma_cb {
> +	unsigned long info;
> +	unsigned long src;
> +	unsigned long dst;
> +	unsigned long length;
> +	unsigned long stride;
> +	unsigned long next;
> +	unsigned long pad[2];
> +};

Is unsigned long really what you want here, not some explicitly sized
types, such as u32 or uint32_t? This seems to be some kind of hardware
interface, so the latter sounds more reasonable to me.

[snip]
> +#if defined(CONFIG_OF)
> +static const struct of_device_id bcm2835_dma_of_match[] = {
> +	{
> +		.compatible = "brcm,bcm2835-dma",
> +	}

A dummy terminating entry is needed here.

> +};
> +MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
> +#endif
> +
> +static int bcm2835_dma_probe(struct platform_device *pdev)
> +{
> +	struct bcm2835_dmadev *od;
> +	struct resource *dma_res = NULL;
> +	void __iomem *dma_base = NULL;
> +	int rc = 0;
> +	int i;
> +	struct resource *irq;
> +	int irq_resources;
> +
> +	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
> +	if (!od)
> +		return -ENOMEM;
> +
> +	dma_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	dma_base = devm_ioremap_resource(&pdev->dev, dma_res);
> +	if (IS_ERR(dma_base))
> +		return PTR_ERR(dma_base);
> +
> +	od->dma_base = dma_base;
> +	od->chans_available = BCM2835_DMA_CHANNEL_MASK;
> +
> +	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
> +	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
> +	od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources;
> +	od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources;
> +	od->ddev.device_tx_status = bcm2835_dma_tx_status;
> +	od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
> +	od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
> +	od->ddev.device_control = bcm2835_dma_control;
> +	od->ddev.dev = &pdev->dev;
> +	INIT_LIST_HEAD(&od->ddev.channels);
> +	INIT_LIST_HEAD(&od->pending);
> +	spin_lock_init(&od->lock);
> +
> +	tasklet_init(&od->task, bcm2835_dma_sched, (unsigned long)od);

Just a question out of curiosity, as I don't really know much about the
DMA engine subsystem:

What is the reason to use tasklets here instead of, let's say, a workqueue?

> +
> +	irq_resources = 0;
> +
> +	for (i = 0; i < pdev->num_resources; i++) {
> +		if (IORESOURCE_IRQ == resource_type(&pdev->resource[i]))
> +			irq_resources++;
> +	}
> +
> +	od->dma_irq_numbers = devm_kzalloc(&pdev->dev,
> +			irq_resources*sizeof(int), GFP_KERNEL);
> +	if (!od)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < irq_resources; i++) {

You could call platform_get_irq() here and break out of the loop if it
fails with -ENXIO. Then the IRQ number could be passed to
bcm2835_dma_chan_init() and stored in per-channel struct. This way you
could remove the ugly IRQ counting code above and IRQ array allocation.

> +		rc = bcm2835_dma_chan_init(od, i);
> +		if (rc) {
> +			bcm2835_dma_free(od);
> +			return rc;
> +		}
> +
> +		irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);

There is platform_get_irq() for reading IRQ resources specifically.

> +		if (!irq) {
> +			dev_err(&pdev->dev,
> +					"No IRQ resource for channel %i\n", i);
> +			return -ENODEV;
> +		}
> +		od->dma_irq_numbers[i] = irq->start;
> +	}
> +
> +	rc = dma_async_device_register(&od->ddev);

This should be called at the end of probe, to ensure that any potential
users can start generating requests to your DMA engine as soon as
it is registered.

> +	if (rc) {
> +		dev_err(&pdev->dev,
> +			"Failed to register slave DMA engine device: %d\n", rc);
> +		bcm2835_dma_free(od);
> +		return rc;
> +	}
> +
> +	platform_set_drvdata(pdev, od);
[snip]
> +
> +static const struct platform_device_info bcm2835_dma_dev_info = {
> +	.name = "bcm2835-dma",
> +	.id = -1,
> +	.dma_mask = DMA_BIT_MASK(32),
> +};

What's this?

> +
> +static int bcm2835_dma_init(void)
> +{
> +	int rc = platform_driver_register(&bcm2835_dma_driver);
> +	return rc;
> +}
> +subsys_initcall(bcm2835_dma_init);

Do you really need subsys_initcall here?

Best regards,
Tomasz
Mark Brown Nov. 13, 2013, 4:30 p.m. UTC | #10
On Wed, Nov 13, 2013 at 08:01:01PM +0530, Vinod Koul wrote:
> On Wed, Nov 13, 2013 at 02:54:41PM +0000, Mark Brown wrote:

> > Yes, though I think some of the stuff Martin was talking about will need
> > changes to the dmaengine APIs.  He was wanting to reuse DMA lists which
> > currently dmaengine needs to have built up each time.

> Can you elobrate a bit more on what you guys want to do here?

He wants to be able to submit a descriptor to the hardware more than
once to save on the costs of of constructing the descriptor.  I'm not
convinced this is a huge benefit most of the time.
diff mbox

Patch

diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi
index 1e12aef..1514198 100644
--- a/arch/arm/boot/dts/bcm2835.dtsi
+++ b/arch/arm/boot/dts/bcm2835.dtsi
@@ -103,6 +103,28 @@ 
 			clocks = <&clk_mmc>;
 			status = "disabled";
 		};
+
+		dma: dma@7e007000 {
+			compatible = "brcm,bcm2835-dma";
+			reg = <0x7e007000 0xf00>;
+			interrupts = <1 16
+				      1 17
+				      1 18
+				      1 19
+				      1 20
+				      1 21
+				      1 22
+				      1 23
+				      1 24
+				      1 25
+				      1 26
+				      1 27
+				      1 28>;
+
+			#dma-cells = <1>;
+			dma-channels = <15>;   /* DMA channel 15 is not handled yet */
+			dma-requests = <32>;
+		};
 	};
 
 	clocks {
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index f238cfd..f2d253b 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -288,6 +288,12 @@  config DMA_OMAP
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 
+config DMA_BCM2835
+	tristate "BCM2835 DMA engine support"
+	depends on (ARCH_BCM2835 || MACH_BCM2708)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+
 config TI_CPPI41
 	tristate "AM33xx CPPI41 DMA support"
 	depends on ARCH_OMAP
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index db89035..6348157 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -37,6 +37,7 @@  obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
new file mode 100644
index 0000000..c3e53b3
--- /dev/null
+++ b/drivers/dma/bcm2835-dma.c
@@ -0,0 +1,880 @@ 
+/*
+ * BCM2835 DMA engine support
+ *
+ * This driver only supports cyclic DMA transfers
+ * as needed for the I2S module.
+ *
+ * Author:      Florian Meier, <florian.meier@koalo.de>
+ *              Copyright 2013
+ *
+ * based on
+ *	OMAP DMAengine support by Russell King
+ *
+ *	BCM2708 DMA Driver
+ *	Copyright (C) 2010 Broadcom
+ *
+ *	Raspberry Pi PCM I2S ALSA Driver
+ *	Copyright (c) by Phil Poole 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+struct bcm2835_dmadev {
+	struct dma_device ddev;
+	spinlock_t lock;
+	struct tasklet_struct task;
+	struct list_head pending;
+	uint32_t chans_available;
+	void __iomem *dma_base;
+	int *dma_irq_numbers;
+};
+
+struct bcm2835_dma_cb {
+	unsigned long info;
+	unsigned long src;
+	unsigned long dst;
+	unsigned long length;
+	unsigned long stride;
+	unsigned long next;
+	unsigned long pad[2];
+};
+
+struct bcm2835_chan {
+	struct virt_dma_chan vc;
+	struct list_head node;
+
+	struct dma_slave_config	cfg;
+	unsigned dma_sig;
+	bool cyclic;
+
+	int dma_ch;
+	struct bcm2835_desc *desc;
+	unsigned sgidx;
+
+	void __iomem *dma_chan_base;
+	int dma_irq_number;
+	struct irqaction dma_irq_handler;
+};
+
+struct bcm2835_sg {
+	dma_addr_t addr;
+	uint32_t en;		/* number of elements (24-bit) */
+	uint32_t fn;		/* number of frames (16-bit) */
+};
+
+struct bcm2835_desc {
+	struct virt_dma_desc vd;
+	enum dma_transfer_direction dir;
+	dma_addr_t dev_addr;
+
+	uint8_t es;
+	unsigned int sync_type;
+	unsigned int sync_dreq;
+
+	unsigned int control_block_size;
+	struct bcm2835_dma_cb *control_block_base;
+	dma_addr_t control_block_base_phys;
+
+	unsigned sglen;
+	struct bcm2835_sg sg[0];
+};
+
+#define BCM2835_DMA_CS		0x00
+#define BCM2835_DMA_ADDR	0x04
+#define BCM2835_DMA_SOURCE_AD	0x0c
+#define BCM2835_DMA_DEST_AD	0x10
+#define BCM2835_DMA_NEXTCB	0x1C
+
+/* DMA CS Control and Status bits */
+#define BCM2835_DMA_ACTIVE	(1 << 0)
+#define BCM2835_DMA_INT		(1 << 2)
+#define BCM2835_DMA_ISPAUSED	(1 << 4)  /* Pause requested or not active */
+#define BCM2835_DMA_ISHELD	(1 << 5)  /* Is held by DREQ flow control */
+#define BCM2835_DMA_ERR		(1 << 8)
+#define BCM2835_DMA_ABORT	(1 << 30) /* stop current CB, go to next, WO */
+#define BCM2835_DMA_RESET	(1 << 31) /* WO, self clearing */
+
+#define BCM2835_DMA_INT_EN	(1 << 0)
+#define BCM2835_DMA_D_INC	(1 << 4)
+#define BCM2835_DMA_D_DREQ	(1 << 6)
+#define BCM2835_DMA_S_INC	(1 << 8)
+#define BCM2835_DMA_S_DREQ	(1 << 6)
+
+#define	BCM2835_DMA_PER_MAP(x)	((x) << 16)
+
+#define BCM2835_DMA_DATA_TYPE_S8 1
+#define BCM2835_DMA_DATA_TYPE_S16 2
+#define BCM2835_DMA_DATA_TYPE_S32 4
+#define BCM2835_DMA_DATA_TYPE_S128 16
+
+#define BCM2835_DMA_CHANNEL_MASK 32565
+
+/* valid only for channels 0 - 14, 15 has its own base address */
+#define BCM2835_DMA_CHAN(n)	((n)<<8) /* base address */
+#define BCM2835_DMA_CHANIO(dma_base, n) ((dma_base)+BCM2835_DMA_CHAN(n))
+
+static const unsigned es_bytes[] = {
+	[BCM2835_DMA_DATA_TYPE_S8] = 1,
+	[BCM2835_DMA_DATA_TYPE_S16] = 2,
+	[BCM2835_DMA_DATA_TYPE_S32] = 4,
+	[BCM2835_DMA_DATA_TYPE_S128] = 16
+};
+
+static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d)
+{
+	return container_of(d, struct bcm2835_dmadev, ddev);
+}
+
+static inline struct bcm2835_chan *to_bcm2835_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct bcm2835_chan, vc.chan);
+}
+
+static inline struct bcm2835_desc *to_bcm2835_dma_desc(
+		struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct bcm2835_desc, vd.tx);
+}
+
+static bool bcm2835_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	return true;
+}
+
+static struct of_dma_filter_info bcm2835_dma_info = {
+	.filter_fn = bcm2835_dma_filter_fn,
+};
+
+static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd);
+	dma_free_coherent(desc->vd.tx.chan->device->dev,
+			desc->control_block_size,
+			desc->control_block_base,
+			desc->control_block_base_phys);
+	kfree(desc);
+}
+
+static void bcm2835_dma_start(void __iomem *dma_chan_base,
+		dma_addr_t control_block)
+{
+	dsb();	/* ARM data synchronization (push) operation */
+
+	writel(control_block, dma_chan_base+BCM2835_DMA_ADDR);
+	writel(BCM2835_DMA_ACTIVE, dma_chan_base+BCM2835_DMA_CS);
+}
+
+static int bcm2835_dma_abort(void __iomem *dma_chan_base)
+{
+	unsigned long int cs;
+	int rc = 0;
+
+	cs = readl(dma_chan_base + BCM2835_DMA_CS);
+
+	if (BCM2835_DMA_ACTIVE & cs) {
+		long int timeout = 10000;
+
+		/* write 0 to the active bit - pause the DMA */
+		writel(0, dma_chan_base + BCM2835_DMA_CS);
+
+		/* wait for any current AXI transfer to complete */
+		while (0 != (cs & BCM2835_DMA_ISPAUSED) && --timeout >= 0)
+			cs = readl(dma_chan_base + BCM2835_DMA_CS);
+
+		if (0 != (cs & BCM2835_DMA_ISPAUSED)) {
+			/* we'll un-pause when we set of our next DMA */
+			rc = -ETIMEDOUT;
+
+		} else if (BCM2835_DMA_ACTIVE & cs) {
+			/* terminate the control block chain */
+			writel(0, dma_chan_base + BCM2835_DMA_NEXTCB);
+
+			/* abort the whole DMA */
+			writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE,
+			       dma_chan_base + BCM2835_DMA_CS);
+		}
+	}
+
+	return rc;
+}
+
+static void bcm2835_dma_start_sg(struct bcm2835_chan *c, struct bcm2835_desc *d,
+		unsigned idx)
+{
+	struct bcm2835_sg *sg = d->sg + idx;
+	int frame;
+	int frames = sg->fn;
+
+	/*
+	 * Iterate over all frames and create a control block
+	 * for each frame and link them together.
+	 */
+	for (frame = 0; frame < frames; frame++) {
+		struct bcm2835_dma_cb *control_block =
+			&d->control_block_base[frame];
+
+		/* Setup adresses */
+		if (d->dir == DMA_DEV_TO_MEM) {
+			control_block->info = BCM2835_DMA_D_INC;
+			control_block->src = d->dev_addr;
+			control_block->dst = sg->addr+frame*sg->en;
+		} else {
+			control_block->info = BCM2835_DMA_S_INC;
+			control_block->src = sg->addr+frame*sg->en;
+			control_block->dst = d->dev_addr;
+		}
+
+		/* Enable interrupt */
+		control_block->info |= BCM2835_DMA_INT_EN;
+
+		/* Setup synchronization */
+		if (d->sync_type != 0)
+			control_block->info |= d->sync_type;
+
+		/* Setup DREQ channel */
+		if (d->sync_dreq != 0)
+			control_block->info |=
+				BCM2835_DMA_PER_MAP(d->sync_dreq);
+
+		/* Length of a frame */
+		control_block->length = sg->en;
+
+		/*
+		 * Next block is the next frame.
+		 * This DMA engine driver currently only supports cyclic DMA.
+		 * Therefore, wrap around at number of frames.
+		 */
+		control_block->next = d->control_block_base_phys +
+			sizeof(struct bcm2835_dma_cb)*((frame+1)%(frames));
+
+		/* The following fields are not used here */
+		control_block->stride = 0;
+		control_block->pad[0] = 0;
+		control_block->pad[1] = 0;
+	}
+
+	/* Start the DMA transfer */
+	bcm2835_dma_start(c->dma_chan_base, d->control_block_base_phys);
+}
+
+static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+	struct bcm2835_desc *d;
+
+	if (!vd) {
+		c->desc = NULL;
+		return;
+	}
+
+	list_del(&vd->node);
+
+	c->desc = d = to_bcm2835_dma_desc(&vd->tx);
+	c->sgidx = 0;
+
+	bcm2835_dma_start_sg(c, d, 0);
+}
+
+static irqreturn_t bcm2835_dma_callback(int irq, void *data)
+{
+	struct bcm2835_chan *c = data;
+	struct bcm2835_desc *d;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+
+	/* acknowledge interrupt */
+	writel(BCM2835_DMA_INT, c->dma_chan_base + BCM2835_DMA_CS);
+
+	d = c->desc;
+
+	if (d) {
+		if (!c->cyclic) {
+			if (++c->sgidx < d->sglen) {
+				bcm2835_dma_start_sg(c, d, c->sgidx);
+			} else {
+				bcm2835_dma_start_desc(c);
+				vchan_cookie_complete(&d->vd);
+			}
+		} else {
+			vchan_cyclic_callback(&d->vd);
+		}
+	}
+
+	/* keep the DMA engine running */
+	dsb(); /* ARM synchronization barrier */
+	writel(BCM2835_DMA_ACTIVE, c->dma_chan_base + BCM2835_DMA_CS);
+
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * This callback schedules all pending channels.  We could be more
+ * clever here by postponing allocation of the real DMA channels to
+ * this point, and freeing them when our virtual channel becomes idle.
+ *
+ * We would then need to deal with 'all channels in-use'
+ */
+static void bcm2835_dma_sched(unsigned long data)
+{
+	struct bcm2835_dmadev *d = (struct bcm2835_dmadev *)data;
+	LIST_HEAD(head);
+
+	spin_lock_irq(&d->lock);
+	list_splice_tail_init(&d->pending, &head);
+	spin_unlock_irq(&d->lock);
+
+	while (!list_empty(&head)) {
+		struct bcm2835_chan *c = list_first_entry(&head,
+			struct bcm2835_chan, node);
+
+		spin_lock_irq(&c->vc.lock);
+		list_del_init(&c->node);
+		bcm2835_dma_start_desc(c);
+		spin_unlock_irq(&c->vc.lock);
+	}
+}
+
+static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	int ret;
+	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
+	uint32_t chans = d->chans_available;
+	int chanID = 0;
+
+	dev_dbg(c->vc.chan.device->dev,
+			"allocating channel for %u\n", c->dma_sig);
+
+	/* do not use the FIQ and BULK channels */
+	chans &= ~0xD;
+
+	if (chans) {
+		/* return the ordinal of the first channel in the bitmap */
+		while (chans != 0 && (chans & 1) == 0) {
+			chans >>= 1;
+			chanID++;
+		}
+
+		/* claim the channel */
+		d->chans_available &= ~(1 << chanID);
+
+		c->dma_chan_base = BCM2835_DMA_CHANIO(d->dma_base, chanID);
+
+		c->dma_irq_number = d->dma_irq_numbers[chanID];
+
+		c->dma_ch = chanID;
+	} else {
+		return -ENOMEM;
+	}
+
+	c->dma_irq_handler.name = "DMA engine IRQ handler";
+	c->dma_irq_handler.flags = 0;
+	c->dma_irq_handler.handler = bcm2835_dma_callback;
+
+	ret = request_any_context_irq(c->dma_irq_number,
+			bcm2835_dma_callback, 0, "DMA IRQ", c);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void bcm2835_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
+
+	vchan_free_chan_resources(&c->vc);
+	d->chans_available |= (1 << c->dma_ch);
+	free_irq(c->dma_irq_number, c);
+
+	dev_dbg(c->vc.chan.device->dev, "freeing channel for %u\n", c->dma_sig);
+}
+
+static size_t bcm2835_dma_sg_size(struct bcm2835_sg *sg)
+{
+	return sg->en * sg->fn;
+}
+
+static size_t bcm2835_dma_desc_size(struct bcm2835_desc *d)
+{
+	unsigned i;
+	size_t size;
+
+	for (size = i = 0; i < d->sglen; i++)
+		size += bcm2835_dma_sg_size(&d->sg[i]);
+
+	return size * es_bytes[d->es];
+}
+
+static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr)
+{
+	unsigned i;
+	size_t size;
+
+	for (size = i = 0; i < d->sglen; i++) {
+		size_t this_size = bcm2835_dma_sg_size(&d->sg[i]);
+
+		if (size)
+			size += this_size;
+		else if (addr >= d->sg[i].addr &&
+			 addr < d->sg[i].addr + this_size)
+			size += d->sg[i].addr + this_size - addr;
+	}
+	return size;
+}
+
+
+/*
+ * Returns current physical source address for the given DMA channel.
+ * If the channel is running the caller must disable interrupts prior calling
+ * this function and process the returned value before re-enabling interrupt to
+ * prevent races with the interrupt handler.
+ */
+static dma_addr_t bcm2835_get_dma_src_pos(struct bcm2835_chan *c)
+{
+	return readl(c->dma_chan_base + BCM2835_DMA_SOURCE_AD);
+}
+
+/*
+ * Returns current physical destination address for the given DMA channel.
+ * If the channel is running the caller must disable interrupts prior calling
+ * this function and process the returned value before re-enabling interrupt to
+ * prevent races with the interrupt handler.
+ */
+static dma_addr_t bcm2835_get_dma_dst_pos(struct bcm2835_chan *c)
+{
+	return readl(c->dma_chan_base + BCM2835_DMA_DEST_AD);
+}
+
+static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		txstate->residue =
+			bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx));
+	} else if (c->desc && c->desc->vd.tx.cookie == cookie) {
+		struct bcm2835_desc *d = c->desc;
+		dma_addr_t pos;
+
+		if (d->dir == DMA_MEM_TO_DEV)
+			pos = bcm2835_get_dma_src_pos(c);
+		else if (d->dir == DMA_DEV_TO_MEM)
+			pos = bcm2835_get_dma_dst_pos(c);
+		else
+			pos = 0;
+
+		txstate->residue = bcm2835_dma_desc_size_pos(d, pos);
+	} else {
+		txstate->residue = 0;
+	}
+
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return ret;
+}
+
+static void bcm2835_dma_issue_pending(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (vchan_issue_pending(&c->vc) && !c->desc) {
+		struct bcm2835_dmadev *d = to_bcm2835_dma_dev(chan->device);
+		spin_lock(&d->lock);
+		if (list_empty(&c->node))
+			list_add_tail(&c->node, &d->pending);
+		spin_unlock(&d->lock);
+		tasklet_schedule(&d->task);
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct bcm2835_desc *d;
+	dma_addr_t dev_addr;
+	unsigned int es, sync_type, sync_dreq;
+
+	/* Grab configuration */
+	if (direction == DMA_DEV_TO_MEM) {
+		dev_addr = c->cfg.src_addr;
+		dev_width = c->cfg.src_addr_width;
+		sync_type = BCM2835_DMA_S_DREQ;
+		sync_dreq = c->cfg.slave_id;
+	} else if (direction == DMA_MEM_TO_DEV) {
+		dev_addr = c->cfg.dst_addr;
+		dev_width = c->cfg.dst_addr_width;
+		sync_type = BCM2835_DMA_D_DREQ;
+		sync_dreq = c->cfg.slave_id;
+	} else {
+		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	/* Bus width translates to the element size (ES) */
+	switch (dev_width) {
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		es = BCM2835_DMA_DATA_TYPE_S32;
+		break;
+	default:
+		return NULL;
+	}
+
+	/* Now allocate and setup the descriptor. */
+	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
+	d->dir = direction;
+	d->dev_addr = dev_addr;
+	d->es = es;
+	d->sync_type = sync_type;
+	d->sync_dreq = sync_dreq;
+	d->sg[0].addr = buf_addr;
+	d->sg[0].en = period_len;
+	d->sg[0].fn = buf_len / period_len;
+	d->sglen = 1;
+
+	/* Allocate memory for control blocks */
+	d->control_block_size = d->sg[0].fn*sizeof(struct bcm2835_dma_cb);
+	d->control_block_base = dma_alloc_coherent(chan->device->dev,
+			d->control_block_size, &d->control_block_base_phys,
+			GFP_KERNEL);
+
+	if (!d->control_block_base) {
+		dev_err(chan->device->dev,
+				"%s: Memory allocation error\n", __func__);
+		return NULL;
+	}
+
+	memset(d->control_block_base, 0, d->control_block_size);
+
+	if (!c->cyclic) {
+		c->cyclic = true;
+		/* nothing else is implemented */
+	}
+
+	return vchan_tx_prep(&c->vc, &d->vd, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+}
+
+static int bcm2835_dma_slave_config(struct bcm2835_chan *c,
+		struct dma_slave_config *cfg)
+{
+	if ((cfg->direction == DMA_DEV_TO_MEM
+			&& cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+	    (cfg->direction == DMA_MEM_TO_DEV
+			&& cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)) {
+		return -EINVAL;
+	}
+
+	memcpy(&c->cfg, cfg, sizeof(c->cfg));
+
+	return 0;
+}
+
+static int bcm2835_dma_terminate_all(struct bcm2835_chan *c)
+{
+	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+
+	/* Prevent this channel being scheduled */
+	spin_lock(&d->lock);
+	list_del_init(&c->node);
+	spin_unlock(&d->lock);
+
+	/*
+	 * Stop DMA activity: we assume the callback will not be called
+	 * after bcm_dma_abort() returns (even if it does, it will see
+	 * c->desc is NULL and exit.)
+	 */
+	if (c->desc) {
+		c->desc = NULL;
+		bcm2835_dma_abort(c->dma_chan_base);
+
+		/* Wait for stopping */
+		while (readl(c->dma_chan_base + BCM2835_DMA_CS)
+			& BCM2835_DMA_ACTIVE)
+			;
+	}
+
+	vchan_get_all_descriptors(&c->vc, &head);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	vchan_dma_desc_free_list(&c->vc, &head);
+
+	return 0;
+}
+
+static int bcm2835_dma_pause(struct bcm2835_chan *c)
+{
+	/* FIXME: not supported by platform private API */
+	return -EINVAL;
+}
+
+static int bcm2835_dma_resume(struct bcm2835_chan *c)
+{
+	/* FIXME: not supported by platform private API */
+	return -EINVAL;
+}
+
+static int bcm2835_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	int ret;
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		ret = bcm2835_dma_slave_config(c,
+				(struct dma_slave_config *)arg);
+		break;
+
+	case DMA_TERMINATE_ALL:
+		ret = bcm2835_dma_terminate_all(c);
+		break;
+
+	case DMA_PAUSE:
+		ret = bcm2835_dma_pause(c);
+		break;
+
+	case DMA_RESUME:
+		ret = bcm2835_dma_resume(c);
+		break;
+
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+static int bcm2835_dma_chan_init(struct bcm2835_dmadev *od, int dma_sig)
+{
+	struct bcm2835_chan *c;
+
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+
+	c->dma_sig = dma_sig;
+	c->vc.desc_free = bcm2835_dma_desc_free;
+	vchan_init(&c->vc, &od->ddev);
+	INIT_LIST_HEAD(&c->node);
+
+	od->ddev.chancnt++;
+
+	return 0;
+}
+
+static void bcm2835_dma_free(struct bcm2835_dmadev *od)
+{
+	tasklet_kill(&od->task);
+	while (!list_empty(&od->ddev.channels)) {
+		struct bcm2835_chan *c = list_first_entry(&od->ddev.channels,
+			struct bcm2835_chan, vc.chan.device_node);
+
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+		kfree(c);
+	}
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id bcm2835_dma_of_match[] = {
+	{
+		.compatible = "brcm,bcm2835-dma",
+	}
+};
+MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
+#endif
+
+static int bcm2835_dma_probe(struct platform_device *pdev)
+{
+	struct bcm2835_dmadev *od;
+	struct resource *dma_res = NULL;
+	void __iomem *dma_base = NULL;
+	int rc = 0;
+	int i;
+	struct resource *irq;
+	int irq_resources;
+
+	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	dma_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dma_base = devm_ioremap_resource(&pdev->dev, dma_res);
+	if (IS_ERR(dma_base))
+		return PTR_ERR(dma_base);
+
+	od->dma_base = dma_base;
+	od->chans_available = BCM2835_DMA_CHANNEL_MASK;
+
+	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+	od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources;
+	od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources;
+	od->ddev.device_tx_status = bcm2835_dma_tx_status;
+	od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
+	od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
+	od->ddev.device_control = bcm2835_dma_control;
+	od->ddev.dev = &pdev->dev;
+	INIT_LIST_HEAD(&od->ddev.channels);
+	INIT_LIST_HEAD(&od->pending);
+	spin_lock_init(&od->lock);
+
+	tasklet_init(&od->task, bcm2835_dma_sched, (unsigned long)od);
+
+	irq_resources = 0;
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		if (IORESOURCE_IRQ == resource_type(&pdev->resource[i]))
+			irq_resources++;
+	}
+
+	od->dma_irq_numbers = devm_kzalloc(&pdev->dev,
+			irq_resources*sizeof(int), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	for (i = 0; i < irq_resources; i++) {
+		rc = bcm2835_dma_chan_init(od, i);
+		if (rc) {
+			bcm2835_dma_free(od);
+			return rc;
+		}
+
+		irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq) {
+			dev_err(&pdev->dev,
+					"No IRQ resource for channel %i\n", i);
+			return -ENODEV;
+		}
+		od->dma_irq_numbers[i] = irq->start;
+	}
+
+	rc = dma_async_device_register(&od->ddev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to register slave DMA engine device: %d\n", rc);
+		bcm2835_dma_free(od);
+		return rc;
+	}
+
+	platform_set_drvdata(pdev, od);
+
+	if (pdev->dev.of_node) {
+		bcm2835_dma_info.dma_cap = od->ddev.cap_mask;
+
+		/* Device-tree DMA controller registration */
+		rc = of_dma_controller_register(pdev->dev.of_node,
+				of_dma_simple_xlate, &bcm2835_dma_info);
+		if (rc) {
+			dev_err(&pdev->dev, "Failed to register DMA controller\n");
+			dma_async_device_unregister(&od->ddev);
+			bcm2835_dma_free(od);
+			return rc;
+		}
+	}
+
+	dev_dbg(&pdev->dev, "Load BCM2835 DMA engine driver\n");
+
+	return rc;
+}
+
+static int bcm2835_dma_remove(struct platform_device *pdev)
+{
+	struct bcm2835_dmadev *od = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&od->ddev);
+	bcm2835_dma_free(od);
+
+	return 0;
+}
+
+static struct platform_driver bcm2835_dma_driver = {
+	.probe	= bcm2835_dma_probe,
+	.remove	= bcm2835_dma_remove,
+	.driver = {
+		.name = "bcm2835-dma",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(bcm2835_dma_of_match),
+	},
+};
+
+static const struct platform_device_info bcm2835_dma_dev_info = {
+	.name = "bcm2835-dma",
+	.id = -1,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
+static int bcm2835_dma_init(void)
+{
+	int rc = platform_driver_register(&bcm2835_dma_driver);
+	return rc;
+}
+subsys_initcall(bcm2835_dma_init);
+
+static void __exit bcm2835_dma_exit(void)
+{
+	platform_driver_unregister(&bcm2835_dma_driver);
+}
+module_exit(bcm2835_dma_exit);
+
+MODULE_AUTHOR("Florian Meier");
+MODULE_DESCRIPTION("BCM2835 DMA engine driver");
+MODULE_LICENSE("GPL");
+