diff mbox

[1/5] dmaengine: dw_dmac: move to generic DMA binding

Message ID 1359410300-26113-2-git-send-email-arnd@arndb.de (mailing list archive)
State New, archived
Headers show

Commit Message

Arnd Bergmann Jan. 28, 2013, 9:58 p.m. UTC
The original device tree binding for this driver, from Viresh Kumar
unfortunately conflicted with the generic DMA binding, and did not allow
to completely seperate slave device configuration from the controller.

This is an attempt to replace it with an implementation of the generic
binding, but it is currently completely untested, because I do not have
any hardware with this particular controller.

The patch applies on top of linux-next, which contains both the base
support for the generic DMA binding, as well as the earlier attempt from
Viresh. Both of these are currently not merged upstream however.

There are a couple of TODO items that are left remaining and are open
for ideas from other people.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Vinod Koul <vinod.koul@linux.intel.com>
Cc: devicetree-discuss@lists.ozlabs.org
Cc: linux-arm-kernel@lists.infradead.org
---
 Documentation/devicetree/bindings/dma/snps-dma.txt |  71 ++++++-----
 drivers/dma/dw_dmac.c                              | 137 ++++++++++-----------
 drivers/dma/dw_dmac_regs.h                         |   4 -
 include/linux/dw_dmac.h                            |   5 -
 4 files changed, 102 insertions(+), 115 deletions(-)

Comments

Viresh Kumar Jan. 29, 2013, 7:24 a.m. UTC | #1
On 29 January 2013 03:28, Arnd Bergmann <arnd@arndb.de> wrote:
> The original device tree binding for this driver, from Viresh Kumar
> unfortunately conflicted with the generic DMA binding, and did not allow
> to completely seperate slave device configuration from the controller.
>
> This is an attempt to replace it with an implementation of the generic
> binding, but it is currently completely untested, because I do not have
> any hardware with this particular controller.
>
> The patch applies on top of linux-next, which contains both the base
> support for the generic DMA binding, as well as the earlier attempt from
> Viresh. Both of these are currently not merged upstream however.

This was really my work and i am feeling bad that i couldn't allocate any time
for it. Thanks for starting it.

> diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
> index 5bb3dfb..212d387 100644
> --- a/Documentation/devicetree/bindings/dma/snps-dma.txt
> +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
> @@ -3,59 +3,62 @@
>  Required properties:
>  - compatible: "snps,dma-spear1340"
>  - reg: Address range of the DMAC registers
> -- interrupt-parent: Should be the phandle for the interrupt controller
> -  that services interrupts for this device
>  - interrupt: Should contain the DMAC interrupt number
> -- nr_channels: Number of channels supported by hardware
> -- is_private: The device channels should be marked as private and not for by the
> -  general purpose DMA channel allocator. False if not passed.
> +- dma-channels: Number of channels supported by hardware
> +- dma-requests: Number of DMA request lines supported
> +- dma-masters: Number of AHB masters supported by the controller
> +- #dma-cells: must be <3>

Shouldn't this be 4? Would be better to mention what fields are these,
right here.
I have seen them below though.

> +DMA clients connected to the Designware DMA controller must use the format
> +described in the dma.txt file, using a five-cell specifier for each channel.
> +The five cells in order are:
> +
> +1. A phandle pointing to the DMA controller
> +2. The value for the cfg_hi register.
> +3. The value for the cfg_lo register.
> +4. Source master for transfers on allocated channel.
> +5. Destination master for transfers on allocated channel.

> diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
> index 8cfaaf8..88504c2 100644
> --- a/drivers/dma/dw_dmac.c
> +++ b/drivers/dma/dw_dmac.c
> @@ -18,6 +18,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/of.h>
> +#include <linux/of_dma.h>
>  #include <linux/mm.h>
>  #include <linux/module.h>
>  #include <linux/platform_device.h>
> @@ -1179,49 +1180,69 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
>         dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
>  }
>
> -bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
> +/* forward declaration used in filter */
> +static struct platform_driver dw_driver;

extern? This is not a declaration but definition.

> +
> +struct dw_dma_filter_args {
> +       struct dw_dma *dw;
> +       u32 cfg_lo;
> +       u32 cfg_hi;
> +       unsigned src;

Currently named as sms

> +       unsigned dst;

dms

> +};
> +
> +static bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
>  {
>         struct dw_dma *dw = to_dw_dma(chan->device);
> -       static struct dw_dma *last_dw;
> -       static char *last_bus_id;
> -       int i = -1;
> +       struct dw_dma_filter_args *fargs = param;
> +       struct dw_dma_slave *sd;
>
> -       /*
> -        * dmaengine framework calls this routine for all channels of all dma
> -        * controller, until true is returned. If 'param' bus_id is not
> -        * registered with a dma controller (dw), then there is no need of
> -        * running below function for all channels of dw.
> -        *
> -        * This block of code does this by saving the parameters of last
> -        * failure. If dw and param are same, i.e. trying on same dw with
> -        * different channel, return false.
> -        */
> -       if ((last_dw == dw) && (last_bus_id == param))
> +       /* both the driver and the device must match */
> +        if (chan->device->dev->driver != &dw_driver.driver)
> +                return false;

Can this ever happen? Isn't it the case that this routine would be called
only for dw_dmac?

> +       if (dw != fargs->dw)
>                 return false;
> -       /*
> -        * Return true:
> -        * - If dw_dma's platform data is not filled with slave info, then all
> -        *   dma controllers are fine for transfer.
> -        * - Or if param is NULL
> -        */
> -       if (!dw->sd || !param)
> -               return true;
>
> -       while (++i < dw->sd_count) {
> -               if (!strcmp(dw->sd[i].bus_id, param)) {
> -                       chan->private = &dw->sd[i];
> -                       last_dw = NULL;
> -                       last_bus_id = NULL;
> +       /* FIXME: memory leak! could we put this into dw_dma_chan? */
> +       sd = devm_kzalloc(dw->dma.dev, sizeof (*sd), GFP_KERNEL);

Yes.

> +       if (!sd)
> +               return false;
>
> -                       return true;
> -               }
> -       }
> +       sd->dma_dev     = dw->dma.dev;
> +       sd->cfg_hi      = fargs->cfg_hi;
> +       sd->cfg_lo      = fargs->cfg_lo;
> +       sd->src_master  = fargs->src;
> +       sd->dst_master  = fargs->dst;
> +
> +       chan->private = sd;
>
> -       last_dw = dw;
> -       last_bus_id = param;
> -       return false;
> +       return true;
> +}
> +
> +static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
> +                                        struct of_dma *ofdma)
> +{
> +       struct dw_dma *dw = ofdma->of_dma_data;
> +       struct dw_dma_filter_args fargs = {
> +               .dw = dw,
> +       };
> +       dma_cap_mask_t cap;
> +
> +       if (dma_spec->args_count != 4)

args_count contains count of all params leaving the phandle?

> +               return NULL;
> +
> +       /* FIXME: This binding is rather clumsy. Can't we use the
> +          request line numbers here instead? */

yes.

> +       fargs.cfg_lo = be32_to_cpup(dma_spec->args+0);
> +       fargs.cfg_hi = be32_to_cpup(dma_spec->args+1);
> +       fargs.src = be32_to_cpup(dma_spec->args+2);
> +       fargs.dst = be32_to_cpup(dma_spec->args+3);
> +
> +       dma_cap_zero(cap);
> +       dma_cap_set(DMA_SLAVE, cap);
> +       /* FIXME: there should be a simpler way to do this */
> +       return dma_request_channel(cap, dw_dma_generic_filter, &dma_spec->args[0]);

don't you need to send &fargs as the last argument?
Arnd Bergmann Jan. 29, 2013, 10:35 a.m. UTC | #2
On Tuesday 29 January 2013, Viresh Kumar wrote:
> > diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
> > index 5bb3dfb..212d387 100644
> > --- a/Documentation/devicetree/bindings/dma/snps-dma.txt
> > +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
> > @@ -3,59 +3,62 @@
> >  Required properties:
> >  - compatible: "snps,dma-spear1340"
> >  - reg: Address range of the DMAC registers
> > -- interrupt-parent: Should be the phandle for the interrupt controller
> > -  that services interrupts for this device
> >  - interrupt: Should contain the DMAC interrupt number
> > -- nr_channels: Number of channels supported by hardware
> > -- is_private: The device channels should be marked as private and not for by the
> > -  general purpose DMA channel allocator. False if not passed.
> > +- dma-channels: Number of channels supported by hardware
> > +- dma-requests: Number of DMA request lines supported
> > +- dma-masters: Number of AHB masters supported by the controller
> > +- #dma-cells: must be <3>
> 
> Shouldn't this be 4? Would be better to mention what fields are these,
> right here. I have seen them below though.

Correct. I changed these a couple of times while trying to understand
what the fields are, and I missed this instance. I'm still not sure
whether we actually need all four fields, or what the simplest format
for them would be. This just mirrors what you had in your binding.

> > -bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
> > +/* forward declaration used in filter */
> > +static struct platform_driver dw_driver;
> 
> extern? This is not a declaration but definition.

No. You can have multiple declarations for a static symbol like this,
but only one of them with an initilizer. I usually recommend against
doing this myself, because it's confusing and somewhat bad style, but
it is correct C.

> > -       /*
> > -        * dmaengine framework calls this routine for all channels of all dma
> > -        * controller, until true is returned. If 'param' bus_id is not
> > -        * registered with a dma controller (dw), then there is no need of
> > -        * running below function for all channels of dw.
> > -        *
> > -        * This block of code does this by saving the parameters of last
> > -        * failure. If dw and param are same, i.e. trying on same dw with
> > -        * different channel, return false.
> > -        */
> > -       if ((last_dw == dw) && (last_bus_id == param))
> > +       /* both the driver and the device must match */
> > +        if (chan->device->dev->driver != &dw_driver.driver)
> > +                return false;
> 
> Can this ever happen? Isn't it the case that this routine would be called
> only for dw_dmac?

I think not. AFAIK the filter function will be called for each channel
on each DMA engine until one of them matches.

> > -       while (++i < dw->sd_count) {
> > -               if (!strcmp(dw->sd[i].bus_id, param)) {
> > -                       chan->private = &dw->sd[i];
> > -                       last_dw = NULL;
> > -                       last_bus_id = NULL;
> > +       /* FIXME: memory leak! could we put this into dw_dma_chan? */
> > +       sd = devm_kzalloc(dw->dma.dev, sizeof (*sd), GFP_KERNEL);
> 
> Yes.

Yes it can be in dw_dma_chan or yes it is a memory leak?

> > +static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
> > +                                        struct of_dma *ofdma)
> > +{
> > +       struct dw_dma *dw = ofdma->of_dma_data;
> > +       struct dw_dma_filter_args fargs = {
> > +               .dw = dw,
> > +       };
> > +       dma_cap_mask_t cap;
> > +
> > +       if (dma_spec->args_count != 4)
> 
> args_count contains count of all params leaving the phandle?

That was my interpretation from reading the code, but I have not tried it.

> > +       /* FIXME: This binding is rather clumsy. Can't we use the
> > +          request line numbers here instead? */
> 
> yes.

Ok, Very good. What is the encoding of the registers then?

> > +       fargs.cfg_lo = be32_to_cpup(dma_spec->args+0);
> > +       fargs.cfg_hi = be32_to_cpup(dma_spec->args+1);
> > +       fargs.src = be32_to_cpup(dma_spec->args+2);
> > +       fargs.dst = be32_to_cpup(dma_spec->args+3);
> > +
> > +       dma_cap_zero(cap);
> > +       dma_cap_set(DMA_SLAVE, cap);
> > +       /* FIXME: there should be a simpler way to do this */
> > +       return dma_request_channel(cap, dw_dma_generic_filter, &dma_spec->args[0]);
> 
> don't you need to send &fargs as the last argument?

Right, my mistake.

Thanks a lot for the input. When I fix the above, are actually able
to test the changes, or have you lost access to the hardware when
leaving ST?

	Arnd
Viresh Kumar Jan. 29, 2013, 10:49 a.m. UTC | #3
On 29 January 2013 16:05, Arnd Bergmann <arnd@arndb.de> wrote:
> On Tuesday 29 January 2013, Viresh Kumar wrote:

>> Shouldn't this be 4? Would be better to mention what fields are these,
>> right here. I have seen them below though.
>
> Correct. I changed these a couple of times while trying to understand
> what the fields are, and I missed this instance. I'm still not sure
> whether we actually need all four fields, or what the simplest format
> for them would be. This just mirrors what you had in your binding.

You can add request_line number and leave first two fields, cfghi and lo.

>> > +       /* FIXME: memory leak! could we put this into dw_dma_chan? */
>> > +       sd = devm_kzalloc(dw->dma.dev, sizeof (*sd), GFP_KERNEL);
>>
>> Yes.
>
> Yes it can be in dw_dma_chan or yes it is a memory leak?

Yes it can be in dw_dma_chan :)

>> > +       if (dma_spec->args_count != 4)
>>
>> args_count contains count of all params leaving the phandle?
>
> That was my interpretation from reading the code, but I have not tried it.

Okay, it was just a question from my side :)

>> > +       /* FIXME: This binding is rather clumsy. Can't we use the
>> > +          request line numbers here instead? */
>>
>> yes.
>
> Ok, Very good. What is the encoding of the registers then?

You can still keep fargs as is and just fill them as:

       fargs.cfg_lo = 0;

if (DMA_TO_DEV)
       // dest is periph
       fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
else if (DEV_TO_DMA)
       // src is periph
       fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;

The field size is 4 bits.

> Thanks a lot for the input. When I fix the above, are actually able
> to test the changes, or have you lost access to the hardware when
> leaving ST?

I don't have any sort of access for testing these :(
But, Vipul might try these at his end.
Arnd Bergmann Jan. 29, 2013, 10:50 a.m. UTC | #4
(putting back the Cc list, I assumed you dropped them accidentally)

On Tuesday 29 January 2013, Andy Shevchenko wrote:
> On Mon, 2013-01-28 at 21:58 +0000, Arnd Bergmann wrote: 
> > -	if ((last_dw == dw) && (last_bus_id == param))
> > +	/* both the driver and the device must match */
> > +        if (chan->device->dev->driver != &dw_driver.driver)
> 
> Could we somehow pass the &.driver to the generic filter function (via
> *_dma_controller_register() ? ) and do this to each DMA driver?

My hope is still that we can avoid using filter functions entirely
when we use xlate() logic, and instead just go through the channels
of the dma engine we know we are looking at.

I would also assume that the argument passed to *_dma_controller_register
normally holds a pointer to the dma device. Now that I think about it,
the check 'if (dw != fargs->dw)' already implies that we are looking
at the correct driver, but it feels dirty to cast a random dma_device
pointer to a driver specific one before we know which driver it is
for.

However, since we have a valid pointer to a dw_dma object, we can
extract the driver from there:

	struct dw_dma_filter_args *fargs = param;
	struct dma_device *ddev = &fargs->dw->dma;
	if (dma != chan->device)
		return -EINVAL;

which is simpler and cleaner that what I had.

> > +	sd->dma_dev	= dw->dma.dev;
> > +	sd->cfg_hi	= fargs->cfg_hi;
> > +	sd->cfg_lo	= fargs->cfg_lo;
> > +	sd->src_master	= fargs->src;
> > +	sd->dst_master	= fargs->dst;
> 
> Could we use fargs structure directly?

We could probably have no fargs but use the dw_dma_slave structure
directly to pass the data, if cannot figure out a way to avoid
the need for a filter function.  I thought about doing that, but
intermediate versions of my patch had a different layout here.

	Arnd
Andy Shevchenko Jan. 29, 2013, 10:54 a.m. UTC | #5
On Tue, 2013-01-29 at 16:19 +0530, Viresh Kumar wrote: 
> On 29 January 2013 16:05, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Tuesday 29 January 2013, Viresh Kumar wrote:

> >> > +       /* FIXME: This binding is rather clumsy. Can't we use the
> >> > +          request line numbers here instead? */
> >>
> >> yes.
> >
> > Ok, Very good. What is the encoding of the registers then?
> 
> You can still keep fargs as is and just fill them as:
> 
>        fargs.cfg_lo = 0;
> 
> if (DMA_TO_DEV)
>        // dest is periph
>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
> else if (DEV_TO_DMA)
>        // src is periph
>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;

We have macros for such shifts.

drivers/dma/dw_dmac.c:187:   cfghi = DWC_CFGH_DST_PER(...
drivers/dma/dw_dmac.c:189:   cfghi = DWC_CFGH_SRC_PER(...
Viresh Kumar Jan. 29, 2013, 10:57 a.m. UTC | #6
On 29 January 2013 16:24, Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
> On Tue, 2013-01-29 at 16:19 +0530, Viresh Kumar wrote:
>> if (DMA_TO_DEV)
>>        // dest is periph
>>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
>> else if (DEV_TO_DMA)
>>        // src is periph
>>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;
>
> We have macros for such shifts.
>
> drivers/dma/dw_dmac.c:187:   cfghi = DWC_CFGH_DST_PER(...
> drivers/dma/dw_dmac.c:189:   cfghi = DWC_CFGH_SRC_PER(...

I am getting older now, bad memory :)
I grepped into drivers/dma/dw_dmac_regs.h and left include/linux/dw_dmac.h :(
Andy Shevchenko Jan. 29, 2013, 11:14 a.m. UTC | #7
On Tue, 2013-01-29 at 16:27 +0530, Viresh Kumar wrote: 
> On 29 January 2013 16:24, Andy Shevchenko
> <andriy.shevchenko@linux.intel.com> wrote:
> > On Tue, 2013-01-29 at 16:19 +0530, Viresh Kumar wrote:
> >> if (DMA_TO_DEV)
> >>        // dest is periph
> >>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
> >> else if (DEV_TO_DMA)
> >>        // src is periph
> >>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;
> >
> > We have macros for such shifts.
> >
> > drivers/dma/dw_dmac.c:187:   cfghi = DWC_CFGH_DST_PER(...
> > drivers/dma/dw_dmac.c:189:   cfghi = DWC_CFGH_SRC_PER(...
> 
> I am getting older now, bad memory :)
> I grepped into drivers/dma/dw_dmac_regs.h and left include/linux/dw_dmac.h :(

Moreover the excerpt I showed from dw_dmac.c is the same piece of code
you wrote above as a sample.
Russell King - ARM Linux Jan. 29, 2013, 11:18 a.m. UTC | #8
On Tue, Jan 29, 2013 at 10:50:23AM +0000, Arnd Bergmann wrote:
> (putting back the Cc list, I assumed you dropped them accidentally)

That'll be why I don't have a copy of Andy's email to reply to.

> On Tuesday 29 January 2013, Andy Shevchenko wrote:
> > On Mon, 2013-01-28 at 21:58 +0000, Arnd Bergmann wrote: 
> > > -	if ((last_dw == dw) && (last_bus_id == param))
> > > +	/* both the driver and the device must match */
> > > +        if (chan->device->dev->driver != &dw_driver.driver)
> > 
> > Could we somehow pass the &.driver to the generic filter function (via
> > *_dma_controller_register() ? ) and do this to each DMA driver?

How, and what driver gets passed?

> My hope is still that we can avoid using filter functions entirely
> when we use xlate() logic, and instead just go through the channels
> of the dma engine we know we are looking at.

Has anyone yet determined whether any of these new DMA engine slave APIs
are usable for implementations which have a separate MUX between the
DMA engine and the DMA peripheral?
Arnd Bergmann Jan. 29, 2013, 1:31 p.m. UTC | #9
On Tuesday 29 January 2013, Viresh Kumar wrote:
> You can still keep fargs as is and just fill them as:
> 
>        fargs.cfg_lo = 0;
> 
> if (DMA_TO_DEV)
>        // dest is periph
>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
> else if (DEV_TO_DMA)
>        // src is periph
>        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;
> 
> The field size is 4 bits.

Ah, good. So I guess the "dma-requests" property should actually
be "16" then.

Does this mean that an implicit zero request line means memory?

Could we have device-to-device DMAs with this controller, and if
we can, should we have both 'src' and 'dst' fields? Are the
two number ranges sharing the same address space, i.e. is
request '7' as the destination guaranteed to be the same device
as request '7' in the source?

If we need two lines, we could interleave them with the bus
master numbers:

dmas = <&dwdma0 // controller
	7   // source request 7
	1   // source master 1
	0   // dest   request 0
	0>, // dest   master 1
       <&dwdma0
	0   // source request 0
	0   // source master 0
	8   // dest   request 8
	1>; // dest   master 1

In theory, we could use bit-stuffing to put them all into
a single 32 bit word I guess, but generally people don't
seem to like that for new bindings.

> > Thanks a lot for the input. When I fix the above, are actually able
> > to test the changes, or have you lost access to the hardware when
> > leaving ST?
> 
> I don't have any sort of access for testing these :(
> But, Vipul might try these at his end.

Ok, I see.

	Arnd
Arnd Bergmann Jan. 29, 2013, 1:44 p.m. UTC | #10
On Tuesday 29 January 2013, Russell King - ARM Linux wrote:
> > On Tuesday 29 January 2013, Andy Shevchenko wrote:
> > > On Mon, 2013-01-28 at 21:58 +0000, Arnd Bergmann wrote: 
> > > > - if ((last_dw == dw) && (last_bus_id == param))
> > > > + /* both the driver and the device must match */
> > > > +        if (chan->device->dev->driver != &dw_driver.driver)
> > > 
> > > Could we somehow pass the &.driver to the generic filter function (via
> > > *_dma_controller_register() ? ) and do this to each DMA driver?
> 
> How, and what driver gets passed?

of_dma_controller_register (see linux-next) already gets a device_node
of a device that is owned by the driver calling this function. This
driver could in theory pass its 'struct device_driver' as another
argument, although I would expect it to pass its own device specific
object (e.g. struct dw_dma or struct dma_pl330_dmac) as the 'data'
pointer, and from there it can easily check if the device matches.

> > My hope is still that we can avoid using filter functions entirely
> > when we use xlate() logic, and instead just go through the channels
> > of the dma engine we know we are looking at.
> 
> Has anyone yet determined whether any of these new DMA engine slave APIs
> are usable for implementations which have a separate MUX between the
> DMA engine and the DMA peripheral?

Can you give an example for this? We were careful to make sure it
works with platforms that connect a slave to multiple dma engines,
out of which any could be used for a given transfer. In the device
tree binding, you specify all possible controllers and give the
alternatives the same name, for example:

	serial@10000000 {
		compatible = "arm,pl011", "arm,primecell";
		dmas = <dwdma0 7 0>, <dwdma0 8 1>, <&pl330 17>, <&pl330 15>;
		dma-names = "rx", "tx", "rx", "tx;
		...
	};

Here, you hve a UART that can use DMA for both receive and transmit, and
can use either the dw_dma or the pl330 controller for each channel.
The common dmaengine code will try to pick the first available channel
on either one. We can probably try to be smarter about making the decision
which one to use.

Is this what you are referring to?

	Arnd
Andy Shevchenko Jan. 29, 2013, 1:45 p.m. UTC | #11
On Tue, 2013-01-29 at 13:31 +0000, Arnd Bergmann wrote: 
> On Tuesday 29 January 2013, Viresh Kumar wrote:
> > You can still keep fargs as is and just fill them as:
> > 
> >        fargs.cfg_lo = 0;
> > 
> > if (DMA_TO_DEV)
> >        // dest is periph
> >        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
> > else if (DEV_TO_DMA)
> >        // src is periph
> >        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;
> > 
> > The field size is 4 bits.
> 
> Ah, good. So I guess the "dma-requests" property should actually
> be "16" then.
> 
> Does this mean that an implicit zero request line means memory?

No, it doesn't.
When dma is doing mem2mem transfers the request line field is ignored by
the hw.

> Could we have device-to-device DMAs with this controller, and if
> we can, should we have both 'src' and 'dst' fields?

As far as I know there is no driver under dmaengine that supports
per2per transfers.

>  Are the
> two number ranges sharing the same address space, i.e. is
> request '7' as the destination guaranteed to be the same device
> as request '7' in the source?

Request line should be unique. It means a real wire from slave hw to the
dmac.
Russell King - ARM Linux Jan. 29, 2013, 2:24 p.m. UTC | #12
On Tue, Jan 29, 2013 at 01:44:10PM +0000, Arnd Bergmann wrote:
> Can you give an example for this? We were careful to make sure it
> works with platforms that connect a slave to multiple dma engines,
> out of which any could be used for a given transfer. In the device
> tree binding, you specify all possible controllers and give the
> alternatives the same name, for example:
> 
> 	serial@10000000 {
> 		compatible = "arm,pl011", "arm,primecell";
> 		dmas = <dwdma0 7 0>, <dwdma0 8 1>, <&pl330 17>, <&pl330 15>;
> 		dma-names = "rx", "tx", "rx", "tx;
> 		...
> 	};

No, that's not what I mean.  I mean the situation we find on Versatile
platforms:

            8     3                   >3
PL080 DMA --/--+--/------> FPGA Mux --/--> {bunch of off-CPU peripherals}
               |  5
               `--/------> {On-CPU peripherals}

This is something that I've been raising _every time_ I've been involved
with DMA engine discussions when it comes to the matching stuff, so this
is nothing new, and it's not unknown about.

What is different this time around is that I've been purposely omitted
from the discussions (like what seems to be happening more and more - I
notice that I'm no longer copied on PL011 patches despite being the
driver author, or GIC patches, or VIC patches) so this stuff doesn't
get properly considered.

But it doesn't matter anymore; I'm soo out of the loop on stuff like DT
and the like that my input would be more of a hinderence now.
Russell King - ARM Linux Jan. 29, 2013, 2:26 p.m. UTC | #13
On Tue, Jan 29, 2013 at 03:45:40PM +0200, Andy Shevchenko wrote:
> On Tue, 2013-01-29 at 13:31 +0000, Arnd Bergmann wrote: 
> > On Tuesday 29 January 2013, Viresh Kumar wrote:
> > > You can still keep fargs as is and just fill them as:
> > > 
> > >        fargs.cfg_lo = 0;
> > > 
> > > if (DMA_TO_DEV)
> > >        // dest is periph
> > >        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 11;
> > > else if (DEV_TO_DMA)
> > >        // src is periph
> > >        fargs.cfg_hi = be32_to_cpup(dma_spec->args+0) << 7;
> > > 
> > > The field size is 4 bits.
> > 
> > Ah, good. So I guess the "dma-requests" property should actually
> > be "16" then.
> > 
> > Does this mean that an implicit zero request line means memory?
> 
> No, it doesn't.
> When dma is doing mem2mem transfers the request line field is ignored by
> the hw.

Memory to memory transfers are dealt with using a totally different API
to the slave API.  Look at the rest of the DMA engine API to see how it's
used - any channel is selected with a DMA_MEMCPY capability.

(IMHO, the MEM2MEM transfer type against the slave API should never have
been permitted.)
Arnd Bergmann Jan. 29, 2013, 2:55 p.m. UTC | #14
On Tuesday 29 January 2013, Russell King - ARM Linux wrote:
> No, that's not what I mean.  I mean the situation we find on Versatile
> platforms:
> 
>             8     3                   >3
> PL080 DMA --/--+--/------> FPGA Mux --/--> {bunch of off-CPU peripherals}
>                |  5
>                `--/------> {On-CPU peripherals}
> 
> This is something that I've been raising _every time_ I've been involved
> with DMA engine discussions when it comes to the matching stuff, so this
> is nothing new, and it's not unknown about.

Ok, I see. I have not actually been involved with the DMA engine API
much, so for me it's the first time /I/ see this being explained.

From the DT binding perspective, doing this should be no problem. I guess
you would model the MUX as a trivial dma engine device that forwards to
another one, just like we do for nested interrupt controllers:

	pl080: dma-engine@10000000 {
		compatible ="arm,pl080", "arm,primecell";
		reg = <0x10000000 0x1000>;
		dma-requests = <8>;
		dma-channels = <4>;
		#dma-cells = <2>;
	};

	fpga {
		mux: dma-mux@f0008000 {
			reg = <0xf0008000 100>;
			compatible = "arm,versatile-fpga-dmamux";
			dma-requests = <7>;
			dma-channels = <3>;
			#dma-cells = <1>;
			dmas = <&pl080 5 0>, <&pl080 6 0> <&pl080 7 0>;
			dma-names = "mux5", "mux6", "mux7";
		};
		...

		some-device@f000a000 {
			compatible = "foo,using-muxed-dma";
			dmas = <&mux 3>, <&mux 4>;
			dma-names = "rx", "tx";
		};
	};

The driver for foo,using-muxed-dma just requests a slave channel for its
lines, which ends up calling the xlate function of the driver for the mux.
That driver still needs to get written, of course, but it should be able
to recursively call dma_request_slave_channel on the pl080 device.
The arm,versatile-fpga-dmamux driver would not be registered to the dmaengine
layer, but it would register as an of_dma_controller.

	Arnd
Viresh Kumar Jan. 29, 2013, 3:17 p.m. UTC | #15
On 29 January 2013 19:01, Arnd Bergmann <arnd@arndb.de> wrote:
> Ah, good. So I guess the "dma-requests" property should actually
> be "16" then.

yes, even i was checking on that separately :)

> Does this mean that an implicit zero request line means memory?

No. 0 is also request line for a peripheral and numbers are from 0 to 15.
memcpy are identified by setting type of transfer as memcpy.

> Could we have device-to-device DMAs with this controller, and if
> we can, should we have both 'src' and 'dst' fields? Are the
> two number ranges sharing the same address space, i.e. is
> request '7' as the destination guaranteed to be the same device
> as request '7' in the source?

Request lines are per master... So, for a master single request line
is independent of direction. Many DMA controllers have capability of
doing dev-to-dev transfers but DMAENGINE doesn't have any support
for it, even we don't have a usecase too :)

> If we need two lines, we could interleave them with the bus
> master numbers:

not required.

> In theory, we could use bit-stuffing to put them all into
> a single 32 bit word I guess, but generally people don't
> seem to like that for new bindings.

:)

--
viresh
Arnd Bergmann Jan. 29, 2013, 3:28 p.m. UTC | #16
On Tuesday 29 January 2013, Andy Shevchenko wrote:
> On Tue, 2013-01-29 at 13:31 +0000, Arnd Bergmann wrote: 
> > On Tuesday 29 January 2013, Viresh Kumar wrote:
> > Ah, good. So I guess the "dma-requests" property should actually
> > be "16" then.
> > 
> > Does this mean that an implicit zero request line means memory?
> 
> No, it doesn't.
> When dma is doing mem2mem transfers the request line field is ignored by
> the hw.

Ok.

> > Could we have device-to-device DMAs with this controller, and if
> > we can, should we have both 'src' and 'dst' fields?
> 
> As far as I know there is no driver under dmaengine that supports
> per2per transfers.

Well, I'm asking because we need to describe the hardware properly,
even if drivers today don't do this does not mean that it is not
possible. We can always change the Linux implementation, but we
cannot as easily change an established DT binding, just like
we don't change user space interfaces.

Of course, if that is a thing we don't expect anyone to do,
we don't have to describe it, or we could make a compatible
extension to the binding later.

> >  Are the
> > two number ranges sharing the same address space, i.e. is
> > request '7' as the destination guaranteed to be the same device
> > as request '7' in the source?
> 
> Request line should be unique. It means a real wire from slave
> hw to the dmac.

Ok, good.

	Arnd
Russell King - ARM Linux Jan. 29, 2013, 3:44 p.m. UTC | #17
On Tue, Jan 29, 2013 at 02:55:49PM +0000, Arnd Bergmann wrote:
> On Tuesday 29 January 2013, Russell King - ARM Linux wrote:
> > No, that's not what I mean.  I mean the situation we find on Versatile
> > platforms:
> > 
> >             8     3                   >3
> > PL080 DMA --/--+--/------> FPGA Mux --/--> {bunch of off-CPU peripherals}
> >                |  5
> >                `--/------> {On-CPU peripherals}
> > 
> > This is something that I've been raising _every time_ I've been involved
> > with DMA engine discussions when it comes to the matching stuff, so this
> > is nothing new, and it's not unknown about.
> 
> Ok, I see. I have not actually been involved with the DMA engine API
> much, so for me it's the first time /I/ see this being explained.
> 
> From the DT binding perspective, doing this should be no problem. I guess
> you would model the MUX as a trivial dma engine device that forwards to
> another one, just like we do for nested interrupt controllers:
> 
> 	pl080: dma-engine@10000000 {
> 		compatible ="arm,pl080", "arm,primecell";
> 		reg = <0x10000000 0x1000>;
> 		dma-requests = <8>;
> 		dma-channels = <4>;
> 		#dma-cells = <2>;
> 	};
> 
> 	fpga {
> 		mux: dma-mux@f0008000 {
> 			reg = <0xf0008000 100>;
> 			compatible = "arm,versatile-fpga-dmamux";
> 			dma-requests = <7>;
> 			dma-channels = <3>;
> 			#dma-cells = <1>;
> 			dmas = <&pl080 5 0>, <&pl080 6 0> <&pl080 7 0>;
> 			dma-names = "mux5", "mux6", "mux7";
> 		};
> 		...
> 
> 		some-device@f000a000 {
> 			compatible = "foo,using-muxed-dma";
> 			dmas = <&mux 3>, <&mux 4>;
> 			dma-names = "rx", "tx";
> 		};
> 	};
> 
> The driver for foo,using-muxed-dma just requests a slave channel for its
> lines, which ends up calling the xlate function of the driver for the mux.
> That driver still needs to get written, of course, but it should be able
> to recursively call dma_request_slave_channel on the pl080 device.
> The arm,versatile-fpga-dmamux driver would not be registered to the dmaengine
> layer, but it would register as an of_dma_controller.

That's a good way to represent it but it fails in a very big way:
You're stuffing N peripherals down to 3 request lines to the DMA
engine, and you may want more than 3 of those peripherals to be
making use of the DMA engine at any one time.

Before anyone says "you shouldn't be doing this" consider this:
your typical DMA slave engine already has this structure:

N DMA channels <---> M DMA requests <---> M peripherals

where N < M.  In other words, there is _already_ a MUX between the
peripherals and the DMA engine channels themselves (what do you think
the "request index" which you have to program into DMA channel control
registers is doing...

We support this external mux today in the PL080 driver - and we do that
by having the PL080 driver do the scheduling of virtual DMA channels on
the actual hardware itself.  The PL080 driver has knowledge that there
may be some sort of additional muxing layer between it and the
peripheral.

As the APIs stand today, you just can't do this without having the
DMA engine driver itself intimately involved because a layer above
doesn't really have much clue as to what's going on, and the DMA
engine stuff itself doesn't layer particularly well.
Arnd Bergmann Jan. 29, 2013, 4:21 p.m. UTC | #18
On Tuesday 29 January 2013, Viresh Kumar wrote:
> On 29 January 2013 19:01, Arnd Bergmann <arnd@arndb.de> wrote:
> > Ah, good. So I guess the "dma-requests" property should actually
> > be "16" then.
> 
> yes, even i was checking on that separately :)

Actually, I just discovered something odd in the
arch/arm/mach-spear/spear13xx-dma.h file that gets removed
in the last patch: there, we define request numbers up to
32, e.g.

-       SPEAR1310_DMA_REQ_UART2_RX = 14,
-       SPEAR1310_DMA_REQ_UART2_TX = 15,
-       SPEAR1310_DMA_REQ_UART5_RX = 16,
-       SPEAR1310_DMA_REQ_UART5_TX = 17,

What is the meaning of this, if the maximum request number is 15?
 
> > Could we have device-to-device DMAs with this controller, and if
> > we can, should we have both 'src' and 'dst' fields? Are the
> > two number ranges sharing the same address space, i.e. is
> > request '7' as the destination guaranteed to be the same device
> > as request '7' in the source?
> 
> Request lines are per master... So, for a master single request line
> is independent of direction. Many DMA controllers have capability of
> doing dev-to-dev transfers but DMAENGINE doesn't have any support
> for it, even we don't have a usecase too :)
> 
> > If we need two lines, we could interleave them with the bus
> > master numbers:
> 
> not required.

Ok. Would it be enough to have only one master and one request
field in the DT dma descriptor then, and have the code figure
whether to use it as source or destination, based on the
configuration? Which one should come first? Since you have
multiple masters per controller, and multiple requests per
master, it sounds like the cleanest descriptor form would
be 

	<controller master request>;

Or possibly

	<controller master request direction>;

if the direction needs to be known at the time the channel
is requested.

	Arnd
Arnd Bergmann Jan. 29, 2013, 4:36 p.m. UTC | #19
On Tuesday 29 January 2013, Russell King - ARM Linux wrote:
> That's a good way to represent it but it fails in a very big way:
> You're stuffing N peripherals down to 3 request lines to the DMA
> engine, and you may want more than 3 of those peripherals to be
> making use of the DMA engine at any one time.
> 
> Before anyone says "you shouldn't be doing this" consider this:
> your typical DMA slave engine already has this structure:
> 
> N DMA channels <---> M DMA requests <---> M peripherals
> 
> where N < M.  In other words, there is already a MUX between the
> peripherals and the DMA engine channels themselves (what do you think
> the "request index" which you have to program into DMA channel control
> registers is doing...

Ok.

> We support this external mux today in the PL080 driver - and we do that
> by having the PL080 driver do the scheduling of virtual DMA channels on
> the actual hardware itself.  The PL080 driver has knowledge that there
> may be some sort of additional muxing layer between it and the
> peripheral.
> 
> As the APIs stand today, you just can't do this without having the
> DMA engine driver itself intimately involved because a layer above
> doesn't really have much clue as to what's going on, and the DMA
> engine stuff itself doesn't layer particularly well.

If the pl080 driver already has code for the mux in it, then it should
handle both of_dma_controller instances in my example. It would
not change anything regarding the binding, which just describes the
way that the hardware is connected. I have not looked at the implementation
of the pl080 driver, but I'd assume we could get away with just having
two separate xlate() functions. It's slightly ugly to have one driver
take responsibility for two device_node:s, but it's not unheard of.
In the probe function for the pl080 node, the driver can walk the
entire device tree to find any mux devices connected to it and
register an of_dma_controller() with its xlate function for those.

Unless you see another issue with this, I'd assume it's all covered
by the new interface, but it also doesn't get better than what we
have today.

	Arnd
Russell King - ARM Linux Jan. 29, 2013, 5:45 p.m. UTC | #20
On Tue, Jan 29, 2013 at 04:36:38PM +0000, Arnd Bergmann wrote:
> If the pl080 driver already has code for the mux in it, then it should
> handle both of_dma_controller instances in my example. It would
> not change anything regarding the binding, which just describes the
> way that the hardware is connected. I have not looked at the implementation
> of the pl080 driver, but I'd assume we could get away with just having
> two separate xlate() functions.

Well, how it all works in the PL08x driver at present is:

- the platform code passes into the PL08x driver a description of the
  virtual channels, eg:

        [1] = {
                /* Muxed on channel 0-3 */
                .bus_id = "aacirx",
                .min_signal = 0,
                .max_signal = 2,
                .muxval = 0x01,
                .periph_buses = PL08X_AHB1 | PL08X_AHB2,
        },

- the virtual channels include:
  - the minimum and maximum index of the DMA request signals into the
    PL08x that can be used with this peripheral.
  - the the register value for the external mux, if any.
  - other PL08x specific data to do with this DMA peripheral

- when a virtual channel is requested by a DMA client, it claims just
  the virtual channel.  No actual hardware resources are assigned,
  and no mappings are setup.

- when a transfer is prepared on a virtual channel, part of the
  preparation is to locate the request signal to be used - and
  platform code is requested to supply that from the description
  associated with the channel (such as the above fragment.)

- the versatile PL08x code looks at min_signal/max_signal, and walks
  this space looking for an unassigned request signal.  If there is
  an external mux associated with this request signal, the mux is
  appropriately programmed.  If a request signal is currently assigned
  the next request signal will be tried until there are no further
  possibilities, when failure occurs.  In that case, the prepare
  function also fails.

- the PL08x driver then knows which request signal is associated with
  the peripheral, and sets up the descriptors to be dependent on this
  request signal.  This mapping must not change until the transfer
  has completed.

- when the descriptor is completed - and freed, the muxing is released
  and the DMA request signal becomes available for other users to make
  use of.

Practically, what this means is that:

(a) we've ensured that all drivers using PL08x do _not_ expect that
descriptors can always be prepared; they must continue to work correctly
when the prepare function returns NULL.

(b) several devices end up sharing the first three request signals
and they're used on an opportunistic basis.  Theoretically, if you have
one of these boards where AACI DMA works, you can be using one of these
DMA requests for audio playback, another for record, and the remaining
one can be used on an opportunistic basis between the second MMCI
interface (should that also work - which I've proven is an impossiblity!)
and the 3rd UART... or the USB if there was a driver for that device.
Arnd Bergmann Jan. 29, 2013, 8:40 p.m. UTC | #21
On Tuesday 29 January 2013, Russell King - ARM Linux wrote:
> Well, how it all works in the PL08x driver at present is:

<snip>

Thanks for the explanations. If I end up implementing the DT support
for pl08x, this will be very helpful. I looked at the git history
for mach-versatile and could not find any of it there, although
the patches were certainly on the mailing list
http://lists.infradead.org/pipermail/linux-arm-kernel/2010-June/017818.html

Do you (or Linus) know what happened to the patch series?

Based on your description, it sounds all doable, but the split
between platform specific code and device driver code would be
different: While the muxing that you describe all takes place
in the get_signal/put_signal functions in platform code, doing
a proper DT binding for the mux would imply moving that into the
pl080 driver itself, at least as a compile-time option for the
driver. Do you think that would be acceptable?

While I guess we could still keep the get_signal/put_signal
callbacks in mach-versatile but drop them for platforms
without a mux, I'm not sure if that would make the overall
driver better or worse.

	Arnd
Linus Walleij Jan. 29, 2013, 9:59 p.m. UTC | #22
On Tue, Jan 29, 2013 at 9:40 PM, Arnd Bergmann <arnd@arndb.de> wrote:

> the patches were certainly on the mailing list
> http://lists.infradead.org/pipermail/linux-arm-kernel/2010-June/017818.html
>
> Do you (or Linus) know what happened to the patch series?

Just stalled. My fault probably, and IIRC Russell sent a
modified version of this patch at one time.

> Based on your description, it sounds all doable, but the split
> between platform specific code and device driver code would be
> different: While the muxing that you describe all takes place
> in the get_signal/put_signal functions in platform code, doing
> a proper DT binding for the mux would imply moving that into the
> pl080 driver itself, at least as a compile-time option for the
> driver. Do you think that would be acceptable?

This is the pushing-to-driver paradigm, and for the legacy
support it will be troublesome as it needs to supply the
base address of the mux etc through the platform data,
and then with an ampersand reference to the syscon
in the dma node.

> While I guess we could still keep the get_signal/put_signal
> callbacks in mach-versatile but drop them for platforms
> without a mux, I'm not sure if that would make the overall
> driver better or worse.

I think that is not the big issue though.

I think the most important thing is to come up
with a OS-neutral description of channels vs
muxes and signals.

Basically all DMA hardware has a mux, but in
many cases it is fused with the DMA controller
itself, and sometimes even hidden from software,
i.e. it seems there is one channel per peripheral,
but if you try to use them all at the same time
something like round-robin bursts can be expected
(in best case).

For each channel, you need to be able to specify
a number of applicable request lines, with an
optional mux component inbetween. And I think
the bindings should be generic, because the problem
is generic.

I tried to express this in DT syntax but failed, I
just don't know how to do that, DT seems to be
very much not about muxes and things that take
optional paths, more about describing how static
electronics are set up.

The basic assumption of a 1:1 mapping between
a peripheral and a channel is bogus - this is true on
some hardware (like the COH901318), but certainly
not all, the Versatile and RealView being the most
problematic examples to date with the separate mux
and all.

Maybe stupid analogy:
http://en.wikipedia.org/wiki/File:JT_Switchboard_770x540.jpg
The actual channels are the cords.
The the sockets are the devices.
The telephonist is the mux.

So looking up a DMA channel for RX for the PL011
can be like trying to find a blue cord to put in the
PL011 socket when the red light goes on for RX
DMA. If no blue cord is available the telephonist can
plug in the handset and say "sorry, failed to find a
free line".

Conversely to TX DMA you may need a red
cord. Having a blue cord at hand will not help
you.

Coding it down in the device tree like in the
example is like plugging the cords in to some
holes at boot time and let the telephonist go
home.

Yours,
Linus Walleij
Viresh Kumar Jan. 30, 2013, 2:04 a.m. UTC | #23
On 29 January 2013 21:51, Arnd Bergmann <arnd@arndb.de> wrote:
> On Tuesday 29 January 2013, Viresh Kumar wrote:
>> On 29 January 2013 19:01, Arnd Bergmann <arnd@arndb.de> wrote:
>> > Ah, good. So I guess the "dma-requests" property should actually
>> > be "16" then.
>>
>> yes, even i was checking on that separately :)
>
> Actually, I just discovered something odd in the
> arch/arm/mach-spear/spear13xx-dma.h file that gets removed
> in the last patch: there, we define request numbers up to
> 32, e.g.
>
> -       SPEAR1310_DMA_REQ_UART2_RX = 14,
> -       SPEAR1310_DMA_REQ_UART2_TX = 15,
> -       SPEAR1310_DMA_REQ_UART5_RX = 16,
> -       SPEAR1310_DMA_REQ_UART5_TX = 17,
>
> What is the meaning of this, if the maximum request number is 15?

I knew you will come to this :)
So, the hardware is like: there are 16 request line slots per master, a
platform can choose to connect same or separate devices to these.

So, these are really 16 per master.

> Ok. Would it be enough to have only one master and one request
> field in the DT dma descriptor then, and have the code figure
> whether to use it as source or destination, based on the
> configuration? Which one should come first? Since you have
> multiple masters per controller, and multiple requests per
> master, it sounds like the cleanest descriptor form would
> be
>
>         <controller master request>;
>
> Or possibly
>
>         <controller master request direction>;
>
> if the direction needs to be known at the time the channel
> is requested.

Its better to keep masters as is. So, that we can use appropriate
masters for peripheral and memory to make the transfer fast.
Arnd Bergmann Jan. 30, 2013, 9:41 a.m. UTC | #24
On Wednesday 30 January 2013, Viresh Kumar wrote:
> On 29 January 2013 21:51, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Tuesday 29 January 2013, Viresh Kumar wrote:
> >> On 29 January 2013 19:01, Arnd Bergmann <arnd@arndb.de> wrote:
> >> > Ah, good. So I guess the "dma-requests" property should actually
> >> > be "16" then.
> >>
> >> yes, even i was checking on that separately :)
> >
> > Actually, I just discovered something odd in the
> > arch/arm/mach-spear/spear13xx-dma.h file that gets removed
> > in the last patch: there, we define request numbers up to
> > 32, e.g.
> >
> > -       SPEAR1310_DMA_REQ_UART2_RX = 14,
> > -       SPEAR1310_DMA_REQ_UART2_TX = 15,
> > -       SPEAR1310_DMA_REQ_UART5_RX = 16,
> > -       SPEAR1310_DMA_REQ_UART5_TX = 17,
> >
> > What is the meaning of this, if the maximum request number is 15?
> 
> I knew you will come to this :)
> So, the hardware is like: there are 16 request line slots per master, a
> platform can choose to connect same or separate devices to these.
> 
> So, these are really 16 per master.

Ok, I see. Do you know how these are numbered in the data sheet?

If the convention is to have subsequent numbers for these in the
hardware description, we should probably just have that single
request number in the binding, too, and calculate the master number
from that. If it lists pairs of request/master number, we should
use pairs in the binding as well, in the same order.

> > Ok. Would it be enough to have only one master and one request
> > field in the DT dma descriptor then, and have the code figure
> > whether to use it as source or destination, based on the
> > configuration? Which one should come first? Since you have
> > multiple masters per controller, and multiple requests per
> > master, it sounds like the cleanest descriptor form would
> > be
> >
> >         <controller master request>;
> >
> > Or possibly
> >
> >         <controller master request direction>;
> >
> > if the direction needs to be known at the time the channel
> > is requested.
> 
> Its better to keep masters as is. So, that we can use appropriate
> masters for peripheral and memory to make the transfer fast.

So you mean keep the format as

	<controller request src-master dst-master>;

?

	Arnd
Viresh Kumar Jan. 30, 2013, 9:48 a.m. UTC | #25
On Wed, Jan 30, 2013 at 3:11 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Wednesday 30 January 2013, Viresh Kumar wrote:
>> I knew you will come to this :)
>> So, the hardware is like: there are 16 request line slots per master, a
>> platform can choose to connect same or separate devices to these.
>>
>> So, these are really 16 per master.
>
> Ok, I see. Do you know how these are numbered in the data sheet?
>
> If the convention is to have subsequent numbers for these in the
> hardware description, we should probably just have that single
> request number in the binding, too, and calculate the master number
> from that. If it lists pairs of request/master number, we should
> use pairs in the binding as well, in the same order.

Actually what would be better to have is:
- have this range from 0-15 only
- together with the master we want to use for peripheral

this should be enough.

Datasheet of dw_dmac doesn't tell much about it.. just four bits for programming
it and so values are from 0-15 :)

>> > Ok. Would it be enough to have only one master and one request
>> > field in the DT dma descriptor then, and have the code figure
>> > whether to use it as source or destination, based on the
>> > configuration? Which one should come first? Since you have
>> > multiple masters per controller, and multiple requests per
>> > master, it sounds like the cleanest descriptor form would
>> > be
>> >
>> >         <controller master request>;
>> >
>> > Or possibly
>> >
>> >         <controller master request direction>;
>> >
>> > if the direction needs to be known at the time the channel
>> > is requested.
>>
>> Its better to keep masters as is. So, that we can use appropriate
>> masters for peripheral and memory to make the transfer fast.
>
> So you mean keep the format as
>
>         <controller request src-master dst-master>;
>
> ?

Yes..
Arnd Bergmann Jan. 30, 2013, 10:08 a.m. UTC | #26
On Wednesday 30 January 2013, Viresh Kumar wrote:
> On Wed, Jan 30, 2013 at 3:11 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Wednesday 30 January 2013, Viresh Kumar wrote:
> >> I knew you will come to this :)
> >> So, the hardware is like: there are 16 request line slots per master, a
> >> platform can choose to connect same or separate devices to these.
> >>
> >> So, these are really 16 per master.
> >
> > Ok, I see. Do you know how these are numbered in the data sheet?
> >
> > If the convention is to have subsequent numbers for these in the
> > hardware description, we should probably just have that single
> > request number in the binding, too, and calculate the master number
> > from that. If it lists pairs of request/master number, we should
> > use pairs in the binding as well, in the same order.
> 
> Actually what would be better to have is:
> - have this range from 0-15 only
> - together with the master we want to use for peripheral
>
> this should be enough.

Ok.
 
> Datasheet of dw_dmac doesn't tell much about it.. just four bits for programming
> it and so values are from 0-15 :)

I meant the spear13xx data sheet, which has to list the request lines
for its integrated components. There may be other SoCs using the
same dw_dmac, but this is the main one that is upstream now, and it's
probably as good as any other one. I just wouldn't want to establish
a binding that doesn't match any of the known implementations in the
way it expresses request lines.

	Arnd
Viresh Kumar Jan. 30, 2013, 10:32 a.m. UTC | #27
On 30 January 2013 15:38, Arnd Bergmann <arnd@arndb.de> wrote:
> I meant the spear13xx data sheet, which has to list the request lines
> for its integrated components. There may be other SoCs using the
> same dw_dmac, but this is the main one that is upstream now, and it's
> probably as good as any other one. I just wouldn't want to establish
> a binding that doesn't match any of the known implementations in the
> way it expresses request lines.

Your binding (<controller request-line src-master dst-master) is good
enough for SPEAr :)

SPEAr13xx is a bit complex. Request lines are distributed among masters.
+ there is multiplexing among them.. over that there are two controllers..

But yes, your binding is good enough :)
Andy Shevchenko Feb. 15, 2013, 8:50 a.m. UTC | #28
On Mon, 2013-01-28 at 21:58 +0000, Arnd Bergmann wrote: 
> The original device tree binding for this driver, from Viresh Kumar
> unfortunately conflicted with the generic DMA binding, and did not allow
> to completely seperate slave device configuration from the controller.
> 
> This is an attempt to replace it with an implementation of the generic
> binding, but it is currently completely untested, because I do not have
> any hardware with this particular controller.
> 
> The patch applies on top of linux-next, which contains both the base
> support for the generic DMA binding, as well as the earlier attempt from
> Viresh. Both of these are currently not merged upstream however.
> 
> There are a couple of TODO items that are left remaining and are open
> for ideas from other people.

Have one question and one comment.

So, what is the status of this work? Do you manage to provide something
for v3.9? (Oh, two questions :-) )

> --- a/drivers/dma/dw_dmac.c
> +++ b/drivers/dma/dw_dmac.c

> @@ -1765,7 +1753,11 @@ static int dw_probe(struct platform_device *pdev)
>  
>  	dma_async_device_register(&dw->dma);
>  
> -	return 0;
> +	err = of_dma_controller_register(pdev->dev.of_node, dw_dma_xlate, dw);
> +	if (err)
> +		dma_async_device_unregister(&dw->dma);

I don't think this is a good idea. The impossibility to register in the
of-dma helper is not critical. Just printing debug message is enough.
Arnd Bergmann Feb. 15, 2013, 11:17 a.m. UTC | #29
On Friday 15 February 2013, Andy Shevchenko wrote:
> Have one question and one comment.
> 
> So, what is the status of this work? Do you manage to provide something
> for v3.9? (Oh, two questions :-) )

I was going to bring it up today myself. Unfortunately the patches have
never been tested on real hardware and I have not posted a version that
includes the feedback I got, so I don't think it's a good idea to
use this in v3.9. However, not doing it causes problems since Vinod's
dma-slave tree still contains Viresh's earlier patches, causing
a few problems:

* With these patches, the spear3xx platform currently does not build.
  (this one is easy to fix though)
* There is a conflict between these patches and my spear multiplatform
  series, which I have not yet queued up for 3.9 because of this,
  since that would have meant that Stephen Rothwell would have to
  discard either the arm-soc tree or the dma-slave tree from linux-next.
* I really don't want the broken binding to appear in 3.9.

I believe the best way out at this point is that I do an updated
version of my patch, and Vinod first reverts the patch f9965aa20
"ARM: SPEAr13xx: Pass DW DMAC platform data from DT" in his tree
and then applies my update. This will give us the right DT binding
for dw-dmac but no in-tree users, which means that nothing breaks
if I get it wrong.

I can then decide with Olof whether or not to take the spear multiplatform
changes that no longer conflict with the dma slave tree as a "late"
branch into 3.9 or wait until 3.10, but that is something you don't
need to worry about then. Also the conversion of spear to use
the new binding (patch 5 of this series) can go through the arm-soc
tree for 3.10 after the ST folks have tested that it works.

> > --- a/drivers/dma/dw_dmac.c
> > +++ b/drivers/dma/dw_dmac.c
> 
> > @@ -1765,7 +1753,11 @@ static int dw_probe(struct platform_device *pdev)
> >  
> >       dma_async_device_register(&dw->dma);
> >  
> > -     return 0;
> > +     err = of_dma_controller_register(pdev->dev.of_node, dw_dma_xlate, dw);
> > +     if (err)
> > +             dma_async_device_unregister(&dw->dma);
> 
> I don't think this is a good idea. The impossibility to register in the
> of-dma helper is not critical. Just printing debug message is enough.

Ok, makes sense. Thanks!

	Arnd
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index 5bb3dfb..212d387 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -3,59 +3,62 @@ 
 Required properties:
 - compatible: "snps,dma-spear1340"
 - reg: Address range of the DMAC registers
-- interrupt-parent: Should be the phandle for the interrupt controller
-  that services interrupts for this device
 - interrupt: Should contain the DMAC interrupt number
-- nr_channels: Number of channels supported by hardware
-- is_private: The device channels should be marked as private and not for by the
-  general purpose DMA channel allocator. False if not passed.
+- dma-channels: Number of channels supported by hardware
+- dma-requests: Number of DMA request lines supported
+- dma-masters: Number of AHB masters supported by the controller
+- #dma-cells: must be <3>
 - chan_allocation_order: order of allocation of channel, 0 (default): ascending,
   1: descending
 - chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
   increase from chan n->0
 - block_size: Maximum block size supported by the controller
-- nr_masters: Number of AHB masters supported by the controller
 - data_width: Maximum data width supported by hardware per AHB master
   (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
-- slave_info:
-	- bus_id: name of this device channel, not just a device name since
-	  devices may have more than one channel e.g. "foo_tx". For using the
-	  dw_generic_filter(), slave drivers must pass exactly this string as
-	  param to filter function.
-	- cfg_hi: Platform-specific initializer for the CFG_HI register
-	- cfg_lo: Platform-specific initializer for the CFG_LO register
-	- src_master: src master for transfers on allocated channel.
-	- dst_master: dest master for transfers on allocated channel.
+
+
+Optional properties:
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- is_private: The device channels should be marked as private and not for by the
+  general purpose DMA channel allocator. False if not passed.
 
 Example:
 
-	dma@fc000000 {
+	dmahost: dma@fc000000 {
 		compatible = "snps,dma-spear1340";
 		reg = <0xfc000000 0x1000>;
 		interrupt-parent = <&vic1>;
 		interrupts = <12>;
 
-		nr_channels = <8>;
+		dma-channels = <8>;
+		dma-requests = <32>;
+		dma-masters = <2>;
+		#dma-cells = <3>;
 		chan_allocation_order = <1>;
 		chan_priority = <1>;
 		block_size = <0xfff>;
-		nr_masters = <2>;
 		data_width = <3 3 0 0>;
+	};
 
-		slave_info {
-			uart0-tx {
-				bus_id = "uart0-tx";
-				cfg_hi = <0x4000>;	/* 0x8 << 11 */
-				cfg_lo = <0>;
-				src_master = <0>;
-				dst_master = <1>;
-			};
-			spi0-tx {
-				bus_id = "spi0-tx";
-				cfg_hi = <0x2000>;	/* 0x4 << 11 */
-				cfg_lo = <0>;
-				src_master = <0>;
-				dst_master = <0>;
-			};
-		};
+DMA clients connected to the Designware DMA controller must use the format
+described in the dma.txt file, using a five-cell specifier for each channel.
+The five cells in order are:
+
+1. A phandle pointing to the DMA controller
+2. The value for the cfg_hi register.
+3. The value for the cfg_lo register.
+4. Source master for transfers on allocated channel.
+5. Destination master for transfers on allocated channel.
+
+Example:
+	
+	serial@e0000000 {
+		compatible = "arm,pl011", "arm,primecell";
+		reg = <0xe0000000 0x1000>;
+		interrupts = <0 35 0x4>;
+		status = "disabled";
+		dmas = <&dmahost 0x6000 0 0 1>,
+			<&dmahost 0x680 0 1 0>;
+		dma-names = "rx", "rx";
 	};
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 8cfaaf8..88504c2 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -18,6 +18,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/of_dma.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -1179,49 +1180,69 @@  static void dwc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
+/* forward declaration used in filter */
+static struct platform_driver dw_driver;
+
+struct dw_dma_filter_args {
+	struct dw_dma *dw;
+	u32 cfg_lo;
+	u32 cfg_hi;
+	unsigned src;
+	unsigned dst;
+};
+
+static bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
 {
 	struct dw_dma *dw = to_dw_dma(chan->device);
-	static struct dw_dma *last_dw;
-	static char *last_bus_id;
-	int i = -1;
+	struct dw_dma_filter_args *fargs = param;
+	struct dw_dma_slave *sd;
 
-	/*
-	 * dmaengine framework calls this routine for all channels of all dma
-	 * controller, until true is returned. If 'param' bus_id is not
-	 * registered with a dma controller (dw), then there is no need of
-	 * running below function for all channels of dw.
-	 *
-	 * This block of code does this by saving the parameters of last
-	 * failure. If dw and param are same, i.e. trying on same dw with
-	 * different channel, return false.
-	 */
-	if ((last_dw == dw) && (last_bus_id == param))
+	/* both the driver and the device must match */
+        if (chan->device->dev->driver != &dw_driver.driver)
+                return false;
+	if (dw != fargs->dw)
 		return false;
-	/*
-	 * Return true:
-	 * - If dw_dma's platform data is not filled with slave info, then all
-	 *   dma controllers are fine for transfer.
-	 * - Or if param is NULL
-	 */
-	if (!dw->sd || !param)
-		return true;
 
-	while (++i < dw->sd_count) {
-		if (!strcmp(dw->sd[i].bus_id, param)) {
-			chan->private = &dw->sd[i];
-			last_dw = NULL;
-			last_bus_id = NULL;
+	/* FIXME: memory leak! could we put this into dw_dma_chan? */
+	sd = devm_kzalloc(dw->dma.dev, sizeof (*sd), GFP_KERNEL);
+	if (!sd)
+		return false;
 
-			return true;
-		}
-	}
+	sd->dma_dev	= dw->dma.dev;
+	sd->cfg_hi	= fargs->cfg_hi;
+	sd->cfg_lo	= fargs->cfg_lo;
+	sd->src_master	= fargs->src;
+	sd->dst_master	= fargs->dst;
+
+	chan->private = sd;
 
-	last_dw = dw;
-	last_bus_id = param;
-	return false;
+	return true;
+}
+
+static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dw_dma *dw = ofdma->of_dma_data;
+	struct dw_dma_filter_args fargs = {
+		.dw = dw,
+	};
+	dma_cap_mask_t cap;
+
+	if (dma_spec->args_count != 4)
+		return NULL;
+
+	/* FIXME: This binding is rather clumsy. Can't we use the
+	   request line numbers here instead? */
+	fargs.cfg_lo = be32_to_cpup(dma_spec->args+0);
+	fargs.cfg_hi = be32_to_cpup(dma_spec->args+1);
+	fargs.src = be32_to_cpup(dma_spec->args+2);
+	fargs.dst = be32_to_cpup(dma_spec->args+3);
+
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+	/* FIXME: there should be a simpler way to do this */
+	return dma_request_channel(cap, dw_dma_generic_filter, &dma_spec->args[0]);
 }
-EXPORT_SYMBOL(dw_dma_generic_filter);
 
 /* --------------------- Cyclic DMA API extensions -------------------- */
 
@@ -1510,9 +1531,8 @@  static void dw_dma_off(struct dw_dma *dw)
 static struct dw_dma_platform_data *
 dw_dma_parse_dt(struct platform_device *pdev)
 {
-	struct device_node *sn, *cn, *np = pdev->dev.of_node;
+	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
-	struct dw_dma_slave *sd;
 	u32 tmp, arr[4];
 
 	if (!np) {
@@ -1524,7 +1544,7 @@  dw_dma_parse_dt(struct platform_device *pdev)
 	if (!pdata)
 		return NULL;
 
-	if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels))
+	if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
 		return NULL;
 
 	if (of_property_read_bool(np, "is_private"))
@@ -1539,7 +1559,7 @@  dw_dma_parse_dt(struct platform_device *pdev)
 	if (!of_property_read_u32(np, "block_size", &tmp))
 		pdata->block_size = tmp;
 
-	if (!of_property_read_u32(np, "nr_masters", &tmp)) {
+	if (!of_property_read_u32(np, "dma-masters", &tmp)) {
 		if (tmp > 4)
 			return NULL;
 
@@ -1551,36 +1571,6 @@  dw_dma_parse_dt(struct platform_device *pdev)
 		for (tmp = 0; tmp < pdata->nr_masters; tmp++)
 			pdata->data_width[tmp] = arr[tmp];
 
-	/* parse slave data */
-	sn = of_find_node_by_name(np, "slave_info");
-	if (!sn)
-		return pdata;
-
-	/* calculate number of slaves */
-	tmp = of_get_child_count(sn);
-	if (!tmp)
-		return NULL;
-
-	sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL);
-	if (!sd)
-		return NULL;
-
-	pdata->sd = sd;
-	pdata->sd_count = tmp;
-
-	for_each_child_of_node(sn, cn) {
-		sd->dma_dev = &pdev->dev;
-		of_property_read_string(cn, "bus_id", &sd->bus_id);
-		of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi);
-		of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo);
-		if (!of_property_read_u32(cn, "src_master", &tmp))
-			sd->src_master = tmp;
-
-		if (!of_property_read_u32(cn, "dst_master", &tmp))
-			sd->dst_master = tmp;
-		sd++;
-	}
-
 	return pdata;
 }
 #else
@@ -1644,8 +1634,6 @@  static int dw_probe(struct platform_device *pdev)
 	clk_prepare_enable(dw->clk);
 
 	dw->regs = regs;
-	dw->sd = pdata->sd;
-	dw->sd_count = pdata->sd_count;
 
 	/* get hardware configuration parameters */
 	if (autocfg) {
@@ -1765,7 +1753,11 @@  static int dw_probe(struct platform_device *pdev)
 
 	dma_async_device_register(&dw->dma);
 
-	return 0;
+	err = of_dma_controller_register(pdev->dev.of_node, dw_dma_xlate, dw);
+	if (err)
+		dma_async_device_unregister(&dw->dma);
+
+	return err;
 }
 
 static int dw_remove(struct platform_device *pdev)
@@ -1773,6 +1765,7 @@  static int dw_remove(struct platform_device *pdev)
 	struct dw_dma		*dw = platform_get_drvdata(pdev);
 	struct dw_dma_chan	*dwc, *_dwc;
 
+	of_dma_controller_free(pdev->dev.of_node);
 	dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 88a069f..8896559 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -239,10 +239,6 @@  struct dw_dma {
 	struct tasklet_struct	tasklet;
 	struct clk		*clk;
 
-	/* slave information */
-	struct dw_dma_slave	*sd;
-	unsigned int		sd_count;
-
 	u8			all_chan_mask;
 
 	/* hardware configuration */
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 41766de..481ab23 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -27,7 +27,6 @@ 
  */
 struct dw_dma_slave {
 	struct device		*dma_dev;
-	const char		*bus_id;
 	u32			cfg_hi;
 	u32			cfg_lo;
 	u8			src_master;
@@ -60,9 +59,6 @@  struct dw_dma_platform_data {
 	unsigned short	block_size;
 	unsigned char	nr_masters;
 	unsigned char	data_width[4];
-
-	struct dw_dma_slave *sd;
-	unsigned int sd_count;
 };
 
 /* bursts size */
@@ -114,6 +110,5 @@  void dw_dma_cyclic_stop(struct dma_chan *chan);
 dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
 
 dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
-bool dw_dma_generic_filter(struct dma_chan *chan, void *param);
 
 #endif /* DW_DMAC_H */