diff mbox

[1/2] dmaengine: rcar-dmac: add iommu support for slave transfers

Message ID 1452478667-30966-2-git-send-email-niklas.soderlund+renesas@ragnatech.se (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show

Commit Message

Niklas Söderlund Jan. 11, 2016, 2:17 a.m. UTC
Enable slave transfers to devices behind IPMMU:s by mapping the slave
addresses using the dma-mapping API.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
---
 drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)

Comments

kernel test robot Jan. 11, 2016, 2:37 a.m. UTC | #1
Hi Niklas,

[auto build test ERROR on renesas/next]
[also build test ERROR on v4.4 next-20160108]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Niklas-S-derlund/dmaengine-rcar-dmac-add-iommu-support-for-slave-transfers/20160111-102813
base:   https://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
config: xtensa-allyesconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=xtensa 

All error/warnings (new ones prefixed by >>):

   drivers/dma/sh/rcar-dmac.c: In function '__rcar_dmac_dma_map':
>> drivers/dma/sh/rcar-dmac.c:1109:9: error: implicit declaration of function 'phys_to_page' [-Werror=implicit-function-declaration]
     struct page *page = phys_to_page(addr);
            ^
>> drivers/dma/sh/rcar-dmac.c:1109:22: warning: initialization makes pointer from integer without a cast
     struct page *page = phys_to_page(addr);
                         ^
   cc1: some warnings being treated as errors

vim +/phys_to_page +1109 drivers/dma/sh/rcar-dmac.c

  1103	}
  1104	
  1105	static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t addr,
  1106			size_t size, enum dma_data_direction dir)
  1107	{
  1108		struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> 1109		struct page *page = phys_to_page(addr);
  1110		size_t offset = addr - page_to_phys(page);
  1111		dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
  1112				dir);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Geert Uytterhoeven Jan. 11, 2016, 7:55 a.m. UTC | #2
Hi Niklas,

On Mon, Jan 11, 2016 at 3:17 AM, Niklas Söderlund
<niklas.soderlund+renesas@ragnatech.se> wrote:
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/dmaengine.h>
>  #include <linux/interrupt.h>
> +#include <linux/iommu.h>
>  #include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
>         return desc;
>  }
>
> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t addr,
> +               size_t size, enum dma_data_direction dir)
> +{
> +       struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +       struct page *page = phys_to_page(addr);
> +       size_t offset = addr - page_to_phys(page);
> +       dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> +                       dir);
> +
> +       if (dma_mapping_error(chan->device->dev, map)) {
> +               dev_err(chan->device->dev, "chan%u: failed to map %zx@%pap",
> +                               rchan->index, size, &addr);
> +               return 0;

Zero may be a valid DMA address. Please use DMA_ERROR_CODE instead.

> +       }
> +
> +       return map;
> +}

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Niklas Söderlund Jan. 11, 2016, 6:59 p.m. UTC | #3
* Geert Uytterhoeven <geert@linux-m68k.org> [2016-01-11 08:55:48 +0100]:

> Hi Niklas,
>
> On Mon, Jan 11, 2016 at 3:17 AM, Niklas Söderlund
> <niklas.soderlund+renesas@ragnatech.se> wrote:
> > --- a/drivers/dma/sh/rcar-dmac.c
> > +++ b/drivers/dma/sh/rcar-dmac.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/dmaengine.h>
> >  #include <linux/interrupt.h>
> > +#include <linux/iommu.h>
> >  #include <linux/list.h>
> >  #include <linux/module.h>
> >  #include <linux/mutex.h>
> > @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> >         return desc;
> >  }
> >
> > +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t addr,
> > +               size_t size, enum dma_data_direction dir)
> > +{
> > +       struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +       struct page *page = phys_to_page(addr);
> > +       size_t offset = addr - page_to_phys(page);
> > +       dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> > +                       dir);
> > +
> > +       if (dma_mapping_error(chan->device->dev, map)) {
> > +               dev_err(chan->device->dev, "chan%u: failed to map %zx@%pap",
> > +                               rchan->index, size, &addr);
> > +               return 0;
>
> Zero may be a valid DMA address. Please use DMA_ERROR_CODE instead.

Thanks will fix together with the issue kbuild test robot found.

>
> > +       }
> > +
> > +       return map;
> > +}
>
> Gr{oetje,eeting}s,
>
>                         Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>                                 -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vinod Koul Jan. 13, 2016, 1:36 p.m. UTC | #4
On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> addresses using the dma-mapping API.
> 
> Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> ---
>  drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 60 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> index 7820d07..da94809 100644
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/dmaengine.h>
>  #include <linux/interrupt.h>
> +#include <linux/iommu.h>
>  #include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
>  	return desc;
>  }
>  
> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t addr,
> +		size_t size, enum dma_data_direction dir)
> +{
> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +	struct page *page = phys_to_page(addr);
> +	size_t offset = addr - page_to_phys(page);
> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> +			dir);

Hmmmm, dmaengine APIs for slave cases expect that the client has already
mapped and provided an address which the dmaengine understands. So doing
this in the driver here does not sound good to me
Niklas Söderlund Jan. 13, 2016, 1:55 p.m. UTC | #5
* Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:

> On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> > Enable slave transfers to devices behind IPMMU:s by mapping the slave
> > addresses using the dma-mapping API.
> >
> > Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> > ---
> >  drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 60 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> > index 7820d07..da94809 100644
> > --- a/drivers/dma/sh/rcar-dmac.c
> > +++ b/drivers/dma/sh/rcar-dmac.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/dmaengine.h>
> >  #include <linux/interrupt.h>
> > +#include <linux/iommu.h>
> >  #include <linux/list.h>
> >  #include <linux/module.h>
> >  #include <linux/mutex.h>
> > @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> >  	return desc;
> >  }
> >
> > +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t addr,
> > +		size_t size, enum dma_data_direction dir)
> > +{
> > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +	struct page *page = phys_to_page(addr);
> > +	size_t offset = addr - page_to_phys(page);
> > +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> > +			dir);
>
> Hmmmm, dmaengine APIs for slave cases expect that client has already
> ammped and provided an address which the dmaengine understands. So doing
> this in driver here does not sound good to me

It was my understanding that clients do not do this mapping and in fact
are expected not to. Is this not what Linus Walleij is trying to address
in '[PATCH] dmaengine: use phys_addr_t for slave configuration'?

 >> On Fri, Apr 26, 2013 at 11:06 AM, Linus Walleij
 >> <linus.walleij@linaro.org> wrote:
 >> > The documentation already says these are physical addresses, and
 >> > we have concluded that any translation into the DMA address space
 >> > needs to reside in the dmaengine driver, so change the type of
 >> > the passed arguments

--
// Niklas
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Jan. 13, 2016, 11:13 p.m. UTC | #6
Hi Vinod,

(CC'ing Linus as he's mentioned)

On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> > On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> >> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> >> addresses using the dma-mapping API.
> >> 
> >> Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> >> ---
> >> 
> >>  drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++++++++---
> >>  1 file changed, 60 insertions(+), 4 deletions(-)
> >> 
> >> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> >> index 7820d07..da94809 100644
> >> --- a/drivers/dma/sh/rcar-dmac.c
> >> +++ b/drivers/dma/sh/rcar-dmac.c
> >> @@ -13,6 +13,7 @@
> >>  #include <linux/dma-mapping.h>
> >>  #include <linux/dmaengine.h>
> >>  #include <linux/interrupt.h>
> >> +#include <linux/iommu.h>
> >>  #include <linux/list.h>
> >>  #include <linux/module.h>
> >>  #include <linux/mutex.h>
> >> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan,
> >> dma_addr_t buf_addr,
> >>  	return desc;
> >>  }
> >> 
> >> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> >> phys_addr_t addr,
> >> +		size_t size, enum dma_data_direction dir)
> >> +{
> >> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> >> +	struct page *page = phys_to_page(addr);
> >> +	size_t offset = addr - page_to_phys(page);
> >> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> >> +			dir);
> > 
> > Hmmmm, dmaengine APIs for slave cases expect that client has already
> > ammped and provided an address which the dmaengine understands. So doing
> > this in driver here does not sound good to me
> 
> It was my understanding that clients do not do this mapping and in fact
> are expected not to. Is this not what Linus Walleij is trying to address
> in '[PATCH] dmaengine: use phys_addr_t for slave configuration'?

There's a problem somewhere and we need to fix it. Clients currently pass 
physical addresses and the DMA engine API expects a DMA address. There's only 
two ways to fix that, either modify the API to expect a phys_addr_t, or modify 
the clients to provide a dma_addr_t.

The struct device used to map buffer through the DMA mapping API needs to be 
the DMA engine struct device, not the client struct device. As the client is 
not expected to have access to the DMA engine device I would argue that DMA 
engines should perform the mapping and the API should take a phys_addr_t.

Vinod, unless you have reasons to do it otherwise, can we get your ack on this 
approach and start hammering at the code ? The problem has remained known and 
unfixed for too long, we need to move on.

> >> On Fri, Apr 26, 2013 at 11:06 AM, Linus Walleij wrote:
> >>> The documentation already says these are physical addresses, and
> >>> we have concluded that any translation into the DMA address space
> >>> needs to reside in the dmaengine driver, so change the type of
> >>> the passed arguments
Laurent Pinchart Jan. 13, 2016, 11:15 p.m. UTC | #7
(Again with Linus' e-mail address fixed, sorry for the noise)

On Thursday 14 January 2016 01:13:20 Laurent Pinchart wrote:
> Hi Vinod,
> 
> (CC'ing Linus as he's mentioned)
> 
> On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> > * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> >> On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> >>> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> >>> addresses using the dma-mapping API.
> >>> 
> >>> Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> >>> ---
> >>> 
> >>>  drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++++++---
> >>>  1 file changed, 60 insertions(+), 4 deletions(-)
> >>> 
> >>> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> >>> index 7820d07..da94809 100644
> >>> --- a/drivers/dma/sh/rcar-dmac.c
> >>> +++ b/drivers/dma/sh/rcar-dmac.c
> >>> @@ -13,6 +13,7 @@
> >>>  #include <linux/dma-mapping.h>
> >>>  #include <linux/dmaengine.h>
> >>>  #include <linux/interrupt.h>
> >>> +#include <linux/iommu.h>
> >>>  #include <linux/list.h>
> >>>  #include <linux/module.h>
> >>>  #include <linux/mutex.h>
> >>> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan,
> >>> dma_addr_t buf_addr,
> >>>  	return desc;
> >>>  }
> >>> 
> >>> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> >>> phys_addr_t addr,
> >>> +		size_t size, enum dma_data_direction dir)
> >>> +{
> >>> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> >>> +	struct page *page = phys_to_page(addr);
> >>> +	size_t offset = addr - page_to_phys(page);
> >>> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> >>> +			dir);
> >> 
> >> Hmmmm, dmaengine APIs for slave cases expect that client has already
> >> ammped and provided an address which the dmaengine understands. So doing
> >> this in driver here does not sound good to me
> > 
> > It was my understanding that clients do not do this mapping and in fact
> > are expected not to. Is this not what Linus Walleij is trying to address
> > in '[PATCH] dmaengine: use phys_addr_t for slave configuration'?
> 
> There's a problem somewhere and we need to fix it. Clients currently pass
> physical addresses and the DMA engine API expects a DMA address. There's
> only two ways to fix that, either modify the API to expect a phys_addr_t,
> or modify the clients to provide a dma_addr_t.
> 
> The struct device used to map buffer through the DMA mapping API needs to be
> the DMA engine struct device, not the client struct device. As the client
> is not expected to have access to the DMA engine device I would argue that
> DMA engines should perform the mapping and the API should take a
> phys_addr_t.
> 
> Vinod, unless you have reasons to do it otherwise, can we get your ack on
> this approach and start hammering at the code ? The problem has remained
> known and unfixed for too long, we need to move on.
> 
> >>> On Fri, Apr 26, 2013 at 11:06 AM, Linus Walleij wrote:
> >>>> The documentation already says these are physical addresses, and
> >>>> we have concluded that any translation into the DMA address space
> >>>> needs to reside in the dmaengine driver, so change the type of
> >>>> the passed arguments
Laurent Pinchart Jan. 13, 2016, 11:37 p.m. UTC | #8
Hi Niklas,

Thank you for the patch, and welcome to the hairy details of the DMA mapping 
API :-)

On Monday 11 January 2016 03:17:46 Niklas Söderlund wrote:
> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> addresses using the dma-mapping API.
> 
> Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> ---
>  drivers/dma/sh/rcar-dmac.c | 64 ++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 60 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> index 7820d07..da94809 100644
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/dmaengine.h>
>  #include <linux/interrupt.h>
> +#include <linux/iommu.h>
>  #include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan,
> dma_addr_t buf_addr, return desc;
>  }
> 
> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t
> addr,
> +		size_t size, enum dma_data_direction dir)
> +{
> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +	struct page *page = phys_to_page(addr);

I wonder if that's really safe given that the physical address, not being part 
of RAM, is (unless I'm mistaken) not backed by a struct page.

> +	size_t offset = addr - page_to_phys(page);
> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> +			dir);

You might want to use an _attrs() version of the call to pass the 
DMA_ATTR_NO_KERNEL_MAPPING and DMA_ATTR_SKIP_CPU_SYNC flags. Unfortunately 
there's no dma_map_page_attrs(), maybe it should be added.

> +
> +	if (dma_mapping_error(chan->device->dev, map)) {
> +		dev_err(chan->device->dev, "chan%u: failed to map %zx@%pap",
> +				rchan->index, size, &addr);
> +		return 0;
> +	}
> +
> +	return map;
> +}
> +
>  static int rcar_dmac_device_config(struct dma_chan *chan,
>  				   struct dma_slave_config *cfg)
>  {
> @@ -1110,10 +1129,47 @@ static int rcar_dmac_device_config(struct dma_chan
> *chan, * We could lock this, but you shouldn't be configuring the
>  	 * channel, while using it...
>  	 */
> -	rchan->src_slave_addr = cfg->src_addr;
> -	rchan->dst_slave_addr = cfg->dst_addr;
> -	rchan->src_xfer_size = cfg->src_addr_width;
> -	rchan->dst_xfer_size = cfg->dst_addr_width;
> +
> +	/* If we don't have a iommu domain no idea to trying to use it */
> +	if (!iommu_get_domain_for_dev(chan->device->dev)) {
> +		rchan->src_slave_addr = cfg->src_addr;
> +		rchan->dst_slave_addr = cfg->dst_addr;
> +		rchan->src_xfer_size = cfg->src_addr_width;
> +		rchan->dst_xfer_size = cfg->dst_addr_width;
> +		return 0;
> +	}

Drivers are not supposed to deal with the IOMMU API directly. Would it be an 
issue dropping this check ? The dma_map_page() call should work fine without 
an IOMMU and return a DMA address identical to the physical address. Unless 
the memory is not DMA-ble, in which case bounce buffers would be used, and 
possibly a few other corner cases. I'm not sure if we need to care about them.

> +	/* unmap old */
> +	if (rchan->src_slave_addr) {
> +		dma_unmap_page(chan->device->dev, rchan->src_slave_addr,
> +				rchan->src_xfer_size, DMA_FROM_DEVICE);
> +		rchan->src_slave_addr = 0;
> +		rchan->src_xfer_size = 0;
> +	}
> +
> +	if (rchan->dst_slave_addr) {
> +		dma_unmap_page(chan->device->dev, rchan->dst_slave_addr,
> +				rchan->dst_xfer_size, DMA_TO_DEVICE);
> +		rchan->dst_slave_addr = 0;
> +		rchan->dst_xfer_size = 0;
> +	}
> +
> +	/* map new */
> +	if (cfg->src_addr) {
> +		rchan->src_slave_addr = __rcar_dmac_dma_map(chan, cfg->src_addr,
> +				cfg->src_addr_width, DMA_FROM_DEVICE);
> +		if (!rchan->src_slave_addr)
> +			return -EIO;
> +		rchan->src_xfer_size = cfg->src_addr_width;
> +	}
> +
> +	if (cfg->dst_addr) {
> +		rchan->dst_slave_addr = __rcar_dmac_dma_map(chan, cfg->dst_addr,
> +				cfg->dst_addr_width, DMA_TO_DEVICE);
> +		if (!rchan->dst_slave_addr)
> +			return -EIO;
> +		rchan->dst_xfer_size = cfg->dst_addr_width;
> +	}
> 
>  	return 0;
>  }
Vinod Koul Jan. 14, 2016, 3:52 a.m. UTC | #9
On Thu, Jan 14, 2016 at 01:13:20AM +0200, Laurent Pinchart wrote:
> Hi Vinod,
> 
> (CC'ing Linus as he's mentioned)
> 
> On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> > * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> > > On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> > >> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> > >> addresses using the dma-mapping API.
> > >> 
> > >> Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> > >> ---
> > >> 
> > >>  drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++++++++---
> > >>  1 file changed, 60 insertions(+), 4 deletions(-)
> > >> 
> > >> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> > >> index 7820d07..da94809 100644
> > >> --- a/drivers/dma/sh/rcar-dmac.c
> > >> +++ b/drivers/dma/sh/rcar-dmac.c
> > >> @@ -13,6 +13,7 @@
> > >>  #include <linux/dma-mapping.h>
> > >>  #include <linux/dmaengine.h>
> > >>  #include <linux/interrupt.h>
> > >> +#include <linux/iommu.h>
> > >>  #include <linux/list.h>
> > >>  #include <linux/module.h>
> > >>  #include <linux/mutex.h>
> > >> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan,
> > >> dma_addr_t buf_addr,
> > >>  	return desc;
> > >>  }
> > >> 
> > >> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> > >> phys_addr_t addr,
> > >> +		size_t size, enum dma_data_direction dir)
> > >> +{
> > >> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > >> +	struct page *page = phys_to_page(addr);
> > >> +	size_t offset = addr - page_to_phys(page);
> > >> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> > >> +			dir);
> > > 
> > > Hmmmm, dmaengine APIs for slave cases expect that client has already
> > > ammped and provided an address which the dmaengine understands. So doing
> > > this in driver here does not sound good to me
> > 
> > It was my understanding that clients do not do this mapping and in fact
> > are expected not to. Is this not what Linus Walleij is trying to address
> > in '[PATCH] dmaengine: use phys_addr_t for slave configuration'?
> There's a problem somewhere and we need to fix it. Clients currently pass 
> physical addresses and the DMA engine API expects a DMA address. There's only 
> two ways to fix that, either modify the API to expect a phys_addr_t, or modify 
> the clients to provide a dma_addr_t.

Okay I am in two minds for this, doing phys_addr_t seems okay but somehow I
feel we should rather pass dma_addr_t and the dmaengine driver gets a right dma
address to use and thus fix the clients, that may be the right thing to do
here, thoughts...?

The assumption from API was always that the client should perform the
mapping...

> The struct device used to map buffer through the DMA mapping API needs to be 
> the DMA engine struct device, not the client struct device. As the client is 
> not expected to have access to the DMA engine device I would argue that DMA 
> engines should perform the mapping and the API should take a phys_addr_t.

That is not a right assumption. Once the client gets a channel, they have
access to dmaengine device and should use that to map. Yes the key is to map
using dmaengine device and not client device. You can use chan->device->dev.

> 
> Vinod, unless you have reasons to do it otherwise, can we get your ack on this 
> approach and start hammering at the code ? The problem has remained known and 
> unfixed for too long, we need to move on.
Laurent Pinchart Jan. 14, 2016, 1:59 p.m. UTC | #10
Hi Vinod,

(CC'ing Maxime, I know he misses working on the DMA engine core ;-))

On Thursday 14 January 2016 09:22:25 Vinod Koul wrote:
> On Thu, Jan 14, 2016 at 01:13:20AM +0200, Laurent Pinchart wrote:
> > On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> >> * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> >>> On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> >>>> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> >>>> addresses using the dma-mapping API.
> >>>> 
> >>>> Signed-off-by: Niklas Söderlund
> >>>> <niklas.soderlund+renesas@ragnatech.se>
> >>>> ---
> >>>> 
> >>>>  drivers/dma/sh/rcar-dmac.c | 64 ++++++++++++++++++++++++++++++++++---
> >>>>  1 file changed, 60 insertions(+), 4 deletions(-)
> >>>> 
> >>>> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> >>>> index 7820d07..da94809 100644
> >>>> --- a/drivers/dma/sh/rcar-dmac.c
> >>>> +++ b/drivers/dma/sh/rcar-dmac.c
> >>>> @@ -13,6 +13,7 @@
> >>>>  #include <linux/dma-mapping.h>
> >>>>  #include <linux/dmaengine.h>
> >>>>  #include <linux/interrupt.h>
> >>>> +#include <linux/iommu.h>
> >>>>  #include <linux/list.h>
> >>>>  #include <linux/module.h>
> >>>>  #include <linux/mutex.h>
> >>>> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan
> >>>> *chan, dma_addr_t buf_addr,
> >>>>  	return desc;
> >>>>  }
> >>>> 
> >>>> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> >>>> phys_addr_t addr,
> >>>> +		size_t size, enum dma_data_direction dir)
> >>>> +{
> >>>> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> >>>> +	struct page *page = phys_to_page(addr);
> >>>> +	size_t offset = addr - page_to_phys(page);
> >>>> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset,
> >>>> size,
> >>>> +			dir);
> >>> 
> >>> Hmmmm, dmaengine APIs for slave cases expect that client has already
> >>> ammped and provided an address which the dmaengine understands. So
> >>> doing this in driver here does not sound good to me
> >> 
> >> It was my understanding that clients do not do this mapping and in fact
> >> are expected not to. Is this not what Linus Walleij is trying to address
> >> in '[PATCH] dmaengine: use phys_addr_t for slave configuration'?
> > 
> > There's a problem somewhere and we need to fix it. Clients currently pass
> > physical addresses and the DMA engine API expects a DMA address. There's
> > only two ways to fix that, either modify the API to expect a phys_addr_t,
> > or modify the clients to provide a dma_addr_t.
> 
> Okay I am in two minds for this, doing phys_addr_t seems okay but somehow I
> feel we should rather pass dma_addr_t and dmaengien driver get a right dma
> address to use and thus fix the clients, that maybe the right thing to do
> here, thoughts...?

Given that there should be more clients than DMA engine drivers, and given 
that knowledge of what has to be done to map a physical address to a DMA 
address accessible by the DMA engine should not be included in client drivers 
(in most case I assume using the DMA mapping API will be enough, but details 
may vary), I believe it makes more sense to pass a phys_addr_t and let the DMA 
engine drivers handle it.

There's another issue I just remembered. Consider the following cases.

1. DMA engine channel that has an optional IOMMU covering both the src and dst 
side. In that case mapping can be performed by the client or DMA engine 
driver, the DMA mapping API will handle the IOMMU behind the scene.

2. DMA engine channel that has an optional IOMMU on the memory side and no 
support for IOMMU on the slave (in the sense of the register in front of the 
client's FIFO) side. In that case a client mapping buffers on both the src and 
dst side would set an IOMMU mapped address for the slave side, which wouldn't 
work. If the DMA engine driver were to perform the mapping then it could skip 
it on the slave side, knowing that the slave side has no IOMMU.

3. DMA engine channel that has independently optional IOMMUs on both sides. 
This can't be supported today as we have a single struct device per channel 
and thus can't configure the IOMMU independently on the two sides.

It's getting messy :-)

> The assumption from API was always that the client should perform the
> mapping...
> 
> > The struct device used to map buffer through the DMA mapping API needs to
> > be the DMA engine struct device, not the client struct device. As the
> > client is not expected to have access to the DMA engine device I would
> > argue that DMA engines should perform the mapping and the API should take
> > a phys_addr_t.
>
> That is not a right assumption. Once the client gets a channel, they have
> access to dmaengine device and should use that to map. Yes the key is to map
> using dmaengine device and not client device. You can use chan->device->dev.

Right, that's required by the DMA engine API even when not using slave 
transfers. Which raises an interesting consistency issue in the API, I agree 
about that.

> > Vinod, unless you have reasons to do it otherwise, can we get your ack on
> > this approach and start hammering at the code ? The problem has remained
> > known and unfixed for too long, we need to move on.
Niklas Söderlund Jan. 14, 2016, 9:37 p.m. UTC | #11
Hi Laurent,

* Laurent Pinchart <laurent.pinchart@ideasonboard.com> [2016-01-14 01:37:37 +0200]:

> Hi Niklas,
>
> Thank you for the patch, and welcome to the hairy details of the DMA mapping
> API :-)

Thanks and thank you for your feedback.

>
> On Monday 11 January 2016 03:17:46 Niklas Söderlund wrote:
> > Enable slave transfers to devices behind IPMMU:s by mapping the slave
> > addresses using the dma-mapping API.
> >
> > Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> > ---
> >  drivers/dma/sh/rcar-dmac.c | 64 ++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 60 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> > index 7820d07..da94809 100644
> > --- a/drivers/dma/sh/rcar-dmac.c
> > +++ b/drivers/dma/sh/rcar-dmac.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/dmaengine.h>
> >  #include <linux/interrupt.h>
> > +#include <linux/iommu.h>
> >  #include <linux/list.h>
> >  #include <linux/module.h>
> >  #include <linux/mutex.h>
> > @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan,
> > dma_addr_t buf_addr, return desc;
> >  }
> >
> > +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t
> > addr,
> > +		size_t size, enum dma_data_direction dir)
> > +{
> > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +	struct page *page = phys_to_page(addr);
>
> I wonder if that's really safe given that the physical address, not being part
> of RAM, is (unless I'm mistaken) not backed by a struct page.

I agree it's not how I wanted to do it but I could not figure out a way
that do not end up calling ops->map_page in dma-mapping-common.h. I did
also get a note from 'kbuild test robot' that phys_to_page is not
available on all platforms. So in my v2 of this series I currently use:

    page = pfn_to_page(addr >> PAGE_SHIFT);

But I guess it's just as safe as phys_to_page.

>
> > +	size_t offset = addr - page_to_phys(page);
> > +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> > +			dir);
>
> You might want to use an _attrs() version of the call to pass the
> DMA_ATTR_NO_KERNEL_MAPPING and DMA_ATTR_SKIP_CPU_SYNC flags. Unfortunately
> there's no dma_map_page_attrs(), maybe it should be added.

I have implemented and am using such a construct for my v2. I also
found the following patch but I could not determine if the idea of a
dma_map_page_attrs was accepted or rejected.

[PATCH v1 1/2] dma-mapping-common: add dma_map_page_attrs API
https://www.spinics.net/lists/linux-arch/msg32334.html

>
> > +
> > +	if (dma_mapping_error(chan->device->dev, map)) {
> > +		dev_err(chan->device->dev, "chan%u: failed to map %zx@%pap",
> > +				rchan->index, size, &addr);
> > +		return 0;
> > +	}
> > +
> > +	return map;
> > +}
> > +
> >  static int rcar_dmac_device_config(struct dma_chan *chan,
> >  				   struct dma_slave_config *cfg)
> >  {
> > @@ -1110,10 +1129,47 @@ static int rcar_dmac_device_config(struct dma_chan
> > *chan, * We could lock this, but you shouldn't be configuring the
> >  	 * channel, while using it...
> >  	 */
> > -	rchan->src_slave_addr = cfg->src_addr;
> > -	rchan->dst_slave_addr = cfg->dst_addr;
> > -	rchan->src_xfer_size = cfg->src_addr_width;
> > -	rchan->dst_xfer_size = cfg->dst_addr_width;
> > +
> > +	/* If we don't have a iommu domain no idea to trying to use it */
> > +	if (!iommu_get_domain_for_dev(chan->device->dev)) {
> > +		rchan->src_slave_addr = cfg->src_addr;
> > +		rchan->dst_slave_addr = cfg->dst_addr;
> > +		rchan->src_xfer_size = cfg->src_addr_width;
> > +		rchan->dst_xfer_size = cfg->dst_addr_width;
> > +		return 0;
> > +	}
>
> Driver are not supposed to deal with the IOMMU API directly. Would it be an
> issue dropping this check ? The dma_map_page() call should work fine without
> an IOMMU and return a DMA address identical to the physical address. Unless
> the memory is not DMA-ble, in which case bounce buffers would be used, and
> possible a few other corner cases. I'm not sure if we need to care about them.
>

You are correct, this check can be removed. It was needed in an earlier
version. I will wait a few days and see if it becomes clearer whether the
mapping should happen in the dmaengine or in the client. And depending
on that outcome I will send out an updated version.


> > +	/* unmap old */
> > +	if (rchan->src_slave_addr) {
> > +		dma_unmap_page(chan->device->dev, rchan->src_slave_addr,
> > +				rchan->src_xfer_size, DMA_FROM_DEVICE);
> > +		rchan->src_slave_addr = 0;
> > +		rchan->src_xfer_size = 0;
> > +	}
> > +
> > +	if (rchan->dst_slave_addr) {
> > +		dma_unmap_page(chan->device->dev, rchan->dst_slave_addr,
> > +				rchan->dst_xfer_size, DMA_TO_DEVICE);
> > +		rchan->dst_slave_addr = 0;
> > +		rchan->dst_xfer_size = 0;
> > +	}
> > +
> > +	/* map new */
> > +	if (cfg->src_addr) {
> > +		rchan->src_slave_addr = __rcar_dmac_dma_map(chan, cfg->src_addr,
> > +				cfg->src_addr_width, DMA_FROM_DEVICE);
> > +		if (!rchan->src_slave_addr)
> > +			return -EIO;
> > +		rchan->src_xfer_size = cfg->src_addr_width;
> > +	}
> > +
> > +	if (cfg->dst_addr) {
> > +		rchan->dst_slave_addr = __rcar_dmac_dma_map(chan, cfg->dst_addr,
> > +				cfg->dst_addr_width, DMA_TO_DEVICE);
> > +		if (!rchan->dst_slave_addr)
> > +			return -EIO;
> > +		rchan->dst_xfer_size = cfg->dst_addr_width;
> > +	}
> >
> >  	return 0;
> >  }
--
// Niklas
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Jan. 14, 2016, 11:27 p.m. UTC | #12
Hi Niklas,

On Thursday 14 January 2016 22:37:18 Niklas Söderlund wrote:
> * Laurent Pinchart [2016-01-14 01:37:37 +0200]:
> > Hi Niklas,
> > 
> > Thank you for the patch, and welcome to the hairy details of the DMA
> > mapping API :-)
> 
> Thanks and thank you for your feedback.
> 
> > On Monday 11 January 2016 03:17:46 Niklas Söderlund wrote:
> > > Enable slave transfers to devices behind IPMMU:s by mapping the slave
> > > addresses using the dma-mapping API.
> > > 
> > > Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> > > ---
> > > 
> > >  drivers/dma/sh/rcar-dmac.c | 64 ++++++++++++++++++++++++++++++++++++---
> > >  1 file changed, 60 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> > > index 7820d07..da94809 100644
> > > --- a/drivers/dma/sh/rcar-dmac.c
> > > +++ b/drivers/dma/sh/rcar-dmac.c
> > > @@ -13,6 +13,7 @@
> > >  #include <linux/dma-mapping.h>
> > >  #include <linux/dmaengine.h>
> > >  #include <linux/interrupt.h>
> > > +#include <linux/iommu.h>
> > >  #include <linux/list.h>
> > >  #include <linux/module.h>
> > >  #include <linux/mutex.h>
> > > @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan,
> > > dma_addr_t buf_addr,
> > >  	return desc;
> > >  }
> > > 
> > > +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> > > phys_addr_t addr,
> > > +		size_t size, enum dma_data_direction dir)
> > > +{
> > > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > > +	struct page *page = phys_to_page(addr);
> > 
> > I wonder if that's really safe given that the physical address, not being
> > part of RAM, is (unless I'm mistaken) not backed by a struct page.
> 
> I agree it's not how I wanted to do it but I could not figure out a way
> that do not end up calling ops->map_page in dma-mapping-common.h. I did
> also get a note from 'kbuild test robot' that phys_to_page is not
> available on all platforms. So in my v2 of this series I currently use:
> 
>     page = pfn_to_page(addr >> PAGE_SHIFT);
> 
> But I guess it's just as safe as phys_to_page.

At least the kmemcheck_mark_initialized(page_address(page) + offset, size); 
and debug_dma_map_page() calls in dma_map_page() look dangerous to me, as 
page_address() can dereference the struct page pointer depending on the 
platform.

Even the page_to_pfn() and page_to_phys() calls in the ARM map_page 
implementation would need to be checked for correctness as they depend on the 
memory model used by the platform. The cache sync code paths also worry me, 
but we should skip them anyway.

I wonder if we should have a dma_map_pfn or dma_map_phys...

> > > +	size_t offset = addr - page_to_phys(page);
> > > +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
> > > +			dir);
> > 
> > You might want to use an _attrs() version of the call to pass the
> > DMA_ATTR_NO_KERNEL_MAPPING and DMA_ATTR_SKIP_CPU_SYNC flags. Unfortunately
> > there's no dma_map_page_attrs(), maybe it should be added.
> 
> I have implemented and are using such a construct for my v2. I also
> found the following patch but I could not determine if the idea of of a
> dma_map_pag_attrs was accepted or rejected.
> 
> PATCH v1 1/2] dma-mapping-common: add dma_map_page_attrs API
> https://www.spinics.net/lists/linux-arch/msg32334.html

The approach looks fine to me (and the second patch could be interesting too, 
but might not be the best way to do that), the series should probably be 
resurrected.

> > > +
> > > +	if (dma_mapping_error(chan->device->dev, map)) {
> > > +		dev_err(chan->device->dev, "chan%u: failed to map %zx@%pap",
> > > +				rchan->index, size, &addr);
> > > +		return 0;
> > > +	}
> > > +
> > > +	return map;
> > > +}
> > > +
> > > 
> > >  static int rcar_dmac_device_config(struct dma_chan *chan,
> > >  
> > >  				   struct dma_slave_config *cfg)
> > >  
> > >  {
> > > 
> > > @@ -1110,10 +1129,47 @@ static int rcar_dmac_device_config(struct
> > > dma_chan
> > > *chan, * We could lock this, but you shouldn't be configuring the
> > > 
> > >  	 * channel, while using it...
> > >  	 */
> > > 
> > > -	rchan->src_slave_addr = cfg->src_addr;
> > > -	rchan->dst_slave_addr = cfg->dst_addr;
> > > -	rchan->src_xfer_size = cfg->src_addr_width;
> > > -	rchan->dst_xfer_size = cfg->dst_addr_width;
> > > +
> > > +	/* If we don't have a iommu domain no idea to trying to use it */
> > > +	if (!iommu_get_domain_for_dev(chan->device->dev)) {
> > > +		rchan->src_slave_addr = cfg->src_addr;
> > > +		rchan->dst_slave_addr = cfg->dst_addr;
> > > +		rchan->src_xfer_size = cfg->src_addr_width;
> > > +		rchan->dst_xfer_size = cfg->dst_addr_width;
> > > +		return 0;
> > > +	}
> > 
> > Driver are not supposed to deal with the IOMMU API directly. Would it be
> > an issue dropping this check ? The dma_map_page() call should work fine
> > without an IOMMU and return a DMA address identical to the physical
> > address. Unless the memory is not DMA-ble, in which case bounce buffers
> > would be used, and possible a few other corner cases. I'm not sure if we
> > need to care about them.
>
> You are correct, this check can be removed. It was needed in a earlier
> version. I will wait a few days and see if it becomes clearer if the
> mapping should happen in the dmaengine or in the client. And depending
> on that outcome I will send out an updated version.
> 
> > > +	/* unmap old */
> > > +	if (rchan->src_slave_addr) {
> > > +		dma_unmap_page(chan->device->dev, rchan->src_slave_addr,
> > > +				rchan->src_xfer_size, DMA_FROM_DEVICE);
> > > +		rchan->src_slave_addr = 0;
> > > +		rchan->src_xfer_size = 0;
> > > +	}
> > > +
> > > +	if (rchan->dst_slave_addr) {
> > > +		dma_unmap_page(chan->device->dev, rchan->dst_slave_addr,
> > > +				rchan->dst_xfer_size, DMA_TO_DEVICE);
> > > +		rchan->dst_slave_addr = 0;
> > > +		rchan->dst_xfer_size = 0;
> > > +	}
> > > +
> > > +	/* map new */
> > > +	if (cfg->src_addr) {
> > > +		rchan->src_slave_addr = __rcar_dmac_dma_map(chan, cfg->src_addr,
> > > +				cfg->src_addr_width, DMA_FROM_DEVICE);
> > > +		if (!rchan->src_slave_addr)
> > > +			return -EIO;
> > > +		rchan->src_xfer_size = cfg->src_addr_width;
> > > +	}
> > > +
> > > +	if (cfg->dst_addr) {
> > > +		rchan->dst_slave_addr = __rcar_dmac_dma_map(chan, cfg->dst_addr,
> > > +				cfg->dst_addr_width, DMA_TO_DEVICE);
> > > +		if (!rchan->dst_slave_addr)
> > > +			return -EIO;
> > > +		rchan->dst_xfer_size = cfg->dst_addr_width;
> > > +	}
> > > 
> > >  	return 0;
> > >  }
Vinod Koul Jan. 18, 2016, 1:36 p.m. UTC | #13
On Thu, Jan 14, 2016 at 03:59:40PM +0200, Laurent Pinchart wrote:
> Hi Vinod,
> 
> (CC'ing Maxime, I know he misses working on the DMA engine core ;-))
> 
> On Thursday 14 January 2016 09:22:25 Vinod Koul wrote:
> > On Thu, Jan 14, 2016 at 01:13:20AM +0200, Laurent Pinchart wrote:
> > > On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> > >> * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> > >>> On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> > >>>> Enable slave transfers to devices behind IPMMU:s by mapping the slave
> > >>>> addresses using the dma-mapping API.
> > >>>> 
> > >>>> Signed-off-by: Niklas Söderlund
> > >>>> <niklas.soderlund+renesas@ragnatech.se>
> > >>>> ---
> > >>>> 
> > >>>>  drivers/dma/sh/rcar-dmac.c | 64 ++++++++++++++++++++++++++++++++++---
> > >>>>  1 file changed, 60 insertions(+), 4 deletions(-)
> > >>>> 
> > >>>> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> > >>>> index 7820d07..da94809 100644
> > >>>> --- a/drivers/dma/sh/rcar-dmac.c
> > >>>> +++ b/drivers/dma/sh/rcar-dmac.c
> > >>>> @@ -13,6 +13,7 @@
> > >>>>  #include <linux/dma-mapping.h>
> > >>>>  #include <linux/dmaengine.h>
> > >>>>  #include <linux/interrupt.h>
> > >>>> +#include <linux/iommu.h>
> > >>>>  #include <linux/list.h>
> > >>>>  #include <linux/module.h>
> > >>>>  #include <linux/mutex.h>
> > >>>> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan
> > >>>> *chan, dma_addr_t buf_addr,
> > >>>>  	return desc;
> > >>>>  }
> > >>>> 
> > >>>> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> > >>>> phys_addr_t addr,
> > >>>> +		size_t size, enum dma_data_direction dir)
> > >>>> +{
> > >>>> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > >>>> +	struct page *page = phys_to_page(addr);
> > >>>> +	size_t offset = addr - page_to_phys(page);
> > >>>> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset,
> > >>>> size,
> > >>>> +			dir);
> > >>> 
> > >>> Hmmmm, dmaengine APIs for slave cases expect that client has already
> > >>> ammped and provided an address which the dmaengine understands. So
> > >>> doing this in driver here does not sound good to me
> > >> 
> > >> It was my understanding that clients do not do this mapping and in fact
> > >> are expected not to. Is this not what Linus Walleij is trying to address
> > >> in '[PATCH] dmaengine: use phys_addr_t for slave configuration'?
> > > 
> > > There's a problem somewhere and we need to fix it. Clients currently pass
> > > physical addresses and the DMA engine API expects a DMA address. There's
> > > only two ways to fix that, either modify the API to expect a phys_addr_t,
> > > or modify the clients to provide a dma_addr_t.
> > 
> > Okay I am in two minds for this, doing phys_addr_t seems okay but somehow I
> > feel we should rather pass dma_addr_t and dmaengien driver get a right dma
> > address to use and thus fix the clients, that maybe the right thing to do
> > here, thoughts...?
> 
> Given that there should be more clients than DMA engine drivers, and given 
> that knowledge of what has to be done to map a physical address to a DMA 
> address accessible by the DMA engine should not be included in client drivers 
> (in most case I assume using the DMA mapping API will be enough, but details 
> may vary), I believe it makes more sense to pass a phys_addr_t and let the DMA 
> engine drivers handle it.
> 
> There's another issue I just remembered. Consider the following cases.
> 
> 1. DMA engine channel that has an optional IOMMU covering both the src and dst 
> side. In that case mapping can be performed by the client or DMA engine 
> driver, the DMA mapping API will handle the IOMMU behind the scene.
> 
> 2. DMA engine channel that has an optional IOMMU on the memory side and no 
> support for IOMMU on the slave (in the sense of the register in front of the 
> client's FIFO) side. In that case a client mapping buffers on both the src and 
> dst side would set an IOMMU mapped address for the slave side, which wouldn't 
> work. If the DMA engine driver were to perform the mapping then it could skip 
> it on the slave side, knowing that the slave side has no IOMMU.
> 
> 3. DMA engine channel that has independently optional IOMMUs on both sides. 
> This can't be supported today as we have a single struct device per channel 
> and thus can't configure the IOMMU independently on the two sides.
> 
> It's getting messy :-)

Yes I do agree on that, but the problem is today none of the slave drivers
expect or do the mapping, changing that will cause issues...

And how many do really have an IOMMU behind them, few out of large set we
have...

> > The assumption from API was always that the client should perform the
> > mapping...
> > 
> > > The struct device used to map buffer through the DMA mapping API needs to
> > > be the DMA engine struct device, not the client struct device. As the
> > > client is not expected to have access to the DMA engine device I would
> > > argue that DMA engines should perform the mapping and the API should take
> > > a phys_addr_t.
> >
> > That is not a right assumption. Once the client gets a channel, they have
> > access to dmaengine device and should use that to map. Yes the key is to map
> > using dmaengine device and not client device. You can use chan->device->dev.
> 
> Right, that's required by the DMA engine API even when not using slave 
> transfers. Which raises an interesting consistency issue in the API, I agree 
> about that.
> 
> > > Vinod, unless you have reasons to do it otherwise, can we get your ack on
> > > this approach and start hammering at the code ? The problem has remained
> > > known and unfixed for too long, we need to move on.
> 
> -- 
> Regards,
> 
> Laurent Pinchart
>
Laurent Pinchart Jan. 24, 2016, 10:38 p.m. UTC | #14
Hi Vinod,

On Monday 18 January 2016 19:06:29 Vinod Koul wrote:
> On Thu, Jan 14, 2016 at 03:59:40PM +0200, Laurent Pinchart wrote:
> > On Thursday 14 January 2016 09:22:25 Vinod Koul wrote:
> >> On Thu, Jan 14, 2016 at 01:13:20AM +0200, Laurent Pinchart wrote:
> >>> On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> >>>> * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> >>>>> On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> >>>>>> Enable slave transfers to devices behind IPMMU:s by mapping the
> >>>>>> slave addresses using the dma-mapping API.
> >>>>>> 
> >>>>>> Signed-off-by: Niklas Söderlund
> >>>>>> <niklas.soderlund+renesas@ragnatech.se>
> >>>>>> ---
> >>>>>> 
> >>>>>>  drivers/dma/sh/rcar-dmac.c | 64 ++++++++++++++++++++++++++++++++---
> >>>>>>  1 file changed, 60 insertions(+), 4 deletions(-)
> >>>>>> 
> >>>>>> diff --git a/drivers/dma/sh/rcar-dmac.c
> >>>>>> b/drivers/dma/sh/rcar-dmac.c
> >>>>>> index 7820d07..da94809 100644
> >>>>>> --- a/drivers/dma/sh/rcar-dmac.c
> >>>>>> +++ b/drivers/dma/sh/rcar-dmac.c
> >>>>>> @@ -13,6 +13,7 @@
> >>>>>>  #include <linux/dma-mapping.h>
> >>>>>>  #include <linux/dmaengine.h>
> >>>>>>  #include <linux/interrupt.h>
> >>>>>> +#include <linux/iommu.h>
> >>>>>>  #include <linux/list.h>
> >>>>>>  #include <linux/module.h>
> >>>>>>  #include <linux/mutex.h>
> >>>>>> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan
> >>>>>> *chan, dma_addr_t buf_addr,
> >>>>>>  	return desc;
> >>>>>>  }
> >>>>>> 
> >>>>>> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> >>>>>> phys_addr_t addr,
> >>>>>> +		size_t size, enum dma_data_direction dir)
> >>>>>> +{
> >>>>>> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> >>>>>> +	struct page *page = phys_to_page(addr);
> >>>>>> +	size_t offset = addr - page_to_phys(page);
> >>>>>> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset,
> >>>>>> size,
> >>>>>> +			dir);
> >>>>> 
> >>>>> Hmmmm, dmaengine APIs for slave cases expect that client has already
> >>>>> ammped and provided an address which the dmaengine understands. So
> >>>>> doing this in driver here does not sound good to me
> >>>> 
> >>>> It was my understanding that clients do not do this mapping and in
> >>>> fact are expected not to. Is this not what Linus Walleij is trying to
> >>>> address in '[PATCH] dmaengine: use phys_addr_t for slave
> >>>> configuration'?
> >>> 
> >>> There's a problem somewhere and we need to fix it. Clients currently
> >>> pass physical addresses and the DMA engine API expects a DMA address.
> >>> There's only two ways to fix that, either modify the API to expect a
> >>> phys_addr_t, or modify the clients to provide a dma_addr_t.
> >> 
> >> Okay I am in two minds for this, doing phys_addr_t seems okay but
> >> somehow I feel we should rather pass dma_addr_t and dmaengien driver get
> >> a right dma address to use and thus fix the clients, that maybe the
> >> right thing to do here, thoughts...?
> > 
> > Given that there should be more clients than DMA engine drivers, and given
> > that knowledge of what has to be done to map a physical address to a DMA
> > address accessible by the DMA engine should not be included in client
> > drivers (in most case I assume using the DMA mapping API will be enough,
> > but details may vary), I believe it makes more sense to pass a
> > phys_addr_t and let the DMA engine drivers handle it.
> > 
> > There's another issue I just remembered. Consider the following cases.
> > 
> > 1. DMA engine channel that has an optional IOMMU covering both the src and
> > dst side. In that case mapping can be performed by the client or DMA
> > engine driver, the DMA mapping API will handle the IOMMU behind the
> > scene.
> > 
> > 2. DMA engine channel that has an optional IOMMU on the memory side and no
> > support for IOMMU on the slave (in the sense of the register in front of
> > the client's FIFO) side. In that case a client mapping buffers on both
> > the src and dst side would set an IOMMU mapped address for the slave
> > side, which wouldn't work. If the DMA engine driver were to perform the
> > mapping then it could skip it on the slave side, knowing that the slave
> > side has no IOMMU.
> > 
> > 3. DMA engine channel that has independently optional IOMMUs on both
> > sides. This can't be supported today as we have a single struct device per
> > channel and thus can't configure the IOMMU independently on the two sides.
> > 
> > It's getting messy :-)
> 
> Yes I do agree on that, but the problem is today none of the slave drivers
> expect or do the mapping, changing that will cause issues...
> 
> And how many do really have an IOMMU behind them, few out of large set we
> have...

Today neither the DMA engine drivers nor the client drivers do the mapping, so 
we have an issue either way. The question is on which side to solve it. If I 
understand correctly you fear that mapping the address in the DMA engine 
drivers would cause issues with client drivers that don't expect that 
behaviour, but I don't really see where the issue is. Could you please 
elaborate ?

> >> The assumption from API was always that the client should perform the
> >> mapping...
> >> 
> >>> The struct device used to map buffer through the DMA mapping API needs
> >>> to be the DMA engine struct device, not the client struct device. As
> >>> the client is not expected to have access to the DMA engine device I
> >>> would argue that DMA engines should perform the mapping and the API
> >>> should take a phys_addr_t.
> >> 
> >> That is not a right assumption. Once the client gets a channel, they
> >> have access to dmaengine device and should use that to map. Yes the key
> >> is to map using dmaengine device and not client device. You can use
> >> chan->device->dev.
> >
> > Right, that's required by the DMA engine API even when not using slave
> > transfers. Which raises an interesting consistency issue in the API, I
> > agree about that.
> > 
> >>> Vinod, unless you have reasons to do it otherwise, can we get your ack
> >>> on this approach and start hammering at the code ? The problem has
> >>> remained known and unfixed for too long, we need to move on.
Laurent Pinchart Feb. 3, 2016, 12:04 p.m. UTC | #15
Hi Vinod,

On Monday 25 January 2016 00:38:33 Laurent Pinchart wrote:
> On Monday 18 January 2016 19:06:29 Vinod Koul wrote:
> > On Thu, Jan 14, 2016 at 03:59:40PM +0200, Laurent Pinchart wrote:
> > > On Thursday 14 January 2016 09:22:25 Vinod Koul wrote:
> > >> On Thu, Jan 14, 2016 at 01:13:20AM +0200, Laurent Pinchart wrote:
> > >>> On Wednesday 13 January 2016 14:55:50 Niklas Söderlund wrote:
> > >>>> * Vinod Koul <vinod.koul@intel.com> [2016-01-13 19:06:01 +0530]:
> > >>>>> On Mon, Jan 11, 2016 at 03:17:46AM +0100, Niklas Söderlund wrote:
> > >>>>>> Enable slave transfers to devices behind IPMMU:s by mapping the
> > >>>>>> slave addresses using the dma-mapping API.
> > >>>>>> 
> > >>>>>> Signed-off-by: Niklas Söderlund
> > >>>>>> <niklas.soderlund+renesas@ragnatech.se>
> > >>>>>> ---
> > >>>>>> 
> > >>>>>>  drivers/dma/sh/rcar-dmac.c | 64 +++++++++++++++++++++++++++++++---
> > >>>>>>  1 file changed, 60 insertions(+), 4 deletions(-)
> > >>>>>> 
> > >>>>>> diff --git a/drivers/dma/sh/rcar-dmac.c
> > >>>>>> b/drivers/dma/sh/rcar-dmac.c
> > >>>>>> index 7820d07..da94809 100644
> > >>>>>> --- a/drivers/dma/sh/rcar-dmac.c
> > >>>>>> +++ b/drivers/dma/sh/rcar-dmac.c
> > >>>>>> @@ -13,6 +13,7 @@
> > >>>>>>  #include <linux/dma-mapping.h>
> > >>>>>>  #include <linux/dmaengine.h>
> > >>>>>>  #include <linux/interrupt.h>
> > >>>>>> +#include <linux/iommu.h>
> > >>>>>>  #include <linux/list.h>
> > >>>>>>  #include <linux/module.h>
> > >>>>>>  #include <linux/mutex.h>
> > >>>>>> @@ -1101,6 +1102,24 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan
> > >>>>>> *chan, dma_addr_t buf_addr,
> > >>>>>>  	return desc;
> > >>>>>>  }
> > >>>>>> 
> > >>>>>> +static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan,
> > >>>>>> phys_addr_t addr,
> > >>>>>> +		size_t size, enum dma_data_direction dir)
> > >>>>>> +{
> > >>>>>> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > >>>>>> +	struct page *page = phys_to_page(addr);
> > >>>>>> +	size_t offset = addr - page_to_phys(page);
> > >>>>>> +	dma_addr_t map = dma_map_page(chan->device->dev, page, offset,
> > >>>>>> size,
> > >>>>>> +			dir);
> > >>>>> 
> > >>>>> Hmmmm, dmaengine APIs for slave cases expect that client has already
> > >>>>> ammped and provided an address which the dmaengine understands. So
> > >>>>> doing this in driver here does not sound good to me
> > >>>> 
> > >>>> It was my understanding that clients do not do this mapping and in
> > >>>> fact are expected not to. Is this not what Linus Walleij is trying to
> > >>>> address in '[PATCH] dmaengine: use phys_addr_t for slave
> > >>>> configuration'?
> > >>> 
> > >>> There's a problem somewhere and we need to fix it. Clients currently
> > >>> pass physical addresses and the DMA engine API expects a DMA address.
> > >>> There's only two ways to fix that, either modify the API to expect a
> > >>> phys_addr_t, or modify the clients to provide a dma_addr_t.
> > >> 
> > >> Okay I am in two minds for this, doing phys_addr_t seems okay but
> > >> somehow I feel we should rather pass dma_addr_t and dmaengien driver
> > >> get
> > >> a right dma address to use and thus fix the clients, that maybe the
> > >> right thing to do here, thoughts...?
> > > 
> > > Given that there should be more clients than DMA engine drivers, and
> > > given that knowledge of what has to be done to map a physical address to
> > > a DMA address accessible by the DMA engine should not be included in
> > > client drivers (in most case I assume using the DMA mapping API will be
> > > enough, but details may vary), I believe it makes more sense to pass a
> > > phys_addr_t and let the DMA engine drivers handle it.
> > > 
> > > There's another issue I just remembered. Consider the following cases.
> > > 
> > > 1. DMA engine channel that has an optional IOMMU covering both the src
> > > and dst side. In that case mapping can be performed by the client or DMA
> > > engine driver, the DMA mapping API will handle the IOMMU behind the
> > > scene.
> > > 
> > > 2. DMA engine channel that has an optional IOMMU on the memory side and
> > > no support for IOMMU on the slave (in the sense of the register in front
> > > of the client's FIFO) side. In that case a client mapping buffers on
> > > both the src and dst side would set an IOMMU mapped address for the
> > > slave side, which wouldn't work. If the DMA engine driver were to
> > > perform the mapping then it could skip it on the slave side, knowing
> > > that the slave side has no IOMMU.
> > > 
> > > 3. DMA engine channel that has independently optional IOMMUs on both
> > > sides. This can't be supported today as we have a single struct device
> > > per channel and thus can't configure the IOMMU independently on the two
> > > sides.
> > > 
> > > It's getting messy :-)
> > 
> > Yes I do agree on that, but the problem is today none of the slave drivers
> > expect or do the mapping, changing that will cause issues...
> > 
> > And how many do really have an IOMMU behind them, few out of large set we
> > have...
> 
> Today neither the DMA engine drivers nor the client drivers do the mapping,
> so we have any issue anyway. The question is on which side to solve it. If
> I understand correctly you fear that mapping the address in the DMA engine
> drivers would cause issues with client drivers that don't expect that
> behaviour, but I don't really see where the issue is. Could you please
> elaborate ?

Ping. I don't think we're very far from finding an agreement on this topic. If 
you prefer we could discuss it on IRC, it can be faster than e-mail.

> >>> The assumption from API was always that the client should perform the
> >>> mapping...
> >>> 
> >>>> The struct device used to map buffer through the DMA mapping API needs
> >>>> to be the DMA engine struct device, not the client struct device. As
> >>>> the client is not expected to have access to the DMA engine device I
> >>> would argue that DMA engines should perform the mapping and the API
> >>>> should take a phys_addr_t.
> >>> 
> >>> That is not a right assumption. Once the client gets a channel, they
> >>> have access to dmaengine device and should use that to map. Yes the key
> >>> is to map using dmaengine device and not client device. You can use
> >>> chan->device->dev.
> >> 
> >> Right, that's required by the DMA engine API even when not using slave
> >> transfers. Which raises an interesting consistency issue in the API, I
> >> agree about that.
> >> 
> >>>> Vinod, unless you have reasons to do it otherwise, can we get your ack
> >>>> on this approach and start hammering at the code ? The problem has
> >>>> remained known and unfixed for too long, we need to move on.
Vinod Koul Feb. 8, 2016, 3:35 a.m. UTC | #16
On Wed, Feb 03, 2016 at 02:04:06PM +0200, Laurent Pinchart wrote:
> > > >> Okay I am in two minds for this, doing phys_addr_t seems okay but
> > > >> somehow I feel we should rather pass dma_addr_t and dmaengien driver
> > > >> get
> > > >> a right dma address to use and thus fix the clients, that maybe the
> > > >> right thing to do here, thoughts...?
> > > > 
> > > > Given that there should be more clients than DMA engine drivers, and
> > > > given that knowledge of what has to be done to map a physical address to
> > > > a DMA address accessible by the DMA engine should not be included in
> > > > client drivers (in most case I assume using the DMA mapping API will be
> > > > enough, but details may vary), I believe it makes more sense to pass a
> > > > phys_addr_t and let the DMA engine drivers handle it.
> > > > 
> > > > There's another issue I just remembered. Consider the following cases.
> > > > 
> > > > 1. DMA engine channel that has an optional IOMMU covering both the src
> > > > and dst side. In that case mapping can be performed by the client or DMA
> > > > engine driver, the DMA mapping API will handle the IOMMU behind the
> > > > scene.
> > > > 
> > > > 2. DMA engine channel that has an optional IOMMU on the memory side and
> > > > no support for IOMMU on the slave (in the sense of the register in front
> > > > of the client's FIFO) side. In that case a client mapping buffers on
> > > > both the src and dst side would set an IOMMU mapped address for the
> > > > slave side, which wouldn't work. If the DMA engine driver were to
> > > > perform the mapping then it could skip it on the slave side, knowing
> > > > that the slave side has no IOMMU.
> > > > 
> > > > 3. DMA engine channel that has independently optional IOMMUs on both
> > > > sides. This can't be supported today as we have a single struct device
> > > > per channel and thus can't configure the IOMMU independently on the two
> > > > sides.
> > > > 
> > > > It's getting messy :-)
> > > 
> > > Yes I do agree on that, but the problem is today none of the slave drivers
> > > expect or do the mapping, changing that will cause issues...
> > > 
> > > And how many do really have an IOMMU behind them, few out of large set we
> > > have...
> > 
> > Today neither the DMA engine drivers nor the client drivers do the mapping,
> > so we have any issue anyway. The question is on which side to solve it. If
> > I understand correctly you fear that mapping the address in the DMA engine
> > drivers would cause issues with client drivers that don't expect that
> > behaviour, but I don't really see where the issue is. Could you please
> > elaborate ?
> 
> Ping. I don't think we're very far from finding an agreement on this topic. If 
> you prefer we could discuss it on IRC, it can be faster than e-mail.

Sorry about the delay,

Okay, I did look back and checked. I tend to agree with you on this:
clients are not really taking care of the mapping, so the easy approach would
be to get this fixed in dmaengine, which helps in the IOMMU case (which I
still think is in its infancy, but who knows what designers will throw at
us)

Now, I am checking this with Dan on why we started with the client-based
mapping assumption in the slave case; we don't want to miss anything here,
so I will get back in a couple of days..
Laurent Pinchart Feb. 10, 2016, 11:51 p.m. UTC | #17
Hi Vinod,

On Monday 08 February 2016 09:05:31 Vinod Koul wrote:
> On Wed, Feb 03, 2016 at 02:04:06PM +0200, Laurent Pinchart wrote:
> >>>>> Okay I am in two minds for this, doing phys_addr_t seems okay but
> >>>>> somehow I feel we should rather pass dma_addr_t and dmaengien
> >>>>> driver get a right dma address to use and thus fix the clients,
> >>>>> that maybe the right thing to do here, thoughts...?
> >>>> 
> >>>> Given that there should be more clients than DMA engine drivers, and
> >>>> given that knowledge of what has to be done to map a physical
> >>>> address to a DMA address accessible by the DMA engine should not be
> >>>> included in client drivers (in most case I assume using the DMA
> >>>> mapping API will be enough, but details may vary), I believe it
> >>>> makes more sense to pass a phys_addr_t and let the DMA engine
> >>>> drivers handle it.
> >>>> 
> >>>> There's another issue I just remembered. Consider the following
> >>>> cases.
> >>>> 
> >>>> 1. DMA engine channel that has an optional IOMMU covering both the
> >>>> src and dst side. In that case mapping can be performed by the
> >>>> client or DMA engine driver, the DMA mapping API will handle the
> >>>> IOMMU behind the scene.
> >>>> 
> >>>> 2. DMA engine channel that has an optional IOMMU on the memory side
> >>>> and no support for IOMMU on the slave (in the sense of the register
> >>>> in front of the client's FIFO) side. In that case a client mapping
> >>>> buffers on both the src and dst side would set an IOMMU mapped
> >>>> address for the slave side, which wouldn't work. If the DMA engine
> >>>> driver were to perform the mapping then it could skip it on the
> >>>> slave side, knowing that the slave side has no IOMMU.
> >>>> 
> >>>> 3. DMA engine channel that has independently optional IOMMUs on both
> >>>> sides. This can't be supported today as we have a single struct
> >>>> device per channel and thus can't configure the IOMMU independently
> >>>> on the two sides.
> >>>> 
> >>>> It's getting messy :-)
> >>> 
> >>> Yes I do agree on that, but the problem is today none of the slave
> >>> drivers expect or do the mapping, changing that will cause issues...
> >>> 
> >>> And how many do really have an IOMMU behind them, few out of large set
> >>> we have...
> >> 
> >> Today neither the DMA engine drivers nor the client drivers do the
> >> mapping, so we have any issue anyway. The question is on which side to
> >> solve it. If I understand correctly you fear that mapping the address in
> >> the DMA engine drivers would cause issues with client drivers that don't
> >> expect that behaviour, but I don't really see where the issue is. Could
> >> you please elaborate ?
> > 
> > Ping. I don't think we're very far from finding an agreement on this
> > topic. If you prefer we could discuss it on IRC, it can be faster than
> > e-mail.
>
> Sorry about the delay,

No worries.

> Okay I did look back and checked. I tend to agree with you on this and
> client are not really taking care of mapping so easy approach would be to
> get this fixed in dmaengine which helps in the IOMMU case (which I still think
> is in infancy, but who knows how designers will throw their pipe dreams at
> us)
> 
> Now, am checking this with Dan on why we started with client based mapping
> assumption in case of slave, we don't want to miss anything here, so I will
> get back in a couple of days..

Thank you.

Niklas, that's the direction you've already explored with the rcar-dmac 
driver, so it shouldn't cause any issue with your "[PATCH v3 0/8] dmaengine: 
rcar-dmac: add iommu support for slave transfers" patch series. After 
receiving confirmation from Dan and Vinod, could you add an additional patch 
to use phys_addr_t in struct dma_slave_config ?
Laurent Pinchart Feb. 10, 2016, 11:59 p.m. UTC | #18
Hi Niklas,

On Thursday 11 February 2016 01:51:31 Laurent Pinchart wrote:
> On Monday 08 February 2016 09:05:31 Vinod Koul wrote:
> > On Wed, Feb 03, 2016 at 02:04:06PM +0200, Laurent Pinchart wrote:
> >>>>>> Okay I am in two minds for this, doing phys_addr_t seems okay but
> >>>>>> somehow I feel we should rather pass dma_addr_t and dmaengien
> >>>>>> driver get a right dma address to use and thus fix the clients,
> >>>>>> that maybe the right thing to do here, thoughts...?
> >>>>> 
> >>>>> Given that there should be more clients than DMA engine drivers, and
> >>>>> given that knowledge of what has to be done to map a physical
> >>>>> address to a DMA address accessible by the DMA engine should not be
> >>>>> included in client drivers (in most case I assume using the DMA
> >>>>> mapping API will be enough, but details may vary), I believe it
> >>>>> makes more sense to pass a phys_addr_t and let the DMA engine
> >>>>> drivers handle it.
> >>>>> 
> >>>>> There's another issue I just remembered. Consider the following
> >>>>> cases.
> >>>>> 
> >>>>> 1. DMA engine channel that has an optional IOMMU covering both the
> >>>>> src and dst side. In that case mapping can be performed by the
> >>>>> client or DMA engine driver, the DMA mapping API will handle the
> >>>>> IOMMU behind the scene.
> >>>>> 
> >>>>> 2. DMA engine channel that has an optional IOMMU on the memory side
> >>>>> and no support for IOMMU on the slave (in the sense of the register
> >>>>> in front of the client's FIFO) side. In that case a client mapping
> >>>>> buffers on both the src and dst side would set an IOMMU mapped
> >>>>> address for the slave side, which wouldn't work. If the DMA engine
> >>>>> driver were to perform the mapping then it could skip it on the
> >>>>> slave side, knowing that the slave side has no IOMMU.
> >>>>> 
> >>>>> 3. DMA engine channel that has independently optional IOMMUs on both
> >>>>> sides. This can't be supported today as we have a single struct
> >>>>> device per channel and thus can't configure the IOMMU independently
> >>>>> on the two sides.
> >>>>> 
> >>>>> It's getting messy :-)
> >>>> 
> >>>> Yes I do agree on that, but the problem is today none of the slave
> >>>> drivers expect or do the mapping, changing that will cause issues...
> >>>> 
> >>>> And how many do really have an IOMMU behind them, few out of large set
> >>>> we have...
> >>> 
> >>> Today neither the DMA engine drivers nor the client drivers do the
> >>> mapping, so we have an issue anyway. The question is on which side to
> >>> solve it. If I understand correctly you fear that mapping the address
> >>> in the DMA engine drivers would cause issues with client drivers that
> >>> don't expect that behaviour, but I don't really see where the issue is.
> >>> Could you please elaborate ?
> >> 
> >> Ping. I don't think we're very far from finding an agreement on this
> >> topic. If you prefer we could discuss it on IRC, it can be faster than
> >> e-mail.
> > 
> > Sorry about the delay,
> 
> No worries.
> 
> > Okay I did look back and checked. I tend to agree with you on this and
> > client are not really taking care of mapping so easy approach would be to
> > get this fixed in dmaengine which helps in the IOMMU case (which I still think
> > is in infancy, but who knows how designers will throw their pipe dreams at
> > us)
> > 
> > Now, am checking this with Dan on why we started with client based mapping
> > assumption in case of slave, we don't want to miss anything here, so I
> > will get back in a couple of days..
> 
> Thank you.
> 
> Niklas, that's the direction you've already explored with the rcar-dmac
> driver, so it shouldn't cause any issue with your "[PATCH v3 0/8] dmaengine:
> rcar-dmac: add iommu support for slave transfers" patch series. After
> receiving confirmation from Dan and Vinod, could you add an additional
> patch to use phys_addr_t in struct dma_slave_config ?

Scratch that, I see that the patch already exists :-)
Vinod Koul Feb. 15, 2016, 5:29 p.m. UTC | #19
On Thu, Feb 11, 2016 at 01:51:31AM +0200, Laurent Pinchart wrote:
> Hi Vinod,
> 
> > Okay I did look back and checked. I tend to agree with you on this and
> > client are not really taking care of mapping so easy approach would be to
> > get this fixed in dmaengine which helps in the IOMMU case (which I still think
> > is in infancy, but who knows how designers will throw their pipe dreams at
> > us)
> > 
> > Now, am checking this with Dan on why we started with client based mapping
> > assumption in case of slave, we don't want to miss anything here, so I will
> > get back in a couple of days..

So discussed a bit with Dan, and now have resurrected the patch from Linus
and compiled with it, nothing seems to break. But I think for slave cases we
should also change the prep_  calls to take phys_addr_t which will fix the
case for IOMMU being on either side as you pointed out.

Should we keep the interleaved template as dma or phys? I am for the latter. Only non
slave cases will be dma types and mapping done by clients.
diff mbox

Patch

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 7820d07..da94809 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -13,6 +13,7 @@ 
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
+#include <linux/iommu.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -1101,6 +1102,24 @@  rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
 	return desc;
 }
 
+static dma_addr_t __rcar_dmac_dma_map(struct dma_chan *chan, phys_addr_t addr,
+		size_t size, enum dma_data_direction dir)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct page *page = phys_to_page(addr);
+	size_t offset = addr - page_to_phys(page);
+	dma_addr_t map = dma_map_page(chan->device->dev, page, offset, size,
+			dir);
+
+	if (dma_mapping_error(chan->device->dev, map)) {
+		dev_err(chan->device->dev, "chan%u: failed to map %zx@%pap",
+				rchan->index, size, &addr);
+		return 0;
+	}
+
+	return map;
+}
+
 static int rcar_dmac_device_config(struct dma_chan *chan,
 				   struct dma_slave_config *cfg)
 {
@@ -1110,10 +1129,47 @@  static int rcar_dmac_device_config(struct dma_chan *chan,
 	 * We could lock this, but you shouldn't be configuring the
 	 * channel, while using it...
 	 */
-	rchan->src_slave_addr = cfg->src_addr;
-	rchan->dst_slave_addr = cfg->dst_addr;
-	rchan->src_xfer_size = cfg->src_addr_width;
-	rchan->dst_xfer_size = cfg->dst_addr_width;
+
+	/* If we don't have an IOMMU domain, there is no point in trying to use it */
+	if (!iommu_get_domain_for_dev(chan->device->dev)) {
+		rchan->src_slave_addr = cfg->src_addr;
+		rchan->dst_slave_addr = cfg->dst_addr;
+		rchan->src_xfer_size = cfg->src_addr_width;
+		rchan->dst_xfer_size = cfg->dst_addr_width;
+		return 0;
+	}
+
+	/* unmap old */
+	if (rchan->src_slave_addr) {
+		dma_unmap_page(chan->device->dev, rchan->src_slave_addr,
+				rchan->src_xfer_size, DMA_FROM_DEVICE);
+		rchan->src_slave_addr = 0;
+		rchan->src_xfer_size = 0;
+	}
+
+	if (rchan->dst_slave_addr) {
+		dma_unmap_page(chan->device->dev, rchan->dst_slave_addr,
+				rchan->dst_xfer_size, DMA_TO_DEVICE);
+		rchan->dst_slave_addr = 0;
+		rchan->dst_xfer_size = 0;
+	}
+
+	/* map new */
+	if (cfg->src_addr) {
+		rchan->src_slave_addr = __rcar_dmac_dma_map(chan, cfg->src_addr,
+				cfg->src_addr_width, DMA_FROM_DEVICE);
+		if (!rchan->src_slave_addr)
+			return -EIO;
+		rchan->src_xfer_size = cfg->src_addr_width;
+	}
+
+	if (cfg->dst_addr) {
+		rchan->dst_slave_addr = __rcar_dmac_dma_map(chan, cfg->dst_addr,
+				cfg->dst_addr_width, DMA_TO_DEVICE);
+		if (!rchan->dst_slave_addr)
+			return -EIO;
+		rchan->dst_xfer_size = cfg->dst_addr_width;
+	}
 
 	return 0;
 }