diff mbox

[v2,1/5] dma-mapping: add dma_{map,unmap}_page_attrs

Message ID 56B20124.2030809@arm.com (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show

Commit Message

Robin Murphy Feb. 3, 2016, 1:31 p.m. UTC
On 21/01/16 14:01, Niklas Söderlund wrote:
> Add a version of dmap_{map,unmap}_page that can pass on attributes to
> the underlaying map_page. This already exists for dma_{map,unmap}_single
> and dmap_{map,unmap}_sg versions.

This looks reasonable in isolation, but for the task at hand I'm pretty 
sure it's the wrong thing to do. The problem is that the DMA mapping and 
IOMMU APIs are all geared around dealing with RAM, so what you're going 
to end up with if you use this on an ARM system is the slave device's 
MMIO registers mapped in the IOMMU as Normal memory. The net result of 
that is that the interconnects between the IOMMU's downstream port and 
the slave device are going to have free reign to reorder or merge the 
DMA engine's transactions, and I'm sure you can imagine how utterly 
disastrous that would be for e.g. reading/writing a FIFO. It's even 
worse if the DMA engine is cache-coherent (either naturally, or by 
virtue of the IOMMU), in which case the prospect of reads and writes 
coming out of the IOMMU marked as Normal-Cacheable and allocating into 
system caches is even more terrifyingly unpredictable.

I spent a little time recently looking into how we might handle platform 
MSIs with IOMMU-backed DMA mapping, and the notion of slave DMA being a 
similar problem did cross my mind, so I'm glad it's already being looked 
at :) My initial thought was that a robust general solution probably 
involves extending the DMA API with something like dma_map_resource(), 
which would be a no-op with no IOMMU (or with IOMMUs like VT-d where 
magic hardware solves the problem), interacting with something like the 
below extension of the IOMMU API (plucked from my local MSI 
proof-of-concept hacks).

Robin.

--->8---
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 23 Sep 2015 15:49:05 +0100
Subject: [PATCH] iommu: Add MMIO mapping type

On some platforms, MMIO regions might need slightly different treatment
compared to mapping regular memory; add the notion of MMIO mappings to
the IOMMU API's memory type flags, so that callers can let the IOMMU
drivers know to do the right thing.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
  drivers/iommu/io-pgtable-arm.c | 4 +++-
  include/linux/iommu.h          | 1 +
  2 files changed, 4 insertions(+), 1 deletion(-)

--->8---

> Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> ---
>   include/asm-generic/dma-mapping-common.h | 20 +++++++++++++-------
>   1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h
> index b1bc954..bb08302 100644
> --- a/include/asm-generic/dma-mapping-common.h
> +++ b/include/asm-generic/dma-mapping-common.h
> @@ -74,29 +74,33 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
>   		ops->unmap_sg(dev, sg, nents, dir, attrs);
>   }
>
> -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
> -				      size_t offset, size_t size,
> -				      enum dma_data_direction dir)
> +static inline dma_addr_t dma_map_page_attrs(struct device *dev,
> +					    struct page *page,
> +					    size_t offset, size_t size,
> +					    enum dma_data_direction dir,
> +					    struct dma_attrs *attrs)
>   {
>   	struct dma_map_ops *ops = get_dma_ops(dev);
>   	dma_addr_t addr;
>
>   	kmemcheck_mark_initialized(page_address(page) + offset, size);
>   	BUG_ON(!valid_dma_direction(dir));
> -	addr = ops->map_page(dev, page, offset, size, dir, NULL);
> +	addr = ops->map_page(dev, page, offset, size, dir, attrs);
>   	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
>
>   	return addr;
>   }
>
> -static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
> -				  size_t size, enum dma_data_direction dir)
> +static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
> +					size_t size,
> +					enum dma_data_direction dir,
> +					struct dma_attrs *attrs)
>   {
>   	struct dma_map_ops *ops = get_dma_ops(dev);
>
>   	BUG_ON(!valid_dma_direction(dir));
>   	if (ops->unmap_page)
> -		ops->unmap_page(dev, addr, size, dir, NULL);
> +		ops->unmap_page(dev, addr, size, dir, attrs);
>   	debug_dma_unmap_page(dev, addr, size, dir, false);
>   }
>
> @@ -181,6 +185,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
>   #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
>   #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
>   #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL)
> +#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, NULL)
> +#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, NULL)
>
>   extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
>   			   void *cpu_addr, dma_addr_t dma_addr, size_t size);
>

Comments

Laurent Pinchart Feb. 3, 2016, 5:39 p.m. UTC | #1
Hi Robin,

On Wednesday 03 February 2016 13:31:16 Robin Murphy wrote:
> On 21/01/16 14:01, Niklas Söderlund wrote:
> > Add a version of dmap_{map,unmap}_page that can pass on attributes to
> > the underlaying map_page. This already exists for dma_{map,unmap}_single
> > and dmap_{map,unmap}_sg versions.
> 
> This looks reasonable in isolation, but for the task at hand I'm pretty
> sure it's the wrong thing to do. The problem is that the DMA mapping and
> IOMMU APIs are all geared around dealing with RAM, so what you're going
> to end up with if you use this on an ARM system is the slave device's
> MMIO registers mapped in the IOMMU as Normal memory. The net result of
> that is that the interconnects between the IOMMU's downstream port and
> the slave device are going to have free reign to reorder or merge the
> DMA engine's transactions, and I'm sure you can imagine how utterly
> disastrous that would be for e.g. reading/writing a FIFO. It's even
> worse if the DMA engine is cache-coherent (either naturally, or by
> virtue of the IOMMU), in which case the prospect of reads and writes
> coming out of the IOMMU marked as Normal-Cacheable and allocating into
> system caches is even more terrifyingly unpredictable.
> 
> I spent a little time recently looking into how we might handle platform
> MSIs with IOMMU-backed DMA mapping, and the notion of slave DMA being a
> similar problem did cross my mind, so I'm glad it's already being looked
> at :) My initial thought was that a robust general solution probably
> involves extending the DMA API with something like dma_map_resource(),
> which would be a no-op with no IOMMU (or with IOMMUs like VT-d where
> magic hardware solves the problem),

I had mentioned a dma_map_phys() in a previous reply to one of the patches in 
this thread, so I think we came up to the same conclusion. I'll let you decide 
whether we're both right or wrong, and hope you'll pick the first option :-) 
dma_map_resource() could be a more specific API than dma_map_phys() (depending 
on the parameters it takes), and thus less prone to abuse, or at least more 
explicit.

> interacting with something like the below extension of the IOMMU API
> (plucked from my local MSI proof-of-concept hacks).
> 
> Robin.
> 
> --->8---
> From: Robin Murphy <robin.murphy@arm.com>
> Date: Wed, 23 Sep 2015 15:49:05 +0100
> Subject: [PATCH] iommu: Add MMIO mapping type
> 
> On some platforms, MMIO regions might need slightly different treatment
> compared to mapping regular memory; add the notion of MMIO mappings to
> the IOMMU API's memory type flags, so that callers can let the IOMMU
> drivers know to do the right thing.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

This looks good to me.

Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

> ---
>   drivers/iommu/io-pgtable-arm.c | 4 +++-
>   include/linux/iommu.h          | 1 +
>   2 files changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 8bbcbfe..5b5c299 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -363,7 +363,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct
> arm_lpae_io_pgtable *data,
>   			pte |= ARM_LPAE_PTE_HAP_READ;
>   		if (prot & IOMMU_WRITE)
>   			pte |= ARM_LPAE_PTE_HAP_WRITE;
> -		if (prot & IOMMU_CACHE)
> +		if (prot & IOMMU_MMIO)
> +			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
> +		else if (prot & IOMMU_CACHE)
>   			pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
>   		else
>   			pte |= ARM_LPAE_PTE_MEMATTR_NC;
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index f28dff3..addd25d 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -30,6 +30,7 @@
>   #define IOMMU_WRITE	(1 << 1)
>   #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
>   #define IOMMU_NOEXEC	(1 << 3)
> +#define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
> 
>   struct iommu_ops;
>   struct iommu_group;
> --->8---
> 
> > Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> > ---
> > 
> >   include/asm-generic/dma-mapping-common.h | 20 +++++++++++++-------
> >   1 file changed, 13 insertions(+), 7 deletions(-)
> > 
> > diff --git a/include/asm-generic/dma-mapping-common.h
> > b/include/asm-generic/dma-mapping-common.h index b1bc954..bb08302 100644
> > --- a/include/asm-generic/dma-mapping-common.h
> > +++ b/include/asm-generic/dma-mapping-common.h
> > @@ -74,29 +74,33 @@ static inline void dma_unmap_sg_attrs(struct device
> > *dev, struct scatterlist *sg> 
> >   		ops->unmap_sg(dev, sg, nents, dir, attrs);
> >   
> >   }
> > 
> > -static inline dma_addr_t dma_map_page(struct device *dev, struct page
> > *page, -				      size_t offset, size_t size,
> > -				      enum dma_data_direction dir)
> > +static inline dma_addr_t dma_map_page_attrs(struct device *dev,
> > +					    struct page *page,
> > +					    size_t offset, size_t size,
> > +					    enum dma_data_direction dir,
> > +					    struct dma_attrs *attrs)
> > 
> >   {
> >   
> >   	struct dma_map_ops *ops = get_dma_ops(dev);
> >   	dma_addr_t addr;
> >   	
> >   	kmemcheck_mark_initialized(page_address(page) + offset, size);
> >   	BUG_ON(!valid_dma_direction(dir));
> > 
> > -	addr = ops->map_page(dev, page, offset, size, dir, NULL);
> > +	addr = ops->map_page(dev, page, offset, size, dir, attrs);
> > 
> >   	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
> >   	
> >   	return addr;
> >   
> >   }
> > 
> > -static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
> > -				  size_t size, enum dma_data_direction dir)
> > +static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t
> > addr, +					size_t size,
> > +					enum dma_data_direction dir,
> > +					struct dma_attrs *attrs)
> > 
> >   {
> >   
> >   	struct dma_map_ops *ops = get_dma_ops(dev);
> >   	
> >   	BUG_ON(!valid_dma_direction(dir));
> >   	if (ops->unmap_page)
> > 
> > -		ops->unmap_page(dev, addr, size, dir, NULL);
> > +		ops->unmap_page(dev, addr, size, dir, attrs);
> > 
> >   	debug_dma_unmap_page(dev, addr, size, dir, false);
> >   
> >   }
> > 
> > @@ -181,6 +185,8 @@ dma_sync_sg_for_device(struct device *dev, struct
> > scatterlist *sg,> 
> >   #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r,
> >   NULL) #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
> >   #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL)
> > 
> > +#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r,
> > NULL) +#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s,
> > r, NULL)> 
> >   extern int dma_common_mmap(struct device *dev, struct vm_area_struct
> >   *vma,
> >   
> >   			   void *cpu_addr, dma_addr_t dma_addr, size_t size);
diff mbox

Patch

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8bbcbfe..5b5c299 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -363,7 +363,9 @@  static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
  			pte |= ARM_LPAE_PTE_HAP_READ;
  		if (prot & IOMMU_WRITE)
  			pte |= ARM_LPAE_PTE_HAP_WRITE;
-		if (prot & IOMMU_CACHE)
+		if (prot & IOMMU_MMIO)
+			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
+		else if (prot & IOMMU_CACHE)
  			pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
  		else
  			pte |= ARM_LPAE_PTE_MEMATTR_NC;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f28dff3..addd25d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -30,6 +30,7 @@ 
  #define IOMMU_WRITE	(1 << 1)
  #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
  #define IOMMU_NOEXEC	(1 << 3)
+#define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */

  struct iommu_ops;
  struct iommu_group;