
[03/19] dma-iommu: don't use a scatterlist in iommu_dma_alloc

Message ID 20190114094159.27326-4-hch@lst.de (mailing list archive)
State New, archived
Series [01/19] dma-mapping: add a Kconfig symbol to indicate arch_dma_prep_coherent presence

Commit Message

Christoph Hellwig Jan. 14, 2019, 9:41 a.m. UTC
Directly iterating over the pages makes the code a bit simpler and
prepares for the following changes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/dma-iommu.c | 40 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

Comments

Robin Murphy Feb. 1, 2019, 3:24 p.m. UTC | #1
On 14/01/2019 09:41, Christoph Hellwig wrote:
> Directly iterating over the pages makes the code a bit simpler and
> prepares for the following changes.

It also defeats the whole purpose of __iommu_dma_alloc_pages(), so I'm 
not really buying the simplification angle - you've *seen* that code, 
right? ;)

If you want simple, get rid of the pages array entirely. However, as 
I've touched on previously, it's all there for a reason: making the 
individual iommu_map() calls as large as possible gives significant 
performance/power benefits in many cases, which I'm not too keen to 
regress. In fact I still have the spark of an idea to sort the filled 
pages array for optimal physical layout; I've just never had the free 
time to play with it. FWIW, since iommu_map_sg() was new and promising 
at the time, using sg_alloc_table_from_pages() actually *was* the 
simplification over copying arch/arm's __iommu_create_mapping() logic.
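
For illustration, a minimal sketch (not the in-tree code) of the kind of 
coalescing the pages array is there to enable, reusing the names from the 
patch below (domain, iova, prot, pages, count, i, mapped, with i and 
mapped starting at 0) and assuming whole physically contiguous runs are 
handed to a single iommu_map() call so the driver can pick larger block 
sizes:

	while (i < count) {
		phys_addr_t phys = page_to_phys(pages[i]);
		size_t run = PAGE_SIZE;

		/* extend the run while the next page is physically contiguous */
		while (i + run / PAGE_SIZE < count &&
		       page_to_phys(pages[i + run / PAGE_SIZE]) == phys + run)
			run += PAGE_SIZE;

		/* one iommu_map() per contiguous run, not one per page */
		if (iommu_map(domain, iova + mapped, phys, run, prot))
			goto out_unmap;
		mapped += run;
		i += run / PAGE_SIZE;
	}

With higher-order allocations from __iommu_dma_alloc_pages(), each run 
covers many pages, so the IOMMU driver sees sizes it can satisfy with 
block PTEs instead of a long series of PAGE_SIZE mappings.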

Robin.

Christoph Hellwig Feb. 1, 2019, 4:16 p.m. UTC | #2
On Fri, Feb 01, 2019 at 03:24:45PM +0000, Robin Murphy wrote:
> On 14/01/2019 09:41, Christoph Hellwig wrote:
>> Directly iterating over the pages makes the code a bit simpler and
>> prepares for the following changes.
>
> It also defeats the whole purpose of __iommu_dma_alloc_pages(), so I'm not 
> really buying the simplification angle - you've *seen* that code, right? ;)

How does it defeat the purpose of __iommu_dma_alloc_pages?
Robin Murphy Feb. 6, 2019, 3:28 p.m. UTC | #3
On 01/02/2019 16:16, Christoph Hellwig wrote:
> On Fri, Feb 01, 2019 at 03:24:45PM +0000, Robin Murphy wrote:
>> On 14/01/2019 09:41, Christoph Hellwig wrote:
>>> Directly iterating over the pages makes the code a bit simpler and
>>> prepares for the following changes.
>>
>> It also defeats the whole purpose of __iommu_dma_alloc_pages(), so I'm not
>> really buying the simplification angle - you've *seen* that code, right? ;)
> 
> How does it defeat the purpose of __iommu_dma_alloc_pages?

Because if iommu_map() only gets called at PAGE_SIZE granularity, then 
the IOMMU PTEs will be created at PAGE_SIZE (or smaller) granularity, so 
any effort to get higher-order allocations matching larger IOMMU block 
sizes is wasted, and we may as well have just done this:

	for (i = 0; i < count; i++) {
		struct page *page = alloc_page(gfp);
		...
		iommu_map(..., page_to_phys(page), PAGE_SIZE, ...);
	}

Really, it's a shame we have to split huge pages for the CPU remap, 
since in the common case the CPU MMU will have a matching block size, 
but IIRC there was something in vmap() or thereabouts that explicitly 
chokes on them.
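
For context, the CPU-side remap essentially boils down to a vmap() of the 
(already split) order-0 pages, which only installs PAGE_SIZE PTEs for such 
an array; that is why the higher-order allocations get broken up first. A 
rough sketch, assuming plain PAGE_KERNEL protections:

	/* rough sketch: remap the split pages into vmalloc space */
	void *vaddr = vmap(pages, count, VM_MAP, PAGE_KERNEL);
	if (!vaddr)
		return NULL;	/* caller unwinds the IOMMU mapping */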

Robin.
Christoph Hellwig Feb. 11, 2019, 4 p.m. UTC | #4
On Wed, Feb 06, 2019 at 03:28:28PM +0000, Robin Murphy wrote:
> Because if iommu_map() only gets called at PAGE_SIZE granularity, then the 
> IOMMU PTEs will be created at PAGE_SIZE (or smaller) granularity, so any 
> effort to get higher-order allocations matching larger IOMMU block sizes is 
> wasted, and we may as well have just done this:
>
> 	for (i = 0; i < count; i++) {
> 		struct page *page = alloc_page(gfp);
> 		...
> 		iommu_map(..., page_to_phys(page), PAGE_SIZE, ...);
> 	}

True.  I've dropped this patch.

> Really, it's a shame we have to split huge pages for the CPU remap, since 
> in the common case the CPU MMU will have a matching block size, but IIRC 
> there was something in vmap() or thereabouts that explicitly chokes on 
> them.

That just needs a volunteer to fix the implementation, as there is no
fundamental reason not to remap large pages.

Patch

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d19f3d6b43c1..4f5546a103d8 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -30,6 +30,7 @@ 
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/scatterlist.h>
+#include <linux/highmem.h>
 #include <linux/vmalloc.h>
 
 struct iommu_dma_msi_page {
@@ -549,9 +550,9 @@  struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	struct page **pages;
-	struct sg_table sgt;
 	dma_addr_t iova;
-	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
+	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap, i;
+	size_t mapped = 0;
 
 	*handle = DMA_MAPPING_ERROR;
 
@@ -576,32 +577,25 @@  struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 	if (!iova)
 		goto out_free_pages;
 
-	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
-		goto out_free_iova;
+	for (i = 0; i < count; i++) {
+		phys_addr_t phys = page_to_phys(pages[i]);
 
-	if (!(prot & IOMMU_CACHE)) {
-		struct sg_mapping_iter miter;
-		/*
-		 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
-		 * sufficient here, so skip it by using the "wrong" direction.
-		 */
-		sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
-		while (sg_miter_next(&miter))
-			flush_page(dev, miter.addr, page_to_phys(miter.page));
-		sg_miter_stop(&miter);
-	}
+		if (!(prot & IOMMU_CACHE)) {
+			void *vaddr = kmap_atomic(pages[i]);
 
-	if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot)
-			< size)
-		goto out_free_sg;
+			flush_page(dev, vaddr, phys);
+			kunmap_atomic(vaddr);
+		}
+
+		if (iommu_map(domain, iova + mapped, phys, PAGE_SIZE, prot))
+			goto out_unmap;
+		mapped += PAGE_SIZE;
+	}
 
 	*handle = iova;
-	sg_free_table(&sgt);
 	return pages;
-
-out_free_sg:
-	sg_free_table(&sgt);
-out_free_iova:
+out_unmap:
+	iommu_unmap(domain, iova, mapped);
 	iommu_dma_free_iova(cookie, iova, size);
 out_free_pages:
 	__iommu_dma_free_pages(pages, count);