diff mbox series

[02/10] arm64/iommu: don't remap contiguous allocations for coherent devices

Message ID 20181208173702.15158-3-hch@lst.de (mailing list archive)
State Not Applicable
Headers show
Series [01/10] dma-direct: provide a generic implementation of DMA_ATTR_NON_CONSISTENT | expand

Commit Message

Christoph Hellwig Dec. 8, 2018, 5:36 p.m. UTC
There is no need to have an additional kernel mapping for a contiguous
allocation if the device already is DMA coherent, so skip it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/mm/dma-mapping.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

Comments

Robin Murphy Dec. 10, 2018, 7:19 p.m. UTC | #1
On 08/12/2018 17:36, Christoph Hellwig wrote:
> There is no need to have an additional kernel mapping for a contiguous
> allocation if the device already is DMA coherent, so skip it.

FWIW, the "need" was that it kept the code in this path simple and the 
mapping behaviour consistent with the regular iommu_dma_alloc() case. 
One could quite easily retort that there is no need for the extra 
complexity of this patch, since vmalloc is cheap on a 64-bit architecture ;)

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   arch/arm64/mm/dma-mapping.c | 35 ++++++++++++++++++++++-------------
>   1 file changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 4c0f498069e8..d39b60113539 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -255,13 +255,18 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
>   						    size >> PAGE_SHIFT);
>   			return NULL;
>   		}
> +
> +		if (coherent) {
> +			memset(addr, 0, size);
> +			return addr;
> +		}
> +
>   		addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
>   						   prot,
>   						   __builtin_return_address(0));
>   		if (addr) {
>   			memset(addr, 0, size);
> -			if (!coherent)
> -				__dma_flush_area(page_to_virt(page), iosize);
> +			__dma_flush_area(page_to_virt(page), iosize);

Oh poo - seems I missed it at the time but the existing logic here is 
wrong. Let me send a separate fix to flip those statements into the 
correct order...

Robin.

>   		} else {
>   			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
>   			dma_release_from_contiguous(dev, page,
> @@ -309,7 +314,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
>   
>   		iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
>   		dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
> -		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
> +
> +		if (!dev_is_dma_coherent(dev))
> +			dma_common_free_remap(cpu_addr, size, VM_USERMAP);
>   	} else if (is_vmalloc_addr(cpu_addr)){
>   		struct vm_struct *area = find_vm_area(cpu_addr);
>   
> @@ -336,11 +343,12 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>   		return ret;
>   
>   	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> -		/*
> -		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
> -		 * hence in the vmalloc space.
> -		 */
> -		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
> +		unsigned long pfn;
> +
> +		if (dev_is_dma_coherent(dev))
> +			pfn = virt_to_pfn(cpu_addr);
> +		else
> +			pfn = vmalloc_to_pfn(cpu_addr);
>   		return __swiotlb_mmap_pfn(vma, pfn, size);
>   	}
>   
> @@ -359,11 +367,12 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
>   	struct vm_struct *area = find_vm_area(cpu_addr);
>   
>   	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> -		/*
> -		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
> -		 * hence in the vmalloc space.
> -		 */
> -		struct page *page = vmalloc_to_page(cpu_addr);
> +		struct page *page;
> +
> +		if (dev_is_dma_coherent(dev))
> +			page = virt_to_page(cpu_addr);
> +		else
> +			page = vmalloc_to_page(cpu_addr);
>   		return __swiotlb_get_sgtable_page(sgt, page, size);
>   	}
>   
>
Christoph Hellwig Dec. 10, 2018, 7:25 p.m. UTC | #2
On Mon, Dec 10, 2018 at 07:19:30PM +0000, Robin Murphy wrote:
> On 08/12/2018 17:36, Christoph Hellwig wrote:
>> There is no need to have an additional kernel mapping for a contiguous
>> allocation if the device already is DMA coherent, so skip it.
>
> FWIW, the "need" was that it kept the code in this path simple and the 
> mapping behaviour consistent with the regular iommu_dma_alloc() case. One 
> could quite easily retort that there is no need for the extra complexity of 
> this patch, since vmalloc is cheap on a 64-bit architecture ;)

Heh.  Well, without the remap we do less work, we prepare for a simple
implementation of DMA_ATTR_NON_CONSISTENT, and also prepapre the code
to be better reusable for architectures that don't do remapping of
DMA allocations at all.

>>   		if (addr) {
>>   			memset(addr, 0, size);
>> -			if (!coherent)
>> -				__dma_flush_area(page_to_virt(page), iosize);
>> +			__dma_flush_area(page_to_virt(page), iosize);
>
> Oh poo - seems I missed it at the time but the existing logic here is 
> wrong. Let me send a separate fix to flip those statements into the correct 
> order...

Yes, flushing the remapped alias only after zeroing it looks odd.
diff mbox series

Patch

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4c0f498069e8..d39b60113539 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -255,13 +255,18 @@  static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 						    size >> PAGE_SHIFT);
 			return NULL;
 		}
+
+		if (coherent) {
+			memset(addr, 0, size);
+			return addr;
+		}
+
 		addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
 						   prot,
 						   __builtin_return_address(0));
 		if (addr) {
 			memset(addr, 0, size);
-			if (!coherent)
-				__dma_flush_area(page_to_virt(page), iosize);
+			__dma_flush_area(page_to_virt(page), iosize);
 		} else {
 			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
 			dma_release_from_contiguous(dev, page,
@@ -309,7 +314,9 @@  static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 
 		iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
 		dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
-		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+
+		if (!dev_is_dma_coherent(dev))
+			dma_common_free_remap(cpu_addr, size, VM_USERMAP);
 	} else if (is_vmalloc_addr(cpu_addr)){
 		struct vm_struct *area = find_vm_area(cpu_addr);
 
@@ -336,11 +343,12 @@  static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		return ret;
 
 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
-		/*
-		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
-		 * hence in the vmalloc space.
-		 */
-		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
+		unsigned long pfn;
+
+		if (dev_is_dma_coherent(dev))
+			pfn = virt_to_pfn(cpu_addr);
+		else
+			pfn = vmalloc_to_pfn(cpu_addr);
 		return __swiotlb_mmap_pfn(vma, pfn, size);
 	}
 
@@ -359,11 +367,12 @@  static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 	struct vm_struct *area = find_vm_area(cpu_addr);
 
 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
-		/*
-		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
-		 * hence in the vmalloc space.
-		 */
-		struct page *page = vmalloc_to_page(cpu_addr);
+		struct page *page;
+
+		if (dev_is_dma_coherent(dev))
+			page = virt_to_page(cpu_addr);
+		else
+			page = vmalloc_to_page(cpu_addr);
 		return __swiotlb_get_sgtable_page(sgt, page, size);
 	}