Message ID | 1485861178-22694-1-git-send-email-geert+renesas@glider.be (mailing list archive) |
--- | --- |
State | New, archived |
Headers | show |
Hi Geert, On 31/01/17 11:12, Geert Uytterhoeven wrote: > Add support for allocating physically contiguous DMA buffers on arm64 > systems with an IOMMU. This can be useful when two or more devices > with different memory requirements are involved in buffer sharing. > > Note that as this uses the CMA allocator, setting the > DMA_ATTR_FORCE_CONTIGUOUS attribute has a runtime-dependency on > CONFIG_DMA_CMA, just like on arm32. > > For arm64 systems using swiotlb, no changes are needed to support the > allocation of physically contiguous DMA buffers: > - swiotlb always uses physically contiguous buffers (up to > IO_TLB_SEGSIZE = 128 pages), > - arm64's __dma_alloc_coherent() already calls > dma_alloc_from_contiguous() when CMA is available. > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> > --- > v3: > - Add Acked-by, > - Update comment to "one of _4_ things", > - Call dma_alloc_from_contiguous() and iommu_dma_map_page() directly, > as suggested by Robin Murphy, > > v2: > - New, handle dispatching in the arch (arm64) code, as requested by > Robin Murphy. 
> --- > arch/arm64/mm/dma-mapping.c | 63 ++++++++++++++++++++++++++++++++++----------- > 1 file changed, 48 insertions(+), 15 deletions(-) > > diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c > index 1d7d5d2881db7c19..b314256fcee028ce 100644 > --- a/arch/arm64/mm/dma-mapping.c > +++ b/arch/arm64/mm/dma-mapping.c > @@ -577,20 +577,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, > */ > gfp |= __GFP_ZERO; > > - if (gfpflags_allow_blocking(gfp)) { > - struct page **pages; > - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); > - > - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, > - handle, flush_page); > - if (!pages) > - return NULL; > - > - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, > - __builtin_return_address(0)); > - if (!addr) > - iommu_dma_free(dev, pages, iosize, handle); > - } else { > + if (!gfpflags_allow_blocking(gfp)) { > struct page *page; > /* > * In atomic context we can't remap anything, so we'll only > @@ -614,6 +601,45 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, > __free_from_pool(addr, size); > addr = NULL; > } > + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { > + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); > + struct page *page; > + > + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, > + get_order(size)); > + if (!page) > + return NULL; > + > + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); > + if (iommu_dma_mapping_error(dev, *handle)) { > + dma_release_from_contiguous(dev, page, > + size >> PAGE_SHIFT); > + return NULL; > + } > + if (!coherent) I think we might be able to return early if coherent here, as the non-IOMMU __dma_alloc() does. We still need a (cacheable) remap in the normal coherent case to make whatever pages iommu_dma_alloc() scrapes together appear contiguous, but that obviously isn't a concern here. 
However, I'm not entirely confident that that wouldn't break (or at least further complicate) the already-horrible "what the hell is this?" logic in __iommu_free_attrs() below, so I'm inclined to leave it as-is. > + __dma_flush_area(page_to_virt(page), iosize); > + > + addr = dma_common_contiguous_remap(page, size, VM_USERMAP, > + prot, > + __builtin_return_address(0)); > + if (!addr) { > + iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); > + dma_release_from_contiguous(dev, page, > + size >> PAGE_SHIFT); > + } > + } else { > + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); > + struct page **pages; > + > + pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, > + handle, flush_page); > + if (!pages) > + return NULL; > + > + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, > + __builtin_return_address(0)); > + if (!addr) > + iommu_dma_free(dev, pages, iosize, handle); > } > return addr; > } > @@ -625,7 +651,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, > > size = PAGE_ALIGN(size); > /* > - * @cpu_addr will be one of 3 things depending on how it was allocated: > + * @cpu_addr will be one of 4 things depending on how it was allocated: > + * - A remapped array of pages for contiguous allocations. > * - A remapped array of pages from iommu_dma_alloc(), for all > * non-atomic allocations. > * - A non-cacheable alias from the atomic pool, for atomic > @@ -637,6 +664,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, > if (__in_atomic_pool(cpu_addr, size)) { > iommu_dma_unmap_page(dev, handle, iosize, 0, 0); > __free_from_pool(cpu_addr, size); > + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { > + struct page *page = phys_to_page(dma_to_phys(dev, handle)); handle is the IOVA returned by the original iommu_dma_map_page() call, so I can't see this working very well. 
I was about to naively suggest virt_to_page(cpu_addr), except of course cpu_addr isn't going to be a linear map address :( I think that works out to another argument in favour of not trying to skip the remap when coherent, since that would mean we can consistently rely on vmalloc_to_page(cpu_addr) here, and everything *should* be OK (crosses fingers...) Robin. > + > + iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); > + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); > + dma_common_free_remap(cpu_addr, size, VM_USERMAP); > } else if (is_vmalloc_addr(cpu_addr)){ > struct vm_struct *area = find_vm_area(cpu_addr); > >
Hi Robin, On Thu, Feb 2, 2017 at 2:15 PM, Robin Murphy <robin.murphy@arm.com> wrote: > On 31/01/17 11:12, Geert Uytterhoeven wrote: >> Add support for allocating physically contiguous DMA buffers on arm64 >> systems with an IOMMU. This can be useful when two or more devices >> with different memory requirements are involved in buffer sharing. >> >> Note that as this uses the CMA allocator, setting the >> DMA_ATTR_FORCE_CONTIGUOUS attribute has a runtime-dependency on >> CONFIG_DMA_CMA, just like on arm32. >> @@ -625,7 +651,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, >> >> size = PAGE_ALIGN(size); >> /* >> - * @cpu_addr will be one of 3 things depending on how it was allocated: >> + * @cpu_addr will be one of 4 things depending on how it was allocated: >> + * - A remapped array of pages for contiguous allocations. >> * - A remapped array of pages from iommu_dma_alloc(), for all >> * non-atomic allocations. >> * - A non-cacheable alias from the atomic pool, for atomic >> @@ -637,6 +664,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, >> if (__in_atomic_pool(cpu_addr, size)) { >> iommu_dma_unmap_page(dev, handle, iosize, 0, 0); >> __free_from_pool(cpu_addr, size); >> + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { >> + struct page *page = phys_to_page(dma_to_phys(dev, handle)); > > handle is the IOVA returned by the original iommu_dma_map_page() call, > so I can't see this working very well. I was about to naively suggest > virt_to_page(cpu_addr), except of course cpu_addr isn't going to be a > linear map address :( You're right: upon closer look, this will free the wrong pages in the CMA allocator. Funny, that the system didn't scream... Thanks for noticing, I'll look into fixing that... Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. 
But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1d7d5d2881db7c19..b314256fcee028ce 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -577,20 +577,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, */ gfp |= __GFP_ZERO; - if (gfpflags_allow_blocking(gfp)) { - struct page **pages; - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); - - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle, flush_page); - if (!pages) - return NULL; - - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, - __builtin_return_address(0)); - if (!addr) - iommu_dma_free(dev, pages, iosize, handle); - } else { + if (!gfpflags_allow_blocking(gfp)) { struct page *page; /* * In atomic context we can't remap anything, so we'll only @@ -614,6 +601,45 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, __free_from_pool(addr, size); addr = NULL; } + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size)); + if (!page) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + return NULL; + } + if (!coherent) + __dma_flush_area(page_to_virt(page), iosize); + + addr = dma_common_contiguous_remap(page, size, VM_USERMAP, + prot, + __builtin_return_address(0)); + if (!addr) { + iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + } + } else { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page **pages; + + pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, + handle, flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + 
__builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, iosize, handle); } return addr; } @@ -625,7 +651,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); /* - * @cpu_addr will be one of 3 things depending on how it was allocated: + * @cpu_addr will be one of 4 things depending on how it was allocated: + * - A remapped array of pages for contiguous allocations. * - A remapped array of pages from iommu_dma_alloc(), for all * non-atomic allocations. * - A non-cacheable alias from the atomic pool, for atomic @@ -637,6 +664,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (__in_atomic_pool(cpu_addr, size)) { iommu_dma_unmap_page(dev, handle, iosize, 0, 0); __free_from_pool(cpu_addr, size); + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + struct page *page = phys_to_page(dma_to_phys(dev, handle)); + + iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr);