Message ID | 1496695157-19926-4-git-send-email-yong.zhi@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Yong, +Robin, Joerg, IOMMU ML Please see my comments inline. On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi <yong.zhi@intel.com> wrote: > IPU3 mmu based DMA mapping driver > > Signed-off-by: Yong Zhi <yong.zhi@intel.com> > --- > drivers/media/pci/intel/ipu3/Kconfig | 6 + > drivers/media/pci/intel/ipu3/Makefile | 1 + > drivers/media/pci/intel/ipu3/ipu3-dmamap.c | 408 +++++++++++++++++++++++++++++ > drivers/media/pci/intel/ipu3/ipu3-dmamap.h | 20 ++ > 4 files changed, 435 insertions(+) > create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.c > create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.h > > diff --git a/drivers/media/pci/intel/ipu3/Kconfig b/drivers/media/pci/intel/ipu3/Kconfig > index ab2edcb..2030be7 100644 > --- a/drivers/media/pci/intel/ipu3/Kconfig > +++ b/drivers/media/pci/intel/ipu3/Kconfig > @@ -26,3 +26,9 @@ config INTEL_IPU3_MMU > > Say Y here if you have Skylake/Kaby Lake SoC with IPU3. > Say N if un-sure. > + > +config INTEL_IPU3_DMAMAP > + bool "Intel ipu3 DMA mapping driver" > + select IOMMU_IOVA > + ---help--- > + This is IPU3 IOMMU domain specific DMA driver. > diff --git a/drivers/media/pci/intel/ipu3/Makefile b/drivers/media/pci/intel/ipu3/Makefile > index 2b669df..2c2a035 100644 > --- a/drivers/media/pci/intel/ipu3/Makefile > +++ b/drivers/media/pci/intel/ipu3/Makefile > @@ -1,2 +1,3 @@ > obj-$(CONFIG_VIDEO_IPU3_CIO2) += ipu3-cio2.o > obj-$(CONFIG_INTEL_IPU3_MMU) += ipu3-mmu.o > +obj-$(CONFIG_INTEL_IPU3_DMAMAP) += ipu3-dmamap.o > diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.c b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c > new file mode 100644 > index 0000000..74704d9 > --- /dev/null > +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c > @@ -0,0 +1,408 @@ > +/* > + * Copyright (c) 2017 Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version > + * 2 as published by the Free Software Foundation. 
> + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + */ > +#include <linux/highmem.h> > +#include <linux/slab.h> > +#include <linux/version.h> > +#include <linux/vmalloc.h> > +#include "ipu3-mmu.h" > + > +/* Begin of things adapted from arch/arm/mm/dma-mapping.c */ ARM's DMA ops are not a good example of today's coding standards. There are already generic DMA mapping helpers available in drivers/iommu/dma-iommu.c and drivers/base/dma-*. (Hmm, I remember writing this already, déjà vu maybe...) > +static void ipu3_dmamap_clear_buffer(struct page *page, size_t size, > + unsigned long attrs) > +{ > + /* > + * Ensure that the allocated pages are zeroed, and that any data > + * lurking in the kernel direct-mapped region is invalidated. > + */ > + if (PageHighMem(page)) { > + while (size > 0) { > + void *ptr = kmap_atomic(page); > + > + memset(ptr, 0, PAGE_SIZE); > + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) > + clflush_cache_range(ptr, PAGE_SIZE); > + kunmap_atomic(ptr); > + page++; > + size -= PAGE_SIZE; > + } > + } else { > + void *ptr = page_address(page); > + > + memset(ptr, 0, size); > + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) > + clflush_cache_range(ptr, size); > + } > +} > + > +/** > + * ipu3_dmamap_alloc_buffer - allocate buffer based on attributes > + * @dev: struct device pointer > + * @size: size of buffer in bytes > + * @gfp: specify the free page type > + * @attrs: defined in linux/dma-attrs.h > + * > + * This is a helper function for physical page allocation > + * > + * Return array representing buffer from alloc_pages() on success > + * or NULL on failure > + * > + * Must be freed with ipu3_dmamap_free_buffer. 
> + */ > +static struct page **ipu3_dmamap_alloc_buffer(struct device *dev, size_t size, > + gfp_t gfp, unsigned long attrs) > +{ > + struct page **pages; > + int count = size >> PAGE_SHIFT; > + int array_size = count * sizeof(struct page *); > + int i = 0; > + > + /* Allocate mem for array of page ptrs */ > + if (array_size <= PAGE_SIZE) > + pages = kzalloc(array_size, GFP_KERNEL); > + else > + pages = vzalloc(array_size); > + if (!pages) > + return NULL; > + > + gfp |= __GFP_NOWARN; > + > + while (count) { > + int j, order = __fls(count); > + > + pages[i] = alloc_pages(gfp, order); > + while (!pages[i] && order) > + pages[i] = alloc_pages(gfp, --order); > + if (!pages[i]) > + goto error; > + > + if (order) { > + split_page(pages[i], order); > + j = 1 << order; > + while (--j) > + pages[i + j] = pages[i] + j; > + } > + /* Zero and invalidate */ > + ipu3_dmamap_clear_buffer(pages[i], PAGE_SIZE << order, attrs); > + i += 1 << order; > + count -= 1 << order; > + } > + > + return pages; > + > +error: > + while (i--) > + if (pages[i]) > + __free_pages(pages[i], 0); > + if (array_size <= PAGE_SIZE) > + kfree(pages); > + else > + vfree(pages); > + > + return NULL; > +} > + > +/* > + * Free a buffer allocated by ipu3_dmamap_alloc_buffer() > + */ > +static int ipu3_dmamap_free_buffer(struct device *dev, struct page **pages, > + size_t size, unsigned long attrs) > +{ > + int count = size >> PAGE_SHIFT; > + int array_size = count * sizeof(struct page *); > + int i; > + > + for (i = 0; i < count; i++) { > + if (pages[i]) { > + ipu3_dmamap_clear_buffer(pages[i], PAGE_SIZE, attrs); > + __free_pages(pages[i], 0); > + } > + } > + > + if (array_size <= PAGE_SIZE) > + kfree(pages); > + else > + vfree(pages); > + return 0; > +} I believe you don't need the 3 functions above if you use the helpers I mentioned. 
> + > +/* End of things adapted from arch/arm/mm/dma-mapping.c */ > +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, > + dma_addr_t dma_handle, size_t size, > + enum dma_data_direction dir) > +{ > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); > + > + clflush_cache_range(phys_to_virt(daddr), size); You might need to consider another IOMMU on the way here. Generally, given that daddr is your MMU DMA address (not necessarily CPU physical address), you should be able to call dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) > +} > + > +/* > + * Synchronization function to transfer ownership to CPU > + */ > +static void ipu3_dmamap_sync_sg_for_cpu(struct device *dev, > + struct scatterlist *sglist, int nents, > + enum dma_data_direction dir) > +{ > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > + struct scatterlist *sg; > + int i; > + > + for_each_sg(sglist, sg, nents, i) { > + clflush_cache_range( > + phys_to_virt(iommu_iova_to_phys(mmu->domain, > + sg_dma_address(sg))), sg->length); > + } Similarly here. 
> +} > + > +/** > + * ipu3_dmamap_alloc - allocate and map a buffer into KVA > + * @dev: struct device pointer > + * @size: size of buffer in bytes > + * @gfp: specify the get free page type > + * @attrs: defined in linux/dma-attrs.h > + * > + * Return KVA on success or NULL on failure > + * > + */ > +static void *ipu3_dmamap_alloc(struct device *dev, size_t size, > + dma_addr_t *dma_handle, gfp_t gfp, > + unsigned long attrs) > +{ > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > + struct page **pages; > + struct iova *iova; > + struct vm_struct *area; > + int i; > + int rval; > + > + size = PAGE_ALIGN(size); > + > + iova = alloc_iova(&mmu->iova_domain, size >> PAGE_SHIFT, > + dma_get_mask(dev) >> PAGE_SHIFT, 0); > + if (!iova) > + return NULL; > + > + pages = ipu3_dmamap_alloc_buffer(dev, size, gfp, attrs); > + if (!pages) > + goto out_free_iova; > + > + /* Call IOMMU driver to setup pgt */ > + for (i = 0; iova->pfn_lo + i <= iova->pfn_hi; i++) { > + rval = iommu_map(mmu->domain, > + (iova->pfn_lo + i) << PAGE_SHIFT, > + page_to_phys(pages[i]), PAGE_SIZE, 0); > + if (rval) > + goto out_unmap; > + } I think most of the code above is already implemented in drivers/iommu/dma-iommu.c. 
> + /* Now grab a virtual region */ > + area = __get_vm_area(size, 0, VMALLOC_START, VMALLOC_END); > + if (!area) > + goto out_unmap; > + > + area->pages = pages; > + /* And map it in KVA */ > + if (map_vm_area(area, PAGE_KERNEL, pages)) > + goto out_vunmap; > + > + *dma_handle = iova->pfn_lo << PAGE_SHIFT; > + > + return area->addr; > + > +out_vunmap: > + vunmap(area->addr); > + > +out_unmap: > + ipu3_dmamap_free_buffer(dev, pages, size, attrs); > + for (i--; i >= 0; i--) { > + iommu_unmap(mmu->domain, (iova->pfn_lo + i) << PAGE_SHIFT, > + PAGE_SIZE); > + } > + > +out_free_iova: > + __free_iova(&mmu->iova_domain, iova); > + > + return NULL; > +} > + > +/* > + * Counterpart of ipu3_dmamap_alloc > + */ > +static void ipu3_dmamap_free(struct device *dev, size_t size, void *vaddr, > + dma_addr_t dma_handle, unsigned long attrs) > +{ > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > + struct vm_struct *area = find_vm_area(vaddr); > + struct iova *iova = find_iova(&mmu->iova_domain, > + dma_handle >> PAGE_SHIFT); > + > + if (WARN_ON(!area) || WARN_ON(!iova)) > + return; > + > + if (WARN_ON(!area->pages)) > + return; > + > + size = PAGE_ALIGN(size); > + > + iommu_unmap(mmu->domain, iova->pfn_lo << PAGE_SHIFT, > + (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT); > + > + __free_iova(&mmu->iova_domain, iova); > + > + ipu3_dmamap_free_buffer(dev, area->pages, size, attrs); > + > + vunmap(vaddr); > +} > + > +/* > + * Insert each page into user VMA > + */ > +static int ipu3_dmamap_mmap(struct device *dev, struct vm_area_struct *vma, > + void *addr, dma_addr_t iova, size_t size, > + unsigned long attrs) > +{ > + struct vm_struct *area = find_vm_area(addr); > + size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; > + size_t i; > + > + if (!area) > + return -EFAULT; > + > + if (vma->vm_start & ~PAGE_MASK) > + return -EINVAL; > + > + if (size > area->size) > + return -EFAULT; > + > + for (i = 0; i < count; i++) > + vm_insert_page(vma, vma->vm_start + (i << PAGE_SHIFT), > + 
area->pages[i]); Already implemented in dma-iommu.c. Generally it looks like most of the code in this file can be removed by using the generic helpers. (Preserving rest of the code for added recipients.) Best regards, Tomasz > + > + return 0; > +} > + > +static void ipu3_dmamap_unmap_sg(struct device *dev, struct scatterlist *sglist, > + int nents, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > + struct iova *iova = find_iova(&mmu->iova_domain, > + sg_dma_address(sglist) >> PAGE_SHIFT); > + > + if (!nents || WARN_ON(!iova)) > + return; > + > + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) > + ipu3_dmamap_sync_sg_for_cpu(dev, sglist, nents, > + DMA_BIDIRECTIONAL); > + > + iommu_unmap(mmu->domain, iova->pfn_lo << PAGE_SHIFT, > + (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT); > + > + __free_iova(&mmu->iova_domain, iova); > +} > + > +static int ipu3_dmamap_map_sg(struct device *dev, struct scatterlist *sglist, > + int nents, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > + struct scatterlist *sg; > + struct iova *iova; > + size_t size = 0; > + uint32_t iova_addr; > + int i; > + > + for_each_sg(sglist, sg, nents, i) > + size += PAGE_ALIGN(sg->length) >> PAGE_SHIFT; > + > + dev_dbg(dev, "dmamap: mapping sg %d entries, %zu pages\n", nents, size); > + > + iova = alloc_iova(&mmu->iova_domain, size, > + dma_get_mask(dev) >> PAGE_SHIFT, 0); > + if (!iova) > + return 0; > + > + dev_dbg(dev, "dmamap: iova low pfn %lu, high pfn %lu\n", iova->pfn_lo, > + iova->pfn_hi); > + > + iova_addr = iova->pfn_lo; > + > + for_each_sg(sglist, sg, nents, i) { > + int rval; > + > + dev_dbg(dev, > + "dmamap: entry %d: iova 0x%8.8x, phys 0x%16.16llx\n", > + i, iova_addr << PAGE_SHIFT, page_to_phys(sg_page(sg))); > + rval = iommu_map(mmu->domain, iova_addr << PAGE_SHIFT, > + page_to_phys(sg_page(sg)), > + PAGE_ALIGN(sg->length), 0); > + if (rval) > + goto out_fail; > + 
sg_dma_address(sg) = iova_addr << PAGE_SHIFT; > +#ifdef CONFIG_NEED_SG_DMA_LENGTH > + sg_dma_len(sg) = sg->length; > +#endif /* CONFIG_NEED_SG_DMA_LENGTH */ > + > + iova_addr += PAGE_ALIGN(sg->length) >> PAGE_SHIFT; > + } > + > + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) > + ipu3_dmamap_sync_sg_for_cpu(dev, sglist, nents, > + DMA_BIDIRECTIONAL); > + > + return nents; > + > +out_fail: > + ipu3_dmamap_unmap_sg(dev, sglist, i, dir, attrs); > + > + return 0; > +} > + > +/* > + * Create scatter-list for the already allocated DMA buffer > + */ > +static int ipu3_dmamap_get_sgtable(struct device *dev, struct sg_table *sgt, > + void *cpu_addr, dma_addr_t handle, > + size_t size, unsigned long attrs) > +{ > + struct vm_struct *area = find_vm_area(cpu_addr); > + int n_pages; > + int ret; > + > + if (!area || (WARN_ON(!area->pages))) > + return -ENOMEM; > + > + n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; > + > + ret = sg_alloc_table_from_pages(sgt, area->pages, n_pages, 0, size, > + GFP_KERNEL); > + if (ret) > + dev_dbg(dev, "failed to get sgt table\n"); > + > + return ret; > +} > + > +struct dma_map_ops ipu3_dmamap_ops = { > + .alloc = ipu3_dmamap_alloc, > + .free = ipu3_dmamap_free, > + .mmap = ipu3_dmamap_mmap, > + .map_sg = ipu3_dmamap_map_sg, > + .unmap_sg = ipu3_dmamap_unmap_sg, > + .sync_single_for_cpu = ipu3_dmamap_sync_single_for_cpu, > + .sync_single_for_device = ipu3_dmamap_sync_single_for_cpu, > + .sync_sg_for_cpu = ipu3_dmamap_sync_sg_for_cpu, > + .sync_sg_for_device = ipu3_dmamap_sync_sg_for_cpu, > + .get_sgtable = ipu3_dmamap_get_sgtable, > +}; > +EXPORT_SYMBOL_GPL(ipu3_dmamap_ops); > diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.h b/drivers/media/pci/intel/ipu3/ipu3-dmamap.h > new file mode 100644 > index 0000000..714bac0 > --- /dev/null > +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.h > @@ -0,0 +1,20 @@ > +/* > + * Copyright (c) 2017 Intel Corporation. 
> + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version > + * 2 as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + */ > + > +#ifndef __IPU3_DMAMAP_H > +#define __IPU3_DMAMAP_H > + > +extern struct dma_map_ops ipu3_dmamap_ops; > + > +#endif > -- > 2.7.4 >
> > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > > + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); > > + > > + clflush_cache_range(phys_to_virt(daddr), size); > > You might need to consider another IOMMU on the way here. Generally, > given that daddr is your MMU DMA address (not necessarily CPU physical > address), you should be able to call > > dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) The system IOMMU (if enabled) may be cache coherent - and on x86 would be, so it doesn't think it needs to do anything for cache synchronization and the dma_sync won't actually do any work. Alan
Hi Alan, On Thu, Jun 8, 2017 at 2:45 AM, Alan Cox <gnomes@lxorguk.ukuu.org.uk> wrote: >> > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> > + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); >> > + >> > + clflush_cache_range(phys_to_virt(daddr), size); >> >> You might need to consider another IOMMU on the way here. Generally, >> given that daddr is your MMU DMA address (not necessarily CPU physical >> address), you should be able to call >> >> dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) > > Te system IOMMU (if enabled) may be cache coherent - and on x86 would be, > so it doesn't think it needs to do anything for cache synchronization > and the dma_sync won't actually do any work. I'm not very familiar with x86, but typically I found coherency to be an attribute of the DMA master (i.e. if it is connected to a coherent memory port). Looking at all the IPU3 code, it looks like the whole PCI device is non-coherent for some reason (e.g. you can see implicit cache flushes for page tables). So I would have expected that a non-coherent variant of x86 dma_ops is used for the PCI struct device, which would do cache maintenance in its dma_sync_* ops. Best regards, Tomasz
On 07/06/17 10:47, Tomasz Figa wrote: > Hi Yong, > > +Robin, Joerg, IOMMU ML > > Please see my comments inline. > > On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi <yong.zhi@intel.com> wrote: >> IPU3 mmu based DMA mapping driver >> >> Signed-off-by: Yong Zhi <yong.zhi@intel.com> >> --- >> drivers/media/pci/intel/ipu3/Kconfig | 6 + >> drivers/media/pci/intel/ipu3/Makefile | 1 + >> drivers/media/pci/intel/ipu3/ipu3-dmamap.c | 408 +++++++++++++++++++++++++++++ >> drivers/media/pci/intel/ipu3/ipu3-dmamap.h | 20 ++ >> 4 files changed, 435 insertions(+) >> create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.c >> create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.h >> >> diff --git a/drivers/media/pci/intel/ipu3/Kconfig b/drivers/media/pci/intel/ipu3/Kconfig >> index ab2edcb..2030be7 100644 >> --- a/drivers/media/pci/intel/ipu3/Kconfig >> +++ b/drivers/media/pci/intel/ipu3/Kconfig >> @@ -26,3 +26,9 @@ config INTEL_IPU3_MMU >> >> Say Y here if you have Skylake/Kaby Lake SoC with IPU3. >> Say N if un-sure. >> + >> +config INTEL_IPU3_DMAMAP >> + bool "Intel ipu3 DMA mapping driver" >> + select IOMMU_IOVA >> + ---help--- >> + This is IPU3 IOMMU domain specific DMA driver. >> diff --git a/drivers/media/pci/intel/ipu3/Makefile b/drivers/media/pci/intel/ipu3/Makefile >> index 2b669df..2c2a035 100644 >> --- a/drivers/media/pci/intel/ipu3/Makefile >> +++ b/drivers/media/pci/intel/ipu3/Makefile >> @@ -1,2 +1,3 @@ >> obj-$(CONFIG_VIDEO_IPU3_CIO2) += ipu3-cio2.o >> obj-$(CONFIG_INTEL_IPU3_MMU) += ipu3-mmu.o >> +obj-$(CONFIG_INTEL_IPU3_DMAMAP) += ipu3-dmamap.o >> diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.c b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c >> new file mode 100644 >> index 0000000..74704d9 >> --- /dev/null >> +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c >> @@ -0,0 +1,408 @@ >> +/* >> + * Copyright (c) 2017 Intel Corporation. 
>> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License version >> + * 2 as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + */ >> +#include <linux/highmem.h> >> +#include <linux/slab.h> >> +#include <linux/version.h> >> +#include <linux/vmalloc.h> >> +#include "ipu3-mmu.h" >> + >> +/* Begin of things adapted from arch/arm/mm/dma-mapping.c */ > > ARM's DMA ops are not a good example of today's coding standards. > There are already generic DMA mapping helpers available in > drivers/iommu/dma-iommu.c and drivers/base/dma-*. (Hmm, I remember > writing this already, déjà vu maybe...) Yes, dma-iommu exists for precisely this purpose - arch/arm64/mm/dma-mapping.c would have been a better point of reference. >> +static void ipu3_dmamap_clear_buffer(struct page *page, size_t size, >> + unsigned long attrs) >> +{ >> + /* >> + * Ensure that the allocated pages are zeroed, and that any data >> + * lurking in the kernel direct-mapped region is invalidated. 
>> + */ >> + if (PageHighMem(page)) { >> + while (size > 0) { >> + void *ptr = kmap_atomic(page); >> + >> + memset(ptr, 0, PAGE_SIZE); >> + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) >> + clflush_cache_range(ptr, PAGE_SIZE); >> + kunmap_atomic(ptr); >> + page++; >> + size -= PAGE_SIZE; >> + } >> + } else { >> + void *ptr = page_address(page); >> + >> + memset(ptr, 0, size); >> + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) >> + clflush_cache_range(ptr, size); >> + } >> +} >> + >> +/** >> + * ipu3_dmamap_alloc_buffer - allocate buffer based on attributes >> + * @dev: struct device pointer >> + * @size: size of buffer in bytes >> + * @gfp: specify the free page type >> + * @attrs: defined in linux/dma-attrs.h >> + * >> + * This is a helper function for physical page allocation >> + * >> + * Return array representing buffer from alloc_pages() on success >> + * or NULL on failure >> + * >> + * Must be freed with ipu3_dmamap_free_buffer. >> + */ >> +static struct page **ipu3_dmamap_alloc_buffer(struct device *dev, size_t size, >> + gfp_t gfp, unsigned long attrs) >> +{ >> + struct page **pages; >> + int count = size >> PAGE_SHIFT; >> + int array_size = count * sizeof(struct page *); >> + int i = 0; >> + >> + /* Allocate mem for array of page ptrs */ >> + if (array_size <= PAGE_SIZE) >> + pages = kzalloc(array_size, GFP_KERNEL); >> + else >> + pages = vzalloc(array_size); >> + if (!pages) >> + return NULL; >> + >> + gfp |= __GFP_NOWARN; >> + >> + while (count) { >> + int j, order = __fls(count); >> + >> + pages[i] = alloc_pages(gfp, order); >> + while (!pages[i] && order) >> + pages[i] = alloc_pages(gfp, --order); >> + if (!pages[i]) >> + goto error; >> + >> + if (order) { >> + split_page(pages[i], order); >> + j = 1 << order; >> + while (--j) >> + pages[i + j] = pages[i] + j; >> + } >> + /* Zero and invalidate */ >> + ipu3_dmamap_clear_buffer(pages[i], PAGE_SIZE << order, attrs); >> + i += 1 << order; >> + count -= 1 << order; >> + } >> + >> + return pages; >> + >> 
+error: >> + while (i--) >> + if (pages[i]) >> + __free_pages(pages[i], 0); >> + if (array_size <= PAGE_SIZE) >> + kfree(pages); >> + else >> + vfree(pages); >> + >> + return NULL; >> +} >> + >> +/* >> + * Free a buffer allocated by ipu3_dmamap_alloc_buffer() >> + */ >> +static int ipu3_dmamap_free_buffer(struct device *dev, struct page **pages, >> + size_t size, unsigned long attrs) >> +{ >> + int count = size >> PAGE_SHIFT; >> + int array_size = count * sizeof(struct page *); >> + int i; >> + >> + for (i = 0; i < count; i++) { >> + if (pages[i]) { >> + ipu3_dmamap_clear_buffer(pages[i], PAGE_SIZE, attrs); >> + __free_pages(pages[i], 0); >> + } >> + } >> + >> + if (array_size <= PAGE_SIZE) >> + kfree(pages); >> + else >> + vfree(pages); >> + return 0; >> +} > > I believe you don't need the 3 functions above if you use the helpers > I mentioned. > >> + >> +/* End of things adapted from arch/arm/mm/dma-mapping.c */ >> +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, >> + dma_addr_t dma_handle, size_t size, >> + enum dma_data_direction dir) >> +{ >> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); >> + >> + clflush_cache_range(phys_to_virt(daddr), size); > > You might need to consider another IOMMU on the way here. Generally, > given that daddr is your MMU DMA address (not necessarily CPU physical > address), you should be able to call > > dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) I'd hope that this IPU complex is some kind of embedded endpoint thing that bypasses the VT-d IOMMU or is always using its own local RAM, because it would be pretty much unworkable otherwise. The whole infrastructure isn't really capable of dealing with nested IOMMUs, and nested DMA ops would be an equally horrible idea. 
>> +} >> + >> +/* >> + * Synchronization function to transfer ownership to CPU >> + */ >> +static void ipu3_dmamap_sync_sg_for_cpu(struct device *dev, >> + struct scatterlist *sglist, int nents, >> + enum dma_data_direction dir) >> +{ >> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> + struct scatterlist *sg; >> + int i; >> + >> + for_each_sg(sglist, sg, nents, i) { >> + clflush_cache_range( >> + phys_to_virt(iommu_iova_to_phys(mmu->domain, >> + sg_dma_address(sg))), sg->length); Reverse-engineering the IOVA instead of just using sg_phys() here seems a little needless. >> + } > > Similarly here. > >> +} >> + >> +/** >> + * ipu3_dmamap_alloc - allocate and map a buffer into KVA >> + * @dev: struct device pointer >> + * @size: size of buffer in bytes >> + * @gfp: specify the get free page type >> + * @attrs: defined in linux/dma-attrs.h >> + * >> + * Return KVA on success or NULL on failure >> + * >> + */ >> +static void *ipu3_dmamap_alloc(struct device *dev, size_t size, >> + dma_addr_t *dma_handle, gfp_t gfp, >> + unsigned long attrs) >> +{ >> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> + struct page **pages; >> + struct iova *iova; >> + struct vm_struct *area; >> + int i; >> + int rval; >> + >> + size = PAGE_ALIGN(size); >> + >> + iova = alloc_iova(&mmu->iova_domain, size >> PAGE_SHIFT, >> + dma_get_mask(dev) >> PAGE_SHIFT, 0); >> + if (!iova) >> + return NULL; >> + >> + pages = ipu3_dmamap_alloc_buffer(dev, size, gfp, attrs); >> + if (!pages) >> + goto out_free_iova; >> + >> + /* Call IOMMU driver to setup pgt */ >> + for (i = 0; iova->pfn_lo + i <= iova->pfn_hi; i++) { >> + rval = iommu_map(mmu->domain, >> + (iova->pfn_lo + i) << PAGE_SHIFT, >> + page_to_phys(pages[i]), PAGE_SIZE, 0); >> + if (rval) >> + goto out_unmap; >> + } > > I think most of the code above is already implemented in > drivers/iommu/dma-iommu.c. 
> >> + /* Now grab a virtual region */ >> + area = __get_vm_area(size, 0, VMALLOC_START, VMALLOC_END); >> + if (!area) >> + goto out_unmap; >> + >> + area->pages = pages; >> + /* And map it in KVA */ >> + if (map_vm_area(area, PAGE_KERNEL, pages)) >> + goto out_vunmap; >> + >> + *dma_handle = iova->pfn_lo << PAGE_SHIFT; >> + >> + return area->addr; >> + >> +out_vunmap: >> + vunmap(area->addr); >> + >> +out_unmap: >> + ipu3_dmamap_free_buffer(dev, pages, size, attrs); >> + for (i--; i >= 0; i--) { >> + iommu_unmap(mmu->domain, (iova->pfn_lo + i) << PAGE_SHIFT, >> + PAGE_SIZE); >> + } >> + >> +out_free_iova: >> + __free_iova(&mmu->iova_domain, iova); >> + >> + return NULL; >> +} >> + >> +/* >> + * Counterpart of ipu3_dmamap_alloc >> + */ >> +static void ipu3_dmamap_free(struct device *dev, size_t size, void *vaddr, >> + dma_addr_t dma_handle, unsigned long attrs) >> +{ >> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> + struct vm_struct *area = find_vm_area(vaddr); >> + struct iova *iova = find_iova(&mmu->iova_domain, >> + dma_handle >> PAGE_SHIFT); >> + >> + if (WARN_ON(!area) || WARN_ON(!iova)) >> + return; >> + >> + if (WARN_ON(!area->pages)) >> + return; >> + >> + size = PAGE_ALIGN(size); >> + >> + iommu_unmap(mmu->domain, iova->pfn_lo << PAGE_SHIFT, >> + (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT); >> + >> + __free_iova(&mmu->iova_domain, iova); >> + >> + ipu3_dmamap_free_buffer(dev, area->pages, size, attrs); >> + >> + vunmap(vaddr); >> +} >> + >> +/* >> + * Insert each page into user VMA >> + */ >> +static int ipu3_dmamap_mmap(struct device *dev, struct vm_area_struct *vma, >> + void *addr, dma_addr_t iova, size_t size, >> + unsigned long attrs) >> +{ >> + struct vm_struct *area = find_vm_area(addr); >> + size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; >> + size_t i; >> + >> + if (!area) >> + return -EFAULT; >> + >> + if (vma->vm_start & ~PAGE_MASK) >> + return -EINVAL; >> + >> + if (size > area->size) >> + return -EFAULT; >> + >> + for (i = 0; i < count; 
i++) >> + vm_insert_page(vma, vma->vm_start + (i << PAGE_SHIFT), >> + area->pages[i]); > > Already implemented in dma-iommu.c. > > Generally it looks like most of the code in this file can be removed > by using the generic helpers. > > (Preserving rest of the code for added recipients.) > > Best regards, > Tomasz > >> + >> + return 0; >> +} >> + >> +static void ipu3_dmamap_unmap_sg(struct device *dev, struct scatterlist *sglist, >> + int nents, enum dma_data_direction dir, >> + unsigned long attrs) >> +{ >> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> + struct iova *iova = find_iova(&mmu->iova_domain, >> + sg_dma_address(sglist) >> PAGE_SHIFT); >> + >> + if (!nents || WARN_ON(!iova)) >> + return; >> + >> + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) >> + ipu3_dmamap_sync_sg_for_cpu(dev, sglist, nents, >> + DMA_BIDIRECTIONAL); You have the correct DMA direction from the caller, there's no need to hard-code one like this (even if it is ultimately ignored). >> + iommu_unmap(mmu->domain, iova->pfn_lo << PAGE_SHIFT, >> + (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT); >> + >> + __free_iova(&mmu->iova_domain, iova); >> +} >> + >> +static int ipu3_dmamap_map_sg(struct device *dev, struct scatterlist *sglist, >> + int nents, enum dma_data_direction dir, >> + unsigned long attrs) >> +{ >> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >> + struct scatterlist *sg; >> + struct iova *iova; >> + size_t size = 0; >> + uint32_t iova_addr; >> + int i; >> + >> + for_each_sg(sglist, sg, nents, i) >> + size += PAGE_ALIGN(sg->length) >> PAGE_SHIFT; >> + >> + dev_dbg(dev, "dmamap: mapping sg %d entries, %zu pages\n", nents, size); >> + >> + iova = alloc_iova(&mmu->iova_domain, size, >> + dma_get_mask(dev) >> PAGE_SHIFT, 0); >> + if (!iova) >> + return 0; >> + >> + dev_dbg(dev, "dmamap: iova low pfn %lu, high pfn %lu\n", iova->pfn_lo, >> + iova->pfn_hi); >> + >> + iova_addr = iova->pfn_lo; >> + >> + for_each_sg(sglist, sg, nents, i) { >> + int rval; >> + >> + dev_dbg(dev, >> + "dmamap: 
entry %d: iova 0x%8.8x, phys 0x%16.16llx\n", >> + i, iova_addr << PAGE_SHIFT, page_to_phys(sg_page(sg))); >> + rval = iommu_map(mmu->domain, iova_addr << PAGE_SHIFT, >> + page_to_phys(sg_page(sg)), >> + PAGE_ALIGN(sg->length), 0); >> + if (rval) >> + goto out_fail; >> + sg_dma_address(sg) = iova_addr << PAGE_SHIFT; >> +#ifdef CONFIG_NEED_SG_DMA_LENGTH >> + sg_dma_len(sg) = sg->length; >> +#endif /* CONFIG_NEED_SG_DMA_LENGTH */ >> + >> + iova_addr += PAGE_ALIGN(sg->length) >> PAGE_SHIFT; >> + } >> + >> + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) >> + ipu3_dmamap_sync_sg_for_cpu(dev, sglist, nents, >> + DMA_BIDIRECTIONAL); Ditto here. To echo Tomasz' point, all you should really need to implement here is the cache maintenance and VMA management - everything else can just be handed off to dma-iommu provided your IOMMU driver sets up the domain appropriately (see e.g. drivers/iommu/mtk-iommu.c for a reasonably similar example of a media-subsystem kind of IOMMU). Robin. >> + >> + return nents; >> + >> +out_fail: >> + ipu3_dmamap_unmap_sg(dev, sglist, i, dir, attrs); >> + >> + return 0; >> +} >> + >> +/* >> + * Create scatter-list for the already allocated DMA buffer >> + */ >> +static int ipu3_dmamap_get_sgtable(struct device *dev, struct sg_table *sgt, >> + void *cpu_addr, dma_addr_t handle, >> + size_t size, unsigned long attrs) >> +{ >> + struct vm_struct *area = find_vm_area(cpu_addr); >> + int n_pages; >> + int ret; >> + >> + if (!area || (WARN_ON(!area->pages))) >> + return -ENOMEM; >> + >> + n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; >> + >> + ret = sg_alloc_table_from_pages(sgt, area->pages, n_pages, 0, size, >> + GFP_KERNEL); >> + if (ret) >> + dev_dbg(dev, "failed to get sgt table\n"); >> + >> + return ret; >> +} >> + >> +struct dma_map_ops ipu3_dmamap_ops = { >> + .alloc = ipu3_dmamap_alloc, >> + .free = ipu3_dmamap_free, >> + .mmap = ipu3_dmamap_mmap, >> + .map_sg = ipu3_dmamap_map_sg, >> + .unmap_sg = ipu3_dmamap_unmap_sg, >> + .sync_single_for_cpu = 
ipu3_dmamap_sync_single_for_cpu, >> + .sync_single_for_device = ipu3_dmamap_sync_single_for_cpu, >> + .sync_sg_for_cpu = ipu3_dmamap_sync_sg_for_cpu, >> + .sync_sg_for_device = ipu3_dmamap_sync_sg_for_cpu, >> + .get_sgtable = ipu3_dmamap_get_sgtable, >> +}; >> +EXPORT_SYMBOL_GPL(ipu3_dmamap_ops); >> diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.h b/drivers/media/pci/intel/ipu3/ipu3-dmamap.h >> new file mode 100644 >> index 0000000..714bac0 >> --- /dev/null >> +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.h >> @@ -0,0 +1,20 @@ >> +/* >> + * Copyright (c) 2017 Intel Corporation. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License version >> + * 2 as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + */ >> + >> +#ifndef __IPU3_DMAMAP_H >> +#define __IPU3_DMAMAP_H >> + >> +extern struct dma_map_ops ipu3_dmamap_ops; >> + >> +#endif >> -- >> 2.7.4 >>
On Thu, Jun 8, 2017 at 10:22 PM, Robin Murphy <robin.murphy@arm.com> wrote: > On 07/06/17 10:47, Tomasz Figa wrote: >> Hi Yong, >> >> +Robin, Joerg, IOMMU ML >> >> Please see my comments inline. >> >> On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi <yong.zhi@intel.com> wrote: [snip] >>> + >>> +/* End of things adapted from arch/arm/mm/dma-mapping.c */ >>> +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, >>> + dma_addr_t dma_handle, size_t size, >>> + enum dma_data_direction dir) >>> +{ >>> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >>> + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); >>> + >>> + clflush_cache_range(phys_to_virt(daddr), size); >> >> You might need to consider another IOMMU on the way here. Generally, >> given that daddr is your MMU DMA address (not necessarily CPU physical >> address), you should be able to call >> >> dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) > > I'd hope that this IPU complex is some kind of embedded endpoint thing > that bypasses the VT-d IOMMU or is always using its own local RAM, > because it would be pretty much unworkable otherwise. It uses system RAM and, as far as my understanding goes, by default it operates without the VT-d IOMMU and that's how it's used right now. I'm suggesting VT-d IOMMU as a way to further strengthen the security and error resilience in future (due to the IPU complex being non-coherent and also running a closed source firmware). > The whole > infrastructure isn't really capable of dealing with nested IOMMUs, and > nested DMA ops would be an equally horrible idea. Could you elaborate a bit more on this? I think we should be able to deal with this in a way I suggested before: a) the PCI device would use the system DMA ops, b) the PCI device would implement a secondary bus for which it would provide its own DMA and IOMMU ops. 
c) a secondary device would be registered on the secondary bus, d) all memory for the IPU would be managed on behalf of the secondary device. In fact, the driver already is designed in a way that all the points above are true. If I'm not missing something, the only significant missing point is calling into system DMA ops from IPU DMA ops. Best regards, Tomasz
Hi Tomasz and Alan, On Thu, Jun 08, 2017 at 11:55:18AM +0900, Tomasz Figa wrote: > Hi Alan, > > On Thu, Jun 8, 2017 at 2:45 AM, Alan Cox <gnomes@lxorguk.ukuu.org.uk> wrote: > >> > + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); > >> > + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); > >> > + > >> > + clflush_cache_range(phys_to_virt(daddr), size); > >> > >> You might need to consider another IOMMU on the way here. Generally, > >> given that daddr is your MMU DMA address (not necessarily CPU physical > >> address), you should be able to call > >> > >> dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) > > > > The system IOMMU (if enabled) may be cache coherent - and on x86 would be, > > so it doesn't think it needs to do anything for cache synchronization > > and the dma_sync won't actually do any work. > > I'm not very familiar with x86, but typically I found coherency to be > an attribute of the DMA master (i.e. if it is connected to a coherent > memory port). > > Looking at all the IPU3 code, it looks like the whole PCI device is > non-coherent for some reason (e.g. you can see implicit cache flushes > for page tables). So I would have expected that a non-coherent variant > of x86 dma_ops is used for the PCI struct device, which would do cache > maintenance in its dma_sync_* ops. It can actually do both --- in most cases. The MMU page tables are an exception so they will still need an explicit flush.
On 08/06/17 15:35, Tomasz Figa wrote: > On Thu, Jun 8, 2017 at 10:22 PM, Robin Murphy <robin.murphy@arm.com> wrote: >> On 07/06/17 10:47, Tomasz Figa wrote: >>> Hi Yong, >>> >>> +Robin, Joerg, IOMMU ML >>> >>> Please see my comments inline. >>> >>> On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi <yong.zhi@intel.com> wrote: > [snip] >>>> + >>>> +/* End of things adapted from arch/arm/mm/dma-mapping.c */ >>>> +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, >>>> + dma_addr_t dma_handle, size_t size, >>>> + enum dma_data_direction dir) >>>> +{ >>>> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >>>> + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); >>>> + >>>> + clflush_cache_range(phys_to_virt(daddr), size); >>> >>> You might need to consider another IOMMU on the way here. Generally, >>> given that daddr is your MMU DMA address (not necessarily CPU physical >>> address), you should be able to call >>> >>> dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) >> >> I'd hope that this IPU complex is some kind of embedded endpoint thing >> that bypasses the VT-d IOMMU or is always using its own local RAM, >> because it would be pretty much unworkable otherwise. > > It uses system RAM and, as far as my understanding goes, by default it > operates without the VT-d IOMMU and that's how it's used right now. OK, if it *is* behind a DMAR unit then booting with "iommu=force" (or whatever the exact incantation for intel-iommu is) should be fun... > I'm suggesting VT-d IOMMU as a way to further strengthen the security > and error resilience in future (due to the IPU complex being > non-coherent and also running a closed source firmware). TBH, doing DMA remapping through *two* IOMMUS will add horrible hardware overhead, increase the scope for kernel-side bugs, and not much more. If we don't trust this IOMMU to behave, why are we trying to drive it in the first place? 
If we do, then a second IOMMU behind it won't protect anything that the first one doesn't already. >> The whole >> infrastructure isn't really capable of dealing with nested IOMMUs, and >> nested DMA ops would be an equally horrible idea. > > Could you elaborate a bit more on this? I think we should be able to > deal with this in a way I suggested before: > > a) the PCI device would use the system DMA ops, > b) the PCI device would implement a secondary bus for which it would > provide its own DMA and IOMMU ops. > c) a secondary device would be registered on the secondary bus, > d) all memory for the IPU would be managed on behalf of the secondary device. > > In fact, the driver already is designed in a way that all the points > above are true. If I'm not missing something, the only significant > missing point is calling into system DMA ops from IPU DMA ops. I don't believe x86 has any non-coherent DMA ops, therefore the IPU DMA ops would still probably have to do all their own cache maintenance. Allocation/mapping, though, would have to be done with the parent DMA ops first (in case DMA address != physical address), *then* mapped at the IPU MMU, which is the real killer - if the PCI DMA ops are from intel-iommu, then there's little need for the IPU MMU mapping to be anything other than 1:1, so you may as well not bother. If the PCI DMA ops are from SWIOTLB, then the constraints of having to go through that first eliminate all the scatter-gather benefit of the IPU MMU. The IOMMU API ops would have to be handled similarly, by checking for ops on the parent bus, calling those first if present, then running the intermediate results through the IPU MMU's own functions. Sure, it's not impossible, but it's really really grim. Not to mention that all the IPU MMU's page tables/control structures/etc. would also have to be DMA-allocated/mapped because it may or may not be operating in physical address space. 
The reasonable option - assuming the topology really is this way - would seem to be special-casing the IPU in intel-iommu in a similar manner to integrated graphics, to make sure it gets a passthrough domain for DMA ops, but still allowing the whole PCI device to be passed through to a guest VM via VFIO if desired (which is really the only case where nested translation does start to make sense). Robin. > > Best regards, > Tomasz >
On Fri, Jun 9, 2017 at 3:07 AM, Robin Murphy <robin.murphy@arm.com> wrote: > On 08/06/17 15:35, Tomasz Figa wrote: >> On Thu, Jun 8, 2017 at 10:22 PM, Robin Murphy <robin.murphy@arm.com> wrote: >>> On 07/06/17 10:47, Tomasz Figa wrote: >>>> Hi Yong, >>>> >>>> +Robin, Joerg, IOMMU ML >>>> >>>> Please see my comments inline. >>>> >>>> On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi <yong.zhi@intel.com> wrote: >> [snip] >>>>> + >>>>> +/* End of things adapted from arch/arm/mm/dma-mapping.c */ >>>>> +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, >>>>> + dma_addr_t dma_handle, size_t size, >>>>> + enum dma_data_direction dir) >>>>> +{ >>>>> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >>>>> + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); >>>>> + >>>>> + clflush_cache_range(phys_to_virt(daddr), size); >>>> >>>> You might need to consider another IOMMU on the way here. Generally, >>>> given that daddr is your MMU DMA address (not necessarily CPU physical >>>> address), you should be able to call >>>> >>>> dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) >>> >>> I'd hope that this IPU complex is some kind of embedded endpoint thing >>> that bypasses the VT-d IOMMU or is always using its own local RAM, >>> because it would be pretty much unworkable otherwise. >> >> It uses system RAM and, as far as my understanding goes, by default it >> operates without the VT-d IOMMU and that's how it's used right now. > > OK, if it *is* behind a DMAR unit then booting with "iommu=force" (or > whatever the exact incantation for intel-iommu is) should be fun... > >> I'm suggesting VT-d IOMMU as a way to further strengthen the security >> and error resilience in future (due to the IPU complex being >> non-coherent and also running a closed source firmware). 
> > TBH, doing DMA remapping through *two* IOMMUS will add horrible hardware > overhead, Not necessarily, if done right and with right hardware (I lack the details about Intel hardware unfortunately). One can for example notice the fact that the IOVA ranges from the parent IOMMU are going to be contiguous for the child IOMMU, so one could use huge pages in the child IOMMU and essentially make a selective 1:1 mapping. > increase the scope for kernel-side bugs, and not much more. If > we don't trust this IOMMU to behave, why are we trying to drive it in > the first place? If we do, then a second IOMMU behind it won't protect > anything that the first one doesn't already. That's a valid point, right. But on the other hand, I lack the hardware details on whether we can just disable the internal IOMMU and use DMAR alone instead. > >>> The whole >>> infrastructure isn't really capable of dealing with nested IOMMUs, and >>> nested DMA ops would be an equally horrible idea. >> >> Could you elaborate a bit more on this? I think we should be able to >> deal with this in a way I suggested before: >> >> a) the PCI device would use the system DMA ops, >> b) the PCI device would implement a secondary bus for which it would >> provide its own DMA and IOMMU ops. >> c) a secondary device would be registered on the secondary bus, >> d) all memory for the IPU would be managed on behalf of the secondary device. >> >> In fact, the driver already is designed in a way that all the points >> above are true. If I'm not missing something, the only significant >> missing point is calling into system DMA ops from IPU DMA ops. > > I don't believe x86 has any non-coherent DMA ops, therefore the IPU DMA > ops would still probably have to do all their own cache maintenance. I'd argue that it means that we need to add non-coherent DMA ops on x86, as we have on other archs, which can have both coherent and non-coherent devices in the same system. 
> Allocation/mapping, though, would have to be done with the parent DMA > ops first (in case DMA address != physical address), *then* mapped at > the IPU MMU, which is the real killer - if the PCI DMA ops are from > intel-iommu, then there's little need for the IPU MMU mapping to be > anything other than 1:1, so you may as well not bother. Okay, I think I can agree with you on this. It indeed makes little sense to use both MMUs at the same time, if there is a way to disable one of them. Let's just keep this unaware of DMAR at this point of time, as a starter, and get back to it later whenever someone wants to use DMAR instead. I guess the way to proceed then would be either disabling the internal MMU, if possible, or making it use a 1:1 (huge page, if possible) mapping, if not. > If the PCI DMA > ops are from SWIOTLB, then the constraints of having to go through that > first eliminate all the scatter-gather benefit of the IPU MMU. Does the SWIOTLB give you a physically contiguous memory? If not, you still need the IPU MMU to actually be able to access the memory. > > The IOMMU API ops would have to be handled similarly, by checking for > ops on the parent bus, calling those first if present, then running the > intermediate results through the IPU MMU's own functions. Sure, it's not > impossible, but it's really really grim. Not to mention that all the IPU > MMU's page tables/control structures/etc. would also have to be > DMA-allocated/mapped because it may or may not be operating in physical > address space. DMA-allocation isn't really good for this use case, but is a DMA mapping operation really such a bad thing? 
> > The reasonable option - assuming the topology really is this way - would > seem to be special-casing the IPU in intel-iommu in a similar manner to > integrated graphics, to make sure it gets a passthrough domain for DMA > ops, but still allowing the whole PCI device to be passed through to a > guest VM via VFIO if desired (which is really the only case where nested > translation does start to make sense). Yeah, given that we need some start, it sounds sane to me. We can then revisit different options later. Thanks a lot for your input. Best regards, Tomasz
On 09/06/17 07:20, Tomasz Figa wrote: > On Fri, Jun 9, 2017 at 3:07 AM, Robin Murphy <robin.murphy@arm.com> wrote: >> On 08/06/17 15:35, Tomasz Figa wrote: >>> On Thu, Jun 8, 2017 at 10:22 PM, Robin Murphy <robin.murphy@arm.com> wrote: >>>> On 07/06/17 10:47, Tomasz Figa wrote: >>>>> Hi Yong, >>>>> >>>>> +Robin, Joerg, IOMMU ML >>>>> >>>>> Please see my comments inline. >>>>> >>>>> On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi <yong.zhi@intel.com> wrote: >>> [snip] >>>>>> + >>>>>> +/* End of things adapted from arch/arm/mm/dma-mapping.c */ >>>>>> +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, >>>>>> + dma_addr_t dma_handle, size_t size, >>>>>> + enum dma_data_direction dir) >>>>>> +{ >>>>>> + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); >>>>>> + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); >>>>>> + >>>>>> + clflush_cache_range(phys_to_virt(daddr), size); >>>>> >>>>> You might need to consider another IOMMU on the way here. Generally, >>>>> given that daddr is your MMU DMA address (not necessarily CPU physical >>>>> address), you should be able to call >>>>> >>>>> dma_sync_single_for_cpu(<your pci device>, daddr, size, dir) >>>> >>>> I'd hope that this IPU complex is some kind of embedded endpoint thing >>>> that bypasses the VT-d IOMMU or is always using its own local RAM, >>>> because it would be pretty much unworkable otherwise. >>> >>> It uses system RAM and, as far as my understanding goes, by default it >>> operates without the VT-d IOMMU and that's how it's used right now. >> >> OK, if it *is* behind a DMAR unit then booting with "iommu=force" (or >> whatever the exact incantation for intel-iommu is) should be fun... >> >>> I'm suggesting VT-d IOMMU as a way to further strengthen the security >>> and error resilience in future (due to the IPU complex being >>> non-coherent and also running a closed source firmware). 
>> >> TBH, doing DMA remapping through *two* IOMMUS will add horrible hardware >> overhead, > > Not necessarily, if done right and with right hardware (I lack the > details about Intel hardware unfortunately). One can for example > notice the fact that the IOVA ranges from the parent IOMMU are going > to be contiguous for the child IOMMU, so one could use huge pages in > the child IOMMU and essentially make a selective 1:1 mapping. Note that many IOMMUs don't actually implement TLBs for larger page sizes, even if they support them. And that still doesn't really help the main issue of the way in which nested table walks blow up exponentially: For a n-level pagetable at the first level and an m-level table at the second level, a single access by the device can become m * (n + 1) memory accesses (and that's generously ignoring additional things like source-ID-to-context lookups). >> increase the scope for kernel-side bugs, and not much more. If >> we don't trust this IOMMU to behave, why are we trying to drive it in >> the first place? If we do, then a second IOMMU behind it won't protect >> anything that the first one doesn't already. > > That's a valid point, right. But on the other hand, I lack the > hardware details on whether we can just disable the internal IOMMU and > use DMAR alone instead. > >> >>>> The whole >>>> infrastructure isn't really capable of dealing with nested IOMMUs, and >>>> nested DMA ops would be an equally horrible idea. >>> >>> Could you elaborate a bit more on this? I think we should be able to >>> deal with this in a way I suggested before: >>> >>> a) the PCI device would use the system DMA ops, >>> b) the PCI device would implement a secondary bus for which it would >>> provide its own DMA and IOMMU ops. >>> c) a secondary device would be registered on the secondary bus, >>> d) all memory for the IPU would be managed on behalf of the secondary device. 
>>> >>> In fact, the driver already is designed in a way that all the points >>> above are true. If I'm not missing something, the only significant >>> missing point is calling into system DMA ops from IPU DMA ops. >> >> I don't believe x86 has any non-coherent DMA ops, therefore the IPU DMA >> ops would still probably have to do all their own cache maintenance. > > I'd argue that it means that we need to add non-coherent DMA ops on > x86, as we have on other archs, which can have both coherent and > non-coherent devices in the same system. I'd argue that that's what this patch *is* doing - x86 already has DMA ops all over the place for special cases, and this is really just another special case. I think trying to introduce some notion of non-coherent devices to the arch code, plus some generic way to identify them, plus some way to make sure everywhere else that overrides DMA ops still does the right thing, would be a lot more work for very little gain. >> Allocation/mapping, though, would have to be done with the parent DMA >> ops first (in case DMA address != physical address), *then* mapped at >> the IPU MMU, which is the real killer - if the PCI DMA ops are from >> intel-iommu, then there's little need for the IPU MMU mapping to be >> anything other than 1:1, so you may as well not bother. > > Okay, I think I can agree with you on this. It indeed makes little > sense to use both MMUs at the same time, if there is a way to disable > one of them. > > Let's just keep this unaware of DMAR at this point of time, as a > starter, and get back to it later whenever someone wants to use DMAR > instead. I guess the way to proceed then would be either disabling the > internal MMU, if possible, or making it use a 1:1 (huge page, if > possible) mapping, if not. Generally, when you have a device-specific IOMMU like this, that's the one you'd want to keep using because it'll be optimised for the device's workload (e.g. 
the fact that it's non-coherent here is very likely the appropriate performance choice). >> If the PCI DMA >> ops are from SWIOTLB, then the constraints of having to go through that >> first eliminate all the scatter-gather benefit of the IPU MMU. > > Does the SWIOTLB give you a physically contiguous memory? If not, you > still need the IPU MMU to actually be able to access the memory. The SWIOTLB itself is really only for streaming mappings - coherent allocations come from CMA (although it may fall back to allocating directly out of its bounce buffers in certain circumstances). Either way, yes, everything's physically contiguous, because it's designed for when there is no hardware IOMMU available. I can well imagine video/ISP workloads being able to exhaust CMA at the drop of a hat too, which makes the idea all the worse. >> >> The IOMMU API ops would have to be handled similarly, by checking for >> ops on the parent bus, calling those first if present, then running the >> intermediate results through the IPU MMU's own functions. Sure, it's not >> impossible, but it's really really grim. Not to mention that all the IPU >> MMU's page tables/control structures/etc. would also have to be >> DMA-allocated/mapped because it may or may not be operating in physical >> address space. > > DMA-allocation isn't really good for this use case, but is a DMA > mapping operation really such a bad thing? Not really, although it's still a bit of extra complication. We do it in io-pgtable for the sake of coherency, but we enforce that DMA == phys so that we don't have to double the memory overhead to keep track of the kernel VAs as well. I think the Tegra IOMMU driver is one that does go the whole way. Coming back to the earlier point, though, if you'd end up back in your driver's own DMA ops anyway then there seems very little justification for going the long way round. 
>> The reasonable option - assuming the topology really is this way - would >> seem to be special-casing the IPU in intel-iommu in a similar manner to >> integrated graphics, to make sure it gets a passthrough domain for DMA >> ops, but still allowing the whole PCI device to be passed through to a >> guest VM via VFIO if desired (which is really the only case where nested >> translation does start to make sense). > > Yeah, given that we need some start, it sounds sane to me. We can then > revisit different options later. Thanks a lot for your input. Thankfully, it seems like this will remain a theoretical problem. Phew! Robin. > > Best regards, > Tomasz >
diff --git a/drivers/media/pci/intel/ipu3/Kconfig b/drivers/media/pci/intel/ipu3/Kconfig index ab2edcb..2030be7 100644 --- a/drivers/media/pci/intel/ipu3/Kconfig +++ b/drivers/media/pci/intel/ipu3/Kconfig @@ -26,3 +26,9 @@ config INTEL_IPU3_MMU Say Y here if you have Skylake/Kaby Lake SoC with IPU3. Say N if un-sure. + +config INTEL_IPU3_DMAMAP + bool "Intel ipu3 DMA mapping driver" + select IOMMU_IOVA + ---help--- + This is IPU3 IOMMU domain specific DMA driver. diff --git a/drivers/media/pci/intel/ipu3/Makefile b/drivers/media/pci/intel/ipu3/Makefile index 2b669df..2c2a035 100644 --- a/drivers/media/pci/intel/ipu3/Makefile +++ b/drivers/media/pci/intel/ipu3/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_VIDEO_IPU3_CIO2) += ipu3-cio2.o obj-$(CONFIG_INTEL_IPU3_MMU) += ipu3-mmu.o +obj-$(CONFIG_INTEL_IPU3_DMAMAP) += ipu3-dmamap.o diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.c b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c new file mode 100644 index 0000000..74704d9 --- /dev/null +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/highmem.h> +#include <linux/slab.h> +#include <linux/version.h> +#include <linux/vmalloc.h> +#include "ipu3-mmu.h" + +/* Begin of things adapted from arch/arm/mm/dma-mapping.c */ +static void ipu3_dmamap_clear_buffer(struct page *page, size_t size, + unsigned long attrs) +{ + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. 
+ */ + if (PageHighMem(page)) { + while (size > 0) { + void *ptr = kmap_atomic(page); + + memset(ptr, 0, PAGE_SIZE); + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + clflush_cache_range(ptr, PAGE_SIZE); + kunmap_atomic(ptr); + page++; + size -= PAGE_SIZE; + } + } else { + void *ptr = page_address(page); + + memset(ptr, 0, size); + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + clflush_cache_range(ptr, size); + } +} + +/** + * ipu3_dmamap_alloc_buffer - allocate buffer based on attributes + * @dev: struct device pointer + * @size: size of buffer in bytes + * @gfp: specify the free page type + * @attrs: defined in linux/dma-attrs.h + * + * This is a helper function for physical page allocation + * + * Return array representing buffer from alloc_pages() on success + * or NULL on failure + * + * Must be freed with ipu3_dmamap_free_buffer. + */ +static struct page **ipu3_dmamap_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, unsigned long attrs) +{ + struct page **pages; + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i = 0; + + /* Allocate mem for array of page ptrs */ + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, GFP_KERNEL); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + gfp |= __GFP_NOWARN; + + while (count) { + int j, order = __fls(count); + + pages[i] = alloc_pages(gfp, order); + while (!pages[i] && order) + pages[i] = alloc_pages(gfp, --order); + if (!pages[i]) + goto error; + + if (order) { + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } + /* Zero and invalidate */ + ipu3_dmamap_clear_buffer(pages[i], PAGE_SIZE << order, attrs); + i += 1 << order; + count -= 1 << order; + } + + return pages; + +error: + while (i--) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + + return NULL; +} + +/* + * Free a buffer allocated by ipu3_dmamap_alloc_buffer() + */ +static int 
ipu3_dmamap_free_buffer(struct device *dev, struct page **pages, + size_t size, unsigned long attrs) +{ + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i; + + for (i = 0; i < count; i++) { + if (pages[i]) { + ipu3_dmamap_clear_buffer(pages[i], PAGE_SIZE, attrs); + __free_pages(pages[i], 0); + } + } + + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return 0; +} + +/* End of things adapted from arch/arm/mm/dma-mapping.c */ +static void ipu3_dmamap_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); + dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle); + + clflush_cache_range(phys_to_virt(daddr), size); +} + +/* + * Synchronization function to transfer ownership to CPU + */ +static void ipu3_dmamap_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sglist, int nents, + enum dma_data_direction dir) +{ + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); + struct scatterlist *sg; + int i; + + for_each_sg(sglist, sg, nents, i) { + clflush_cache_range( + phys_to_virt(iommu_iova_to_phys(mmu->domain, + sg_dma_address(sg))), sg->length); + } +} + +/** + * ipu3_dmamap_alloc - allocate and map a buffer into KVA + * @dev: struct device pointer + * @size: size of buffer in bytes + * @gfp: specify the get free page type + * @attrs: defined in linux/dma-attrs.h + * + * Return KVA on success or NULL on failure + * + */ +static void *ipu3_dmamap_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs) +{ + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); + struct page **pages; + struct iova *iova; + struct vm_struct *area; + int i; + int rval; + + size = PAGE_ALIGN(size); + + iova = alloc_iova(&mmu->iova_domain, size >> PAGE_SHIFT, + dma_get_mask(dev) >> PAGE_SHIFT, 0); + if (!iova) + return NULL; + + pages = ipu3_dmamap_alloc_buffer(dev, size, gfp, attrs); + if (!pages) + 
goto out_free_iova; + + /* Call IOMMU driver to setup pgt */ + for (i = 0; iova->pfn_lo + i <= iova->pfn_hi; i++) { + rval = iommu_map(mmu->domain, + (iova->pfn_lo + i) << PAGE_SHIFT, + page_to_phys(pages[i]), PAGE_SIZE, 0); + if (rval) + goto out_unmap; + } + /* Now grab a virtual region */ + area = __get_vm_area(size, 0, VMALLOC_START, VMALLOC_END); + if (!area) + goto out_unmap; + + area->pages = pages; + /* And map it in KVA */ + if (map_vm_area(area, PAGE_KERNEL, pages)) + goto out_vunmap; + + *dma_handle = iova->pfn_lo << PAGE_SHIFT; + + return area->addr; + +out_vunmap: + vunmap(area->addr); + +out_unmap: + ipu3_dmamap_free_buffer(dev, pages, size, attrs); + for (i--; i >= 0; i--) { + iommu_unmap(mmu->domain, (iova->pfn_lo + i) << PAGE_SHIFT, + PAGE_SIZE); + } + +out_free_iova: + __free_iova(&mmu->iova_domain, iova); + + return NULL; +} + +/* + * Counterpart of ipu3_dmamap_alloc + */ +static void ipu3_dmamap_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); + struct vm_struct *area = find_vm_area(vaddr); + struct iova *iova = find_iova(&mmu->iova_domain, + dma_handle >> PAGE_SHIFT); + + if (WARN_ON(!area) || WARN_ON(!iova)) + return; + + if (WARN_ON(!area->pages)) + return; + + size = PAGE_ALIGN(size); + + iommu_unmap(mmu->domain, iova->pfn_lo << PAGE_SHIFT, + (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT); + + __free_iova(&mmu->iova_domain, iova); + + ipu3_dmamap_free_buffer(dev, area->pages, size, attrs); + + vunmap(vaddr); +} + +/* + * Insert each page into user VMA + */ +static int ipu3_dmamap_mmap(struct device *dev, struct vm_area_struct *vma, + void *addr, dma_addr_t iova, size_t size, + unsigned long attrs) +{ + struct vm_struct *area = find_vm_area(addr); + size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; + size_t i; + + if (!area) + return -EFAULT; + + if (vma->vm_start & ~PAGE_MASK) + return -EINVAL; + + if (size > area->size) + return -EFAULT; + + for (i 
= 0; i < count; i++) + vm_insert_page(vma, vma->vm_start + (i << PAGE_SHIFT), + area->pages[i]); + + return 0; +} + +static void ipu3_dmamap_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); + struct iova *iova = find_iova(&mmu->iova_domain, + sg_dma_address(sglist) >> PAGE_SHIFT); + + if (!nents || WARN_ON(!iova)) + return; + + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + ipu3_dmamap_sync_sg_for_cpu(dev, sglist, nents, + DMA_BIDIRECTIONAL); + + iommu_unmap(mmu->domain, iova->pfn_lo << PAGE_SHIFT, + (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT); + + __free_iova(&mmu->iova_domain, iova); +} + +static int ipu3_dmamap_map_sg(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct ipu3_mmu *mmu = to_ipu3_mmu(dev); + struct scatterlist *sg; + struct iova *iova; + size_t size = 0; + uint32_t iova_addr; + int i; + + for_each_sg(sglist, sg, nents, i) + size += PAGE_ALIGN(sg->length) >> PAGE_SHIFT; + + dev_dbg(dev, "dmamap: mapping sg %d entries, %zu pages\n", nents, size); + + iova = alloc_iova(&mmu->iova_domain, size, + dma_get_mask(dev) >> PAGE_SHIFT, 0); + if (!iova) + return 0; + + dev_dbg(dev, "dmamap: iova low pfn %lu, high pfn %lu\n", iova->pfn_lo, + iova->pfn_hi); + + iova_addr = iova->pfn_lo; + + for_each_sg(sglist, sg, nents, i) { + int rval; + + dev_dbg(dev, + "dmamap: entry %d: iova 0x%8.8x, phys 0x%16.16llx\n", + i, iova_addr << PAGE_SHIFT, page_to_phys(sg_page(sg))); + rval = iommu_map(mmu->domain, iova_addr << PAGE_SHIFT, + page_to_phys(sg_page(sg)), + PAGE_ALIGN(sg->length), 0); + if (rval) + goto out_fail; + sg_dma_address(sg) = iova_addr << PAGE_SHIFT; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg_dma_len(sg) = sg->length; +#endif /* CONFIG_NEED_SG_DMA_LENGTH */ + + iova_addr += PAGE_ALIGN(sg->length) >> PAGE_SHIFT; + } + + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + 
ipu3_dmamap_sync_sg_for_cpu(dev, sglist, nents, + DMA_BIDIRECTIONAL); + + return nents; + +out_fail: + ipu3_dmamap_unmap_sg(dev, sglist, i, dir, attrs); + + return 0; +} + +/* + * Create scatter-list for the already allocated DMA buffer + * + * Looks up the vm area behind cpu_addr and builds sgt from its page array, + * covering PAGE_ALIGN(size) / PAGE_SIZE pages and size bytes starting at + * offset 0. The handle and attrs arguments are not used. + * + * Returns 0 on success, -ENOMEM if cpu_addr has no vm area or the area has + * no page array (the latter also raises a warning), or the error returned + * by sg_alloc_table_from_pages(). + */ +static int ipu3_dmamap_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, + size_t size, unsigned long attrs) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + int n_pages; + int ret; + + if (!area || (WARN_ON(!area->pages))) + return -ENOMEM; + + n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + + ret = sg_alloc_table_from_pages(sgt, area->pages, n_pages, 0, size, + GFP_KERNEL); + if (ret) + dev_dbg(dev, "failed to get sgt table\n"); + + return ret; +} + +/* + * DMA mapping operations of this driver, exported for use by other modules. + * + * NOTE(review): both *_for_device sync hooks point at the corresponding + * *_for_cpu implementations -- confirm this is intentional. + */ +struct dma_map_ops ipu3_dmamap_ops = { + .alloc = ipu3_dmamap_alloc, + .free = ipu3_dmamap_free, + .mmap = ipu3_dmamap_mmap, + .map_sg = ipu3_dmamap_map_sg, + .unmap_sg = ipu3_dmamap_unmap_sg, + .sync_single_for_cpu = ipu3_dmamap_sync_single_for_cpu, + .sync_single_for_device = ipu3_dmamap_sync_single_for_cpu, + .sync_sg_for_cpu = ipu3_dmamap_sync_sg_for_cpu, + .sync_sg_for_device = ipu3_dmamap_sync_sg_for_cpu, + .get_sgtable = ipu3_dmamap_get_sgtable, +}; +EXPORT_SYMBOL_GPL(ipu3_dmamap_ops); diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.h b/drivers/media/pci/intel/ipu3/ipu3-dmamap.h new file mode 100644 index 0000000..714bac0 --- /dev/null +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __IPU3_DMAMAP_H +#define __IPU3_DMAMAP_H + +/* DMA mapping operations defined and exported by ipu3-dmamap.c */ +extern struct dma_map_ops ipu3_dmamap_ops; + +#endif
IPU3 MMU-based DMA mapping driver Signed-off-by: Yong Zhi <yong.zhi@intel.com> --- drivers/media/pci/intel/ipu3/Kconfig | 6 + drivers/media/pci/intel/ipu3/Makefile | 1 + drivers/media/pci/intel/ipu3/ipu3-dmamap.c | 408 +++++++++++++++++++++++++++++ drivers/media/pci/intel/ipu3/ipu3-dmamap.h | 20 ++ 4 files changed, 435 insertions(+) create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.c create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.h