Message ID | e42771992a73620f23128c0479b2ae91b3e177bf.1536764440.git.robin.murphy@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | iommu: Avoid DMA ops domain refcount contention | expand |
Hi All, On 2018-09-12 17:24, Robin Murphy wrote: > Most parts of iommu-dma already assume they are operating on a default > domain set up by iommu_dma_init_domain(), and can be converted straight > over to avoid the refcounting bottleneck. MSI page mappings may be in > an unmanaged domain with an explicit MSI-only cookie, so retain the > non-specific lookup, but that's OK since they're far from a contended > fast path either way. > > Signed-off-by: Robin Murphy <robin.murphy@arm.com> This breaks Exynos DRM driver with Exynos IOMMU on ARM64 (Exynos5433). Exynos DRM creates it's own domain, attach all devices which performs DMA access (typically CRTC devices) to it and uses standard DMA-mapping calls to allocate/map buffers. This way it can use the same one DMA address for each buffer regardless of the CRTC/display/processing device. This no longer works with this patch. The simplest solution to fix this is add API to change default_domain to the one allocated by the Exynos DRM driver. > --- > drivers/iommu/dma-iommu.c | 23 ++++++++++++----------- > 1 file changed, 12 insertions(+), 11 deletions(-) > > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index 511ff9a1d6d9..320f9ea82f3f 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -491,7 +491,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, > void iommu_dma_free(struct device *dev, struct page **pages, size_t size, > dma_addr_t *handle) > { > - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); > + __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); > __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); > *handle = IOMMU_MAPPING_ERROR; > } > @@ -518,7 +518,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, > unsigned long attrs, int prot, dma_addr_t *handle, > void (*flush_page)(struct device *, const void *, phys_addr_t)) > { > - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > struct iommu_dma_cookie *cookie = domain->iova_cookie; > struct iova_domain *iovad = &cookie->iovad; > struct page **pages; > @@ -606,9 +606,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) > } > > static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, > - size_t size, int prot) > + size_t size, int prot, struct iommu_domain *domain) > { > - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); > struct iommu_dma_cookie *cookie = domain->iova_cookie; > size_t iova_off = 0; > dma_addr_t iova; > @@ -632,13 +631,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, > dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, > unsigned long offset, size_t size, int prot) > { > - return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot); > + return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot, > + iommu_get_dma_domain(dev)); > } > > void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, > enum dma_data_direction dir, unsigned long attrs) > { > - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); > + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); > } > > /* > @@ -726,7 +726,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) > int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, > int nents, int prot) > { > - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > struct iommu_dma_cookie *cookie = domain->iova_cookie; > struct iova_domain *iovad = &cookie->iovad; > struct scatterlist *s, *prev = NULL; > @@ -811,20 +811,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, > sg = tmp; > } > end = sg_dma_address(sg) + sg_dma_len(sg); > - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); > + __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start); > } > > dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, > size_t size, enum dma_data_direction dir, unsigned long attrs) > { > return __iommu_dma_map(dev, phys, size, > - dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO); > + dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, > + iommu_get_dma_domain(dev)); > } > > void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, > size_t size, enum dma_data_direction dir, unsigned long attrs) > { > - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); > + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); > } > > int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) > @@ -850,7 +851,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, > if (!msi_page) > return NULL; > > - iova = __iommu_dma_map(dev, msi_addr, size, prot); > + iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); > if (iommu_dma_mapping_error(dev, iova)) > goto out_free_page; > Best regards
On 28/09/18 14:26, Marek Szyprowski wrote: > Hi All, > > On 2018-09-12 17:24, Robin Murphy wrote: >> Most parts of iommu-dma already assume they are operating on a default >> domain set up by iommu_dma_init_domain(), and can be converted straight >> over to avoid the refcounting bottleneck. MSI page mappings may be in >> an unmanaged domain with an explicit MSI-only cookie, so retain the >> non-specific lookup, but that's OK since they're far from a contended >> fast path either way. >> >> Signed-off-by: Robin Murphy <robin.murphy@arm.com> > > This breaks Exynos DRM driver with Exynos IOMMU on ARM64 (Exynos5433). > Exynos DRM creates it's own domain, attach all devices which performs > DMA access (typically CRTC devices) to it and uses standard DMA-mapping > calls to allocate/map buffers. This way it can use the same one DMA > address for each buffer regardless of the CRTC/display/processing > device. > > This no longer works with this patch. The simplest solution to fix this > is add API to change default_domain to the one allocated by the Exynos > DRM driver. Ugh, I hadn't realised there were any drivers trying to fake up their own default domains - that's always going to be fragile. In fact, one of my proposals for un-breaking Tegra and other DRM drivers involves preventing iommu-dma from trying to operate on non-IOMMU_DOMAIN_DMA domains at all, which would stop that from working permanently. Can Exynos not put all the DRM components into a single group, or at least just pick one of the real default domains to attach everyone to instead of allocating a fake one? Robin. >> --- >> drivers/iommu/dma-iommu.c | 23 ++++++++++++----------- >> 1 file changed, 12 insertions(+), 11 deletions(-) >> >> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c >> index 511ff9a1d6d9..320f9ea82f3f 100644 >> --- a/drivers/iommu/dma-iommu.c >> +++ b/drivers/iommu/dma-iommu.c >> @@ -491,7 +491,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, >> void iommu_dma_free(struct device *dev, struct page **pages, size_t size, >> dma_addr_t *handle) >> { >> - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); >> + __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); >> __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); >> *handle = IOMMU_MAPPING_ERROR; >> } >> @@ -518,7 +518,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, >> unsigned long attrs, int prot, dma_addr_t *handle, >> void (*flush_page)(struct device *, const void *, phys_addr_t)) >> { >> - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); >> + struct iommu_domain *domain = iommu_get_dma_domain(dev); >> struct iommu_dma_cookie *cookie = domain->iova_cookie; >> struct iova_domain *iovad = &cookie->iovad; >> struct page **pages; >> @@ -606,9 +606,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) >> } >> >> static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, >> - size_t size, int prot) >> + size_t size, int prot, struct iommu_domain *domain) >> { >> - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); >> struct iommu_dma_cookie *cookie = domain->iova_cookie; >> size_t iova_off = 0; >> dma_addr_t iova; >> @@ -632,13 +631,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, >> dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, >> unsigned long offset, size_t size, int prot) >> { >> - return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot); >> + return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot, >> + iommu_get_dma_domain(dev)); >> } >> >> void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, >> enum dma_data_direction dir, unsigned long attrs) >> { >> - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); >> + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); >> } >> >> /* >> @@ -726,7 +726,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) >> int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, >> int nents, int prot) >> { >> - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); >> + struct iommu_domain *domain = iommu_get_dma_domain(dev); >> struct iommu_dma_cookie *cookie = domain->iova_cookie; >> struct iova_domain *iovad = &cookie->iovad; >> struct scatterlist *s, *prev = NULL; >> @@ -811,20 +811,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, >> sg = tmp; >> } >> end = sg_dma_address(sg) + sg_dma_len(sg); >> - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); >> + __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start); >> } >> >> dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, >> size_t size, enum dma_data_direction dir, unsigned long attrs) >> { >> return __iommu_dma_map(dev, phys, size, >> - dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO); >> + dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, >> + iommu_get_dma_domain(dev)); >> } >> >> void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, >> size_t size, enum dma_data_direction dir, unsigned long attrs) >> { >> - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); >> + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); >> } >> >> int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) >> @@ -850,7 +851,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, >> if (!msi_page) >> return NULL; >> >> - iova = __iommu_dma_map(dev, msi_addr, size, prot); >> + iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); >> if (iommu_dma_mapping_error(dev, iova)) >> goto out_free_page; >> > > Best regards >
Hi Robin, On 2018-09-28 15:52, Robin Murphy wrote: > On 28/09/18 14:26, Marek Szyprowski wrote: >> On 2018-09-12 17:24, Robin Murphy wrote: >>> Most parts of iommu-dma already assume they are operating on a default >>> domain set up by iommu_dma_init_domain(), and can be converted straight >>> over to avoid the refcounting bottleneck. MSI page mappings may be in >>> an unmanaged domain with an explicit MSI-only cookie, so retain the >>> non-specific lookup, but that's OK since they're far from a contended >>> fast path either way. >>> >>> Signed-off-by: Robin Murphy <robin.murphy@arm.com> >> >> This breaks Exynos DRM driver with Exynos IOMMU on ARM64 (Exynos5433). >> Exynos DRM creates it's own domain, attach all devices which performs >> DMA access (typically CRTC devices) to it and uses standard DMA-mapping >> calls to allocate/map buffers. This way it can use the same one DMA >> address for each buffer regardless of the CRTC/display/processing >> device. >> >> This no longer works with this patch. The simplest solution to fix this >> is add API to change default_domain to the one allocated by the Exynos >> DRM driver. > > Ugh, I hadn't realised there were any drivers trying to fake up their > own default domains - that's always going to be fragile. In fact, one > of my proposals for un-breaking Tegra and other DRM drivers involves > preventing iommu-dma from trying to operate on non-IOMMU_DOMAIN_DMA > domains at all, which would stop that from working permanently. IMHO there should be a way to let drivers like DRM to use DMA-mapping infrastructure on their own iommu domain. Better provide such API to avoid hacking in the DRM drivers to get this done without help from IOMMU core. > Can Exynos not put all the DRM components into a single group, or at > least just pick one of the real default domains to attach everyone to > instead of allocating a fake one? Exynos DRM components are not in the single group. Currently iommu-groups have no meaning on Exynos5433 and Exynos IOMMU simply allocate one group per each iommu-master device in the system. Exynos DRM already selects one of its component devices as the 'main DMA device'. It is being used for managing buffers if no IOMMU is available, so it can also use its iommu domain instead of allocating a fake one. I've checked and it fixes Exynos DRM now. The question is how to merge this fix to keep bisectability. From: Marek Szyprowski <m.szyprowski@samsung.com> Date: Fri, 28 Sep 2018 16:17:27 +0200 Subject: [PATCH] drm/exynos: Use selected dma_dev default iommu domain instead of a fake one Instead of allocating a fake IOMMU domain for all Exynos DRM components, simply reuse the default IOMMU domain of the already selected DMA device. This allows some design changes in IOMMU framework without breaking IOMMU support in Exynos DRM. Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> --- drivers/gpu/drm/exynos/exynos_drm_iommu.h | 34 ++++------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h index 87f6b5672e11..51d3b7dcd529 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h @@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv, unsigned long start, unsigned long size) { - struct iommu_domain *domain; - int ret; - - domain = iommu_domain_alloc(priv->dma_dev->bus); - if (!domain) - return -ENOMEM; - - ret = iommu_get_dma_cookie(domain); - if (ret) - goto free_domain; - - ret = iommu_dma_init_domain(domain, start, size, NULL); - if (ret) - goto put_cookie; - - priv->mapping = domain; + priv->mapping = iommu_get_dma_domain(priv->dma_dev); return 0; - -put_cookie: - iommu_put_dma_cookie(domain); -free_domain: - iommu_domain_free(domain); - return ret; } static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv) { - struct iommu_domain *domain = priv->mapping; - - iommu_put_dma_cookie(domain); - iommu_domain_free(domain); priv->mapping = NULL; } @@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct exynos_drm_private *priv, { struct iommu_domain *domain = priv->mapping; - return iommu_attach_device(domain, dev); + if (dev != priv->dma_dev) + return iommu_attach_device(domain, dev); + return 0; } static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, @@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, { struct iommu_domain *domain = priv->mapping; - iommu_detach_device(domain, dev); + if (dev != priv->dma_dev) + iommu_detach_device(domain, dev); } #else #error Unsupported architecture and IOMMU/DMA-mapping glue code > ... Best regards
On 28/09/18 15:21, Marek Szyprowski wrote: > Hi Robin, > > On 2018-09-28 15:52, Robin Murphy wrote: >> On 28/09/18 14:26, Marek Szyprowski wrote: >>> On 2018-09-12 17:24, Robin Murphy wrote: >>>> Most parts of iommu-dma already assume they are operating on a default >>>> domain set up by iommu_dma_init_domain(), and can be converted straight >>>> over to avoid the refcounting bottleneck. MSI page mappings may be in >>>> an unmanaged domain with an explicit MSI-only cookie, so retain the >>>> non-specific lookup, but that's OK since they're far from a contended >>>> fast path either way. >>>> >>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com> >>> >>> This breaks Exynos DRM driver with Exynos IOMMU on ARM64 (Exynos5433). >>> Exynos DRM creates it's own domain, attach all devices which performs >>> DMA access (typically CRTC devices) to it and uses standard DMA-mapping >>> calls to allocate/map buffers. This way it can use the same one DMA >>> address for each buffer regardless of the CRTC/display/processing >>> device. >>> >>> This no longer works with this patch. The simplest solution to fix this >>> is add API to change default_domain to the one allocated by the Exynos >>> DRM driver. >> >> Ugh, I hadn't realised there were any drivers trying to fake up their >> own default domains - that's always going to be fragile. In fact, one >> of my proposals for un-breaking Tegra and other DRM drivers involves >> preventing iommu-dma from trying to operate on non-IOMMU_DOMAIN_DMA >> domains at all, which would stop that from working permanently. > > IMHO there should be a way to let drivers like DRM to use DMA-mapping > infrastructure on their own iommu domain. Better provide such API to > avoid hacking in the DRM drivers to get this done without help from > IOMMU core. The tricky part is how to reconcile that with those other drivers which want to do explicit IOMMU management with their own domain but still use the DMA API for coherency of the underlying memory. I do have a couple of half-formed ideas, but they're not quite there yet. >> Can Exynos not put all the DRM components into a single group, or at >> least just pick one of the real default domains to attach everyone to >> instead of allocating a fake one? > > Exynos DRM components are not in the single group. Currently > iommu-groups have no meaning on Exynos5433 and Exynos IOMMU simply > allocate one group per each iommu-master device in the system. Yeah, a group spanning multiple IOMMU devices would have been a bit of a hack for your topology, but still *technically* possible ;) > Exynos DRM already selects one of its component devices as the 'main DMA > device'. It is being used for managing buffers if no IOMMU is available, > so it can also use its iommu domain instead of allocating a fake one. > I've checked and it fixes Exynos DRM now. The question is how to merge > this fix to keep bisectability. > > From: Marek Szyprowski <m.szyprowski@samsung.com> > Date: Fri, 28 Sep 2018 16:17:27 +0200 > Subject: [PATCH] drm/exynos: Use selected dma_dev default iommu domain > instead > of a fake one > > Instead of allocating a fake IOMMU domain for all Exynos DRM components, > simply reuse the default IOMMU domain of the already selected DMA device. > This allows some design changes in IOMMU framework without breaking IOMMU > support in Exynos DRM. > > Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> > --- > drivers/gpu/drm/exynos/exynos_drm_iommu.h | 34 ++++------------------- > 1 file changed, 6 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h > b/drivers/gpu/drm/exynos/exynos_drm_iommu.h > index 87f6b5672e11..51d3b7dcd529 100644 > --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h > +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h > @@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct > exynos_drm_private *priv, > static inline int __exynos_iommu_create_mapping(struct > exynos_drm_private *priv, > unsigned long start, unsigned long size) > { > - struct iommu_domain *domain; > - int ret; > - > - domain = iommu_domain_alloc(priv->dma_dev->bus); > - if (!domain) > - return -ENOMEM; > - > - ret = iommu_get_dma_cookie(domain); > - if (ret) > - goto free_domain; > - > - ret = iommu_dma_init_domain(domain, start, size, NULL); > - if (ret) > - goto put_cookie; > - > - priv->mapping = domain; > + priv->mapping = iommu_get_dma_domain(priv->dma_dev); iommu_get_domain_for_dev(), please - this isn't a performance-critical path inside a DMA API implementation, plus without an actual dependency on this series maybe there's a chance of sneaking it into 4.19? (you could always still check domain->type == IOMMU_DOMAIN_DMA if you want to be really really paranoid.) > return 0; > - > -put_cookie: > - iommu_put_dma_cookie(domain); > -free_domain: > - iommu_domain_free(domain); > - return ret; > } > > static inline void __exynos_iommu_release_mapping(struct > exynos_drm_private *priv) > { > - struct iommu_domain *domain = priv->mapping; > - > - iommu_put_dma_cookie(domain); > - iommu_domain_free(domain); > priv->mapping = NULL; > } > > @@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct > exynos_drm_private *priv, > { > struct iommu_domain *domain = priv->mapping; > > - return iommu_attach_device(domain, dev); > + if (dev != priv->dma_dev) > + return iommu_attach_device(domain, dev); > + return 0; > } > > static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, > @@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct > exynos_drm_private *priv, > { > struct iommu_domain *domain = priv->mapping; > > - iommu_detach_device(domain, dev); > + if (dev != priv->dma_dev) > + iommu_detach_device(domain, dev); Strictly you could skip that check, as detaching the master device from its real default domain is just a no-op, but I guess maintaining the symmetry is probably more intuitive. Robin. > } > #else > #error Unsupported architecture and IOMMU/DMA-mapping glue code > > > ... > > Best regards >
On Fri, Sep 28, 2018 at 04:31:10PM +0100, Robin Murphy wrote: > The tricky part is how to reconcile that with those other drivers which > want to do explicit IOMMU management with their own domain but still use > the DMA API for coherency of the underlying memory. I do have a couple of > half-formed ideas, but they're not quite there yet. It think the only sensible answer is that they can't, and we'll need coherency API in the iommu API (or augmenting it). Which might be a real possibility now that I'm almost done lifting the coherency management to arch hooks instead of dma op methods.
On 28/09/18 16:33, Christoph Hellwig wrote: > On Fri, Sep 28, 2018 at 04:31:10PM +0100, Robin Murphy wrote: >> The tricky part is how to reconcile that with those other drivers which >> want to do explicit IOMMU management with their own domain but still use >> the DMA API for coherency of the underlying memory. I do have a couple of >> half-formed ideas, but they're not quite there yet. > > It think the only sensible answer is that they can't, and we'll need > coherency API in the iommu API (or augmenting it). Which might be a > real possibility now that I'm almost done lifting the coherency > management to arch hooks instead of dma op methods. I reckon it seems feasible if the few clients that want to had a way to allocate their own proper managed domains, and the IOMMU API knew how to swizzle between iommu_dma_ops and dma_direct_ops upon attach depending on whether the target domain was managed or unmanaged. The worst part is liekly to be the difference between that lovely conceptual "iommu_dma_ops" and the cross-architecture mess of reality, plus where identity domains with a possible need for SWIOTLB come into it. Robin.
Hi Robin, On 2018-09-28 17:31, Robin Murphy wrote: > On 28/09/18 15:21, Marek Szyprowski wrote: >> On 2018-09-28 15:52, Robin Murphy wrote: >>> On 28/09/18 14:26, Marek Szyprowski wrote: >>>> On 2018-09-12 17:24, Robin Murphy wrote: >>>>> Most parts of iommu-dma already assume they are operating on a >>>>> default >>>>> domain set up by iommu_dma_init_domain(), and can be converted >>>>> straight >>>>> over to avoid the refcounting bottleneck. MSI page mappings may be in >>>>> an unmanaged domain with an explicit MSI-only cookie, so retain the >>>>> non-specific lookup, but that's OK since they're far from a contended >>>>> fast path either way. >>>>> >>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com> >>>> >>>> This breaks Exynos DRM driver with Exynos IOMMU on ARM64 (Exynos5433). >>>> Exynos DRM creates it's own domain, attach all devices which performs >>>> DMA access (typically CRTC devices) to it and uses standard >>>> DMA-mapping >>>> calls to allocate/map buffers. This way it can use the same one DMA >>>> address for each buffer regardless of the CRTC/display/processing >>>> device. >>>> >>>> This no longer works with this patch. The simplest solution to fix >>>> this >>>> is add API to change default_domain to the one allocated by the Exynos >>>> DRM driver. >>> >>> Ugh, I hadn't realised there were any drivers trying to fake up their >>> own default domains - that's always going to be fragile. In fact, one >>> of my proposals for un-breaking Tegra and other DRM drivers involves >>> preventing iommu-dma from trying to operate on non-IOMMU_DOMAIN_DMA >>> domains at all, which would stop that from working permanently. >> >> IMHO there should be a way to let drivers like DRM to use DMA-mapping >> infrastructure on their own iommu domain. Better provide such API to >> avoid hacking in the DRM drivers to get this done without help from >> IOMMU core. > > The tricky part is how to reconcile that with those other drivers > which want to do explicit IOMMU management with their own domain but > still use the DMA API for coherency of the underlying memory. I do > have a couple of half-formed ideas, but they're not quite there yet. The only idea I have here is to add some flags to struct driver to let device core and frameworks note that this driver wants to manage everything on their own. Then such drivers, once they settle their IOMMU domain, should call some simple API to bless it for DMA API. > >>> Can Exynos not put all the DRM components into a single group, or at >>> least just pick one of the real default domains to attach everyone to >>> instead of allocating a fake one? >> >> Exynos DRM components are not in the single group. Currently >> iommu-groups have no meaning on Exynos5433 and Exynos IOMMU simply >> allocate one group per each iommu-master device in the system. > > Yeah, a group spanning multiple IOMMU devices would have been a bit of > a hack for your topology, but still *technically* possible ;) The question if I want to put all DRM components into single IOMMU group, how to propagate such knowledge from Exynos DRM driver to Exynos IOMMU driver (groups are created by IOMMU driver)? Anyway, I don't see any benefits from using IOMMU groups as for now, especially when each Exynos DRM component has its own, separate IOMMU (or even more than one, but that a different story). >> Exynos DRM already selects one of its component devices as the 'main DMA >> device'. It is being used for managing buffers if no IOMMU is available, >> so it can also use its iommu domain instead of allocating a fake one. >> I've checked and it fixes Exynos DRM now. The question is how to merge >> this fix to keep bisectability. >> >> From: Marek Szyprowski <m.szyprowski@samsung.com> >> Date: Fri, 28 Sep 2018 16:17:27 +0200 >> Subject: [PATCH] drm/exynos: Use selected dma_dev default iommu domain >> instead >> of a fake one >> >> Instead of allocating a fake IOMMU domain for all Exynos DRM components, >> simply reuse the default IOMMU domain of the already selected DMA >> device. >> This allows some design changes in IOMMU framework without breaking >> IOMMU >> support in Exynos DRM. >> >> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> >> --- >> drivers/gpu/drm/exynos/exynos_drm_iommu.h | 34 >> ++++------------------- >> 1 file changed, 6 insertions(+), 28 deletions(-) >> >> diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h >> b/drivers/gpu/drm/exynos/exynos_drm_iommu.h >> index 87f6b5672e11..51d3b7dcd529 100644 >> --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h >> +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h >> @@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct >> exynos_drm_private *priv, >> static inline int __exynos_iommu_create_mapping(struct >> exynos_drm_private *priv, >> unsigned long start, unsigned long size) >> { >> - struct iommu_domain *domain; >> - int ret; >> - >> - domain = iommu_domain_alloc(priv->dma_dev->bus); >> - if (!domain) >> - return -ENOMEM; >> - >> - ret = iommu_get_dma_cookie(domain); >> - if (ret) >> - goto free_domain; >> - >> - ret = iommu_dma_init_domain(domain, start, size, NULL); >> - if (ret) >> - goto put_cookie; >> - >> - priv->mapping = domain; >> + priv->mapping = iommu_get_dma_domain(priv->dma_dev); > > iommu_get_domain_for_dev(), please - this isn't a performance-critical > path inside a DMA API implementation, plus without an actual > dependency on this series maybe there's a chance of sneaking it into > 4.19? > > (you could always still check domain->type == IOMMU_DOMAIN_DMA if you > want to be really really paranoid.) > I've sent a patch. I hope we will manage to get it as a fix into v4.19, so it won't block your changes in v4.20. >> return 0; >> - >> -put_cookie: >> - iommu_put_dma_cookie(domain); >> -free_domain: >> - iommu_domain_free(domain); >> - return ret; >> } >> >> static inline void __exynos_iommu_release_mapping(struct >> exynos_drm_private *priv) >> { >> - struct iommu_domain *domain = priv->mapping; >> - >> - iommu_put_dma_cookie(domain); >> - iommu_domain_free(domain); >> priv->mapping = NULL; >> } >> >> @@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct >> exynos_drm_private *priv, >> { >> struct iommu_domain *domain = priv->mapping; >> >> - return iommu_attach_device(domain, dev); >> + if (dev != priv->dma_dev) >> + return iommu_attach_device(domain, dev); >> + return 0; >> } >> >> static inline void __exynos_iommu_detach(struct exynos_drm_private >> *priv, >> @@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct >> exynos_drm_private *priv, >> { >> struct iommu_domain *domain = priv->mapping; >> >> - iommu_detach_device(domain, dev); >> + if (dev != priv->dma_dev) >> + iommu_detach_device(domain, dev); > > Strictly you could skip that check, as detaching the master device > from its real default domain is just a no-op, but I guess maintaining > the symmetry is probably more intuitive. I would like to keep the symmetry. > ... Best regards
+ dri-devel This patch may also break other ARM DRM drivers. I cced dri-devel so that they could manage this. And below relevant link for ARM DRM maintainers, https://www.spinics.net/lists/arm-kernel/msg676098.html Thanks, Inki Dae 2018년 9월 29일 (토) 오전 1:19, Marek Szyprowski <m.szyprowski@samsung.com>님이 작성: > > Hi Robin, > > On 2018-09-28 17:31, Robin Murphy wrote: > > On 28/09/18 15:21, Marek Szyprowski wrote: > >> On 2018-09-28 15:52, Robin Murphy wrote: > >>> On 28/09/18 14:26, Marek Szyprowski wrote: > >>>> On 2018-09-12 17:24, Robin Murphy wrote: > >>>>> Most parts of iommu-dma already assume they are operating on a > >>>>> default > >>>>> domain set up by iommu_dma_init_domain(), and can be converted > >>>>> straight > >>>>> over to avoid the refcounting bottleneck. MSI page mappings may be in > >>>>> an unmanaged domain with an explicit MSI-only cookie, so retain the > >>>>> non-specific lookup, but that's OK since they're far from a contended > >>>>> fast path either way. > >>>>> > >>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com> > >>>> > >>>> This breaks Exynos DRM driver with Exynos IOMMU on ARM64 (Exynos5433). > >>>> Exynos DRM creates it's own domain, attach all devices which performs > >>>> DMA access (typically CRTC devices) to it and uses standard > >>>> DMA-mapping > >>>> calls to allocate/map buffers. This way it can use the same one DMA > >>>> address for each buffer regardless of the CRTC/display/processing > >>>> device. > >>>> > >>>> This no longer works with this patch. The simplest solution to fix > >>>> this > >>>> is add API to change default_domain to the one allocated by the Exynos > >>>> DRM driver. > >>> > >>> Ugh, I hadn't realised there were any drivers trying to fake up their > >>> own default domains - that's always going to be fragile. In fact, one > >>> of my proposals for un-breaking Tegra and other DRM drivers involves > >>> preventing iommu-dma from trying to operate on non-IOMMU_DOMAIN_DMA > >>> domains at all, which would stop that from working permanently. > >> > >> IMHO there should be a way to let drivers like DRM to use DMA-mapping > >> infrastructure on their own iommu domain. Better provide such API to > >> avoid hacking in the DRM drivers to get this done without help from > >> IOMMU core. > > > > The tricky part is how to reconcile that with those other drivers > > which want to do explicit IOMMU management with their own domain but > > still use the DMA API for coherency of the underlying memory. I do > > have a couple of half-formed ideas, but they're not quite there yet. > > The only idea I have here is to add some flags to struct driver to let > device core and frameworks note that this driver wants to manage > everything on their own. Then such drivers, once they settle their IOMMU > domain, should call some simple API to bless it for DMA API. > > > > >>> Can Exynos not put all the DRM components into a single group, or at > >>> least just pick one of the real default domains to attach everyone to > >>> instead of allocating a fake one? > >> > >> Exynos DRM components are not in the single group. Currently > >> iommu-groups have no meaning on Exynos5433 and Exynos IOMMU simply > >> allocate one group per each iommu-master device in the system. > > > > Yeah, a group spanning multiple IOMMU devices would have been a bit of > > a hack for your topology, but still *technically* possible ;) > > The question if I want to put all DRM components into single IOMMU > group, how to propagate such knowledge from Exynos DRM driver to Exynos > IOMMU driver (groups are created by IOMMU driver)? Anyway, I don't see > any benefits from using IOMMU groups as for now, especially when each > Exynos DRM component has its own, separate IOMMU (or even more than one, > but that a different story). > > >> Exynos DRM already selects one of its component devices as the 'main DMA > >> device'. It is being used for managing buffers if no IOMMU is available, > >> so it can also use its iommu domain instead of allocating a fake one. > >> I've checked and it fixes Exynos DRM now. The question is how to merge > >> this fix to keep bisectability. > >> > >> From: Marek Szyprowski <m.szyprowski@samsung.com> > >> Date: Fri, 28 Sep 2018 16:17:27 +0200 > >> Subject: [PATCH] drm/exynos: Use selected dma_dev default iommu domain > >> instead > >> of a fake one > >> > >> Instead of allocating a fake IOMMU domain for all Exynos DRM components, > >> simply reuse the default IOMMU domain of the already selected DMA > >> device. > >> This allows some design changes in IOMMU framework without breaking > >> IOMMU > >> support in Exynos DRM. > >> > >> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> > >> --- > >> drivers/gpu/drm/exynos/exynos_drm_iommu.h | 34 > >> ++++------------------- > >> 1 file changed, 6 insertions(+), 28 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h > >> b/drivers/gpu/drm/exynos/exynos_drm_iommu.h > >> index 87f6b5672e11..51d3b7dcd529 100644 > >> --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h > >> +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h > >> @@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct > >> exynos_drm_private *priv, > >> static inline int __exynos_iommu_create_mapping(struct > >> exynos_drm_private *priv, > >> unsigned long start, unsigned long size) > >> { > >> - struct iommu_domain *domain; > >> - int ret; > >> - > >> - domain = iommu_domain_alloc(priv->dma_dev->bus); > >> - if (!domain) > >> - return -ENOMEM; > >> - > >> - ret = iommu_get_dma_cookie(domain); > >> - if (ret) > >> - goto free_domain; > >> - > >> - ret = iommu_dma_init_domain(domain, start, size, NULL); > >> - if (ret) > >> - goto put_cookie; > >> - > >> - priv->mapping = domain; > >> + priv->mapping = iommu_get_dma_domain(priv->dma_dev); > > > > iommu_get_domain_for_dev(), please - this isn't a performance-critical > > path inside a DMA API implementation, plus without an actual > > dependency on this series maybe there's a chance of sneaking it into > > 4.19? > > > > (you could always still check domain->type == IOMMU_DOMAIN_DMA if you > > want to be really really paranoid.) > > > > I've sent a patch. I hope we will manage to get it as a fix into v4.19, > so it won't block your changes in v4.20. > > >> return 0; > >> - > >> -put_cookie: > >> - iommu_put_dma_cookie(domain); > >> -free_domain: > >> - iommu_domain_free(domain); > >> - return ret; > >> } > >> > >> static inline void __exynos_iommu_release_mapping(struct > >> exynos_drm_private *priv) > >> { > >> - struct iommu_domain *domain = priv->mapping; > >> - > >> - iommu_put_dma_cookie(domain); > >> - iommu_domain_free(domain); > >> priv->mapping = NULL; > >> } > >> > >> @@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct > >> exynos_drm_private *priv, > >> { > >> struct iommu_domain *domain = priv->mapping; > >> > >> - return iommu_attach_device(domain, dev); > >> + if (dev != priv->dma_dev) > >> + return iommu_attach_device(domain, dev); > >> + return 0; > >> } > >> > >> static inline void __exynos_iommu_detach(struct exynos_drm_private > >> *priv, > >> @@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct > >> exynos_drm_private *priv, > >> { > >> struct iommu_domain *domain = priv->mapping; > >> > >> - iommu_detach_device(domain, dev); > >> + if (dev != priv->dma_dev) > >> + iommu_detach_device(domain, dev); > > > > Strictly you could skip that check, as detaching the master device > > from its real default domain is just a no-op, but I guess maintaining > > the symmetry is probably more intuitive. > > I would like to keep the symmetry. > > > ... > > Best regards > -- > Marek Szyprowski, PhD > Samsung R&D Institute Poland >
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 511ff9a1d6d9..320f9ea82f3f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -491,7 +491,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); *handle = IOMMU_MAPPING_ERROR; } @@ -518,7 +518,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, unsigned long attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct page **pages; @@ -606,9 +606,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, - size_t size, int prot) + size_t size, int prot, struct iommu_domain *domain) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; size_t iova_off = 0; dma_addr_t iova; @@ -632,13 +631,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, int prot) { - return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot); + return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot, + iommu_get_dma_domain(dev)); } void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } /* @@ -726,7 +726,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int prot) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; @@ -811,20 +811,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, sg = tmp; } end = sg_dma_address(sg) + sg_dma_len(sg); - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); + __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start); } dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, - dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO); + dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, + iommu_get_dma_domain(dev)); } void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -850,7 +851,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - iova = __iommu_dma_map(dev, msi_addr, size, prot); + iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); if (iommu_dma_mapping_error(dev, iova)) goto out_free_page;
Most parts of iommu-dma already assume they are operating on a default domain set up by iommu_dma_init_domain(), and can be converted straight over to avoid the refcounting bottleneck. MSI page mappings may be in an unmanaged domain with an explicit MSI-only cookie, so retain the non-specific lookup, but that's OK since they're far from a contended fast path either way. Signed-off-by: Robin Murphy <robin.murphy@arm.com> --- drivers/iommu/dma-iommu.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-)