[RFC,v2,55/58] drivers/iommu: Add deferred map_sg operations

Message ID 20241212180423.1578358-56-smostafa@google.com (mailing list archive)
State New
Series KVM: Arm SMMUv3 driver for pKVM

Commit Message

Mostafa Saleh Dec. 12, 2024, 6:04 p.m. UTC
With the pKVM SMMUv3 driver, which para-virtualizes the IOMMU in the
hypervisor, map_sg has extra overhead: it loops over iommu_map, and
each map requires a context switch, disabling interrupts, and so on.

Instead, add new domain operations:
- alloc_cookie_sg: Allocate a new sg deferred cookie
- add_deferred_map_sg: Add a mapping to the cookie
- consume_deferred_map_sg: Consume and release the cookie

Alternatively, we could pass the sg list as-is. However, this would
duplicate some of the logic, and it makes more sense to consolidate
all the sg list parsing for IOMMU drivers in one place.

virtio-iommu is another IOMMU that could benefit from this, but it
would need a new operation that standardizes passing an sglist based
on these ops.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 drivers/iommu/iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/iommu.h | 19 ++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)
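
To illustrate how a driver back end might implement the three hooks,
here is a minimal C sketch. It is not part of the series; every my_*
name (including the my_hyp_map_list() one-shot back-end call) is
hypothetical, and the cookie layout is just one possible choice:

#include <linux/iommu.h>
#include <linux/overflow.h>
#include <linux/slab.h>

/* Hypothetical one-shot back end, e.g. a single pKVM hypercall. */
extern int my_hyp_map_list(struct iommu_domain *domain, unsigned long iova,
			   int prot, void *ents, unsigned int n);

/* Hypothetical driver-side cookie wrapping the generic header. */
struct my_map_cookie_sg {
	struct iommu_map_cookie_sg core;
	unsigned long iova;
	int prot;
	unsigned int nents;
	unsigned int used;
	struct {
		phys_addr_t paddr;
		size_t pgsize;
		size_t pgcount;
	} ents[];
};

static struct iommu_map_cookie_sg *
my_alloc_cookie_sg(unsigned long iova, int prot, unsigned int nents, gfp_t gfp)
{
	/* iommu_map_sg() coalesces segments, so at most nents entries. */
	struct my_map_cookie_sg *c = kzalloc(struct_size(c, ents, nents), gfp);

	if (!c)
		return NULL;
	c->iova = iova;
	c->prot = prot;
	c->nents = nents;
	return &c->core;	/* core.domain is filled in by the caller */
}

static int my_add_deferred_map_sg(struct iommu_map_cookie_sg *cookie,
				  phys_addr_t paddr, size_t pgsize,
				  size_t pgcount)
{
	struct my_map_cookie_sg *c =
		container_of(cookie, struct my_map_cookie_sg, core);

	if (WARN_ON(c->used >= c->nents))
		return -ENOSPC;
	c->ents[c->used].paddr = paddr;
	c->ents[c->used].pgsize = pgsize;
	c->ents[c->used].pgcount = pgcount;
	c->used++;
	return 0;
}

static int my_consume_deferred_map_sg(struct iommu_map_cookie_sg *cookie)
{
	struct my_map_cookie_sg *c =
		container_of(cookie, struct my_map_cookie_sg, core);
	/* One hypervisor transition for the whole list. */
	int ret = my_hyp_map_list(c->core.domain, c->iova, c->prot,
				  c->ents, c->used);

	kfree(c);
	return ret;
}

With this shape, the per-map hypervisor transitions described above
collapse into a single back-end call per iommu_map_sg().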

Comments

Robin Murphy Dec. 19, 2024, 12:48 p.m. UTC | #1
On 2024-12-12 6:04 pm, Mostafa Saleh wrote:
> With the pKVM SMMUv3 driver, which para-virtualizes the IOMMU in the
> hypervisor, map_sg has extra overhead: it loops over iommu_map, and
> each map requires a context switch, disabling interrupts, and so on.
> 
> Instead, add new domain operations:
> - alloc_cookie_sg: Allocate a new sg deferred cookie
> - add_deferred_map_sg: Add a mapping to the cookie
> - consume_deferred_map_sg: Consume and release the cookie
> 
> Alternatively, we could pass the sg list as-is. However, this would
> duplicate some of the logic, and it makes more sense to consolidate
> all the sg list parsing for IOMMU drivers in one place.

But why bother with fiddly overly-specific machinery at all when you can 
already make ->map_pages asynchronous and consolidate the expensive part 
into ->iotlb_sync_map in general, like s390 does?

Thanks,
Robin.
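
For reference, the shape Robin is pointing at would look roughly like
this. A sketch only: the two ops signatures match struct
iommu_domain_ops, but the my_* driver-local pieces are hypothetical:

#include <linux/iommu.h>

/* Hypothetical driver-local pieces. */
extern int my_shadow_map(struct iommu_domain *domain, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
extern int my_hyp_sync_range(struct iommu_domain *domain, unsigned long iova,
			     size_t size);

static int my_map_pages(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t pgsize, size_t pgcount,
			int prot, gfp_t gfp, size_t *mapped)
{
	/* Cheap: record the mapping driver-side only, no hypercall. */
	int ret = my_shadow_map(domain, iova, paddr, pgsize * pgcount,
				prot, gfp);

	if (!ret)
		*mapped = pgsize * pgcount;
	return ret;
}

static int my_iotlb_sync_map(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	/* Expensive part, once per iommu_map()/iommu_map_sg() call. */
	return my_hyp_sync_range(domain, iova, size);
}

The catch, as the reply below notes, is that ->iotlb_sync_map() only
receives the IOVA range, which is enough when the back end just needs
a range notification (as on s390) but not when it must hand over the
iova->paddr translations themselves.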

> virtio-iommu is another IOMMU that could benefit from this, but it
> would need a new operation that standardizes passing an sglist based
> on these ops.
> 
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
>   drivers/iommu/iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++--
>   include/linux/iommu.h | 19 ++++++++++++++++
>   2 files changed, 70 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 83c8e617a2c5..3a3c48631dd6 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2608,6 +2608,37 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
>   }
>   EXPORT_SYMBOL_GPL(iommu_unmap_fast);
>   
> +static int __iommu_add_sg(struct iommu_map_cookie_sg *cookie_sg,
> +			  unsigned long iova, phys_addr_t paddr, size_t size)
> +{
> +	struct iommu_domain *domain = cookie_sg->domain;
> +	const struct iommu_domain_ops *ops = domain->ops;
> +	unsigned int min_pagesz;
> +	size_t pgsize, count;
> +
> +	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
> +		return -EINVAL;
> +
> +	if (WARN_ON(domain->pgsize_bitmap == 0UL))
> +		return -ENODEV;
> +
> +	/* find out the minimum page size supported */
> +	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
> +
> +	/*
> +	 * both the virtual address and the physical one, as well as
> +	 * the size of the mapping, must be aligned (at least) to the
> +	 * size of the smallest page supported by the hardware
> +	 */
> +	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
> +		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
> +		       iova, &paddr, size, min_pagesz);
> +		return -EINVAL;
> +	}
> +	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
> +	return ops->add_deferred_map_sg(cookie_sg, paddr, pgsize, count);
> +}
> +
>   ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
>   		     struct scatterlist *sg, unsigned int nents, int prot,
>   		     gfp_t gfp)
> @@ -2617,6 +2648,9 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
>   	phys_addr_t start;
>   	unsigned int i = 0;
>   	int ret;
> +	bool deferred_sg = ops->alloc_cookie_sg && ops->add_deferred_map_sg &&
> +			   ops->consume_deferred_map_sg;
> +	struct iommu_map_cookie_sg *cookie_sg;
>   
>   	might_sleep_if(gfpflags_allow_blocking(gfp));
>   
> @@ -2625,12 +2659,24 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
>   				__GFP_HIGHMEM)))
>   		return -EINVAL;
>   
> +	if (deferred_sg) {
> +		cookie_sg = ops->alloc_cookie_sg(iova, prot, nents, gfp);
> +		if (!cookie_sg) {
> +			pr_err("iommu: failed alloc cookie\n");
> +			return -ENOMEM;
> +		}
> +		cookie_sg->domain = domain;
> +	}
> +
>   	while (i <= nents) {
>   		phys_addr_t s_phys = sg_phys(sg);
>   
>   		if (len && s_phys != start + len) {
> -			ret = __iommu_map(domain, iova + mapped, start,
> -					len, prot, gfp);
> +			if (deferred_sg)
> +				ret = __iommu_add_sg(cookie_sg, iova + mapped, start, len);
> +			else
> +				ret = __iommu_map(domain, iova + mapped, start,
> +						  len, prot, gfp);
>   
>   			if (ret)
>   				goto out_err;
> @@ -2654,6 +2700,9 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
>   			sg = sg_next(sg);
>   	}
>   
> +	if (deferred_sg)
> +		ops->consume_deferred_map_sg(cookie_sg);
> +
>   	if (ops->iotlb_sync_map) {
>   		ret = ops->iotlb_sync_map(domain, iova, mapped);
>   		if (ret)
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index c75877044185..5e60ac349228 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -601,6 +601,14 @@ struct iommu_ops {
>   	u8 user_pasid_table:1;
>   };
>   
> +/**
> + * struct iommu_map_cookie_sg - Cookie for a deferred map sg
> + * @domain: Domain for the sg list
> + */
> +struct iommu_map_cookie_sg {
> +	struct iommu_domain *domain;
> +};
> +
>   /**
>    * struct iommu_domain_ops - domain specific operations
>    * @attach_dev: attach an iommu domain to a device
> @@ -638,6 +646,11 @@ struct iommu_ops {
>    * @enable_nesting: Enable nesting
>    * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
>    * @free: Release the domain after use.
> + * @alloc_cookie_sg: Allocate a cookie used to build an sg list,
> + *		     filled in by the following functions
> + * @add_deferred_map_sg: Add a mapping to an sg list cookie.
> + * @consume_deferred_map_sg: Consume the sg list once all mappings have been
> + *			     added; it should also release the cookie, as it
> + *			     is no longer used.
>    */
>   struct iommu_domain_ops {
>   	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
> @@ -668,6 +681,12 @@ struct iommu_domain_ops {
>   				  unsigned long quirks);
>   
>   	void (*free)(struct iommu_domain *domain);
> +
> +	struct iommu_map_cookie_sg *(*alloc_cookie_sg)(unsigned long iova, int prot,
> +						       unsigned int nents, gfp_t gfp);
> +	int (*add_deferred_map_sg)(struct iommu_map_cookie_sg *cookie,
> +				   phys_addr_t paddr, size_t pgsize, size_t pgcount);
> +	int (*consume_deferred_map_sg)(struct iommu_map_cookie_sg *cookie);
>   };
>   
>   /**
Mostafa Saleh Dec. 19, 2024, 2:24 p.m. UTC | #2
Hi Robin,

On Thu, Dec 19, 2024 at 12:48:27PM +0000, Robin Murphy wrote:
> On 2024-12-12 6:04 pm, Mostafa Saleh wrote:
> > With the pKVM SMMUv3 driver, which para-virtualizes the IOMMU in the
> > hypervisor, map_sg has extra overhead: it loops over iommu_map, and
> > each map requires a context switch, disabling interrupts, and so on.
> > 
> > Instead, add new domain operations:
> > - alloc_cookie_sg: Allocate a new sg deferred cookie
> > - add_deferred_map_sg: Add a mapping to the cookie
> > - consume_deferred_map_sg: Consume and release the cookie
> > 
> > Alternatively, we could pass the sg list as-is. However, this would
> > duplicate some of the logic, and it makes more sense to consolidate
> > all the sg list parsing for IOMMU drivers in one place.
> 
> But why bother with fiddly overly-specific machinery at all when you can
> already make ->map_pages asynchronous and consolidate the expensive part
> into ->iotlb_sync_map in general, like s390 does?

This was my initial idea too. But I believe there is not enough context in
iotlb_sync_map. We would either have to create a per-domain deferred_map
list that is synced on any iotlb_sync_map, but that would require locking
the map operation and hence hurt concurrency.

Or we would have to use some complex logic to extract context in
iotlb_sync_map (something like an IOVA range tree filled at map time that
sync can then look up). That’s why I proposed this approach, where the
IOMMU subsystem is aware of the semantics by design and “helps” by
providing the right data structures/calls.

I had a quick look at s390 now, and it seems a bit different: it only
notifies the hypervisor about the IOVA range being changed, and doesn’t
need to provide the iova->paddr mappings that pKVM does.

Thanks,
Mostafa
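
To make that trade-off concrete, the per-domain deferred_map list
variant would look roughly like this (a sketch with hypothetical my_*
names; prot handling elided). Every mapper on the domain funnels
through the same lock:

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_pending_map {
	struct list_head node;
	unsigned long iova;
	phys_addr_t paddr;
	size_t size;
};

struct my_domain {
	struct iommu_domain core;
	spinlock_t pending_lock;	/* serializes all mappers */
	struct list_head pending;	/* drained by ->iotlb_sync_map */
};

static int my_deferred_map_pages(struct iommu_domain *domain,
				 unsigned long iova, phys_addr_t paddr,
				 size_t pgsize, size_t pgcount, int prot,
				 gfp_t gfp, size_t *mapped)
{
	struct my_domain *d = container_of(domain, struct my_domain, core);
	struct my_pending_map *p = kmalloc(sizeof(*p), gfp);

	if (!p)
		return -ENOMEM;
	p->iova = iova;
	p->paddr = paddr;
	p->size = pgsize * pgcount;

	/* Concurrent maps on this domain all contend here. */
	spin_lock(&d->pending_lock);
	list_add_tail(&p->node, &d->pending);
	spin_unlock(&d->pending_lock);

	*mapped = p->size;
	return 0;
}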
> 
> Thanks,
> Robin.
> 
> > virtio-iommu is another IOMMU that could benefit from this, but it
> > would need a new operation that standardizes passing an sglist based
> > on these ops.
> > 
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> >   drivers/iommu/iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++--
> >   include/linux/iommu.h | 19 ++++++++++++++++
> >   2 files changed, 70 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 83c8e617a2c5..3a3c48631dd6 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -2608,6 +2608,37 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
> >   }
> >   EXPORT_SYMBOL_GPL(iommu_unmap_fast);
> > +static int __iommu_add_sg(struct iommu_map_cookie_sg *cookie_sg,
> > +			  unsigned long iova, phys_addr_t paddr, size_t size)
> > +{
> > +	struct iommu_domain *domain = cookie_sg->domain;
> > +	const struct iommu_domain_ops *ops = domain->ops;
> > +	unsigned int min_pagesz;
> > +	size_t pgsize, count;
> > +
> > +	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON(domain->pgsize_bitmap == 0UL))
> > +		return -ENODEV;
> > +
> > +	/* find out the minimum page size supported */
> > +	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
> > +
> > +	/*
> > +	 * both the virtual address and the physical one, as well as
> > +	 * the size of the mapping, must be aligned (at least) to the
> > +	 * size of the smallest page supported by the hardware
> > +	 */
> > +	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
> > +		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
> > +		       iova, &paddr, size, min_pagesz);
> > +		return -EINVAL;
> > +	}
> > +	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
> > +	return ops->add_deferred_map_sg(cookie_sg, paddr, pgsize, count);
> > +}
> > +
> >   ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
> >   		     struct scatterlist *sg, unsigned int nents, int prot,
> >   		     gfp_t gfp)
> > @@ -2617,6 +2648,9 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
> >   	phys_addr_t start;
> >   	unsigned int i = 0;
> >   	int ret;
> > +	bool deferred_sg = ops->alloc_cookie_sg && ops->add_deferred_map_sg &&
> > +			   ops->consume_deferred_map_sg;
> > +	struct iommu_map_cookie_sg *cookie_sg;
> >   	might_sleep_if(gfpflags_allow_blocking(gfp));
> > @@ -2625,12 +2659,24 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
> >   				__GFP_HIGHMEM)))
> >   		return -EINVAL;
> > +	if (deferred_sg) {
> > +		cookie_sg = ops->alloc_cookie_sg(iova, prot, nents, gfp);
> > +		if (!cookie_sg) {
> > +			pr_err("iommu: failed alloc cookie\n");
> > +			return -ENOMEM;
> > +		}
> > +		cookie_sg->domain = domain;
> > +	}
> > +
> >   	while (i <= nents) {
> >   		phys_addr_t s_phys = sg_phys(sg);
> >   		if (len && s_phys != start + len) {
> > -			ret = __iommu_map(domain, iova + mapped, start,
> > -					len, prot, gfp);
> > +			if (deferred_sg)
> > +				ret = __iommu_add_sg(cookie_sg, iova + mapped, start, len);
> > +			else
> > +				ret = __iommu_map(domain, iova + mapped, start,
> > +						  len, prot, gfp);
> >   			if (ret)
> >   				goto out_err;
> > @@ -2654,6 +2700,9 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
> >   			sg = sg_next(sg);
> >   	}
> > +	if (deferred_sg)
> > +		ops->consume_deferred_map_sg(cookie_sg);
> > +
> >   	if (ops->iotlb_sync_map) {
> >   		ret = ops->iotlb_sync_map(domain, iova, mapped);
> >   		if (ret)
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index c75877044185..5e60ac349228 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -601,6 +601,14 @@ struct iommu_ops {
> >   	u8 user_pasid_table:1;
> >   };
> > +/**
> > + * struct iommu_map_cookie_sg - Cookie for a deferred map sg
> > + * @domain: Domain for the sg list
> > + */
> > +struct iommu_map_cookie_sg {
> > +	struct iommu_domain *domain;
> > +};
> > +
> >   /**
> >    * struct iommu_domain_ops - domain specific operations
> >    * @attach_dev: attach an iommu domain to a device
> > @@ -638,6 +646,11 @@ struct iommu_ops {
> >    * @enable_nesting: Enable nesting
> >    * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
> >    * @free: Release the domain after use.
> > + * @alloc_cookie_sg: Allocate a cookie used to build an sg list,
> > + *		     filled in by the following functions
> > + * @add_deferred_map_sg: Add a mapping to an sg list cookie.
> > + * @consume_deferred_map_sg: Consume the sg list once all mappings have been
> > + *			     added; it should also release the cookie, as it
> > + *			     is no longer used.
> >    */
> >   struct iommu_domain_ops {
> >   	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
> > @@ -668,6 +681,12 @@ struct iommu_domain_ops {
> >   				  unsigned long quirks);
> >   	void (*free)(struct iommu_domain *domain);
> > +
> > +	struct iommu_map_cookie_sg *(*alloc_cookie_sg)(unsigned long iova, int prot,
> > +						       unsigned int nents, gfp_t gfp);
> > +	int (*add_deferred_map_sg)(struct iommu_map_cookie_sg *cookie,
> > +				   phys_addr_t paddr, size_t pgsize, size_t pgcount);
> > +	int (*consume_deferred_map_sg)(struct iommu_map_cookie_sg *cookie);
> >   };
> >   /**
>

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 83c8e617a2c5..3a3c48631dd6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2608,6 +2608,37 @@  size_t iommu_unmap_fast(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
+static int __iommu_add_sg(struct iommu_map_cookie_sg *cookie_sg,
+			  unsigned long iova, phys_addr_t paddr, size_t size)
+{
+	struct iommu_domain *domain = cookie_sg->domain;
+	const struct iommu_domain_ops *ops = domain->ops;
+	unsigned int min_pagesz;
+	size_t pgsize, count;
+
+	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+		return -EINVAL;
+
+	if (WARN_ON(domain->pgsize_bitmap == 0UL))
+		return -ENODEV;
+
+	/* find out the minimum page size supported */
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
+
+	/*
+	 * both the virtual address and the physical one, as well as
+	 * the size of the mapping, must be aligned (at least) to the
+	 * size of the smallest page supported by the hardware
+	 */
+	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
+		       iova, &paddr, size, min_pagesz);
+		return -EINVAL;
+	}
+	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+	return ops->add_deferred_map_sg(cookie_sg, paddr, pgsize, count);
+}
+
 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 		     struct scatterlist *sg, unsigned int nents, int prot,
 		     gfp_t gfp)
@@ -2617,6 +2648,9 @@  ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 	phys_addr_t start;
 	unsigned int i = 0;
 	int ret;
+	bool deferred_sg = ops->alloc_cookie_sg && ops->add_deferred_map_sg &&
+			   ops->consume_deferred_map_sg;
+	struct iommu_map_cookie_sg *cookie_sg;
 
 	might_sleep_if(gfpflags_allow_blocking(gfp));
 
@@ -2625,12 +2659,24 @@  ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 				__GFP_HIGHMEM)))
 		return -EINVAL;
 
+	if (deferred_sg) {
+		cookie_sg = ops->alloc_cookie_sg(iova, prot, nents, gfp);
+		if (!cookie_sg) {
+			pr_err("iommu: failed alloc cookie\n");
+			return -ENOMEM;
+		}
+		cookie_sg->domain = domain;
+	}
+
 	while (i <= nents) {
 		phys_addr_t s_phys = sg_phys(sg);
 
 		if (len && s_phys != start + len) {
-			ret = __iommu_map(domain, iova + mapped, start,
-					len, prot, gfp);
+			if (deferred_sg)
+				ret = __iommu_add_sg(cookie_sg, iova + mapped, start, len);
+			else
+				ret = __iommu_map(domain, iova + mapped, start,
+						  len, prot, gfp);
 
 			if (ret)
 				goto out_err;
@@ -2654,6 +2700,9 @@  ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 			sg = sg_next(sg);
 	}
 
+	if (deferred_sg)
+		ops->consume_deferred_map_sg(cookie_sg);
+
 	if (ops->iotlb_sync_map) {
 		ret = ops->iotlb_sync_map(domain, iova, mapped);
 		if (ret)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c75877044185..5e60ac349228 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -601,6 +601,14 @@  struct iommu_ops {
 	u8 user_pasid_table:1;
 };
 
+/**
+ * struct iommu_map_cookie_sg - Cookie for a deferred map sg
+ * @domain: Domain for the sg list
+ */
+struct iommu_map_cookie_sg {
+	struct iommu_domain *domain;
+};
+
 /**
  * struct iommu_domain_ops - domain specific operations
  * @attach_dev: attach an iommu domain to a device
@@ -638,6 +646,11 @@  struct iommu_ops {
  * @enable_nesting: Enable nesting
  * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
  * @free: Release the domain after use.
+ * @alloc_cookie_sg: Allocate a cookie used to build an sg list,
+ *		     filled in by the following functions
+ * @add_deferred_map_sg: Add a mapping to an sg list cookie.
+ * @consume_deferred_map_sg: Consume the sg list once all mappings have been
+ *			     added; it should also release the cookie, as it
+ *			     is no longer used.
  */
 struct iommu_domain_ops {
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
@@ -668,6 +681,12 @@  struct iommu_domain_ops {
 				  unsigned long quirks);
 
 	void (*free)(struct iommu_domain *domain);
+
+	struct iommu_map_cookie_sg *(*alloc_cookie_sg)(unsigned long iova, int prot,
+						       unsigned int nents, gfp_t gfp);
+	int (*add_deferred_map_sg)(struct iommu_map_cookie_sg *cookie,
+				   phys_addr_t paddr, size_t pgsize, size_t pgcount);
+	int (*consume_deferred_map_sg)(struct iommu_map_cookie_sg *cookie);
 };
 
 /**
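
For completeness, the caller-visible API is unchanged by this patch:
consumers still make a single iommu_map_sg() call, and the deferred
path engages only when the domain supplies all three new ops. A
minimal usage sketch (my_map_buffer() is hypothetical):

#include <linux/iommu.h>
#include <linux/scatterlist.h>

static int my_map_buffer(struct iommu_domain *domain, unsigned long iova,
			 struct scatterlist *sgl, unsigned int nents)
{
	/*
	 * One call regardless of nents: when the domain supplies the
	 * three deferred ops, the core coalesces the segments into the
	 * cookie and the driver consumes them in a single operation;
	 * otherwise it falls back to per-segment __iommu_map().
	 */
	ssize_t mapped = iommu_map_sg(domain, iova, sgl, nents,
				      IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);

	return mapped < 0 ? mapped : 0;
}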