
[RFC,20/45] KVM: arm64: iommu: Add map() and unmap() operations

Message ID 20230201125328.2186498-21-jean-philippe@linaro.org (mailing list archive)
State New, archived
Series KVM: Arm SMMUv3 driver for pKVM

Commit Message

Jean-Philippe Brucker Feb. 1, 2023, 12:53 p.m. UTC
Handle map() and unmap() hypercalls by calling the io-pgtable library.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 arch/arm64/kvm/hyp/nvhe/iommu/iommu.c | 144 ++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

Comments

Mostafa Saleh March 30, 2023, 6:14 p.m. UTC | #1
Hi Jean,

On Wed, Feb 01, 2023 at 12:53:04PM +0000, Jean-Philippe Brucker wrote:
> Handle map() and unmap() hypercalls by calling the io-pgtable library.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  arch/arm64/kvm/hyp/nvhe/iommu/iommu.c | 144 ++++++++++++++++++++++++++
>  1 file changed, 144 insertions(+)
> 
> diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
> index 7404ea77ed9f..0550e7bdf179 100644
> --- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
> +++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
> @@ -183,6 +183,150 @@ int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
>  	return ret;
>  }
>  
> +static int __kvm_iommu_unmap_pages(struct io_pgtable *iopt, unsigned long iova,
> +				   size_t pgsize, size_t pgcount)
> +{
> +	int ret;
> +	size_t unmapped;
> +	phys_addr_t paddr;
> +	size_t total_unmapped = 0;
> +	size_t size = pgsize * pgcount;
> +
> +	while (total_unmapped < size) {
> +		paddr = iopt_iova_to_phys(iopt, iova);
> +		if (paddr == 0)
> +			return -EINVAL;
> +
> +		/*
> +		 * One page/block at a time, because the range provided may not
> +		 * be physically contiguous, and we need to unshare all physical
> +		 * pages.
> +		 */
> +		unmapped = iopt_unmap_pages(iopt, iova, pgsize, 1, NULL);
> +		if (!unmapped)
> +			return -EINVAL;
> +
> +		ret = __pkvm_host_unshare_dma(paddr, pgsize);
> +		if (ret)
> +			return ret;
> +
> +		iova += unmapped;
> +		pgcount -= unmapped / pgsize;
> +		total_unmapped += unmapped;
> +	}
> +
> +	return 0;
> +}
> +
> +#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
> +			 IOMMU_NOEXEC | IOMMU_MMIO)
> +
> +int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
> +			unsigned long iova, phys_addr_t paddr, size_t pgsize,
> +			size_t pgcount, int prot)
> +{
> +	size_t size;
> +	size_t granule;
> +	int ret = -EINVAL;
> +	size_t mapped = 0;
> +	struct io_pgtable iopt;
> +	struct kvm_hyp_iommu *iommu;
> +	size_t pgcount_orig = pgcount;
> +	unsigned long iova_orig = iova;
> +	struct kvm_hyp_iommu_domain *domain;
> +
> +	if (prot & ~IOMMU_PROT_MASK)
> +		return -EINVAL;
> +
> +	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
> +	    iova + size < iova || paddr + size < paddr)
> +		return -EOVERFLOW;
> +
> +	hyp_spin_lock(&iommu_lock);
> +
> +	domain = handle_to_domain(iommu_id, domain_id, &iommu);
> +	if (!domain)
> +		goto err_unlock;
> +
> +	granule = 1 << __ffs(iommu->pgtable->cfg.pgsize_bitmap);
> +	if (!IS_ALIGNED(iova | paddr | pgsize, granule))
> +		goto err_unlock;
> +
> +	ret = __pkvm_host_share_dma(paddr, size, !(prot & IOMMU_MMIO));
> +	if (ret)
> +		goto err_unlock;
> +
> +	iopt = domain_to_iopt(iommu, domain, domain_id);
> +	while (pgcount) {
> +		ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot,
> +				     0, &mapped);
> +		WARN_ON(!IS_ALIGNED(mapped, pgsize));
> +		pgcount -= mapped / pgsize;
> +		if (ret)
> +			goto err_unmap;
> +		iova += mapped;
> +		paddr += mapped;
> +	}
> +
> +	hyp_spin_unlock(&iommu_lock);
> +	return 0;
> +
> +err_unmap:
> +	__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount);
On error here, this unmaps (and unshares) only the pages that have been
mapped.
But all pages were shared with the IOMMU before (via
__pkvm_host_share_dma), so this corrupts the state of the other pages:
they remain marked as shared while they are not.

I see we can add a "bool unshare" arg to __kvm_iommu_unmap_pages, which
would be called with false on the error path here, after calling
__pkvm_host_unshare_dma for the whole range, and with true from
kvm_iommu_unmap_pages.
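
Something like this untested sketch (it also assumes we keep a copy of
paddr, say paddr_orig, at the top of kvm_iommu_map_pages):

static int __kvm_iommu_unmap_pages(struct io_pgtable *iopt, unsigned long iova,
				   size_t pgsize, size_t pgcount, bool unshare)
{
	int ret;
	size_t unmapped;
	phys_addr_t paddr;
	size_t total_unmapped = 0;
	size_t size = pgsize * pgcount;

	while (total_unmapped < size) {
		paddr = iopt_iova_to_phys(iopt, iova);
		if (paddr == 0)
			return -EINVAL;

		/* One page/block at a time, the range may not be physically contiguous */
		unmapped = iopt_unmap_pages(iopt, iova, pgsize, 1, NULL);
		if (!unmapped)
			return -EINVAL;

		/* On the map() error path the caller already unshared the whole range */
		if (unshare) {
			ret = __pkvm_host_unshare_dma(paddr, pgsize);
			if (ret)
				return ret;
		}

		iova += unmapped;
		pgcount -= unmapped / pgsize;
		total_unmapped += unmapped;
	}

	return 0;
}

and on the error path of kvm_iommu_map_pages:

err_unmap:
	__pkvm_host_unshare_dma(paddr_orig, size);
	__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize,
				pgcount_orig - pgcount, false);
err_unlock:
	hyp_spin_unlock(&iommu_lock);
	return ret;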

> +err_unlock:
> +	hyp_spin_unlock(&iommu_lock);
> +	return ret;
> +}
> +
> +int kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
> +			  unsigned long iova, size_t pgsize, size_t pgcount)
> +{
> +	size_t size;
> +	size_t granule;
> +	int ret = -EINVAL;
> +	struct io_pgtable iopt;
> +	struct kvm_hyp_iommu *iommu;
> +	struct kvm_hyp_iommu_domain *domain;
> +
> +	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
> +	    iova + size < iova)
> +		return -EOVERFLOW;
> +
> +	hyp_spin_lock(&iommu_lock);
> +	domain = handle_to_domain(iommu_id, domain_id, &iommu);
> +	if (!domain)
> +		goto out_unlock;
> +
> +	granule = 1 << __ffs(iommu->pgtable->cfg.pgsize_bitmap);
> +	if (!IS_ALIGNED(iova | pgsize, granule))
> +		goto out_unlock;
> +
> +	iopt = domain_to_iopt(iommu, domain, domain_id);
> +	ret = __kvm_iommu_unmap_pages(&iopt, iova, pgsize, pgcount);
> +out_unlock:
> +	hyp_spin_unlock(&iommu_lock);
> +	return ret;
> +}
> +
> +phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t iommu_id,
> +				   pkvm_handle_t domain_id, unsigned long iova)
> +{
> +	phys_addr_t phys = 0;
> +	struct io_pgtable iopt;
> +	struct kvm_hyp_iommu *iommu;
> +	struct kvm_hyp_iommu_domain *domain;
> +
> +	hyp_spin_lock(&iommu_lock);
> +	domain = handle_to_domain(iommu_id, domain_id, &iommu);
> +	if (domain) {
> +		iopt = domain_to_iopt(iommu, domain, domain_id);
> +
> +		phys = iopt_iova_to_phys(&iopt, iova);
> +	}
> +	hyp_spin_unlock(&iommu_lock);
> +	return phys;
> +}
> +
>  int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
>  {
>  	void *domains;
> -- 
> 2.39.0

Thanks,
Mostafa
Jean-Philippe Brucker April 4, 2023, 4 p.m. UTC | #2
Hi Mostafa,

On Thu, Mar 30, 2023 at 06:14:04PM +0000, Mostafa Saleh wrote:
> > +err_unmap:
> > +	__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount);
> On error here, this unmaps (and unshares) only the pages that have been
> mapped.
> But all pages were shared with the IOMMU before (via
> __pkvm_host_share_dma), so this corrupts the state of the other pages:
> they remain marked as shared while they are not.

Right, I'll fix this

> I see we can add a "bool unshare" arg to __kvm_iommu_unmap_pages, which
> would be called with false on the error path here, after calling
> __pkvm_host_unshare_dma for the whole range, and with true from
> kvm_iommu_unmap_pages.

I think it's simpler to call iopt_unmap_pages() directly here, followed by
__pkvm_host_unshare_dma(). It even saves us a few lines
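
Roughly (untested, and it assumes kvm_iommu_map_pages also keeps a
paddr_orig copy of paddr):

err_unmap:
	pgcount = pgcount_orig - pgcount;
	if (pgcount)
		iopt_unmap_pages(&iopt, iova_orig, pgsize, pgcount, NULL);
	__pkvm_host_unshare_dma(paddr_orig, size);
err_unlock:
	hyp_spin_unlock(&iommu_lock);
	return ret;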

Thanks,
Jean
Mostafa Saleh Sept. 20, 2023, 4:23 p.m. UTC | #3
Hi Jean,

On Tue, Apr 04, 2023 at 05:00:46PM +0100, Jean-Philippe Brucker wrote:
> Hi Mostafa,
> 
> On Thu, Mar 30, 2023 at 06:14:04PM +0000, Mostafa Saleh wrote:
> > > +err_unmap:
> > > +	__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount);
> > On error here, this unmaps (and unshares) only the pages that have been
> > mapped.
> > But all pages were shared with the IOMMU before (via
> > __pkvm_host_share_dma), so this corrupts the state of the other pages:
> > they remain marked as shared while they are not.
> 
> Right, I'll fix this
> 
> > I see we can add a "bool unshare" arg to __kvm_iommu_unmap_pages, which
> > would be called with false on the error path here, after calling
> > __pkvm_host_unshare_dma for the whole range, and with true from
> > kvm_iommu_unmap_pages.
> 
> I think it's simpler to call iopt_unmap_pages() directly here, followed by
> __pkvm_host_unshare_dma(). It even saves us a few lines
> 

I have been doing some testing based on the latest updates in
https://jpbrucker.net/git/linux/log/?h=pkvm/smmu

I think the unmap here is not enough as it assumes the whole range
can be unmapped in one call, so we would need something like this
instead (the patch might not apply directly though):
diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
index 7ebda87a1c61..32e145b9240f 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
@@ -432,6 +432,7 @@ int kvm_iommu_map_pages(pkvm_handle_t domain_id,
 	unsigned long iova_orig = iova;
 	struct kvm_hyp_iommu_domain *domain;
 	struct pkvm_hyp_vcpu *ctxt = pkvm_get_loaded_hyp_vcpu();
+	size_t unmapped;
 
 	if (!kvm_iommu_ops)
 		return -ENODEV;
@@ -489,8 +490,13 @@ int kvm_iommu_map_pages(pkvm_handle_t domain_id,
 
 err_unmap:
 	pgcount = pgcount_orig - pgcount;
-	if (pgcount)
-		iopt_unmap_pages(&iopt, iova_orig, pgsize, pgcount, NULL);
+	while (pgcount) {
+		unmapped = iopt_unmap_pages(&iopt, iova_orig, pgsize, pgcount, NULL);
+		iova_orig += unmapped;
+		pgcount -= unmapped/pgsize;
+		if (!unmapped)
+			break;
+	}
 	__pkvm_unshare_dma(paddr_orig, size);
 err_domain_put:
 	domain_put(domain);
Jean-Philippe Brucker Sept. 25, 2023, 5:21 p.m. UTC | #4
On Wed, Sep 20, 2023 at 04:23:49PM +0000, Mostafa Saleh wrote:
> I have been doing some testing based on the latest updates in
> https://jpbrucker.net/git/linux/log/?h=pkvm/smmu
> 
> I think the unmap here is not enough as it assumes the whole range
> can be unmapped in one call, so we would need something like this
> instead (the patch might not apply directly though):

Thanks for testing, the unmap is indeed wrong because io-pgtable doesn't
like clearing empty ptes. Another problem with very large mappings is that
we'll return to host several times to allocate more page tables, removing
and recreating everything from scratch every time. So I changed the
map_pages() call to keep track of what's been mapped so far, see below.

I pushed these fixes to the pkvm/smmu branch

Thanks,
Jean

---
diff --git a/arch/arm64/kvm/hyp/include/nvhe/iommu.h b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
index f522ec0002f10..254df53e04783 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/iommu.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
@@ -37,9 +37,9 @@ int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
 			 u32 endpoint_id);
 int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
 			 u32 endpoint_id);
-int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
-			unsigned long iova, phys_addr_t paddr, size_t pgsize,
-			size_t pgcount, int prot);
+size_t kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
+			   unsigned long iova, phys_addr_t paddr, size_t pgsize,
+			   size_t pgcount, int prot);
 size_t kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
 			     unsigned long iova, size_t pgsize, size_t pgcount);
 phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t iommu_id,
@@ -72,12 +72,12 @@ static inline int kvm_iommu_detach_dev(pkvm_handle_t iommu_id,
 	return -ENODEV;
 }

-static inline int kvm_iommu_map_pages(pkvm_handle_t iommu_id,
-				      pkvm_handle_t domain_id,
-				      unsigned long iova, phys_addr_t paddr,
-				      size_t pgsize, size_t pgcount, int prot)
+static inline size_t kvm_iommu_map_pages(pkvm_handle_t iommu_id,
+					 pkvm_handle_t domain_id,
+					 unsigned long iova, phys_addr_t paddr,
+					 size_t pgsize, size_t pgcount, int prot)
 {
-	return -ENODEV;
+	return 0;
 }

 static inline size_t kvm_iommu_unmap_pages(pkvm_handle_t iommu_id,
diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
index ad8c7b073de46..a69247c861d51 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
@@ -190,31 +190,29 @@ int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
 #define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
 			 IOMMU_NOEXEC | IOMMU_MMIO)

-int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
-			unsigned long iova, phys_addr_t paddr, size_t pgsize,
-			size_t pgcount, int prot)
+size_t kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
+			   unsigned long iova, phys_addr_t paddr, size_t pgsize,
+			   size_t pgcount, int prot)
 {
 	size_t size;
 	size_t mapped;
 	size_t granule;
 	int ret = -EINVAL;
 	struct io_pgtable iopt;
+	size_t total_mapped = 0;
 	struct kvm_hyp_iommu *iommu;
-	size_t pgcount_orig = pgcount;
-	phys_addr_t paddr_orig = paddr;
-	unsigned long iova_orig = iova;
 	struct kvm_hyp_iommu_domain *domain;

 	if (prot & ~IOMMU_PROT_MASK)
-		return -EINVAL;
+		return 0;

 	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
 	    iova + size < iova || paddr + size < paddr)
-		return -EOVERFLOW;
+		return 0;

 	iommu = kvm_iommu_ops.get_iommu_by_id(iommu_id);
 	if (!iommu)
-		return -EINVAL;
+		return 0;

 	hyp_spin_lock(&iommu->lock);
 	domain = handle_to_domain(iommu, domain_id);
@@ -230,29 +228,29 @@ int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
 		goto err_unlock;

 	iopt = domain_to_iopt(iommu, domain, domain_id);
-	while (pgcount) {
+	while (pgcount && !ret) {
 		mapped = 0;
 		ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot,
 				     0, &mapped);
 		WARN_ON(!IS_ALIGNED(mapped, pgsize));
+		WARN_ON(mapped > pgcount * pgsize);
+
 		pgcount -= mapped / pgsize;
-		if (ret)
-			goto err_unmap;
+		total_mapped += mapped;
 		iova += mapped;
 		paddr += mapped;
 	}

-	hyp_spin_unlock(&iommu->lock);
-	return 0;
-
-err_unmap:
-	pgcount = pgcount_orig - pgcount;
+	/*
+	 * Unshare the bits that haven't been mapped yet. The host calls back
+	 * either to continue mapping, or to unmap and unshare what's been done
+	 * so far.
+	 */
 	if (pgcount)
-		iopt_unmap_pages(&iopt, iova_orig, pgsize, pgcount, NULL);
-	__pkvm_host_unshare_dma(paddr_orig, size);
+		__pkvm_host_unshare_dma(paddr, pgcount * pgsize);
 err_unlock:
 	hyp_spin_unlock(&iommu->lock);
-	return ret;
+	return total_mapped;
 }

 size_t kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
index e67cd7485dfd4..0ffe3e32cf419 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
@@ -369,23 +369,32 @@ static int kvm_arm_smmu_attach_dev(struct iommu_domain *domain,
 static int kvm_arm_smmu_map_pages(struct iommu_domain *domain,
 				  unsigned long iova, phys_addr_t paddr,
 				  size_t pgsize, size_t pgcount, int prot,
-				  gfp_t gfp, size_t *mapped)
+				  gfp_t gfp, size_t *total_mapped)
 {
-	int ret;
+	size_t mapped;
 	unsigned long irqflags;
+	size_t size = pgsize * pgcount;
 	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
 	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
 	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);

 	local_lock_irqsave(&memcache_lock, irqflags);
-	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_map_pages,
-				   host_smmu->id, kvm_smmu_domain->id, iova,
-				   paddr, pgsize, pgcount, prot);
+	do {
+		mapped = kvm_call_hyp_nvhe(__pkvm_host_iommu_map_pages,
+					   host_smmu->id, kvm_smmu_domain->id,
+					   iova, paddr, pgsize, pgcount, prot);
+		iova += mapped;
+		paddr += mapped;
+		WARN_ON(mapped % pgsize);
+		WARN_ON(mapped > pgcount * pgsize);
+		pgcount -= mapped / pgsize;
+		*total_mapped += mapped;
+	} while (*total_mapped < size && !kvm_arm_smmu_topup_memcache(smmu));
+	kvm_arm_smmu_reclaim_memcache();
 	local_unlock_irqrestore(&memcache_lock, irqflags);
-	if (ret)
-		return ret;
+	if (*total_mapped < size)
+		return -EINVAL;

-	*mapped = pgsize * pgcount;
 	return 0;
 }

----

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
index 0ffe3e32cf419..0055ecd75990e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
@@ -405,6 +405,8 @@ static size_t kvm_arm_smmu_unmap_pages(struct iommu_domain *domain,
 {
 	size_t unmapped;
 	unsigned long irqflags;
+	size_t total_unmapped = 0;
+	size_t size = pgsize * pgcount;
 	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
 	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
 	struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
@@ -414,11 +416,23 @@ static size_t kvm_arm_smmu_unmap_pages(struct iommu_domain *domain,
 		unmapped = kvm_call_hyp_nvhe(__pkvm_host_iommu_unmap_pages,
 					     host_smmu->id, kvm_smmu_domain->id,
 					     iova, pgsize, pgcount);
-	} while (!unmapped && !kvm_arm_smmu_topup_memcache(smmu));
+		total_unmapped += unmapped;
+		iova += unmapped;
+		WARN_ON(unmapped % pgsize);
+		pgcount -= unmapped / pgsize;
+
+		/*
+		 * The page table driver can unmap less than we asked for. If it
+		 * didn't unmap anything at all, then it either reached the end
+		 * of the range, or it needs a page in the memcache to break a
+		 * block mapping.
+		 */
+	} while (total_unmapped < size &&
+		 (unmapped || !kvm_arm_smmu_topup_memcache(smmu)));
 	kvm_arm_smmu_reclaim_memcache();
 	local_unlock_irqrestore(&memcache_lock, irqflags);
 
-	return unmapped;
+	return total_unmapped;
 }
 
 static phys_addr_t kvm_arm_smmu_iova_to_phys(struct iommu_domain *domain,
Mostafa Saleh Feb. 16, 2024, 11:59 a.m. UTC | #5
Hi Jean,

On Wed, Feb 01, 2023 at 12:53:04PM +0000, Jean-Philippe Brucker wrote:
> Handle map() and unmap() hypercalls by calling the io-pgtable library.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  arch/arm64/kvm/hyp/nvhe/iommu/iommu.c | 144 ++++++++++++++++++++++++++
>  1 file changed, 144 insertions(+)
> 
> diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
> index 7404ea77ed9f..0550e7bdf179 100644
> --- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
> +++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
> @@ -183,6 +183,150 @@ int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
>  	return ret;
>  }
>  
> +static int __kvm_iommu_unmap_pages(struct io_pgtable *iopt, unsigned long iova,
> +				   size_t pgsize, size_t pgcount)
> +{
> +	int ret;
> +	size_t unmapped;
> +	phys_addr_t paddr;
> +	size_t total_unmapped = 0;
> +	size_t size = pgsize * pgcount;
> +
> +	while (total_unmapped < size) {
> +		paddr = iopt_iova_to_phys(iopt, iova);
> +		if (paddr == 0)
> +			return -EINVAL;
> +
> +		/*
> +		 * One page/block at a time, because the range provided may not
> +		 * be physically contiguous, and we need to unshare all physical
> +		 * pages.
> +		 */
> +		unmapped = iopt_unmap_pages(iopt, iova, pgsize, 1, NULL);
> +		if (!unmapped)
> +			return -EINVAL;
> +
> +		ret = __pkvm_host_unshare_dma(paddr, pgsize);
> +		if (ret)
> +			return ret;
> +
> +		iova += unmapped;
> +		pgcount -= unmapped / pgsize;
> +		total_unmapped += unmapped;
> +	}
> +
> +	return 0;
> +}
> +
> +#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
> +			 IOMMU_NOEXEC | IOMMU_MMIO)
Is there a reason IOMMU_PRIV is not allowed?
> +
> +int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
> +			unsigned long iova, phys_addr_t paddr, size_t pgsize,
> +			size_t pgcount, int prot)
> +{
> +	size_t size;
> +	size_t granule;
> +	int ret = -EINVAL;
> +	size_t mapped = 0;
> +	struct io_pgtable iopt;
> +	struct kvm_hyp_iommu *iommu;
> +	size_t pgcount_orig = pgcount;
> +	unsigned long iova_orig = iova;
> +	struct kvm_hyp_iommu_domain *domain;
> +
> +	if (prot & ~IOMMU_PROT_MASK)
> +		return -EINVAL;
> +
> +	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
> +	    iova + size < iova || paddr + size < paddr)
> +		return -EOVERFLOW;
> +
> +	hyp_spin_lock(&iommu_lock);
> +
> +	domain = handle_to_domain(iommu_id, domain_id, &iommu);
> +	if (!domain)
> +		goto err_unlock;
> +
> +	granule = 1 << __ffs(iommu->pgtable->cfg.pgsize_bitmap);
> +	if (!IS_ALIGNED(iova | paddr | pgsize, granule))
> +		goto err_unlock;
> +
> +	ret = __pkvm_host_share_dma(paddr, size, !(prot & IOMMU_MMIO));
> +	if (ret)
> +		goto err_unlock;
> +
> +	iopt = domain_to_iopt(iommu, domain, domain_id);
> +	while (pgcount) {
> +		ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot,
> +				     0, &mapped);
> +		WARN_ON(!IS_ALIGNED(mapped, pgsize));
> +		pgcount -= mapped / pgsize;
> +		if (ret)
> +			goto err_unmap;
> +		iova += mapped;
> +		paddr += mapped;
> +	}
> +
> +	hyp_spin_unlock(&iommu_lock);
> +	return 0;
> +
> +err_unmap:
> +	__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount);
> +err_unlock:
> +	hyp_spin_unlock(&iommu_lock);
> +	return ret;
> +}
> +
> +int kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
> +			  unsigned long iova, size_t pgsize, size_t pgcount)
> +{
> +	size_t size;
> +	size_t granule;
> +	int ret = -EINVAL;
> +	struct io_pgtable iopt;
> +	struct kvm_hyp_iommu *iommu;
> +	struct kvm_hyp_iommu_domain *domain;
> +
> +	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
> +	    iova + size < iova)
> +		return -EOVERFLOW;
> +
> +	hyp_spin_lock(&iommu_lock);
> +	domain = handle_to_domain(iommu_id, domain_id, &iommu);
> +	if (!domain)
> +		goto out_unlock;
> +
> +	granule = 1 << __ffs(iommu->pgtable->cfg.pgsize_bitmap);
> +	if (!IS_ALIGNED(iova | pgsize, granule))
> +		goto out_unlock;
> +
> +	iopt = domain_to_iopt(iommu, domain, domain_id);
> +	ret = __kvm_iommu_unmap_pages(&iopt, iova, pgsize, pgcount);
> +out_unlock:
> +	hyp_spin_unlock(&iommu_lock);
> +	return ret;
> +}
> +
> +phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t iommu_id,
> +				   pkvm_handle_t domain_id, unsigned long iova)
> +{
> +	phys_addr_t phys = 0;
> +	struct io_pgtable iopt;
> +	struct kvm_hyp_iommu *iommu;
> +	struct kvm_hyp_iommu_domain *domain;
> +
> +	hyp_spin_lock(&iommu_lock);
> +	domain = handle_to_domain(iommu_id, domain_id, &iommu);
> +	if (domain) {
> +		iopt = domain_to_iopt(iommu, domain, domain_id);
> +
> +		phys = iopt_iova_to_phys(&iopt, iova);
> +	}
> +	hyp_spin_unlock(&iommu_lock);
> +	return phys;
> +}
> +
>  int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
>  {
>  	void *domains;
> -- 
> 2.39.0
>
Thanks,
Mostafa
Jean-Philippe Brucker Feb. 26, 2024, 2:12 p.m. UTC | #6
On Fri, Feb 16, 2024 at 11:59:26AM +0000, Mostafa Saleh wrote:
> > +#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
> > +			 IOMMU_NOEXEC | IOMMU_MMIO)
> Is there a reason IOMMU_PRIV is not allowed?

No, I probably just forgot it.
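
Presumably the mask just needs to grow:

#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
			 IOMMU_NOEXEC | IOMMU_MMIO | IOMMU_PRIV)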

Thanks,
Jean

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
index 7404ea77ed9f..0550e7bdf179 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
@@ -183,6 +183,150 @@  int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
 	return ret;
 }
 
+static int __kvm_iommu_unmap_pages(struct io_pgtable *iopt, unsigned long iova,
+				   size_t pgsize, size_t pgcount)
+{
+	int ret;
+	size_t unmapped;
+	phys_addr_t paddr;
+	size_t total_unmapped = 0;
+	size_t size = pgsize * pgcount;
+
+	while (total_unmapped < size) {
+		paddr = iopt_iova_to_phys(iopt, iova);
+		if (paddr == 0)
+			return -EINVAL;
+
+		/*
+		 * One page/block at a time, because the range provided may not
+		 * be physically contiguous, and we need to unshare all physical
+		 * pages.
+		 */
+		unmapped = iopt_unmap_pages(iopt, iova, pgsize, 1, NULL);
+		if (!unmapped)
+			return -EINVAL;
+
+		ret = __pkvm_host_unshare_dma(paddr, pgsize);
+		if (ret)
+			return ret;
+
+		iova += unmapped;
+		pgcount -= unmapped / pgsize;
+		total_unmapped += unmapped;
+	}
+
+	return 0;
+}
+
+#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
+			 IOMMU_NOEXEC | IOMMU_MMIO)
+
+int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
+			unsigned long iova, phys_addr_t paddr, size_t pgsize,
+			size_t pgcount, int prot)
+{
+	size_t size;
+	size_t granule;
+	int ret = -EINVAL;
+	size_t mapped = 0;
+	struct io_pgtable iopt;
+	struct kvm_hyp_iommu *iommu;
+	size_t pgcount_orig = pgcount;
+	unsigned long iova_orig = iova;
+	struct kvm_hyp_iommu_domain *domain;
+
+	if (prot & ~IOMMU_PROT_MASK)
+		return -EINVAL;
+
+	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
+	    iova + size < iova || paddr + size < paddr)
+		return -EOVERFLOW;
+
+	hyp_spin_lock(&iommu_lock);
+
+	domain = handle_to_domain(iommu_id, domain_id, &iommu);
+	if (!domain)
+		goto err_unlock;
+
+	granule = 1 << __ffs(iommu->pgtable->cfg.pgsize_bitmap);
+	if (!IS_ALIGNED(iova | paddr | pgsize, granule))
+		goto err_unlock;
+
+	ret = __pkvm_host_share_dma(paddr, size, !(prot & IOMMU_MMIO));
+	if (ret)
+		goto err_unlock;
+
+	iopt = domain_to_iopt(iommu, domain, domain_id);
+	while (pgcount) {
+		ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot,
+				     0, &mapped);
+		WARN_ON(!IS_ALIGNED(mapped, pgsize));
+		pgcount -= mapped / pgsize;
+		if (ret)
+			goto err_unmap;
+		iova += mapped;
+		paddr += mapped;
+	}
+
+	hyp_spin_unlock(&iommu_lock);
+	return 0;
+
+err_unmap:
+	__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount);
+err_unlock:
+	hyp_spin_unlock(&iommu_lock);
+	return ret;
+}
+
+int kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
+			  unsigned long iova, size_t pgsize, size_t pgcount)
+{
+	size_t size;
+	size_t granule;
+	int ret = -EINVAL;
+	struct io_pgtable iopt;
+	struct kvm_hyp_iommu *iommu;
+	struct kvm_hyp_iommu_domain *domain;
+
+	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
+	    iova + size < iova)
+		return -EOVERFLOW;
+
+	hyp_spin_lock(&iommu_lock);
+	domain = handle_to_domain(iommu_id, domain_id, &iommu);
+	if (!domain)
+		goto out_unlock;
+
+	granule = 1 << __ffs(iommu->pgtable->cfg.pgsize_bitmap);
+	if (!IS_ALIGNED(iova | pgsize, granule))
+		goto out_unlock;
+
+	iopt = domain_to_iopt(iommu, domain, domain_id);
+	ret = __kvm_iommu_unmap_pages(&iopt, iova, pgsize, pgcount);
+out_unlock:
+	hyp_spin_unlock(&iommu_lock);
+	return ret;
+}
+
+phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t iommu_id,
+				   pkvm_handle_t domain_id, unsigned long iova)
+{
+	phys_addr_t phys = 0;
+	struct io_pgtable iopt;
+	struct kvm_hyp_iommu *iommu;
+	struct kvm_hyp_iommu_domain *domain;
+
+	hyp_spin_lock(&iommu_lock);
+	domain = handle_to_domain(iommu_id, domain_id, &iommu);
+	if (domain) {
+		iopt = domain_to_iopt(iommu, domain, domain_id);
+
+		phys = iopt_iova_to_phys(&iopt, iova);
+	}
+	hyp_spin_unlock(&iommu_lock);
+	return phys;
+}
+
 int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
 {
 	void *domains;