
[RFC,v2,40/58] KVM: arm64: smmu-v3: Add map/unmap pages and iova_to_phys

Message ID 20241212180423.1578358-41-smostafa@google.com (mailing list archive)
State New
Series KVM: Arm SMMUv3 driver for pKVM

Commit Message

Mostafa Saleh Dec. 12, 2024, 6:04 p.m. UTC
Add the map_pages and iova_to_phys HVC code, which mainly calls into
the io-pgtable.

For unmap_pages, we rely on IO_PGTABLE_QUIRK_UNMAP_INVAL: the driver
first calls unmap_pages, which invalidates all the pages as a typical
unmap would, issuing all the necessary TLB invalidations.
Then, we start a page table walk with 2 callbacks:
- visit_leaf: for each unmapped leaf, decrement the refcount of the
  page using __pkvm_host_unuse_dma(), reversing what the IOMMU core
  does at map time (a sketch of that pairing follows below).
- visit_post_table: free any invalidated tables, as they wouldn't be
  freed because of the quirk.
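
For illustration, a rough sketch of the intended pairing at the hyp
IOMMU core level. kvm_iommu_core_map() and the exact
__pkvm_host_use_dma() signature are assumptions made only for this
sketch; this patch itself only adds the driver hooks and the
__pkvm_host_unuse_dma() call in visit_leaf:

static int kvm_iommu_core_map(struct kvm_hyp_iommu_domain *domain,
			      unsigned long iova, phys_addr_t paddr,
			      size_t pgsize, size_t pgcount, int prot)
{
	size_t total_mapped = 0;
	size_t size = pgsize * pgcount;
	int ret;

	/* Pin the pages for DMA so the host cannot unshare them under us. */
	ret = __pkvm_host_use_dma(paddr, size);
	if (ret)
		return ret;

	/* Driver hook added by this patch; walks the io-pgtable. */
	ret = smmu_ops.map_pages(domain, iova, paddr, pgsize, pgcount,
				 prot, &total_mapped);

	/* On (partial) failure, drop the references for what wasn't mapped. */
	if (total_mapped != size)
		WARN_ON(__pkvm_host_unuse_dma(paddr + total_mapped,
					      size - total_mapped));
	return ret;
}

visit_leaf is the inverse of that __pkvm_host_use_dma() step: it drops
the same reference page by page once the PTEs have been invalidated and
the TLB has been synced.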

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 arch/arm64/kvm/hyp/nvhe/iommu/arm-smmu-v3.c | 147 ++++++++++++++++++++
 1 file changed, 147 insertions(+)

Comments

Jason Gunthorpe Dec. 12, 2024, 7:44 p.m. UTC | #1
On Thu, Dec 12, 2024 at 06:04:04PM +0000, Mostafa Saleh wrote:
> Add the map_pages and iova_to_phys HVC code, which mainly calls into
> the io-pgtable.
> 
> For unmap_pages, we rely on IO_PGTABLE_QUIRK_UNMAP_INVAL: the driver
> first calls unmap_pages, which invalidates all the pages as a typical
> unmap would, issuing all the necessary TLB invalidations.
> Then, we start a page table walk with 2 callbacks:
> - visit_leaf: for each unmapped leaf, decrement the refcount of the
>   page using __pkvm_host_unuse_dma(), reversing what the IOMMU core
>   does at map time.
> - visit_post_table: free any invalidated tables, as they wouldn't be
>   freed because of the quirk.

I don't know if the timelines will work out, but the pagetable stuff
I'm working on will let you write a much more appropriate
implementation for pkvm's usage than trying to hack it into the
iopgtable code like this.

Even the IOMMU-focused routines I have now would solve this
problem, because they always spit out a linked list of all the memory
to free after map/unmap and never internally free it.

Jason
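
To illustrate the pattern described above, a minimal sketch with
made-up names (pt_free_list, pt_stash_table and pt_drain_free_list are
not taken from any posted series): the page-table code never frees
memory itself, it chains retired table pages onto a caller-provided
list that the caller drains after the TLB invalidations have completed.

struct pt_free_list {
	struct pt_free_list *next;	/* link stored in the retired page itself */
};

/* Page-table code: stash a retired table page instead of freeing it. */
static void pt_stash_table(struct pt_free_list **head, void *table)
{
	struct pt_free_list *item = table;

	item->next = *head;
	*head = item;
}

/* IOMMU driver: drain the list once the TLB invalidations are done. */
static void pt_drain_free_list(struct pt_free_list *head,
			       void (*free_fn)(void *page))
{
	while (head) {
		struct pt_free_list *next = head->next;

		free_fn(head);
		head = next;
	}
}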
Mostafa Saleh Dec. 13, 2024, 7:48 p.m. UTC | #2
Hi Jason,

On Thu, Dec 12, 2024 at 03:44:35PM -0400, Jason Gunthorpe wrote:
> On Thu, Dec 12, 2024 at 06:04:04PM +0000, Mostafa Saleh wrote:
> > Add the map_pages and iova_to_phys HVC code, which mainly calls into
> > the io-pgtable.
> > 
> > For unmap_pages, we rely on IO_PGTABLE_QUIRK_UNMAP_INVAL: the driver
> > first calls unmap_pages, which invalidates all the pages as a typical
> > unmap would, issuing all the necessary TLB invalidations.
> > Then, we start a page table walk with 2 callbacks:
> > - visit_leaf: for each unmapped leaf, decrement the refcount of the
> >   page using __pkvm_host_unuse_dma(), reversing what the IOMMU core
> >   does at map time.
> > - visit_post_table: free any invalidated tables, as they wouldn't be
> >   freed because of the quirk.
> 
> I don't know if the timelines will work out, but the pagetable stuff
> I'm working on will let you write a much more appropriate
> implementation for pkvm's usage than trying to hack it into the
> iopgtable code like this.

I haven’t checked your new page-table patches yet, but I’d say it’s
more likely your patches land first: as mentioned in the cover letter,
there are still many dependencies for pKVM before IOMMU support lands.
So I don’t mind converging if possible.

Thanks,
Mostafa

> 
> Even the IOMMU-focused routines I have now would solve this
> problem, because they always spit out a linked list of all the memory
> to free after map/unmap and never internally free it.
> 
> Jason

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/arm-smmu-v3.c b/arch/arm64/kvm/hyp/nvhe/iommu/arm-smmu-v3.c
index ec3f8d9749d3..1821a3420a4d 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/arm-smmu-v3.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/arm-smmu-v3.c
@@ -808,15 +808,74 @@  static const struct iommu_flush_ops smmu_tlb_ops = {
 	.tlb_add_page	= smmu_tlb_add_page,
 };
 
+static void smmu_unmap_visit_leaf(phys_addr_t addr, size_t size,
+				  struct io_pgtable_walk_common *data,
+				  void *wd)
+{
+	u64 *ptep = wd;
+
+	WARN_ON(__pkvm_host_unuse_dma(addr, size));
+	*ptep = 0;
+}
+
+/*
+ * With IO_PGTABLE_QUIRK_UNMAP_INVAL, unmap doesn't clear or free any
+ * tables, so after the unmap we walk the table and free the invalid
+ * tables in the post-table visitor.
+ * One caveat is that a table can be unmapped while it still points to
+ * other tables, which would still be valid, and those need to be freed
+ * as well. The simplest solution is to look at the walk PTE info: if
+ * any of the parents is invalid, this table also needs to be freed.
+ */
+static void smmu_unmap_visit_post_table(struct arm_lpae_io_pgtable_walk_data *walk_data,
+					arm_lpae_iopte *ptep, int lvl)
+{
+	struct arm_lpae_io_pgtable *data = walk_data->cookie;
+	size_t table_size;
+	int i;
+	bool invalid = false;
+
+	if (lvl == data->start_level)
+		table_size = ARM_LPAE_PGD_SIZE(data);
+	else
+		table_size = ARM_LPAE_GRANULE(data);
+
+	for (i = 0; i <= lvl; ++i)
+		invalid |= !iopte_valid(walk_data->ptes[i]);
+
+	if (!invalid)
+		return;
+
+	__arm_lpae_free_pages(ptep, table_size, &data->iop.cfg, data->iop.cookie);
+	*ptep = 0;
+}
+
 static void smmu_iotlb_sync(struct kvm_hyp_iommu_domain *domain,
 			    struct iommu_iotlb_gather *gather)
 {
 	size_t size;
+	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
+	struct io_pgtable *pgtable = smmu_domain->pgtable;
+	struct arm_lpae_io_pgtable *data = io_pgtable_to_data(pgtable);
+	struct arm_lpae_io_pgtable_walk_data wd = {
+		.cookie = data,
+		.visit_post_table = smmu_unmap_visit_post_table,
+	};
+	struct io_pgtable_walk_common walk_data = {
+		.visit_leaf = smmu_unmap_visit_leaf,
+		.data = &wd,
+	};
 
 	if (!gather->pgsize)
 		return;
 	size = gather->end - gather->start + 1;
 	smmu_tlb_inv_range(domain, gather->start, size,  gather->pgsize, true);
+
+	/*
+	 * Now walk the unmapped range to drop the page refcounts and free
+	 * stale tables; possible thanks to IO_PGTABLE_QUIRK_UNMAP_INVAL.
+	 */
+	pgtable->ops.pgtable_walk(&pgtable->ops, gather->start, size, &walk_data);
 }
 
 static int smmu_domain_config_s2(struct kvm_hyp_iommu_domain *domain,
@@ -966,6 +1025,7 @@  static int smmu_domain_finalise(struct hyp_arm_smmu_v3_device *smmu,
 			.oas = smmu->ias,
 			.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
 			.tlb = &smmu_tlb_ops,
+			.quirks = IO_PGTABLE_QUIRK_UNMAP_INVAL,
 		};
 	} else {
 		cfg = (struct io_pgtable_cfg) {
@@ -975,6 +1035,7 @@  static int smmu_domain_finalise(struct hyp_arm_smmu_v3_device *smmu,
 			.oas = smmu->oas,
 			.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
 			.tlb = &smmu_tlb_ops,
+			.quirks = IO_PGTABLE_QUIRK_UNMAP_INVAL,
 		};
 	}
 
@@ -1125,6 +1186,89 @@  static int smmu_detach_dev(struct kvm_hyp_iommu *iommu, struct kvm_hyp_iommu_dom
 	return ret;
 }
 
+static int smmu_map_pages(struct kvm_hyp_iommu_domain *domain, unsigned long iova,
+			  phys_addr_t paddr, size_t pgsize,
+			  size_t pgcount, int prot, size_t *total_mapped)
+{
+	size_t mapped;
+	size_t granule;
+	int ret;
+	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
+	struct io_pgtable *pgtable = smmu_domain->pgtable;
+
+	if (!pgtable)
+		return -EINVAL;
+
+	granule = 1UL << __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
+	if (!IS_ALIGNED(iova | paddr | pgsize, granule))
+		return -EINVAL;
+
+	hyp_spin_lock(&smmu_domain->pgt_lock);
+	while (pgcount && !ret) {
+		mapped = 0;
+		ret = pgtable->ops.map_pages(&pgtable->ops, iova, paddr,
+					     pgsize, pgcount, prot, 0, &mapped);
+		if (ret)
+			break;
+		WARN_ON(!IS_ALIGNED(mapped, pgsize));
+		WARN_ON(mapped > pgcount * pgsize);
+
+		pgcount -= mapped / pgsize;
+		*total_mapped += mapped;
+		iova += mapped;
+		paddr += mapped;
+	}
+	hyp_spin_unlock(&smmu_domain->pgt_lock);
+
+	return ret;
+}
+
+static size_t smmu_unmap_pages(struct kvm_hyp_iommu_domain *domain, unsigned long iova,
+			       size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather)
+{
+	size_t granule, unmapped, total_unmapped = 0;
+	size_t size = pgsize * pgcount;
+	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
+	struct io_pgtable *pgtable = smmu_domain->pgtable;
+
+	if (!pgtable)
+		return 0;
+
+	granule = 1UL << __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
+	if (!IS_ALIGNED(iova | pgsize, granule))
+		return 0;
+
+	hyp_spin_lock(&smmu_domain->pgt_lock);
+	while (total_unmapped < size) {
+		unmapped = pgtable->ops.unmap_pages(&pgtable->ops, iova, pgsize,
+						    pgcount, gather);
+		if (!unmapped)
+			break;
+		iova += unmapped;
+		total_unmapped += unmapped;
+		pgcount -= unmapped / pgsize;
+	}
+	hyp_spin_unlock(&smmu_domain->pgt_lock);
+	return total_unmapped;
+}
+
+static phys_addr_t smmu_iova_to_phys(struct kvm_hyp_iommu_domain *domain,
+				     unsigned long iova)
+{
+	phys_addr_t paddr;
+	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
+	struct io_pgtable *pgtable = smmu_domain->pgtable;
+
+	if (!pgtable)
+		return 0;
+
+	hyp_spin_lock(&smmu_domain->pgt_lock);
+	paddr = pgtable->ops.iova_to_phys(&pgtable->ops, iova);
+	hyp_spin_unlock(&smmu_domain->pgt_lock);
+
+	return paddr;
+}
+
 /* Shared with the kernel driver in EL1 */
 struct kvm_iommu_ops smmu_ops = {
 	.init				= smmu_init,
@@ -1134,4 +1278,7 @@  struct kvm_iommu_ops smmu_ops = {
 	.iotlb_sync			= smmu_iotlb_sync,
 	.attach_dev			= smmu_attach_dev,
 	.detach_dev			= smmu_detach_dev,
+	.map_pages			= smmu_map_pages,
+	.unmap_pages			= smmu_unmap_pages,
+	.iova_to_phys			= smmu_iova_to_phys,
 };