diff mbox series

[RFC,03/13] iommu/intel: zap context table entries on kexec

Message ID 20240916113102.710522-4-jgowans@amazon.com (mailing list archive)
State New, archived
Headers show
Series Support iommu(fd) persistence for live update | expand

Commit Message

Gowans, James Sept. 16, 2024, 11:30 a.m. UTC
Instead of fully shutting down the IOMMU on kexec, zap the context
table entries for devices. This is the initial step towards being able
to persist some domains. Once a struct iommu_domain can be marked
persistent, those persistent domains will be skipped when doing the
IOMMU shutdown.
---
 drivers/iommu/intel/dmar.c  |  1 +
 drivers/iommu/intel/iommu.c | 34 ++++++++++++++++++++++++++++++----
 drivers/iommu/intel/iommu.h |  2 ++
 3 files changed, 33 insertions(+), 4 deletions(-)

Comments

Jason Gunthorpe Oct. 3, 2024, 1:27 p.m. UTC | #1
On Mon, Sep 16, 2024 at 01:30:52PM +0200, James Gowans wrote:
> Instead of fully shutting down the IOMMU on kexec, rather zap context
> table entries for devices. This is the initial step to be able to
> persist some domains. Once a struct iommu_domain can be marked
> persistent then those persistent domains will be skipped when doing the
> IOMMU shut down.
> ---
>  drivers/iommu/intel/dmar.c  |  1 +
>  drivers/iommu/intel/iommu.c | 34 ++++++++++++++++++++++++++++++----
>  drivers/iommu/intel/iommu.h |  2 ++
>  3 files changed, 33 insertions(+), 4 deletions(-)

We should probably try to avoid doing this kind of stuff in
drivers. The core code can generically ask drivers to attach a
BLOCKING domain as part of the kexec sequence and the core code can
then decide which devices should be held over.

There is also some complexity here around RMRs, we can't always apply
a blocking domain... Not sure what you'd do in those cases.

IIRC we already do something like this with the bus master enable on
the PCI side?? At least, if the kernel is deciding to block DMA when
the IOMMU is on it should do it consistently and inhibit the PCI
device as well.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 1c8d3141cb55..f79aba382e77 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1099,6 +1099,7 @@  static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	spin_lock_init(&iommu->device_rbtree_lock);
 	mutex_init(&iommu->iopf_lock);
 	iommu->node = NUMA_NO_NODE;
+	INIT_LIST_HEAD(&iommu->domains);
 
 	ver = readl(iommu->reg + DMAR_VER_REG);
 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9ff8b83c19a3..2297cbb0253f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1575,6 +1575,7 @@  int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
 		goto err_clear;
 	}
 	domain_update_iommu_cap(domain);
+	list_add(&domain->domains, &iommu->domains);
 
 	spin_unlock(&iommu->lock);
 	return 0;
@@ -3185,6 +3186,33 @@  static void intel_disable_iommus(void)
 		iommu_disable_translation(iommu);
 }
 
+/* Clear each attached device's context entry and invalidate its caches. */
+static void zap_context_table_entries(struct intel_iommu *iommu)
+{
+	struct device_domain_info *device;
+	struct context_entry *context;
+	struct dmar_domain *domain;
+	u16 did_old, sid;
+
+	list_for_each_entry(domain, &iommu->domains, domains) {
+		list_for_each_entry(device, &domain->devices, link) {
+			context = iommu_context_addr(iommu, device->bus, device->devfn, 0);
+			if (!context || !context_present(context))
+				continue;
+			/* Read the domain ID before the entry is cleared. */
+			did_old = context_domain_id(context);
+			sid = (((u16)device->bus) << 8) | device->devfn;
+			context_clear_entry(context);
+			__iommu_flush_cache(iommu, context, sizeof(*context));
+			iommu->flush.flush_context(iommu, did_old, sid,
+						   DMA_CCMD_MASK_NOBIT,
+						   DMA_CCMD_DEVICE_INVL);
+			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+						 DMA_TLB_DSI_FLUSH);
+		}
+	}
+}
+
 void intel_iommu_shutdown(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -3197,10 +3225,7 @@  void intel_iommu_shutdown(void)
 
-	/* Disable PMRs explicitly here. */
+	/* Zap device context entries; the IOMMUs are no longer switched off. */
 	for_each_iommu(iommu, drhd)
-		iommu_disable_protect_mem_regions(iommu);
-
-	/* Make sure the IOMMUs are switched off */
-	intel_disable_iommus();
+		zap_context_table_entries(iommu);
 
 	up_write(&dmar_global_lock);
 }
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index b67c14da1240..cfd006588824 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -606,6 +606,7 @@  struct dmar_domain {
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
 	struct list_head dev_pasids;	/* all attached pasids */
+	struct list_head domains;	/* node in struct intel_iommu's domains list */
 
 	spinlock_t cache_lock;		/* Protect the cache tag list */
 	struct list_head cache_tags;	/* Cache tag list */
@@ -749,6 +750,7 @@  struct intel_iommu {
 	void *perf_statistic;
 
 	struct iommu_pmu *pmu;
+	struct list_head domains;	/* all struct dmar_domains on this IOMMU */
 };
 
 /* PCI domain-device relationship */