[RFC,09/13] intel-iommu: Serialise dmar_domain on KHO activate

Message ID 20240916113102.710522-10-jgowans@amazon.com (mailing list archive)
State New, archived
Series Support iommu(fd) persistence for live update

Commit Message

Gowans, James Sept. 16, 2024, 11:30 a.m. UTC
Add logic to iterate through the persistent domains, adding their page
table pages to the KHO persistent memory pages. Also serialise metadata
about the domains and their attached PCI devices.

By adding the page table pages to the `mem` property on the KHO object,
these pages will be carved out of system memory early in boot by KHO,
guaranteeing that the new kernel will not use them for any other purpose.
This persists the page tables across kexec.
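
For illustration, the serialised subtree for a single domain with one
attached device could look roughly like this (structure per the format
comment in drivers/iommu/intel/serialise.c; all values are illustrative
only):

  /intel-iommu
      domains = {
          1 = {
              mem = [ ... page table pages ... ]
              persistent_id = 1
              pgd = 0x12345000
              agaw = 2
              devices = {
                  0 = {
                      bus = 0x00
                      devfn = 0x08
                  }
              }
          }
      }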
---
 drivers/iommu/intel/iommu.c     |  9 ----
 drivers/iommu/intel/iommu.h     |  9 +++
 drivers/iommu/intel/serialise.c | 95 ++++++++++++++++++++++++++++++++-
 3 files changed, 103 insertions(+), 10 deletions(-)

Patch

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7e77b787148a..0a2118a3b7c4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -46,15 +46,6 @@ 
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
 
-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
-
-/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
-   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
-#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
-				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
-#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-
 static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
 
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 7866342f0909..cd932a97a9bc 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -38,6 +38,15 @@ 
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
 
+#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
+
+/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
+   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
+#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
+				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
+#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
+
 #define VTD_STRIDE_SHIFT        (9)
 #define VTD_STRIDE_MASK         (((u64)-1) << VTD_STRIDE_SHIFT)
 
diff --git a/drivers/iommu/intel/serialise.c b/drivers/iommu/intel/serialise.c
index 08a548b33703..bc755e51732b 100644
--- a/drivers/iommu/intel/serialise.c
+++ b/drivers/iommu/intel/serialise.c
@@ -2,9 +2,102 @@ 
 
 #include "iommu.h"
 
+/*
+ * Serialised format:
+ * /intel-iommu
+ *     compatible = str
+ *     domains = {
+ *         persistent-id = {
+ *             mem = [ ... ] // page table pages
+ *             persistent_id = u64
+ *             pgd = u64
+ *             agaw = i32
+ *             devices = {
+ *                 id = {
+ *                     bus = u8
+ *                     devfn = u8
+ *                 },
+ *                 ...
+ *             }
+ *         }
+ *     }
+ */
+
+/*
+ * Record this page table page and, recursively, all the page table pages
+ * below it in the kho_mem array, advancing the pointer as entries are
+ * written. Adapted from dma_pte_list_pagetables().
+ */
+static void save_pte_pages(struct dmar_domain *domain, int level,
+			   struct dma_pte *pte, struct kho_mem **kho_mem)
+{
+	(*kho_mem)->addr = virt_to_phys(pte);
+	(*kho_mem)->len = PAGE_SIZE;
+	(*kho_mem)++;
+
+	/* Level 1 entries point at data pages, not page table pages. */
+	if (level == 1)
+		return;
+
+	do {
+		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
+			save_pte_pages(domain, level - 1,
+				       phys_to_virt(dma_pte_addr(pte)),
+				       kho_mem);
+		pte++;
+	} while (!first_pte_in_page(pte));
+}
+
 static int serialise_domain(void *fdt, struct iommu_domain *domain)
 {
-	return 0;
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	/*
+	 * kho_mems_start points to the original allocated array; kho_mems
+	 * is incremented by the callee. Keep both to know how many were added.
+	 */
+	struct kho_mem *kho_mems, *kho_mems_start;
+	struct device_domain_info *info;
+	int err = 0;
+	char name[24];
+	int device_idx = 0;
+	phys_addr_t pgd;
+
+	/*
+	 * Assume one page worth of kho_mem objects is enough. Better would be
+	 * to track the number of page table pages allocated in the domain.
+	 */
+	kho_mems_start = kho_mems = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!kho_mems_start)
+		return -ENOMEM;
+
+	save_pte_pages(dmar_domain, agaw_to_level(dmar_domain->agaw),
+		       dmar_domain->pgd, &kho_mems);
+
+	snprintf(name, sizeof(name), "%lu", domain->persistent_id);
+	err |= fdt_begin_node(fdt, name);
+	err |= fdt_property(fdt, "mem", kho_mems_start,
+			sizeof(struct kho_mem) * (kho_mems - kho_mems_start));
+	err |= fdt_property(fdt, "persistent_id", &domain->persistent_id,
+			sizeof(domain->persistent_id));
+	pgd = virt_to_phys(dmar_domain->pgd);
+	err |= fdt_property(fdt, "pgd", &pgd, sizeof(pgd));
+	err |= fdt_property(fdt, "agaw", &dmar_domain->agaw,
+			sizeof(dmar_domain->agaw));
+
+	err |= fdt_begin_node(fdt, "devices");
+	list_for_each_entry(info, &dmar_domain->devices, link) {
+		snprintf(name, sizeof(name), "%i", device_idx++);
+		err |= fdt_begin_node(fdt, name);
+		err |= fdt_property(fdt, "bus", &info->bus, sizeof(info->bus));
+		err |= fdt_property(fdt, "devfn", &info->devfn, sizeof(info->devfn));
+		err |= fdt_end_node(fdt); /* device_idx */
+	}
+	err |= fdt_end_node(fdt); /* devices */
+	err |= fdt_end_node(fdt); /* domain->persistent_id */
+
+	kfree(kho_mems_start);
+
+	return err;
 }
 
 int intel_iommu_serialise_kho(struct notifier_block *self, unsigned long cmd,