diff mbox series

[RFC,v2,24/29] mm: asi: Add infrastructure for mapping userspace addresses

Message ID 20250110-asi-rfc-v2-v2-24-8419288bc805@google.com (mailing list archive)
State RFC
Headers show
Series Address Space Isolation (ASI) | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Brendan Jackman Jan. 10, 2025, 6:40 p.m. UTC
In preparation for sandboxing bare-metal processes, teach ASI to map
userspace addresses into the restricted address space.

Add a new policy helper that determines, based on the ASI class, whether
to do this. If the helper returns true, mirror userspace mappings into the
ASI pagetables.

Later, users who do not have a significant security boundary between KVM
guests and their VMM process will be able to take advantage of this to
reduce mitigation costs when switching between those two domains. To
illustrate this idea, it is now reflected in the KVM taint policy,
although the KVM class is still hard-coded not to map userspace
addresses.

Co-developed-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Junaid Shahid <junaids@google.com>
Co-developed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/include/asm/asi.h        | 11 +++++
 arch/x86/include/asm/pgalloc.h    |  6 +++
 arch/x86/include/asm/pgtable_64.h |  4 ++
 arch/x86/kvm/x86.c                | 12 +++--
 arch/x86/mm/asi.c                 | 92 +++++++++++++++++++++++++++++++++++++++
 include/asm-generic/asi.h         |  4 ++
 6 files changed, 125 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index 555edb5f292e4d6baba782f51d014aa48dc850b6..e925d7d2cfc85bca8480c837548654e7a5a7009e 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -133,6 +133,7 @@  struct asi {
 	struct mm_struct *mm;
 	int64_t ref_count;
 	enum asi_class_id class_id;
+	spinlock_t pgd_lock;
 };
 
 DECLARE_PER_CPU_ALIGNED(struct asi *, curr_asi);
@@ -147,6 +148,7 @@  const char *asi_class_name(enum asi_class_id class_id);
 
 int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_asi);
 void asi_destroy(struct asi *asi);
+void asi_clone_user_pgtbl(struct mm_struct *mm, pgd_t *pgdp);
 
 /* Enter an ASI domain (restricted address space) and begin the critical section. */
 void asi_enter(struct asi *asi);
@@ -286,6 +288,15 @@  static __always_inline bool asi_in_critical_section(void)
 
 void asi_handle_switch_mm(void);
 
+/*
+ * Policy helper: should userspace addresses be mapped into the restricted
+ * address space for @class_id? For now this is false for every class.
+ */
+static inline bool asi_maps_user_addr(enum asi_class_id class_id)
+{
+	return false;
+}
+
 #endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
 
 #endif
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index dcd836b59bebd329c3d265b98e48ef6eb4c9e6fc..edf9fe76c53369eefcd5bf14a09cbf802cf1ea21 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -114,12 +114,16 @@  static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
 	paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
 	set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
+	if (!pgtable_l5_enabled())
+		asi_clone_user_pgtbl(mm, (pgd_t *)p4d);
 }
 
 static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
 	paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
 	set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
+	if (!pgtable_l5_enabled())
+		asi_clone_user_pgtbl(mm, (pgd_t *)p4d); /* mirror the new entry into the ASI pgds */
 }
 
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
@@ -137,6 +141,7 @@  static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
 		return;
 	paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
 	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+	asi_clone_user_pgtbl(mm, pgd);
 }
 
 static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
@@ -145,6 +150,7 @@  static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4
 		return;
 	paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
 	set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+	asi_clone_user_pgtbl(mm, pgd);
 }
 
 static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index d1426b64c1b9715cd9e4d1d7451ae4feadd8b2f5..fe6d83ec632a6894527784f2ebdbd013161c6f09 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -157,6 +157,8 @@  static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 static inline void native_p4d_clear(p4d_t *p4d)
 {
 	native_set_p4d(p4d, native_make_p4d(0));
+	if (!pgtable_l5_enabled())
+		asi_clone_user_pgtbl(NULL, (pgd_t *)p4d); /* NULL mm: owner is taken from the pgd page */
 }
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -167,6 +169,8 @@  static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 static inline void native_pgd_clear(pgd_t *pgd)
 {
 	native_set_pgd(pgd, native_make_pgd(0));
+	if (pgtable_l5_enabled())
+		asi_clone_user_pgtbl(NULL, pgd); /* NULL mm: owner is taken from the pgd page */
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3e0811eb510650abc601e4adce1ce4189835a730..920475fe014f6503dd88c7bbdb6b2707c084a689 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9712,11 +9712,15 @@  static inline int kvm_x86_init_asi_class(void)
 	/*
 	 * And the same for data left behind by code in the userspace domain
 	 * (i.e. the VMM itself, plus kernel code serving its syscalls etc).
-	 * This should eventually be configurable: users whose VMMs contain
-	 * no secrets can disable it to avoid paying a mitigation cost on
-	 * transition between their guest and userspace.
+	 *
+	 *
+	 * If we decided to map userspace into the guest's restricted address
+	 * space then we don't bother with this since we assume either no bugs
+	 * allow the guest to leak that data, or the user doesn't care about
+	 * that security boundary.
 	 */
-	policy.protect_data |= ASI_TAINT_USER_DATA;
+	if (!asi_maps_user_addr(ASI_CLASS_KVM))
+		policy.protect_data |= ASI_TAINT_USER_DATA;
 
 	return asi_init_class(ASI_CLASS_KVM, &policy);
 }
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index c5073af1a82ded1c6fc467cd7a5d29a39d676bb4..093103c1bc2677c81d68008aca064fab53b73a62 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -14,6 +14,7 @@ 
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/traps.h>
+#include <asm/pgtable.h>
 
 #include "mm_internal.h"
 #include "../../../mm/internal.h"
@@ -351,6 +352,33 @@  static void __asi_destroy(struct asi *asi)
 	memset(asi, 0, sizeof(struct asi));
 }
 
+static void __asi_init_user_pgds(struct mm_struct *mm, struct asi *asi)
+{
+	int i;
+
+	if (!asi_maps_user_addr(asi->class_id))
+		return;
+
+	/*
+	 * The code below must be executed only after the given asi is
+	 * available in mm->asi[index] to ensure at least either this
+	 * function or asi_clone_user_pgtbl() will copy entries in the
+	 * unrestricted pgd to the restricted pgd.
+	 */
+	if (WARN_ON_ONCE(&mm->asi[asi->class_id] != asi))
+		return;
+
+	/*
+	 * See the comment in asi_clone_user_pgtbl() for why we hold the lock here.
+	 */
+	spin_lock(&asi->pgd_lock);
+
+	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++)
+		set_pgd(asi->pgd + i, READ_ONCE(*(mm->pgd + i)));
+
+	spin_unlock(&asi->pgd_lock);
+}
+
 int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_asi)
 {
 	struct asi *asi;
@@ -388,6 +416,7 @@  int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_
 
 	asi->mm = mm;
 	asi->class_id = class_id;
+	spin_lock_init(&asi->pgd_lock);
 
 	for (i = KERNEL_PGD_BOUNDARY; i < PTRS_PER_PGD; i++)
 		set_pgd(asi->pgd + i, asi_global_nonsensitive_pgd[i]);
@@ -398,6 +427,7 @@  int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_
 	else
 		*out_asi = asi;
 
+	__asi_init_user_pgds(mm, asi);
 	mutex_unlock(&mm->asi_init_lock);
 
 	return err;
@@ -891,3 +921,65 @@  void asi_unmap(struct asi *asi, void *addr, size_t len)
 
 	asi_flush_tlb_range(asi, addr, len);
 }
+
+/*
+ * Copy the given unrestricted pgd entry for a userspace address to the
+ * corresponding entry of each restricted pgd that maps userspace.
+ * The unrestricted pgd entry must therefore be updated before
+ * this function is called.
+ * We map entire userspace addresses to the restricted address spaces
+ * by copying unrestricted pgd entries to the restricted page tables
+ * so that we don't need to maintain consistency of lower level PTEs
+ * between the unrestricted page table and the restricted page tables.
+ */
+void asi_clone_user_pgtbl(struct mm_struct *mm, pgd_t *pgdp)
+{
+	unsigned long pgd_idx;
+	struct asi *asi;
+	int i;
+
+	if (!static_asi_enabled())
+		return;
+
+	/* Non-userspace mappings need no handling here. */
+	if (!pgdp_maps_userspace(pgdp))
+		return;
+
+	/*
+	 * The mm will be NULL for p{4,g}d_clear(). We need to get
+	 * the owner mm for this pgd in this case. The pgd page has
+	 * a valid pt_mm only when SHARED_KERNEL_PMD == 0.
+	 */
+	BUILD_BUG_ON(SHARED_KERNEL_PMD);
+	if (!mm) {
+		mm = pgd_page_get_mm(virt_to_page(pgdp));
+		if (WARN_ON_ONCE(!mm))
+			return;
+	}
+
+	/*
+	 * Compute a PGD index of the given pgd entry. This will be the
+	 * index of the ASI PGD entry to be updated.
+	 */
+	pgd_idx = pgdp - PTR_ALIGN_DOWN(pgdp, PAGE_SIZE);
+
+	for (i = 0; i < ARRAY_SIZE(mm->asi); i++) {
+		asi = mm->asi + i;
+
+		if (!asi_pgd(asi) || !asi_maps_user_addr(asi->class_id))
+			continue;
+
+		/*
+		 * We need to synchronize concurrent callers of
+		 * asi_clone_user_pgtbl() among themselves, as well as
+		 * __asi_init_user_pgds(). The lock makes sure that reading
+		 * the unrestricted pgd and updating the corresponding
+		 * ASI pgd are not interleaved by concurrent calls.
+		 * We cannot rely on mm->page_table_lock here because it
+		 * is not always held when pgd/p4d_clear_bad() is called.
+		 */
+		spin_lock(&asi->pgd_lock);
+		set_pgd(asi_pgd(asi) + pgd_idx, READ_ONCE(*pgdp));
+		spin_unlock(&asi->pgd_lock);
+	}
+}
diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h
index 4f033d3ef5929707fd280f74fc800193e45143c1..d103343292fad567dcd73e45e986fb3974e59898 100644
--- a/include/asm-generic/asi.h
+++ b/include/asm-generic/asi.h
@@ -95,6 +95,10 @@  void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len) { }
 
 static inline void asi_check_boottime_disable(void) { }
 
+static inline void asi_clone_user_pgtbl(struct mm_struct *mm, pgd_t *pgdp) { } /* no-op without ASI */
+
+static inline bool asi_maps_user_addr(enum asi_class_id class_id) { return false; } /* never without ASI */
+
 #endif /* !CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
 
 #endif  /* !_ASSEMBLY_ */