diff mbox series

[RFC,20/47] mm: asi: Support for locally non-sensitive vmalloc allocations

Message ID 20220223052223.1202152-21-junaids@google.com (mailing list archive)
State New
Headers show
Series Address Space Isolation for KVM | expand

Commit Message

Junaid Shahid Feb. 23, 2022, 5:21 a.m. UTC
A new flag, VM_LOCAL_NONSENSITIVE, is added to designate locally
non-sensitive vmalloc/vmap areas. When using the __vmalloc /
__vmalloc_node APIs, if the corresponding GFP flag is specified, the
VM flag is automatically added. When using the __vmalloc_node_range API,
either flag can be specified independently. The VM flag will only map
the vmalloc area as non-sensitive, while the GFP flag will only map the
underlying direct map area as non-sensitive.

When using the __vmalloc_node_range API, instead of VMALLOC_START/END,
VMALLOC_LOCAL_NONSENSITIVE_START/END should be used. This is the range
that will have different ASI page tables for each process, thereby
providing the local mapping.

A command line parameter vmalloc_local_nonsensitive_percent is added to
specify the approximate division between the per-process and global
vmalloc ranges. Note that regular/sensitive vmalloc/vmap allocations
are not restricted by this division and can go anywhere in the entire
vmalloc range. The division only applies to non-sensitive allocations.

Since no attempt is made to balance regular/sensitive allocations across
the division, it is possible that one of these ranges gets filled up
by regular allocations, leaving no room for the non-sensitive
allocations for which that range was designated. However, since the
vmalloc range is fairly large, this will hopefully not be a problem in
practice. If that assumption turns out to be incorrect, we could
implement a more sophisticated scheme.

Signed-off-by: Junaid Shahid <junaids@google.com>


---
 arch/x86/include/asm/asi.h              |  2 +
 arch/x86/include/asm/page_64.h          |  2 +
 arch/x86/include/asm/pgtable_64_types.h |  7 ++-
 arch/x86/mm/asi.c                       | 57 ++++++++++++++++++
 include/asm-generic/asi.h               |  5 ++
 include/linux/vmalloc.h                 |  6 ++
 mm/vmalloc.c                            | 78 ++++++++++++++++++++-----
 7 files changed, 142 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index f11010c0334b..e3cbf6d8801e 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -46,6 +46,8 @@  DECLARE_PER_CPU_ALIGNED(struct asi_state, asi_cpu_state);
 
 extern pgd_t asi_global_nonsensitive_pgd[];
 
+void asi_vmalloc_init(void);
+
 int  asi_init_mm_state(struct mm_struct *mm);
 void asi_free_mm_state(struct mm_struct *mm);
 
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 2845eca02552..b17574349572 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -18,6 +18,8 @@  extern unsigned long vmemmap_base;
 
 #ifdef CONFIG_ADDRESS_SPACE_ISOLATION
 
+extern unsigned long vmalloc_global_nonsensitive_start;
+extern unsigned long vmalloc_local_nonsensitive_end;
 extern unsigned long asi_local_map_base;
 DECLARE_STATIC_KEY_FALSE(asi_local_map_initialized);
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 0fc380ba25b8..06793f7ef1aa 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -142,8 +142,13 @@  extern unsigned int ptrs_per_p4d;
 #define VMALLOC_END		(VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
 
 #ifdef CONFIG_ADDRESS_SPACE_ISOLATION
-#define VMALLOC_GLOBAL_NONSENSITIVE_START	VMALLOC_START
+
+#define VMALLOC_LOCAL_NONSENSITIVE_START	VMALLOC_START
+#define VMALLOC_LOCAL_NONSENSITIVE_END		vmalloc_local_nonsensitive_end
+
+#define VMALLOC_GLOBAL_NONSENSITIVE_START	vmalloc_global_nonsensitive_start
 #define VMALLOC_GLOBAL_NONSENSITIVE_END		VMALLOC_END
+
 #endif
 
 #define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 3ba0971a318d..91e5ff1224ff 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -3,6 +3,7 @@ 
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/memcontrol.h>
+#include <linux/moduleparam.h>
 
 #include <asm/asi.h>
 #include <asm/pgalloc.h>
@@ -28,6 +29,17 @@  EXPORT_SYMBOL(asi_local_map_initialized);
 unsigned long asi_local_map_base __ro_after_init;
 EXPORT_SYMBOL(asi_local_map_base);
 
+unsigned long vmalloc_global_nonsensitive_start __ro_after_init;
+EXPORT_SYMBOL(vmalloc_global_nonsensitive_start);
+
+unsigned long vmalloc_local_nonsensitive_end __ro_after_init;
+EXPORT_SYMBOL(vmalloc_local_nonsensitive_end);
+
+/* Approximate percent only. Rounded to PGDIR_SIZE boundary. */
+static uint vmalloc_local_nonsensitive_percent __ro_after_init = 50;
+core_param(vmalloc_local_nonsensitive_percent,
+	   vmalloc_local_nonsensitive_percent, uint, 0444);
+
 int asi_register_class(const char *name, uint flags,
 		       const struct asi_hooks *ops)
 {
@@ -307,6 +319,10 @@  int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi)
 		     i++)
 			set_pgd(asi->pgd + i, mm->asi[0].pgd[i]);
 
+		for (i = pgd_index(VMALLOC_LOCAL_NONSENSITIVE_START);
+		     i <= pgd_index(VMALLOC_LOCAL_NONSENSITIVE_END); i++)
+			set_pgd(asi->pgd + i, mm->asi[0].pgd[i]);
+
 		for (i = pgd_index(VMALLOC_GLOBAL_NONSENSITIVE_START);
 		     i < PTRS_PER_PGD; i++)
 			set_pgd(asi->pgd + i, asi_global_nonsensitive_pgd[i]);
@@ -432,6 +448,10 @@  void asi_free_mm_state(struct mm_struct *mm)
 			   pgd_index(ASI_LOCAL_MAP +
 				     PFN_PHYS(max_possible_pfn)) + 1);
 
+	asi_free_pgd_range(&mm->asi[0],
+			   pgd_index(VMALLOC_LOCAL_NONSENSITIVE_START),
+			   pgd_index(VMALLOC_LOCAL_NONSENSITIVE_END) + 1);
+
 	free_page((ulong)mm->asi[0].pgd);
 }
 
@@ -671,3 +691,40 @@  void asi_sync_mapping(struct asi *asi, void *start, size_t len)
 		for (; addr < end; addr = pgd_addr_end(addr, end))
 			asi_clone_pgd(asi->pgd, asi->mm->asi[0].pgd, addr);
 }
+
+/* Split the vmalloc range between local and global non-sensitive use. */
+void __init asi_vmalloc_init(void)
+{
+	uint start_index = pgd_index(VMALLOC_START);
+	uint end_index = pgd_index(VMALLOC_END);
+	uint global_start_index;
+
+	if (!boot_cpu_has(X86_FEATURE_ASI)) {
+		vmalloc_global_nonsensitive_start = VMALLOC_START;
+		vmalloc_local_nonsensitive_end = VMALLOC_END;
+		return;
+	}
+
+	if (vmalloc_local_nonsensitive_percent == 0) {
+		vmalloc_local_nonsensitive_percent = 1;
+		pr_warn("vmalloc_local_nonsensitive_percent must be non-zero\n");
+	} else if (vmalloc_local_nonsensitive_percent >= 100) {
+		vmalloc_local_nonsensitive_percent = 99;
+		pr_warn("vmalloc_local_nonsensitive_percent must be less than 100\n");
+	}
+
+	global_start_index = start_index + (end_index - start_index) *
+			     vmalloc_local_nonsensitive_percent / 100;
+	global_start_index = max(global_start_index, start_index + 1);
+
+	/* Base of that PGD slot, counted down from the address-space top. */
+	vmalloc_global_nonsensitive_start = -(PTRS_PER_PGD - global_start_index)
+					    * PGDIR_SIZE;
+	vmalloc_local_nonsensitive_end = vmalloc_global_nonsensitive_start - 1;
+
+	pr_debug("vmalloc_global_nonsensitive_start = %lx\n",
+		 vmalloc_global_nonsensitive_start);
+
+	VM_BUG_ON(vmalloc_local_nonsensitive_end >= VMALLOC_END);
+	VM_BUG_ON(vmalloc_global_nonsensitive_start <= VMALLOC_START);
+}
diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h
index a1c8ebff70e8..7c50d8b64fa4 100644
--- a/include/asm-generic/asi.h
+++ b/include/asm-generic/asi.h
@@ -18,6 +18,9 @@ 
 #define VMALLOC_GLOBAL_NONSENSITIVE_START	VMALLOC_START
 #define VMALLOC_GLOBAL_NONSENSITIVE_END		VMALLOC_END
 
+#define VMALLOC_LOCAL_NONSENSITIVE_START	VMALLOC_START
+#define VMALLOC_LOCAL_NONSENSITIVE_END		VMALLOC_END
+
 #ifndef _ASSEMBLY_
 
 struct asi_hooks {};
@@ -36,6 +39,8 @@  static inline int asi_init_mm_state(struct mm_struct *mm) { return 0; }
 
 static inline void asi_free_mm_state(struct mm_struct *mm) { }
 
+static inline void asi_vmalloc_init(void) { }
+
 static inline
 int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi)
 {
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 5f85690f27b6..2b4eafc21fa5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -41,8 +41,10 @@  struct notifier_block;		/* in notifier.h */
 
 #ifdef CONFIG_ADDRESS_SPACE_ISOLATION
 #define VM_GLOBAL_NONSENSITIVE	0x00000800	/* Similar to __GFP_GLOBAL_NONSENSITIVE */
+#define VM_LOCAL_NONSENSITIVE	0x00001000	/* Similar to __GFP_LOCAL_NONSENSITIVE */
 #else
 #define VM_GLOBAL_NONSENSITIVE	0
+#define VM_LOCAL_NONSENSITIVE	0
 #endif
 
 /* bits [20..32] reserved for arch specific ioremap internals */
@@ -67,6 +69,10 @@  struct vm_struct {
 	unsigned int		nr_pages;
 	phys_addr_t		phys_addr;
 	const void		*caller;
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	/* Valid if flags contain VM_*_NONSENSITIVE */
+	struct asi		*asi;
+#endif
 };
 
 struct vmap_area {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f13bfe7e896b..ea94d8a1e2e9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2391,18 +2391,25 @@  void __init vmalloc_init(void)
 	 */
 	vmap_init_free_space();
 	vmap_initialized = true;
+
+	asi_vmalloc_init();
 }
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
 static int asi_map_vm_area(struct vm_struct *area)
 {
 	if (!static_asi_enabled())
 		return 0;
 
 	if (area->flags & VM_GLOBAL_NONSENSITIVE)
-		return asi_map(ASI_GLOBAL_NONSENSITIVE, area->addr,
-			       get_vm_area_size(area));
+		area->asi = ASI_GLOBAL_NONSENSITIVE;
+	else if (area->flags & VM_LOCAL_NONSENSITIVE)
+		area->asi = ASI_LOCAL_NONSENSITIVE;
+	else
+		return 0;
 
-	return 0;
+	return asi_map(area->asi, area->addr, get_vm_area_size(area));
 }
 
 static void asi_unmap_vm_area(struct vm_struct *area)
@@ -2415,11 +2422,17 @@  static void asi_unmap_vm_area(struct vm_struct *area)
 	 * the case when the existing flush from try_purge_vmap_area_lazy()
 	 * and/or vm_unmap_aliases() happens non-lazily.
 	 */
-	if (area->flags & VM_GLOBAL_NONSENSITIVE)
-		asi_unmap(ASI_GLOBAL_NONSENSITIVE, area->addr,
-			  get_vm_area_size(area), true);
+	if (area->flags & (VM_GLOBAL_NONSENSITIVE | VM_LOCAL_NONSENSITIVE))
+		asi_unmap(area->asi, area->addr, get_vm_area_size(area), true);
 }
 
+#else
+
+static inline int asi_map_vm_area(struct vm_struct *area) { return 0; }
+static inline void asi_unmap_vm_area(struct vm_struct *area) { }
+
+#endif
+
 static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
 	struct vmap_area *va, unsigned long flags, const void *caller)
 {
@@ -2463,6 +2476,15 @@  static struct vm_struct *__get_vm_area_node(unsigned long size,
 	if (unlikely(!size))
 		return NULL;
 
+	if (static_asi_enabled()) {
+		VM_BUG_ON((flags & VM_LOCAL_NONSENSITIVE) &&
+			  !(start >= VMALLOC_LOCAL_NONSENSITIVE_START &&
+			    end <= VMALLOC_LOCAL_NONSENSITIVE_END));
+
+		VM_BUG_ON((flags & VM_GLOBAL_NONSENSITIVE) &&
+			  start < VMALLOC_GLOBAL_NONSENSITIVE_START);
+	}
+
 	if (flags & VM_IOREMAP)
 		align = 1ul << clamp_t(int, get_count_order_long(size),
 				       PAGE_SHIFT, IOREMAP_MAX_ORDER);
@@ -3073,8 +3095,22 @@  void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (WARN_ON_ONCE(!size))
 		return NULL;
 
-	if (static_asi_enabled() && (vm_flags & VM_GLOBAL_NONSENSITIVE))
-		gfp_mask |= __GFP_ZERO;
+	if (static_asi_enabled()) {
+		VM_BUG_ON((vm_flags & (VM_LOCAL_NONSENSITIVE |
+				       VM_GLOBAL_NONSENSITIVE)) ==
+			  (VM_LOCAL_NONSENSITIVE | VM_GLOBAL_NONSENSITIVE));
+
+		if ((vm_flags & VM_LOCAL_NONSENSITIVE) &&
+		    !mm_asi_enabled(current->mm)) {
+			vm_flags &= ~VM_LOCAL_NONSENSITIVE;
+
+			if (end == VMALLOC_LOCAL_NONSENSITIVE_END)
+				end = VMALLOC_END;
+		}
+
+		if (vm_flags & (VM_GLOBAL_NONSENSITIVE | VM_LOCAL_NONSENSITIVE))
+			gfp_mask |= __GFP_ZERO;
+	}
 
 	if ((size >> PAGE_SHIFT) > totalram_pages()) {
 		warn_alloc(gfp_mask, NULL,
@@ -3166,11 +3202,19 @@  void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, int node, const void *caller)
 {
 	ulong vm_flags = 0;
+	ulong start = VMALLOC_START, end = VMALLOC_END;
 
-	if (static_asi_enabled() && (gfp_mask & __GFP_GLOBAL_NONSENSITIVE))
-		vm_flags |= VM_GLOBAL_NONSENSITIVE;
+	if (static_asi_enabled()) {
+		if (gfp_mask & __GFP_GLOBAL_NONSENSITIVE) {
+			vm_flags |= VM_GLOBAL_NONSENSITIVE;
+			start = VMALLOC_GLOBAL_NONSENSITIVE_START;
+		} else if (gfp_mask & __GFP_LOCAL_NONSENSITIVE) {
+			vm_flags |= VM_LOCAL_NONSENSITIVE;
+			end = VMALLOC_LOCAL_NONSENSITIVE_END;
+		}
+	}
 
-	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range(size, align, start, end,
 				gfp_mask, PAGE_KERNEL, vm_flags, node, caller);
 }
 /*
@@ -3678,9 +3722,15 @@  struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	/* verify parameters and allocate data structures */
 	BUG_ON(offset_in_page(align) || !is_power_of_2(align));
 
-	if (static_asi_enabled() && (flags & VM_GLOBAL_NONSENSITIVE)) {
-		vmalloc_start = VMALLOC_GLOBAL_NONSENSITIVE_START;
-		vmalloc_end = VMALLOC_GLOBAL_NONSENSITIVE_END;
+	if (static_asi_enabled()) {
+		VM_BUG_ON((flags & (VM_LOCAL_NONSENSITIVE |
+				    VM_GLOBAL_NONSENSITIVE)) ==
+			  (VM_LOCAL_NONSENSITIVE | VM_GLOBAL_NONSENSITIVE));
+
+		if (flags & VM_GLOBAL_NONSENSITIVE)
+			vmalloc_start = VMALLOC_GLOBAL_NONSENSITIVE_START;
+		else if (flags & VM_LOCAL_NONSENSITIVE)
+			vmalloc_end = VMALLOC_LOCAL_NONSENSITIVE_END;
 	}
 
 	vmalloc_start = ALIGN(vmalloc_start, align);