
[RFC,12/13] mmap: Add XO support for KVM XO

Message ID 20191003212400.31130-13-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Series XOM for KVM guest userspace

Commit Message

Edgecombe, Rick P Oct. 3, 2019, 9:23 p.m. UTC
The KVM XO feature makes it possible to create execute-only (XO) virtual
memory. Use this feature to create XO memory when PROT_EXEC is requested
without PROT_READ, matching the existing behavior of protection keys for
userspace and of some arm64 platforms.

When both the ability to create execute-only memory with protection keys
and the ability to create native execute-only memory are present, use the
KVM XO method of creating execute-only memory so that a protection key is
not consumed.

Set the __P100 and __S100 entries of protection_map[] (indices 4 and 12,
the PROT_EXEC-only protections for private and shared mappings) during
boot instead of statically, because the actual KVM XO bit in the PTE is
determined at boot time and so can't be known at compile time.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 arch/x86/include/asm/pgtable_types.h |  2 ++
 arch/x86/kernel/head64.c             |  3 +++
 mm/mmap.c                            | 30 +++++++++++++++++++++++-----
 3 files changed, 30 insertions(+), 5 deletions(-)
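
For illustration only (none of this is in the patch): with this series
applied, guest userspace requests execute-only memory the same way it
does on pkey-capable x86 or arm64 systems, by passing PROT_EXEC without
PROT_READ. A minimal sketch:

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* PROT_EXEC without PROT_READ asks for execute-only memory */
		void *p = mmap(NULL, 4096, PROT_EXEC,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/*
		 * With XO support, data reads through p fault while
		 * instruction fetches are still allowed.
		 */
		return 0;
	}

Whether reads actually fault depends on the platform: without pkeys,
native XO, or KVM XO, the kernel quietly falls back to readable
PROT_EXEC memory.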

Comments

Paolo Bonzini Oct. 4, 2019, 7:34 a.m. UTC | #1
On 03/10/19 23:23, Rick Edgecombe wrote:
> +
> +	protection_map[4] = PAGE_EXECONLY;
> +	protection_map[12] = PAGE_EXECONLY;

Can you add #defines for the indices into protection_map?  Also perhaps
you can replace the p_xo/p_xr/s_xo/s_xr checks with just "if
(pgtable_kvmxo_enabled())".

Paolo

> +	/* Prefer non-pkey XO capability if available, to save a pkey */
> +
> +	if (flags & MAP_PRIVATE && (p_xo != p_xr))
> +		return 0;
> +
> +	if (flags & MAP_SHARED && (s_xo != s_xr))
> +		return 0;
>
> +	pkey = execute_only_pkey(current->mm);
> +	if (pkey < 0)
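
A minimal sketch of the #defines suggested above; the names are
hypothetical, not from this series. protection_map[] is indexed by the
VM_READ/VM_WRITE/VM_EXEC/VM_SHARED flag values (1/2/4/8), so the
execute-only entries are:

	/* Hypothetical names for the magic protection_map[] indices */
	#define PROT_MAP_PRIV_EXEC	4	/* __P100: private PROT_EXEC */
	#define PROT_MAP_SHARED_EXEC	12	/* __S100: shared PROT_EXEC */
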
Edgecombe, Rick P Oct. 4, 2019, 7:12 p.m. UTC | #2
On Fri, 2019-10-04 at 09:34 +0200, Paolo Bonzini wrote:
> On 03/10/19 23:23, Rick Edgecombe wrote:
> > +
> > +	protection_map[4] = PAGE_EXECONLY;
> > +	protection_map[12] = PAGE_EXECONLY;
> 
> Can you add #defines for the indices into protection_map?  Also perhaps
> you can replace the p_xo/p_xr/s_xo/s_xr checks with just "if
> (pgtable_kvmxo_enabled())".
> 
> Paolo

PAGE_EXECONLY is not known at compile time, since the NR bit position
depends on the number of physical address bits. So it can't be set in
protection_map[] the way the other entries are, if that's what you are
saying.
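
To make the boot-time dependency concrete, an assumed sketch (not the
actual definition from this series): _PAGE_NR has to be derived from the
bit position recorded at boot rather than being a constant, along the
lines of:

	/*
	 * Assumed sketch only: the position of the no-read bit is only
	 * known once check_kvmxo_support() has recorded it at boot in
	 * __pgtable_kvmxo_bit.
	 */
	#define _PAGE_NR	(_AT(pteval_t, 1) << __pgtable_kvmxo_bit)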

I didn't love the p_xo/p_xr/s_xo/s_xr checks, but since mm/mmap.c is
cross-arch it seemed the best option. Maybe a cross-arch helper like
non_pkey_xo_supported() instead?
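
A sketch of what that helper could look like, lifted from the checks in
get_pkey() in the patch below (only the name, which is Rick's suggestion,
is new):

	/*
	 * True if this architecture can express execute-only without
	 * burning a pkey, i.e. the execute-only entry in
	 * protection_map[] differs from the execute+read entry.
	 */
	static inline bool non_pkey_xo_supported(unsigned long flags)
	{
		if (flags & MAP_SHARED)
			return pgprot_val(protection_map[12]) !=
			       pgprot_val(protection_map[13]);

		return pgprot_val(protection_map[4]) !=
		       pgprot_val(protection_map[5]);
	}

get_pkey() would then reduce to a non_pkey_xo_supported(flags) check
followed by the existing execute_only_pkey() fallback.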

Patch

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d3c92c992089..fe976b4f0132 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -176,6 +176,8 @@ enum page_cache_mode {
 					 _PAGE_ACCESSED | _PAGE_NX)
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
 					 _PAGE_ACCESSED)
+#define PAGE_EXECONLY		__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
+					 _PAGE_ACCESSED | _PAGE_NR)
 
 #define __PAGE_KERNEL_EXEC						\
 	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7091702a7bec..69772b6e1810 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -133,6 +133,9 @@ static void __head check_kvmxo_support(unsigned long physaddr)
 
 	*fixup_int(&__pgtable_kvmxo_enabled, physaddr) = 1;
 	*fixup_int(&__pgtable_kvmxo_bit, physaddr) = physbits;
+
+	protection_map[4] = PAGE_EXECONLY;
+	protection_map[12] = PAGE_EXECONLY;
 }
 #else /* CONFIG_KVM_XO */
 static void __head check_kvmxo_support(unsigned long physaddr) { }
diff --git a/mm/mmap.c b/mm/mmap.c
index 7e8c3e8ae75f..034ffa0255b2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1379,6 +1379,29 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
 	return true;
 }
 
+static inline int get_pkey(unsigned long flags)
+{
+	const unsigned long p_xo = pgprot_val(protection_map[4]);
+	const unsigned long p_xr = pgprot_val(protection_map[5]);
+	const unsigned long s_xo = pgprot_val(protection_map[12]);
+	const unsigned long s_xr = pgprot_val(protection_map[13]);
+	int pkey;
+
+	/* Prefer non-pkey XO capability if available, to save a pkey */
+
+	if (flags & MAP_PRIVATE && (p_xo != p_xr))
+		return 0;
+
+	if (flags & MAP_SHARED && (s_xo != s_xr))
+		return 0;
+
+	pkey = execute_only_pkey(current->mm);
+	if (pkey < 0)
+		pkey = 0;
+
+	return pkey;
+}
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
@@ -1440,11 +1463,8 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			return -EEXIST;
 	}
 
-	if (prot == PROT_EXEC) {
-		pkey = execute_only_pkey(mm);
-		if (pkey < 0)
-			pkey = 0;
-	}
+	if (prot == PROT_EXEC)
+		pkey = get_pkey(flags);
 
 	/* Do simple checking here so the lower-level routines won't have
 	 * to. we assume access permissions have been handled by the open