diff mbox series

[RFC,03/27] KVM: x86: Introduce KVM separate virtual address space

Message ID 1557758315-12667-4-git-send-email-alexandre.chartre@oracle.com (mailing list archive)
State New, archived
Headers show
Series KVM Address Space Isolation | expand

Commit Message

Alexandre Chartre May 13, 2019, 2:38 p.m. UTC
From: Liran Alon <liran.alon@oracle.com>

Create a separate mm for KVM that will be active when KVM #VMExit
handlers run, up until the point at which we architecturally need to
access host (or other VM) sensitive data.

This patch just creates kvm_mm but does not make it active yet.
This will be done by the next commits.

Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 arch/x86/kvm/isolation.c |   95 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/isolation.h |    8 ++++
 arch/x86/kvm/x86.c       |   10 ++++-
 3 files changed, 112 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/isolation.h

Comments

Andy Lutomirski May 13, 2019, 3:45 p.m. UTC | #1
On Mon, May 13, 2019 at 7:39 AM Alexandre Chartre
<alexandre.chartre@oracle.com> wrote:
>
> From: Liran Alon <liran.alon@oracle.com>
>
> Create a separate mm for KVM that will be active when KVM #VMExit
> handlers run. Up until the point which we architectully need to
> access host (or other VM) sensitive data.
>
> This patch just create kvm_mm but never makes it active yet.
> This will be done by next commits.

NAK to this whole pile of code.  KVM is not so special that it can
duplicate core infrastructure like this.  Use copy_init_mm() or
improve it as needed.

--Andy
Alexandre Chartre May 13, 2019, 4:04 p.m. UTC | #2
On 5/13/19 5:45 PM, Andy Lutomirski wrote:
> On Mon, May 13, 2019 at 7:39 AM Alexandre Chartre
> <alexandre.chartre@oracle.com> wrote:
>>
>> From: Liran Alon <liran.alon@oracle.com>
>>
>> Create a separate mm for KVM that will be active when KVM #VMExit
>> handlers run. Up until the point which we architectully need to
>> access host (or other VM) sensitive data.
>>
>> This patch just create kvm_mm but never makes it active yet.
>> This will be done by next commits.
> 
> NAK to this whole pile of code.  KVM is not so special that it can
> duplicate core infrastructure like this.  Use copy_init_mm() or
> improve it as needed.
> 
> --Andy
> 

This was originally inspired by how efi_mm is built. If I remember
correctly, copy_init_mm() or other mm init functions do initialization
we don't need in this case; we basically want a blank mm. I will have
another look at copy_init_mm().

In any case, if we really need a mm create/init function I agree it
doesn't belong to kvm. For now, this is part of the shortcuts used for the POC.

alex.
diff mbox series

Patch

diff --git a/arch/x86/kvm/isolation.c b/arch/x86/kvm/isolation.c
index e25f663..74bc0cd 100644
--- a/arch/x86/kvm/isolation.c
+++ b/arch/x86/kvm/isolation.c
@@ -7,6 +7,21 @@ 
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/printk.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#include "isolation.h"
+
+struct mm_struct kvm_mm = {	/* separate mm for KVM #VMExit handlers */
+	.mm_count		= ATOMIC_INIT(1),
+	.mm_users		= ATOMIC_INIT(2),
+	.mm_rb			= RB_ROOT,
+	.mmlist			= LIST_HEAD_INIT(kvm_mm.mmlist),
+	.page_table_lock	= __SPIN_LOCK_UNLOCKED(kvm_mm.page_table_lock),
+	.mmap_sem		= __RWSEM_INITIALIZER(kvm_mm.mmap_sem),
+};
 
 /*
  * When set to true, KVM #VMExit handlers run in isolated address space
@@ -24,3 +39,83 @@ 
  */
 static bool __read_mostly address_space_isolation;
 module_param(address_space_isolation, bool, 0444);
+
+/* Allocate a zeroed PGD for kvm_mm and initialize the mm's context. */
+static int kvm_isolation_init_mm(void)
+{
+	pgd_t *pgd;
+
+	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					PGD_ALLOCATION_ORDER);
+	if (!pgd)
+		return -ENOMEM;
+
+#ifndef CONFIG_PAGE_TABLE_ISOLATION
+	kvm_mm.pgd = pgd;
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+	/*
+	 * PTI gives us a pair of PGDs: one for the kernel page table and
+	 * one for the user page table. Paranoid faults unconditionally use
+	 * the kernel one, so it must map the entire kernel address space.
+	 * The KVM address space is therefore defined in the user table
+	 * space, even though it will be used while in the kernel.
+	 */
+
+	/* define kvm_mm with the user page table */
+	kvm_mm.pgd = kernel_to_user_pgdp(pgd);
+
+	/* fill the kernel page table from the active mm's PGD */
+	memcpy(pgd, current->active_mm->pgd, PTRS_PER_PGD * sizeof(pgd_t));
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+	mm_init_cpumask(&kvm_mm);
+	init_new_context(NULL, &kvm_mm);
+
+	return 0;
+}
+
+/* Tear down kvm_mm: destroy its context and free the PGD pages. */
+static void kvm_isolation_uninit_mm(void)
+{
+	pgd_t *kvm_pgd;
+
+	BUG_ON(current->active_mm == &kvm_mm);
+	destroy_context(&kvm_mm);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * With PTI, kvm_mm.pgd points into the user page table space,
+	 * but the full PGD allocation starts with the kernel page table
+	 * space, so derive the address to free from kvm_mm.pgd.
+	 */
+	kvm_pgd = user_to_kernel_pgdp(kvm_mm.pgd);
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+	kvm_pgd = kvm_mm.pgd;
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+	kvm_mm.pgd = NULL;
+	free_pages((unsigned long)kvm_pgd, PGD_ALLOCATION_ORDER);
+}
+
+/* Set up kvm_mm when the address_space_isolation parameter is enabled. */
+int kvm_isolation_init(void)
+{
+	int err;
+
+	if (!address_space_isolation)
+		return 0;
+
+	err = kvm_isolation_init_mm();
+	if (!err)
+		pr_info("KVM: x86: Running with isolated address space\n");
+
+	/* 0 on success, otherwise the error from kvm_isolation_init_mm() */
+	return err;
+}
+
+/* Release kvm_mm; does nothing when address_space_isolation is off. */
+void kvm_isolation_uninit(void)
+{
+	if (address_space_isolation) {
+		kvm_isolation_uninit_mm();
+		pr_info("KVM: x86: End of isolated address space\n");
+	}
+}
diff --git a/arch/x86/kvm/isolation.h b/arch/x86/kvm/isolation.h
new file mode 100644
index 0000000..cf8c7d4
--- /dev/null
+++ b/arch/x86/kvm/isolation.h
@@ -0,0 +1,8 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_X86_KVM_ISOLATION_H
+#define ARCH_X86_KVM_ISOLATION_H
+
+int kvm_isolation_init(void);	/* 0 on success, error code on failure */
+void kvm_isolation_uninit(void);
+
+#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5edc8e..4b7cec2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@ 
 #include "cpuid.h"
 #include "pmu.h"
 #include "hyperv.h"
+#include "isolation.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -6972,10 +6973,14 @@  int kvm_arch_init(void *opaque)
 		goto out_free_x86_fpu_cache;
 	}
 
-	r = kvm_mmu_module_init();
+	r = kvm_isolation_init();
 	if (r)
 		goto out_free_percpu;
 
+	r = kvm_mmu_module_init();
+	if (r)
+		goto out_uninit_isolation;
+
 	kvm_set_mmio_spte_mask();
 
 	kvm_x86_ops = ops;
@@ -7000,6 +7005,8 @@  int kvm_arch_init(void *opaque)
 
 	return 0;
 
+out_uninit_isolation:
+	kvm_isolation_uninit();
 out_free_percpu:
 	free_percpu(shared_msrs);
 out_free_x86_fpu_cache:
@@ -7024,6 +7031,7 @@  void kvm_arch_exit(void)
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
 #endif
+	kvm_isolation_uninit();
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 	free_percpu(shared_msrs);