
[RFC,KVM,25/27] kvm/isolation: implement actual KVM isolation enter/exit

Message ID 1557758315-12667-26-git-send-email-alexandre.chartre@oracle.com
State New, archived
Series KVM Address Space Isolation

Commit Message

Alexandre Chartre May 13, 2019, 2:38 p.m. UTC
From: Liran Alon <liran.alon@oracle.com>

KVM isolation enter/exit is done by switching between the KVM address
space and the kernel address space.

Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 arch/x86/kvm/isolation.c |   30 ++++++++++++++++++++++++------
 arch/x86/mm/tlb.c        |    1 +
 include/linux/sched.h    |    1 +
 3 files changed, 26 insertions(+), 6 deletions(-)
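
Consolidated from the hunks below, the mechanism this patch adds reads roughly as follows (a sketch only: the error path for non-vCPU threads and the sibling-hyperthread TODO are omitted; kvm_mm, kvm_isolation() and switch_mm_irqs_off() are defined elsewhere in the series):

static void kvm_switch_mm(struct mm_struct *mm)
{
        unsigned long flags;

        /*
         * Save the current mm and switch with interrupts disabled, so an
         * IRQ never sees active_mm and the loaded page tables disagree.
         */
        local_irq_save(flags);
        current->kvm_prev_mm = current->active_mm;
        current->active_mm = mm;
        switch_mm_irqs_off(current->kvm_prev_mm, mm, NULL);
        local_irq_restore(flags);
}

void kvm_isolation_enter(void)
{
        /* Leave the full kernel address space for the reduced kvm_mm. */
        if (kvm_isolation() && current->active_mm != &kvm_mm)
                kvm_switch_mm(&kvm_mm);
}

void kvm_isolation_exit(void)
{
        /* Switch back to whatever mm was active before isolation. */
        if (kvm_isolation() && current->active_mm == &kvm_mm)
                kvm_switch_mm(current->kvm_prev_mm);
}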

Comments

Peter Zijlstra May 13, 2019, 3:16 p.m. UTC | #1
On Mon, May 13, 2019 at 04:38:33PM +0200, Alexandre Chartre wrote:
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index a4db7f5..7ad5ad1 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -444,6 +444,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>  		switch_ldt(real_prev, next);
>  	}
>  }
> +EXPORT_SYMBOL_GPL(switch_mm_irqs_off);
>  
>  /*
>   * Please ignore the name of this function.  It should be called

NAK

Andy Lutomirski May 13, 2019, 4:01 p.m. UTC | #2
On Mon, May 13, 2019 at 7:40 AM Alexandre Chartre
<alexandre.chartre@oracle.com> wrote:
>
> From: Liran Alon <liran.alon@oracle.com>
>
> KVM isolation enter/exit is done by switching between the KVM address
> space and the kernel address space.
>
> Signed-off-by: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
> ---
>  arch/x86/kvm/isolation.c |   30 ++++++++++++++++++++++++------
>  arch/x86/mm/tlb.c        |    1 +
>  include/linux/sched.h    |    1 +
>  3 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/isolation.c b/arch/x86/kvm/isolation.c
> index db0a7ce..b0c789f 100644
> --- a/arch/x86/kvm/isolation.c
> +++ b/arch/x86/kvm/isolation.c
> @@ -1383,11 +1383,13 @@ static bool kvm_page_fault(struct pt_regs *regs, unsigned long error_code,
>         printk(KERN_DEFAULT "KVM isolation: page fault %ld at %pS on %lx (%pS) while switching mm\n"
>                "  cr3=%lx\n"
>                "  kvm_mm=%px pgd=%px\n"
> -              "  active_mm=%px pgd=%px\n",
> +              "  active_mm=%px pgd=%px\n"
> +              "  kvm_prev_mm=%px pgd=%px\n",
>                error_code, (void *)regs->ip, address, (void *)address,
>                cr3,
>                &kvm_mm, kvm_mm.pgd,
> -              active_mm, active_mm->pgd);
> +              active_mm, active_mm->pgd,
> +              current->kvm_prev_mm, current->kvm_prev_mm->pgd);
>         dump_stack();
>
>         return false;
> @@ -1649,11 +1651,27 @@ void kvm_may_access_sensitive_data(struct kvm_vcpu *vcpu)
>         kvm_isolation_exit();
>  }
>
> +static void kvm_switch_mm(struct mm_struct *mm)
> +{
> +       unsigned long flags;
> +
> +       /*
> +        * Disable interrupts before updating active_mm, otherwise if an
> +        * interrupt occurs during the switch then the interrupt handler
> +        * can be misled about the mm actually in use.
> +        */
> +       local_irq_save(flags);
> +       current->kvm_prev_mm = current->active_mm;

Peter's NAK aside, why on Earth is this in task_struct?  You cannot
possibly context switch while in isolation mode.

--Andy
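
The objection: a task running in the isolated address space cannot be scheduled out while isolated, so the saved mm is per-CPU state rather than per-task state. A hypothetical sketch of the alternative this implies, with kvm_prev_mm as a per-CPU variable instead of a task_struct field (names illustrative, not from the series):

#include <linux/percpu.h>

/* Hypothetical per-CPU save slot replacing the task_struct field. */
static DEFINE_PER_CPU(struct mm_struct *, kvm_prev_mm);

static void kvm_switch_mm(struct mm_struct *mm)
{
        unsigned long flags;

        local_irq_save(flags);
        /*
         * A per-CPU slot is sufficient: isolation is entered and left
         * on the same CPU, with no context switch possible in between.
         */
        this_cpu_write(kvm_prev_mm, current->active_mm);
        current->active_mm = mm;
        switch_mm_irqs_off(this_cpu_read(kvm_prev_mm), mm, NULL);
        local_irq_restore(flags);
}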

Patch

diff --git a/arch/x86/kvm/isolation.c b/arch/x86/kvm/isolation.c
index db0a7ce..b0c789f 100644
--- a/arch/x86/kvm/isolation.c
+++ b/arch/x86/kvm/isolation.c
@@ -1383,11 +1383,13 @@ static bool kvm_page_fault(struct pt_regs *regs, unsigned long error_code,
 	printk(KERN_DEFAULT "KVM isolation: page fault %ld at %pS on %lx (%pS) while switching mm\n"
 	       "  cr3=%lx\n"
 	       "  kvm_mm=%px pgd=%px\n"
-	       "  active_mm=%px pgd=%px\n",
+	       "  active_mm=%px pgd=%px\n"
+	       "  kvm_prev_mm=%px pgd=%px\n",
 	       error_code, (void *)regs->ip, address, (void *)address,
 	       cr3,
 	       &kvm_mm, kvm_mm.pgd,
-	       active_mm, active_mm->pgd);
+	       active_mm, active_mm->pgd,
+	       current->kvm_prev_mm, current->kvm_prev_mm->pgd);
 	dump_stack();
 
 	return false;
@@ -1649,11 +1651,27 @@ void kvm_may_access_sensitive_data(struct kvm_vcpu *vcpu)
 	kvm_isolation_exit();
 }
 
+static void kvm_switch_mm(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	/*
+	 * Disable interrupts before updating active_mm, otherwise if an
+	 * interrupt occurs during the switch then the interrupt handler
+	 * can be misled about the mm actually in use.
+	 */
+	local_irq_save(flags);
+	current->kvm_prev_mm = current->active_mm;
+	current->active_mm = mm;
+	switch_mm_irqs_off(current->kvm_prev_mm, mm, NULL);
+	local_irq_restore(flags);
+}
+
 void kvm_isolation_enter(void)
 {
 	int err;
 
-	if (kvm_isolation()) {
+	if (kvm_isolation() && current->active_mm != &kvm_mm) {
 		/*
 		 * Switches to kvm_mm should happen from vCPU thread,
 		 * which should not be a kernel thread with no mm
@@ -1666,14 +1684,14 @@ void kvm_isolation_enter(void)
 			       current);
 			return;
 		}
-		/* TODO: switch to kvm_mm */
+		kvm_switch_mm(&kvm_mm);
 	}
 }
 
 void kvm_isolation_exit(void)
 {
-	if (kvm_isolation()) {
+	if (kvm_isolation() && current->active_mm == &kvm_mm) {
 		/* TODO: Kick sibling hyperthread before switch to host mm */
-		/* TODO: switch back to original mm */
+		kvm_switch_mm(current->kvm_prev_mm);
 	}
 }
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a4db7f5..7ad5ad1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -444,6 +444,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		switch_ldt(real_prev, next);
 	}
 }
+EXPORT_SYMBOL_GPL(switch_mm_irqs_off);
 
 /*
  * Please ignore the name of this function.  It should be called
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 80e1d75..b03680d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,6 +1202,7 @@ struct task_struct {
 #ifdef CONFIG_HAVE_KVM
 	/* Is the task mapped into the KVM address space? */
 	bool				kvm_mapped;
+	struct mm_struct		*kvm_prev_mm;
 #endif
 
 	/*