
KVM: nVMX: Skip IBPB when switching between vmcs01 and vmcs02

Message ID: 20200430204123.2608-1-sean.j.christopherson@intel.com

Commit Message

Sean Christopherson April 30, 2020, 8:41 p.m. UTC
Skip the Indirect Branch Prediction Barrier that is triggered on a VMCS
switch when running with spectre_v2_user=on/auto if the switch is
between two VMCSes in the same guest, i.e. between vmcs01 and vmcs02.
The IBPB is intended to prevent one guest from attacking another, which
is unnecessary in the nested case as it's the same guest from KVM's
perspective.

This all but eliminates the overhead observed for nested VMX transitions
when running with CONFIG_RETPOLINE=y and spectre_v2_user=on/auto, which
can be significant, e.g. roughly 3x on current systems.

Reported-by: Alexander Graf <graf@amazon.com>
Cc: KarimAllah Raslan <karahmed@amazon.de>
Cc: stable@vger.kernel.org
Fixes: 15d45071523d ("KVM/x86: Add IBPB support")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/nested.c |  2 +-
 arch/x86/kvm/vmx/vmx.c    | 12 ++++++++----
 arch/x86/kvm/vmx/vmx.h    |  3 ++-
 3 files changed, 11 insertions(+), 6 deletions(-)

Comments

Alexander Graf April 30, 2020, 9:22 p.m. UTC | #1
On 30.04.20 22:41, Sean Christopherson wrote:
> 
> Skip the Indirect Branch Prediction Barrier that is triggered on a VMCS
> switch when running with spectre_v2_user=on/auto if the switch is
> between two VMCSes in the same guest, i.e. between vmcs01 and vmcs02.
> The IBPB is intended to prevent one guest from attacking another, which
> is unnecessary in the nested case as it's the same guest from KVM's
> perspective.
> 
> This all but eliminates the overhead observed for nested VMX transitions
> when running with CONFIG_RETPOLINE=y and spectre_v2_user=on/auto, which
> can be significant, e.g. roughly 3x on current systems.
> 
> Reported-by: Alexander Graf <graf@amazon.com>
> Cc: KarimAllah Raslan <karahmed@amazon.de>
> Cc: stable@vger.kernel.org
> Fixes: 15d45071523d ("KVM/x86: Add IBPB support")
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

I can confirm that with kvm-unit-tests' vmcall benchmark, the patch does 
make a big difference:

   BEFORE: vmcall 33488
   AFTER:  vmcall 14898

So we're at least getting a good chunk of performance back :)
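
For context, the numbers above are presumably average TSC cycles per
vmcall, as printed by kvm-unit-tests' exit-latency benchmark.  A minimal
sketch of that style of measurement loop, run as guest kernel code, is
below; it is illustrative only -- rdtsc(), ITERS and the function names
are assumptions for the example, not the actual test source.

#include <stdint.h>

#define ITERS 100000

static inline uint64_t rdtsc(void)
{
	uint32_t lo, hi;

	/* Read the time-stamp counter. */
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

static inline void vmcall(void)
{
	/* Unconditionally exits to the hypervisor from guest ring 0. */
	asm volatile("vmcall" : : : "memory");
}

static uint64_t avg_vmcall_cycles(void)
{
	uint64_t start, end;
	int i;

	start = rdtsc();
	for (i = 0; i < ITERS; i++)
		vmcall();
	end = rdtsc();

	/* Average round-trip cost of a single vmcall, in TSC cycles. */
	return (end - start) / ITERS;
}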

> ---
>   arch/x86/kvm/vmx/nested.c |  2 +-
>   arch/x86/kvm/vmx/vmx.c    | 12 ++++++++----
>   arch/x86/kvm/vmx/vmx.h    |  3 ++-
>   3 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 2c36f3f53108..1a02bdfeeb2b 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -303,7 +303,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
>          cpu = get_cpu();
>          prev = vmx->loaded_vmcs;
>          vmx->loaded_vmcs = vmcs;
> -       vmx_vcpu_load_vmcs(vcpu, cpu);
> +       vmx_vcpu_load_vmcs(vcpu, cpu, prev);
>          vmx_sync_vmcs_host_state(vmx, prev);
>          put_cpu();
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 3ab6ca6062ce..818dd8ba5e9f 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1311,10 +1311,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>                  pi_set_on(pi_desc);
>   }
> 
> -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
> +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
> +                       struct loaded_vmcs *buddy)
>   {
>          struct vcpu_vmx *vmx = to_vmx(vcpu);
>          bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
> +       struct vmcs *prev;
> 
>          if (!already_loaded) {
>                  loaded_vmcs_clear(vmx->loaded_vmcs);
> @@ -1333,10 +1335,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
>                  local_irq_enable();
>          }
> 
> -       if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
> +       prev = per_cpu(current_vmcs, cpu);
> +       if (prev != vmx->loaded_vmcs->vmcs) {
>                  per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>                  vmcs_load(vmx->loaded_vmcs->vmcs);
> -               indirect_branch_prediction_barrier();
> +               if (!buddy || buddy->vmcs != prev)
> +                       indirect_branch_prediction_barrier();

I fail to understand the logic here, though. What exactly are you trying 
to catch? We only do the barrier when the current_vmcs loaded by 
vmx_vcpu_load_vmcs() is different from the vmcs of the context that 
issued the vmcs load.

Isn't this a really complicated way to say "Don't flush for nested"? Why 
not just make it explicit and pass in a bool that says "nested = true" 
from vmx_switch_vmcs()? Is there any case I'm missing where that would 
be unsafe?


Thanks,

Alex

Sean Christopherson May 1, 2020, 1:58 p.m. UTC | #2
On Thu, Apr 30, 2020 at 11:22:20PM +0200, Alexander Graf wrote:
> 
> On 30.04.20 22:41, Sean Christopherson wrote:
> >
> >diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> >index 3ab6ca6062ce..818dd8ba5e9f 100644
> >--- a/arch/x86/kvm/vmx/vmx.c
> >+++ b/arch/x86/kvm/vmx/vmx.c
> >@@ -1311,10 +1311,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
> >                 pi_set_on(pi_desc);
> >  }
> >
> >-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
> >+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
> >+                       struct loaded_vmcs *buddy)
> >  {
> >         struct vcpu_vmx *vmx = to_vmx(vcpu);
> >         bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
> >+       struct vmcs *prev;
> >
> >         if (!already_loaded) {
> >                 loaded_vmcs_clear(vmx->loaded_vmcs);
> >@@ -1333,10 +1335,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
> >                 local_irq_enable();
> >         }
> >
> >-       if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
> >+       prev = per_cpu(current_vmcs, cpu);
> >+       if (prev != vmx->loaded_vmcs->vmcs) {
> >                 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
> >                 vmcs_load(vmx->loaded_vmcs->vmcs);
> >-               indirect_branch_prediction_barrier();
> >+               if (!buddy || buddy->vmcs != prev)
> >+                       indirect_branch_prediction_barrier();
> 
> I fail to understand the logic here, though. What exactly are you trying to
> catch? We only do the barrier when the current_vmcs loaded by
> vmx_vcpu_load_vmcs() is different from the vmcs of the context that
> issued the vmcs load.
> 
> Isn't this a really complicated way to say "Don't flush for nested"? Why not
> just make it explicit and pass in a bool that says "nested = true" from
> vmx_switch_vmcs()? Is there any case I'm missing where that would be unsafe?

I don't think so; the 'buddy' check was added out of paranoia, and partly
because I was rushing.  I originally had a 'bool nested_switch' as well.

I think I like the boolean approach better; the above check should never
fail in the nested switch case.  I'll add a WARN in vmx_switch_vmcs() with
a note in the patch to let Paolo know it's likely paranoia and can be
ripped out at his discretion.
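
For illustration, one way a WARN could be folded directly into the
existing 'buddy' check in vmx_vcpu_load_vmcs(), rather than as a separate
WARN in vmx_switch_vmcs().  This is a sketch of the idea, not the actual
follow-up patch:

	prev = per_cpu(current_vmcs, cpu);
	if (prev != vmx->loaded_vmcs->vmcs) {
		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
		vmcs_load(vmx->loaded_vmcs->vmcs);

		/*
		 * No IBPB is needed when switching between VMCSes of the
		 * same guest, e.g. vmcs01 <-> vmcs02 on nested VM-Enter.
		 * WARN_ON_ONCE() returns true if the 'buddy' VMCS doesn't
		 * match the outgoing VMCS, so the barrier still runs (and
		 * warns) if the pairing assumption is ever violated.
		 */
		if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
			indirect_branch_prediction_barrier();
	}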

Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2c36f3f53108..1a02bdfeeb2b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -303,7 +303,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 	cpu = get_cpu();
 	prev = vmx->loaded_vmcs;
 	vmx->loaded_vmcs = vmcs;
-	vmx_vcpu_load_vmcs(vcpu, cpu);
+	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
 	vmx_sync_vmcs_host_state(vmx, prev);
 	put_cpu();
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3ab6ca6062ce..818dd8ba5e9f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1311,10 +1311,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 		pi_set_on(pi_desc);
 }
 
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
+			struct loaded_vmcs *buddy)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
+	struct vmcs *prev;
 
 	if (!already_loaded) {
 		loaded_vmcs_clear(vmx->loaded_vmcs);
@@ -1333,10 +1335,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
 		local_irq_enable();
 	}
 
-	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+	prev = per_cpu(current_vmcs, cpu);
+	if (prev != vmx->loaded_vmcs->vmcs) {
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
-		indirect_branch_prediction_barrier();
+		if (!buddy || buddy->vmcs != prev)
+			indirect_branch_prediction_barrier();
 	}
 
 	if (!already_loaded) {
@@ -1377,7 +1381,7 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	vmx_vcpu_load_vmcs(vcpu, cpu);
+	vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
 
 	vmx_vcpu_pi_load(vcpu, cpu);
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index b5e773267abe..d3d48acc6bd9 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -320,7 +320,8 @@ struct kvm_vmx {
 };
 
 bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
+			struct loaded_vmcs *buddy);
 void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 int allocate_vpid(void);
 void free_vpid(int vpid);