
[v3,2/4] KVM: x86: Add IBPB support

Message ID 1517271028-15916-3-git-send-email-karahmed@amazon.de (mailing list archive)
State New, archived

Commit Message

KarimAllah Ahmed Jan. 30, 2018, 12:10 a.m. UTC
From: Ashok Raj <ashok.raj@intel.com>

Add MSR passthrough for MSR_IA32_PRED_CMD and issue branch predictor
barriers when switching between VMs to avoid inter-VM Spectre-v2 attacks.

[peterz: rebase and changelog rewrite]
[karahmed: - rebase
           - vmx: expose PRED_CMD whenever it is available
           - svm: only pass through IBPB if it is available
           - vmx: support !cpu_has_vmx_msr_bitmap()]
[dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
        PRED_CMD is a write-only MSR]

Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
---
 arch/x86/kvm/cpuid.c | 11 ++++++++++-
 arch/x86/kvm/svm.c   | 14 ++++++++++++++
 arch/x86/kvm/vmx.c   | 12 ++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

Comments

Tom Lendacky Jan. 30, 2018, 2:22 p.m. UTC | #1
On 1/29/2018 6:10 PM, KarimAllah Ahmed wrote:
> From: Ashok Raj <ashok.raj@intel.com>
> 
> Add MSR passthrough for MSR_IA32_PRED_CMD and issue branch predictor
> barriers when switching between VMs to avoid inter-VM Spectre-v2 attacks.
> 
> [peterz: rebase and changelog rewrite]
> [karahmed: - rebase
>            - vmx: expose PRED_CMD whenever it is available
>            - svm: only pass through IBPB if it is available
>            - vmx: support !cpu_has_vmx_msr_bitmap()]
> [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
>         PRED_CMD is a write-only MSR]
> 
> Cc: Asit Mallick <asit.k.mallick@intel.com>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
> Cc: Tim Chen <tim.c.chen@linux.intel.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Jun Nakajima <jun.nakajima@intel.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Greg KH <gregkh@linuxfoundation.org>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> ---
>  arch/x86/kvm/cpuid.c | 11 ++++++++++-
>  arch/x86/kvm/svm.c   | 14 ++++++++++++++
>  arch/x86/kvm/vmx.c   | 12 ++++++++++++
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index c0eb337..033004d 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -365,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
>  		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
>  
> +	/* cpuid 0x80000008.ebx */
> +	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
> +		F(IBPB);
> +
>  	/* cpuid 0xC0000001.edx */
>  	const u32 kvm_cpuid_C000_0001_edx_x86_features =
>  		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
> @@ -625,7 +629,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  		if (!g_phys_as)
>  			g_phys_as = phys_as;
>  		entry->eax = g_phys_as | (virt_as << 8);
> -		entry->ebx = entry->edx = 0;
> +		entry->edx = 0;
> +		/* IBPB isn't necessarily present in hardware cpuid */
> +		if (boot_cpu_has(X86_FEATURE_IBPB))
> +			entry->ebx |= F(IBPB);
> +		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
> +		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
>  		break;
>  	}
>  	case 0x80000019:
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2744b973..c886e46 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -529,6 +529,7 @@ struct svm_cpu_data {
>  	struct kvm_ldttss_desc *tss_desc;
>  
>  	struct page *save_area;
> +	struct vmcb *current_vmcb;
>  };
>  
>  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>  
>  		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
>  	}
> +
> +	if (boot_cpu_has(X86_FEATURE_IBPB))
> +		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);

Not sure you really need the check here.  If the feature isn't available
in the hardware, then it won't be advertised in the CPUID bits to the
guest, so the guest shouldn't try to write to the MSR.  If it does, it
will #GP.  So I would think the MSR could always be left unintercepted,
no?

>  }
>  
>  static void add_msr_offset(u32 offset)
> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(kvm_vcpu_cache, svm);
> +	/*
> +	 * The vmcb page can be recycled, causing a false negative in
> +	 * svm_vcpu_load(). So do a full IBPB now.
> +	 */
> +	indirect_branch_prediction_barrier();
>  }
>  
>  static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
>  	int i;
>  
>  	if (unlikely(cpu != vcpu->cpu)) {
> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (static_cpu_has(X86_FEATURE_RDTSCP))
>  		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>  
> +	if (sd->current_vmcb != svm->vmcb) {
> +		sd->current_vmcb = svm->vmcb;
> +		indirect_branch_prediction_barrier();
> +	}
>  	avic_vcpu_load(vcpu, cpu);
>  }
>  
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index aa8638a..ea278ce 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2272,6 +2272,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>  		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>  		vmcs_load(vmx->loaded_vmcs->vmcs);
> +		indirect_branch_prediction_barrier();
>  	}
>  
>  	if (!already_loaded) {
> @@ -3330,6 +3331,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_IA32_TSC:
>  		kvm_write_tsc(vcpu, msr_info);
>  		break;
> +	case MSR_IA32_PRED_CMD:
> +		if (!msr_info->host_initiated &&
> +		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
> +			return 1;
> +
> +		if (data & PRED_CMD_IBPB)
> +			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
> +		break;

Should this also be in svm.c or as common code in x86.c?

>  	case MSR_IA32_CR_PAT:
>  		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
>  			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -9548,6 +9557,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  		goto free_msrs;
>  
>  	msr_bitmap = vmx->vmcs01.msr_bitmap;
> +
> +	if (boot_cpu_has(X86_FEATURE_IBPB))
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W);

Same comment here as in svm.c: is the feature check necessary?

Thanks,
Tom

>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
>
David Woodhouse Jan. 30, 2018, 2:36 p.m. UTC | #2
On Tue, 2018-01-30 at 08:22 -0600, Tom Lendacky wrote:
> > @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
> >  
> >  		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
> >  	}
> > +
> > +	if (boot_cpu_has(X86_FEATURE_IBPB))
> > +		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
>
> Not sure you really need the check here.  If the feature isn't available
> in the hardware, then it won't be advertised in the CPUID bits to the
> guest, so the guest shouldn't try to write to the MSR.  If it does, it
> will #GP.  So I would think the MSR could always be left unintercepted,
> no?

The check for boot_cpu_has() is wrong, and fairly redundant as you
say. What we actually want is guest_cpuid_has(). We *don't* want to pass
the MSR through for a recalcitrant guest to bash on if we have elected
not to expose this feature to the guest.

On Intel right now it's *really* important that we don't allow it to be
touched, even if a write would succeed. So even boot_cpu_has() would
not be entirely meaningless there. :)
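
A minimal sketch of what guest_cpuid_has()-based interception could
look like on the VMX side, e.g. invoked from vmx_cpuid_update() once
userspace has set the guest CPUID. The helper name and placement are
hypothetical, and vmx_enable_intercept_for_msr() is assumed as the
counterpart of vmx_disable_intercept_for_msr():

	static void vmx_update_pred_cmd_intercept(struct kvm_vcpu *vcpu)
	{
		/*
		 * vmcs01.msr_bitmap is per-vCPU at this point, so the
		 * intercept can track this guest's CPUID rather than
		 * boot_cpu_has().
		 */
		unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;

		if (guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
			vmx_disable_intercept_for_msr(msr_bitmap,
						      MSR_IA32_PRED_CMD,
						      MSR_TYPE_W);
		else
			vmx_enable_intercept_for_msr(msr_bitmap,
						     MSR_IA32_PRED_CMD,
						     MSR_TYPE_W);
	}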


> > @@ -3330,6 +3331,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	case MSR_IA32_TSC:
> >  		kvm_write_tsc(vcpu, msr_info);
> >  		break;
> > +	case MSR_IA32_PRED_CMD:
> > +		if (!msr_info->host_initiated &&
> > +		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
> > +			return 1;
> > +
> > +		if (data & PRED_CMD_IBPB)
> > +			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
> > +		break;
>
> Should this also be in svm.c or as common code in x86.c?

See my response to [0/4]. I suggested that, but noted that it wasn't
entirely clear where we'd put the storage for SPEC_CTRL. We probably
*could* manage it for IBPB though.
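
Since PRED_CMD is write-only and carries no guest-visible state
(unlike SPEC_CTRL, which needs per-vCPU storage), a common handler in
x86.c is plausible. A sketch, with a hypothetical helper name; note
that the reserved-bits check here is an addition over the posted hunk:

	/* Hypothetical helper for kvm_set_msr_common(); sketch only. */
	static int kvm_set_pred_cmd(struct kvm_vcpu *vcpu,
				    struct msr_data *msr_info)
	{
		if (!msr_info->host_initiated &&
		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
			return 1;	/* feature not exposed to this guest */

		/* PRED_CMD is write-only; only bit 0 (IBPB) is defined. */
		if (msr_info->data & ~PRED_CMD_IBPB)
			return 1;

		if (msr_info->data & PRED_CMD_IBPB)
			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);

		return 0;
	}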

> > 
> >  	case MSR_IA32_CR_PAT:
> >  		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
> >  			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> > @@ -9548,6 +9557,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
> >  		goto free_msrs;
> >  
> >  	msr_bitmap = vmx->vmcs01.msr_bitmap;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_IBPB))
> > +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W);
>
> Same comment here as in svm.c: is the feature check necessary?

Again, yes, but it should be guest_cpuid_has() and we couldn't see how :)
Jim Mattson Jan. 30, 2018, 5:19 p.m. UTC | #3
On Mon, Jan 29, 2018 at 4:10 PM, KarimAllah Ahmed <karahmed@amazon.de> wrote:
> From: Ashok Raj <ashok.raj@intel.com>
> [...]
>
> @@ -9548,6 +9557,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>                 goto free_msrs;
>
>         msr_bitmap = vmx->vmcs01.msr_bitmap;
> +
> +       if (boot_cpu_has(X86_FEATURE_IBPB))
> +               vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W);

Are you planning to allow L2 to write MSR_IA32_PRED_CMD without L0
intercepting it, if the MSR write intercept is disabled in both the
vmcs01 MSR permission bitmap and the vmcs12 MSR permission bitmap?
David Woodhouse Jan. 30, 2018, 5:43 p.m. UTC | #4
On Tue, 2018-01-30 at 09:19 -0800, Jim Mattson wrote:
> 
> Are you planning to allow L2 to write MSR_IA32_PRED_CMD without L0
> intercepting it, if the MSR write intercept is disabled in both the
> vmcs01 MSR permission bitmap and the vmcs12 MSR permission bitmap?

I don't see why we shouldn't.
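
Concretely, that decision would fall out of the nested MSR-bitmap
merge: the merged vmcs02 bitmap leaves the PRED_CMD write
unintercepted only if both L0's vmcs01 bitmap and L1's vmcs12 bitmap
do. A sketch with hypothetical helper names; PRED_CMD (0x49) is in the
low MSR range, whose write bitmap starts at byte offset 0x800:

	/* True if a write to a low MSR (0x0000-0x1fff) is intercepted. */
	static bool msr_write_intercepted(unsigned long *msr_bitmap, u32 msr)
	{
		return test_bit(msr, msr_bitmap + 0x800 / sizeof(long));
	}

	/* Evaluated while building vmcs02's bitmap from vmcs01 and vmcs12. */
	static bool nested_pred_cmd_passthrough(struct vcpu_vmx *vmx,
						unsigned long *vmcs12_bitmap)
	{
		return !msr_write_intercepted(vmx->vmcs01.msr_bitmap,
					      MSR_IA32_PRED_CMD) &&
		       !msr_write_intercepted(vmcs12_bitmap,
					      MSR_IA32_PRED_CMD);
	}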

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c0eb337..033004d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -365,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
 
+	/* cpuid 0x80000008.ebx */
+	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+		F(IBPB);
+
 	/* cpuid 0xC0000001.edx */
 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
 		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
@@ -625,7 +629,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		if (!g_phys_as)
 			g_phys_as = phys_as;
 		entry->eax = g_phys_as | (virt_as << 8);
-		entry->ebx = entry->edx = 0;
+		entry->edx = 0;
+		/* IBPB isn't necessarily present in hardware cpuid */
+		if (boot_cpu_has(X86_FEATURE_IBPB))
+			entry->ebx |= F(IBPB);
+		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
 		break;
 	}
 	case 0x80000019:
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2744b973..c886e46 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -529,6 +529,7 @@ struct svm_cpu_data {
 	struct kvm_ldttss_desc *tss_desc;
 
 	struct page *save_area;
+	struct vmcb *current_vmcb;
 };
 
 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
 
 		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
 	}
+
+	if (boot_cpu_has(X86_FEATURE_IBPB))
+		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
 }
 
 static void add_msr_offset(u32 offset)
@@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
+	/*
+	 * The vmcb page can be recycled, causing a false negative in
+	 * svm_vcpu_load(). So do a full IBPB now.
+	 */
+	indirect_branch_prediction_barrier();
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
@@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
+	if (sd->current_vmcb != svm->vmcb) {
+		sd->current_vmcb = svm->vmcb;
+		indirect_branch_prediction_barrier();
+	}
 	avic_vcpu_load(vcpu, cpu);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa8638a..ea278ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2272,6 +2272,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
+		indirect_branch_prediction_barrier();
 	}
 
 	if (!already_loaded) {
@@ -3330,6 +3331,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC:
 		kvm_write_tsc(vcpu, msr_info);
 		break;
+	case MSR_IA32_PRED_CMD:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+			return 1;
+
+		if (data & PRED_CMD_IBPB)
+			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -9548,6 +9557,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_msrs;
 
 	msr_bitmap = vmx->vmcs01.msr_bitmap;
+
+	if (boot_cpu_has(X86_FEATURE_IBPB))
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);