Message ID | 1516476182-5153-3-git-send-email-karahmed@amazon.de (mailing list archive) |
---|---|
State | New, archived |
On Sat, 2018-01-20 at 20:22 +0100, KarimAllah Ahmed wrote:
>
> @@ -6791,6 +6792,9 @@ static __init int hardware_setup(void)
>  		kvm_tsc_scaling_ratio_frac_bits = 48;
>  	}
>
> +	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
> +		vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
> +

I've updated that to allow X86_FEATURE_AMD_PRED_CMD too, since some
hypervisors may expose *only* that MSR to guests even on Intel hardware.
PRED_CMD is a lot easier to expose as it doesn't need storage, live
migration support, and all that crap.

Our shared tree at
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
updated accordingly.
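For illustration, a minimal sketch of what that relaxed check might look
like in hardware_setup(). It follows the feature-flag names used in this
thread (X86_FEATURE_AMD_PRED_CMD was an interim name), not necessarily the
final upstream code:

	/*
	 * Sketch: expose PRED_CMD if either the Intel or the AMD enumeration
	 * of IBPB is visible on the host, since some hypervisors expose only
	 * the AMD bit even on Intel hardware.
	 */
	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
	    boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
		vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);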
On Sat, Jan 20, 2018 at 11:22 AM, KarimAllah Ahmed <karahmed@amazon.de> wrote:
> From: Ashok Raj <ashok.raj@intel.com>
>
> Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
> barriers on switching between VMs to avoid inter-VM Spectre v2 attacks.
>
> [peterz: rebase and changelog rewrite]
> [dwmw2: fixes]
> [karahmed: - vmx: expose PRED_CMD whenever it is available
>            - svm: only pass through IBPB if it is available]
>
> Cc: Asit Mallick <asit.k.mallick@intel.com>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
> Cc: Tim Chen <tim.c.chen@linux.intel.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Jun Nakajima <jun.nakajima@intel.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Greg KH <gregkh@linuxfoundation.org>
> Cc: David Woodhouse <dwmw@amazon.co.uk>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> ---
>  arch/x86/kvm/svm.c | 14 ++++++++++++++
>  arch/x86/kvm/vmx.c |  4 ++++
>  2 files changed, 18 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2744b973..cfdb9ab 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -529,6 +529,7 @@ struct svm_cpu_data {
>  	struct kvm_ldttss_desc *tss_desc;
>
>  	struct page *save_area;
> +	struct vmcb *current_vmcb;
>  };
>
>  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>
>  		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
>  	}
> +
> +	if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
> +		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
>  }
>
>  static void add_msr_offset(u32 offset)
> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(kvm_vcpu_cache, svm);
> +	/*
> +	 * The vmcb page can be recycled, causing a false negative in
> +	 * svm_vcpu_load(). So do a full IBPB now.
> +	 */
> +	indirect_branch_prediction_barrier();
>  }
>
>  static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
>  	int i;
>
>  	if (unlikely(cpu != vcpu->cpu)) {
> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (static_cpu_has(X86_FEATURE_RDTSCP))
>  		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>
> +	if (sd->current_vmcb != svm->vmcb) {
> +		sd->current_vmcb = svm->vmcb;
> +		indirect_branch_prediction_barrier();
> +	}
>  	avic_vcpu_load(vcpu, cpu);
>  }
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index d1e25db..3b64de2 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>  		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>  		vmcs_load(vmx->loaded_vmcs->vmcs);
> +		indirect_branch_prediction_barrier();
>  	}
>
>  	if (!already_loaded) {
> @@ -6791,6 +6792,9 @@ static __init int hardware_setup(void)
>  		kvm_tsc_scaling_ratio_frac_bits = 48;
>  	}
>
> +	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))

I think the condition here should be:

if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))

__do_cpuid_ent should pass through X86_FEATURE_SPEC_CTRL from the
host, but userspace should be allowed to clear it.
(Userspace should not be allowed to set it if the host doesn't support it.)

> +		vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
> +
>  	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
>  	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
>  	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> --
> 2.7.4
>
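For illustration, a rough sketch of what the suggestion above could look
like if the check moved from hardware_setup() to a per-vCPU path such as
vmx_cpuid_update(), i.e. after userspace has set the guest CPUID. The
placement is an assumption for illustration, not the final upstream code:

	static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
	{
		/* ... existing CPUID-dependent updates ... */

		/*
		 * Sketch: pass PRED_CMD through only when this guest's CPUID
		 * advertises SPEC_CTRL (which __do_cpuid_ent would in turn
		 * only report when the host has it).
		 */
		if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
			vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
	}

Note that vmx_disable_intercept_for_msr() as used in this patch edits the
global MSR bitmap, which is why the follow-up below calls for a per-vCPU
bitmap.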
Oh, but to do that properly, you need one of the per-vCPU bitmap
implementations that Paolo and I have independently posted.

On Mon, Jan 22, 2018 at 10:56 AM, Jim Mattson <jmattson@google.com> wrote:
> On Sat, Jan 20, 2018 at 11:22 AM, KarimAllah Ahmed <karahmed@amazon.de> wrote:
>> From: Ashok Raj <ashok.raj@intel.com>
>>
>> Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
>> barriers on switching between VMs to avoid inter-VM Spectre v2 attacks.
>>
>> [peterz: rebase and changelog rewrite]
>> [dwmw2: fixes]
>> [karahmed: - vmx: expose PRED_CMD whenever it is available
>>            - svm: only pass through IBPB if it is available]
>>
>> Cc: Asit Mallick <asit.k.mallick@intel.com>
>> Cc: Dave Hansen <dave.hansen@intel.com>
>> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
>> Cc: Tim Chen <tim.c.chen@linux.intel.com>
>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>> Cc: Andrea Arcangeli <aarcange@redhat.com>
>> Cc: Andi Kleen <ak@linux.intel.com>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Dan Williams <dan.j.williams@intel.com>
>> Cc: Jun Nakajima <jun.nakajima@intel.com>
>> Cc: Andy Lutomirski <luto@kernel.org>
>> Cc: Greg KH <gregkh@linuxfoundation.org>
>> Cc: David Woodhouse <dwmw@amazon.co.uk>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
>>
>> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
>> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
>> ---
>>  arch/x86/kvm/svm.c | 14 ++++++++++++++
>>  arch/x86/kvm/vmx.c |  4 ++++
>>  2 files changed, 18 insertions(+)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 2744b973..cfdb9ab 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -529,6 +529,7 @@ struct svm_cpu_data {
>>  	struct kvm_ldttss_desc *tss_desc;
>>
>>  	struct page *save_area;
>> +	struct vmcb *current_vmcb;
>>  };
>>
>>  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
>> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>>
>>  		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
>>  	}
>> +
>> +	if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
>> +		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
>>  }
>>
>>  static void add_msr_offset(u32 offset)
>> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
>>  	kvm_vcpu_uninit(vcpu);
>>  	kmem_cache_free(kvm_vcpu_cache, svm);
>> +	/*
>> +	 * The vmcb page can be recycled, causing a false negative in
>> +	 * svm_vcpu_load(). So do a full IBPB now.
>> +	 */
>> +	indirect_branch_prediction_barrier();
>>  }
>>
>>  static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>  {
>>  	struct vcpu_svm *svm = to_svm(vcpu);
>> +	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
>>  	int i;
>>
>>  	if (unlikely(cpu != vcpu->cpu)) {
>> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>  	if (static_cpu_has(X86_FEATURE_RDTSCP))
>>  		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>>
>> +	if (sd->current_vmcb != svm->vmcb) {
>> +		sd->current_vmcb = svm->vmcb;
>> +		indirect_branch_prediction_barrier();
>> +	}
>>  	avic_vcpu_load(vcpu, cpu);
>>  }
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index d1e25db..3b64de2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>>  		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>>  		vmcs_load(vmx->loaded_vmcs->vmcs);
>> +		indirect_branch_prediction_barrier();
>>  	}
>>
>>  	if (!already_loaded) {
>> @@ -6791,6 +6792,9 @@ static __init int hardware_setup(void)
>>  		kvm_tsc_scaling_ratio_frac_bits = 48;
>>  	}
>>
>> +	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
>
> I think the condition here should be:
>
> if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
>
> __do_cpuid_ent should pass through X86_FEATURE_SPEC_CTRL from the
> host, but userspace should be allowed to clear it.
> (Userspace should not be allowed to set it if the host doesn't support it.)
>
>> +		vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
>> +
>>  	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
>>  	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
>>  	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
>> --
>> 2.7.4
>>
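For illustration, a rough sketch of the per-vCPU bitmap idea referred to
above. The structure field and helper wiring here are hypothetical (the
actual implementations are in the patches Mattson and Bonzini posted
separately): each vCPU owns its own MSR bitmap page, the VMCS points at
that page, and the PRED_CMD passthrough decision can then follow that
particular vCPU's CPUID.

	struct vcpu_vmx {
		/* ... existing fields ... */
		unsigned long *msr_bitmap;	/* this vCPU's private bitmap (assumption) */
	};

	/* Hypothetical helper, called once this vCPU's CPUID is known. */
	static void vmx_update_pred_cmd_intercept(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);

		if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
			__vmx_disable_intercept_for_msr(vmx->msr_bitmap,
							MSR_IA32_PRED_CMD,
							MSR_TYPE_W);

		/* Make the hardware consult this vCPU's bitmap on VM entry. */
		vmcs_write64(MSR_BITMAP, __pa(vmx->msr_bitmap));
	}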
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2744b973..cfdb9ab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -529,6 +529,7 @@ struct svm_cpu_data {
 	struct kvm_ldttss_desc *tss_desc;
 
 	struct page *save_area;
+	struct vmcb *current_vmcb;
 };
 
 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
 
 		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
 	}
+
+	if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
 }
 
 static void add_msr_offset(u32 offset)
@@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
+	/*
+	 * The vmcb page can be recycled, causing a false negative in
+	 * svm_vcpu_load(). So do a full IBPB now.
+	 */
+	indirect_branch_prediction_barrier();
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
@@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
+	if (sd->current_vmcb != svm->vmcb) {
+		sd->current_vmcb = svm->vmcb;
+		indirect_branch_prediction_barrier();
+	}
 	avic_vcpu_load(vcpu, cpu);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d1e25db..3b64de2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
+		indirect_branch_prediction_barrier();
 	}
 
 	if (!already_loaded) {
@@ -6791,6 +6792,9 @@ static __init int hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 48;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+		vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
+
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
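The diff relies on indirect_branch_prediction_barrier(), which is introduced
elsewhere in this series. Conceptually it writes the IBPB bit of
IA32_PRED_CMD, discarding the indirect branch predictor state trained by
whatever ran previously on that logical CPU, so one VM's training cannot
steer another VM's indirect branches. A minimal sketch (the constant names
and the missing feature guard are assumptions, not the series' exact code):

	/*
	 * Sketch only: the real helper guards the WRMSR with a feature check
	 * or alternative so it is a no-op on CPUs without IBPB support.
	 */
	static inline void indirect_branch_prediction_barrier(void)
	{
		/* IA32_PRED_CMD bit 0 (IBPB): flush indirect branch predictor state. */
		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
	}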