diff mbox series

[3/5] KVM: nVMX: Update VMX controls MSR according to guest CPUID after setting VMX MSRs

Message ID 20200828085622.8365-4-chenyi.qiang@intel.com (mailing list archive)
State New, archived
Headers show
Series Fix nested VMX controls MSRs | expand

Commit Message

Chenyi Qiang Aug. 28, 2020, 8:56 a.m. UTC
Update the fields (i.e. VM_{ENTRY_LOAD, EXIT_CLEAR}_BNDCFGS and
VM_{ENTRY, EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL) in
nested MSR_IA32_VMX_TRUE_{ENTRY, EXIT}_CTLS according to guest CPUID
when user space initializes the features MSRs. Regardless of the order
of SET_CPUID and SET_MSRS from the user space, do the update to avoid
MSR values overriding.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Comments

Jim Mattson Aug. 28, 2020, 8:39 p.m. UTC | #1
On Fri, Aug 28, 2020 at 1:54 AM Chenyi Qiang <chenyi.qiang@intel.com> wrote:
>
> Update the fields (i.e. VM_{ENTRY_LOAD, EXIT_CLEAR}_BNDCFGS and
> VM_{ENTRY, EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL) in
> nested MSR_IA32_VMX_TRUE_{ENTRY, EXIT}_CTLS according to guest CPUID
> when user space initializes the features MSRs. Regardless of the order
> of SET_CPUID and SET_MSRS from the user space, do the update to avoid
> MSR values overriding.
>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 819c185adf09..f9664ccc003b 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -345,6 +345,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu);
>  static u32 vmx_segment_access_rights(struct kvm_segment *var);
>  static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
>                                                           u32 msr, int type);
> +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
>
>  void vmx_vmexit(void);
>
> @@ -2161,7 +2162,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                         return 1; /* they are read-only */
>                 if (!nested_vmx_allowed(vcpu))
>                         return 1;
> -               return vmx_set_vmx_msr(vcpu, msr_index, data);
> +               ret = vmx_set_vmx_msr(vcpu, msr_index, data);
> +               nested_vmx_pmu_entry_exit_ctls_update(vcpu);
> +               nested_vmx_entry_exit_ctls_update(vcpu);
> +               break;

Now I see what you're doing. This commit should probably come before
the previous commit, so that at no point in the series can userspace
set VMX MSR bits that should be cleared based on the guest CPUID.

There's an ABI change here: userspace may no longer get -EINVAL if it
tries to set an illegal VMX MSR bit. Instead, some illegal bits are
silently cleared. Moreover, these functions will potentially set VMX
MSR bits that userspace has just asked to clear.

>         case MSR_IA32_RTIT_CTL:
>                 if (!vmx_pt_mode_is_host_guest() ||
>                         vmx_rtit_ctl_check(vcpu, data) ||
> --
> 2.17.1
>
Sean Christopherson Sept. 2, 2020, 6:16 p.m. UTC | #2
On Fri, Aug 28, 2020 at 01:39:39PM -0700, Jim Mattson wrote:
> On Fri, Aug 28, 2020 at 1:54 AM Chenyi Qiang <chenyi.qiang@intel.com> wrote:
> >
> > Update the fields (i.e. VM_{ENTRY_LOAD, EXIT_CLEAR}_BNDCFGS and
> > VM_{ENTRY, EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL) in
> > nested MSR_IA32_VMX_TRUE_{ENTRY, EXIT}_CTLS according to guest CPUID
> > when user space initializes the features MSRs. Regardless of the order
> > of SET_CPUID and SET_MSRS from the user space, do the update to avoid
> > MSR values overriding.
> >
> > Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> > ---
> >  arch/x86/kvm/vmx/vmx.c | 6 +++++-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 819c185adf09..f9664ccc003b 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -345,6 +345,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu);
> >  static u32 vmx_segment_access_rights(struct kvm_segment *var);
> >  static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> >                                                           u32 msr, int type);
> > +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
> >
> >  void vmx_vmexit(void);
> >
> > @@ -2161,7 +2162,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >                         return 1; /* they are read-only */
> >                 if (!nested_vmx_allowed(vcpu))
> >                         return 1;
> > -               return vmx_set_vmx_msr(vcpu, msr_index, data);
> > +               ret = vmx_set_vmx_msr(vcpu, msr_index, data);
> > +               nested_vmx_pmu_entry_exit_ctls_update(vcpu);
> > +               nested_vmx_entry_exit_ctls_update(vcpu);
> > +               break;
> 
> Now I see what you're doing. This commit should probably come before
> the previous commit, so that at no point in the series can userspace
> set VMX MSR bits that should be cleared based on the guest CPUID.
> 
> There's an ABI change here: userspace may no longer get -EINVAL if it
> tries to set an illegal VMX MSR bit. Instead, some illegal bits are
> silently cleared. Moreover, these functions will potentially set VMX
> MSR bits that userspace has just asked to clear.

Can we simply remove nested_vmx_entry_exit_ctls_update() and
nested_vmx_pmu_entry_exit_ctls_update()?  It's userspace's responsibility
to present a valid vCPU model to the guest, I don't see any reason to
silently tweak the VMX MSRs unless allowing the bogus config breaks KVM.
E.g. there are many more controls that are non-sensical without "native"
support for the associated feature.
Jim Mattson Sept. 2, 2020, 6:32 p.m. UTC | #3
On Wed, Sep 2, 2020 at 11:16 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Aug 28, 2020 at 01:39:39PM -0700, Jim Mattson wrote:
> > On Fri, Aug 28, 2020 at 1:54 AM Chenyi Qiang <chenyi.qiang@intel.com> wrote:
> > >
> > > Update the fields (i.e. VM_{ENTRY_LOAD, EXIT_CLEAR}_BNDCFGS and
> > > VM_{ENTRY, EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL) in
> > > nested MSR_IA32_VMX_TRUE_{ENTRY, EXIT}_CTLS according to guest CPUID
> > > when user space initializes the features MSRs. Regardless of the order
> > > of SET_CPUID and SET_MSRS from the user space, do the update to avoid
> > > MSR values overriding.
> > >
> > > Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> > > ---
> > >  arch/x86/kvm/vmx/vmx.c | 6 +++++-
> > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index 819c185adf09..f9664ccc003b 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -345,6 +345,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu);
> > >  static u32 vmx_segment_access_rights(struct kvm_segment *var);
> > >  static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> > >                                                           u32 msr, int type);
> > > +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
> > >
> > >  void vmx_vmexit(void);
> > >
> > > @@ -2161,7 +2162,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > >                         return 1; /* they are read-only */
> > >                 if (!nested_vmx_allowed(vcpu))
> > >                         return 1;
> > > -               return vmx_set_vmx_msr(vcpu, msr_index, data);
> > > +               ret = vmx_set_vmx_msr(vcpu, msr_index, data);
> > > +               nested_vmx_pmu_entry_exit_ctls_update(vcpu);
> > > +               nested_vmx_entry_exit_ctls_update(vcpu);
> > > +               break;
> >
> > Now I see what you're doing. This commit should probably come before
> > the previous commit, so that at no point in the series can userspace
> > set VMX MSR bits that should be cleared based on the guest CPUID.
> >
> > There's an ABI change here: userspace may no longer get -EINVAL if it
> > tries to set an illegal VMX MSR bit. Instead, some illegal bits are
> > silently cleared. Moreover, these functions will potentially set VMX
> > MSR bits that userspace has just asked to clear.
>
> Can we simply remove nested_vmx_entry_exit_ctls_update() and
> nested_vmx_pmu_entry_exit_ctls_update()?  It's userspace's responsibility
> to present a valid vCPU model to the guest, I don't see any reason to
> silently tweak the VMX MSRs unless allowing the bogus config breaks KVM.
> E.g. there are many more controls that are non-sensical without "native"
> support for the associated feature.

We might need a test for kvm_mpx_supported() here:

/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
        vmcs_write64(GUEST_BNDCFGS, 0);

BTW, where does the L2 value propagate to L1 if not VM_EXIT_CLEAR_BNDCFGS?
Paolo Bonzini Sept. 11, 2020, 5:09 p.m. UTC | #4
On 02/09/20 20:32, Jim Mattson wrote:
> 
> /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
> if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
>         vmcs_write64(GUEST_BNDCFGS, 0);
> 
> BTW, where does the L2 value propagate to L1 if not VM_EXIT_CLEAR_BNDCFGS?

Hmm, nowhere. :/  Probably something like this (not really thought through):

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1e903d51912b..aba76aa99465 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3317,7 +3317,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 	if (kvm_mpx_supported() &&
-		!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+	    (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) ||
+	     !(vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)))
 		vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
 
 	/*
@@ -4186,9 +4187,12 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
 
-	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
-	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
-		vmcs_write64(GUEST_BNDCFGS, 0);
+	if (kvm_mpx_supported()) {
+		if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+			vmcs_write64(GUEST_BNDCFGS, 0);
+		else
+			vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+	}
 
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
@@ -4466,6 +4470,10 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		vmx_set_virtual_apic_mode(vcpu);
 	}
 
+	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
+	if (!(vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS))
+		vmx->nested.vmcs01_guest_bndcfgs = vmcs12->guest_bndcfgs;
+
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_clean(vmx->nested.apic_access_page);


which will also work in the failed vmentry case.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 819c185adf09..f9664ccc003b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -345,6 +345,7 @@  static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 							  u32 msr, int type);
+static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
 
 void vmx_vmexit(void);
 
@@ -2161,7 +2162,10 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1; /* they are read-only */
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
-		return vmx_set_vmx_msr(vcpu, msr_index, data);
+		ret = vmx_set_vmx_msr(vcpu, msr_index, data);
+		nested_vmx_pmu_entry_exit_ctls_update(vcpu);
+		nested_vmx_entry_exit_ctls_update(vcpu);
+		break;
 	case MSR_IA32_RTIT_CTL:
 		if (!vmx_pt_mode_is_host_guest() ||
 			vmx_rtit_ctl_check(vcpu, data) ||