diff mbox series

[RFC,3/3] KVM: x86: Use KVM_BUG/KVM_BUG_ON to handle bugs that are fatal to the VM

Message ID 20200923224530.17735-4-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: Introduce "VM bugged" concept | expand

Commit Message

Sean Christopherson Sept. 23, 2020, 10:45 p.m. UTC
Add support for KVM_REQ_VM_BUGG in x86, and replace a variety of WARNs
with KVM_BUG() and KVM_BUG_ON().  Return -EIO if a KVM_BUG is hit to
align with the common KVM behavior of rejecting iocts() with -EIO if the
VM is bugged.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/svm/svm.c |  2 +-
 arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++---------
 arch/x86/kvm/x86.c     |  4 ++++
 3 files changed, 19 insertions(+), 10 deletions(-)

Comments

Vitaly Kuznetsov Sept. 24, 2020, 12:34 p.m. UTC | #1
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> Add support for KVM_REQ_VM_BUGG in x86, and replace a variety of WARNs
> with KVM_BUG() and KVM_BUG_ON().  Return -EIO if a KVM_BUG is hit to
> align with the common KVM behavior of rejecting iocts() with -EIO if the
> VM is bugged.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/svm/svm.c |  2 +-
>  arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++---------
>  arch/x86/kvm/x86.c     |  4 ++++
>  3 files changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 3da5b2f1b4a1..e684794c6249 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1380,7 +1380,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
>  		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
>  		break;
>  	default:
> -		WARN_ON_ONCE(1);
> +		KVM_BUG_ON(1, vcpu->kvm);
>  	}
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6f9a0c6d5dc5..810d46ab0a47 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2250,7 +2250,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
>  		vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
>  		break;
>  	default:
> -		WARN_ON_ONCE(1);
> +		KVM_BUG_ON(1, vcpu->kvm);
>  		break;
>  	}
>  }
> @@ -4960,6 +4960,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
>  			return kvm_complete_insn_gp(vcpu, err);
>  		case 3:
>  			WARN_ON_ONCE(enable_unrestricted_guest);
> +
>  			err = kvm_set_cr3(vcpu, val);
>  			return kvm_complete_insn_gp(vcpu, err);
>  		case 4:
> @@ -4985,14 +4986,13 @@ static int handle_cr(struct kvm_vcpu *vcpu)
>  		}
>  		break;
>  	case 2: /* clts */
> -		WARN_ONCE(1, "Guest should always own CR0.TS");
> -		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
> -		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
> -		return kvm_skip_emulated_instruction(vcpu);
> +		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
> +		return -EIO;
>  	case 1: /*mov from cr*/
>  		switch (cr) {
>  		case 3:
>  			WARN_ON_ONCE(enable_unrestricted_guest);
> +

Here, were you intended to replace WARN_ON_ONCE() with KVM_BUG_ON() or
this is just a stray newline added?

>  			val = kvm_read_cr3(vcpu);
>  			kvm_register_write(vcpu, reg, val);
>  			trace_kvm_cr_read(cr, val);
> @@ -5330,7 +5330,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
>  
>  static int handle_nmi_window(struct kvm_vcpu *vcpu)
>  {
> -	WARN_ON_ONCE(!enable_vnmi);
> +	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
> +		return -EIO;
> +
>  	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
>  	++vcpu->stat.nmi_window_exits;
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> @@ -5908,7 +5910,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
>  	 * below) should never happen as that means we incorrectly allowed a
>  	 * nested VM-Enter with an invalid vmcs12.
>  	 */
> -	WARN_ON_ONCE(vmx->nested.nested_run_pending);
> +	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
> +		return -EIO;
>  
>  	/* If guest state is invalid, start emulating */
>  	if (vmx->emulation_required)
> @@ -6258,7 +6261,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>  	int max_irr;
>  	bool max_irr_updated;
>  
> -	WARN_ON(!vcpu->arch.apicv_active);
> +	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
> +		return -EIO;
> +
>  	if (pi_test_on(&vmx->pi_desc)) {
>  		pi_clear_on(&vmx->pi_desc);
>  		/*
> @@ -6345,7 +6350,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  	gate_desc *desc;
>  	u32 intr_info = vmx_get_intr_info(vcpu);
>  
> -	if (WARN_ONCE(!is_external_intr(intr_info),
> +	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
>  	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
>  		return;
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 17f4995e80a7..672eb5142b34 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8363,6 +8363,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	bool req_immediate_exit = false;
>  
>  	if (kvm_request_pending(vcpu)) {
> +		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {

Do we want to allow userspace to continue executing the guest or should
we make KVM_REQ_VM_BUGGED permanent by replacing kvm_check_request()
with kvm_test_request()?

> +			r = -EIO;
> +			goto out;
> +		}
>  		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
>  			if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
>  				r = 0;
Sean Christopherson Sept. 24, 2020, 6:11 p.m. UTC | #2
On Thu, Sep 24, 2020 at 02:34:14PM +0200, Vitaly Kuznetsov wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 6f9a0c6d5dc5..810d46ab0a47 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -2250,7 +2250,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
> >  		vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
> >  		break;
> >  	default:
> > -		WARN_ON_ONCE(1);
> > +		KVM_BUG_ON(1, vcpu->kvm);
> >  		break;
> >  	}
> >  }
> > @@ -4960,6 +4960,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
> >  			return kvm_complete_insn_gp(vcpu, err);
> >  		case 3:
> >  			WARN_ON_ONCE(enable_unrestricted_guest);
> > +
> >  			err = kvm_set_cr3(vcpu, val);
> >  			return kvm_complete_insn_gp(vcpu, err);
> >  		case 4:
> > @@ -4985,14 +4986,13 @@ static int handle_cr(struct kvm_vcpu *vcpu)
> >  		}
> >  		break;
> >  	case 2: /* clts */
> > -		WARN_ONCE(1, "Guest should always own CR0.TS");
> > -		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
> > -		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
> > -		return kvm_skip_emulated_instruction(vcpu);
> > +		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
> > +		return -EIO;
> >  	case 1: /*mov from cr*/
> >  		switch (cr) {
> >  		case 3:
> >  			WARN_ON_ONCE(enable_unrestricted_guest);
> > +
> 
> Here, were you intended to replace WARN_ON_ONCE() with KVM_BUG_ON() or
> this is just a stray newline added?

I think it's just a stray newline.  At one point I had converted this to a
KVM_BUG_ON(), but then reversed direction because it's not fatal to the guest,
i.e. KVM should continue to function even though it's spuriously intercepting
CR3 loads.

Which, rereading this patch, completely contradicts the KVM_BUG() for CLTS.

That's probably something we should sort out in this RFC: is KVM_BUG() only
to be used if the bug is fatal/dangerous, or should it be used any time the
error is definitely a KVM (or hardware) bug.

> >  			val = kvm_read_cr3(vcpu);
> >  			kvm_register_write(vcpu, reg, val);
> >  			trace_kvm_cr_read(cr, val);
> > @@ -5330,7 +5330,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
> >  
> >  static int handle_nmi_window(struct kvm_vcpu *vcpu)
> >  {
> > -	WARN_ON_ONCE(!enable_vnmi);
> > +	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
> > +		return -EIO;
> > +
> >  	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
> >  	++vcpu->stat.nmi_window_exits;
> >  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > @@ -5908,7 +5910,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
> >  	 * below) should never happen as that means we incorrectly allowed a
> >  	 * nested VM-Enter with an invalid vmcs12.
> >  	 */
> > -	WARN_ON_ONCE(vmx->nested.nested_run_pending);
> > +	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
> > +		return -EIO;
> >  
> >  	/* If guest state is invalid, start emulating */
> >  	if (vmx->emulation_required)
> > @@ -6258,7 +6261,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> >  	int max_irr;
> >  	bool max_irr_updated;
> >  
> > -	WARN_ON(!vcpu->arch.apicv_active);
> > +	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
> > +		return -EIO;
> > +
> >  	if (pi_test_on(&vmx->pi_desc)) {
> >  		pi_clear_on(&vmx->pi_desc);
> >  		/*
> > @@ -6345,7 +6350,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> >  	gate_desc *desc;
> >  	u32 intr_info = vmx_get_intr_info(vcpu);
> >  
> > -	if (WARN_ONCE(!is_external_intr(intr_info),
> > +	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
> >  	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
> >  		return;
> >  
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 17f4995e80a7..672eb5142b34 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -8363,6 +8363,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  	bool req_immediate_exit = false;
> >  
> >  	if (kvm_request_pending(vcpu)) {
> > +		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
> 
> Do we want to allow userspace to continue executing the guest or should
> we make KVM_REQ_VM_BUGGED permanent by replacing kvm_check_request()
> with kvm_test_request()?

In theory, it should be impossible to reach this again as "r = -EIO" will
bounce this out to userspace, the common checks to deny all ioctls() will
prevent reinvoking KVM_RUN.

> > +			r = -EIO;
> > +			goto out;
> > +		}
> >  		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
> >  			if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
> >  				r = 0;
> 
> -- 
> Vitaly
>
Vitaly Kuznetsov Sept. 25, 2020, 9:50 a.m. UTC | #3
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Thu, Sep 24, 2020 at 02:34:14PM +0200, Vitaly Kuznetsov wrote:
>> Sean Christopherson <sean.j.christopherson@intel.com> writes:
>> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> > index 6f9a0c6d5dc5..810d46ab0a47 100644
>> > --- a/arch/x86/kvm/vmx/vmx.c
>> > +++ b/arch/x86/kvm/vmx/vmx.c
>> > @@ -2250,7 +2250,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
>> >  		vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
>> >  		break;
>> >  	default:
>> > -		WARN_ON_ONCE(1);
>> > +		KVM_BUG_ON(1, vcpu->kvm);
>> >  		break;
>> >  	}
>> >  }
>> > @@ -4960,6 +4960,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
>> >  			return kvm_complete_insn_gp(vcpu, err);
>> >  		case 3:
>> >  			WARN_ON_ONCE(enable_unrestricted_guest);
>> > +
>> >  			err = kvm_set_cr3(vcpu, val);
>> >  			return kvm_complete_insn_gp(vcpu, err);
>> >  		case 4:
>> > @@ -4985,14 +4986,13 @@ static int handle_cr(struct kvm_vcpu *vcpu)
>> >  		}
>> >  		break;
>> >  	case 2: /* clts */
>> > -		WARN_ONCE(1, "Guest should always own CR0.TS");
>> > -		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
>> > -		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
>> > -		return kvm_skip_emulated_instruction(vcpu);
>> > +		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
>> > +		return -EIO;
>> >  	case 1: /*mov from cr*/
>> >  		switch (cr) {
>> >  		case 3:
>> >  			WARN_ON_ONCE(enable_unrestricted_guest);
>> > +
>> 
>> Here, were you intended to replace WARN_ON_ONCE() with KVM_BUG_ON() or
>> this is just a stray newline added?
>
> I think it's just a stray newline.  At one point I had converted this to a
> KVM_BUG_ON(), but then reversed direction because it's not fatal to the guest,
> i.e. KVM should continue to function even though it's spuriously intercepting
> CR3 loads.
>
> Which, rereading this patch, completely contradicts the KVM_BUG() for CLTS.
>
> That's probably something we should sort out in this RFC: is KVM_BUG() only
> to be used if the bug is fatal/dangerous, or should it be used any time the
> error is definitely a KVM (or hardware) bug.

Personally, I'm feeling adventurous so my vote goes to the later :-)
Whenever a KVM bug was discovered by a VM it's much safer to stop
executing it as who knows what the implications might be?

In this particular case I can think of a nested scenario when L1 didn't
ask for CR3 intercept but L0 is still injecting it. It is not fatal by
itself but probably there is bug in calculating intercepts in L0 so
if we're getting something extra maybe we're also missing some? And this
doesn't sound good at all.

>
>> >  			val = kvm_read_cr3(vcpu);
>> >  			kvm_register_write(vcpu, reg, val);
>> >  			trace_kvm_cr_read(cr, val);
>> > @@ -5330,7 +5330,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
>> >  
>> >  static int handle_nmi_window(struct kvm_vcpu *vcpu)
>> >  {
>> > -	WARN_ON_ONCE(!enable_vnmi);
>> > +	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
>> > +		return -EIO;
>> > +
>> >  	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
>> >  	++vcpu->stat.nmi_window_exits;
>> >  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>> > @@ -5908,7 +5910,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
>> >  	 * below) should never happen as that means we incorrectly allowed a
>> >  	 * nested VM-Enter with an invalid vmcs12.
>> >  	 */
>> > -	WARN_ON_ONCE(vmx->nested.nested_run_pending);
>> > +	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
>> > +		return -EIO;
>> >  
>> >  	/* If guest state is invalid, start emulating */
>> >  	if (vmx->emulation_required)
>> > @@ -6258,7 +6261,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> >  	int max_irr;
>> >  	bool max_irr_updated;
>> >  
>> > -	WARN_ON(!vcpu->arch.apicv_active);
>> > +	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
>> > +		return -EIO;
>> > +
>> >  	if (pi_test_on(&vmx->pi_desc)) {
>> >  		pi_clear_on(&vmx->pi_desc);
>> >  		/*
>> > @@ -6345,7 +6350,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>> >  	gate_desc *desc;
>> >  	u32 intr_info = vmx_get_intr_info(vcpu);
>> >  
>> > -	if (WARN_ONCE(!is_external_intr(intr_info),
>> > +	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
>> >  	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
>> >  		return;
>> >  
>> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> > index 17f4995e80a7..672eb5142b34 100644
>> > --- a/arch/x86/kvm/x86.c
>> > +++ b/arch/x86/kvm/x86.c
>> > @@ -8363,6 +8363,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>> >  	bool req_immediate_exit = false;
>> >  
>> >  	if (kvm_request_pending(vcpu)) {
>> > +		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
>> 
>> Do we want to allow userspace to continue executing the guest or should
>> we make KVM_REQ_VM_BUGGED permanent by replacing kvm_check_request()
>> with kvm_test_request()?
>
> In theory, it should be impossible to reach this again as "r = -EIO" will
> bounce this out to userspace, the common checks to deny all ioctls() will
> prevent reinvoking KVM_RUN.

Do we actually want to prevent *all* ioctls? E.g. when 'vm bugged'
condition is triggered userspace may want to extract some information to
assist debugging but even things like KVM_GET_[S]REGS will just return
-EIO. I'm not sure it is generally safe to enable *everything* (except
for KVM_RUN which should definitely be forbidden) so maybe your approach
is preferable.

>
>> > +			r = -EIO;
>> > +			goto out;
>> > +		}
>> >  		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
>> >  			if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
>> >  				r = 0;
>> 
>> -- 
>> Vitaly
>> 
>
Sean Christopherson Sept. 25, 2020, 5:12 p.m. UTC | #4
On Fri, Sep 25, 2020 at 11:50:38AM +0200, Vitaly Kuznetsov wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> 
> > On Thu, Sep 24, 2020 at 02:34:14PM +0200, Vitaly Kuznetsov wrote:
> >> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> >> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> >> > index 6f9a0c6d5dc5..810d46ab0a47 100644
> >> > --- a/arch/x86/kvm/vmx/vmx.c
> >> > +++ b/arch/x86/kvm/vmx/vmx.c
> >> > @@ -4985,14 +4986,13 @@ static int handle_cr(struct kvm_vcpu *vcpu)
> >> >  		}
> >> >  		break;
> >> >  	case 2: /* clts */
> >> > -		WARN_ONCE(1, "Guest should always own CR0.TS");
> >> > -		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
> >> > -		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
> >> > -		return kvm_skip_emulated_instruction(vcpu);
> >> > +		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
> >> > +		return -EIO;
> >> >  	case 1: /*mov from cr*/
> >> >  		switch (cr) {
> >> >  		case 3:
> >> >  			WARN_ON_ONCE(enable_unrestricted_guest);
> >> > +
> >> 
> >> Here, were you intended to replace WARN_ON_ONCE() with KVM_BUG_ON() or
> >> this is just a stray newline added?
> >
> > I think it's just a stray newline.  At one point I had converted this to a
> > KVM_BUG_ON(), but then reversed direction because it's not fatal to the guest,
> > i.e. KVM should continue to function even though it's spuriously intercepting
> > CR3 loads.
> >
> > Which, rereading this patch, completely contradicts the KVM_BUG() for CLTS.
> >
> > That's probably something we should sort out in this RFC: is KVM_BUG() only
> > to be used if the bug is fatal/dangerous, or should it be used any time the
> > error is definitely a KVM (or hardware) bug.
> 
> Personally, I'm feeling adventurous so my vote goes to the later :-)
> Whenever a KVM bug was discovered by a VM it's much safer to stop
> executing it as who knows what the implications might be?

Not necessarily, e.g. terminating the VM may corrupt the VM's file system,
which is less safe, for lack of a better word, from the VM's perspective.

> In this particular case I can think of a nested scenario when L1 didn't
> ask for CR3 intercept but L0 is still injecting it. It is not fatal by
> itself but probably there is bug in calculating intercepts in L0 so
> if we're getting something extra maybe we're also missing some? And this
> doesn't sound good at all.

Hmm, but by that argument this scenario would fall into the "dangerous" part
of "bug is fatal/dangerous".  I guess my opinion is that we should set a
fairly high bar for using KVM_BUG() so that KVM can be aggressive in shutting
down.

> > In theory, it should be impossible to reach this again as "r = -EIO" will
> > bounce this out to userspace, the common checks to deny all ioctls() will
> > prevent reinvoking KVM_RUN.
> 
> Do we actually want to prevent *all* ioctls? E.g. when 'vm bugged'
> condition is triggered userspace may want to extract some information to
> assist debugging but even things like KVM_GET_[S]REGS will just return
> -EIO. I'm not sure it is generally safe to enable *everything* (except
> for KVM_RUN which should definitely be forbidden) so maybe your approach
> is preferable.

The answer to this probably depends on the answer to the first question of
when it's appropriate to use KVM_BUG().  E.g. if we limit usage to fatal or
dangrous cases, then blocking all ioctls() is probably the right thing do do.
Paolo Bonzini Sept. 25, 2020, 9:06 p.m. UTC | #5
On 25/09/20 19:12, Sean Christopherson wrote:
>> Do we actually want to prevent *all* ioctls? E.g. when 'vm bugged'
>> condition is triggered userspace may want to extract some information to
>> assist debugging but even things like KVM_GET_[S]REGS will just return
>> -EIO. I'm not sure it is generally safe to enable *everything* (except
>> for KVM_RUN which should definitely be forbidden) so maybe your approach
>> is preferable.
>
> The answer to this probably depends on the answer to the first question of
> when it's appropriate to use KVM_BUG().  E.g. if we limit usage to fatal or
> dangrous cases, then blocking all ioctls() is probably the right thing do do.

I think usage should be limited to dangerous cases, basically WARN_ON
level.  However I agree with Vitaly that KVM_GET_* should be allowed.

The other question is whether to return -EIO or KVM_EXIT_INTERNAL_ERROR.
 The latter is more likely to be handled already by userspace.

Paolo
Sean Christopherson Sept. 29, 2020, 3:52 a.m. UTC | #6
On Fri, Sep 25, 2020 at 11:06:10PM +0200, Paolo Bonzini wrote:
> On 25/09/20 19:12, Sean Christopherson wrote:
> >> Do we actually want to prevent *all* ioctls? E.g. when 'vm bugged'
> >> condition is triggered userspace may want to extract some information to
> >> assist debugging but even things like KVM_GET_[S]REGS will just return
> >> -EIO. I'm not sure it is generally safe to enable *everything* (except
> >> for KVM_RUN which should definitely be forbidden) so maybe your approach
> >> is preferable.
> >
> > The answer to this probably depends on the answer to the first question of
> > when it's appropriate to use KVM_BUG().  E.g. if we limit usage to fatal or
> > dangrous cases, then blocking all ioctls() is probably the right thing do do.
> 
> I think usage should be limited to dangerous cases, basically WARN_ON
> level.  However I agree with Vitaly that KVM_GET_* should be allowed.

Makes sense.

On the topic of feedback from Vitaly, while dredging through my mailbox I
rediscovered his suggestion of kvm->kvm_internal_bug (or maybe just
kvm->internal_bug) instead of kvm->vm_bugged[*].  Like past me, I like the
"internal" variants better.

[*] https://lkml.kernel.org/r/20190930153358.GD14693@linux.intel.com

> The other question is whether to return -EIO or KVM_EXIT_INTERNAL_ERROR.
>  The latter is more likely to be handled already by userspace.

And probably less confusing for unsuspecting users.  E.g. -EIO is most
likely to be interpreted as "I screwed up", whereas KVM_EXIT_INTERNAL_ERROR
will correctly be read as "KVM screwed up".
Paolo Bonzini Sept. 29, 2020, 9:15 a.m. UTC | #7
On 29/09/20 05:52, Sean Christopherson wrote:
>> I think usage should be limited to dangerous cases, basically WARN_ON
>> level.  However I agree with Vitaly that KVM_GET_* should be allowed.
>
> On the topic of feedback from Vitaly, while dredging through my mailbox I
> rediscovered his suggestion of kvm->kvm_internal_bug (or maybe just
> kvm->internal_bug) instead of kvm->vm_bugged[*].

Also agrees with KVM_EXIT_INTERNAL_ERROR.

>> The other question is whether to return -EIO or KVM_EXIT_INTERNAL_ERROR.
>>  The latter is more likely to be handled already by userspace.
>
> And probably less confusing for unsuspecting users.  E.g. -EIO is most
> likely to be interpreted as "I screwed up", whereas KVM_EXIT_INTERNAL_ERROR
> will correctly be read as "KVM screwed up".

All good points, seems like you have enough review material for the
non-RFC version.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3da5b2f1b4a1..e684794c6249 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1380,7 +1380,7 @@  static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		KVM_BUG_ON(1, vcpu->kvm);
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6f9a0c6d5dc5..810d46ab0a47 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2250,7 +2250,7 @@  static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		KVM_BUG_ON(1, vcpu->kvm);
 		break;
 	}
 }
@@ -4960,6 +4960,7 @@  static int handle_cr(struct kvm_vcpu *vcpu)
 			return kvm_complete_insn_gp(vcpu, err);
 		case 3:
 			WARN_ON_ONCE(enable_unrestricted_guest);
+
 			err = kvm_set_cr3(vcpu, val);
 			return kvm_complete_insn_gp(vcpu, err);
 		case 4:
@@ -4985,14 +4986,13 @@  static int handle_cr(struct kvm_vcpu *vcpu)
 		}
 		break;
 	case 2: /* clts */
-		WARN_ONCE(1, "Guest should always own CR0.TS");
-		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
-		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
-		return kvm_skip_emulated_instruction(vcpu);
+		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
+		return -EIO;
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
 			WARN_ON_ONCE(enable_unrestricted_guest);
+
 			val = kvm_read_cr3(vcpu);
 			kvm_register_write(vcpu, reg, val);
 			trace_kvm_cr_read(cr, val);
@@ -5330,7 +5330,9 @@  static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu)
 {
-	WARN_ON_ONCE(!enable_vnmi);
+	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
+		return -EIO;
+
 	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
 	++vcpu->stat.nmi_window_exits;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5908,7 +5910,8 @@  static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 	 * below) should never happen as that means we incorrectly allowed a
 	 * nested VM-Enter with an invalid vmcs12.
 	 */
-	WARN_ON_ONCE(vmx->nested.nested_run_pending);
+	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
+		return -EIO;
 
 	/* If guest state is invalid, start emulating */
 	if (vmx->emulation_required)
@@ -6258,7 +6261,9 @@  static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 	int max_irr;
 	bool max_irr_updated;
 
-	WARN_ON(!vcpu->arch.apicv_active);
+	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
+		return -EIO;
+
 	if (pi_test_on(&vmx->pi_desc)) {
 		pi_clear_on(&vmx->pi_desc);
 		/*
@@ -6345,7 +6350,7 @@  static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 	gate_desc *desc;
 	u32 intr_info = vmx_get_intr_info(vcpu);
 
-	if (WARN_ONCE(!is_external_intr(intr_info),
+	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 17f4995e80a7..672eb5142b34 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8363,6 +8363,10 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	bool req_immediate_exit = false;
 
 	if (kvm_request_pending(vcpu)) {
+		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+			r = -EIO;
+			goto out;
+		}
 		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
 			if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
 				r = 0;