[v4,2/7] x86: kvm: svm: propagate errors from skip_emulated_instruction()

Message ID 20190813135335.25197-3-vkuznets@redhat.com
State New
Series
  • x86: KVM: svm: get rid of hardcoded instructions lengths

Commit Message

Vitaly Kuznetsov Aug. 13, 2019, 1:53 p.m. UTC
On AMD, kvm_x86_ops->skip_emulated_instruction(vcpu) can, in theory,
fail: in the !nrips case we fall back to kvm_emulate_instruction(EMULTYPE_SKIP).
Currently, we only do a printk(KERN_DEBUG) when this happens, which is
not ideal. Propagate the error up the stack.

On VMX, skip_emulated_instruction() doesn't fail; its two explicit call
sites, handle_exception_nmi() and handle_task_switch(), can simply ignore
the result.

On SVM, there are also two explicit call sites. In svm_queue_exception()
nothing seems to be needed, as we already check whether RIP was advanced
or not. In task_switch_interception(), however, we are better off not
proceeding to kvm_task_switch() when skip_emulated_instruction() fails.
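
The resulting SVM flow, roughly (condensed from the diff below, nrips
handling omitted):

static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!svm->next_rip)
		/* !nrips: the emulator may fail to skip the instruction */
		return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);

	kvm_rip_write(vcpu, svm->next_rip);
	svm_set_interrupt_shadow(vcpu, 0);
	return EMULATE_DONE;
}

with the interesting caller now checking the result:

	/* task_switch_interception() */
	if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
		goto fail;	/* reports KVM_EXIT_INTERNAL_ERROR */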

Suggested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              | 36 ++++++++++++++++++---------------
 arch/x86/kvm/vmx/vmx.c          | 16 ++++++++++++---
 arch/x86/kvm/x86.c              |  6 ++++--
 4 files changed, 38 insertions(+), 22 deletions(-)

Comments

Sean Christopherson Aug. 13, 2019, 6:07 p.m. UTC | #1
On Tue, Aug 13, 2019 at 03:53:30PM +0200, Vitaly Kuznetsov wrote:
> @@ -3899,20 +3898,25 @@ static int task_switch_interception(struct vcpu_svm *svm)
>  	if (reason != TASK_SWITCH_GATE ||
>  	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
>  	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
> -	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
> -		skip_emulated_instruction(&svm->vcpu);
> +	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
> +		if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)

This isn't broken in the current code, but it's flawed in the sense that
it assumes skip_emulated_instruction() never returns EMULATE_USER_EXIT.

Note, both SVM and VMX make the opposite assumption when handling
kvm_task_switch() and kvm_inject_realmode_interrupt().

More below...

> +			goto fail;
> +	}
>  
>  	if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
>  		int_vec = -1;
>  
>  	if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
> -				has_error_code, error_code) == EMULATE_FAIL) {
> -		svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> -		svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> -		svm->vcpu.run->internal.ndata = 0;
> -		return 0;
> -	}
> +				has_error_code, error_code) == EMULATE_FAIL)
> +		goto fail;
> +
>  	return 1;
> +
> +fail:
> +	svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> +	svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> +	svm->vcpu.run->internal.ndata = 0;
> +	return 0;
>  }
>  
>  static int cpuid_interception(struct vcpu_svm *svm)

...

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c6d951cbd76c..e8f797fe9d9e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6383,9 +6383,11 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
>  int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> -	int r = EMULATE_DONE;
> +	int r;
>  
> -	kvm_x86_ops->skip_emulated_instruction(vcpu);
> +	r = kvm_x86_ops->skip_emulated_instruction(vcpu);
> +	if (unlikely(r != EMULATE_DONE))
> +		return 0;

x86_emulate_instruction() doesn't set vcpu->run->exit_reason when emulation
fails with EMULTYPE_SKIP, i.e. this will exit to userspace with garbage in
the exit_reason.

handle_ept_misconfig() also has the same (pre-existing) flaw.

Given the handle_ept_misconfig() bug and that kvm_emulate_instruction()
sets vcpu->run->exit_reason when it returns EMULATE_FAIL in the normal
case, I think it makes sense to fix the issue in x86_emulate_instruction().
That would also eliminate the need to worry about EMULATE_USER_EXIT in
task_switch_interception(), e.g. the SVM code can just return 0 when it
gets a non-EMULATE_DONE return type.

E.g.:

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 07ab14d73094..73b86f81ed9c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6201,7 +6201,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
        if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
                return EMULATE_FAIL;

-       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+       if ((!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) ||
+           (emulation_type & EMULTYPE_SKIP)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
@@ -6525,8 +6526,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                                return EMULATE_DONE;
                        if (ctxt->have_exception && inject_emulated_exception(vcpu))
                                return EMULATE_DONE;
-                       if (emulation_type & EMULTYPE_SKIP)
-                               return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu, emulation_type);
                }
        }
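
Under that change the SVM side could presumably collapse to something
like (sketch only):

	if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
		return 0;	/* vcpu->run has already been filled in */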


As for the kvm_task_switch() handling and other cases, I think it's
possible to rework all of the functions and callers that return/handle
EMULATE_DONE to instead return 0/1, i.e. contain EMULATE_* to x86.c.
I'll put together a series, I think you've suffered more than enough scope
creep as it is :-)

>  
>  	/*
>  	 * rflags is the old, "raw" value of the flags.  The new value has
> -- 
> 2.20.1
>
Vitaly Kuznetsov Aug. 14, 2019, 9:34 a.m. UTC | #2
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Tue, Aug 13, 2019 at 03:53:30PM +0200, Vitaly Kuznetsov wrote:
>> @@ -3899,20 +3898,25 @@ static int task_switch_interception(struct vcpu_svm *svm)
>>  	if (reason != TASK_SWITCH_GATE ||
>>  	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
>>  	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
>> -	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
>> -		skip_emulated_instruction(&svm->vcpu);
>> +	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
>> +		if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
>
> This isn't broken in the current code, but it's flawed in the sense that
> it assumes skip_emulated_instruction() never returns EMULATE_USER_EXIT.
>
> Note, both SVM and VMX make the opposite assumption when handling
> kvm_task_switch() and kvm_inject_realmode_interrupt().
>
> More below...
>
>> +			goto fail;
>> +	}
>>  
>>  	if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
>>  		int_vec = -1;
>>  
>>  	if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
>> -				has_error_code, error_code) == EMULATE_FAIL) {
>> -		svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> -		svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> -		svm->vcpu.run->internal.ndata = 0;
>> -		return 0;
>> -	}
>> +				has_error_code, error_code) == EMULATE_FAIL)
>> +		goto fail;
>> +
>>  	return 1;
>> +
>> +fail:
>> +	svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> +	svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> +	svm->vcpu.run->internal.ndata = 0;
>> +	return 0;
>>  }
>>  
>>  static int cpuid_interception(struct vcpu_svm *svm)
>
> ...
>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index c6d951cbd76c..e8f797fe9d9e 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6383,9 +6383,11 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
>>  int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
>>  {
>>  	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
>> -	int r = EMULATE_DONE;
>> +	int r;
>>  
>> -	kvm_x86_ops->skip_emulated_instruction(vcpu);
>> +	r = kvm_x86_ops->skip_emulated_instruction(vcpu);
>> +	if (unlikely(r != EMULATE_DONE))
>> +		return 0;
>
> x86_emulate_instruction() doesn't set vcpu->run->exit_reason when emulation
> fails with EMULTYPE_SKIP, i.e. this will exit to userspace with garbage in
> the exit_reason.

Oh, nice catch, will take a look!

>
> handle_ept_misconfig() also has the same (pre-existing) flaw.
>
> Given the handle_ept_misconfig() bug and that kvm_emulate_instruction()
> sets vcpu->run->exit_reason when it returns EMULATE_FAIL in the normal
> case, I think it makes sense to fix the issue in x86_emulate_instruction().
> That would also eliminate the need to worry about EMULATE_USER_EXIT in
> task_switch_interception(), e.g. the SVM code can just return 0 when it
> gets a non-EMULATE_DONE return type.
>
> E.g.:
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 07ab14d73094..73b86f81ed9c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6201,7 +6201,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>         if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
>                 return EMULATE_FAIL;
>
> -       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
> +       if ((!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) ||
> +           (emulation_type & EMULTYPE_SKIP)) {
>                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>                 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>                 vcpu->run->internal.ndata = 0;
> @@ -6525,8 +6526,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>                                 return EMULATE_DONE;
>                         if (ctxt->have_exception && inject_emulated_exception(vcpu))
>                                 return EMULATE_DONE;
> -                       if (emulation_type & EMULTYPE_SKIP)
> -                               return EMULATE_FAIL;
>                         return handle_emulation_failure(vcpu, emulation_type);
>                 }
>         }
>
>
> As for the kvm_task_switch() handling and other cases, I think it's
> possible to rework all of the functions and callers that return/handle
> EMULATE_DONE to instead return 0/1, i.e. contain EMULATE_* to x86.c.
> I'll put together a series, I think you've suffered more than enough scope
> creep as it is :-)

No problem at all, you seem to have a lot of great ideas on how to
improve things :-) Thanks for your review!
Sean Christopherson Aug. 15, 2019, 12:19 a.m. UTC | #3
On Wed, Aug 14, 2019 at 11:34:52AM +0200, Vitaly Kuznetsov wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
>
> > x86_emulate_instruction() doesn't set vcpu->run->exit_reason when emulation
> > fails with EMULTYPE_SKIP, i.e. this will exit to userspace with garbage in
> > the exit_reason.
> 
> Oh, nice catch, will take a look!

Don't worry about addressing this.  Paolo has already queued the series,
and I've got a patch set waiting that purges emulation_result entirely
that I'll post once your series hits kvm/queue.
Vitaly Kuznetsov Aug. 15, 2019, 9:24 a.m. UTC | #4
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Wed, Aug 14, 2019 at 11:34:52AM +0200, Vitaly Kuznetsov wrote:
>> Sean Christopherson <sean.j.christopherson@intel.com> writes:
>>
>> > x86_emulate_instruction() doesn't set vcpu->run->exit_reason when emulation
>> > fails with EMULTYPE_SKIP, i.e. this will exit to userspace with garbage in
>> > the exit_reason.
>> 
>> Oh, nice catch, will take a look!
>
> Don't worry about addressing this.  Paolo has already queued the series,
> and I've got a patch set waiting that purges emulation_result entirely
> that I'll post once your series hits kvm/queue.

Sometimes being slow and lazy pays off :-)

Thanks a lot!

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7b0a4ee77313..f9e6d0b0f581 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1068,7 +1068,7 @@  struct kvm_x86_ops {
 
 	void (*run)(struct kvm_vcpu *vcpu);
 	int (*handle_exit)(struct kvm_vcpu *vcpu);
-	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7e843b340490..8299b0de06e2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -770,7 +770,7 @@  static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 
 }
 
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -779,18 +779,17 @@  static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 		svm->next_rip = svm->vmcb->control.next_rip;
 	}
 
-	if (!svm->next_rip) {
-		if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
-				EMULATE_DONE)
-			printk(KERN_DEBUG "%s: NOP\n", __func__);
-		return;
-	}
+	if (!svm->next_rip)
+		return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
+
 	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
 		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
 		       __func__, kvm_rip_read(vcpu), svm->next_rip);
 
 	kvm_rip_write(vcpu, svm->next_rip);
 	svm_set_interrupt_shadow(vcpu, 0);
+
+	return EMULATE_DONE;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu)
@@ -821,7 +820,7 @@  static void svm_queue_exception(struct kvm_vcpu *vcpu)
 		 * raises a fault that is not intercepted. Still better than
 		 * failing in all cases.
 		 */
-		skip_emulated_instruction(&svm->vcpu);
+		(void)skip_emulated_instruction(&svm->vcpu);
 		rip = kvm_rip_read(&svm->vcpu);
 		svm->int3_rip = rip + svm->vmcb->save.cs.base;
 		svm->int3_injected = rip - old_rip;
@@ -3899,20 +3898,25 @@  static int task_switch_interception(struct vcpu_svm *svm)
 	if (reason != TASK_SWITCH_GATE ||
 	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
 	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
-	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
-		skip_emulated_instruction(&svm->vcpu);
+	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
+		if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
+			goto fail;
+	}
 
 	if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
 		int_vec = -1;
 
 	if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
-				has_error_code, error_code) == EMULATE_FAIL) {
-		svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-		svm->vcpu.run->internal.ndata = 0;
-		return 0;
-	}
+				has_error_code, error_code) == EMULATE_FAIL)
+		goto fail;
+
 	return 1;
+
+fail:
+	svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	svm->vcpu.run->internal.ndata = 0;
+	return 0;
 }
 
 static int cpuid_interception(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 074385c86c09..358827b5bc44 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1472,8 +1472,11 @@  static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
 	return 0;
 }
 
-
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+/*
+ * Returns an int to be compatible with SVM implementation (which can fail).
+ * Do not use directly, use skip_emulated_instruction() instead.
+ */
+static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	unsigned long rip;
 
@@ -1483,6 +1486,13 @@  static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 
 	/* skipping an emulated instruction also counts */
 	vmx_set_interrupt_shadow(vcpu, 0);
+
+	return EMULATE_DONE;
+}
+
+static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+	(void)__skip_emulated_instruction(vcpu);
 }
 
 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
@@ -7700,7 +7710,7 @@  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
-	.skip_emulated_instruction = skip_emulated_instruction,
+	.skip_emulated_instruction = __skip_emulated_instruction,
 	.set_interrupt_shadow = vmx_set_interrupt_shadow,
 	.get_interrupt_shadow = vmx_get_interrupt_shadow,
 	.patch_hypercall = vmx_patch_hypercall,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c6d951cbd76c..e8f797fe9d9e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6383,9 +6383,11 @@  static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-	int r = EMULATE_DONE;
+	int r;
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	r = kvm_x86_ops->skip_emulated_instruction(vcpu);
+	if (unlikely(r != EMULATE_DONE))
+		return 0;
 
 	/*
 	 * rflags is the old, "raw" value of the flags.  The new value has