
[v2,2/4] KVM: SVM: Add emulation support for #GP triggered by SVM instructions

Message ID 20210121065508.1169585-3-wei.huang2@amd.com (mailing list archive)
State New, archived
Series Handle #GP for SVM execution instructions

Commit Message

Wei Huang Jan. 21, 2021, 6:55 a.m. UTC
From: Bandan Das <bsd@redhat.com>

While running SVM-related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
CPUs check EAX against reserved memory regions (e.g. SMM memory on the host)
before checking the VMCB's instruction intercept. If EAX falls into such
a memory area, #GP is triggered before VMEXIT. This causes problems under
nested virtualization. To solve this problem, KVM needs to trap #GP and
check the instruction that triggered it. For SVM execution instructions,
KVM emulates them.

Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 99 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 81 insertions(+), 18 deletions(-)

Comments

Maxim Levitsky Jan. 21, 2021, 2:07 p.m. UTC | #1
On Thu, 2021-01-21 at 01:55 -0500, Wei Huang wrote:
> From: Bandan Das <bsd@redhat.com>
> 
> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
> CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
> before checking VMCB's instruction intercept. If EAX falls into such
> memory areas, #GP is triggered before VMEXIT. This causes problem under
> nested virtualization. To solve this problem, KVM needs to trap #GP and
> check the instructions triggering #GP. For VM execution instructions,
> KVM emulates these instructions.
> 
> Co-developed-by: Wei Huang <wei.huang2@amd.com>
> Signed-off-by: Wei Huang <wei.huang2@amd.com>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>  arch/x86/kvm/svm/svm.c | 99 ++++++++++++++++++++++++++++++++++--------
>  1 file changed, 81 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 7ef171790d02..6ed523cab068 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -288,6 +288,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>  		if (!(efer & EFER_SVME)) {
>  			svm_leave_nested(svm);
>  			svm_set_gif(svm, true);
> +			/* #GP intercept is still needed in vmware_backdoor */
> +			if (!enable_vmware_backdoor)
> +				clr_exception_intercept(svm, GP_VECTOR);
Again I would prefer a flag for the errata workaround, but this is still
better.

>  
>  			/*
>  			 * Free the nested guest state, unless we are in SMM.
> @@ -309,6 +312,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>  
>  	svm->vmcb->save.efer = efer | EFER_SVME;
>  	vmcb_mark_dirty(svm->vmcb, VMCB_CR);
> +	/* Enable #GP interception for SVM instructions */
> +	set_exception_intercept(svm, GP_VECTOR);
> +
>  	return 0;
>  }
>  
> @@ -1957,24 +1963,6 @@ static int ac_interception(struct vcpu_svm *svm)
>  	return 1;
>  }
>  
> -static int gp_interception(struct vcpu_svm *svm)
> -{
> -	struct kvm_vcpu *vcpu = &svm->vcpu;
> -	u32 error_code = svm->vmcb->control.exit_info_1;
> -
> -	WARN_ON_ONCE(!enable_vmware_backdoor);
> -
> -	/*
> -	 * VMware backdoor emulation on #GP interception only handles IN{S},
> -	 * OUT{S}, and RDPMC, none of which generate a non-zero error code.
> -	 */
> -	if (error_code) {
> -		kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> -		return 1;
> -	}
> -	return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
> -}
> -
>  static bool is_erratum_383(void)
>  {
>  	int err, i;
> @@ -2173,6 +2161,81 @@ static int vmrun_interception(struct vcpu_svm *svm)
>  	return nested_svm_vmrun(svm);
>  }
>  
> +enum {
> +	NOT_SVM_INSTR,
> +	SVM_INSTR_VMRUN,
> +	SVM_INSTR_VMLOAD,
> +	SVM_INSTR_VMSAVE,
> +};
> +
> +/* Return NOT_SVM_INSTR if not SVM instrs, otherwise return decode result */
> +static int svm_instr_opcode(struct kvm_vcpu *vcpu)
> +{
> +	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> +
> +	if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
> +		return NOT_SVM_INSTR;
> +
> +	switch (ctxt->modrm) {
> +	case 0xd8: /* VMRUN */
> +		return SVM_INSTR_VMRUN;
> +	case 0xda: /* VMLOAD */
> +		return SVM_INSTR_VMLOAD;
> +	case 0xdb: /* VMSAVE */
> +		return SVM_INSTR_VMSAVE;
> +	default:
> +		break;
> +	}
> +
> +	return NOT_SVM_INSTR;
> +}
> +
> +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
> +{
> +	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
> +		[SVM_INSTR_VMRUN] = vmrun_interception,
> +		[SVM_INSTR_VMLOAD] = vmload_interception,
> +		[SVM_INSTR_VMSAVE] = vmsave_interception,
> +	};
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	return svm_instr_handlers[opcode](svm);
> +}
> +
> +/*
> + * #GP handling code. Note that #GP can be triggered under the following two
> + * cases:
> + *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
> + *      some AMD CPUs when EAX of these instructions are in the reserved memory
> + *      regions (e.g. SMM memory on host).
> + *   2) VMware backdoor
> + */
> +static int gp_interception(struct vcpu_svm *svm)
> +{
> +	struct kvm_vcpu *vcpu = &svm->vcpu;
> +	u32 error_code = svm->vmcb->control.exit_info_1;
> +	int opcode;
> +
> +	/* Both #GP cases have zero error_code */

I would have kept the original description of possible #GP reasons
for the VMWARE backdoor and that WARN_ON_ONCE that was removed.


> +	if (error_code)
> +		goto reinject;
> +
> +	/* Decode the instruction for usage later */
> +	if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
> +		goto reinject;
> +
> +	opcode = svm_instr_opcode(vcpu);
> +	if (opcode)

I prefer opcode != NOT_SVM_INSTR.

> +		return emulate_svm_instr(vcpu, opcode);
> +	else

'WARN_ON_ONCE(!enable_vmware_backdoor)' I think can be placed here.


> +		return kvm_emulate_instruction(vcpu,
> +				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);

I tested the vmware backdoor a bit (using the kvm unit tests) and I found out a tiny pre-existing bug
there:

We shouldn't emulate the vmware backdoor for a nested guest, but rather let the nested hypervisor handle it.

The below patch (on top of your patches) works for me and allows the vmware backdoor 
test to pass when kvm unit tests run in a guest.

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fe97b0e41824a..4557fdc9c3e1b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2243,7 +2243,7 @@ static int gp_interception(struct vcpu_svm *svm)
 	opcode = svm_instr_opcode(vcpu);
 	if (opcode)
 		return emulate_svm_instr(vcpu, opcode);
-	else
+	else if (!is_guest_mode(vcpu))
 		return kvm_emulate_instruction(vcpu,
 				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
 


Best regards,
	Maxim Levitsky

> +
> +reinject:
> +	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> +	return 1;
> +}
> +
>  void svm_set_gif(struct vcpu_svm *svm, bool value)
>  {
>  	if (value) {
Wei Huang Jan. 21, 2021, 4:06 p.m. UTC | #2
On 1/21/21 8:07 AM, Maxim Levitsky wrote:
> On Thu, 2021-01-21 at 01:55 -0500, Wei Huang wrote:
>> From: Bandan Das <bsd@redhat.com>
>>
>> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
>> CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
>> before checking VMCB's instruction intercept. If EAX falls into such
>> memory areas, #GP is triggered before VMEXIT. This causes problem under
>> nested virtualization. To solve this problem, KVM needs to trap #GP and
>> check the instructions triggering #GP. For VM execution instructions,
>> KVM emulates these instructions.
>>
>> Co-developed-by: Wei Huang <wei.huang2@amd.com>
>> Signed-off-by: Wei Huang <wei.huang2@amd.com>
>> Signed-off-by: Bandan Das <bsd@redhat.com>
>> ---
>>  arch/x86/kvm/svm/svm.c | 99 ++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 81 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index 7ef171790d02..6ed523cab068 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -288,6 +288,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>>  		if (!(efer & EFER_SVME)) {
>>  			svm_leave_nested(svm);
>>  			svm_set_gif(svm, true);
>> +			/* #GP intercept is still needed in vmware_backdoor */
>> +			if (!enable_vmware_backdoor)
>> +				clr_exception_intercept(svm, GP_VECTOR);
> Again I would prefer a flag for the errata workaround, but this is still
> better.

Instead of using !enable_vmware_backdoor, would the following be better?
Or is the existing form acceptable?

if (!kvm_cpu_cap_has(X86_FEATURE_SVME_ADDR_CHK))
	clr_exception_intercept(svm, GP_VECTOR);

> 
>>  
>>  			/*
>>  			 * Free the nested guest state, unless we are in SMM.
>> @@ -309,6 +312,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>>  
>>  	svm->vmcb->save.efer = efer | EFER_SVME;
>>  	vmcb_mark_dirty(svm->vmcb, VMCB_CR);
>> +	/* Enable #GP interception for SVM instructions */
>> +	set_exception_intercept(svm, GP_VECTOR);
>> +
>>  	return 0;
>>  }
>>  
>> @@ -1957,24 +1963,6 @@ static int ac_interception(struct vcpu_svm *svm)
>>  	return 1;
>>  }
>>  
>> -static int gp_interception(struct vcpu_svm *svm)
>> -{
>> -	struct kvm_vcpu *vcpu = &svm->vcpu;
>> -	u32 error_code = svm->vmcb->control.exit_info_1;
>> -
>> -	WARN_ON_ONCE(!enable_vmware_backdoor);
>> -
>> -	/*
>> -	 * VMware backdoor emulation on #GP interception only handles IN{S},
>> -	 * OUT{S}, and RDPMC, none of which generate a non-zero error code.
>> -	 */
>> -	if (error_code) {
>> -		kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
>> -		return 1;
>> -	}
>> -	return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
>> -}
>> -
>>  static bool is_erratum_383(void)
>>  {
>>  	int err, i;
>> @@ -2173,6 +2161,81 @@ static int vmrun_interception(struct vcpu_svm *svm)
>>  	return nested_svm_vmrun(svm);
>>  }
>>  
>> +enum {
>> +	NOT_SVM_INSTR,
>> +	SVM_INSTR_VMRUN,
>> +	SVM_INSTR_VMLOAD,
>> +	SVM_INSTR_VMSAVE,
>> +};
>> +
>> +/* Return NOT_SVM_INSTR if not SVM instrs, otherwise return decode result */
>> +static int svm_instr_opcode(struct kvm_vcpu *vcpu)
>> +{
>> +	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
>> +
>> +	if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
>> +		return NOT_SVM_INSTR;
>> +
>> +	switch (ctxt->modrm) {
>> +	case 0xd8: /* VMRUN */
>> +		return SVM_INSTR_VMRUN;
>> +	case 0xda: /* VMLOAD */
>> +		return SVM_INSTR_VMLOAD;
>> +	case 0xdb: /* VMSAVE */
>> +		return SVM_INSTR_VMSAVE;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	return NOT_SVM_INSTR;
>> +}
>> +
>> +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
>> +{
>> +	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
>> +		[SVM_INSTR_VMRUN] = vmrun_interception,
>> +		[SVM_INSTR_VMLOAD] = vmload_interception,
>> +		[SVM_INSTR_VMSAVE] = vmsave_interception,
>> +	};
>> +	struct vcpu_svm *svm = to_svm(vcpu);
>> +
>> +	return svm_instr_handlers[opcode](svm);
>> +}
>> +
>> +/*
>> + * #GP handling code. Note that #GP can be triggered under the following two
>> + * cases:
>> + *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
>> + *      some AMD CPUs when EAX of these instructions are in the reserved memory
>> + *      regions (e.g. SMM memory on host).
>> + *   2) VMware backdoor
>> + */
>> +static int gp_interception(struct vcpu_svm *svm)
>> +{
>> +	struct kvm_vcpu *vcpu = &svm->vcpu;
>> +	u32 error_code = svm->vmcb->control.exit_info_1;
>> +	int opcode;
>> +
>> +	/* Both #GP cases have zero error_code */
> 
> I would have kept the original description of possible #GP reasons
> for the VMWARE backdoor and that WARN_ON_ONCE that was removed.
> 

Will do
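
Roughly, I am thinking of something like the following for the respin (just a
sketch of where the restored comment, the WARN_ON_ONCE(), and the explicit
NOT_SVM_INSTR comparison would land; not the final patch):

static int gp_interception(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 error_code = svm->vmcb->control.exit_info_1;
	int opcode;

	/*
	 * Both #GP cases have a zero error code: the SVM instruction erratum
	 * and VMware backdoor emulation (IN{S}, OUT{S}, RDPMC) never generate
	 * a non-zero error code.
	 */
	if (error_code)
		goto reinject;

	/* Decode the instruction for use below */
	if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
		goto reinject;

	opcode = svm_instr_opcode(vcpu);
	if (opcode != NOT_SVM_INSTR)
		return emulate_svm_instr(vcpu, opcode);

	WARN_ON_ONCE(!enable_vmware_backdoor);
	return kvm_emulate_instruction(vcpu,
			EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);

reinject:
	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
	return 1;
}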

> 
>> +	if (error_code)
>> +		goto reinject;
>> +
>> +	/* Decode the instruction for usage later */
>> +	if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
>> +		goto reinject;
>> +
>> +	opcode = svm_instr_opcode(vcpu);
>> +	if (opcode)
> 
> I prefer opcode != NOT_SVM_INSTR.
> 
>> +		return emulate_svm_instr(vcpu, opcode);
>> +	else
> 
> 'WARN_ON_ONCE(!enable_vmware_backdoor)' I think can be placed here.
> 
> 
>> +		return kvm_emulate_instruction(vcpu,
>> +				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
> 
> I tested the vmware backdoor a bit (using the kvm unit tests) and I found out a tiny pre-existing bug
> there:
> 
> We shouldn't emulate the vmware backdoor for a nested guest, but rather let it do it.
> 
> The below patch (on top of your patches) works for me and allows the vmware backdoor 
> test to pass when kvm unit tests run in a guest.
> 

Could this fix be a separate patch? This problem existed even before this
patchset.

> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index fe97b0e41824a..4557fdc9c3e1b 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -2243,7 +2243,7 @@ static int gp_interception(struct vcpu_svm *svm)
>  	opcode = svm_instr_opcode(vcpu);
>  	if (opcode)
>  		return emulate_svm_instr(vcpu, opcode);
> -	else
> +	else if (!is_guest_mode(vcpu))
>  		return kvm_emulate_instruction(vcpu,
>  				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
>  
> 
> 
> Best regards,
> 	Maxim Levitsky
> 
>> +
>> +reinject:
>> +	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
>> +	return 1;
>> +}
>> +
>>  void svm_set_gif(struct vcpu_svm *svm, bool value)
>>  {
>>  	if (value) {
> 
> 
> 
> 
>
Maxim Levitsky Jan. 21, 2021, 4:55 p.m. UTC | #3
On Thu, 2021-01-21 at 10:06 -0600, Wei Huang wrote:
> 
> On 1/21/21 8:07 AM, Maxim Levitsky wrote:
> > On Thu, 2021-01-21 at 01:55 -0500, Wei Huang wrote:
> > > From: Bandan Das <bsd@redhat.com>
> > > 
> > > While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
> > > CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
> > > before checking VMCB's instruction intercept. If EAX falls into such
> > > memory areas, #GP is triggered before VMEXIT. This causes problem under
> > > nested virtualization. To solve this problem, KVM needs to trap #GP and
> > > check the instructions triggering #GP. For VM execution instructions,
> > > KVM emulates these instructions.
> > > 
> > > Co-developed-by: Wei Huang <wei.huang2@amd.com>
> > > Signed-off-by: Wei Huang <wei.huang2@amd.com>
> > > Signed-off-by: Bandan Das <bsd@redhat.com>
> > > ---
> > >  arch/x86/kvm/svm/svm.c | 99 ++++++++++++++++++++++++++++++++++--------
> > >  1 file changed, 81 insertions(+), 18 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > > index 7ef171790d02..6ed523cab068 100644
> > > --- a/arch/x86/kvm/svm/svm.c
> > > +++ b/arch/x86/kvm/svm/svm.c
> > > @@ -288,6 +288,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
> > >  		if (!(efer & EFER_SVME)) {
> > >  			svm_leave_nested(svm);
> > >  			svm_set_gif(svm, true);
> > > +			/* #GP intercept is still needed in vmware_backdoor */
> > > +			if (!enable_vmware_backdoor)
> > > +				clr_exception_intercept(svm, GP_VECTOR);
> > Again I would prefer a flag for the errata workaround, but this is still
> > better.
> 
> Instead of using !enable_vmware_backdoor, will the following be better?
> Or the existing form is acceptable.
> 
> if (!kvm_cpu_cap_has(X86_FEATURE_SVME_ADDR_CHK))
> 	clr_exception_intercept(svm, GP_VECTOR);

To be honest I would prefer a module param named something like
'enable_svm_gp_errata_workaround' with a 3-state value (0, 1, -1),
aka false, true, auto.

0/1 would force the workaround off/on.
-1 would auto-select based on X86_FEATURE_SVME_ADDR_CHK.

0 could be used if, for example, someone is paranoid about attack surface.
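
Something along these lines, purely as a sketch (the helper name and the exact
wiring below are made up, only the idea matters):

static int enable_svm_gp_errata_workaround = -1; /* -1: auto, 0: off, 1: on */
module_param(enable_svm_gp_errata_workaround, int, 0444);

static bool svm_need_gp_erratum_intercept(void)
{
	if (enable_svm_gp_errata_workaround >= 0)
		return enable_svm_gp_errata_workaround;

	/* auto: only intercept #GP when the CPU lacks the SVME address check */
	return !kvm_cpu_cap_has(X86_FEATURE_SVME_ADDR_CHK);
}

and then svm_set_efer() would set/clear the GP_VECTOR intercept based on
svm_need_gp_erratum_intercept() || enable_vmware_backdoor.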
This isn't that important to me though, so you can also leave it as is
if you prefer.

> 
> > >  
> > >  			/*
> > >  			 * Free the nested guest state, unless we are in SMM.
> > > @@ -309,6 +312,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
> > >  
> > >  	svm->vmcb->save.efer = efer | EFER_SVME;
> > >  	vmcb_mark_dirty(svm->vmcb, VMCB_CR);
> > > +	/* Enable #GP interception for SVM instructions */
> > > +	set_exception_intercept(svm, GP_VECTOR);
> > > +
> > >  	return 0;
> > >  }
> > >  
> > > @@ -1957,24 +1963,6 @@ static int ac_interception(struct vcpu_svm *svm)
> > >  	return 1;
> > >  }
> > >  
> > > -static int gp_interception(struct vcpu_svm *svm)
> > > -{
> > > -	struct kvm_vcpu *vcpu = &svm->vcpu;
> > > -	u32 error_code = svm->vmcb->control.exit_info_1;
> > > -
> > > -	WARN_ON_ONCE(!enable_vmware_backdoor);
> > > -
> > > -	/*
> > > -	 * VMware backdoor emulation on #GP interception only handles IN{S},
> > > -	 * OUT{S}, and RDPMC, none of which generate a non-zero error code.
> > > -	 */
> > > -	if (error_code) {
> > > -		kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> > > -		return 1;
> > > -	}
> > > -	return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
> > > -}
> > > -
> > >  static bool is_erratum_383(void)
> > >  {
> > >  	int err, i;
> > > @@ -2173,6 +2161,81 @@ static int vmrun_interception(struct vcpu_svm *svm)
> > >  	return nested_svm_vmrun(svm);
> > >  }
> > >  
> > > +enum {
> > > +	NOT_SVM_INSTR,
> > > +	SVM_INSTR_VMRUN,
> > > +	SVM_INSTR_VMLOAD,
> > > +	SVM_INSTR_VMSAVE,
> > > +};
> > > +
> > > +/* Return NOT_SVM_INSTR if not SVM instrs, otherwise return decode result */
> > > +static int svm_instr_opcode(struct kvm_vcpu *vcpu)
> > > +{
> > > +	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> > > +
> > > +	if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
> > > +		return NOT_SVM_INSTR;
> > > +
> > > +	switch (ctxt->modrm) {
> > > +	case 0xd8: /* VMRUN */
> > > +		return SVM_INSTR_VMRUN;
> > > +	case 0xda: /* VMLOAD */
> > > +		return SVM_INSTR_VMLOAD;
> > > +	case 0xdb: /* VMSAVE */
> > > +		return SVM_INSTR_VMSAVE;
> > > +	default:
> > > +		break;
> > > +	}
> > > +
> > > +	return NOT_SVM_INSTR;
> > > +}
> > > +
> > > +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
> > > +{
> > > +	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
> > > +		[SVM_INSTR_VMRUN] = vmrun_interception,
> > > +		[SVM_INSTR_VMLOAD] = vmload_interception,
> > > +		[SVM_INSTR_VMSAVE] = vmsave_interception,
> > > +	};
> > > +	struct vcpu_svm *svm = to_svm(vcpu);
> > > +
> > > +	return svm_instr_handlers[opcode](svm);
> > > +}
> > > +
> > > +/*
> > > + * #GP handling code. Note that #GP can be triggered under the following two
> > > + * cases:
> > > + *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
> > > + *      some AMD CPUs when EAX of these instructions are in the reserved memory
> > > + *      regions (e.g. SMM memory on host).
> > > + *   2) VMware backdoor
> > > + */
> > > +static int gp_interception(struct vcpu_svm *svm)
> > > +{
> > > +	struct kvm_vcpu *vcpu = &svm->vcpu;
> > > +	u32 error_code = svm->vmcb->control.exit_info_1;
> > > +	int opcode;
> > > +
> > > +	/* Both #GP cases have zero error_code */
> > 
> > I would have kept the original description of possible #GP reasons
> > for the VMWARE backdoor and that WARN_ON_ONCE that was removed.
> > 
> 
> Will do
> 
> > > +	if (error_code)
> > > +		goto reinject;
> > > +
> > > +	/* Decode the instruction for usage later */
> > > +	if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
> > > +		goto reinject;
> > > +
> > > +	opcode = svm_instr_opcode(vcpu);
> > > +	if (opcode)
> > 
> > I prefer opcode != NOT_SVM_INSTR.
> > 
> > > +		return emulate_svm_instr(vcpu, opcode);
> > > +	else
> > 
> > 'WARN_ON_ONCE(!enable_vmware_backdoor)' I think can be placed here.
> > 
> > 
> > > +		return kvm_emulate_instruction(vcpu,
> > > +				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
> > 
> > I tested the vmware backdoor a bit (using the kvm unit tests) and I found out a tiny pre-existing bug
> > there:
> > 
> > We shouldn't emulate the vmware backdoor for a nested guest, but rather let it do it.
> > 
> > The below patch (on top of your patches) works for me and allows the vmware backdoor 
> > test to pass when kvm unit tests run in a guest.
> > 
> 
> This fix can be a separate patch? This problem exist even before this
> patchset.

It should indeed be a separate patch, but it won't hurt to add it
to this series IMHO if you have time for that.

I just pointed that out because I found this bug during testing,
to avoid forgetting about it.

BTW, on an unrelated note: the smap test is currently broken in kvm-unit-tests.
I bisected it to commit 322cdd6405250a2a3e48db199f97a45ef519e226

It seems that the following hack (I have no idea why it works, since I haven't
dug deep into that area) 'fixes' the smap test for me:

-#define USER_BASE      (1 << 24)
+#define USER_BASE      (1 << 25)


Best regards,
	Maxim Levitsky

> 
> > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > index fe97b0e41824a..4557fdc9c3e1b 100644
> > --- a/arch/x86/kvm/svm/svm.c
> > +++ b/arch/x86/kvm/svm/svm.c
> > @@ -2243,7 +2243,7 @@ static int gp_interception(struct vcpu_svm *svm)
> >  	opcode = svm_instr_opcode(vcpu);
> >  	if (opcode)
> >  		return emulate_svm_instr(vcpu, opcode);
> > -	else
> > +	else if (!is_guest_mode(vcpu))
> >  		return kvm_emulate_instruction(vcpu,
> >  				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
> >  
> > 
> > 
> > Best regards,
> > 	Maxim Levitsky
> > 
> > > +
> > > +reinject:
> > > +	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> > > +	return 1;
> > > +}
> > > +
> > >  void svm_set_gif(struct vcpu_svm *svm, bool value)
> > >  {
> > >  	if (value) {
> > 
> > 
> > 
> >
Sean Christopherson Jan. 21, 2021, 10:40 p.m. UTC | #4
On Thu, Jan 21, 2021, Maxim Levitsky wrote:
> BTW, on unrelated note, currently the smap test is broken in kvm-unit tests.
> I bisected it to commit 322cdd6405250a2a3e48db199f97a45ef519e226
> 
> It seems that the following hack (I have no idea why it works,
> since I haven't dug deep into the area 'fixes', the smap test for me)
> 
> -#define USER_BASE      (1 << 24)
> +#define USER_BASE      (1 << 25)

https://lkml.kernel.org/r/20210121111808.619347-2-imbrenda@linux.ibm.com
Maxim Levitsky Jan. 25, 2021, 1:22 p.m. UTC | #5
On Thu, 2021-01-21 at 14:40 -0800, Sean Christopherson wrote:
> On Thu, Jan 21, 2021, Maxim Levitsky wrote:
> > BTW, on unrelated note, currently the smap test is broken in kvm-unit tests.
> > I bisected it to commit 322cdd6405250a2a3e48db199f97a45ef519e226
> > 
> > It seems that the following hack (I have no idea why it works,
> > since I haven't dug deep into the area 'fixes', the smap test for me)
> > 
> > -#define USER_BASE      (1 << 24)
> > +#define USER_BASE      (1 << 25)
> 
> https://lkml.kernel.org/r/20210121111808.619347-2-imbrenda@linux.ibm.com
> 
Thanks!

Best regards,
	Maxim Levitsky

Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7ef171790d02..6ed523cab068 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -288,6 +288,9 @@  int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		if (!(efer & EFER_SVME)) {
 			svm_leave_nested(svm);
 			svm_set_gif(svm, true);
+			/* #GP intercept is still needed in vmware_backdoor */
+			if (!enable_vmware_backdoor)
+				clr_exception_intercept(svm, GP_VECTOR);
 
 			/*
 			 * Free the nested guest state, unless we are in SMM.
@@ -309,6 +312,9 @@  int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 	svm->vmcb->save.efer = efer | EFER_SVME;
 	vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+	/* Enable #GP interception for SVM instructions */
+	set_exception_intercept(svm, GP_VECTOR);
+
 	return 0;
 }
 
@@ -1957,24 +1963,6 @@  static int ac_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int gp_interception(struct vcpu_svm *svm)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-	u32 error_code = svm->vmcb->control.exit_info_1;
-
-	WARN_ON_ONCE(!enable_vmware_backdoor);
-
-	/*
-	 * VMware backdoor emulation on #GP interception only handles IN{S},
-	 * OUT{S}, and RDPMC, none of which generate a non-zero error code.
-	 */
-	if (error_code) {
-		kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-		return 1;
-	}
-	return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
-}
-
 static bool is_erratum_383(void)
 {
 	int err, i;
@@ -2173,6 +2161,81 @@  static int vmrun_interception(struct vcpu_svm *svm)
 	return nested_svm_vmrun(svm);
 }
 
+enum {
+	NOT_SVM_INSTR,
+	SVM_INSTR_VMRUN,
+	SVM_INSTR_VMLOAD,
+	SVM_INSTR_VMSAVE,
+};
+
+/* Return NOT_SVM_INSTR if not SVM instrs, otherwise return decode result */
+static int svm_instr_opcode(struct kvm_vcpu *vcpu)
+{
+	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+	if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
+		return NOT_SVM_INSTR;
+
+	switch (ctxt->modrm) {
+	case 0xd8: /* VMRUN */
+		return SVM_INSTR_VMRUN;
+	case 0xda: /* VMLOAD */
+		return SVM_INSTR_VMLOAD;
+	case 0xdb: /* VMSAVE */
+		return SVM_INSTR_VMSAVE;
+	default:
+		break;
+	}
+
+	return NOT_SVM_INSTR;
+}
+
+static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
+{
+	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
+		[SVM_INSTR_VMRUN] = vmrun_interception,
+		[SVM_INSTR_VMLOAD] = vmload_interception,
+		[SVM_INSTR_VMSAVE] = vmsave_interception,
+	};
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return svm_instr_handlers[opcode](svm);
+}
+
+/*
+ * #GP handling code. Note that #GP can be triggered under the following two
+ * cases:
+ *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
+ *      some AMD CPUs when EAX of these instructions are in the reserved memory
+ *      regions (e.g. SMM memory on host).
+ *   2) VMware backdoor
+ */
+static int gp_interception(struct vcpu_svm *svm)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	u32 error_code = svm->vmcb->control.exit_info_1;
+	int opcode;
+
+	/* Both #GP cases have zero error_code */
+	if (error_code)
+		goto reinject;
+
+	/* Decode the instruction for usage later */
+	if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
+		goto reinject;
+
+	opcode = svm_instr_opcode(vcpu);
+	if (opcode)
+		return emulate_svm_instr(vcpu, opcode);
+	else
+		return kvm_emulate_instruction(vcpu,
+				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
+
+reinject:
+	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+	return 1;
+}
+
 void svm_set_gif(struct vcpu_svm *svm, bool value)
 {
 	if (value) {