diff mbox series

[v3,4/4] KVM: SVM: Support #GP handling for the case of nested on nested

Message ID 20210126081831.570253-5-wei.huang2@amd.com (mailing list archive)
State New, archived
Headers show
Series Handle #GP for SVM execution instructions | expand

Commit Message

Wei Huang Jan. 26, 2021, 8:18 a.m. UTC
Under the case of nested on nested (L0->L1->L2->L3), #GP triggered by
SVM instructions can be hidden from L1. Instead the hypervisor can
inject the proper #VMEXIT to inform L1 of what is happening. Thus L1
can avoid invoking the #GP workaround. For this reason we turn on the
guest VM's X86_FEATURE_SVME_ADDR_CHK bit for KVM running inside a VM to
receive the notification and change behavior.

Similarly we check if vcpu is under guest mode before emulating the
vmware-backdoor instructions. For the case of nested on nested, we
let the guest handle it.

Co-developed-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini Jan. 26, 2021, 11:39 a.m. UTC | #1
On 26/01/21 09:18, Wei Huang wrote:
> Under the case of nested on nested (L0->L1->L2->L3), #GP triggered by 
> SVM instructions can be hided from L1. Instead the hypervisor can inject 
> the proper #VMEXIT to inform L1 of what is happening. Thus L1 can avoid 
> invoking the #GP workaround. For this reason we turns on guest VM's 
> X86_FEATURE_SVME_ADDR_CHK bit for KVM running inside VM to receive the 
> notification and change behavior.

Slightly reworked commit message:

KVM: SVM: Fix #GP handling for doubly-nested virtualization

Under the case of nested on nested (L0, L1, L2 are all hypervisors),
#GP triggered by SVM instructions can be hidden from L1.  Because
we do not support emulation of the vVMLOAD/VMSAVE feature, the
L0 hypervisor can inject the proper #VMEXIT to inform L1 of what is
happening and L1 can avoid invoking the #GP workaround.

Thanks,

Paolo
Maxim Levitsky Jan. 26, 2021, 11:59 a.m. UTC | #2
On Tue, 2021-01-26 at 03:18 -0500, Wei Huang wrote:
> Under the case of nested on nested (L0->L1->L2->L3), #GP triggered by
> SVM instructions can be hided from L1. Instead the hypervisor can
> inject the proper #VMEXIT to inform L1 of what is happening. Thus L1
> can avoid invoking the #GP workaround. For this reason we turns on
> guest VM's X86_FEATURE_SVME_ADDR_CHK bit for KVM running inside VM to
> receive the notification and change behavior.
> 
> Similarly we check if vcpu is under guest mode before emulating the
> vmware-backdoor instructions. For the case of nested on nested, we
> let the guest handle it.
> 
> Co-developed-by: Bandan Das <bsd@redhat.com>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> Signed-off-by: Wei Huang <wei.huang2@amd.com>
> Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/kvm/svm/svm.c | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index f9233c79265b..83c401d2709f 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -929,6 +929,9 @@ static __init void svm_set_cpu_caps(void)
>  
>  		if (npt_enabled)
>  			kvm_cpu_cap_set(X86_FEATURE_NPT);
> +
> +		/* Nested VM can receive #VMEXIT instead of triggering #GP */
> +		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
>  	}
>  
>  	/* CPUID 0x80000008 */
> @@ -2198,6 +2201,11 @@ static int svm_instr_opcode(struct kvm_vcpu *vcpu)
>  
>  static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
>  {
> +	const int guest_mode_exit_codes[] = {
> +		[SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
> +		[SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
> +		[SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
> +	};
>  	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
>  		[SVM_INSTR_VMRUN] = vmrun_interception,
>  		[SVM_INSTR_VMLOAD] = vmload_interception,
> @@ -2205,7 +2213,14 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
>  	};
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	return svm_instr_handlers[opcode](svm);
> +	if (is_guest_mode(vcpu)) {
> +		svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
> +		svm->vmcb->control.exit_info_1 = 0;
> +		svm->vmcb->control.exit_info_2 = 0;
> +
> +		return nested_svm_vmexit(svm);
> +	} else
> +		return svm_instr_handlers[opcode](svm);
>  }
>  
>  /*
> @@ -2239,7 +2254,8 @@ static int gp_interception(struct vcpu_svm *svm)
>  		 * VMware backdoor emulation on #GP interception only handles
>  		 * IN{S}, OUT{S}, and RDPMC.
>  		 */
> -		return kvm_emulate_instruction(vcpu,
> +		if (!is_guest_mode(vcpu))
> +			return kvm_emulate_instruction(vcpu,
>  				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
>  	} else
>  		return emulate_svm_instr(vcpu, opcode);

To be honest I expected the vmware backdoor fix to be in a separate patch,
but I see that Paolo already took these patches so I guess it is too late.

Anyway I am very happy to see this workaround merged, and see that bug
disappear forever.

Best regards,
	Maxim Levitsky
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f9233c79265b..83c401d2709f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -929,6 +929,9 @@  static __init void svm_set_cpu_caps(void)
 
 		if (npt_enabled)
 			kvm_cpu_cap_set(X86_FEATURE_NPT);
+
+		/* Nested VM can receive #VMEXIT instead of triggering #GP */
+		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
 	}
 
 	/* CPUID 0x80000008 */
@@ -2198,6 +2201,11 @@  static int svm_instr_opcode(struct kvm_vcpu *vcpu)
 
 static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
 {
+	const int guest_mode_exit_codes[] = {
+		[SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
+		[SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
+		[SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
+	};
 	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
 		[SVM_INSTR_VMRUN] = vmrun_interception,
 		[SVM_INSTR_VMLOAD] = vmload_interception,
@@ -2205,7 +2213,14 @@  static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
 	};
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	return svm_instr_handlers[opcode](svm);
+	if (is_guest_mode(vcpu)) {
+		svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
+		svm->vmcb->control.exit_info_1 = 0;
+		svm->vmcb->control.exit_info_2 = 0;
+
+		return nested_svm_vmexit(svm);
+	} else
+		return svm_instr_handlers[opcode](svm);
 }
 
 /*
@@ -2239,7 +2254,8 @@  static int gp_interception(struct vcpu_svm *svm)
 		 * VMware backdoor emulation on #GP interception only handles
 		 * IN{S}, OUT{S}, and RDPMC.
 		 */
-		return kvm_emulate_instruction(vcpu,
+		if (!is_guest_mode(vcpu))
+			return kvm_emulate_instruction(vcpu,
 				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
 	} else
 		return emulate_svm_instr(vcpu, opcode);